{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE TupleSections              #-}
{-# LANGUAGE CPP              #-}

{-|

This implements a common DSL for regular expression replacement text. This
is represented with the 'Replace' data type. It also implements the
'IsString' interface, so if 'OverloadedStrings' is on, you can use a raw
string to build the replacement.

-}


module Data.Text.ICU.Replace
    (
    -- * @OverloadedStrings@ Syntax
    -- $doc

    -- * Types
      Replace

    -- * High-level interface
    , replace
    , replace'
    , replaceAll
    , replaceAll'

    -- * Low-level interface
    , rgroup
    , rtext
    , rstring
    , rfn
    , rtfn
    , rbuilder
    ) where


import           Control.Applicative
import           Data.Attoparsec.Text
import           Data.Foldable
#if __GLASGOW_HASKELL__ >= 804
import           Data.Semigroup (Semigroup)
#endif
import           Data.Monoid
import           Data.String
import qualified Data.Text              as T
import           Data.Text.ICU
import qualified Data.Text.ICU          as ICU
import qualified Data.Text.Lazy         as TL
import qualified Data.Text.Lazy.Builder as TB
import           Data.Tuple
import           Prelude                hiding (span)



-- | A 'Replace' instance is a function from a regular expression match to
-- a 'Data.Text.Lazy.Builder.Builder'. This naturally forms a 'Monoid', so
-- they're easy to combine.
--
-- 'Replace' also implements 'IsString', so raw strings can be used to
-- construct them.
newtype Replace = Replace { Replace -> Match -> Builder
unReplace :: Match -> TB.Builder } deriving
                  ( Semigroup Replace
Replace
Semigroup Replace
-> Replace
-> (Replace -> Replace -> Replace)
-> ([Replace] -> Replace)
-> Monoid Replace
[Replace] -> Replace
Replace -> Replace -> Replace
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [Replace] -> Replace
$cmconcat :: [Replace] -> Replace
mappend :: Replace -> Replace -> Replace
$cmappend :: Replace -> Replace -> Replace
mempty :: Replace
$cmempty :: Replace
$cp1Monoid :: Semigroup Replace
Monoid
#if __GLASGOW_HASKELL__ >= 804
                  , b -> Replace -> Replace
NonEmpty Replace -> Replace
Replace -> Replace -> Replace
(Replace -> Replace -> Replace)
-> (NonEmpty Replace -> Replace)
-> (forall b. Integral b => b -> Replace -> Replace)
-> Semigroup Replace
forall b. Integral b => b -> Replace -> Replace
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: b -> Replace -> Replace
$cstimes :: forall b. Integral b => b -> Replace -> Replace
sconcat :: NonEmpty Replace -> Replace
$csconcat :: NonEmpty Replace -> Replace
<> :: Replace -> Replace -> Replace
$c<> :: Replace -> Replace -> Replace
Semigroup
#endif
                  )

instance IsString Replace where
    fromString :: String -> Replace
fromString = Text -> Replace
parseReplace (Text -> Replace) -> (String -> Text) -> String -> Replace
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack

type MatchState = (Match, Int)

-- | Execute a regular expression on a 'Data.Text.Text' and replace the
-- first match.
replace :: Regex        -- ^ The regular expression to match.
        -> Replace      -- ^ The specification to replace it with.
        -> T.Text       -- ^ The text to operate on.
        -> T.Text       -- ^ The text with the first match replaced.
replace :: Regex -> Replace -> Text -> Text
replace Regex
re Replace
r Text
t = Text -> (Match -> Text) -> Maybe Match -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
t (Replace -> Match -> Text
replace' Replace
r) (Maybe Match -> Text) -> Maybe Match -> Text
forall a b. (a -> b) -> a -> b
$ Regex -> Text -> Maybe Match
ICU.find Regex
re Text
t

-- | Replace one regular expression match with the 'Replace'.
replace' :: Replace     -- ^ The specification to replace it with.
         -> Match       -- ^ The match to replace.
         -> T.Text      -- ^ The text with the match replaced.
replace' :: Replace -> Match -> Text
replace' Replace
r Match
m =
    Last MatchState -> Builder -> Text
finish (Maybe MatchState -> Last MatchState
forall a. Maybe a -> Last a
Last (MatchState -> Maybe MatchState
forall a. a -> Maybe a
Just (Match
m, Int
0))) (Builder -> Text) -> (Builder -> Builder) -> Builder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Builder
p Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>) (Builder -> Text) -> Builder -> Text
forall a b. (a -> b) -> a -> b
$ Replace -> Match -> Builder
unReplace Replace
r Match
m
    where
        p :: Builder
p = (Text -> Builder) -> Maybe Text -> Builder
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Text -> Builder
TB.fromText (Maybe Text -> Builder) -> Maybe Text -> Builder
forall a b. (a -> b) -> a -> b
$ Int -> Match -> Maybe Text
prefix Int
0 Match
m

-- | Execute a regular expression on a 'Data.Text.Text' and replace all
-- matches.
replaceAll :: Regex     -- ^ The regular expression to match.
           -> Replace   -- ^ The specification to replace it with.
           -> T.Text    -- ^ The text to operate on.
           -> T.Text    -- ^ The text with all matches replaced.
replaceAll :: Regex -> Replace -> Text -> Text
replaceAll Regex
re Replace
r Text
t = case Regex -> Text -> [Match]
ICU.findAll Regex
re Text
t of
                        [] -> Text
t
                        [Match]
ms -> Replace -> [Match] -> Text
replaceAll' Replace
r [Match]
ms

-- | Replace all regular expression matches with the 'Replace'.
replaceAll' :: Replace  -- ^ The specification to replace it with.
            -> [Match]  -- ^ The matches to replace.
            -> T.Text   -- ^ The text with all matches replaced.
replaceAll' :: Replace -> [Match] -> Text
replaceAll' Replace
r =
    (Last MatchState -> Builder -> Text)
-> (Last MatchState, Builder) -> Text
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Last MatchState -> Builder -> Text
finish ((Last MatchState, Builder) -> Text)
-> ([Match] -> (Last MatchState, Builder)) -> [Match] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Last MatchState, Builder) -> Match -> (Last MatchState, Builder))
-> (Last MatchState, Builder)
-> [Match]
-> (Last MatchState, Builder)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Last MatchState, Builder) -> Match -> (Last MatchState, Builder)
step (Maybe MatchState -> Last MatchState
forall a. Maybe a -> Last a
Last Maybe MatchState
forall a. Maybe a
Nothing, Builder
forall a. Monoid a => a
mempty)
    where
        step :: (Last MatchState, TB.Builder)
             -> Match
             -> (Last MatchState, TB.Builder)
        step :: (Last MatchState, Builder) -> Match -> (Last MatchState, Builder)
step (Last Maybe MatchState
prev, Builder
buffer) Match
m =
            let s :: Text
s      = Match -> Text
span Match
m
                g :: Text
g      = Maybe Text -> Text
forall (t :: * -> *) m. (Foldable t, Monoid m) => t m -> m
fold (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Match -> Maybe Text
group Int
0 Match
m
                offset :: Int
offset = (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
s) (Int -> Int) -> (Int -> Int) -> Int -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
g) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int -> (MatchState -> Int) -> Maybe MatchState -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int
0 MatchState -> Int
forall a b. (a, b) -> b
snd Maybe MatchState
prev
            in  ( Maybe MatchState -> Last MatchState
forall a. Maybe a -> Last a
Last (Maybe MatchState -> Last MatchState)
-> Maybe MatchState -> Last MatchState
forall a b. (a -> b) -> a -> b
$ MatchState -> Maybe MatchState
forall a. a -> Maybe a
Just (Match
m, Int
offset)
                , Builder
buffer Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Text -> Builder
TB.fromText Text
s Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Replace -> Match -> Builder
unReplace Replace
r Match
m
                )

-- | This handles the last match by including not only the match's prefix
-- and the replacement text, but also the suffix trailing the match.
finish :: Last MatchState       -- ^ The state of the match to get the prefix
                                -- and suffix from.
       -> TB.Builder            -- ^ The current replacement's output.
       -> T.Text
finish :: Last MatchState -> Builder -> Text
finish Last MatchState
m Builder
b =   Text -> Text
TL.toStrict
           (Text -> Text) -> (Maybe Text -> Text) -> Maybe Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.   Builder -> Text
TB.toLazyText
           (Builder -> Text) -> (Maybe Text -> Builder) -> Maybe Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.   Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
mappend Builder
b
           (Builder -> Builder)
-> (Maybe Text -> Builder) -> Maybe Text -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
.   Text -> Builder
TB.fromText
           (Text -> Builder) -> (Maybe Text -> Text) -> Maybe Text -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
.   Maybe Text -> Text
forall (t :: * -> *) m. (Foldable t, Monoid m) => t m -> m
fold
           (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$   Int -> Match -> Maybe Text
suffix Int
0
           (Match -> Maybe Text)
-> (MatchState -> Match) -> MatchState -> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.   MatchState -> Match
forall a b. (a, b) -> a
fst
           (MatchState -> Maybe Text) -> Maybe MatchState -> Maybe Text
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Last MatchState -> Maybe MatchState
forall a. Last a -> Maybe a
getLast Last MatchState
m

-- | Create a 'Replace' that inserts a regular expression group.
rgroup :: Int       -- ^ The number of the group in a regular expression.
       -> Replace   -- ^ The 'Replace' that inserts a group's match.
rgroup :: Int -> Replace
rgroup Int
g = (Match -> Builder) -> Replace
Replace ((Match -> Builder) -> Replace) -> (Match -> Builder) -> Replace
forall a b. (a -> b) -> a -> b
$ Maybe Builder -> Builder
forall (t :: * -> *) m. (Foldable t, Monoid m) => t m -> m
fold (Maybe Builder -> Builder)
-> (Match -> Maybe Builder) -> Match -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Builder) -> Maybe Text -> Maybe Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Builder
TB.fromText (Maybe Text -> Maybe Builder)
-> (Match -> Maybe Text) -> Match -> Maybe Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Match -> Maybe Text
group Int
g

-- | Create a 'Replace' that inserts static 'Data.Text.Text'.
rtext :: T.Text     -- ^ The static 'Data.Text.Text' to insert.
      -> Replace    -- ^ The 'Replace' that inserts the static 'Data.Text.Text'.
rtext :: Text -> Replace
rtext = Builder -> Replace
rbuilder (Builder -> Replace) -> (Text -> Builder) -> Text -> Replace
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Builder
TB.fromText

-- | Create a 'Replace' that inserts a static 'String'.
rstring :: String   -- ^ The static 'String' to insert.
        -> Replace  -- ^ The 'Replace' that inserts the static 'String'.
rstring :: String -> Replace
rstring = Builder -> Replace
rbuilder (Builder -> Replace) -> (String -> Builder) -> String -> Replace
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Builder
TB.fromString

-- | Create a 'Replace' from a function that transforms a 'Match' into
-- a 'Data.Text.Lazy.Builder.Builder'.
rfn :: (Match -> TB.Builder)    -- ^ The function that creates the replacement text.
    -> Replace                  -- ^ The 'Replace' based off that function.
rfn :: (Match -> Builder) -> Replace
rfn = (Match -> Builder) -> Replace
Replace

-- | Create a 'Replace' From a function that transforms a 'Match' into
-- a 'Data.Text.Text'.
rtfn :: (Match -> T.Text)       -- ^ The function that creates the replacement text.
     -> Replace                 -- ^ The 'Replace' based off that function.
rtfn :: (Match -> Text) -> Replace
rtfn = (Match -> Builder) -> Replace
Replace ((Match -> Builder) -> Replace)
-> ((Match -> Text) -> Match -> Builder)
-> (Match -> Text)
-> Replace
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Builder
TB.fromText (Text -> Builder) -> (Match -> Text) -> Match -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
.)

-- | Create a 'Replace' that inserts a static 'Data.Text.Lazy.Builder.Builder'.
rbuilder :: TB.Builder  -- ^ The 'Data.Text.Lazy.Builder.Builder' to insert.
         -> Replace     -- ^ The 'Replace' that inserts the static
                        -- 'Data.Text.Lazy.Builder.Builder'.
rbuilder :: Builder -> Replace
rbuilder = (Match -> Builder) -> Replace
Replace ((Match -> Builder) -> Replace)
-> (Builder -> Match -> Builder) -> Builder -> Replace
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Match -> Builder
forall a b. a -> b -> a
const

-- | This parses a 'Data.Text.Text' into a 'Replace' structure.
--
-- Generally, input text is considered to be static.
--
-- However, groups from the regular expression's matches can be insert
-- using @$1@ (to insert the first group) or @${7}@ (to insert the seventh
-- group).
--
-- Dollar signs can be included in the output by doubling them (@$$@).
parseReplace :: T.Text -> Replace
parseReplace :: Text -> Replace
parseReplace Text
t = (String -> Replace)
-> (Replace -> Replace) -> Either String Replace -> Replace
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Replace -> String -> Replace
forall a b. a -> b -> a
const (Replace -> String -> Replace) -> Replace -> String -> Replace
forall a b. (a -> b) -> a -> b
$ Text -> Replace
rtext Text
t) Replace -> Replace
forall a. a -> a
id
               (Either String Replace -> Replace)
-> Either String Replace -> Replace
forall a b. (a -> b) -> a -> b
$ Parser Replace -> Text -> Either String Replace
forall a. Parser a -> Text -> Either String a
parseOnly (Parser Replace
replacement Parser Replace -> Parser Text () -> Parser Replace
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput) Text
t

-- A replacement.
replacement :: Parser Replace
replacement :: Parser Replace
replacement = [Replace] -> Replace
forall a. Monoid a => [a] -> a
mconcat ([Replace] -> Replace) -> Parser Text [Replace] -> Parser Replace
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Replace -> Parser Text [Replace]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Replace
dollarGroup Parser Replace -> Parser Replace -> Parser Replace
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Replace
raw)

-- A @\$\d+@ or @\$\{\d+\}@ group. This could also be an escaped dollar
-- sign (@$$@).
dollarGroup :: Parser Replace
dollarGroup :: Parser Replace
dollarGroup = Char -> Parser Char
char Char
'$' Parser Char -> Parser Replace -> Parser Replace
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Replace
grp Parser Replace -> Parser Replace -> Parser Replace
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Replace
escaped)
    where curly :: Parser Text Int
curly   = Char -> Parser Char
char Char
'{' Parser Char -> Parser Text Int -> Parser Text Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text Int
forall a. Integral a => Parser a
decimal Parser Text Int -> Parser Char -> Parser Text Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'}'
          grp :: Parser Replace
grp     = Int -> Replace
rgroup (Int -> Replace) -> Parser Text Int -> Parser Replace
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Text Int
forall a. Integral a => Parser a
decimal Parser Text Int -> Parser Text Int -> Parser Text Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text Int
curly)
          escaped :: Parser Replace
escaped = Text -> Replace
rtext (Text -> Replace) -> (Char -> Text) -> Char -> Replace
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
T.singleton (Char -> Replace) -> Parser Char -> Parser Replace
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'$'

-- A raw input string. It must contain no dollar signs (@$@).
raw :: Parser Replace
raw :: Parser Replace
raw = Text -> Replace
rtext (Text -> Replace) -> Parser Text Text -> Parser Replace
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'$')


-- $doc
--
-- The syntax used with 'OverloadedStrings' is meant to be similar to that
-- used in other regular expression libraries in other programming
-- languages.
--
-- Generally, input text is considered to be static.
--
-- >>> replaceAll "a" "b" "aaa"
-- "bbb"
-- >>> replaceAll "ab" "ba" "cdababcd"
-- "cdbabacd"
--
-- However, groups from the regular expression's matches can be insert
-- using @$1@ (to insert the first group) or @${7}@ (to insert the seventh
-- group).
--
-- >>> replaceAll "(.*), (.*)" "$2 $1" "Beeblebrox, Zaphod"
-- "Zaphod Beeblebrox"
-- >>> replaceAll "4(\\d)" "${1}4" "7458"
-- "7548"
--
-- Dollar signs can be included in the output by doubling them (@$$@).
--
-- >>> replaceAll "(\\d+\\.\\d+)" "$$$1" "9.99"
-- "$9.99"