\begin{code} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} module Text.RE.Replace ( Replace(..) , ReplaceMethods(..) , replaceMethods , Context(..) , Location(..) , isTopLocation , replace , replaceAll , replaceAllCaptures , replaceAllCaptures_ , replaceAllCapturesM , replaceCaptures , replaceCaptures_ , replaceCapturesM , expandMacros , expandMacros' ) where import Control.Applicative import Data.Array import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as LBS import Data.Char import qualified Data.Foldable as F import Data.Functor.Identity import qualified Data.HashMap.Strict as HM import Data.Maybe import Data.Monoid import qualified Data.Sequence as S import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.Lazy as LT import Prelude.Compat import Text.Heredoc import Text.RE.Capture import Text.RE.CaptureID import Text.RE.Options import Text.Read import Text.Regex.TDFA import Text.Regex.TDFA.Text() import Text.Regex.TDFA.Text.Lazy() \end{code} \begin{code} -- | Replace provides the missing methods needed to replace the matched -- text; lengthE is the minimum implementation class (Extract a,Monoid a) => Replace a where -- | length function for a lengthE :: a -> Int -- | inject String into a packE :: String -> a -- | project a onto a String unpackE :: a -> String -- | inject into Text textifyE :: a -> T.Text -- | project Text onto a detextifyE :: T.Text -> a -- | append a newline appendNewlineE :: a -> a -- | apply a substitution function to a Capture substE :: (a->a) -> Capture a -> a -- | convert a template containing $0, $1, etc., in the first -- argument, into a 'phi' replacement function for use with -- replaceAllCaptures and replaceCaptures parseTemplateE :: a -> Match a -> Location -> Capture a -> Maybe a textifyE = T.pack . unpackE detextifyE = packE . T.unpack appendNewlineE = (<> packE "\n") substE f m@Capture{..} = capturePrefix m <> f capturedText <> captureSuffix m \end{code} \begin{code} -- | a selction of the Replace methods can be encapsulated with ReplaceMethods -- for the higher-order replacement functions data ReplaceMethods a = ReplaceMethods { methodLength :: a -> Int , methodSubst :: (a->a) -> Capture a -> a } -- | replaceMethods encapsulates ReplaceMethods a from a Replace a context replaceMethods :: Replace a => ReplaceMethods a replaceMethods = ReplaceMethods { methodLength = lengthE , methodSubst = substE } \end{code} \begin{code} -- | @Context@ specifies which contexts the substitutions should be applied data Context = TOP -- ^ substitutions should be applied to the top-level only, -- the text that matched the whole RE | SUB -- ^ substitutions should only be applied to the text -- captured by bracketed sub-REs | ALL -- ^ the substitution function should be applied to all -- captures, the top level and the sub-expression captures deriving (Show) -- | the @Location@ information passed into the substitution function -- specifies which sub-expression is being substituted data Location = Location { locationMatch :: Int -- ^ the zero-based, i-th string to be matched, -- when matching all strings, zero when only the -- first string is being matched , locationCapture :: CaptureOrdinal -- ^ 0, when matching the top-level string -- matched by the whole RE, 1 for the top-most, -- left-most redex captured by bracketed -- sub-REs, etc. } deriving (Show) \end{code} \begin{code} -- | True iff the location references a complete match -- (i.e., not a bracketed capture) isTopLocation :: Location -> Bool isTopLocation = (==0) . locationCapture \end{code} \begin{code} -- | replace all with a template, $0 for whole text, $1 for first -- capture, etc. replaceAll :: Replace a => a -> Matches a -> a replaceAll tpl ac = replaceAllCaptures TOP (parseTemplateE tpl) ac \end{code} \begin{code} -- | substitutes using a function that takes the full Match -- context and returns the same replacement text as the _phi_phi -- context. replaceAllCaptures :: Replace a => Context -> (Match a->Location->Capture a->Maybe a) -> Matches a -> a \end{code} \begin{code} replaceAllCaptures = replaceAllCaptures_ replaceMethods \end{code} \begin{code} -- | replaceAllCaptures_ is like like replaceAllCaptures but takes the -- Replace methods through the ReplaceMethods argument replaceAllCaptures_ :: Extract a => ReplaceMethods a -> Context -> (Match a->Location->Capture a->Maybe a) -> Matches a -> a replaceAllCaptures_ s ctx phi ac = runIdentity $ replaceAllCapturesM s ctx (lift_phi phi) ac \end{code} \begin{code} -- | replaceAllCapturesM is just a monadically generalised version of -- replaceAllCaptures_ replaceAllCapturesM :: (Extract a,Monad m) => ReplaceMethods a -> Context -> (Match a->Location->Capture a->m (Maybe a)) -> Matches a -> m a replaceAllCapturesM r ctx phi_ Matches{..} = replaceCapturesM r ALL phi $ Match matchesSource cnms arr where phi _ (Location _ i) = case arr_c!i of Just caps -> phi_ caps . uncurry Location $ arr_i ! i Nothing -> const $ return Nothing arr_c = listArray bds $ concat $ [ repl (rangeSize $ bounds $ matchArray cs) cs | cs <- allMatches ] arr_i = listArray bds j_ks arr = listArray bds $ [ arr_ ! k | arr_ <- map matchArray allMatches , k <- indices arr_ ] bds = (0,CaptureOrdinal $ length j_ks-1) j_ks = [ (j,k) | (j,arr_) <- zip [0..] $ map matchArray allMatches , k <- indices arr_ ] repl 0 _ = [] repl n x = case ctx of TOP -> Just x : replicate (n-1) Nothing SUB -> Nothing : replicate (n-1) (Just x) ALL -> replicate n $ Just x cnms = fromMaybe noCaptureNames $ listToMaybe $ map captureNames allMatches \end{code} \begin{code} -- | replace with a template containing $0 for whole text, -- $1 for first capture, etc. replace :: Replace a => Match a -> a -> a replace c tpl = replaceCaptures TOP (parseTemplateE tpl) c \end{code} \begin{code} -- | substitutes using a function that takes the full Match -- context and returns the same replacement text as the _phi_phi -- context. replaceCaptures :: Replace a => Context -> (Match a->Location->Capture a->Maybe a) -> Match a -> a replaceCaptures = replaceCaptures_ replaceMethods \end{code} \begin{code} -- | replaceCaptures_ is like replaceCaptures but takes the Replace methods -- through the ReplaceMethods argument replaceCaptures_ :: Extract a => ReplaceMethods a -> Context -> (Match a->Location->Capture a->Maybe a) -> Match a -> a replaceCaptures_ s ctx phi caps = runIdentity $ replaceCapturesM s ctx (lift_phi phi) caps \end{code} \begin{code} -- | replaceCapturesM is just a monadically generalised version of -- replaceCaptures_ replaceCapturesM :: (Monad m,Extract a) => ReplaceMethods a -> Context -> (Match a->Location->Capture a->m (Maybe a)) -> Match a -> m a replaceCapturesM ReplaceMethods{..} ctx phi_ caps@Match{..} = do (hay',_) <- foldr sc (return (matchSource,[])) $ zip [0..] $ elems matchArray return hay' where sc (i,cap0) act = do (hay,ds) <- act let ndl = capturedText cap cap = adj hay ds cap0 mb <- phi i cap case mb of Nothing -> return (hay,ds) Just ndl' -> return ( methodSubst (const ndl') cap , (captureOffset cap,len'-len) : ds ) where len' = methodLength ndl' len = methodLength ndl adj hay ds cap = Capture { captureSource = hay , capturedText = before len $ after off0 hay , captureOffset = off0 , captureLength = len } where len = len0 + sum [ delta | (off,delta) <- ds , off < off0 + len0 ] len0 = captureLength cap off0 = captureOffset cap phi i cap = case ctx of TOP | i/=0 -> return Nothing SUB | i==0 ->return Nothing _ -> case not $ hasCaptured cap of True -> return Nothing False -> phi_ caps (Location 0 i) cap \end{code} \begin{code} -- the Replace instances instance Replace [Char] where lengthE = length packE = id unpackE = id textifyE = T.pack detextifyE = T.unpack appendNewlineE = (<>"\n") parseTemplateE = parseTemplateE' id instance Replace B.ByteString where lengthE = B.length packE = B.pack unpackE = B.unpack textifyE = TE.decodeUtf8 detextifyE = TE.encodeUtf8 appendNewlineE = (<>"\n") parseTemplateE = parseTemplateE' B.unpack instance Replace LBS.ByteString where lengthE = fromEnum . LBS.length packE = LBS.pack unpackE = LBS.unpack textifyE = TE.decodeUtf8 . LBS.toStrict detextifyE = LBS.fromStrict . TE.encodeUtf8 appendNewlineE = (<>"\n") parseTemplateE = parseTemplateE' LBS.unpack instance Replace (S.Seq Char) where lengthE = S.length packE = S.fromList unpackE = F.toList parseTemplateE = parseTemplateE' F.toList instance Replace T.Text where lengthE = T.length packE = T.pack unpackE = T.unpack textifyE = id detextifyE = id appendNewlineE = (<>"\n") parseTemplateE = parseTemplateE' T.unpack instance Replace LT.Text where lengthE = fromEnum . LT.length packE = LT.pack unpackE = LT.unpack textifyE = LT.toStrict detextifyE = LT.fromStrict appendNewlineE = (<>"\n") parseTemplateE = parseTemplateE' LT.unpack \end{code} \begin{code} -- | expand all of the @{..} macros in the RE in the argument String -- according to the Macros argument, preprocessing the RE String -- according to the Mode argument (used internally) expandMacros :: (r->String) -> Macros r -> String -> String expandMacros x_src hm s = case HM.null hm of True -> s False -> expandMacros' (fmap x_src . flip HM.lookup hm) s \end{code} \begin{code} -- | expand the @{..} macos in the argument string using the given -- function expandMacros' :: (MacroID->Maybe String) -> String -> String expandMacros' lu = fixpoint e_m where e_m re_s = replaceAllCaptures TOP phi $ re_s $=~ [here|@(@|\{([^{}]+)\})|] where phi mtch _ cap = case txt == "@@" of True -> Just "@" False -> Just $ fromMaybe txt $ lu ide where txt = capturedText cap ide = MacroID $ capturedText $ capture c2 mtch c2 = IsCaptureOrdinal $ CaptureOrdinal 2 \end{code} \begin{code} lift_phi :: Monad m => (Match a->Location->Capture a->Maybe a) -> (Match a->Location->Capture a->m (Maybe a)) lift_phi phi_ = phi where phi caps' loc' cap' = return $ phi_ caps' loc' cap' \end{code} \begin{code} parseTemplateE' :: ( Replace a , RegexContext Regex a (Matches a) , RegexMaker Regex CompOption ExecOption String ) => (a->String) -> a -> Match a -> Location -> Capture a -> Maybe a parseTemplateE' unpack tpl mtch _ _ = Just $ replaceAllCaptures TOP phi $ tpl $=~ [here|\$(\$|[0-9]|\{([^{}]+)\})|] where phi t_mtch _ _ = case t_mtch !$? c2 of Just cap -> case readMaybe stg of Nothing -> this $ IsCaptureName $ CaptureName $ T.pack stg Just cn -> this $ IsCaptureOrdinal $ CaptureOrdinal cn where stg = unpack $ capturedText cap Nothing -> case s == "$" of True -> Just t False -> this $ IsCaptureOrdinal $ CaptureOrdinal $ read s where s = unpack t t = capturedText $ capture c1 t_mtch this cid = capturedText <$> mtch !$? cid c1 = IsCaptureOrdinal $ CaptureOrdinal 1 c2 = IsCaptureOrdinal $ CaptureOrdinal 2 \end{code} \begin{code} fixpoint :: (Eq a) => (a->a) -> a -> a fixpoint f = chk . iterate f where chk (x:x':_) | x==x' = x chk xs = chk $ tail xs \end{code} \begin{code} ($=~) :: ( RegexContext Regex source target , RegexMaker Regex CompOption ExecOption String ) => source -> String -> target ($=~) = (=~) \end{code}