{-# OPTIONS_GHC -fglasgow-exts -fno-warn-orphans #-} {-| The "Text.Regex.Lib.WrapDFAEngine" provides the backend for "Text.Regex.DFA". This provides the 'Regex' type and 'RegexOptions' instance for them, and 'RegexMaker' instances for 'String' and 'ByteString'. Details on the DFA engine can be found in "Text.Regex.DFA" and license information in "Text.Regex.Lazy.DFAEngine". -} module Text.Regex.DFA.Wrap(Regex(..),CompOption(..),ExecOption(..),(=~),(=~~),makeCompat) where import Text.Regex.Base.RegexLike(RegexMaker(..),RegexOptions(..),RegexContext(..)) import Text.Regex.DFA.Common(CompOption(..),ExecOption(..)) import Text.Regex.DFA.Engine import Text.Regex.DFA.Pattern import Text.Regex.DFA.ReadRegex(decodePatternSet) import Data.Char(toUpper,toLower) import Data.List(nub,sort) import qualified Data.Set as Set(toList) -- | The DFA backend specific 'Regex' type, used by this module's '=~' -- and '=~~' operators. data Regex = Regex {asPattern::Pattern ,asLexer::Lexer ,compOptions::CompOption ,execOptions::ExecOption} instance RegexOptions Regex CompOption ExecOption where blankCompOpt = CompOption {caseSensitive = True,multiline = True} blankExecOpt = ExecOption defaultCompOpt = CompOption {caseSensitive = True,multiline = True} defaultExecOpt = ExecOption setExecOpts e r = r {execOptions=e} getExecOpts r = execOptions r -- | This is the pure functional matching operator. If the target -- cannot be produced then some empty result will be returned. If -- there is an error in processing, then 'error' will be called. (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~) x r = let q :: Regex q = makeRegex r in match q x -- | This is the monadic matching operator. If a single match fails, -- then 'fail' will be called. (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) => source1 -> source -> m target (=~~) x r = do (q::Regex) <- makeRegexM r matchM q x -- | If ('dfaClean' pat) is True then ('makeCompat' pat) should not throw -- an error. This translates a 'Pattern' into a DFA 'Regex' makeCompat :: CompOption -> Pattern -> Regexp makeCompat c pIn = makeCompatCont c pIn emptyOp makeCompatCont :: CompOption -> Pattern -> Regexp -> Regexp makeCompatCont CompOption {caseSensitive = caseOpt, multiline = lineOpt} = reflect where reflect :: Pattern -> Regexp -> Regexp reflect pIn cont = case pIn of -- -- This first set of cases do not actually accept characters -- PEmpty -> cont PGroup _ p -> reflect p cont POr [] -> cont POr ps -> orRE (map (\p->reflect p cont) ps) PConcat [] -> cont PConcat ps -> foldr ($) cont (map reflect ps) PQuest p -> quest (reflect p emptyOp) cont PPlus p -> plus (reflect p emptyOp) cont PStar p -> star (reflect p emptyOp) cont -- Handle PBound by reduction to simpler Pattern forms PBound 0 Nothing p -> reflect (PStar p) cont PBound i Nothing p | 0 reflect (PConcat ((replicate i p)++[PStar p])) cont | otherwise -> die PBound 0 (Just 0) _ -> cont PBound 0 (Just 1) p -> reflect (PQuest p) cont PBound 0 (Just j) p | j>1 -> reflect (PQuest (PConcat [p,PBound 0 (Just (pred j)) p])) cont | otherwise -> die PBound i (Just j) p | 0 reflect (PConcat (replicate i p)) cont | 0 reflect (PConcat ((replicate i p)++[PBound 0 (Just (j-i)) p])) cont | otherwise -> die -- Predicates PCarat i -> (if lineOpt then beginLine i else beginInput i) +> cont PDollar i -> (if lineOpt then endLine i else endInput i) +> cont -- -- From here down are the patterns that actually accept characters -- PDot i -> (if lineOpt then altNot i ['\n'] else allChar i) +> cont PAny i patset -> let chars = charCases . Set.toList . decodePatternSet $ patset in alt i chars +> cont PAnyNot i patset -> let chars = charCases . Set.toList . decodePatternSet $ patset in altNot i chars +> cont PEscape i c -> charOrAlt i c +> cont PChar i c -> charOrAlt i c +> cont where die = error ("Text.Regex.DFA.Wrap makeCompatCont failed: show pIn") charOrAlt i c = if caseOpt || (toUpper c == toLower c) then char i c else alt i [toLower c,toUpper c] charCases cs = if caseOpt then cs else let chars'up = map toUpper cs chars'down = map toLower cs in nub . sort $ (chars'up ++ chars'down)