{-| Internal definitions for "Text.Regex.TDFA.QuasiQuoter". Expect these definitions to change. -} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE DeriveDataTypeable #-} module Text.Regex.TDFA.QuasiQuoter.Internal where import Prelude ( Either(..) , String(..) , fmap , foldr , (.) , (++) , null , Char , snd , fst , Maybe(..) , Bool(..) , not , maybe , either , error , Show(..) , Eq(..) , Ord(..) , all , otherwise ) -- import Control.Applicative ( (<|>) ) -- import Data.List ( find , intersect , nub , (\\) ) -- import qualified Text.Regex.TDFA.Common as TDFA ( CompOption(..) , ExecOption(..) ) -- import Text.Regex.TDFA.Common ( DoPa(..) , CompOption(CompOption) , ExecOption(ExecOption) , GroupIndex(..) ) -- import Data.Either ( partitionEithers ) -- import Control.Monad ( guard , Monad(..) ) -- import Text.Regex.TDFA.ReadRegex ( parseRegex ) -- import Text.Regex.TDFA.Pattern ( Pattern(..) , PatternSet(..) , PatternSetCharacterClass(..) , PatternSetCollatingElement(..) , PatternSetEquivalenceClass(..) ) -- import Text.Regex.TDFA.TDFA ( patternToRegex ) -- import Data.Typeable ( Typeable ) -- import Data.Data ( Data ) -- import Data.Char ( isHexDigit , chr ) -- import Numeric ( readHex ) -- -- | A flag indicates whether a particular option should be True ("on") -- or False ("off") when compiling a regular expression string. -- -- Refer to 'Text.Regex.TDFA.Common.CompOption' and -- 'Text.Regex.TDFA.Common.ExecOption' for the meanings of each flag. -- data Flag = CaseSensitive { flagValue :: Bool } | Multiline { flagValue :: Bool } | RightAssoc { flagValue :: Bool } | NewSyntax { flagValue :: Bool } | LastStarGreedy { flagValue :: Bool } | CaptureGroups { flagValue :: Bool } deriving (Show, Eq, Ord) -- -- | Options to use when compiling a regular expression string. -- -- Refer to 'Text.Regex.TDFA.Common.CompOption' and -- 'Text.Regex.TDFA.Common.ExecOption' for the meanings of each option. -- data Options = Options { caseSensitive :: Bool , multiline :: Bool , rightAssoc :: Bool , newSyntax :: Bool , lastStarGreedy :: Bool , captureGroups :: Bool } deriving (Show, Eq, Ord) -- -- These standalone orphan instances are required for uses of -- 'dataToExpQ' within this module. deriving instance Typeable PatternSetEquivalenceClass deriving instance Data PatternSetEquivalenceClass deriving instance Typeable PatternSetCollatingElement deriving instance Data PatternSetCollatingElement deriving instance Typeable PatternSetCharacterClass deriving instance Data PatternSetCharacterClass deriving instance Typeable DoPa deriving instance Data DoPa deriving instance Typeable PatternSet deriving instance Data PatternSet deriving instance Typeable Pattern deriving instance Data Pattern deriving instance Typeable CompOption deriving instance Data CompOption deriving instance Typeable ExecOption deriving instance Data ExecOption -- | Replaces escape sequences with their respective characters. Any -- sequence not listed will be left as-is. -- -- @ -- Sequence | Character -- ----------+-------------------- -- \\\\ | \\ -- \\n | Newline -- \\r | Carriage return -- \\t | Horizontal tab -- \\f | Form feed -- \\v | Vertical tab -- \\xFFFF | Code point (in hex) -- |~] | |] -- \\|~] | |~] -- @ -- -- Note that if you are reading the source file and not the generated -- Haddock documentation that the backslashes have been doubled up. This -- is also why the table is incorrectly formatted in source. -- unescape :: String -> String unescape = unescaped where delim ('|':'~':']':xs) = Just ("|]", xs) delim _ = Nothing control xxs@(d1:d2:d3:d4:xs) | all isHexDigit ds = Just ([chr v], xs) | otherwise = Nothing where ds = [d1,d2,d3,d4] (v,_):_ = readHex ds control _ = Nothing escaped ('\\':xs) = Just ("\\", xs) escaped ('n' :xs) = Just ("\n", xs) escaped ('r' :xs) = Just ("\r", xs) escaped ('t' :xs) = Just ("\t", xs) escaped ('f' :xs) = Just ("\f", xs) escaped ('v' :xs) = Just ("\v", xs) escaped ('x' :xs) = control xs escaped ('|':'~':']':xs) = Just ("|~]", xs) escaped _ = Nothing unescaped ('\\':xs) = case escaped xs of Just (cs, xs') -> cs ++ unescaped xs' Nothing -> '\\' : unescaped xs unescaped xxs@(x:xs) = case delim xxs of Just (cs, xs') -> cs ++ unescaped xs' Nothing -> x : unescaped xs unescaped [] = [] -- -- | The AST for the regular expression string. If there is a parse -- error a corresponding error string is returned. -- compilePattern :: String -> Either String (Pattern, (GroupIndex, DoPa)) compilePattern = either (Left . show) Right . parseRegex . unescape -- | The default options used when compiling a regular expression -- string. -- -- Refer to 'Text.Regex.TDFA.Common.CompOption' and -- 'Text.Regex.TDFA.Common.ExecOption' for the meanings of each option. -- defaultOptions :: Options defaultOptions = Options { caseSensitive = True , multiline = True , rightAssoc = True , newSyntax = True , lastStarGreedy = False , captureGroups = True } -- -- | Converts this module's representation of regular expression string -- compiler options to that of "Text.Regex.TDFA"'s. -- optionsToTDFAOptions :: Options -> (CompOption, ExecOption) optionsToTDFAOptions opts = ( CompOption { TDFA.caseSensitive = caseSensitive opts , TDFA.multiline = multiline opts , TDFA.rightAssoc = rightAssoc opts , TDFA.newSyntax = newSyntax opts , TDFA.lastStarGreedy = lastStarGreedy opts } , ExecOption { TDFA.captureGroups = captureGroups opts } ) -- -- | Overrides the option indicated by the flag. -- applyFlag :: Flag -> Options -> Options applyFlag (CaseSensitive v) opts = opts { caseSensitive = v } applyFlag (Multiline v) opts = opts { multiline = v } applyFlag (RightAssoc v) opts = opts { rightAssoc = v } applyFlag (NewSyntax v) opts = opts { newSyntax = v } applyFlag (LastStarGreedy v) opts = opts { lastStarGreedy = v } applyFlag (CaptureGroups v) opts = opts { captureGroups = v } -- | A relation of flags and characters. Each flag is related with one -- character and that character is unique. Not all characters are -- related to a flag. -- flagsChars :: [(Char, Flag)] flagsChars = [ ('C', CaseSensitive True), ('c', CaseSensitive False) , ('M', Multiline True), ('m', Multiline False) , ('R', RightAssoc True), ('r', RightAssoc False) , ('N', NewSyntax True), ('n', NewSyntax False) , ('A', LastStarGreedy True), ('a', LastStarGreedy False) , ('G', CaptureGroups True), ('g', CaptureGroups False) ] -- -- | The flag related to the character (by 'flagsChars'), if any. -- charToFlag :: Char -> Maybe Flag charToFlag c = fmap snd (find ((==c) . fst) flagsChars) -- | The character related to the flag (by 'flagsChars'). -- flagToChar :: Flag -> Char flagToChar f = case find ((==f) . snd) flagsChars of Just (c,_) -> c Nothing -> error ("Missing char for flag: " ++ show f) -- -- | The flag which indicates the opposite option. That is, if the flag -- indicates an option to be True ("on") then the negated flag indicates -- the same option to be False ("off") and vice versa. -- negateFlag :: Flag -> Flag negateFlag f = f { flagValue = not (flagValue f) } -- | The set of flags related to the set of characters using the -- relation 'flagsChars'. -- -- - No character may occur twice. -- - If a character is present the character related to the negated flag -- must not be. -- - All characters must be related to a flag. -- -- If one of these conditions are not met then a corresponding error -- message is returned instead. -- parseFlags :: String -> Either String [Flag] parseFlags chars = do let results = fmap (\x -> maybe (Left x) Right (charToFlag x)) chars (notFlags, flags) = partitionEithers results extraFlags = flags \\ nub flags conflictingFlags = intersect flags (fmap negateFlag flags) guard (null notFlags) <|> Left ("These are not flags '" ++ notFlags ++ "'") guard (null extraFlags) <|> Left ("Duplicate flags '" ++ fmap flagToChar extraFlags ++ "'") guard (null conflictingFlags) <|> Left ("Conflicting flags '" ++ fmap flagToChar conflictingFlags ++ "'") Right flags -- -- | The switch of Left and Right for an Either value. -- switch :: Either a b -> Either b a switch = either Right Left -- | Apply all flags to the default options. See 'parseFlags' and -- 'applyFlag'. -- flagStringToOptions :: String -> Either String Options flagStringToOptions = fmap (foldr applyFlag defaultOptions) . parseFlags