-- | This "Text.Regex.DFA.Pattern" module provides the 'Pattern' data -- type and its subtypes. This 'Pattern' type is used to represent -- the parsed form of a Regular Expression and is syntax independent. -- -- It is possible to construct values of 'Pattern' that are invalid -- regular expressions. -- -- There are also several module Text.Regex.DFA.Pattern (Pattern(..) ,PatternSet(..) ,PatternSetCharacterClass(..),PatternSetCollatingElement(..),PatternSetEquivalenceClass(..) ,PatternIndex ,showPattern ,showPatternP -- ** Pattern DoPa ,DoPa(..) ,newDoPa ) where {- By Chris Kuklewicz, 2006. BSD License, see the LICENSE file. -} import Data.List(intersperse,partition) import qualified Data.Set as Set(toAscList,toList) import Data.Set(Set) data DoPa = DoPa {dopaIndex :: Int} deriving (Eq,Ord) instance Show DoPa where show (DoPa {dopaIndex=i}) = show i newDoPa :: Int -> DoPa newDoPa i = DoPa i data Pattern = PEmpty | PGroup PatternIndex Pattern | POr [Pattern] | PConcat [Pattern] | PQuest Pattern | PPlus Pattern | PStar Pattern | PBound Int (Maybe Int) Pattern -- The rest of these need an index of where in the regex string it is from | PCarat DoPa | PDollar DoPa -- The following test and accept a single character | PDot DoPa -- Any character (newline?) at all | PAny DoPa PatternSet -- Square bracketed things | PAnyNot DoPa PatternSet -- Inverted square bracketed things | PEscape DoPa Char -- Backslashed Character | PChar DoPa Char -- Specific Character deriving (Eq,Show) showPattern :: Pattern -> String showPattern pIn = case pIn of PEmpty -> "()" PGroup _ p -> ('(':showPattern p)++")" POr ps -> concat $ intersperse "|" (map showPattern ps) PConcat ps -> concatMap showPattern ps PQuest p -> (showPattern p)++"?" PPlus p -> (showPattern p)++"+" PStar p -> (showPattern p)++"*" PBound i (Just j) p | i==j -> showPattern p ++ ('{':show i)++"}" PBound i mj p -> showPattern p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj -- PCarat _ -> "^" PDollar _ -> "$" PDot _ -> "." PAny _ (PatternSet s scc sce sec) -> let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s charSpec = (if ']' `elem` special then (']':) else id) (byRange normal) scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec in concat ['[':charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"] PAnyNot _ (PatternSet s scc sce sec) -> let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s charSpec = (if ']' `elem` special then (']':) else id) (byRange normal) scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec in concat ["[^",charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"] PEscape _ c -> '\\':c:[] PChar _ c -> [c] where byRange xAll@(x:xs) | length xAll <=3 = xAll | otherwise = groupRange x 1 xs byRange _ = undefined groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys else (if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys groupRange x n [] = if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[] showPatternP :: Pattern -> String showPatternP pIn = case pIn of PEmpty -> paren $ "" PGroup _ p -> paren $ showPatternP p POr ps -> paren $ concat $ intersperse "|" (map showPatternP ps) PConcat ps -> paren $ concatMap showPatternP ps PQuest p -> (showPatternP p)++"?" PPlus p -> (showPatternP p)++"+" PStar p -> ( showPatternP p)++"*" PBound i (Just j) p | i==j -> showPatternP p ++ ('{':show i)++"}" PBound i mj p -> showPatternP p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj -- PCarat _ -> "^" PDollar _ -> "$" PDot _ -> "." PAny _ (PatternSet s scc sce sec) -> let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s charSpec = (if ']' `elem` special then (']':) else id) (byRange normal) scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec in concat ['[':charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"] PAnyNot _ (PatternSet s scc sce sec) -> let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s charSpec = (if ']' `elem` special then (']':) else id) (byRange normal) scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec in concat ["[^",charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"] PEscape _ c -> '\\':c:[] PChar _ c -> [c] where byRange xAll@(x:xs) | length xAll <=3 = xAll | otherwise = groupRange x 1 xs byRange _ = undefined groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys else (if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys groupRange x n [] = if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[] paren s = ('(':s)++")" data PatternSet = PatternSet (Maybe (Set Char)) (Maybe (Set (PatternSetCharacterClass))) (Maybe (Set PatternSetCollatingElement)) (Maybe (Set PatternSetEquivalenceClass)) deriving (Eq,Show) newtype PatternSetCharacterClass = PatternSetCharacterClass {unSCC::String} deriving (Eq,Ord,Show) -- [: :] newtype PatternSetCollatingElement = PatternSetCollatingElement {unSCE::String} deriving (Eq,Ord,Show) -- [. .] newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {unSEC::String} deriving (Eq,Ord,Show) -- [= =] -- | PatternIndex is for indexing submatches from parenthesized groups (PGroup) type PatternIndex = Int {- -- helper function isPostAtom :: Pattern -> Bool isPostAtom p = case p of PQuest _ -> True PPlus _ -> True PStar _ -> True PBound _ _ _ -> True _ -> False -}