module Text.Regex.TDFA.Pattern
(Pattern(..)
,PatternSet(..)
,PatternSetCharacterClass(..)
,PatternSetCollatingElement(..)
,PatternSetEquivalenceClass(..)
,GroupIndex
,DoPa(..)
,showPattern
,starTrans
,starTrans',simplify',dfsPattern
) where
import Data.List(intersperse,partition)
import qualified Data.Set as Set(toAscList,toList)
import Data.Set(Set)
import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error)
err :: String -> a
err = common_error "Text.Regex.TDFA.Pattern"
data Pattern = PEmpty
| PGroup (Maybe GroupIndex) Pattern
| POr [Pattern]
| PConcat [Pattern]
| PQuest Pattern
| PPlus Pattern
| PStar Bool Pattern
| PBound Int (Maybe Int) Pattern
| PCarat {getDoPa::DoPa}
| PDollar {getDoPa::DoPa}
| PDot {getDoPa::DoPa}
| PAny {getDoPa::DoPa,getPatternSet::PatternSet}
| PAnyNot {getDoPa::DoPa,getPatternSet::PatternSet}
| PEscape {getDoPa::DoPa,getPatternChar::Char}
| PChar {getDoPa::DoPa,getPatternChar::Char}
| PNonCapture Pattern
| PNonEmpty Pattern
deriving (Eq,Show)
showPattern :: Pattern -> String
showPattern pIn =
case pIn of
PEmpty -> "()"
PGroup _ p -> paren (showPattern p)
POr ps -> concat $ intersperse "|" (map showPattern ps)
PConcat ps -> concatMap showPattern ps
PQuest p -> (showPattern p)++"?"
PPlus p -> (showPattern p)++"+"
PStar _ p -> (showPattern p)++"*"
PBound i (Just j) p | i==j -> showPattern p ++ ('{':show i)++"}"
PBound i mj p -> showPattern p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj
PCarat _ -> "^"
PDollar _ -> "$"
PDot _ -> "."
PAny _ ps -> ('[':show ps)++"]"
PAnyNot _ ps -> ('[':'^':show ps)++"]"
PEscape _ c -> '\\':c:[]
PChar _ c -> [c]
PNonCapture p -> showPattern p
PNonEmpty p -> showPattern p
where groupRange x n (y:ys) = if (fromEnum y)(fromEnum x) == n then groupRange x (succ n) ys
else (if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys
groupRange x n [] = if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]
paren s = ('(':s)++")"
data PatternSet = PatternSet (Maybe (Set Char))
(Maybe (Set PatternSetCharacterClass))
(Maybe (Set PatternSetCollatingElement))
(Maybe (Set PatternSetEquivalenceClass))
deriving (Eq)
instance Show PatternSet where
showsPrec i (PatternSet s scc sce sec) =
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
scc' = maybe "" ((concatMap show) . Set.toList) scc
sce' = maybe "" ((concatMap show) . Set.toList) sce
sec' = maybe "" ((concatMap show) . Set.toList) sec
in shows charSpec
. showsPrec i scc' . showsPrec i sce' . showsPrec i sec'
. if '-' `elem` special then showChar '-' else id
where byRange xAll@(x:xs) | length xAll <=3 = xAll
| otherwise = groupRange x 1 xs
byRange _ = undefined
groupRange x n (y:ys) = if (fromEnum y)(fromEnum x) == n then groupRange x (succ n) ys
else (if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys
groupRange x n [] = if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]
newtype PatternSetCharacterClass = PatternSetCharacterClass {unSCC::String}
deriving (Eq,Ord)
newtype PatternSetCollatingElement = PatternSetCollatingElement {unSCE::String}
deriving (Eq,Ord)
newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {unSEC::String}
deriving (Eq,Ord)
instance Show PatternSetCharacterClass where
showsPrec _ p = showChar '[' . showChar ':' . shows (unSCC p) . showChar ':' . showChar ']'
instance Show PatternSetCollatingElement where
showsPrec _ p = showChar '[' . showChar '.' . shows (unSCE p) . showChar '.' . showChar ']'
instance Show PatternSetEquivalenceClass where
showsPrec _ p = showChar '[' . showChar '=' . shows (unSEC p) . showChar '=' . showChar ']'
starTrans :: Pattern -> Pattern
starTrans = dfsPattern (simplify' . starTrans')
dfsPattern :: (Pattern -> Pattern)
-> Pattern
-> Pattern
dfsPattern f = dfs
where unary c = f . c . dfs
dfs pattern = case pattern of
POr ps -> f (POr (map dfs ps))
PConcat ps -> f (PConcat (map dfs ps))
PGroup i p -> unary (PGroup i) p
PQuest p -> unary PQuest p
PPlus p -> unary PPlus p
PStar i p -> unary (PStar i) p
PBound i mi p -> unary (PBound i mi) p
_ -> f pattern
starTrans' :: Pattern -> Pattern
starTrans' pIn =
case pIn of
PQuest p -> POr [p,PEmpty]
PPlus p -> PConcat [p,simplify' $ PStar False p]
PBound i _ _ | i<0 -> PEmpty
PBound i (Just j) _ | i>j -> PEmpty
PBound _ (Just 0) _ -> PEmpty
PBound 0 Nothing p -> PStar True p
PBound 0 (Just 1) p -> POr [p,PEmpty]
PBound i Nothing p -> asGroup . PConcat $ apply (p':) (pred i) [p,simplify' $ PStar False p]
where p' = nonCapture' p
PBound 0 (Just j) p | cannotMatchNull p -> apply (quest' . (concat' p)) (pred j) (quest' p)
| canOnlyMatchNull p -> quest' p
| otherwise -> POr [ simplify' (PConcat (p : replicate (pred j) (nonEmpty' p))) , PEmpty ]
PBound i (Just j) p | i == j -> asGroup . PConcat $ apply (p':) (pred i) [p]
| cannotMatchNull p -> asGroup . PConcat $ apply (p':) (pred i) $ (p:) $
[apply (quest' . (concat' p)) (pred (ji)) (quest' p)]
| canOnlyMatchNull p -> p
| otherwise -> asGroup . PConcat $ (replicate (pred i) p') ++ p : (replicate (ji) (nonEmpty' p))
where p' = nonCapture' p
PEmpty -> pass
PGroup {} -> pass
PStar {} -> pass
POr {} -> pass
PConcat {} -> pass
PCarat {} -> pass
PDollar {} -> pass
PDot {} -> pass
PAny {} -> pass
PAnyNot {} -> pass
PEscape {} -> pass
PChar {} -> pass
PNonCapture {} -> pass
PNonEmpty {} -> pass
where
quest' = (\p -> simplify' $ POr [p,PEmpty])
concat' a b = simplify' $ PConcat [a,b]
nonEmpty' = PNonEmpty
nonCapture' = PNonCapture
apply f n x = foldr ($) x (replicate n f)
asGroup p = PGroup Nothing (simplify' p)
pass = pIn
simplify' :: Pattern -> Pattern
simplify' x@(POr _) =
let ps' = case span notPEmpty (flatten x) of
(notEmpty,[]) -> notEmpty
(notEmpty,_:rest) -> notEmpty ++ (PEmpty:filter notPEmpty rest)
in case ps' of
[] -> PEmpty
[p] -> p
_ -> POr ps'
simplify' x@(PConcat _) =
let ps' = filter notPEmpty (flatten x)
in case ps' of
[] -> PEmpty
[p] -> p
_ -> PConcat ps'
simplify' (PStar _ PEmpty) = PEmpty
simplify' other = other
flatten :: Pattern -> [Pattern]
flatten (POr ps) = (concatMap (\x -> case x of
POr ps' -> ps'
p -> [p]) ps)
flatten (PConcat ps) = (concatMap (\x -> case x of
PConcat ps' -> ps'
p -> [p]) ps)
flatten _ = err "flatten can only be applied to POr or PConcat"
notPEmpty :: Pattern -> Bool
notPEmpty PEmpty = False
notPEmpty _ = True
cannotMatchNull :: Pattern -> Bool
cannotMatchNull pIn =
case pIn of
PEmpty -> False
PGroup _ p -> cannotMatchNull p
POr [] -> False
POr ps -> all cannotMatchNull ps
PConcat [] -> False
PConcat ps -> any cannotMatchNull ps
PQuest _ -> False
PPlus p -> cannotMatchNull p
PStar {} -> False
PBound 0 _ _ -> False
PBound _ _ p -> cannotMatchNull p
PCarat _ -> False
PDollar _ -> False
PNonCapture p -> cannotMatchNull p
PNonEmpty _ -> False
_ -> True
canOnlyMatchNull :: Pattern -> Bool
canOnlyMatchNull pIn =
case pIn of
PEmpty -> True
PGroup _ p -> canOnlyMatchNull p
POr [] -> True
POr ps -> all canOnlyMatchNull ps
PConcat [] -> True
PConcat ps -> all canOnlyMatchNull ps
PQuest p -> canOnlyMatchNull p
PPlus p -> canOnlyMatchNull p
PStar _ p -> canOnlyMatchNull p
PBound _ (Just 0) _ -> True
PBound _ _ p -> canOnlyMatchNull p
PCarat _ -> True
PDollar _ -> True
PNonCapture p -> canOnlyMatchNull p
PNonEmpty p -> canOnlyMatchNull p
_ ->False