module Text.Regex.DFA.Pattern
(Pattern(..)
,PatternSet(..)
,PatternSetCharacterClass(..),PatternSetCollatingElement(..),PatternSetEquivalenceClass(..)
,PatternIndex
,showPattern
,showPatternP
,DoPa(..)
,newDoPa
) where
import Data.List(intersperse,partition)
import qualified Data.Set as Set(toAscList,toList)
import Data.Set(Set)
data DoPa = DoPa {dopaIndex :: Int} deriving (Eq,Ord)
instance Show DoPa where
show (DoPa {dopaIndex=i}) = show i
newDoPa :: Int -> DoPa
newDoPa i = DoPa i
data Pattern = PEmpty
| PGroup PatternIndex Pattern
| POr [Pattern]
| PConcat [Pattern]
| PQuest Pattern
| PPlus Pattern
| PStar Pattern
| PBound Int (Maybe Int) Pattern
| PCarat DoPa
| PDollar DoPa
| PDot DoPa
| PAny DoPa PatternSet
| PAnyNot DoPa PatternSet
| PEscape DoPa Char
| PChar DoPa Char
deriving (Eq,Show)
showPattern :: Pattern -> String
showPattern pIn =
case pIn of
PEmpty -> "()"
PGroup _ p -> ('(':showPattern p)++")"
POr ps -> concat $ intersperse "|" (map showPattern ps)
PConcat ps -> concatMap showPattern ps
PQuest p -> (showPattern p)++"?"
PPlus p -> (showPattern p)++"+"
PStar p -> (showPattern p)++"*"
PBound i (Just j) p | i==j -> showPattern p ++ ('{':show i)++"}"
PBound i mj p -> showPattern p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj
PCarat _ -> "^"
PDollar _ -> "$"
PDot _ -> "."
PAny _ (PatternSet s scc sce sec) ->
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
in concat ['[':charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
PAnyNot _ (PatternSet s scc sce sec) ->
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
in concat ["[^",charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
PEscape _ c -> '\\':c:[]
PChar _ c -> [c]
where byRange xAll@(x:xs) | length xAll <=3 = xAll
| otherwise = groupRange x 1 xs
byRange _ = undefined
groupRange x n (y:ys) = if (fromEnum y)(fromEnum x) == n then groupRange x (succ n) ys
else (if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys
groupRange x n [] = if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]
showPatternP :: Pattern -> String
showPatternP pIn =
case pIn of
PEmpty -> paren $ ""
PGroup _ p -> paren $ showPatternP p
POr ps -> paren $ concat $ intersperse "|" (map showPatternP ps)
PConcat ps -> paren $ concatMap showPatternP ps
PQuest p -> (showPatternP p)++"?"
PPlus p -> (showPatternP p)++"+"
PStar p -> ( showPatternP p)++"*"
PBound i (Just j) p | i==j -> showPatternP p ++ ('{':show i)++"}"
PBound i mj p -> showPatternP p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj
PCarat _ -> "^"
PDollar _ -> "$"
PDot _ -> "."
PAny _ (PatternSet s scc sce sec) ->
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
in concat ['[':charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
PAnyNot _ (PatternSet s scc sce sec) ->
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
in concat ["[^",charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
PEscape _ c -> '\\':c:[]
PChar _ c -> [c]
where byRange xAll@(x:xs) | length xAll <=3 = xAll
| otherwise = groupRange x 1 xs
byRange _ = undefined
groupRange x n (y:ys) = if (fromEnum y)(fromEnum x) == n then groupRange x (succ n) ys
else (if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys
groupRange x n [] = if n <=3 then take n [x..]
else x:'-':(toEnum (pred n+fromEnum x)):[]
paren s = ('(':s)++")"
data PatternSet = PatternSet (Maybe (Set Char)) (Maybe (Set (PatternSetCharacterClass)))
(Maybe (Set PatternSetCollatingElement)) (Maybe (Set PatternSetEquivalenceClass)) deriving (Eq,Show)
newtype PatternSetCharacterClass = PatternSetCharacterClass {unSCC::String} deriving (Eq,Ord,Show)
newtype PatternSetCollatingElement = PatternSetCollatingElement {unSCE::String} deriving (Eq,Ord,Show)
newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {unSEC::String} deriving (Eq,Ord,Show)
type PatternIndex = Int