module Text.Regex.TDFA.CorePattern(Q(..),P(..),WhichTest(..),Wanted(..)
,TestInfo,OP(..),SetTestInfo(..),NullView
,patternToQ,cleanNullView,cannotAccept,mustAccept) where
import Control.Monad.RWS
import Data.Array.IArray(Array,(!),accumArray,listArray)
import Data.List(sort)
import Data.IntMap.EnumMap(EnumMap)
import qualified Data.IntMap.EnumMap as Map(singleton,null,assocs,keysSet)
import Data.Maybe(isNothing)
import Data.IntSet.EnumSet(EnumSet)
import qualified Data.IntSet.EnumSet as Set(singleton,toList,isSubsetOf)
import Text.Regex.TDFA.Common
import Text.Regex.TDFA.Pattern(Pattern(..),starTrans)
err :: String -> a
err = common_error "Text.Regex.TDFA.CorePattern"
debug :: (Show a) => a -> b -> b
debug _ = id
data P = Empty
| Or [Q]
| Seq Q Q
| Star { getOrbit :: Maybe Tag
, resetOrbits :: [Tag]
, firstNull :: Bool
, unStar :: Q}
| Test TestInfo
| OneChar Pattern
| NonEmpty Q
deriving (Show,Eq)
data Q = Q {nullQ :: NullView
,takes :: (Position,Maybe Position)
,preReset :: [Tag]
,preTag,postTag :: Maybe Tag
,tagged :: Bool
,childGroups :: Bool
,wants :: Wanted
,unQ :: P} deriving (Eq)
type TestInfo = (WhichTest,DoPa)
newtype SetTestInfo = SetTestInfo {getTests :: EnumMap WhichTest (EnumSet DoPa)} deriving (Eq)
instance Monoid SetTestInfo where
mempty = SetTestInfo mempty
SetTestInfo x `mappend` SetTestInfo y = SetTestInfo (x `mappend` y)
instance Show SetTestInfo where
show (SetTestInfo sti) = "SetTestInfo "++show (mapSnd (Set.toList) $ Map.assocs sti)
type NullView = [(SetTestInfo,WinTags)]
data HandleTag = NoTag
| Advice Tag
| Apply Tag
deriving (Show)
data Wanted = WantsQNFA | WantsQT | WantsBoth | WantsEither deriving (Eq,Show)
instance Show Q where
show = showQ
showQ :: Q -> String
showQ q = "Q { nullQ = "++show (nullQ q)++
"\n , takes = "++show (takes q)++
"\n , preReset = "++show (preReset q)++
"\n , preTag = "++show (preTag q)++
"\n , postTag = "++show (postTag q)++
"\n , tagged = "++show (tagged q)++
"\n , wants = "++show (wants q)++
"\n , unQ = "++ indent (unQ q)++" }"
where indent = unlines . (\(h:t) -> h : (map (spaces ++) t)) . lines . show
spaces = replicate 10 ' '
notNull :: NullView
notNull = []
emptyNull :: WinTags -> NullView
emptyNull tags = (mempty, tags) : []
testNull :: TestInfo -> WinTags -> NullView
testNull (w,d) tags = (SetTestInfo (Map.singleton w (Set.singleton d)), tags) : []
cleanNullView :: NullView -> NullView
cleanNullView [] = []
cleanNullView (first@(SetTestInfo sti,_):rest) | Map.null sti = first : []
| otherwise =
first : cleanNullView (filter (not . (setTI `Set.isSubsetOf`) . Map.keysSet . getTests . fst) rest)
where setTI = Map.keysSet sti
mergeNullViews :: NullView -> NullView -> NullView
mergeNullViews s1 s2 = cleanNullView $ do
(test1,tag1) <- s1
(test2,tag2) <- s2
return (mappend test1 test2,mappend tag1 tag2)
addTagsToNullView :: WinTags -> NullView -> NullView
addTagsToNullView [] nv = nv
addTagsToNullView tags nv= do
(test,tags') <- nv
return (test,tags `mappend` tags')
addResetsToNullView :: [Tag]-> NullView -> NullView
addResetsToNullView resetTags nv = [ (test, prepend tags) | (test,tags) <- nv ]
where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetGroupStopTask)) $ resetTags
seqTake :: (Int, Maybe Int) -> (Int, Maybe Int) -> (Int, Maybe Int)
seqTake (x1,y1) (x2,y2) = (x1+x2,liftM2 (+) y1 y2)
orTakes :: [(Int, Maybe Int)] -> (Int,Maybe Int)
orTakes [] = (0,Just 0)
orTakes ts = let (xs,ys) = unzip ts
in (minimum xs, foldl1 (liftM2 max) ys)
apply :: HandleTag -> Maybe Tag
apply (Apply tag) = Just tag
apply _ = Nothing
toAdvice :: HandleTag -> HandleTag
toAdvice (Apply tag) = Advice tag
toAdvice s = s
noTag :: HandleTag -> Bool
noTag NoTag = True
noTag _ = False
fromHandleTag :: HandleTag -> Tag
fromHandleTag (Apply tag) = tag
fromHandleTag (Advice tag) = tag
fromHandleTag _ = error "fromHandleTag"
winTags :: Maybe Tag -> Maybe Tag -> WinTags
winTags (Just a) (Just b) = [(a,PreUpdate TagTask),(b,PreUpdate TagTask)]
winTags (Just a) Nothing = [(a,PreUpdate TagTask)]
winTags Nothing (Just b) = [(b,PreUpdate TagTask)]
winTags Nothing Nothing = mempty
varies :: Q -> Bool
varies Q {takes = (_,Nothing)} = True
varies Q {takes = (x,Just y)} = x/=y
mustAccept :: Q -> Bool
mustAccept q = (0/=) . fst . takes $ q
canAccept :: Q -> Bool
canAccept q = maybe True (0/=) $ snd . takes $ q
cannotAccept :: Q -> Bool
cannotAccept q = maybe False (0==) $ snd . takes $ q
type PM = RWS (Maybe GroupIndex) [Either Tag GroupInfo] ([OP]->[OP],Tag)
type HHQ = HandleTag
-> HandleTag
-> PM Q
makeGroupArray :: GroupIndex -> [GroupInfo] -> Array GroupIndex [GroupInfo]
makeGroupArray maxGroupIndex groups = accumArray (\earlier later -> later:earlier) [] (1,maxGroupIndex) filler
where filler = map (\gi -> (thisIndex gi,gi)) groups
fromRight :: [Either Tag GroupInfo] -> [GroupInfo]
fromRight [] = []
fromRight ((Right x):xs) = x:fromRight xs
fromRight ((Left _):xs) = fromRight xs
partitionEither :: [Either Tag GroupInfo] -> ([Tag],[GroupInfo])
partitionEither = helper id id where
helper :: ([Tag]->[Tag]) -> ([GroupInfo]->[GroupInfo]) -> [Either Tag GroupInfo] -> ([Tag],[GroupInfo])
helper ls rs [] = (ls [],rs [])
helper ls rs ((Right x):xs) = helper ls (rs.(x:)) xs
helper ls rs ((Left x):xs) = helper (ls.(x:)) rs xs
patternToQ :: CompOption -> (Pattern,(GroupIndex,DoPa)) -> (Q,Array Tag OP,Array GroupIndex [GroupInfo])
patternToQ compOpt (pOrig,(maxGroupIndex,_)) = (tnfa,aTags,aGroups) where
(tnfa,(tag_dlist,nextTag),groups) = runRWS monad startReader startState
aTags = listArray (0,pred nextTag) (tag_dlist [])
aGroups = makeGroupArray maxGroupIndex (fromRight groups)
monad = go (starTrans pOrig) (Advice 0) (Advice 1)
startReader :: Maybe GroupIndex
startReader = Just 0
startState :: ([OP]->[OP],Tag)
startState = ( (Minimize:) . (Maximize:) , 2)
makeOrbit :: PM (Maybe Tag)
makeOrbit = do Apply x <- uniq Orbit
tell [Left x]
return (Just x)
withOrbit :: PM a -> PM (a,[Tag])
withOrbit = listens childStars
where childStars x = let (ts,_) = partitionEither x in ts
getParentIndex :: PM (Maybe GroupIndex)
getParentIndex = ask
makeGroup :: GroupInfo -> PM ()
makeGroup = tell . (:[]) . Right
nonCapture :: PM a -> PM a
nonCapture = local (const Nothing)
withParent :: GroupIndex -> PM a -> PM (a,[Tag])
withParent this = local (const (Just this)) . listens childGroupInfo
where childGroupInfo x =
let (_,gs) = partitionEither x
children :: [GroupIndex]
children = norep . sort . map thisIndex
. filter ((this==).parentIndex) $ gs
in concatMap (map stopTag . (aGroups!)) (this:children)
uniq :: OP -> PM HandleTag
uniq newOp = do (op,s) <- get
let op' = op . (newOp:)
s' = succ s
put $! debug ("\n"++show (s,newOp)++"\n") (op',s')
return (Apply s)
combineConcat :: [Pattern] -> HHQ
combineConcat | rightAssoc compOpt = (\ps -> foldr combineSeq (go (last ps)) (map go $ init ps))
| otherwise = (\ps -> foldl combineSeq (go (head ps)) (map go $ tail ps))
where combineSeq :: HHQ -> HHQ -> HHQ
combineSeq pFront pEnd = (\ m1 m2 -> mdo
let bothVary = varies qFront && varies qEnd
a <- if noTag m1 && bothVary then uniq Minimize else return m1
b <- if noTag m2 && bothVary then uniq Maximize else return m2
mid <- case (noTag a,canAccept qFront,noTag b,canAccept qEnd) of
(False,False,_,_) -> return (toAdvice a)
(_,_,False,False) -> return (toAdvice b)
_ -> if tagged qFront || tagged qEnd then uniq Maximize else return NoTag
qFront <- pFront a mid
qEnd <- pEnd (toAdvice mid) b
let wanted = if WantsEither == wants qEnd then wants qFront else wants qEnd
return $ Q (mergeNullViews (nullQ qFront) (nullQ qEnd))
(seqTake (takes qFront) (takes qEnd))
[] Nothing Nothing
bothVary (childGroups qFront || childGroups qEnd) wanted
(Seq qFront qEnd)
)
go :: Pattern -> HHQ
go pIn m1 m2 =
let die = error $ "patternToQ cannot handle "++show pIn
nil = return $ Q {nullQ=emptyNull (winTags (apply m1) (apply m2))
,takes=(0,Just 0)
,preReset=[],preTag=apply m1,postTag=apply m2
,tagged=False,childGroups=False,wants=WantsEither
,unQ=Empty}
one = return $ Q {nullQ=notNull
,takes=(1,Just 1)
,preReset=[],preTag=apply m1,postTag=apply m2
,tagged=False,childGroups=False,wants=WantsQNFA
,unQ = OneChar pIn}
test myTest = return $ Q {nullQ=testNull myTest (winTags (apply m1) (apply m2))
,takes=(0,Just 0)
,preReset=[],preTag=apply m1,postTag=apply m2
,tagged=False,childGroups=False,wants=WantsQT
,unQ=Test myTest }
in case pIn of
PEmpty -> nil
POr [] -> nil
POr [p] -> go p m1 m2
POr ps -> mdo
let canVary = varies ans || childGroups ans
a <- if noTag m1 && canVary then uniq Minimize else return m1
b <- if noTag m2 && canVary then uniq Maximize else return m2
let aAdvice = toAdvice a
bAdvice = toAdvice b
op' = if canVary then uniq Maximize else return bAdvice
cs <- fmap (++[bAdvice]) $ replicateM (pred $ length ps) op'
qs <- mapM (\(p,c) -> go p aAdvice c) (zip ps cs)
let wqs = map wants qs
wanted = if any (WantsBoth==) wqs then WantsBoth
else case (any (WantsQNFA==) wqs,any (WantsQT==) wqs) of
(True,True) -> WantsBoth
(True,False) -> WantsQNFA
(False,True) -> WantsQT
(False,False) -> WantsEither
nullView = addTagsToNullView (winTags (apply a) (apply b)) . cleanNullView . concatMap nullQ $ qs
let ans = Q nullView
(orTakes . map takes $ qs)
[] (apply a) (apply b)
canVary (any childGroups qs) wanted
(Or qs)
return ans
PConcat [] -> nil
PConcat ps -> combineConcat ps m1 m2
PStar mayFirstBeNull p -> mdo
let accepts = canAccept q
needsOrbit = varies q && childGroups q
needsTags = needsOrbit || accepts
a <- if noTag m1 && needsTags then uniq Minimize else return m1
b <- if noTag m2 && needsTags then uniq Maximize else return m2
c <- if needsOrbit then makeOrbit else return Nothing
(q,resetTags) <- withOrbit (go p NoTag NoTag)
let nullView = emptyNull (winTags (apply a) (apply b))
return $ Q nullView
(0,if accepts then Nothing else (Just 0))
[] (apply a) (apply b)
needsTags (childGroups q) WantsQT
(Star c resetTags mayFirstBeNull q)
PCarat dopa -> test (Test_BOL,dopa)
PDollar dopa -> test (Test_EOL,dopa)
PChar {} -> one
PDot {} -> one
PAny {} -> one
PAnyNot {} -> one
PEscape {} -> one
PGroup Nothing p -> go p m1 m2
PGroup (Just this) p -> do
mParent <- getParentIndex
case mParent of
Nothing -> go p m1 m2
Just parent -> do
a <- if noTag m1 then uniq Minimize else return m1
b <- if isNothing (apply m2) then uniq Maximize else return m2
(q,resetTags) <- withParent this (go p a b)
makeGroup (GroupInfo this parent (fromHandleTag a) (fromHandleTag b))
return $ q { nullQ = addResetsToNullView resetTags (nullQ q)
, tagged = True
, childGroups = True
, preReset = resetTags `mappend` (preReset q) }
PNonCapture p -> nonCapture (go p m1 m2)
PNonEmpty p -> mdo
let needsTags = canAccept q
a <- if noTag m1 && needsTags then uniq Minimize else return m1
b <- if noTag m2 && needsTags then uniq Maximize else return m2
q <- go p (toAdvice a) (toAdvice b)
when (not needsTags) (err $ "PNonEmpty could not accept characters: "++show (p,pOrig))
when (mustAccept q) (err $ "patternToQ : PNonEmpty provided with a *mustAccept* pattern: "++show (p,pOrig))
return $ Q (emptyNull (winTags (apply a) (apply b)))
(0,snd (takes q))
[] (apply a) (apply b)
needsTags (childGroups q) (wants q)
(NonEmpty q)
PPlus {} -> die
PQuest {} -> die
PBound {} -> die