-- | "Text.Regex.TDFA.TDFA" converts the QNFA from TNFA into the DFA. -- A DFA state corresponds to a Set of QNFA states, repesented as list -- of Index which are used to lookup the DFA state in a lazy Trie -- which holds all possible subsets of QNFA states. module Text.Regex.TDFA.TDFA(patternToRegex,DFA(..),DT(..) ,examineDFA,nfaToDFA,dfaMap) where --import Control.Arrow((***)) import Data.Monoid(Monoid(..)) import Control.Monad.State(State,MonadState(..),execState) import Data.Array.IArray(Array,(!),bounds,{-assocs-}) import Data.IntMap(IntMap) import qualified Data.IntMap as IMap(empty,keys,delete,null,lookup,fromDistinctAscList ,member,unionWith,singleton,union ,toAscList,Key,elems,toList,insert ,insertWith,insertWithKey) import Data.IntMap.CharMap2(CharMap(..)) import qualified Data.IntMap.CharMap2 as Map(empty) --import Data.IntSet(IntSet) import qualified Data.IntSet as ISet(empty,singleton,null) import Data.List(foldl') import qualified Data.Map (Map,empty,member,insert,elems) import Data.Sequence as S((|>),{-viewl,ViewL(..)-}) import Text.Regex.TDFA.Common {- all -} import Text.Regex.TDFA.IntArrTrieSet(TrieSet) import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerge) import Text.Regex.TDFA.Pattern(Pattern) --import Text.Regex.TDFA.RunMutState(toInstructions) import Text.Regex.TDFA.TNFA(patternToNFA) --import Debug.Trace {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} err :: String -> a err s = common_error "Text.Regex.TDFA.TDFA" s dlose :: DFA dlose = DFA { d_id = ISet.empty , d_dt = Simple' { dt_win = IMap.empty , dt_trans = Map.empty , dt_other = Transition dlose dlose mempty } } -- dumb smart constructor for tracing construction (I wanted to monitor laziness) {-# INLINE makeDFA #-} makeDFA :: SetIndex -> DT -> DFA makeDFA i dt = DFA i dt -- Note that no CompOption or ExecOption parameter is needed. nfaToDFA :: ((Index,Array Index QNFA),Array Tag OP,Array GroupIndex [GroupInfo]) -> CompOption -> ExecOption -> Regex nfaToDFA ((startIndex,aQNFA),aTagOp,aGroupInfo) co eo = Regex dfa startIndex indexBounds tagBounds trie aTagOp aGroupInfo ifa co eo where dfa = indexesToDFA [startIndex] indexBounds = bounds aQNFA tagBounds = bounds aTagOp ifa = (not (multiline co)) && isDFAFrontAnchored dfa indexesToDFA = {-# SCC "nfaToDFA.indexesToDFA" #-} Trie.lookupAsc trie -- Lookup in cache trie :: TrieSet DFA trie = Trie.fromSinglesMerge dlose mergeDFA (bounds aQNFA) indexToDFA newTransition :: DTrans -> Transition newTransition dtrans = Transition { trans_many = indexesToDFA (IMap.keys dtransWithSpawn) , trans_single = indexesToDFA (IMap.keys dtrans) , trans_how = dtransWithSpawn } where dtransWithSpawn = addSpawn dtrans makeTransition :: DTrans -> Transition makeTransition dtrans | hasSpawn = Transition { trans_many = indexesToDFA (IMap.keys dtrans) , trans_single = indexesToDFA (IMap.keys (IMap.delete startIndex dtrans)) , trans_how = dtrans } | otherwise = Transition { trans_many = indexesToDFA (IMap.keys dtrans) , trans_single = indexesToDFA (IMap.keys dtrans) , trans_how = dtrans } where hasSpawn = maybe False IMap.null (IMap.lookup startIndex dtrans) -- coming from (-1) means spawn a new starting item addSpawn :: DTrans -> DTrans addSpawn dtrans | IMap.member startIndex dtrans = dtrans | otherwise = IMap.insert startIndex mempty dtrans indexToDFA :: Index -> DFA -- used to seed the Trie from the NFA indexToDFA i = {-# SCC "nfaToDFA.indexToDFA" #-} makeDFA (ISet.singleton source) (qtToDT qtIn) where (QNFA {q_id = source,q_qt = qtIn}) = aQNFA!i qtToDT :: QT -> DT qtToDT (Testing {qt_test=wt, qt_dopas=dopas, qt_a=a, qt_b=b}) = Testing' { dt_test = wt , dt_dopas = dopas , dt_a = qtToDT a , dt_b = qtToDT b } qtToDT (Simple {qt_win=w, qt_trans=t, qt_other=o}) = Simple' { dt_win = makeWinner , dt_trans = fmap qtransToDFA t -- , dt_other = if IMap.null o then Just (newTransition $ IMap.singleton startIndex mempty) else Just (qtransToDFA o)} , dt_other = qtransToDFA o} where makeWinner :: IntMap {- Index -} Instructions -- (RunState ()) makeWinner | noWin w = IMap.empty | otherwise = IMap.singleton source (cleanWin w) qtransToDFA :: QTrans -> Transition qtransToDFA qtrans = {-# SCC "nfaToDFA.indexToDFA.qtransToDFA" #-} newTransition dtrans where dtrans :: DTrans dtrans =IMap.fromDistinctAscList . mapSnd (IMap.singleton source) $ best best :: [(Index {- Destination -} ,(DoPa,Instructions))] best = pickQTrans aTagOp $ qtrans -- The DFA states are built up by merging the singleton ones converted from the NFA. -- Thus the "source" indices in the DTrans should not collide. mergeDFA :: DFA -> DFA -> DFA mergeDFA d1 d2 = {-# SCC "nfaToDFA.mergeDFA" #-} makeDFA i dt where i = d_id d1 `mappend` d_id d2 dt = d_dt d1 `mergeDT` d_dt d2 mergeDT,nestDT :: DT -> DT -> DT mergeDT (Simple' w1 t1 o1) (Simple' w2 t2 o2) = Simple' w t o where w = w1 `mappend` w2 t = fuseDTrans -- t1 o1 t2 o2 o = mergeDTrans o1 o2 -- This is very much like mergeQTrans mergeDTrans :: Transition -> Transition -> Transition mergeDTrans (Transition {trans_how=dt1}) (Transition {trans_how=dt2}) = makeTransition dtrans where dtrans = IMap.unionWith IMap.union dt1 dt2 -- This is very much like fuseQTrans fuseDTrans :: CharMap Transition fuseDTrans = CharMap (IMap.fromDistinctAscList (fuse l1 l2)) where l1 = IMap.toAscList (unCharMap t1) l2 = IMap.toAscList (unCharMap t2) fuse :: [(IMap.Key, Transition)] -> [(IMap.Key, Transition)] -> [(IMap.Key, Transition)] fuse [] y = fmap (fmap (mergeDTrans o1)) y fuse x [] = fmap (fmap (mergeDTrans o2)) x fuse x@((xc,xa):xs) y@((yc,ya):ys) = case compare xc yc of LT -> (xc,mergeDTrans o2 xa) : fuse xs y EQ -> (xc,mergeDTrans xa ya) : fuse xs ys GT -> (yc,mergeDTrans o1 ya) : fuse x ys mergeDT dt1@(Testing' wt1 dopas1 a1 b1) dt2@(Testing' wt2 dopas2 a2 b2) = case compare wt1 wt2 of LT -> nestDT dt1 dt2 EQ -> Testing' { dt_test = wt1 , dt_dopas = dopas1 `mappend` dopas2 , dt_a = mergeDT a1 a2 , dt_b = mergeDT b1 b2 } GT -> nestDT dt2 dt1 mergeDT dt1@(Testing' {}) dt2 = nestDT dt1 dt2 mergeDT dt1 dt2@(Testing' {}) = nestDT dt2 dt1 nestDT dt1@(Testing' {dt_a=a,dt_b=b}) dt2 = dt1 { dt_a = mergeDT a dt2, dt_b = mergeDT b dt2 } nestDT _ _ = err "nestDT called on Simple -- cannot happen" patternToRegex :: (Pattern,(GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex patternToRegex pattern compOpt execOpt = nfaToDFA (patternToNFA compOpt pattern) compOpt execOpt dfaMap :: DFA -> Data.Map.Map SetIndex DFA dfaMap = seen (Data.Map.empty) where seen old d@(DFA {d_id=i,d_dt=dt}) = if i `Data.Map.member` old then old else let new = Data.Map.insert i d old in foldl' seen new (flattenDT dt) -- Get all trans_many states flattenDT :: DT -> [DFA] flattenDT (Simple' {dt_trans=(CharMap mt),dt_other=o}) = concatMap (\d -> [trans_many d {-,trans_single d-}]) . (:) o . IMap.elems $ mt flattenDT (Testing' {dt_a=a,dt_b=b}) = flattenDT a ++ flattenDT b examineDFA :: Regex -> String examineDFA (Regex {regex_dfa=dfa}) = unlines . (:) ("Number of reachable DFA states: "++show (length dfas)) . map show $ dfas where dfas = Data.Map.elems $ dfaMap dfa {- fillMap :: Tag -> IntMap (Position,Bool) fillMap tag = IMap.fromDistinctAscList [(t,(-1,True)) | t <- [0..tag] ] diffMap :: IntMap (Position,Bool) -> IntMap (Position,Bool) -> [(Index,(Position,Bool))] diffMap old new = IMap.toList (IMap.differenceWith (\a b -> if a==b then Nothing else Just b) old new) examineDFA :: (DFA,Index,Array Tag OP,Array GroupIndex [GroupInfo]) -> String examineDFA (dfa,_,aTags,_) = unlines $ map (examineDFA' (snd . bounds $ aTags)) (Map.elems $ dfaMap dfa) examineDFA' :: Tag -> DFA -> String examineDFA' maxTag = showDFA (fillMap maxTag) {- instance Show DFA where show (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) ++"\n ,d_dt = "++ show dt ++"\n}" -} -- instance Show DT where show = showDT showDFA :: IntMap (Position,Bool) -> DFA -> String showDFA m (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) ++"\n ,d_dt = "++ showDT m dt ++"\n}" -} -- pick QTrans can be told the unique source and knows all the -- destinations (hmm...along with qt_win)! So if in ascending destination order the last source -- is free to mutatate the old state. If the QTrans has only one -- entry then all we need to do is mutate that entry when making a -- transition. -- pickQTrans :: Array Tag OP -> QTrans -> [({-Destination-}Index,(DoPa,Instructions))] pickQTrans op tr = mapSnd (bestTrans op) . IMap.toList $ tr cleanWin :: WinTags -> Instructions cleanWin = toInstructions bestTrans :: Array Tag OP -> [TagCommand] -> (DoPa,Instructions) bestTrans _ [] = err "bestTrans : There were no transition choose from!" bestTrans aTagOP (f:fs) | null fs = canonical f | otherwise = answer -- if null toDisplay then answer else trace toDisplay answer where answer = foldl' pick (canonical f) fs {- toDisplay | null fs = "" | otherwise = unlines $ "bestTrans" : show (answer) : "from among" : concatMap (\x -> [show x, show (toInstructions (snd x))]) (f:fs) -} canonical :: TagCommand -> (DoPa,Instructions) canonical (dopa,spec) = (dopa, toInstructions spec) pick :: (DoPa,Instructions) -> TagCommand -> (DoPa,Instructions) pick win@(dopa1,winI) (dopa2,spec) = let nextI = toInstructions spec -- in case compareWith choose winPos nextPos of -- XXX 2009: add in enterOrbit information in case compareWith choose (toListing winI) (toListing nextI) of GT -> win LT -> (dopa2,nextI) EQ -> if dopa1 >= dopa2 then win else (dopa2,nextI) -- no deep reason not to just pick win toListing :: Instructions -> [(Tag,Action)] toListing (Instructions {newPos = nextPos}) = filter notReset nextPos where notReset (_,SetVal (-1)) = False notReset _ = True {- toListing (Instructions {newPos = nextPos}) = mergeTagOrbit nextPos (filter snd nextFlags) mergeTagOrbit xx [] = xx mergeTagOrbit [] yy = yy mergeTagOrbit xx@(x:xs) yy@(y:ys) = case compare (fst x) (fst y) of GT -> y : mergeTagOrbit xx ys LT -> x : mergeTagOrbit xs yy EQ -> x : mergeTagOrbit xs ys -- keep tag setting over orbit setting. -} {-# INLINE choose #-} choose :: Maybe (Tag,Action) -> Maybe (Tag,Action) -> Ordering choose Nothing Nothing = EQ choose Nothing x = flipOrder (choose x Nothing) choose (Just (tag,_post)) Nothing = case aTagOP!tag of Maximize -> GT Minimize -> LT -- needed to choose best path inside nested * operators, -- this needs a leading Minimize tag inside at least the parent * operator Ignore -> GT -- XXX this is a guess in analogy with Maximize for the end bit of a group Orbit -> LT -- trace ("choose LT! Just "++show tag++" < Nothing") LT -- 2009 XXX : comment out next line and use the Orbit instead -- Orbit -> err $ "bestTrans.choose : Very Unexpeted Orbit in Just Nothing: "++show (tag,post,aTagOP,f:fs) choose (Just (tag,post1)) (Just (_,post2)) = case aTagOP!tag of Maximize -> order Minimize -> flipOrder order Ignore -> EQ Orbit -> EQ -- Orbit -> err $ "bestTrans.choose : Very Unexpeted Orbit in Just Just: "++show (tag,(post1,post2),aTagOP,f:fs) where order = case (post1,post2) of (SetPre,SetPre) -> EQ (SetPost,SetPost) -> EQ (SetPre,SetPost) -> LT (SetPost,SetPre) -> GT (SetVal v1,SetVal v2) -> compare v1 v2 _ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (tag,post1,post2) {-# INLINE compareWith #-} compareWith :: (Ord x,Monoid a) => (Maybe (x,b) -> Maybe (x,c) -> a) -> [(x,b)] -> [(x,c)] -> a compareWith comp = cw where cw [] [] = comp Nothing Nothing cw xx@(x:xs) yy@(y:ys) = case compare (fst x) (fst y) of GT -> comp Nothing (Just y) `mappend` cw xx ys EQ -> comp (Just x) (Just y) `mappend` cw xs ys LT -> comp (Just x) Nothing `mappend` cw xs yy cw xx [] = foldr (\x rest -> comp (Just x) Nothing `mappend` rest) mempty xx cw [] yy = foldr (\y rest -> comp Nothing (Just y) `mappend` rest) mempty yy isDFAFrontAnchored :: DFA -> Bool isDFAFrontAnchored = isDTFrontAnchored . d_dt where isDTFrontAnchored :: DT -> Bool isDTFrontAnchored (Simple' {}) = False isDTFrontAnchored (Testing' {dt_test=wt,dt_a=a,dt_b=b}) | wt == Test_BOL = isDTLosing b | otherwise = isDTFrontAnchored a && isDTFrontAnchored b where -- can DT never win or accept a character (when following trans_single)? isDTLosing :: DT -> Bool isDTLosing (Testing' {dt_a=a',dt_b=b'}) = isDTLosing a' && isDTLosing b' isDTLosing (Simple' {dt_win=w}) | not (IMap.null w) = False -- can win with 0 characters isDTLosing (Simple' {dt_trans=CharMap mt,dt_other=o}) = let ts = o : IMap.elems mt in all transLoses ts where transLoses :: Transition -> Bool transLoses (Transition {trans_single=dfa,trans_how=dtrans}) = isDTLose dfa || onlySpawns dtrans where isDTLose :: DFA -> Bool isDTLose dfa' = ISet.null (d_id dfa') onlySpawns :: DTrans -> Bool onlySpawns t = case IMap.elems t of [m] -> IMap.null m _ -> False {- toInstructions -} toInstructions :: TagList -> Instructions toInstructions spec = let (p,o) = execState (assemble spec) (mempty,mempty) in Instructions { newPos = IMap.toList p , newOrbits = if IMap.null o then Nothing else Just $ alterOrbits (IMap.toList o) } type CompileInstructions a = State ( IntMap Action -- 2009: change to SetPre | SetPost enum , IntMap AlterOrbit ) a data AlterOrbit = AlterReset -- removing the Orbits record from the OrbitLog | AlterLeave -- set inOrbit to False | AlterModify { newInOrbit :: Bool -- set inOrbit to the newInOrbit value , freshOrbit :: Bool} -- freshOrbit of True means to set getOrbits to mempty deriving (Show) -- freshOrbit of False means try appending position or else Seq.empty assemble :: TagList -> CompileInstructions () assemble = mapM_ oneInstruction where oneInstruction (tag,command) = case command of PreUpdate TagTask -> setPreTag tag PreUpdate ResetGroupStopTask -> resetGroupTag tag PreUpdate SetGroupStopTask -> setGroupTag tag PreUpdate ResetOrbitTask -> resetOrbit tag PreUpdate EnterOrbitTask -> enterOrbit tag PreUpdate LeaveOrbitTask -> leaveOrbit tag PostUpdate TagTask -> setPostTag tag PostUpdate ResetGroupStopTask -> resetGroupTag tag PostUpdate SetGroupStopTask -> setGroupTag tag _ -> err ("assemble : Weird orbit command: "++show (tag,command)) setPreTag :: Tag -> CompileInstructions () setPreTag = modifyPos SetPre setPostTag :: Tag -> CompileInstructions () setPostTag = modifyPos SetPost resetGroupTag :: Tag -> CompileInstructions () resetGroupTag = modifyPos (SetVal (-1)) setGroupTag :: Tag -> CompileInstructions () setGroupTag = modifyPos (SetVal 0) -- The following is ten times more complicated than it ought to be. Sorry, I was too new, and now -- too busy to clean this up. resetOrbit :: Tag -> CompileInstructions () resetOrbit tag = modifyPos (SetVal (-1)) tag >> modifyOrbit (IMap.insert tag AlterReset) enterOrbit :: Tag -> CompileInstructions () enterOrbit tag = modifyPos (SetVal 0) tag >> modifyOrbit changeOrbit where changeOrbit = IMap.insertWith overwriteOrbit tag appendNewOrbit appendNewOrbit = AlterModify {newInOrbit = True, freshOrbit = False} -- try to append startNewOrbit = AlterModify {newInOrbit = True, freshOrbit = True} -- will start a new series overwriteOrbit _ AlterReset = startNewOrbit overwriteOrbit _ AlterLeave = startNewOrbit overwriteOrbit _ (AlterModify {newInOrbit = False}) = startNewOrbit overwriteOrbit _ (AlterModify {newInOrbit = True}) = err $ "enterOrbit: Cannot enterOrbit twice in a row: " ++ show tag leaveOrbit :: Tag -> CompileInstructions () leaveOrbit tag = modifyOrbit escapeOrbit where escapeOrbit = IMap.insertWith setInOrbitFalse tag AlterLeave where setInOrbitFalse _ x@(AlterModify {}) = x {newInOrbit = False} setInOrbitFalse _ x = x modifyPos :: Action -> Tag -> CompileInstructions () modifyPos todo tag = do (a,c) <- get let a' = IMap.insert tag todo a seq a' $ put (a',c) modifyOrbit :: (IntMap AlterOrbit -> IntMap AlterOrbit) -> CompileInstructions () modifyOrbit f = do (a,c) <- get let c' = f c seq c' $ put (a,c') ---- alterOrbits :: [(Tag,AlterOrbit)] -> (Position -> OrbitTransformer) alterOrbits x = let items = map alterOrbit x in (\ pos m -> foldl (flip ($)) m (map ($ pos) items)) alterOrbit :: (Tag,AlterOrbit) -> (Position -> OrbitTransformer) alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = True}) = (\ pos m -> IMap.insert tag (Orbits { inOrbit = inOrbit' , basePos = pos , ordinal = Nothing , getOrbits = mempty}) m) alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = False}) = (\ pos m -> IMap.insertWithKey (updateOrbit pos) tag (newOrbit pos) m) where newOrbit pos = Orbits { inOrbit = inOrbit' , basePos = pos , ordinal = Nothing , getOrbits = mempty} updateOrbit pos _tag new old | inOrbit old = old { inOrbit = inOrbit' , getOrbits = getOrbits old |> pos } | otherwise = new alterOrbit (tag,AlterReset) = (\ _ m -> IMap.delete tag m) alterOrbit (tag,AlterLeave) = (\ _ m -> case IMap.lookup tag m of Nothing -> m Just x -> IMap.insert tag (x {inOrbit=False}) m)