module Text.Regex.TDFA.TDFA(patternToRegex,DFA(..),DT(..)
,examineDFA,nfaToDFA,dfaMap) where
import Control.Monad.Instances()
import Control.Monad.RWS
import Control.Monad.State(State,MonadState(..),execState)
import Data.Array.IArray(Array,(!),bounds,)
import Data.IntMap(IntMap)
import qualified Data.IntMap as IMap
import Data.IntMap.CharMap2(CharMap(..))
import qualified Data.IntMap.CharMap2 as Map(empty)
import qualified Data.IntSet as ISet
import Data.List(foldl')
import qualified Data.Map (Map,empty,member,insert,elems)
import Data.Maybe(isJust)
import Data.Sequence as S((|>),)
import Text.Regex.TDFA.Common
import Text.Regex.TDFA.IntArrTrieSet(TrieSet)
import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerge)
import Text.Regex.TDFA.Pattern(Pattern)
import Text.Regex.TDFA.TNFA(patternToNFA)
err :: String -> a
err s = common_error "Text.Regex.TDFA.TDFA" s
dlose :: DFA
dlose = DFA { d_id = ISet.empty
, d_dt = Simple' { dt_win = IMap.empty
, dt_trans = Map.empty
, dt_other = Transition dlose dlose mempty } }
makeDFA :: SetIndex -> DT -> DFA
makeDFA i dt = DFA i dt
nfaToDFA :: ((Index,Array Index QNFA),Array Tag OP,Array GroupIndex [GroupInfo])
-> CompOption -> ExecOption
-> Regex
nfaToDFA ((startIndex,aQNFA),aTagOp,aGroupInfo) co eo = Regex dfa startIndex indexBounds tagBounds trie aTagOp aGroupInfo ifa co eo where
dfa = indexesToDFA [startIndex]
indexBounds = bounds aQNFA
tagBounds = bounds aTagOp
ifa = (not (multiline co)) && isDFAFrontAnchored dfa
indexesToDFA = Trie.lookupAsc trie
trie :: TrieSet DFA
trie = Trie.fromSinglesMerge dlose mergeDFA (bounds aQNFA) indexToDFA
newTransition :: DTrans -> Transition
newTransition dtrans = Transition { trans_many = indexesToDFA (IMap.keys dtransWithSpawn)
, trans_single = indexesToDFA (IMap.keys dtrans)
, trans_how = dtransWithSpawn }
where dtransWithSpawn = addSpawn dtrans
makeTransition :: DTrans -> Transition
makeTransition dtrans | hasSpawn = Transition { trans_many = indexesToDFA (IMap.keys dtrans)
, trans_single = indexesToDFA (IMap.keys (IMap.delete startIndex dtrans))
, trans_how = dtrans }
| otherwise = Transition { trans_many = indexesToDFA (IMap.keys dtrans)
, trans_single = indexesToDFA (IMap.keys dtrans)
, trans_how = dtrans }
where hasSpawn = maybe False IMap.null (IMap.lookup startIndex dtrans)
addSpawn :: DTrans -> DTrans
addSpawn dtrans | IMap.member startIndex dtrans = dtrans
| otherwise = IMap.insert startIndex mempty dtrans
indexToDFA :: Index -> DFA
indexToDFA i = makeDFA (ISet.singleton source) (qtToDT qtIn)
where
(QNFA {q_id = source,q_qt = qtIn}) = aQNFA!i
qtToDT :: QT -> DT
qtToDT (Testing {qt_test=wt, qt_dopas=dopas, qt_a=a, qt_b=b}) =
Testing' { dt_test = wt
, dt_dopas = dopas
, dt_a = qtToDT a
, dt_b = qtToDT b }
qtToDT (Simple {qt_win=w, qt_trans=t, qt_other=o}) =
Simple' { dt_win = makeWinner
, dt_trans = fmap qtransToDFA t
, dt_other = qtransToDFA o}
where
makeWinner :: IntMap Instructions
makeWinner | noWin w = IMap.empty
| otherwise = IMap.singleton source (cleanWin w)
qtransToDFA :: QTrans -> Transition
qtransToDFA qtrans =
newTransition dtrans
where
dtrans :: DTrans
dtrans =IMap.fromDistinctAscList . mapSnd (IMap.singleton source) $ best
best :: [(Index ,(DoPa,Instructions))]
best = pickQTrans aTagOp $ qtrans
mergeDFA :: DFA -> DFA -> DFA
mergeDFA d1 d2 = makeDFA i dt
where
i = d_id d1 `mappend` d_id d2
dt = d_dt d1 `mergeDT` d_dt d2
mergeDT,nestDT :: DT -> DT -> DT
mergeDT (Simple' w1 t1 o1) (Simple' w2 t2 o2) = Simple' w t o
where
w = w1 `mappend` w2
t = fuseDTrans
o = mergeDTrans o1 o2
mergeDTrans :: Transition -> Transition -> Transition
mergeDTrans (Transition {trans_how=dt1}) (Transition {trans_how=dt2}) = makeTransition dtrans
where dtrans = IMap.unionWith IMap.union dt1 dt2
fuseDTrans :: CharMap Transition
fuseDTrans = CharMap (IMap.fromDistinctAscList (fuse l1 l2))
where
l1 = IMap.toAscList (unCharMap t1)
l2 = IMap.toAscList (unCharMap t2)
fuse :: [(IMap.Key, Transition)]
-> [(IMap.Key, Transition)]
-> [(IMap.Key, Transition)]
fuse [] y = fmap (fmap (mergeDTrans o1)) y
fuse x [] = fmap (fmap (mergeDTrans o2)) x
fuse x@((xc,xa):xs) y@((yc,ya):ys) =
case compare xc yc of
LT -> (xc,mergeDTrans o2 xa) : fuse xs y
EQ -> (xc,mergeDTrans xa ya) : fuse xs ys
GT -> (yc,mergeDTrans o1 ya) : fuse x ys
mergeDT dt1@(Testing' wt1 dopas1 a1 b1) dt2@(Testing' wt2 dopas2 a2 b2) =
case compare wt1 wt2 of
LT -> nestDT dt1 dt2
EQ -> Testing' { dt_test = wt1
, dt_dopas = dopas1 `mappend` dopas2
, dt_a = mergeDT a1 a2
, dt_b = mergeDT b1 b2 }
GT -> nestDT dt2 dt1
mergeDT dt1@(Testing' {}) dt2 = nestDT dt1 dt2
mergeDT dt1 dt2@(Testing' {}) = nestDT dt2 dt1
nestDT dt1@(Testing' {dt_a=a,dt_b=b}) dt2 = dt1 { dt_a = mergeDT a dt2, dt_b = mergeDT b dt2 }
nestDT _ _ = err "nestDT called on Simple -- cannot happen"
patternToRegex :: (Pattern,(GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex
patternToRegex pattern compOpt execOpt = nfaToDFA (patternToNFA compOpt pattern) compOpt execOpt
dfaMap :: DFA -> Data.Map.Map SetIndex DFA
dfaMap = seen (Data.Map.empty) where
seen old d@(DFA {d_id=i,d_dt=dt}) =
if i `Data.Map.member` old
then old
else let new = Data.Map.insert i d old
in foldl' seen new (flattenDT dt)
flattenDT :: DT -> [DFA]
flattenDT (Simple' {dt_trans=(CharMap mt),dt_other=o}) = concatMap (\d -> [trans_many d ]) . (:) o . IMap.elems $ mt
flattenDT (Testing' {dt_a=a,dt_b=b}) = flattenDT a ++ flattenDT b
examineDFA :: Regex -> String
examineDFA (Regex {regex_dfa=dfa}) = unlines . (:) ("Number of reachable DFA states: "++show (length dfas)) . map show $ dfas
where dfas = Data.Map.elems $ dfaMap dfa
pickQTrans :: Array Tag OP -> QTrans -> [(Index,(DoPa,Instructions))]
pickQTrans op tr = mapSnd (bestTrans op) . IMap.toList $ tr
cleanWin :: WinTags -> Instructions
cleanWin = toInstructions
bestTrans :: Array Tag OP -> [TagCommand] -> (DoPa,Instructions)
bestTrans _ [] = err "bestTrans : There were no transition choose from!"
bestTrans aTagOP (f:fs) | null fs = canonical f
| otherwise = answer
where
answer = foldl' pick (canonical f) fs
canonical :: TagCommand -> (DoPa,Instructions)
canonical (dopa,spec) = (dopa, toInstructions spec)
pick :: (DoPa,Instructions) -> TagCommand -> (DoPa,Instructions)
pick win@(dopa1,winI) (dopa2,spec) =
let nextI = toInstructions spec
in case compareWith choose (toListing winI) (toListing nextI) of
GT -> win
LT -> (dopa2,nextI)
EQ -> if dopa1 >= dopa2 then win else (dopa2,nextI)
toListing :: Instructions -> [(Tag,Action)]
toListing (Instructions {newPos = nextPos}) = filter notReset nextPos
where notReset (_,SetVal (1)) = False
notReset _ = True
choose :: Maybe (Tag,Action) -> Maybe (Tag,Action) -> Ordering
choose Nothing Nothing = EQ
choose Nothing x = flipOrder (choose x Nothing)
choose (Just (tag,_post)) Nothing =
case aTagOP!tag of
Maximize -> GT
Minimize -> LT
Ignore -> GT
Orbit -> LT
choose (Just (tag,post1)) (Just (_,post2)) =
case aTagOP!tag of
Maximize -> order
Minimize -> flipOrder order
Ignore -> EQ
Orbit -> EQ
where order = case (post1,post2) of
(SetPre,SetPre) -> EQ
(SetPost,SetPost) -> EQ
(SetPre,SetPost) -> LT
(SetPost,SetPre) -> GT
(SetVal v1,SetVal v2) -> compare v1 v2
_ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (tag,post1,post2)
compareWith :: (Ord x,Monoid a) => (Maybe (x,b) -> Maybe (x,c) -> a) -> [(x,b)] -> [(x,c)] -> a
compareWith comp = cw where
cw [] [] = comp Nothing Nothing
cw xx@(x:xs) yy@(y:ys) =
case compare (fst x) (fst y) of
GT -> comp Nothing (Just y) `mappend` cw xx ys
EQ -> comp (Just x) (Just y) `mappend` cw xs ys
LT -> comp (Just x) Nothing `mappend` cw xs yy
cw xx [] = foldr (\x rest -> comp (Just x) Nothing `mappend` rest) mempty xx
cw [] yy = foldr (\y rest -> comp Nothing (Just y) `mappend` rest) mempty yy
isDFAFrontAnchored :: DFA -> Bool
isDFAFrontAnchored = isDTFrontAnchored . d_dt
where
isDTFrontAnchored :: DT -> Bool
isDTFrontAnchored (Simple' {}) = False
isDTFrontAnchored (Testing' {dt_test=wt,dt_a=a,dt_b=b}) | wt == Test_BOL = isDTLosing b
| otherwise = isDTFrontAnchored a && isDTFrontAnchored b
where
isDTLosing :: DT -> Bool
isDTLosing (Testing' {dt_a=a,dt_b=b}) = isDTLosing a && isDTLosing b
isDTLosing (Simple' {dt_win=w}) | not (IMap.null w) = False
isDTLosing (Simple' {dt_trans=CharMap mt,dt_other=o}) =
let ts = o : IMap.elems mt
in all transLoses ts
where
transLoses :: Transition -> Bool
transLoses (Transition {trans_single=dfa,trans_how=dtrans}) = isDTLose dfa || onlySpawns dtrans
where
isDTLose :: DFA -> Bool
isDTLose dfa = ISet.null (d_id dfa)
onlySpawns :: DTrans -> Bool
onlySpawns t = case IMap.elems t of
[m] -> IMap.null m
_ -> False
toInstructions :: TagList -> Instructions
toInstructions spec =
let (p,o) = execState (assemble spec) (mempty,mempty)
in Instructions { newPos = IMap.toList p
, newOrbits = if IMap.null o then Nothing
else Just $ alterOrbits (IMap.toList o)
}
type CompileInstructions a = State
( IntMap Action
, IntMap AlterOrbit
) a
data AlterOrbit = AlterReset
| AlterLeave
| AlterModify { newInOrbit :: Bool
, freshOrbit :: Bool}
deriving (Show)
assemble :: TagList -> CompileInstructions ()
assemble = mapM_ oneInstruction where
oneInstruction (tag,command) =
case command of
PreUpdate TagTask -> setPreTag tag
PreUpdate ResetGroupStopTask -> resetGroupTag tag
PreUpdate SetGroupStopTask -> setGroupTag tag
PreUpdate ResetOrbitTask -> resetOrbit tag
PreUpdate EnterOrbitTask -> enterOrbit tag
PreUpdate LeaveOrbitTask -> leaveOrbit tag
PostUpdate TagTask -> setPostTag tag
PostUpdate ResetGroupStopTask -> resetGroupTag tag
PostUpdate SetGroupStopTask -> setGroupTag tag
_ -> err ("assemble : Weird orbit command: "++show (tag,command))
setPreTag :: Tag -> CompileInstructions ()
setPreTag = modifyPos SetPre
setPostTag :: Tag -> CompileInstructions ()
setPostTag = modifyPos SetPost
resetGroupTag :: Tag -> CompileInstructions ()
resetGroupTag = modifyPos (SetVal (1))
setGroupTag :: Tag -> CompileInstructions ()
setGroupTag = modifyPos (SetVal 0)
resetOrbit :: Tag -> CompileInstructions ()
resetOrbit tag = modifyPos (SetVal (1)) tag >> modifyOrbit (IMap.insert tag AlterReset)
enterOrbit :: Tag -> CompileInstructions ()
enterOrbit tag = modifyPos (SetVal 0) tag >> modifyOrbit changeOrbit where
changeOrbit = IMap.insertWith overwriteOrbit tag appendNewOrbit
appendNewOrbit = AlterModify {newInOrbit = True, freshOrbit = False}
startNewOrbit = AlterModify {newInOrbit = True, freshOrbit = True}
overwriteOrbit _ AlterReset = startNewOrbit
overwriteOrbit _ AlterLeave = startNewOrbit
overwriteOrbit _ (AlterModify {newInOrbit = False}) = startNewOrbit
overwriteOrbit _ (AlterModify {newInOrbit = True}) =
err $ "enterOrbit: Cannot enterOrbit twice in a row: " ++ show tag
leaveOrbit :: Tag -> CompileInstructions ()
leaveOrbit tag = modifyOrbit escapeOrbit where
escapeOrbit = IMap.insertWith setInOrbitFalse tag AlterLeave where
setInOrbitFalse _ x@(AlterModify {}) = x {newInOrbit = False}
setInOrbitFalse _ x = x
modifyPos :: Action -> Tag -> CompileInstructions ()
modifyPos todo tag = do
(a,c) <- get
let a' = IMap.insert tag todo a
seq a' $ put (a',c)
modifyOrbit :: (IntMap AlterOrbit -> IntMap AlterOrbit) -> CompileInstructions ()
modifyOrbit f = do
(a,c) <- get
let c' = f c
seq c' $ put (a,c')
alterOrbits :: [(Tag,AlterOrbit)] -> (Position -> OrbitTransformer)
alterOrbits x = let items = map alterOrbit x
in (\ pos m -> foldl (flip ($)) m (map ($ pos) items))
alterOrbit :: (Tag,AlterOrbit) -> (Position -> OrbitTransformer)
alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = True}) =
(\ pos m -> IMap.insert tag (Orbits { inOrbit = inOrbit'
, basePos = pos
, ordinal = Nothing
, getOrbits = mempty}) m)
alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = False}) =
(\ pos m -> IMap.insertWithKey (updateOrbit pos) tag (newOrbit pos) m) where
newOrbit pos = Orbits { inOrbit = inOrbit'
, basePos = pos
, ordinal = Nothing
, getOrbits = mempty}
updateOrbit pos _tag new old | inOrbit old = old { inOrbit = inOrbit'
, getOrbits = getOrbits old |> pos }
| otherwise = new
alterOrbit (tag,AlterReset) = (\ _ m -> IMap.delete tag m)
alterOrbit (tag,AlterLeave) = (\ _ m -> case IMap.lookup tag m of
Nothing -> m
Just x -> IMap.insert tag (x {inOrbit=False}) m)