module Text.Regex.TDFA.MutRunBS (findMatch,findMatchAll,countMatchAll) where
import Control.Monad(MonadPlus(..))
import Control.Monad.ST(ST)
import qualified Control.Monad.ST.Lazy as Lazy(ST,runST,strictToLazyST)
import Data.Array.IArray((!),array,bounds)
import Data.Array.MArray(rangeSize)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Unsafe as B(unsafeIndex)
import Data.IntMap.CharMap(CharMap(..))
import qualified Data.IntMap as IMap(null,lookup)
import Text.Regex.Base(MatchArray,RegexOptions(..))
import Text.Regex.TDFA.Common
import Text.Regex.TDFA.TDFA(isDFAFrontAnchored)
import Text.Regex.TDFA.RunMutState(TagEngine(..),newTagEngine2,tagsToGroupsST,newScratch,resetScratch,SScratch(..))
import Text.Regex.TDFA.Wrap()
lazy :: ST s a -> Lazy.ST s a
lazy = Lazy.strictToLazyST
index :: B.ByteString -> Int -> Int
index input off = fromEnum (B.unsafeIndex input off)
findMatch :: Regex -> B.ByteString -> Maybe MatchArray
findMatch regexIn inputIn = case matchHere regexIn 0 inputIn of
[] -> Nothing
(ma:_) -> Just ma
findMatchAll :: Regex -> B.ByteString -> [MatchArray]
findMatchAll regexIn inputIn = matchHere regexIn 0 inputIn
countMatchAll :: Regex -> B.ByteString -> Int
countMatchAll regexIn inputIn = length (matchHere regex 0 inputIn) where
regex = setExecOpts (ExecOption {captureGroups = False,testMatch = False}) regexIn
matchHere :: Regex -> Position -> B.ByteString -> [MatchArray]
matchHere regexIn offsetIn inputIn = ans where
ans = if subCapture then runHerePure else noCap
where subCapture = captureGroups (regex_execOptions regexIn)
&& (1<=rangeSize (bounds (regex_groups regexIn)))
frontAnchored = (not (multiline (regex_compOptions regexIn)))
&& isDFAFrontAnchored (regex_dfa regexIn)
final = B.length inputIn
test | multiline (regex_compOptions regexIn) = test_multiline
| otherwise = test_singleline
where test_multiline Test_BOL off = off == 0 || newline == index inputIn (pred off)
test_multiline Test_EOL off = off == final || newline == index inputIn off
test_singleline Test_BOL off = off == 0
test_singleline Test_EOL off = off == final
newline = fromEnum '\n'
runHerePure :: [MatchArray]
runHerePure = Lazy.runST (do
TagEngine findTrans updateWinner performTrans <- lazy (newTagEngine2 regexIn)
let
runHere winning dt s1 s2 off =
s1 `seq` s2 `seq` off `seq`
case dt of
Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
if test wt off
then runHere winning a s1 s2 off
else runHere winning b s1 s2 off
Simple' {dt_win=w, dt_trans=(CharMap t), dt_other=o} -> do
if off==final then updateWinner s1 off winning w else do
case IMap.lookup (index inputIn off) t `mplus` o of
Nothing -> updateWinner s1 off winning w
Just (dfa,trans) -> do
findTrans s1 off trans
winning' <- updateWinner s1 off winning w
performTrans s1 s2 off trans
runHere winning' (d_dt dfa) s2 s1 (succ off)
(SScratch s1 s2 w0) <- lazy (newScratch regexIn offsetIn)
let go off = off `seq` do
answer <- lazy (runHere Nothing (d_dt (regex_dfa regexIn)) s1 s2 off)
case answer of
Nothing -> if off==final
then return []
else do let off' = succ off
() <- lazy (resetScratch regexIn off' s1 w0)
go off'
Just (w,off') -> do
ma <- lazy (tagsToGroupsST (regex_groups regexIn) w)
let len = snd (ma!0)
rest <- if len==0
then if off'==final then return []
else do let off'' = succ off'
() <- lazy (resetScratch regexIn off'' s1 w0)
go off''
else do () <- lazy (resetScratch regexIn off' s1 w0)
go off'
return (ma:rest)
if frontAnchored
then if offsetIn/=0 then return []
else do
answer <- lazy (runHere Nothing (d_dt (regex_dfa regexIn)) s1 s2 offsetIn)
case answer of
Nothing -> return []
Just (w,_) -> do
ma <- lazy (tagsToGroupsST (regex_groups regexIn) w)
return (ma:[])
else go offsetIn )
noCap =
let dtIn = (d_dt (regex_dfa regexIn))
go off =
case runHereNoCap Nothing dtIn off of
Nothing -> if off==final then [] else go (succ off)
Just off' ->
let len = off'off
ma = array (0,0) [(0,(off,len))]
rest = if len==0
then if off'==final then []
else go (succ off')
else go off'
in (ma:rest)
in if frontAnchored
then if offsetIn /= 0 then []
else case runHereNoCap Nothing dtIn offsetIn of
Nothing -> []
Just off' ->
let len = off'offsetIn
ma = array (0,0) [(0,(offsetIn,len))]
in (ma:[])
else go offsetIn
runHereNoCap winning dt off =
off `seq`
case dt of
Simple' {dt_win=w, dt_trans=(CharMap t), dt_other=o} ->
let winning' = if IMap.null w then winning else Just off
in seq winning' $
if off==final then winning'
else case IMap.lookup (index inputIn off) t `mplus` o of
Nothing -> winning'
Just (DFA {d_dt=dt'},_) ->
runHereNoCap winning' dt' (succ off)
Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
if test wt off
then runHereNoCap winning a off
else runHereNoCap winning b off