module Text.Regex.Deriv.ByteString.BitCode
( Regex
, CompOption(..)
, ExecOption(..)
, defaultCompOpt
, defaultExecOpt
, compile
, execute
, regexec
) where
import Prelude hiding (Word)
import System.IO.Unsafe
import Data.IORef
import qualified Data.HashTable.IO as H
import qualified Data.Hashable as Ha
import qualified Data.Dequeue as DQ
import Data.List
import Data.Maybe
import Data.Char (ord)
import GHC.Int
import GHC.Arr
import qualified Data.IntMap as IM
import qualified Data.ByteString.Char8 as S
import qualified Data.Map as M
import Control.Monad
import Text.Regex.Base(RegexOptions(..),RegexLike(..),MatchArray)
import Text.Regex.Deriv.RE
import Text.Regex.Deriv.Common (IsPhi(..), IsEpsilon(..))
import Text.Regex.Deriv.Pretty (Pretty(..))
import Text.Regex.Deriv.Common (Range(..), Letter, PosEpsilon(..), my_hash, my_lookup, GFlag(..), IsGreedy(..), preBinder, subBinder, mainBinder)
import Text.Regex.Deriv.IntPattern (Pat(..), toBinder, Binder(..), strip, listifyBinder)
import Text.Regex.Deriv.Parse
import qualified Text.Regex.Deriv.Dictionary as D (Dictionary(..), Key(..), insertNotOverwrite, lookupAll, empty, isIn, nub, member, lookup, insert)
logger io = unsafePerformIO io
type Path = DQ.BankersDequeue Int
rightApp :: Path -> Path -> Path
rightApp p1 p2 = case DQ.popBack p1 of
{ (Just x, p1') -> rightApp p1' (DQ.pushFront p2 x)
; (Nothing, _ ) -> p2
}
leftApp :: Path -> Path -> Path
leftApp p1 p2 = case DQ.popFront p2 of
{ (Just x, p2') -> leftApp (DQ.pushBack p1 x) p2'
; (Nothing, _ ) -> p1
}
smartApp :: Path -> Path -> Path
smartApp p1 p2 | DQ.length p1 > DQ.length p2 = leftApp p1 p2
| otherwise = rightApp p1 p2
singleton :: Int -> Path
singleton i = DQ.pushFront DQ.empty i
emptyP = DQ.empty
appP p1 p2 = smartApp p1 p2
zero = 0
one = 1
zeroP = singleton zero
oneP = singleton one
zeroZeroP = DQ.fromList [zero,zero]
zeroOneP = DQ.fromList [zero,one]
consP = flip DQ.pushFront
headP p = case DQ.first p of { Just x -> x; _ -> error "headP is applied to an empty path." }
unconsP p = case DQ.popFront p of
{ (Nothing, _ ) -> Nothing
; (Just x , p') -> Just (x, p')
}
nullP = DQ.null
data U where
Nil :: U
EmptyU :: U
Letter :: Char -> U
LeftU :: U -> U
RightU :: U -> U
Pair :: (U,U) -> U
List :: [U] -> U
deriving Show
data SPath = SChoice Path [SPath]
| SPair Path SPath SPath
| SStar Path SPath
| SL Path
| SEps Path
| SPhi
deriving Show
mkSPath :: RE -> SPath
mkSPath Empty = SEps emptyP
mkSPath (L c) = SL emptyP
mkSPath Any = SL emptyP
mkSPath (Choice [r1,r2] _) = SChoice emptyP [mkSPath r1, mkSPath r2]
mkSPath (Choice [r] _) = SChoice emptyP [mkSPath r]
mkSPath (Seq r1 r2) = SPair emptyP (mkSPath r1) (mkSPath r2)
mkSPath (Star r _) = SStar emptyP (mkSPath r)
mkSPath Phi = SPhi
mkSPath r = error ("mkSPath fail" ++ show r)
mkEmpty :: RE -> SPath -> Path
mkEmpty Empty = (\x -> case x of { (SEps p) -> p })
mkEmpty (Choice [r1,r2] _ )
| nullable r1 = let f = mkEmpty r1
in (\(SChoice p [sp1,sp2]) -> p `appP` (consP zero (f sp1)))
| nullable r2 = let f = mkEmpty r2
in (\(SChoice p [sp1,sp2]) -> p `appP` (consP one (f sp2)))
mkEmpty (Choice [r] _ )
| nullable r = let f = mkEmpty r
in (\(SChoice p [sp]) -> p `appP` (f sp))
mkEmpty (ChoiceInt [r1,r2])
| nullable r1 = let f = mkEmpty r1
in (\(SChoice p [sp1,sp2]) -> p `appP` (f sp1))
| nullable r2 = let f = mkEmpty r2
in (\(SChoice p [sp1,sp2]) -> p `appP` (f sp2))
mkEmpty (Seq r1 r2) =
let f1 = mkEmpty r1
f2 = mkEmpty r2
in (\ (SPair p sp1 sp2) -> p `appP` (f1 sp1) `appP` (f2 sp2))
mkEmpty (Star r _) = (\x -> case x of (SStar p _) -> p `appP` oneP
_ -> error ("mkEmptyStar is applied to " ++ show x)
)
prefix :: Path -> SPath -> SPath
prefix b (SEps p) = SEps $! (b `appP` p)
prefix b (SL p) = SL $! (b `appP` p)
prefix b (SChoice p [sp1,sp2]) =
let !bp = b `appP` p
in SChoice bp $! [sp1, sp2]
prefix b (SChoice p [sp]) = let !bp = b `appP` p in SChoice bp $! [sp]
prefix b (SPair p sp1 sp2) = let !bp = b `appP` p in SPair bp sp1 sp2
prefix b (SStar p sp) = let !bp = b `appP` p in SStar bp sp
prefix b SPhi = SPhi
prefix b p = error $ "prefix error: b=" ++ (show b) ++ " p=" ++ show p
nullable = posEpsilon
deriv :: RE -> Char -> (RE, SPath -> SPath)
deriv Phi l = (Phi, \_ -> SPhi)
deriv Empty l = (Phi, \_ -> SPhi)
deriv (L l') l
| l == l' = (Empty, \(SL p) -> (SEps p))
| otherwise = (Phi, \_ -> SPhi)
deriv (Choice [r1,r2] gf) l = let (r1',!f1) = deriv r1 l
(r2',!f2) = deriv r2 l
in r1' `seq` r2' `seq` (Choice [r1',r2'] gf, \(SChoice !p [!sp1,!sp2]) ->
let !sp1' = f1 sp1
!sp2' = f2 sp2
in SChoice p $! [sp1',sp2'])
deriv (Choice [r] gf) l = let (r',!f) = deriv r l
in r' `seq` (Choice [r'] gf, \(SChoice !p [!sp]) ->
let !sp' = f sp
in SChoice p [sp'])
deriv (ChoiceInt [r1,r2]) l = let (r1',!f1) = deriv r1 l
(r2',!f2) = deriv r2 l
in r1' `seq` r2' `seq` (ChoiceInt [r1', r2'], \(SChoice !p [!sp1,!sp2]) ->
let !sp1' = f1 sp1
!sp2' = f2 sp2
in SChoice p $! [sp1', sp2'])
deriv (Seq r1 r2) l = let (r1', !f1) = deriv r1 l
(r2', !f2) = deriv r2 l
f3 = mkEmpty r1
in r1' `seq` r2' `seq` if nullable r1
then (ChoiceInt [(Seq r1' r2), r2'], (\(SPair !p !sp1 !sp2) ->
let !sp1' = f1 sp1
!sp2' = f2 sp2
!p1'' = f3 sp1
!p' = (p `appP` p1'')
!sp' = prefix p' sp2'
!spp12 = (SPair p sp1' sp2)
in SChoice emptyP $! [spp12, sp' ]))
else (Seq r1' r2, (\(SPair !p !sp1 !sp2) ->
let !sp1' = f1 sp1
in SPair p sp1' sp2))
deriv (Star r gf) l = let (r', f) = deriv r l
in r' `seq` (Seq r' (Star r gf), \(SStar !p !sp) ->
let !sp' = f sp
!p' = (p `appP` zeroP)
in SPair p' sp' $! (SStar emptyP sp))
deriv Any _ = (Empty, \(SL p) -> (SEps p))
deriv r l = error ("deriv failed: " ++ (show r) ++ "/" ++ (show l))
simp :: RE -> (RE, SPath -> SPath)
simp (Seq Empty r)
| isPhi r = (Phi, \sp ->SPhi)
| otherwise = (r, \(SPair !p1 (SEps !p2) !sp2) ->
let !p12 = p1 `appP` p2
in prefix p12 sp2)
simp (Seq r1 r2)
| isPhi r1 || isPhi r2 = (Phi, \sp -> SPhi)
| otherwise = let (r1', !f1) = simp r1
(r2', !f2) = simp r2
in r1' `seq` r2' `seq` (Seq r1' r2', \(SPair !p !sp1 !sp2) ->
let !sp1' = f1 sp1
!sp2' = f2 sp2
in SPair p sp1' sp2')
simp (Choice [(Choice [r1,r2] gf2), r3] gf1) =
(ChoiceInt [r1, ChoiceInt [r2,r3]], \(SChoice !p1 [SChoice !p2 [!sp1, !sp2], !sp3]) ->
let !p' = p1 `appP` p2 `appP` zeroZeroP
!sp1' = prefix p' sp1
!p'' = p1 `appP` p2 `appP` zeroOneP
!sp2' = prefix p'' sp2
!p''' = p1 `appP` oneP
!sp3' = prefix p''' sp3
in SChoice emptyP [ sp1', SChoice emptyP [ sp2', sp3']])
simp (Choice [r1,r2] gf)
| r1 == r2 = (r1, \(SChoice !p [!sp1,!sp2]) -> let !p' = (p `appP` zeroP) in prefix p' sp1)
| isPhi r1 = (r2, \(SChoice !p [!sp1,!sp2]) -> let !p' = (p `appP` oneP) in prefix p' sp2)
| isPhi r2 = (r1, \(SChoice !p [!sp1,!sp2]) -> let !p' = (p `appP` zeroP) in prefix p' sp1)
| isJust (simpChoice r1 [] r2) = fromJust (simpChoice r1 [] r2)
| otherwise = let (r1',!f1) = simp r1
(r2',!f2) = simp r2
in r1' `seq` r2' `seq` (Choice [r1',r2'] gf, \(SChoice !p [!sp1,!sp2]) ->
let !sp1' = f1 sp1
!sp2' = f2 sp2
in SChoice p [sp1', sp2'])
simp (ChoiceInt [ChoiceInt [r1,r2], r3]) =
(ChoiceInt [r1, ChoiceInt [r2,r3]], \(SChoice !p1 [SChoice !p2 [!sp1, !sp2], !sp3]) ->
let
!p12 = (p1 `appP` p2)
!sp1' = prefix p12 sp1
!sp2' = prefix p12 sp2
!sp3' = prefix p1 sp3
in SChoice emptyP [sp1', SChoice emptyP [sp2', sp3']])
simp (ChoiceInt [r1,r2])
| r1 == r2 = (r1, \(SChoice !p [!sp1,!sp2]) -> prefix p sp1)
| isPhi r1 = (r2, \(SChoice !p [!sp1,!sp2]) -> prefix p sp2)
| isPhi r2 = (r1, \(SChoice !p [!sp1,!sp2]) -> prefix p sp1)
| isJust (simpChoice r1 [] r2) = fromJust (simpChoice r1 [] r2)
| otherwise = let (r1',!f1) = simp r1
(r2',!f2) = simp r2
in r1' `seq` r2' `seq` (ChoiceInt [r1',r2'], \(SChoice !p [!sp1,!sp2]) ->
let !sp1' = f1 sp1
!sp2' = f2 sp2
in SChoice p [sp1', sp2'])
simp (Star Empty gf) = (Empty, \(SStar !p1 (SEps !p2)) ->
let !p = (p1 `appP` p2 `appP` oneP)
in SEps p)
simp (Star r gf) | isPhi r = (Empty, \(SStar !p !sp) -> let !p' = (p `appP` oneP) in SEps p' )
| otherwise = let (r', !f) = simp r
in r' `seq` (Star r' gf, \(SStar !p !sp) -> let !sp' = f sp in SStar p sp')
simp (Choice [r] gf) = let (r',!f) = simp r
in r' `seq` (Choice [r'] gf, \(SChoice !p [!sp]) -> let !sp' = f sp in SChoice p [sp'])
simp r = (r, \sp -> sp)
simpChoice :: RE -> [RE] -> RE -> Maybe (RE, SPath -> SPath)
simpChoice r1 alts rn
| r1 == rn = Just (mkChoice (r1:alts) Greedy,
\v -> prefixNthChoiceLeft (length alts) v)
| otherwise = case rn of
{ Choice [rna,rnb] gf
| r1 == rna ->
Just (mkChoice ((r1:alts) ++ [rnb]) gf,
\v -> prefixNthChoiceRight (length alts + 1) v)
; ChoiceInt [rna,rnb]
| r1 == rna ->
Just (mkChoiceInt ((r1:alts) ++ [rnb]) ,
\v -> prefixNthChoiceRight (length alts + 1) v)
| otherwise -> simpChoice r1 (alts ++ [rna]) rnb
; _ -> Nothing
}
where mkChoice [r1,r2] gf = Choice [r1,r2] gf
mkChoice (r:rs) gf = Choice [r, mkChoice rs gf] gf
mkChoiceInt [r1,r2] = ChoiceInt [r1,r2]
mkChoiceInt (r:rs) = ChoiceInt [r, mkChoiceInt rs]
prefixNthChoiceLeft :: Int -> SPath -> SPath
prefixNthChoiceLeft 0 (SChoice p [sp1,sp2]) = (p `appP` zeroP) `prefix` sp1
prefixNthChoiceLeft n (SChoice p [sp1,sp2]) = SChoice p [sp1, prefixNthChoiceLeft (n1) sp2]
prefixNthChoiceRight :: Int -> SPath -> SPath
prefixNthChoiceRight 0 (SChoice p [sp1,sp2]) = (p `appP` oneP) `prefix` sp2
prefixNthChoiceRight n (SChoice p [sp1,sp2]) = SChoice p [sp1, prefixNthChoiceRight (n1) sp2]
simpFix :: RE -> (RE, SPath -> SPath)
simpFix r = let (r', !f) = simp r
io = logger (print r >> print "======")
in r' `seq` if r == r'
then (r, \sp -> sp)
else let (r'', f') = simpFix r'
in (r'', f' . f)
builder :: [Char]
-> [ (Int,Char,Int,SPath -> SPath) ]
-> M.Map RE Int
-> Int
-> [RE]
-> ([ (Int,Char,Int, SPath -> SPath) ], M.Map RE Int)
builder sig acc_delta dict max_id curr_res
| null curr_res = (acc_delta, dict)
| otherwise =
let new_delta = sig `seq` curr_res `seq` [ r'' `seq` r `seq` g `seq` l `seq` (r,l,r'', g) | r <- curr_res, l <- sig, let (r',f) = deriv r l, let (r'',f') = r' `seq` simpFix r', let g = f' . f]
new_res = new_delta `seq` dict `seq` nub [ r' | (r,l,r',f) <- new_delta, not (r' `M.member` dict) ]
(dict', max_id') = new_delta `seq` dict `seq`
foldl' (\(d,id) r ->
let io = logger (putStrLn $show (r,id+1))
in (M.insert r (id+1) d, id+1)) (dict, max_id) new_res
acc_delta_next = new_delta `seq` acc_delta ++ (map (\(r,l,r',f) -> (getId dict' r, l, getId dict' r', f)) new_delta)
in dict' `seq` max_id' `seq` new_res `seq` builder sig acc_delta_next dict' max_id' new_res
where getId :: M.Map RE Int -> RE -> Int
getId m r = case M.lookup r m of
{ Just i -> i
; Nothing -> error "getId failed: this should not happen" }
instance Ha.Hashable GFlag where
hashWithSalt salt Greedy = Ha.hashWithSalt salt (19::Int)
hashWithSalt salt NotGreedy = Ha.hashWithSalt salt (23::Int)
instance Ha.Hashable RE where
hashWithSalt salt Empty = Ha.hashWithSalt salt (29::Int)
hashWithSalt salt (L x) = Ha.hashWithSalt salt (Ha.hashWithSalt 31 (Ha.hash x))
hashWithSalt salt (Choice rs g) = Ha.hashWithSalt salt (Ha.hashWithSalt 37 (Ha.hashWithSalt (Ha.hash g) rs))
hashWithSalt salt (Seq r1 r2) = Ha.hashWithSalt salt (Ha.hashWithSalt 41 (Ha.hashWithSalt (Ha.hash r1) r2))
hashWithSalt salt (Star r g) = Ha.hashWithSalt salt (Ha.hashWithSalt 43 (Ha.hashWithSalt (Ha.hash g) r))
hashWithSalt salt Any = Ha.hashWithSalt salt (47 ::Int)
hashWithSalt salt (Not cs) = Ha.hashWithSalt salt (Ha.hashWithSalt 53 cs)
hashWithSalt salt (ChoiceInt rs) = Ha.hashWithSalt salt (Ha.hashWithSalt 59 rs)
type DfaTable = IM.IntMap (Int, SPath -> SPath)
buildDfaTable :: RE -> (DfaTable, IM.IntMap RE)
buildDfaTable r =
let sig = sigmaRE r
init_dict = M.insert r 0 M.empty
(delta, mapping) = sig `seq` init_dict `seq` builder sig [] init_dict 0 [r]
table = delta`seq` IM.fromList (map (\(s,c,d,f) -> (my_hash s c, (d,f))) delta)
r_mapping = mapping `seq` IM.fromList (map (\(x,y) -> (y,x)) (M.toList mapping))
in table`seq` r_mapping `seq` (table,r_mapping)
type Word = S.ByteString
execDfa :: DfaTable -> Word -> [(Int, SPath)] -> [(Int, SPath)]
execDfa dfaTable w' [] = []
execDfa dfaTable w' currStateSPaths =
case S.uncons w' of
Nothing -> currStateSPaths
Just (l,w) ->
let ((i,sp):_) = currStateSPaths
k = my_hash i l
in case IM.lookup k dfaTable of
{ Nothing -> []
; Just (j, f) ->
let sp' = sp `seq` f sp
nextStateSPaths = j `seq` sp' `seq` [(j,sp')]
in nextStateSPaths `seq` w `seq`
execDfa dfaTable w nextStateSPaths
}
execDfa2 :: IM.IntMap RE -> DfaTable -> Word -> [(Int, SPath)] -> [(Int, SPath)]
execDfa2 im dfaTable w' [] = []
execDfa2 im dfaTable w' currStateSPaths =
case S.uncons w' of
Nothing -> currStateSPaths
Just (l,w) ->
let ((i,sp):_) = currStateSPaths
k = my_hash i l
in case IM.lookup k dfaTable of
{ Nothing -> []
; Just (j, f) ->
let sp' = sp `seq` f sp
nextStateSPaths = j `seq` sp' `seq` [(j,sp')]
in nextStateSPaths `seq` w `seq`
execDfa2 im dfaTable w nextStateSPaths
}
match :: [(RE,SPath)] -> String -> [(RE,SPath)]
match [(r,sp)] (c:cs) = case deriv r c of
{ (r',f) -> let (r'',f') = simp r'
in
match [(r', f sp)] cs
}
match [(r,sp)] [] = [(r,sp)]
match2 :: [(RE,SPath)] -> String -> [(RE,SPath)]
match2 [(r,sp)] (c:cs) = case deriv r c of
{ (r',f) -> let (r'',f') = simp r'
in match2 [(r'', (f'. f) sp)] cs
}
match2 [(r,sp)] [] = [(r,sp)]
retrieveEmpty :: RE -> SPath -> Path
retrieveEmpty Empty (SEps p) = p
retrieveEmpty (Choice [r1,r2] gf) (SChoice p [sp1, sp2])
| nullable r1 = p `appP` (consP zero (retrieveEmpty r1 sp1))
| nullable r2 = p `appP` (consP one (retrieveEmpty r2 sp2))
retrieveEmpty (Choice [r] gf) (SChoice p [sp])
| nullable r = p `appP` (retrieveEmpty r sp)
retrieveEmpty (ChoiceInt [r1,r2]) (SChoice p [sp1, sp2])
| nullable r1 = p `appP` (retrieveEmpty r1 sp1)
| nullable r2 = p `appP` (retrieveEmpty r2 sp2)
retrieveEmpty (Seq r1 r2) (SPair p sp1 sp2) = p `appP` (retrieveEmpty r1 sp1) `appP` (retrieveEmpty r2 sp2)
retrieveEmpty (Star r gf) (SStar p sp) = p `appP` oneP
retrieveEmpty r sr = error ("retrieveEmpty failed:" ++ show (posEpsilon r) ++ (show r) ++ (show sr))
decode2 :: RE -> Path -> (U, Path)
decode2 Phi bs = (Nil,bs)
decode2 Empty bs = (EmptyU,bs)
decode2 (L l) bs = (Letter l, bs)
decode2 sr@(Choice [r1,r2] gf) bs' =
case unconsP bs' of
Just (b, bs) | b == zero -> let (u,p) = decode2 r1 bs
in (LeftU u, p)
| b == one -> let (u,p) = decode2 r2 bs
in (RightU u, p)
Nothing -> error ("decode2 failed:" ++ show sr ++ show bs')
decode2 (Choice [r] gf) bs = decode2 r bs
decode2 (Seq r1 r2) bs = let (u1,p1) = decode2 r1 bs
(u2,p2) = decode2 r2 p1
in (Pair (u1,u2), p2)
decode2 (sr@(Star r gf)) bs' =
case unconsP bs' of
Just (b, bs) | b == zero -> let (u,p1) = decode2 r bs
(List us,p2) = decode2 sr p1
in (List (u:us), p2)
| b == one -> (List [],bs)
Nothing -> error ("decode2 failed:" ++ show sr ++ show bs')
decode2 sr bs = error ("decode2 failed:" ++ show sr ++ show bs)
decode :: RE -> Path -> U
decode r bs = let (u,p) = decode2 r bs
in if nullP p
then u
else error $ "invalid bit coding u=" ++ show u ++ " p=" ++ show p
extract :: Pat -> RE -> U -> [(Int,Word)]
extract (PVar i _ p) r u
| strip p == r = [(i, flatten u)]
| otherwise = error ("the structures of the pattern and regex are not in sync" ++ show p ++ " vs " ++ show r)
extract (PE rs) (Choice rs' _) u = []
extract (PStar p _) (Star r _) (List []) = []
extract (PStar p _) (Star r _) (List [u]) = extract p r u
extract p'@(PStar p _) r'@(Star r _) (List (u:us)) = extract p' r' (List us)
extract (PPair p1 p2) (Seq r1 r2) (Pair (u1,u2)) = extract p1 r1 u1 ++ extract p2 r2 u2
extract (PChoice [p1,p2] _) (Choice [r1,r2] _) (LeftU u) = extract p1 r1 u
extract (PChoice [p1,p2] _) (Choice [r1,r2] _) (RightU u) = extract p2 r2 u
extract (PEmpty p) Empty _ = []
extractSR :: Pat -> RE -> U -> Int -> ([(Int, Range)], Int)
extractSR (PVar i _ p) r u start_index
| strip p == r = let l = S.length $ flatten u
(e,_) = extractSR p r u start_index
in ([(i, Range start_index l)]++e , start_index + l)
| otherwise = error ("the structures of the pattern and regex are not in sync" ++ show p ++ " vs " ++ show r)
extractSR (PE rs) (Choice rs' _) u start_index = ([],start_index)
extractSR (PStar p _) (Star r _) (List []) start_index = ([], start_index)
extractSR (PStar p _) (Star r _) (List [u]) start_index = extractSR p r u start_index
extractSR p'@(PStar p _) r'@(Star r _) (List (u:us)) start_index = extractSR p' r' (List us) start_index
extractSR (PPair p1 p2) (Seq r1 r2) (Pair (u1,u2)) start_index =
let (l1, i1) = extractSR p1 r1 u1 start_index
(l2, i2) = extractSR p2 r2 u2 i1
in (l1 ++ l2, i2)
extractSR (PChoice [p1,p2] _) (Choice [r1,r2] _) (LeftU u) start_index = extractSR p1 r1 u start_index
extractSR (PChoice [p1,p2] _) (Choice [r1,r2] _) (RightU u) start_index = extractSR p2 r2 u start_index
extractSR (PEmpty p) Empty _ start_index = ([],start_index)
extractSR (PChoice [p] _) (Choice [r] _) u start_index = extractSR p r u start_index
extractSR p r u _ = error ("etractSR failed:" ++ (show p) ++ show r ++ show u)
flatten :: U -> Word
flatten u = S.pack (flatten' u)
flatten' :: U -> [Char]
flatten' Nil = []
flatten' EmptyU = []
flatten' (Letter c) = [c]
flatten' (LeftU u) = flatten' u
flatten' (RightU u) = flatten' u
flatten' (Pair (u1,u2)) = flatten' u1 ++ flatten' u2
flatten' (List us) = concatMap flatten' us
compilePat :: Pat -> (DfaTable, Pat, IM.IntMap RE)
compilePat p =
let p' = normChoice p
r = strip p'
(dfa,im) = buildDfaTable r
in (dfa, p', im)
class NormChoice a where
normChoice :: a -> a
instance NormChoice RE where
normChoice Empty = Empty
normChoice (L c) = L c
normChoice (Seq r1 r2) = Seq (normChoice r1) (normChoice r2)
normChoice (Star r gf) = Star (normChoice r) gf
normChoice Phi = Phi
normChoice (Choice [r] gf) = Choice [normChoice r] gf
normChoice (Choice [r1,r2] gf) = Choice [normChoice r1,normChoice r2] gf
normChoice (Choice (r:rs) gf) = Choice [normChoice r, normChoice (Choice rs gf)] gf
normChoice Any = Any
instance NormChoice Pat where
normChoice (PVar i rng p) = PVar i rng (normChoice p)
normChoice (PE rs) = PE (map normChoice rs)
normChoice (PPair p1 p2) = PPair (normChoice p1) (normChoice p2)
normChoice (PChoice [p] gf) = PChoice [normChoice p] gf
normChoice (PChoice [p1,p2] gf) = PChoice [normChoice p1, normChoice p2] gf
normChoice (PChoice (p:ps) gf) = PChoice [normChoice p, normChoice (PChoice ps gf)] gf
normChoice (PPlus p1 p2) = PPlus (normChoice p1) (normChoice p2)
normChoice (PStar p gf) = PStar (normChoice p) gf
normChoice (PEmpty p) = PEmpty (normChoice p)
type Env = [(Int,Range)]
execPatMatch :: (DfaTable, Pat, IM.IntMap RE) -> Word -> Maybe Env
execPatMatch (dfa, p, im) w =
let res = dfa `seq` p `seq` im `seq` execDfa dfa w [(0, mkSPath (strip p))]
in case res of
{ [] -> Nothing
; [ (i, sp) ] ->
let r' = im IM.! i
path = r' `seq` sp `seq` retrieveEmpty r' sp
r = p `seq` strip p
parseTree = path `seq` decode r path
(env, _) = parseTree `seq` extractSR p r parseTree 0
in Just env
}
p4 = PVar 0 [] (PPair (PVar 1 [] ((PPair p_x p_y))) p_z)
where p_x = PVar 2 [] (PE [(Choice [(L 'A'),(Seq (L 'A') (L 'B'))] Greedy)])
p_y = PVar 3 [] (PE [(Choice [(Seq (L 'B') (Seq (L 'A') (L 'A'))), (L 'A')] Greedy)])
p_z = PVar 4 [] (PE [(Choice [(Seq (L 'A') (L 'C')), (L 'C')] Greedy)])
p3 = PVar 0 [] (PStar ( PVar 1 [] ( PChoice [(PVar 2 [] (PChoice [p3,p4] Greedy)), p5] Greedy)) Greedy)
where p3 = PVar 3 [] (PE [(L 'A')])
p4 = PVar 4 [] (PE [(Seq (L 'A') (L 'B'))])
p5 = PVar 5 [] (PE [(L 'B')])
p0 = PVar 0 [] (PChoice [PPair (PE [Empty]) (PPair (PStar (PVar 1 [] (PChoice [PPair (PVar 2 [] (PChoice [PPair (PVar 3 [] (PE [Choice [L 'A',Seq (L 'A') (L 'B')] Greedy ])) (PVar 4 [] (PE [Choice [Seq (Seq (L 'B') (L 'A')) (L 'A'),L 'A'] Greedy ]))] Greedy)) (PVar 5 [] (PE [Choice [Seq (L 'A') (L 'C'),L 'C'] Greedy]))] Greedy)) Greedy ) (PE [Empty]))] Greedy)
p1 = PVar 0 [] (PChoice [PPair (PE [Empty]) (PPair
(PVar 1 [] (PChoice [PPair (PVar 2 [] (PChoice [PPair (PVar 3 [] (PE [Choice [L 'A',Seq (L 'A') (L 'B')] Greedy ])) (PVar 4 [] (PE [Choice [Seq (Seq (L 'B') (L 'A')) (L 'A'),L 'A'] Greedy ]))] Greedy)) (PVar 5 [] (PE [Choice [Seq (L 'A') (L 'C'),L 'C'] Greedy]))] Greedy))
(PE [Empty]))] Greedy)
p2 = PVar 0 [] (PPair (PE [Empty]) (PPair
(PVar 1 [] (PChoice [PPair (PVar 2 [] (PChoice [PPair (PVar 3 [] (PE [Choice [L 'A',Seq (L 'A') (L 'B')] Greedy ])) (PVar 4 [] (PE [Choice [Seq (Seq (L 'B') (L 'A')) (L 'A'),L 'A'] Greedy ]))] Greedy)) (PVar 5 [] (PE [Choice [Seq (L 'A') (L 'C'),L 'C'] Greedy]))] Greedy))
(PE [Empty])) )
q2 = PVar 0 []
(PVar 1 [] (PChoice [PPair (PVar 2 [] (PChoice [PPair (PVar 3 [] (PE [Choice [L 'A',Seq (L 'A') (L 'B')] Greedy ])) (PVar 4 [] (PE [Choice [Seq (Seq (L 'B') (L 'A')) (L 'A'),L 'A'] Greedy ]))] Greedy)) (PVar 5 [] (PE [Choice [Seq (L 'A') (L 'C'),L 'C'] Greedy]))] Greedy))
type Regex = (DfaTable, Pat, IM.IntMap RE)
compile :: CompOption
-> ExecOption
-> S.ByteString
-> Either String Regex
compile compOpt execOpt bs =
case parsePatPosix (S.unpack bs) of
Left err -> Left ("parseRegex for Text.Regex.Deriv.ByteString failed:"++show err)
Right (pat,posixBnd) ->
Right (compilePat pat)
execute :: Regex
-> S.ByteString
-> Either String (Maybe Env)
execute r bs = Right (execPatMatch r bs)
regexec :: Regex
-> S.ByteString
-> Either String (Maybe (S.ByteString, S.ByteString, S.ByteString, [S.ByteString]))
regexec r bs =
case execPatMatch r bs of
Nothing -> Right Nothing
Just env ->
let pre = case lookup preBinder env of { Just e -> rg_collect bs e ; Nothing -> S.empty }
post = case lookup subBinder env of { Just e -> rg_collect bs e ; Nothing -> S.empty }
full_len = S.length bs
pre_len = S.length pre
post_len = S.length post
main_len = full_len pre_len post_len
main_and_post = S.drop pre_len bs
main = main_and_post `seq` main_len `seq` S.take main_len main_and_post
matched = map ((rg_collect bs) . snd) (filter (\(v,w) -> v > mainBinder && v < subBinder ) env)
in
Right (Just (pre,main,post,matched))
rg_collect :: S.ByteString -> Range -> S.ByteString
rg_collect w (Range i j) = S.take (j' i' + 1) (S.drop i' w)
where i' = fromIntegral i
j' = fromIntegral j
rg_collect_many w rs = foldl' S.append S.empty $ map (rg_collect w) rs
data CompOption = CompOption {
caseSensitive :: Bool
, multiline :: Bool
, rightAssoc :: Bool
, newSyntax :: Bool
, lastStarGreedy :: Bool
} deriving (Read,Show)
data ExecOption = ExecOption {
captureGroups :: Bool
} deriving (Read,Show)
instance RegexOptions Regex CompOption ExecOption where
blankCompOpt = CompOption { caseSensitive = True
, multiline = False
, rightAssoc = True
, newSyntax = False
, lastStarGreedy = False
}
blankExecOpt = ExecOption { captureGroups = True }
defaultCompOpt = CompOption { caseSensitive = True
, multiline = True
, rightAssoc = True
, newSyntax = True
, lastStarGreedy = False
}
defaultExecOpt = ExecOption { captureGroups = True }
setExecOpts e r = undefined
getExecOpts r = undefined
choice r = Choice r Greedy