{-|
This module is a modification of "Text.Regex.Lazy.DFAEngine" to search ByteStrings (
see ). This uses 'index' to
access the 'Word8' as a 'Char8'.
-}
module Text.Regex.DFA.EngineFPS(findRegex,countRegex,matchesHere,matchesRegex,accept) where
import Text.Regex.DFA.Engine(peek, accept, lexAccept, lexFailure,
Lexer(..), Cont(..),Boundary(..))
import Data.ByteString.Char8(ByteString)
import qualified Data.ByteString.Char8 as B(null,tail,length,index)
-- This scans through the input to find the first match
findRegex :: Lexer -- ^ The regular expression to match
-> ByteString -- ^ The input string to scan along, looking for a match
-> (Int, Maybe (Int,Int)) -- ^ The length of the string before the match, Nothing if there was no match or Just length of the match, index of the input past the match
findRegex lexer input =
let len = B.length input
loop s i | i == len = (len,Nothing)
| otherwise =
let n=applyHere lexer s len (-1) i
in if n==(-1) then loop s (succ i)
else (i,Just (n-i,n))
in loop input 0
-- internal. Passes in the length of the input even though it is O(1) to compute
applyHere :: Lexer -- ^ The Lexer from (accept Regexp)
-> ByteString -- ^ The input (Data.FastPackedString.FastString)
-> Int -- ^ A index 'end' such that (here<=end<=B.length input)
-> Int -- ^ A value 'value' to return if there is no match, usually (-1)
-> Int -- ^ A index 'here' (0<=here Int -- ^ Will be the index past the match or 'value'
{-# INLINE applyHere #-}
applyHere lexerIn input end valueIn hereIn =
let final = (B.length input)
loop (Lexer _ action cont) value here | here `seq` value `seq` False = undefined
| otherwise =
let value' = if action == lexAccept then here else value
in case cont of
Done -> value'
_ -> if here == end
then value'
else case peek cont (B.index input here) of
(Lexer _ action' _ ) | action' == lexFailure -> value'
lexer' -> loop lexer' value' (succ here)
loop (Predicate _ p yes no) value here | here `seq` value `seq` False = undefined
| otherwise =
let t = case p of
BeginLine -> (here==0) || ('\n' == B.index input (pred here))
EndLine -> (here==final) || ('\n' == B.index input here)
BeginInput -> here==0
EndInput -> here==final
lexer = if t then yes else no
in loop lexer value here
in loop lexerIn valueIn hereIn
-- | This counts the number of matches to regex in the string, (it
-- checks each possible starting position). This should be the same
-- as ((length (splitRegex re input))-1) but more efficient
countRegex :: Lexer -> ByteString -> Int
countRegex lexer input =
let len = B.length input
loop i n | n `seq` (i == len) = n
| otherwise =
if (-1) == applyHere lexer input len (-1) i
then loop (succ i) n
else loop (succ i) (succ n)
in loop 0 0
-- | This checks the regex anchored at the start of the ByteString and return
-- Nothing if there is no match or (Just n) for a match of length n
matchesHere :: Lexer -> ByteString -> Maybe Int
matchesHere lexer input =
let n=applyHere lexer input (B.length input) (-1) 0
in if n == (-1) then Nothing else Just n
matchesRegex :: Lexer -> ByteString -> Bool
matchesRegex lexer input | B.null input = False
| otherwise =
case applyHere lexer input (B.length input) (-1) 0 of
(-1) -> matchesRegex lexer (B.tail input)
_ -> True