{-| This module is a modification of "Text.Regex.Lazy.DFAEngine" to search ByteStrings ( see ). This uses 'index' to access the 'Word8' as a 'Char8'. -} module Text.Regex.DFA.ByteString.EngineFPS(findRegex,countRegex,matchesHere,matchesRegex,accept) where import Text.Regex.DFA.Engine(peek, accept, lexAccept, lexFailure, Lexer(..), Cont(..),Boundary(..)) import Data.ByteString.Lazy.Char8(ByteString) import qualified Data.ByteString.Lazy.Char8 as B(null,tail,length,index) -- This scans through the input to find the first match findRegex :: Lexer -- ^ The regular expression to match -> ByteString -- ^ The input string to scan along, looking for a match -> (Int, Maybe (Int,Int)) -- ^ The length of the string before the match, Nothing if there was no match or Just length of the match, index of the input past the match findRegex lexer input = let len = fromEnum $ B.length input loop s i | i == len = (len,Nothing) | otherwise = let n=applyHere lexer s len (-1) i in if n==(-1) then loop s (succ i) else (i,Just (n-i,n)) in loop input 0 -- internal. Passes in the length of the input even though it is O(1) to compute applyHere :: Lexer -- ^ The Lexer from (accept Regexp) -> ByteString -- ^ The input (Data.FastPackedString.FastString) -> Int -- ^ A index 'end' such that (here<=end<=B.length input) -> Int -- ^ A value 'value' to return if there is no match, usually (-1) -> Int -- ^ A index 'here' (0<=here Int -- ^ Will be the index past the match or 'value' {-# INLINE applyHere #-} applyHere lexerIn input end valueIn hereIn = let final = fromEnum (B.length input) loop (Lexer _ action cont) value here | here `seq` value `seq` False = undefined | otherwise = let value' = if action == lexAccept then here else value in case cont of Done -> value' _ -> if here == end then value' else case peek cont (B.index input (toEnum here)) of (Lexer _ action' _ ) | action' == lexFailure -> value' lexer' -> loop lexer' value' (succ here) loop (Predicate _ p yes no) value here | here `seq` value `seq` False = undefined | otherwise = let t = case p of BeginLine -> (here==0) || ('\n' == B.index input (toEnum $ pred here)) EndLine -> (here==final) || ('\n' == B.index input (toEnum here)) BeginInput -> here==0 EndInput -> here==final lexer = if t then yes else no in loop lexer value here in loop lexerIn valueIn hereIn -- | This counts the number of matches to regex in the string, (it -- checks each possible starting position). This should be the same -- as ((length (splitRegex re input))-1) but more efficient countRegex :: Lexer -> ByteString -> Int countRegex lexer input = let len = fromEnum $ B.length input loop i n | n `seq` (i == len) = n | otherwise = if (-1) == applyHere lexer input len (-1) i then loop (succ i) n else loop (succ i) (succ n) in loop 0 0 -- | This checks the regex anchored at the start of the ByteString and return -- Nothing if there is no match or (Just n) for a match of length n matchesHere :: Lexer -> ByteString -> Maybe Int matchesHere lexer input = let n=applyHere lexer input (fromEnum $ B.length input) (-1) 0 in if n == (-1) then Nothing else Just n matchesRegex :: Lexer -> ByteString -> Bool matchesRegex lexer input | B.null input = False | otherwise = case applyHere lexer input (fromEnum $ B.length input) (-1) 0 of (-1) -> matchesRegex lexer (B.tail input) _ -> True