Haskell Code by HsColour

{-|

This module is a modification of "Text.Regex.Lazy.DFAEngine" to search ByteStrings (
see <http://www.cse.unsw.edu.au/~dons/fps/>).  This uses 'index' to
access the 'Word8' as a 'Char8'.

-}
module Text.Regex.DFA.ByteString.EngineFPS(findRegex,countRegex,matchesHere,matchesRegex,accept) where

import Text.Regex.DFA.Engine(peek, accept, lexAccept, lexFailure,
  Lexer(..), Cont(..),Boundary(..))
import Data.ByteString.Lazy.Char8(ByteString)
import qualified Data.ByteString.Lazy.Char8 as B(null,tail,length,index)

-- This scans through the input to find the first match
findRegex :: Lexer                  -- ^ The regular expression to match
          -> ByteString             -- ^ The input string to scan along, looking for a match
          -> (Int, Maybe (Int,Int)) -- ^ The length of the string before the match, Nothing if there was no match or Just length of the match, index of the input past the match
findRegex lexer input = 
  let len = fromEnum $ B.length input
      loop s i | i == len = (len,Nothing)
               | otherwise =
        let n=applyHere lexer s len (-1) i
        in if n==(-1) then loop s (succ i)
           else (i,Just (n-i,n))
  in loop input 0

-- internal.  Passes in the length of the input even though it is O(1) to compute
applyHere :: Lexer -- ^ The Lexer from (accept Regexp)
          -> ByteString   -- ^ The input (Data.FastPackedString.FastString)
          -> Int   -- ^ A index 'end' such that (here<=end<=B.length input)
          -> Int   -- ^ A value 'value' to return if there is no match, usually (-1)
          -> Int   -- ^ A index 'here' (0<=here<B.length input) where to anchor the start of the match
          -> Int   -- ^ Will be the index past the match or 'value'
{-# INLINE applyHere #-}
applyHere lexerIn input end valueIn hereIn =
  let final = fromEnum (B.length input)
      loop (Lexer _ action cont) value here | here `seq` value `seq` False = undefined
                                          | otherwise =
        let value' = if action == lexAccept then here else value
        in case cont of
             Done -> value'
             _ -> if here == end
                    then value'
                    else case peek cont (B.index input (toEnum here)) of
                           (Lexer _ action' _ ) | action' == lexFailure -> value'
                           lexer' -> loop lexer' value' (succ here)
      loop (Predicate _ p yes no) value here | here `seq` value `seq` False = undefined
                                           | otherwise =
        let t = case p of
                  BeginLine -> (here==0) || ('\n' == B.index input (toEnum $ pred here))
                  EndLine -> (here==final) || ('\n' == B.index input (toEnum here))
                  BeginInput -> here==0
                  EndInput -> here==final
            lexer = if t then yes else no
        in loop lexer value here
  in loop lexerIn valueIn hereIn

-- | This counts the number of matches to regex in the string, (it
-- checks each possible starting position).  This should be the same
-- as ((length (splitRegex re input))-1) but more efficient
countRegex :: Lexer -> ByteString -> Int
countRegex lexer input =
  let len = fromEnum $ B.length input
      loop i n | n `seq` (i == len) = n
               | otherwise =
        if (-1) == applyHere lexer input len (-1) i
          then loop (succ i) n
          else loop (succ i) (succ n)
  in loop 0 0

-- | This checks the regex anchored at the start of the ByteString and return
-- Nothing if there is no match or (Just n) for a match of length n
matchesHere :: Lexer -> ByteString -> Maybe Int
matchesHere lexer input =
  let n=applyHere lexer input (fromEnum $ B.length input) (-1) 0
  in if n == (-1) then Nothing else Just n

matchesRegex :: Lexer -> ByteString -> Bool
matchesRegex lexer input | B.null input = False
                         | otherwise =
  case applyHere lexer input (fromEnum $ B.length input) (-1) 0 of
    (-1) -> matchesRegex lexer (B.tail input)
    _ -> True