-- | "Text.Regex.TDFA.Run" is the main module for matching a DFA
-- against a String.  Many of the associated functions are exported to
-- other modules to help match against other types.
--
-- 2009-January: logic changes to capturing in matchHere (need to change noCap XXX TODO):
-- The logic below has been changed to recognize an empty match at the end of the string.
-- The logic below has been changed to proceed after the first empty match.
module Text.Regex.TDFA.MutRun (findMatch,findMatchAll,countMatchAll) where

import Control.Monad(MonadPlus(..))
import Control.Monad.ST(ST)
import qualified Control.Monad.ST.Lazy as Lazy(ST,runST,strictToLazyST)
import Data.Array.IArray((!),array,bounds)
import Data.Array.MArray(rangeSize)
import qualified Data.IntMap.CharMap as Map(lookup,null)
import qualified Data.IntMap as IMap(null)
import Data.Maybe(isNothing)

import Text.Regex.Base(MatchArray,RegexOptions(..))
import Text.Regex.TDFA.Common
import Text.Regex.TDFA.TDFA(isDFAFrontAnchored)
import Text.Regex.TDFA.RunMutState(TagEngine(..),newTagEngine,tagsToGroupsST,newScratch,resetScratch,SScratch(..))
import Text.Regex.TDFA.Wrap()
-- import Debug.Trace

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

{-# INLINE lazy #-}
lazy :: ST s a -> Lazy.ST s a
lazy = Lazy.strictToLazyST

-- err :: String -> a
-- err = common_error "Text.Regex.TDFA.MutRun"

{-# INLINE findMatch #-}
findMatch :: Regex -> String -> Maybe MatchArray
findMatch regexIn stringIn = case matchHere regexIn 0 '\n' stringIn of
                               [] -> Nothing
                               (ma:_) -> Just ma

{-# INLINE findMatchAll #-}
findMatchAll :: Regex -> String -> [MatchArray]
findMatchAll regexIn stringIn = matchHere regexIn 0 '\n' stringIn

{-# INLINE countMatchAll #-}
countMatchAll :: Regex -> String -> Int
countMatchAll regexIn stringIn = length (matchHere regex 0 '\n' stringIn) where
  regex = setExecOpts (ExecOption {captureGroups = False,testMatch = False}) regexIn

{-
There are four possible routines use by matchHere, depending on
whether it needs to collect submatch data and whether the pattern is
only permitted to start matching at offsetIn==0.
-}
matchHere :: Regex -> Position -> Char -> String -> [MatchArray]
matchHere regexIn offsetIn prevIn inputIn = ans where
  ans = if subCapture then runHerePure else noCap
    where subCapture = captureGroups (regex_execOptions regexIn)
                    && (1<=rangeSize (bounds (regex_groups regexIn)))

  frontAnchored = (not (multiline (regex_compOptions regexIn)))
               && isDFAFrontAnchored (regex_dfa regexIn)

  -- Select which style of ^ $ tests are performed.
  test | multiline (regex_compOptions regexIn) = test_multiline
       | otherwise = test_singleline
    where test_multiline Test_BOL _off prev _input = prev == '\n'
          test_multiline Test_EOL _off _prev input = case input of
                                                       [] -> True
                                                       (next:_) -> next == '\n'
          test_singleline Test_BOL off _prev _input = off == 0
          test_singleline Test_EOL _off _prev input = null input
  
  runHerePure :: [MatchArray]
  runHerePure = Lazy.runST (do
    TagEngine findTrans updateWinner performTrans <- lazy (newTagEngine regexIn)
    let -- runHere :: Maybe (WScratch s,(Position,Char,String)) -> DT
        --         -> MScratch s -> MScratch s
        --         -> Position -> Char -> String
        --         -> ST s (Maybe (WScratch s,(Position,Char,String)))
        runHere winning dt s1 s2 off prev input = {-# SCC "runHere" #-}
          s1 `seq` s2 `seq` off `seq` prev `seq` input `seq`
          case dt of
            Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
              if test wt off prev input
                then runHere winning a s1 s2 off prev input
                else runHere winning b s1 s2 off prev input
            Simple' {dt_win=w, dt_trans=t, dt_other=o} -> do
              case input of
                [] -> updateWinner s1 (off,prev,input) winning w
                (c:input') ->
                  case Map.lookup c t `mplus` o of
                    Nothing -> updateWinner s1 (off,prev,input) winning w
                    Just (dfa,trans) -> do
                      findTrans s1 off trans
                      winning' <- updateWinner s1 (off,prev,input) winning w
                      performTrans s1 s2 off trans
                      runHere winning' (d_dt dfa) s2 s1 (succ off) c input'
        -- end of runHere
    -- body of runHerePure continues
    (SScratch s1 s2 w0) <- lazy (newScratch regexIn offsetIn)
    let go off prev input = {-# SCC "runHerePure.go" #-}
         off `seq` prev `seq` input `seq` do
          answer <- lazy (runHere Nothing (d_dt (regex_dfa regexIn)) s1 s2 off prev input)
          case answer of
            Nothing -> case input of
                         [] -> return []
                         (prev':input') -> let off' = succ off
                                           in do () <- lazy (resetScratch regexIn off' s1 w0)
                                                 go off' prev' input'
            Just (w,(off',prev',input')) -> do
              ma <- lazy (tagsToGroupsST (regex_groups regexIn) w)
              let len = snd (ma!0)
              rest <- if len==0
                        then case input' of
                               [] -> return []
                               (prev'':input'') -> do
                                 let off'' = succ off'
                                 () <- lazy (resetScratch regexIn off'' s1 w0)
                                 go off'' prev'' input''
                        else do
                          () <- lazy (resetScratch regexIn off' s1 w0)
                          go off' prev' input'
              return (ma:rest)
    if frontAnchored
      then if offsetIn/=0 then return [] 
             else do
               answer <- lazy (runHere Nothing (d_dt (regex_dfa regexIn)) s1 s2 offsetIn prevIn inputIn)
               case answer of
                 Nothing -> return []
                 Just (w,_) -> do
                   ma <- lazy (tagsToGroupsST (regex_groups regexIn) w)
                   return (ma:[])
      else go offsetIn prevIn inputIn ) -- end Lazy.runST
  -- end of runHerePure

  noCap = {-# SCC "noCap" #-}
    let dtIn = (d_dt (regex_dfa regexIn))
        go off prev input = off `seq` prev `seq` input `seq`
          case runHereNoCap Nothing dtIn off prev input of
            Nothing -> case input of
                         [] -> []
                         (prev':input') -> let off' = succ off
                                           in go off' prev' input'
            Just (off',prev',input') ->
              let len = off'-off
                  ma = array (0,0) [(0,(off,len))]
                  rest = if len == 0
                           then case input' of
                                  [] -> []
                                  (prev'':input'') ->
                                    let off'' = succ off'
                                    in go off'' prev'' input''
                           else go off' prev' input'
{-
                  rest = if len == 0 || null input then []
                           else go off' prev' input'
-}
              in (ma:rest)
    in if frontAnchored
         then if offsetIn /= 0 then []
                else case runHereNoCap Nothing dtIn offsetIn prevIn inputIn of
                       Nothing -> []
                       Just (off',_prev',_input') ->
                         let len = off'-offsetIn
                             ma = array (0,0) [(0,(offsetIn,len))]
                         in (ma:[])
         else go offsetIn prevIn inputIn

  runHereNoCap winning dt off prev input =  {-# SCC "runHereNoCap" #-}
    off `seq` prev `seq` input `seq`
    case dt of
      Simple' {dt_win=w, dt_trans=t, dt_other=o} ->
        let winning' = if IMap.null w then winning else Just (off,prev,input)
        in seq winning' $
           if Map.null t && isNothing o then winning' else
             case input of
               [] -> winning'
               (c:input') ->
                 case Map.lookup c t `mplus` o of
                   Nothing -> winning'
                   Just (dfa,_) -> let dt' = d_dt dfa
                                       off' = succ off
                                       prev' = c
                                   in seq off' $
                                      runHereNoCap winning' dt' off' prev' input'
      Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
        if test wt off prev input
          then runHereNoCap winning a off prev input
          else runHereNoCap winning b off prev input