-- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String
module Text.Regex.TDFA.NewDFA.Engine_NC(execMatch) where

import Control.Monad(when,forM,forM_,liftM2,foldM,join,MonadPlus(..),filterM)
import Data.Array.Base(unsafeRead,unsafeWrite,STUArray(..))
-- #ifdef __GLASGOW_HASKELL__
import GHC.Arr(STArray(..))
import GHC.ST(ST(..))
import GHC.Prim(MutableByteArray#,RealWorld,Int#,sizeofMutableByteArray#,unsafeCoerce#)
{-
-- #else
import Control.Monad.ST(ST)
import Data.Array.ST(STArray)
-- #endif
-}
import Prelude hiding ((!!))

import Data.Array.MArray(MArray(..),unsafeFreeze,getAssocs)
import Data.Array.IArray(Array,bounds,assocs)
--import qualified Data.Foldable as F
import qualified Data.IntMap.CharMap2 as CMap(lookup,findWithDefault)
import Data.IntMap(IntMap)
import qualified Data.IntMap as IMap(null,toList,lookup,insert,keys,member)
import Data.Ix(Ix,rangeSize,range)
import Data.Maybe(catMaybes,listToMaybe)
import Data.Monoid(Monoid(..))
--import Data.IntSet(IntSet)
import qualified Data.IntSet as ISet(toAscList,null)
import qualified Data.Array.ST
import Data.Array.IArray((!))
import qualified Data.Array.MArray as MA
import Data.List(partition,sort,foldl',sortBy,groupBy)
import Data.STRef
import qualified Control.Monad.ST.Lazy as L
import qualified Control.Monad.ST.Strict as S
import Data.Sequence(Seq,ViewL(..),viewl)
import qualified Data.Sequence as Seq
import qualified Data.ByteString.Char8 as SBS
import qualified Data.ByteString.Lazy.Char8 as LBS

import Text.Regex.Base(MatchArray,MatchOffset,MatchLength)
import qualified Text.Regex.TDFA.IntArrTrieSet as Trie
import Text.Regex.TDFA.Common hiding (indent)
import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons))

-- import Debug.Trace

-- trace :: String -> a -> a
-- trace _ a = a

err :: String -> a
err s = common_error "Text.Regex.TDFA.NewDFA.Engine_NC"  s

{-# INLINE (!!) #-}
(!!) :: (MArray a e (S.ST s),Ix i) => a i e -> i -> S.ST s e
(!!) = MA.readArray -- unsafeRead
{-# INLINE set #-}
set :: (MArray a e (S.ST s),Ix i) => a i e -> i -> e -> S.ST s ()
set = MA.writeArray -- unsafeWrite

{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-}
{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-}
{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-}
{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-}
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
execMatch (Regex { regex_dfa = (DFA {d_id=didIn,d_dt=dtIn})
                 , regex_init = startState
                 , regex_b_index = b_index
                 , regex_b_tags = b_tags_all
                 , regex_trie = trie
                 , regex_tags = aTags
                 , regex_groups = aGroups
                 , regex_compOptions = CompOption { multiline = newline }
                 , regex_execOptions = ExecOption { captureGroups = capture
                                                  , testMatch = _checkMatch }})
          offsetIn prevIn inputIn = L.runST runCaptureGroup where

  !test = mkTest newline         

  runCaptureGroup = {-# SCC "runCaptureGroup" #-} do
    obtainNext <- L.strictToLazyST constructNewEngine
    let loop = do vals <- L.strictToLazyST obtainNext
                  if null vals -- force vals before defining valsRest
                    then return []
                    else do valsRest <- loop
                            return (vals ++ valsRest)
    loop

  constructNewEngine :: S.ST s (S.ST s [MatchArray])
  constructNewEngine =  {-# SCC "constructNewEngine" #-} do
    storeNext <- newSTRef undefined
    writeSTRef storeNext (goNext storeNext)
    let obtainNext = join (readSTRef storeNext)
    return obtainNext

  goNext storeNext = {-# SCC "goNext" #-} do
    (SScratch s1In s2In winQ) <- newScratch b_index
    set s1In startState offsetIn
    writeSTRef storeNext (err "obtainNext called while goNext is running!")
    eliminatedStateFlag <- newSTRef False
    eliminatedRespawnFlag <- newSTRef False
    let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-}
          case dt of
            Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
              if test wt offset prev input
                then next s1 s2 did a offset prev input
                else next s1 s2 did b offset prev input
            Simple' {dt_win=w,dt_trans=t, dt_other=o}
              | IMap.null w ->
                  case uncons input of
                    Nothing -> finalizeWinners
                    Just (c,input') -> do
                      case CMap.findWithDefault o c t of
                        Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} ->
                          findTrans s1 s2 did' dt' dtrans offset c input'
              | otherwise -> do
                  (did',dt') <- processWinner s1 did dt w offset
                  next' s1 s2 did' dt' offset prev input

        next' s1 s2 did dt offset prev input = {-# SCC "goNext'.next" #-}
          case dt of
            Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
              if test wt offset prev input
                then next' s1 s2 did a offset prev input
                else next' s1 s2 did b offset prev input
            Simple' {dt_win=w,dt_trans=t, dt_other=o} ->
              case uncons input of
                Nothing -> finalizeWinners
                Just (c,input') -> do
                  case CMap.findWithDefault o c t of
                    Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} ->
                      findTrans s1 s2 did' dt' dtrans offset c input'

        findTrans s1 s2 did' dt' dtrans offset prev' input' =  {-# SCC "goNext.findTrans" #-} do
          --
          let findTransTo (destIndex,sources) = do
                val <- if IMap.null sources then return (succ offset)
                         else return . minimum =<< mapM (s1 !!) (IMap.keys sources)
                set s2 destIndex val
                return val
          earlyStart <- fmap minimum $ mapM findTransTo (IMap.toList dtrans)
          --
          earlyWin <- readSTRef (mq_earliest winQ)
          if earlyWin < earlyStart
            then do
              winnersR <- getMQ earlyStart winQ
              writeSTRef storeNext (next s2 s1 did' dt' (succ offset) prev' input')
              mapM wsToGroup (reverse winnersR)
            else do
              let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input'

        processWinner s1 did dt w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do
          let getStart (sourceIndex,_) = s1 !! sourceIndex
          vals <- mapM getStart (IMap.toList w)
          let low = minimum vals   -- perhaps a non-empty winner
              high = maximum vals  -- perhaps an empty winner
          if low < offset
            then do
              putMQ (WScratch low offset) winQ
              when (high==offset || IMap.member startState w) $
                putMQ (WScratch offset offset) winQ
              let keepState i1 = do
                    startsAt <- s1 !! i1
                    let keep = (startsAt <= low) || (offset <= startsAt)
                    if keep
                      then return True
                      else if i1 == startState
                             then {- check for additional empty winner -}
                                  set s1 i1 (succ offset) >> return True
                             else writeSTRef eliminatedStateFlag True >> return False
              states' <- filterM keepState (ISet.toAscList did)
              flag <- readSTRef eliminatedStateFlag
              if flag
                then do
                  writeSTRef eliminatedStateFlag False
                  let DFA {d_id=did',d_dt=dt'} = Trie.lookupAsc trie states'
                  return (did',dt')
                else do
                  return (did,dt)
            else do
               -- offset == low == minimum vals == maximum vals == high; vals == [offset]
               putMQ (WScratch offset offset) winQ
               return (did,dt)

        finalizeWinners = do
          winnersR <- readSTRef (mq_list winQ)
          resetMQ winQ
          writeSTRef storeNext (return [])
          mapM wsToGroup (reverse winnersR)

    -- goNext then ends with the next statement
    next s1In s2In didIn dtIn offsetIn prevIn inputIn

----

{-# INLINE mkTest #-}
mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool
mkTest isMultiline = if isMultiline then test_multiline else test_singleline
  where test_multiline Test_BOL _off prev _input = prev == '\n'
        test_multiline Test_EOL _off _prev input = case uncons input of
                                                     Nothing -> True
                                                     Just (next,_) -> next == '\n'
        test_singleline Test_BOL off _prev _input = off == 0
        test_singleline Test_EOL _off _prev input = case uncons input of
                                                      Nothing -> True
                                                      _ -> False

----

{- MUTABLE WINNER QUEUE -}

data MQ s = MQ { mq_earliest :: !(STRef s Position)
               , mq_list :: !(STRef s [WScratch])
               }

newMQ :: S.ST s (MQ s)
newMQ = do
  earliest <- newSTRef maxBound
  list <- newSTRef []
  return (MQ earliest list)

resetMQ :: MQ s -> S.ST s ()
resetMQ (MQ {mq_earliest=earliest,mq_list=list}) = do
  writeSTRef earliest maxBound
  writeSTRef list []

putMQ :: WScratch -> MQ s -> S.ST s ()
putMQ ws@(WScratch {ws_start=start,ws_stop=stop}) (MQ {mq_earliest=earliest,mq_list=list}) = do
  startE <- readSTRef earliest
  if start <= startE
    then writeSTRef earliest start >> writeSTRef list [ws]
    else do
      old <- readSTRef list
      let !rest = dropWhile (\ w -> start <= ws_start w) old 
          !new = ws : rest
      writeSTRef list new

getMQ :: Position -> MQ s -> ST s [WScratch]
getMQ pos (MQ {mq_earliest=earliest,mq_list=list}) = do
  old <- readSTRef list
  case span (\ w -> pos <= ws_start w) old of
    ([],ans) -> do
      writeSTRef earliest maxBound
      writeSTRef list []
      return ans
    (new,ans) -> do
      writeSTRef earliest (ws_start (last new))
      writeSTRef list new
      return ans

{- MUTABLE SCRATCH DATA STRUCTURES -}

data SScratch s = SScratch { _s_1 :: !(MScratch s)
                           , _s_2 :: !(MScratch s)
                           , _s_mq :: !(MQ s)
                           }
type MScratch s = STUArray s Index Position
data WScratch = WScratch {ws_start,ws_stop :: !Position}
  deriving Show

{- DEBUGGING HELPERS -}
{- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -}

{-# INLINE newA #-}
newA :: (MArray (STUArray s) e (ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e)
newA b_tags initial = newArray b_tags initial

{-# INLINE newA_ #-}
newA_ :: (MArray (STUArray s) e (ST s)) => (Tag,Tag) -> S.ST s (STUArray s Tag e)
newA_ b_tags = newArray_ b_tags

newScratch :: (Index,Index) -> S.ST s (SScratch s)
newScratch b_index = do
  s1 <- newMScratch b_index
  s2 <- newMScratch b_index
  winQ <- newMQ
  return (SScratch s1 s2 winQ)

newMScratch :: (Index,Index) -> S.ST s (MScratch s)
newMScratch b_index = newA b_index (-1)

{- CONVERT WINNERS TO MATCHARRAY -}

wsToGroup :: WScratch -> ST s MatchArray
wsToGroup (WScratch start stop) = do
  ma <- newArray (0,0) (start,stop-start)  :: ST s (STArray s Int (MatchOffset,MatchLength))
  unsafeFreeze ma