{-# LANGUAGE GADTs #-}
{-# OPTIONS_GHC -fno-do-lambda-eta-expansion #-}
module Text.Regex.Applicative.Compile (compile) where

import Control.Monad.Trans.State
import Text.Regex.Applicative.Types
import Control.Applicative
import Data.Maybe
import qualified Data.IntMap as IntMap

compile :: RE s a -> (a -> [Thread s r]) -> [Thread s r]
compile e k = compile2 e (SingleCont k)

data Cont a = SingleCont !a | EmptyNonEmpty !a !a

instance Functor Cont where
    fmap f k =
        case k of
            SingleCont a -> SingleCont (f a)
            EmptyNonEmpty a b -> EmptyNonEmpty (f a) (f b)

emptyCont :: Cont a -> a
emptyCont k =
    case k of
        SingleCont a -> a
        EmptyNonEmpty a _ -> a
nonEmptyCont :: Cont a -> a
nonEmptyCont k =
    case k of
        SingleCont a -> a
        EmptyNonEmpty _ a -> a

-- The whole point of this module is this function, compile2, which needs to be
-- compiled with -fno-do-lambda-eta-expansion for efficiency.
--
-- Since this option would make other code perform worse, we place this
-- function in a separate module and make sure it's not inlined.
--
-- The point of "-fno-do-lambda-eta-expansion" is to make sure the tree is
-- "compiled" only once.
--
-- compile2 function takes two continuations: one when the match is empty and
-- one when the match is non-empty. See the "Rep" case for the reason.
compile2 :: RE s a -> Cont (a -> [Thread s r]) -> [Thread s r]
compile2 e =
    case e of
        Eps -> \k -> emptyCont k ()
        Symbol i p -> \k -> [t $ nonEmptyCont k] where
          -- t :: (a -> [Thread s r]) -> Thread s r
          t k = Thread i $ \s ->
            if p s then k s else []
        App n1 n2 ->
            let a1 = compile2 n1
                a2 = compile2 n2
            in \k -> case k of
                SingleCont k -> a1 $ SingleCont $ \a1_value -> a2 $ SingleCont $ k . a1_value
                EmptyNonEmpty ke kn ->
                    a1 $ EmptyNonEmpty
                        -- empty
                        (\a1_value -> a2 $ EmptyNonEmpty (ke . a1_value) (kn . a1_value))
                        -- non-empty
                        (\a1_value -> a2 $ EmptyNonEmpty (kn . a1_value) (kn . a1_value))
        Alt n1 n2 ->
            let a1 = compile2 n1
                a2 = compile2 n2
            in \k -> a1 k ++ a2 k
        Fail -> const []
        Fmap f n -> let a = compile2 n in \k -> a $ fmap (. f) k
        -- This is actually the point where we use the difference between
        -- continuations. For the inner RE the empty continuation is a
        -- "failing" one in order to avoid non-termination.
        Rep g f b n ->
            let a = compile2 n
                threads b k =
                    combine g
                        (a $ EmptyNonEmpty (\_ -> []) (\v -> let b' = f b v in threads b' (SingleCont $ nonEmptyCont k)))
                        (emptyCont k b)
            in threads b
        Void n -> let a = compile2_ n in \k -> a $ fmap ($ ()) k

data FSMState
    = SAccept
    | STransition ThreadId

type FSMMap s = IntMap.IntMap (s -> Bool, [FSMState])

mkNFA :: RE s a -> ([FSMState], (FSMMap s))
mkNFA e =
    flip runState IntMap.empty $
        go e [SAccept]
  where
  go :: RE s a -> [FSMState] -> State (FSMMap s) [FSMState]
  go e k =
    case e of
        Eps -> return k
        Symbol i@(ThreadId n) p -> do
            modify $ IntMap.insert n $
                (p, k)
            return [STransition i]
        App n1 n2 -> go n1 =<< go n2 k
        Alt n1 n2 -> (++) <$> go n1 k <*> go n2 k
        Fail -> return []
        Fmap _ n -> go n k
        Rep g _ _ n ->
            let entries = findEntries n
                cont = combine g entries k
            in
            -- return value of 'go' is ignored -- it should be a subset of
            -- 'cont'
            go n cont >> return cont
        Void n -> go n k

  findEntries :: RE s a -> [FSMState]
  findEntries e =
    -- A simple (although a bit inefficient) way to find all entry points is
    -- just to use 'go'
    evalState (go e []) IntMap.empty

compile2_ :: RE s a -> Cont [Thread s r] -> [Thread s r]
compile2_ e =
    let (entries, fsmap) = mkNFA e
        mkThread _ k1 (STransition i@(ThreadId n)) =
            let (p, cont) = fromMaybe (error "Unknown id") $ IntMap.lookup n fsmap
            in [Thread i $ \s ->
                if p s
                    then concatMap (mkThread k1 k1) cont
                    else []]
        mkThread k0 _ SAccept = k0

    in \k -> concatMap (mkThread (emptyCont k) (nonEmptyCont k)) entries

combine g continue stop =
    case g of
        Greedy -> continue ++ stop
        NonGreedy -> stop ++ continue