{-# LANGUAGE RecordWildCards      #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE CPP #-}


-- | Earley-style TAG parsing based on automata, with a distinction
-- between active and passive items.


module NLP.Partage.Earley.AutoAP
(
-- * Earley-style parsing
  recognize
, recognizeFrom
, parse
, earley
-- ** With automata precompiled
, recognizeAuto
, recognizeFromAuto
, parseAuto
, earleyAuto

-- * Parsing trace (hypergraph)
, Hype
-- ** Extracting parsed trees
, parsedTrees
-- ** Stats
, hyperNodesNum
, hyperEdgesNum
-- ** Printing
, printHype

-- * Sentence position
, Pos
) where


import           Prelude hiding             (span, (.))
import           Control.Applicative        ((<$>))
import           Control.Monad      (guard, void, (>=>), when, forM_)
import           Control.Monad.Trans.Class  (lift)
-- import           Control.Monad.Trans.Maybe  (MaybeT (..))
import qualified Control.Monad.RWS.Strict   as RWS
import           Control.Category ((>>>), (.))

import           Data.Function              (on)
import           Data.Maybe     ( isJust, isNothing, mapMaybe
                                , maybeToList )
import qualified Data.Map.Strict            as M
import           Data.Ord       ( comparing )
import           Data.List      ( sortBy )
import qualified Data.Set                   as S
import qualified Data.PSQueue               as Q
import           Data.PSQueue (Binding(..))
import           Data.Lens.Light
import qualified Data.Vector                as V

import qualified Pipes                      as P
-- import qualified Pipes.Prelude              as P

import           Data.DAWG.Ord (ID)
-- import qualified Data.DAWG.Ord.Dynamic      as D

import           NLP.Partage.SOrd
import           NLP.Partage.FactGram (FactGram)
import           NLP.Partage.FactGram.Internal
                                ( Lab(..), Rule(..), viewLab )
import qualified NLP.Partage.Auto as A
import qualified NLP.Partage.Auto.DAWG  as D
import qualified NLP.Partage.Tree       as T


--------------------------------------------------
-- BASE TYPES
--------------------------------------------------


-- | A position in the input sentence.
type Pos = Int


data Span = Span {
    -- | The starting position.
      _beg   :: Pos
    -- | The ending position (or rather the position of the dot).
    , _end   :: Pos
    -- | Coordinates of the gap (if applies)
    , _gap   :: Maybe (Pos, Pos)
    } deriving (Show, Eq, Ord)
$( makeLenses [''Span] )


-- | Active chart item : state reference + span.
data Active = Active {
      _state :: ID
    , _spanA :: Span
    } deriving (Show, Eq, Ord)
$( makeLenses [''Active] )


-- | Passive chart item : label + span.
data Passive n t = Passive {
      _label :: Lab n t
    , _spanP :: Span
    } deriving (Show, Eq, Ord)
$( makeLenses [''Passive] )


-- | Does it represent regular rules?
regular :: Span -> Bool
regular = isNothing . getL gap


-- | Does it represent auxiliary rules?
auxiliary :: Span -> Bool
auxiliary = isJust . getL gap


-- | Print an active item.
printSpan :: Span -> IO ()
printSpan span = do
    putStr . show $ getL beg span
    putStr ", "
    case getL gap span of
        Nothing -> return ()
        Just (p, q) -> do
            putStr $ show p
            putStr ", "
            putStr $ show q
            putStr ", "
    putStr . show $ getL end span


-- | Print an active item.
printActive :: Active -> IO ()
printActive p = do
    putStr "("
    putStr . show $ getL state p
    putStr ", "
    printSpan $ getL spanA p
    putStrLn ")"


-- | Print a passive item.
printPassive :: (Show n, Show t) => Passive n t -> IO ()
printPassive p = do
    putStr "("
    putStr . viewLab $ getL label p
    putStr ", "
    printSpan $ getL spanP p
    putStrLn ")"


--------------------------------------------------
-- Traversal
--------------------------------------------------


-- | Traversal represents an action of inducing a new item on the
-- basis of one or two other chart items.  It can be seen as an
-- application of one of the inference rules specifying the parsing
-- algorithm.
--
-- TODO: Sometimes there is no need to store all the arguments of the
-- inference rules, it seems.  From one of the arguments the other
-- one could be derived.
data Trav n t
    = Scan
        { _scanFrom :: Active
        -- ^ The input active state
        , _scanTerm :: t
        -- ^ The scanned terminal
        }
    | Subst
        { _passArg  :: Passive n t
        -- ^ The passive argument of the action
        , _actArg   :: Active
        -- ^ The active argument of the action
        }
    -- ^ Pseudo substitution
    | Foot
        { _actArg   :: Active
        -- ^ The passive argument of the action
        -- , theFoot  :: n
        , _theFoot  :: Passive n t
        -- ^ The foot non-terminal
        }
    -- ^ Foot adjoin
    | Adjoin
        { _passAdj  :: Passive n t
        -- ^ The adjoined item
        , _passMod  :: Passive n t
        -- ^ The modified item
        }
    -- ^ Adjoin terminate with two passive arguments
    deriving (Show, Eq, Ord)


-- | Print a traversal.
printTrav :: (Show n, Show t) => Item n t -> Trav n t -> IO ()
printTrav q' (Scan p x) = do
    putStr "# " >> printActive p
    putStr "+ " >> print x
    putStr "= " >> printItem q'
printTrav q' (Subst p q) = do
    putStr "# " >> printActive q
    putStr "+ " >> printPassive p
    putStr "= " >> printItem q'
printTrav q' (Foot q p) = do
    putStr "# " >> printActive q
    putStr "+ " >> printPassive p
    putStr "= " >> printItem q'
printTrav q' (Adjoin p s) = do
    putStr "# " >> printPassive p
    putStr "+ " >> printPassive s
    putStr "= " >> printItem q'


--------------------------------------------------
-- Priority
--------------------------------------------------


-- | Priority type.
--
-- NOTE: Priority has to be composed from two elements because
-- otherwise `tryAdjoinTerm` could work incorrectly.  That is,
-- the modified item could be popped from the queue after the
-- modifier (auxiliary) item and, as a result, adjunction would
-- not be considered.
type Prio = (Int, Int)


-- | Priority of an active item.  Crucial for the algorithm --
-- states have to be removed from the queue in a specific order.
prioA :: Active -> Prio
prioA p =
    let i = getL (beg . spanA) p
        j = getL (end . spanA) p
    in  (j, j - i)


-- | Priority of a passive item.  Crucial for the algorithm --
-- states have to be removed from the queue in a specific order.
prioP :: Passive n t -> Prio
prioP p =
    let i = getL (beg . spanP) p
        j = getL (end . spanP) p
    in  (j, j - i)


-- | Extended priority which preservs information about the traversal
-- leading to the underlying chart item.
data ExtPrio n t = ExtPrio
    { prioVal   :: Prio
    -- ^ The actual priority
    , prioTrav  :: S.Set (Trav n t)
    -- ^ Traversal leading to the underlying chart item
    } deriving (Show)

instance (Eq n, Eq t) => Eq (ExtPrio n t) where
    (==) = (==) `on` prioVal
instance (Ord n, Ord t) => Ord (ExtPrio n t) where
    compare = compare `on` prioVal


-- | Construct a new `ExtPrio`.
extPrio :: Prio -> ExtPrio n t
extPrio p = ExtPrio p S.empty


-- | Join two priorities:
-- * The actual priority preserved is the lower of the two,
-- * The traversals are unioned.
--
-- NOTE: at the moment, priority is strictly specified by the
-- underlying chart item itself so we know that both priorities must
-- be equal.  Later when we start using probabilities this statement
-- will no longer hold.
joinPrio :: (Ord n, Ord t) => ExtPrio n t -> ExtPrio n t -> ExtPrio n t
joinPrio x y = ExtPrio
    (min (prioVal x) (prioVal y))
    (S.union (prioTrav x) (prioTrav y))


--------------------------------------------------
-- Item Type
--------------------------------------------------


-- | Passive or active item.
data Item n t
    = ItemP (Passive n t)
    | ItemA Active
    deriving (Show, Eq, Ord)


-- | Print an active item.
printItem :: (Show n, Show t) => Item n t -> IO ()
printItem (ItemP p) = printPassive p
printItem (ItemA p) = printActive p


-- | Priority of an active item.  Crucial for the algorithm --
-- states have to be removed from the queue in a specific order.
prio :: Item n t -> Prio
prio (ItemP p) = prioP p
prio (ItemA p) = prioA p


--------------------------------------------------
-- Earley monad
--------------------------------------------------


-- | The reader of the earley monad: vector of sets of terminals.
type EarRd t = V.Vector (S.Set t)


-- | A hypergraph dynamically constructed during parsing.
data Hype n t = Hype
    { automat :: A.GramAuto n t
    -- ^ The underlying automaton (abstract implementation)

    , withBody :: M.Map (Lab n t) (S.Set ID)
    -- ^ A data structure which, for each label, determines the
    -- set of automaton states from which this label goes out
    -- as a body transition.

    -- , doneActive  :: M.Map (ID, Pos) (S.Set (Active n t))
    , doneActive  :: M.Map Pos (M.Map ID
        (M.Map Active (S.Set (Trav n t))))
    -- ^ Processed active items partitioned w.r.t ending
    -- positions and state IDs.

    -- , donePassive :: S.Set (Passive n t)
    , donePassive :: M.Map (Pos, n, Pos)
        (M.Map (Passive n t) (S.Set (Trav n t)))
    -- ^ Processed passive items.

    , waiting     :: Q.PSQ (Item n t) (ExtPrio n t)
    -- ^ The set of states waiting on the queue to be processed.
    -- Invariant: the intersection of `done' and `waiting' states
    -- is empty.
    --
    -- NOTE2: Don't understand the note below...
    -- NOTE: The only operation which requires active states to
    -- be put to the queue in the current algorithm is the scan
    -- operation.  So perhaps we could somehow bypass this
    -- problem and perform scan elsewhere.  Nevertheless, it is
    -- not certain that the same will apply to the probabilistic
    -- parser.
    }


-- | Make an initial `Hype` from a set of states.
mkHype
    :: (Ord n, Ord t)
    => A.GramAuto n t
    -> S.Set Active
    -> Hype n t
mkHype dag s = Hype
    { automat = dag
    , withBody = mkWithBody dag
    , doneActive  = M.empty
    , donePassive = M.empty
    , waiting = Q.fromList
        [ ItemA p :-> extPrio (prioA p)
        | p <- S.toList s ] }


-- | Create the `withBody` component based on the automaton.
mkWithBody
    :: (Ord n, Ord t)
    => A.GramAuto n t
    -> M.Map (Lab n t) (S.Set ID)
mkWithBody dag = M.fromListWith S.union
    [ (x, S.singleton i)
    | (i, A.Body x, _j) <- A.allEdges dag ]


-- | Earley parser monad.  Contains the input sentence (reader)
-- and the state of the computation `Hype'.
type Earley n t = RWS.RWST (EarRd t) () (Hype n t) IO


-- | Read word from the given position of the input.
readInput :: Pos -> P.ListT (Earley n t) t
readInput i = do
    -- ask for the input
    sent <- RWS.ask
    -- just a safe way to retrieve the i-th element
    -- each $ take 1 $ drop i xs
    xs <- some $ sent V.!? i
    each $ S.toList xs


--------------------------------------------------
-- Hypergraph stats
--------------------------------------------------


-- | Number of nodes in the parsing hypergraph.
hyperNodesNum :: Hype n t -> Int
hyperNodesNum e
    = length (listPassive e)
    + length (listActive e)


-- | Number of edges in the parsing hypergraph.
hyperEdgesNum :: forall n t. Hype n t -> Int
hyperEdgesNum earSt
    = sumOver listPassive
    + sumOver listActive
  where
    sumOver :: (Hype n t -> [(a, S.Set (Trav n t))]) -> Int
    sumOver listIt = sum
        [ S.size travSet
        | (_, travSet) <- listIt earSt ]


-- | Extract hypergraph (hyper)edges.
hyperEdges :: Hype n t -> [(Item n t, Trav n t)]
hyperEdges earSt =
    passiveEdges ++ activeEdges
  where
    passiveEdges =
        [ (ItemP p, trav)
        | (p, travSet) <- listPassive earSt
        , trav <- S.toList travSet ]
    activeEdges =
        [ (ItemA p, trav)
        | (p, travSet) <- listActive earSt
        , trav <- S.toList travSet ]


-- | Print the hypergraph edges.
printHype :: (Show n, Show t) => Hype n t -> IO ()
printHype earSt =
    forM_ edges $ \(p, trav) ->
        printTrav p trav
  where
    edges  = sortIt (hyperEdges earSt)
    sortIt = sortBy (comparing $ prio.fst)


--------------------
-- Active items
--------------------


-- | List all active done items together with the corresponding
-- traversals.
listActive :: Hype n t -> [(Active, S.Set (Trav n t))]
listActive = (M.elems >=> M.elems >=> M.toList) . doneActive


-- | Return the corresponding set of traversals for an active item.
activeTrav
    :: (Ord n, Ord t)
    => Active -> Hype n t
    -> Maybe (S.Set (Trav n t))
activeTrav p
    = (   M.lookup (p ^. spanA ^. end)
      >=> M.lookup (p ^. state)
      >=> M.lookup p )
    . doneActive


-- | Check if the active item is not already processed.
_isProcessedA :: (Ord n, Ord t) => Active -> Hype n t -> Bool
_isProcessedA p =
    check . activeTrav p
  where
    check (Just _) = True
    check _        = False


-- | Check if the active item is not already processed.
isProcessedA :: (Ord n, Ord t) => Active -> Earley n t Bool
isProcessedA p = _isProcessedA p <$> RWS.get


-- | Mark the active item as processed (`done').
saveActive
    :: (Ord t, Ord n)
    => Active
    -> S.Set (Trav n t)
    -> Earley n t ()
saveActive p ts =
    RWS.state $ \s -> ((), s {doneActive = newDone s})
  where
    newDone st =
        M.insertWith
            ( M.unionWith
                ( M.unionWith S.union ) )
            ( p ^. spanA ^. end )
            ( M.singleton (p ^. state)
                ( M.singleton p ts ) )
            ( doneActive st )


--------------------
-- Passive items
--------------------


-- | List all passive done items together with the corresponding
-- traversals.
listPassive :: Hype n t -> [(Passive n t, S.Set (Trav n t))]
listPassive = (M.elems >=> M.toList) . donePassive


-- | Return the corresponding set of traversals for a passive item.
passiveTrav
    :: (Ord n, Ord t)
    => Passive n t -> Hype n t
    -> Maybe (S.Set (Trav n t))
passiveTrav p
    = ( M.lookup
        ( p ^. spanP ^. beg
        , nonTerm $ p ^. label
        , p ^. spanP ^. end ) >=> M.lookup p )
    . donePassive


-- | Check if the state is not already processed.
_isProcessedP :: (Ord n, Ord t) => Passive n t -> Hype n t -> Bool
_isProcessedP x =
    check . passiveTrav x
  where
    check (Just _) = True
    check _        = False


-- | Check if the passive item is not already processed.
isProcessedP :: (Ord n, Ord t) => Passive n t -> Earley n t Bool
isProcessedP p = _isProcessedP p <$> RWS.get


-- | Mark the passive item as processed (`done').
savePassive
    :: (Ord t, Ord n)
    => Passive n t
    -> S.Set (Trav n t)
    -> Earley n t ()
savePassive p ts =
    RWS.state $ \s -> ((), s {donePassive = newDone s})
  where
    newDone st =
        M.insertWith
            ( M.unionWith S.union )
            ( p ^. spanP ^. beg
            , nonTerm $ p ^. label
            , p ^. spanP ^. end )
            ( M.singleton p ts )
            ( donePassive st )


--------------------
-- Waiting Queue
--------------------


-- | Add the active item to the waiting queue.  Check first if it
-- is not already in the set of processed (`done') states.
pushActive :: (Ord t, Ord n) => Active -> Trav n t -> Earley n t ()
pushActive p t = isProcessedA p >>= \b -> if b
    then saveActive p $ S.singleton t
    else modify' $ \s -> s {waiting = newWait (waiting s)}
  where
    newWait = Q.insertWith joinPrio (ItemA p) newPrio
    newPrio = ExtPrio (prioA p) (S.singleton t)
-- pushActive p = RWS.state $ \s ->
--     let waiting' = if isProcessedA p s
--             then waiting s
--             else Q.insert (ItemA p) (prioA p) (waiting s)
--     in  ((), s {waiting = waiting'})


-- | Add the passive item to the waiting queue.  Check first if it
-- is not already in the set of processed (`done') states.
pushPassive :: (Ord t, Ord n) => Passive n t -> Trav n t -> Earley n t ()
pushPassive p t = isProcessedP p >>= \b -> if b
    then savePassive p $ S.singleton t
    else modify' $ \s -> s {waiting = newWait (waiting s)}
  where
    newWait = Q.insertWith joinPrio (ItemP p) newPrio
    newPrio = ExtPrio (prioP p) (S.singleton t)
-- -- | Add the passive item to the waiting queue.  Check first if it
-- -- is not already in the set of processed (`done') states.
-- pushPassive :: (Ord t, Ord n) => Passive n t -> Earley n t ()
-- pushPassive p = RWS.state $ \s ->
--     let waiting' = if isProcessedP p s
--             then waiting s
--             else Q.insert (ItemP p) (prioP p) (waiting s)
--     in  ((), s {waiting = waiting'})


-- | Add to the waiting queue all items induced from the given item.
pushInduced :: (Ord t, Ord n) => Active -> Trav n t -> Earley n t ()
pushInduced p t = do
    hasElems (getL state p) >>= \b -> when b
        (pushActive p t)
    P.runListT $ do
        x <- heads (getL state p)
        lift . flip pushPassive t $
            Passive x (getL spanA p)


-- | Remove a state from the queue.
popItem
    :: (Ord t, Ord n)
    => Earley n t
        (Maybe (Binding (Item n t) (ExtPrio n t)))
popItem = RWS.state $ \st -> case Q.minView (waiting st) of
    Nothing -> (Nothing, st)
    Just (b, s) -> (Just b, st {waiting = s})


---------------------------------
-- Extraction of Processed Items
---------------------------------


-- | Return all active processed items which:
-- * expect a given label,
-- * end on the given position.
expectEnd
    :: (Ord n, Ord t) => Lab n t -> Pos
    -> P.ListT (Earley n t) Active
expectEnd sym i = do
    Hype{..} <- lift RWS.get
    -- determine items which end on the given position
    doneEnd <- some $ M.lookup i doneActive
    -- determine automaton states from which the given label
    -- leaves as a body transition
    stateSet <- some $ M.lookup sym withBody
    -- pick one of the states
    stateID <- each $ S.toList stateSet
    --
    -- ALTERNATIVE: state <- each . S.toList $
    --      stateSet `S.intersection` M.keySet doneEnd
    --
    -- determine items which refer to the chosen states
    doneEndLab <- some $ M.lookup stateID doneEnd
    -- return them all!
    each $ M.keys doneEndLab


-- | Check if a passive item exists with:
-- * the given root non-terminal value (but not top-level
--   auxiliary)
-- * the given span
rootSpan
    :: Ord n => n -> (Pos, Pos)
    -> P.ListT (Earley n t) (Passive n t)
rootSpan x (i, j) = do
    Hype{..} <- lift RWS.get
    -- listValues (i, x, j) donePassive
    each $ case M.lookup (i, x, j) donePassive of
        Nothing -> []
        Just m -> M.keys m


-- -- | List all processed passive items.
-- listDone :: Done n t -> [Item n t]
-- listDone done = ($ done) $
--     M.elems >=> M.elems >=>
--     M.elems >=> S.toList


--------------------------------------------------
-- New Automaton-Based Primitives
--------------------------------------------------


-- | Follow the given terminal in the underlying automaton.
followTerm :: (Ord n, Ord t) => ID -> t -> P.ListT (Earley n t) ID
followTerm i c = do
    -- get the underlying automaton
    auto <- RWS.gets automat
    -- follow the label
    some $ A.follow auto i (A.Body $ Term c)


-- | Follow the given body transition in the underlying automaton.
-- It represents the transition function of the automaton.
--
-- TODO: merge with `followTerm`.
follow :: (Ord n, Ord t) => ID -> Lab n t -> P.ListT (Earley n t) ID
follow i x = do
    -- get the underlying automaton
    auto <- RWS.gets automat
    -- follow the label
    some $ A.follow auto i (A.Body x)


-- | Rule heads outgoing from the given automaton state.
heads :: ID -> P.ListT (Earley n t) (Lab n t)
heads i = do
    auto <- RWS.gets automat
    let mayHead (x, _) = case x of
            A.Body _  -> Nothing
            A.Head y -> Just y
    each $ mapMaybe mayHead $ A.edges auto i


-- -- | Rule body elements outgoing from the given automaton state.
-- elems :: ID -> P.ListT (Earley n t) (Lab n t)
-- elems i = do
--     auto <- RWS.gets automat
--     let mayBody (x, _) = case x of
--             A.Body y  -> Just y
--             A.Head _ -> Nothing
--     each $ mapMaybe mayBody $ A.edges auto i


-- | Check if any element leaves the given state.
hasElems :: ID -> Earley n t Bool
hasElems i = do
    auto <- RWS.gets automat
    let mayBody (x, _) = case x of
            A.Body y  -> Just y
            A.Head _ -> Nothing
    return
        . not . null
        . mapMaybe mayBody
        $ A.edges auto i


--------------------------------------------------
-- SCAN
--------------------------------------------------


-- | Try to perform SCAN on the given active state.
tryScan :: (SOrd t, SOrd n) => Active -> Earley n t ()
tryScan p = void $ P.runListT $ do
    -- read the word immediately following the ending position of
    -- the state
    c <- readInput $ getL (spanA >>> end) p
    -- follow appropriate terminal transition outgoing from the
    -- given automaton state
    j <- followTerm (getL state p) c
    -- construct the resultant active item
    -- let q = p {state = j, end = end p + 1}
    let q = setL state j
          . modL' (spanA >>> end) (+1)
          $ p
#ifdef Debug
    -- print logging information
    lift . lift $ do
        putStr "[S]  " >> printActive p
        putStr "  :  " >> printActive q
#endif
    -- push the resulting state into the waiting queue
    lift $ pushInduced q $ Scan p c


--------------------------------------------------
-- SUBST
--------------------------------------------------


-- | Try to use the passive item `p` to complement
-- (=> substitution) other rules.
trySubst :: (SOrd t, SOrd n) => Passive n t -> Earley n t ()
trySubst p = void $ P.runListT $ do
    let pLab = getL label p
        pSpan = getL spanP p
    -- make sure that `p' represents regular rules
    guard . regular $ pSpan
    -- find active items which end where `p' begins and which
    -- expect the non-terminal provided by `p' (ID included)
    q <- expectEnd pLab (getL beg pSpan)
    -- follow the transition symbol
    j <- follow (getL state q) pLab
    -- construct the resultant state
    -- let q' = q {state = j, spanA = spanA p {end = end p}}
    let q' = setL state j
           . setL (end.spanA) (getL end pSpan)
           $ q
#ifdef Debug
    -- print logging information
    lift . lift $ do
        putStr "[U]  " >> printPassive p
        putStr "  +  " >> printActive q
        putStr "  :  " >> printActive q'
#endif
    -- push the resulting state into the waiting queue
    lift $ pushInduced q' $ Subst p q


--------------------------------------------------
-- FOOT ADJOIN
--------------------------------------------------


-- | `tryAdjoinInit p q':
-- * `p' is a completed state (regular or auxiliary)
-- * `q' not completed and expects a *real* foot
tryAdjoinInit :: (SOrd n, SOrd t) => Passive n t -> Earley n t ()
tryAdjoinInit p = void $ P.runListT $ do
    let pLab = p ^. label
        pSpan = p ^. spanP
    -- make sure that the corresponding rule is either regular or
    -- intermediate auxiliary ((<=) used as implication here)
    guard $ auxiliary pSpan <= not (topLevel pLab)
    -- find all active items which expect a foot with the given
    -- symbol and which end where `p` begins
    let foot = AuxFoot $ nonTerm pLab
    q <- expectEnd foot (getL beg pSpan)
    -- follow the foot
    j <- follow (getL state q) foot
    -- construct the resultant state
    let q' = setL state j
           . setL (spanA >>> end) (pSpan ^. end)
           . setL (spanA >>> gap) (Just
                ( pSpan ^. beg
                , pSpan ^. end ))
           $ q
#ifdef Debug
    -- print logging information
    lift . lift $ do
        putStr "[A]  " >> printPassive p
        putStr "  +  " >> printActive q
        putStr "  :  " >> printActive q'
#endif
    -- push the resulting state into the waiting queue
    lift $ pushInduced q' $ Foot q p -- -- $ nonTerm foot


--------------------------------------------------
-- INTERNAL ADJOIN
--------------------------------------------------


-- | `tryAdjoinCont p q':
-- * `p' is a completed, auxiliary state
-- * `q' not completed and expects a *dummy* foot
tryAdjoinCont :: (SOrd n, SOrd t) => Passive n t -> Earley n t ()
tryAdjoinCont p = void $ P.runListT $ do
    let pLab = p ^. label
        pSpan = p ^. spanP
    -- make sure the label is not top-level (internal spine
    -- non-terminal)
    guard . not $ topLevel pLab
    -- make sure that `p' is an auxiliary item
    guard . auxiliary $ pSpan
    -- find all rules which expect a spine non-terminal provided
    -- by `p' and which end where `p' begins
    q <- expectEnd pLab (pSpan ^. beg)
    -- follow the spine non-terminal
    j <- follow (q ^. state) pLab
    -- construct the resulting state; the span of the gap of the
    -- inner state `p' is copied to the outer state based on `q'
    let q' = setL state j
           . setL (spanA >>> end) (pSpan ^. end)
           . setL (spanA >>> gap) (pSpan ^. gap)
           $ q
#ifdef Debug
    -- logging info
    lift . lift $ do
        putStr "[B]  " >> printPassive p
        putStr "  +  " >> printActive q
        putStr "  :  " >> printActive q'
#endif
    -- push the resulting state into the waiting queue
    lift $ pushInduced q' $ Subst p q


--------------------------------------------------
-- ROOT ADJOIN
--------------------------------------------------


-- | Adjoin a fully-parsed auxiliary state `p` to a partially parsed
-- tree represented by a fully parsed rule/state `q`.
tryAdjoinTerm :: (SOrd t, SOrd n) => Passive n t -> Earley n t ()
tryAdjoinTerm q = void $ P.runListT $ do
    let qLab = q ^. label
        qSpan = q ^. spanP
    -- make sure the label is top-level
    guard $ topLevel qLab
    -- make sure that it is an auxiliary item (by definition only
    -- auxiliary states have gaps)
    (gapBeg, gapEnd) <- each $ maybeToList $ qSpan ^. gap
    -- take all passive items with a given span and a given
    -- root non-terminal (IDs irrelevant)
    p <- rootSpan (nonTerm qLab) (gapBeg, gapEnd)
    let p' = setL (spanP >>> beg) (qSpan ^. beg)
           . setL (spanP >>> end) (qSpan ^. end)
           $ p
#ifdef Debug
    lift . lift $ do
        putStr "[C]  " >> printPassive q
        putStr "  +  " >> printPassive p
        putStr "  :  " >> printPassive p'
#endif
    lift $ pushPassive p' $ Adjoin q p


--------------------------------------------------
-- Earley step
--------------------------------------------------


-- | Step of the algorithm loop.  `p' is the state popped up from
-- the queue.
step
    :: (SOrd t, SOrd n)
    => Binding (Item n t) (ExtPrio n t)
    -> Earley n t ()
step (ItemP p :-> e) = do
    mapM_ ($ p)
      [ trySubst
      , tryAdjoinInit
      , tryAdjoinCont
      , tryAdjoinTerm ]
    savePassive p $ prioTrav e
step (ItemA p :-> e) = do
    mapM_ ($ p)
      [ tryScan ]
    saveActive p $ prioTrav e


---------------------------
-- Extracting Parsed Trees
---------------------------


-- | Extract the set of parsed trees obtained on the given input
-- sentence.  Should be run on the result of the earley algorithm.
parsedTrees
    :: forall n t. (Ord n, Ord t)
    => Hype n t    -- ^ Final state of the earley parser
    -> n            -- ^ The start symbol
    -> Int          -- ^ Length of the input sentence
    -> S.Set (T.Tree n t)
parsedTrees earSt start n

    = S.fromList
    $ concatMap fromPassive
    $ finalFrom start n earSt

  where

    fromPassive :: Passive n t -> [T.Tree n t]
    fromPassive p = concat
        [ fromPassiveTrav p trav
        | travSet <- maybeToList $ passiveTrav p earSt
        , trav <- S.toList travSet ]

    fromPassiveTrav p (Scan q t) =
        [ T.Branch
            (nonTerm $ getL label p)
            (reverse $ T.Leaf t : ts)
        | ts <- fromActive q ]

--     fromPassiveTrav p (Foot q x) =
--         [ T.Branch
--             (nonTerm $ getL label p)
--             (reverse $ T.Branch x [] : ts)
--         | ts <- fromActive q ]

    fromPassiveTrav p (Foot q _p') =
        [ T.Branch
            (nonTerm $ getL label p)
            (reverse $ T.Branch (nonTerm $ p ^. label) [] : ts)
        | ts <- fromActive q ]

    fromPassiveTrav p (Subst qp qa) =
        [ T.Branch
            (nonTerm $ getL label p)
            (reverse $ t : ts)
        | ts <- fromActive qa
        , t  <- fromPassive qp ]

    fromPassiveTrav _p (Adjoin qa qm) =
        [ replaceFoot ini aux
        | aux <- fromPassive qa
        , ini <- fromPassive qm ]

    -- | Replace foot (the only non-terminal leaf) by the given
    -- initial tree.
    replaceFoot ini (T.Branch _ []) = ini
    replaceFoot ini (T.Branch x ts) = T.Branch x $ map (replaceFoot ini) ts
    replaceFoot _ t@(T.Leaf _)    = t


    fromActive  :: Active -> [[T.Tree n t]]
    fromActive p = case activeTrav p earSt of
        Nothing -> error "fromActive: unknown active item"
        Just travSet -> if S.null travSet
            then [[]]
            else concatMap
                (fromActiveTrav p)
                (S.toList travSet)

    fromActiveTrav _p (Scan q t) =
        [ T.Leaf t : ts
        | ts <- fromActive q ]

    fromActiveTrav _p (Foot q p) =
        [ T.Branch (nonTerm $ p ^. label) [] : ts
        | ts <- fromActive q ]

--     fromActiveTrav _p (Foot q x) =
--         [ T.Branch x [] : ts
--         | ts <- fromActive q ]

    fromActiveTrav _p (Subst qp qa) =
        [ t : ts
        | ts <- fromActive qa
        , t  <- fromPassive qp ]

    fromActiveTrav _p (Adjoin _ _) =
        error "parsedTrees: fromActiveTrav called on a passive item"


--------------------------------------------------
-- EARLEY
--------------------------------------------------


-- | Does the given grammar generate the given sentence?
-- Uses the `earley` algorithm under the hood.
recognize
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => FactGram n t         -- ^ The grammar (set of rules)
    -> [S.Set t]            -- ^ Input sentence
    -> IO Bool
recognize gram =
    recognizeAuto (D.fromGram gram)


-- | Does the given grammar generate the given sentence from the
-- given non-terminal symbol (i.e. from an initial tree with this
-- symbol in its root)?  Uses the `earley` algorithm under the
-- hood.
recognizeFrom
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => FactGram n t         -- ^ The grammar (set of rules)
    -> n                    -- ^ The start symbol
    -> [S.Set t]            -- ^ Input sentence
    -> IO Bool
recognizeFrom gram =
    recognizeFromAuto (D.fromGram gram)


-- | Parse the given sentence and return the set of parsed trees.
parse
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => FactGram n t         -- ^ The grammar (set of rules)
    -> n                    -- ^ The start symbol
    -> [S.Set t]            -- ^ Input sentence
    -> IO (S.Set (T.Tree n t))
parse gram = parseAuto $ D.fromGram gram


-- | Perform the earley-style computation given the grammar and
-- the input sentence.
earley
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => FactGram n t         -- ^ The grammar (set of rules)
    -> [S.Set t]            -- ^ Input sentence
    -> IO (Hype n t)
earley gram = earleyAuto $ D.fromGram gram


--------------------------------------------------
-- Parsing with automaton
--------------------------------------------------


-- | See `recognize`.
recognizeAuto
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => A.GramAuto n t           -- ^ Grammar automaton
    -> [S.Set t]            -- ^ Input sentence
    -> IO Bool
recognizeAuto auto xs =
    isRecognized xs <$> earleyAuto auto xs


-- | See `recognizeFrom`.
recognizeFromAuto
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => A.GramAuto n t       -- ^ Grammar automaton
    -> n                    -- ^ The start symbol
    -> [S.Set t]            -- ^ Input sentence
    -> IO Bool
recognizeFromAuto auto start xs = do
    earSt <- earleyAuto auto xs
    return $ (not.null) (finalFrom start (length xs) earSt)


-- | See `parse`.
parseAuto
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => A.GramAuto n t           -- ^ Grammar automaton
    -> n                    -- ^ The start symbol
    -> [S.Set t]            -- ^ Input sentence
    -> IO (S.Set (T.Tree n t))
parseAuto auto start xs = do
    earSt <- earleyAuto auto xs
    return $ parsedTrees earSt start (length xs)


-- | See `earley`.
earleyAuto
#ifdef Debug
    :: (SOrd t, SOrd n)
#else
    :: (Ord t, Ord n)
#endif
    => A.GramAuto n t           -- ^ Grammar automaton
    -> [S.Set t]            -- ^ Input sentence
    -> IO (Hype n t)
earleyAuto dawg xs =
    fst <$> RWS.execRWST loop (V.fromList xs) st0
  where
    -- we put in the initial state all the states with the dot on
    -- the left of the body of the rule (-> left = []) on all
    -- positions of the input sentence.
    st0 = mkHype dawg $ S.fromList
        [ Active root Span
            { _beg   = i
            , _end   = i
            , _gap   = Nothing }
        | i <- [0 .. length xs - 1]
        , root <- S.toList (A.roots dawg) ]
    -- the computation is performed as long as the waiting queue
    -- is non-empty.
    loop = popItem >>= \mp -> case mp of
        Nothing -> return ()
        Just p  -> step p >> loop


--------------------------------------------------
-- New utilities
--------------------------------------------------


-- | Return the list of final passive chart items.
finalFrom
    :: (Ord n, Eq t)
    => n            -- ^ The start symbol
    -> Int          -- ^ The length of the input sentence
    -> Hype n t    -- ^ Result of the earley computation
    -> [Passive n t]
finalFrom start n Hype{..} =
    case M.lookup (0, start, n) donePassive of
        Nothing -> []
        Just m ->
            [ p
            | p <- M.keys m
            , p ^. label == NonT start Nothing ]


-- -- | Return the list of final passive chart items.
-- final
--     :: (Ord n, Eq t)
--     -> Int          -- ^ The length of the input sentence
--     -> Hype n t    -- ^ Result of the earley computation
--     -> [Passive n t]
-- final start n Hype{..} =
--     case M.lookup (0, start, n) donePassive of
--         Nothing -> []
--         Just m ->
--             [ p
--             | p <- M.keys m
--             , p ^. label == NonT start Nothing ]


-- | Check whether the given sentence is recognized
-- based on the resulting state of the earley parser.
--
-- TODO: The function returns `True` also when a subtree
-- of an elementary tree is recognized, it seems.
isRecognized
    :: (SOrd t, SOrd n)
    => [S.Set t]            -- ^ Input sentence
    -> Hype n t            -- ^ Earley parsing result
    -> Bool
isRecognized xs Hype{..} =
    (not . null)
    (complete
        (agregate donePassive))
  where
    n = length xs
    complete done =
        [ True | item <- S.toList done
        , item ^. spanP ^. beg == 0
        , item ^. spanP ^. end == n
        , isNothing (item ^. spanP ^. gap) ]
    agregate = S.unions . map M.keysSet . M.elems


--------------------------------------------------
-- Utilities
--------------------------------------------------


-- | Strict modify (in mtl starting from version 2.2).
modify' :: RWS.MonadState s m => (s -> s) -> m ()
modify' f = do
  x <- RWS.get
  RWS.put $! f x


-- -- | MaybeT transformer.
-- maybeT :: Monad m => Maybe a -> MaybeT m a
-- maybeT = MaybeT . return


-- | ListT from a list.
each :: Monad m => [a] -> P.ListT m a
each = P.Select . P.each


-- | ListT from a maybe.
some :: Monad m => Maybe a -> P.ListT m a
some = each . maybeToList


-- | Is the rule with the given head top-level?
topLevel :: Lab n t -> Bool
topLevel x = case x of
    NonT{..}  -> isNothing labID
    AuxRoot{} -> True
    _         -> False


-- -- | Pipe all values from the set corresponding to the given key.
-- listValues
--     :: (Monad m, Ord a)
--     => a -> M.Map a (S.Set b)
--     -> P.ListT m b
-- listValues x m = each $ case M.lookup x m of
--     Nothing -> []
--     Just s -> S.toList s