{-# LANGUAGE RecordWildCards #-}


-- | A version in which a separate automaton is built (according to
-- the underlying building function) for each distinct rule head
-- symbol, i.e. the set of rules is first partitioned w.r.t. the
-- heads of the individual rules, and then for each partition a
-- separate automaton is built.


module NLP.Partage.Auto.Set
(
-- -- AutoSet
--   AutoSet
-- , buildAutoSet
--
-- -- * Interface
-- , shell
  fromGram
) where


import           Control.Applicative ((<$>))
import           Control.Monad (forM)
import qualified Control.Monad.State.Strict as E
-- -- import           Control.Monad.Trans.Class (lift)

import           Data.List (foldl')
import           Data.Maybe (maybeToList)
import qualified Data.Set                   as S
import qualified Data.Map.Strict            as M

import           Data.DAWG.Ord (ID)

import           NLP.Partage.FactGram
    ( FactGram, Lab(..), Rule(..) )


import qualified NLP.Partage.Auto as A


--------------------------------------------------
-- Interface
--------------------------------------------------


-- | Abstract over the concrete implementation of automaton.
shell :: (Ord a) => AutoSet a -> A.Auto a
shell AutoSet{..} = A.Auto
    { roots  = S.fromList
             . map unExID
             . S.toList $ rootSet
             -- we could note in the specification of the
             -- `Mini.Auto` that it doesn't have to be very
             -- efficient because it is run only once per
             -- parsing session
    , follow = \e x -> do
        (autoID, i) <- M.lookup (ExID e) fromExID
        auto        <- M.lookup autoID autoMap
        j           <- A.follow auto i x
        unExID     <$> M.lookup (autoID, j) toExID
    , edges  = \e -> do
        let mtl = maybeToList
        (autoID, i) <- mtl $ M.lookup (ExID e) fromExID
        auto        <- mtl $ M.lookup autoID autoMap
        (x, j)      <- A.edges auto i
        e'          <- mtl $ M.lookup (autoID, j) toExID
        return (x, unExID e')
    }


-- | Build the set of automata from the given grammar.
fromGram
    :: (Ord n, Ord t)
    => (FactGram n t -> A.GramAuto n t)
        -- ^ The underlying automaton construction method
    -> FactGram n t
        -- ^ The grammar to compress
    -> A.GramAuto n t
fromGram mkOne = shell . buildAutoSet mkOne


--------------------------------------------------
-- Implementation
--------------------------------------------------


-- | An external identifier (in contrast to internal identifiers
-- which are local to individual component automata).
newtype ExID = ExID { unExID :: ID }
    deriving (Show, Eq, Ord)


-- | An automaton identifier.
newtype AutoID = AutoID { _unAutoID :: ID }
    deriving (Show, Eq, Ord)


-- | An ensemble of automata.
data AutoSet a = AutoSet
    { autoMap   :: M.Map AutoID (A.Auto a)
    -- ^ individual automata and their identifiers
    , rootSet   :: S.Set ExID
    -- ^ A set of roots of the ensemble
    , fromExID  :: M.Map ExID (AutoID, ID)
    -- ^ Map external IDs to internal ones
    , toExID    :: M.Map (AutoID, ID) ExID
    -- ^ Reverse of `fromEx`
    }


-- | An empty `AutoSet`.
emptyAS :: AutoSet a
emptyAS = AutoSet M.empty S.empty M.empty M.empty


-- | Assuming that two `AutoSet`s are disjoint (i.e. they have
-- disjoint sets of `AutoID`s and disjoin sets of `ExID`s), we can
-- union them easily.
unionAS :: AutoSet a -> AutoSet a -> AutoSet a
unionAS p q = AutoSet
    { autoMap   = M.union (autoMap p) (autoMap q)
    , rootSet   = S.union (rootSet p) (rootSet q)
    , fromExID  = M.union (fromExID p) (fromExID q)
    , toExID    = M.union (toExID p) (toExID q) }


-- | Union a list of `AutoSet`s.
unionsAS :: [AutoSet a] -> AutoSet a
unionsAS = foldl' unionAS emptyAS


-- | Build automata from the given grammar.
buildAutoSet
    :: (Ord n, Ord t)
    => (FactGram n t -> A.GramAuto n t)
        -- ^ The underlying automaton construction method
    -> FactGram n t
        -- ^ The grammar to compress
    -> AutoSet (A.Edge (Lab n t))
buildAutoSet mkOne gram = runM $
    unionsAS <$> sequence
        [ mkAutoSet
            (AutoID autoID)
            (mkOne ruleSet)
        | (autoID, ruleSet)
            <- zip [0..] (M.elems gramByHead) ]
  where
    -- grammar divided by rule heads
    gramByHead = M.fromListWith S.union
        [ (headR r, S.singleton r)
        | r <- S.toList gram ]
    -- build a single automatom
    mkAutoSet autoID auto = do
        rootMap <- mkNodeMap (A.roots auto)
        descMap <- mkNodeMap (A.allIDs auto S.\\ A.roots auto)
        let nodeMap = rootMap `M.union` descMap
        return AutoSet
            { autoMap   = M.singleton autoID auto
            , rootSet   = M.keysSet rootMap
            , fromExID  = nodeMap
            , toExID    = rev1to1 nodeMap }
      where
        mkNodeMap inNodeSet = fmap M.fromList $
            forM (S.toList inNodeSet) $ \i -> do
                e <- newExID
                return (e, (autoID, i))
    -- low-level monad-related functions
    runM = flip E.evalState (0 :: Int)
    newExID = E.state $ \k -> (ExID k, k + 1)
    -- reverse bijection
    rev1to1 = M.fromList . map swap . M.toList
    swap (x, y) = (y, x)