{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE FlexibleContexts #-}


-- | Prefix tree grammar representation: the set of rules is stored
-- in a form of a prefix tree.


module NLP.Partage.Auto.Trie
(
-- -- * Trie
--   Trie
-- , empty
-- , insert
-- , fromLang
--
-- -- * From grammar
-- , buildTrie
--
-- -- * Interface
-- , shell
  fromGram
) where


import qualified Control.Arrow as Arr
import           Control.Applicative ((<$>), (<*>), pure)
import qualified Control.Monad.State.Strict as E

import           Data.Maybe (fromMaybe)
import           Data.List (foldl')
import qualified Data.Set as S
import qualified Data.Map.Strict as M

import           Data.DAWG.Ord (ID)

import qualified NLP.Partage.Auto as A
import           NLP.Partage.FactGram (FactGram, Lab(..), Rule(..))


--------------------------------------------------
-- Trie
--------------------------------------------------


-- | Simple trie implementation.
newtype Trie a = Trie { _unTrie :: M.Map a (Trie a) }


-- | Empty trie.
empty :: Trie a
empty = Trie M.empty


-- | Insert new element into the trie.
insert :: Ord a => [a] -> Trie a -> Trie a
insert (x:xs) (Trie t) =
    let s = fromMaybe empty (M.lookup x t)
    in  Trie $ M.insert x (insert xs s) t
insert [] t = t


-- | Build trie from language.
fromLang :: Ord a => [[a]] -> Trie a
fromLang = foldl' (flip insert) empty


--------------------------------------------------
-- Trie from grammar
--------------------------------------------------


-- | Build trie from the given grammar.
buildTrie :: (Ord n, Ord t) => FactGram n t -> Trie (A.Edge (Lab n t))
buildTrie gram = fromLang
    [ map A.Body bodyR ++ [A.Head headR]
    | Rule{..} <- S.toList gram ]


--------------------------------------------------
-- Interface
--------------------------------------------------


-- | Abstract over the concrete implementation.
shell :: (Ord n, Ord t) => Trie (A.Edge (Lab n t)) -> A.GramAuto n t
shell d0 = A.Auto
    { roots  = S.singleton (rootID d)
    , follow = follow d
    , edges  = edges d }
    where d = convert d0


-- | Build the trie-based representation of the given grammar.
fromGram :: (Ord n, Ord t) => FactGram n t -> A.GramAuto n t
fromGram = shell . buildTrie


-- | Node type.
type Node a = M.Map a ID


-- | Alternative trie represetation with explicit node identifiers.
data ITrie a = ITrie
    { rootID    :: ID
    -- ^ Root of the trie
    , nodeMap   :: M.Map ID (Node a)
    }


-- | Follow symbol from the given node.
follow :: Ord a => ITrie a -> ID -> a -> Maybe ID
follow ITrie{..} i x = do
    node <- M.lookup i nodeMap
    M.lookup x node


-- | All edges outgoing from the given node ID.
edges :: ITrie a -> ID -> [(a, ID)]
edges ITrie{..} i = case M.lookup i nodeMap of
    Nothing -> []
    Just m  -> M.toList m


-- | Convert `Trie` to `ITrie`.
convert :: Ord a => Trie a -> ITrie a
convert t0 = ITrie
    { rootID  = root
    , nodeMap = nodeMap' }
  where
    (root, (_, nodeMap')) = E.runState (doit t0) (0 :: Int, M.empty)
    doit (Trie t) = do
        i <- newID
        node <- M.fromList <$> sequence
            [ (,) <$> pure x <*> doit s
            | (x, s) <- M.toList t ]
        yield i node
        return i
    newID = E.state $ \(n, m) -> (n, (n + 1, m))
    yield i node = E.modify $ Arr.second (M.insert i node)