{-# LANGUAGE RecordWildCards #-}

-- | Internal representation of the "Data.DAWG" automaton.  Names in this
-- module correspond to a graphical representation of automaton: nodes refer
-- to states and edges refer to transitions.

module Data.DAWG.Internal
( 
-- * Node
  Node (..)
, Id
, edges
, onSym
, subst
-- * Graph
, Graph (..)
, empty
, size
, nodeBy
, nodeID
, insert
, delete
) where

import Control.Applicative ((<$>), (<*>))
import Data.Binary (Binary, Get, put, get)
import qualified Data.Map as M
import qualified Data.IntSet as IS
import qualified Data.IntMap as IM

import qualified Data.DAWG.VMap as V

-- | Node identifier.
type Id = Int

-- | Two nodes (states) belong to the same equivalence class (and,
-- consequently, they must be represented as one node in the graph)
-- iff they are equal with respect to their values and outgoing
-- edges.
--
-- Since 'Value' nodes are distinguished from 'Branch' nodes, two values
-- equal with respect to '==' function are always kept in one 'Value'
-- node in the graph.  It doesn't change the fact that to all 'Branch'
-- nodes one value is assigned through the epsilon transition.
--
-- Invariant: the 'value' identifier always points to the 'Value' node.
-- Edges in the 'edgeMap', on the other hand, point to 'Branch' nodes.
data Node a
    = Branch {
        -- | Epsilon transition.
          eps       :: {-# UNPACK #-} !Id
        -- | Map from alphabet symbols to 'Branch' node identifiers.
        , edgeMap   :: !(V.VMap Id) }
    | Value
        { unValue :: !a }
    deriving (Show, Eq, Ord)

instance Functor Node where
    fmap f (Value x) = Value (f x)
    fmap _ (Branch x y) = Branch x y

instance Binary a => Binary (Node a) where
    put Branch{..} = put (1 :: Int) >> put eps >> put edgeMap
    put Value{..}  = put (2 :: Int) >> put unValue
    get = do
        x <- get :: Get Int
        case x of
            1 -> Branch <$> get <*> get
            _ -> Value <$> get

-- | List of non-epsilon edges outgoing from the 'Branch' node.
edges :: Node a -> [(Int, Id)]
edges (Branch _ es)     = V.toList es
edges (Value _)         = error "edges: value node"

-- | Identifier of the child determined by the given symbol.
onSym :: Int -> Node a -> Maybe Id
onSym x (Branch _ es) = V.lookup x es
onSym _ (Value _)     = error "onSym: value node"

-- | Substitue the identifier of the child determined by the given symbol.
subst :: Int -> Id -> Node a -> Node a
subst x i (Branch w es) = Branch w (V.insert x i es)
subst _ _ (Value _)     = error "subst: value node"

-- | A set of nodes.  To every node a unique identifier is assigned.
-- Invariants: 
--
--   * freeIDs \\intersection occupiedIDs = \\emptySet,
--
--   * freeIDs \\sum occupiedIDs =
--     {0, 1, ..., |freeIDs \\sum occupiedIDs| - 1},
--
-- where occupiedIDs = elemSet idMap.
--
-- TODO: Is it possible to merge 'freeIDs' with 'ingoMap' to reduce
-- the memory footprint?
data Graph a = Graph {
    -- | Map from nodes to IDs.
      idMap     :: !(M.Map (Node a) Id)
    -- | Set of free IDs.
    , freeIDs   :: !IS.IntSet
    -- | Map from IDs to nodes. 
    , nodeMap   :: !(IM.IntMap (Node a))
    -- | Number of ingoing paths (different paths from the root
    -- to the given node) for each node ID in the graph.
    -- The number of ingoing paths can be also interpreted as
    -- a number of occurences of the node in a tree representation
    -- of the graph.
    , ingoMap   :: !(IM.IntMap Int) }
    deriving (Show, Eq, Ord)

instance (Ord a, Binary a) => Binary (Graph a) where
    put Graph{..} = do
    	put idMap
	put freeIDs
	put nodeMap
	put ingoMap
    get = Graph <$> get <*> get <*> get <*> get

-- | Empty graph.
empty :: Graph a
empty = Graph M.empty IS.empty IM.empty IM.empty

-- | Size of the graph (number of nodes).
size :: Graph a -> Int
size = M.size . idMap

-- | Node with the given identifier.
nodeBy :: Id -> Graph a -> Node a
nodeBy i g = nodeMap g IM.! i

-- | Retrieve the node identifier.
nodeID :: Ord a => Node a -> Graph a -> Id
nodeID n g = idMap g M.! n

-- | Add new graph node.
newNode :: Ord a => Node a -> Graph a -> (Id, Graph a)
newNode n Graph{..} =
    (i, Graph idMap' freeIDs' nodeMap' ingoMap')
  where
    idMap'      = M.insert  n i idMap
    nodeMap'    = IM.insert i n nodeMap
    ingoMap'    = IM.insert i 1 ingoMap
    (i, freeIDs') = if IS.null freeIDs
        then (M.size idMap, freeIDs)
        else IS.deleteFindMin freeIDs

-- | Remove node from the graph.
remNode :: Ord a => Id -> Graph a -> Graph a
remNode i Graph{..} =
    Graph idMap' freeIDs' nodeMap' ingoMap'
  where
    idMap'      = M.delete  n idMap
    nodeMap'    = IM.delete i nodeMap
    ingoMap'    = IM.delete i ingoMap
    freeIDs'    = IS.insert i freeIDs
    n           = nodeMap IM.! i

-- | Increment the number of ingoing paths.
incIngo :: Id -> Graph a -> Graph a
incIngo i g = g { ingoMap = IM.insertWith' (+) i 1 (ingoMap g) }

-- | Decrement the number of ingoing paths and return
-- the resulting number.
decIngo :: Id -> Graph a -> (Int, Graph a)
decIngo i g =
    let k = (ingoMap g IM.! i) - 1
    in  (k, g { ingoMap = IM.insert i k (ingoMap g) })

-- | Insert node into the graph.  If the node was already a member
-- of the graph, just increase the number of ingoing paths.
-- NOTE: Number of ingoing paths will not be changed for any descendants
-- of the node, so the operation alone will not ensure that properties
-- of the graph are preserved.
insert :: Ord a => Node a -> Graph a -> (Id, Graph a)
insert n g = case M.lookup n (idMap g) of
    Just i  -> (i, incIngo i g)
    Nothing -> newNode n g

-- | Delete node from the graph.  If the node was present in the graph
-- at multiple positions, just decrease the number of ingoing paths.
-- NOTE: The function does not delete descendant nodes which may become
-- inaccesible nor does it change the number of ingoing paths for any
-- descendant of the node.
delete :: Ord a => Node a -> Graph a -> Graph a
delete n g = if num == 0
    then remNode i g'
    else g'
  where
    i = nodeID n g
    (num, g') = decIngo i g