{-# LANGUAGE RecordWildCards #-}

-- | Internal representation of automata nodes.

module Data.DAWG.Node
(
-- * Basic types
  ID
, Sym
-- * Node
, Node (..)
, onSym
, trans
, edges
, subst
, toGeneric
-- * Edge
, Edge
, to
, label
, annotate
, labeled
) where

import Control.Applicative ((<$>), (<*>))
import Control.Arrow (second)
import Data.Binary (Binary, Get, put, get)
import Data.Vector.Unboxed (Unbox)

import qualified Data.DAWG.VMap as M
import qualified Data.DAWG.Node.Specialized as N

-- | Node identifier.
type ID = Int

-- | Internal representation of the transition symbol.
type Sym = Int

-- | Edge with label.
type Edge a = (ID, a)

-- | Destination ID.
to :: Edge a -> ID
to = fst
{-# INLINE to #-}

-- | Edge label.
label :: Edge a -> a
label = snd
{-# INLINE label #-}

-- | Annotate edge wit a new label.
annotate :: a -> Edge b -> Edge a
annotate x (i, _) = (i, x)
{-# INLINE annotate #-}

-- | Construct edge annotated with a given label.
labeled :: a -> ID -> Edge a
labeled x i = (i, x)
{-# INLINE labeled #-}

-- | Two nodes (states) belong to the same equivalence class (and,
-- consequently, they must be represented as one node in the graph)
-- iff they are equal with respect to their values and outgoing
-- edges.
--
-- Since 'Leaf' nodes are distinguished from 'Branch' nodes, two values
-- equal with respect to '==' function are always kept in one 'Leaf'
-- node in the graph.  It doesn't change the fact that to all 'Branch'
-- nodes one value is assigned through the epsilon transition.
--
-- Invariant: the 'eps' identifier always points to the 'Leaf' node.
-- Edges in the 'edgeMap', on the other hand, point to 'Branch' nodes.
data Node a b 
    = Branch {
        -- | Epsilon transition.
          eps       :: {-# UNPACK #-} !ID
        -- | Labeled edges outgoing from the node.
        , edgeMap   :: !(M.VMap b) }
    | Leaf { value  :: !a }
    deriving (Show, Eq, Ord)

instance (Unbox b, Binary a, Binary b) => Binary (Node a b) where
    put Branch{..} = put (1 :: Int) >> put eps >> put edgeMap
    put Leaf{..}   = put (2 :: Int) >> put value
    get = do
        x <- get :: Get Int
        case x of
            1 -> Branch <$> get <*> get
            _ -> Leaf <$> get

-- | Transition function.
onSym :: Unbox b => Sym -> Node a b -> Maybe b
onSym x (Branch _ es)   = M.lookup x es
onSym _ (Leaf _)        = Nothing
{-# INLINE onSym #-}

-- | List of symbol/edge pairs outgoing from the node.
trans :: Unbox b => Node a b -> [(Sym, b)]
trans (Branch _ es)     = M.toList es
trans (Leaf _)          = []
{-# INLINE trans #-}

-- | List of outgoing edges.
edges :: Unbox b => Node a b -> [b]
edges = map snd . trans
{-# INLINE edges #-}

-- | Substitue edge determined by a given symbol.
subst :: Unbox b => Sym -> b -> Node a b -> Node a b
subst x e (Branch w es) = Branch w (M.insert x e es)
subst _ _ l             = l
{-# INLINE subst #-}

-- | Yield generic version of a specialized node.
toGeneric :: N.Node a -> Node a (Edge ())
toGeneric N.Leaf{..}    = Leaf value
toGeneric N.Branch{..}  = Branch eps (annEdges edgeMap) where
    annEdges = M.fromList . map annEdge . M.toList
    annEdge = second (labeled ())