{-# LANGUAGE DeriveDataTypeable #-}

-- | The underlying structure for a Trie. 
--
-- This is an internal module, but is exposed for now because I'm not
-- sure how to best generically target features such as structural 
-- diffs. And we might eventually benefit from zippers, etc. After 
-- experimenting and learning, I ask you to push the best generic 
-- code into the vcache-trie package (i.e. send a pull request).
module Data.VCache.Trie.Type
    ( Trie(..)
    , Node(..)
    , Children
    , Child
    , unsafeTrieAddr
    ) where

import Control.Applicative
import Data.Word
import qualified Data.Array.IArray as A
import Data.ByteString (ByteString)
import Data.Typeable
import Data.Maybe

import Database.VCache
-- Thoughts:
--
-- I think we shouldn't encode too much information into any one
-- node, so I'm aiming for:
--
--   encode at most one extended prefix per node
--   encode at most one value per node
--   encode at most 256 references to child nodes
--
-- Children are serialized as a simple [(Word8, VRef Node)] list.
-- The high branching factor favors sparse, flat tries, which is
-- useful because VCache taxes deep lookups more than looking up
-- larger values. 
--


-- | A node should either accept a value or branch into at least two
-- children. 
data Node a = Node
    { trie_branch :: {-# UNPACK #-} !(Children a) -- arity 256; one byte from prefix
    , trie_prefix :: {-# UNPACK #-} !ByteString   -- compact extended prefix
    , trie_accept :: !(Maybe a)                   -- value associated with prefix
    } deriving (Eq, Typeable)
-- Invariant for nodes: either we accept or we have at least two children

type Children a = A.Array Word8 (Child a)
type Child a = Maybe (VRef (Node a))


-- | A trie data structure with bytestring keys, above VCache.
-- 
-- A trie supports keys of arbitrary size, though very large keys may
-- cause performance degradation. Values are directly serialized into
-- nodes, so very large values should use indirection.
-- 
data Trie a = Trie 
    { trie_root  :: !(Child a)
    , trie_space :: !VSpace
    } deriving (Eq, Typeable) 

instance (VCacheable a) => VCacheable (Node a) where
    get = Node <$> getChildren <*> get <*> get
    put (Node c p v) = putChildren c >> put p >> put v
instance (VCacheable a) => VCacheable (Trie a) where
    get = Trie <$> get <*> getVSpace
    put = put . trie_root
instance Show (Trie a) where
    showsPrec _ t = showString "Trie#" . shows (unsafeTrieAddr t)

-- | Obtain unique address for Trie value. As with VRef addresses, this
-- should be stable while the Trie is reachable, but may change if the
-- value is GC'd and later reconstructed at a new address. Exposed for
-- memoization and similar purposes. 
unsafeTrieAddr :: Trie a -> Word64
unsafeTrieAddr = maybe 0 unsafeVRefAddr . trie_root
{-# INLINE unsafeTrieAddr #-}

mkChildren :: [(Word8, VRef (Node a))] -> Children a
mkChildren = A.accumArray ins Nothing (minBound, maxBound) where
    ins _ c = Just c

getChildren :: (VCacheable a) => VGet (Children a)
getChildren = mkChildren <$> get -- get a list of pairs

listChildren :: Children a -> [(Word8, VRef (Node a))]
listChildren = mapMaybe toChild . A.assocs where
    toChild (_, Nothing) = Nothing
    toChild (ix, Just c) = Just (ix, c)

putChildren :: (VCacheable a) => Children a -> VPut ()
putChildren = put . listChildren