{-# LANGUAGE GADTs #-}
-- |
-- Module    : Data.StableTree.Types
-- Copyright : Jeremy Groven
-- License   : BSD3
--
-- This is the core implementation of the stable tree. The primary functions
-- exported by this module are 'nextBottom' and 'nextBranch', which gather
-- values or lower-level 'Tree's into 'Tree's of the next level.
--
-- This module is fairly esoteric. "Data.StableTree" or "Data.StableTree.IO"
-- are probably what you actually want to be using.
module Data.StableTree.Types
( IsKey(..)
, Tree(..)
, Complete
, Incomplete
, Depth
, ValueCount
, nextBottom
, nextBranch
, getKey
, completeKey
, treeContents
, branchContents
, getDepth
, getValueCount
) where

import Data.StableTree.Types.Key

import qualified Data.Map as Map
import Control.Arrow ( first, second )
import Data.Map ( Map )
import Data.List ( intercalate )

-- |Used to indicate that a 'Tree' is not complete
data Incomplete 

-- |Used to indicate that a 'Tree' is complete
data Complete   

-- |Alias to indicate how deep a branch in a tree is. Bottoms have depth 0
type Depth = Int

-- |Alias that indicates the total number of values underneath a tree
type ValueCount = Int

-- |The actual Rose Tree structure. StableTree is built on one main idea: every
-- 'Key' is either 'Terminal' or 'Nonterminal'. A complete 'Tree' is one whose
-- final element's Key is terminal, and the rest of the Keys are not (exept for
-- two freebies at the beginning to guarantee convergence). A complete tree
-- always has complete children.
--
-- If we don't have enough data to generate a complete tree (i.e. we ran out of
-- elements before hitting a terminal key), then an 'Incomplete' tree is
-- generated. Incomplete trees are always contained by other incomplete trees,
-- and a tree built from only the complete chlidren of an incomplete tree would
-- never itself be complete.
--
-- It is easiest to understand how this structure promotes stability by looking
-- at how trees typically work. The easiest tree to understand is a simple,
-- well balanced, binary tree. In that case, we would have a structure like this:
--
-- @
--       |D|
--   |B|     |F|
-- |A| |C| |E| |G|
-- @
--
-- Now, suppose that we want to delete the data stored in @|A|@. Then, we'll
-- get a new structure that shares nothing in common with the original one:
--
-- @
--       |E|
--   |C|     |G|
-- |B| |D| |F|
-- @
--
-- The entire tree had to be re-written. This structure is clearly unstable
-- under mutation. Making the tree wider doesn't help much if the tree's size
-- is changing. Simple updates to existing keys are handled well by branches
-- with many children, but deleting from or adding to the beginning of the tree
-- will always cause every single branch to change, which is what this
-- structure is trying to avoid.
--
-- Instead, the stable tree branches have variable child counts. A branch is
-- considered full when its highest key is "terminal", which is determined by
-- hashing the key and looking at some bits of the hash. I've found that a
-- target branch size of 16 children works fairly well, so we check to see if
-- the hash has its least-significant four bits set; if that's the case, the
-- key is terminal. A branch gets two free children (meaning it doesn't care
-- about whether the keys are temrinal or not), and then a run of nonterminal
-- keys, and a final, terminal key. Under this scheme, inserting a new entry
-- into a branch will probably mean inserting a nonterminal key, and it will
-- probably be inserted into the run of nonterminal children. If that's the
-- case, no neighbors will be affected, and only the parents will have to
-- change to point to the new branch. Stability is acheived!
data Tree c k v where
  Bottom :: (SomeKey k, v)
         -> (SomeKey k, v)
         -> Map (Key Nonterminal k) v
         -> (Key Terminal k, v)
         -> Tree Complete k v

  Branch :: Depth
         -> (SomeKey k, ValueCount, Tree Complete k v)
         -> (SomeKey k, ValueCount, Tree Complete k v)
         -> Map (Key Nonterminal k) (ValueCount, Tree Complete k v)
         -> (Key Terminal k, ValueCount, Tree Complete k v)
         -> Tree Complete k v

  -- Either an empty or a singleton tree
  IBottom0 :: Maybe (SomeKey k, v)
           -> Tree Incomplete k v

  -- Any number of items, but not ending with a terminal key
  IBottom1 :: (SomeKey k, v)
           -> (SomeKey k, v)
           -> Map (Key Nonterminal k) v
           -> Tree Incomplete k v

  -- A strut to lift an incomplete tree to the next level up
  IBranch0 :: Depth
           -> (SomeKey k, ValueCount, Tree Incomplete k v)
           -> Tree Incomplete k v

  -- A joining of a single complete and maybe an incomplete
  IBranch1 :: Depth
           -> (SomeKey k, ValueCount, Tree Complete k v)
           -> Maybe (SomeKey k, ValueCount, Tree Incomplete k v)
           -> Tree Incomplete k v

  -- A branch that doesn't have a terminal, and that might have an IBranch
  IBranch2 :: Depth
           -> (SomeKey k, ValueCount, Tree Complete k v)
           -> (SomeKey k, ValueCount, Tree Complete k v)
           -> Map (Key Nonterminal k) (ValueCount, Tree Complete k v)
           -> Maybe (SomeKey k, ValueCount, Tree Incomplete k v)
           -> Tree Incomplete k v

-- |Wrap up some of a k/v map into a 'Tree'. A 'Right' result gives a complete
-- tree and the map updated to not have the key/values that went into that
-- tree. A 'Left' result gives an incomplete tree that contains everything that
-- the given map contained.
nextBottom :: (Ord k, IsKey k)
           => Map k v
           -> Either (Tree Incomplete k v)
                     (Tree Complete k v, Map k v)
nextBottom values =
  case Map.minViewWithKey values >>= return . second Map.minViewWithKey of
    Nothing -> Left $ IBottom0 Nothing
    Just ((k,v), Nothing) -> Left $ IBottom0 $ Just (wrap k, v)
    Just (f1, Just (f2, remain)) ->
      go (first wrap f1) (first wrap f2) Map.empty remain

  where
  go f1 f2 accum remain =
    case Map.minViewWithKey remain of
      Nothing ->
        Left $ IBottom1 f1 f2 accum
      Just ((k, v), remain') ->
        case wrap k of
          SomeKey_N nonterm ->
            go f1 f2 (Map.insert nonterm v accum) remain'
          SomeKey_T term ->
            Right (Bottom f1 f2 accum (term, v), remain')

-- |Generate a parent for a k/Tree map. A 'Right' result gives a complete tree
-- and the map updated to not have the key/trees that went into that tree. A
-- 'Left' result gives an incomplete tree that contains everything that the
-- given map contained.
nextBranch :: (Ord k, IsKey k)
           => Map k (Tree Complete k v)
           -> Maybe (k, Tree Incomplete k v)
           -> Either (Tree Incomplete k v)
                     (Tree Complete k v, Map k (Tree Complete k v))
nextBranch branches mIncomplete =
  let freebies = Map.minViewWithKey branches
                 >>= return . second Map.minViewWithKey
  in case freebies of
    Nothing -> 
      case mIncomplete of
        Nothing       -> Left $ IBottom0 Nothing
        Just (ik, iv) -> Left $ IBranch0 depth (wrap ik, getValueCount iv, iv)
    Just ((k,v), Nothing) ->
      Left $ IBranch1 depth (wrap k, getValueCount v, v) $ wrapMKey mIncomplete
    Just (f1, Just (f2, remain)) ->
      go (wrapKey f1) (wrapKey f2) Map.empty remain

  where
  go f1 f2 accum remain =
    let popd = Map.minViewWithKey remain >>= return . first wrapKey
    in case popd of
      Nothing ->
        Left $ IBranch2 depth f1 f2 accum $ wrapMKey mIncomplete
      Just ((SomeKey_T term,c,v), remain') ->
        Right ( Branch depth f1 f2 accum (term, c, v), remain' )
      Just ((SomeKey_N nonterm,c,v), remain') ->
        go f1 f2 (Map.insert nonterm (c,v) accum) remain'

  wrapKey (k,v) = (wrap k, getValueCount v, v)

  wrapMKey = (>>=return . wrapKey)

  depth = case Map.elems branches of
    [] ->
      case mIncomplete of
        Nothing -> 1
        Just (_, v) -> 1 + getDepth v
    elems ->
      let depths@(f:r) = map getDepth elems
          (best, rest) = case mIncomplete of
                          Nothing -> (f, r)
                          Just (_, v) -> (getDepth v, depths)
      in if all (==best) rest
        then 1 + best
        else error "Depth mismatch in nextBranch"

-- |Get the key of the first entry in this branch. If the branch is empty,
-- returns Nothing.
getKey :: Tree c k v -> Maybe k
getKey (Bottom (k,_) _ _ _)       = Just $ unwrap k
getKey (IBottom0 Nothing)         = Nothing
getKey (IBottom0 (Just (k,_)))    = Just $ unwrap k
getKey (IBottom1 (k,_) _ _)       = Just $ unwrap k
getKey (Branch _ (k,_,_) _ _ _)   = Just $ unwrap k
getKey (IBranch0 _ (k,_,_))       = Just $ unwrap k
getKey (IBranch1 _ (k,_,_) _)     = Just $ unwrap k
getKey (IBranch2 _ (k,_,_) _ _ _) = Just $ unwrap k

-- |Get the key of the fist entry in this complete branch. This function is
-- total.
completeKey :: Tree Complete k v -> k
completeKey (Bottom (k,_) _ _ _)     = unwrap k
completeKey (Branch _ (k,_,_) _ _ _)   = unwrap k

-- |Convert an entire Tree into a k/v map.
treeContents :: Ord k => Tree c k v -> Map k v
treeContents t =
  case branchContents t of
    Left ( completes, Nothing) ->
      Map.unions $ map (treeContents . snd) $ Map.elems completes
    Left ( completes, Just (_k, _c, iv)) ->
      Map.unions $ treeContents iv:map (treeContents . snd) (Map.elems completes)
    Right x -> x

-- |Get the number of levels of branches that live below this one
getDepth :: Tree c k v -> Depth
getDepth (Bottom _ _ _ _)       = 0
getDepth (Branch d _ _ _ _)   = d
getDepth (IBottom0 _)           = 0
getDepth (IBottom1 _ _ _)       = 0
getDepth (IBranch0 d _)       = d
getDepth (IBranch1 d _ _)     = d
getDepth (IBranch2 d _ _ _ _) = d

-- |Get the number of actual values that live below this branch
getValueCount :: Tree c k v -> ValueCount
getValueCount (Bottom _ _ m _) = 3 + Map.size m
getValueCount (IBottom0 Nothing)     = 0
getValueCount (IBottom0 _)           = 1
getValueCount (IBottom1 _ _ m)       = 2 + Map.size m

getValueCount (Branch _ (_,c1,_) (_,c2,_) nterm (_,c3,_)) =
  c1 + c2 + c3 + sum (map fst $ Map.elems nterm)
getValueCount (IBranch0 _ (_,c,_)) =
  c
getValueCount (IBranch1 _ (_,c,_) Nothing) =
  c
getValueCount (IBranch1 _ (_,c1,_) (Just (_,c2,_))) =
  c1+c2
getValueCount (IBranch2 _ (_,c1,_) (_,c2,_) m i) =
  c1 + c2 + sum (map fst $ Map.elems m) + maybe 0 (\(_,c3,_)->c3) i

-- |Non-recursive function to simply get the immediate children of the given
-- branch. This will either give the key/value map of a Bottom, or the key/tree
-- map of a non-bottom branch.
branchContents :: Ord k
               => Tree c k v
               -> Either ( Map k (ValueCount, Tree Complete k v)
                         , Maybe (k, ValueCount, Tree Incomplete k v))
                         ( Map k v )
branchContents (Bottom (k1,v1) (k2,v2) terms (kt,vt)) =
  let terms' = Map.mapKeys fromKey terms
      conts  = Map.insert (unwrap k1) v1
             $ Map.insert (unwrap k2) v2
             $ Map.insert (fromKey kt) vt
             terms'
  in Right conts
branchContents (Branch _d (k1,c1,v1) (k2,c2,v2) terms (kt,ct,vt)) =
  let terms' = Map.mapKeys fromKey terms
      conts  = Map.insert (unwrap k1) (c1,v1)
             $ Map.insert (unwrap k2) (c2,v2)
             $ Map.insert (fromKey kt) (ct,vt)
             terms'
  in Left (conts, Nothing)
branchContents (IBottom0 Nothing) =
  Right Map.empty
branchContents (IBottom0 (Just (k,v))) =
  Right $ Map.singleton (unwrap k) v
branchContents (IBottom1 (k1,v1) (k2,v2) terms) =
  let terms' = Map.mapKeys fromKey terms
      conts  = Map.insert (unwrap k1) v1
             $ Map.insert (unwrap k2) v2
             terms'
  in Right conts
branchContents (IBranch0 _d (ik,ic,iv)) =
  Left (Map.empty, Just (unwrap ik, ic, iv))
branchContents (IBranch1 _d (k1,c1,v1) mIncomplete) =
  Left ( Map.singleton (unwrap k1) (c1,v1)
       , mIncomplete >>= (\(k,c,v) -> return (unwrap k,c,v)))
branchContents (IBranch2 _d (k1,c1,v1) (k2,c2,v2) terms mIncomplete) =
  let terms' = Map.mapKeys fromKey terms
      conts  = Map.insert (unwrap k1) (c1,v1)
             $ Map.insert (unwrap k2) (c2,v2)
             terms'
  in Left (conts, mIncomplete >>= \(k,c,v) -> return (unwrap k, c, v))

instance (Ord k, Show k, Show v) => Show (Tree c k v) where
  show t@(Bottom _ _ _ _)       = branchShow "Bottom" t
  show t@(Branch _ _ _ _ _)   = branchShow "Branch" t
  show t@(IBottom0 _)           = branchShow "IBottom" t
  show t@(IBottom1 _ _ _)       = branchShow "IBottom" t
  show t@(IBranch0 _ _)       = branchShow "IBranch" t
  show t@(IBranch1 _ _ _)     = branchShow "IBranch" t
  show t@(IBranch2 _ _ _ _ _) = branchShow "IBranch" t

branchShow :: (Ord k, Show k, Show v) => String -> Tree c k v -> String
branchShow header t =
  case branchContents t of
    Left (ts, Nothing) ->
      let strs = [show k ++ " => " ++ show v | (k, v) <- Map.toAscList ts]
          str  = intercalate ", " strs
      in header ++ "(" ++ show (getDepth t) ++ ")<" ++ str ++ ">"
    Left (ts, Just (ik, _ic, iv)) ->
      let strs = [ show k ++ " => " ++ show v | (k, v) <- Map.toAscList ts
                 ] ++ [show ik ++ " => " ++ show iv]
          str  = intercalate ", " strs
      in header ++ "(" ++ show (getDepth t) ++ ")<" ++ str ++ ">"
    Right vals ->
      let strs = [ show k ++ " => " ++ show v | (k, v) <- Map.toAscList vals ]
          str  = intercalate ", " strs
      in header ++ "(" ++ show (getDepth t) ++ ")<" ++ str ++ ">"