{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, Unsafe #-}

module Data.Columbia.Dictionaries (
-- | ** Conversions to and from 'LazyMap's
lazyMapToMap, mapToLazyMap,
-- | ** Rudimentary manipulation operations
lazyMapToAscList, lazyMapSize, lazyMapRange, lazyMapOpenRange, lazyMapMinKey, lazyMapMaxKey, lazyMapMerge,
-- | ** Seeking to and reading a sub-range
seekInRange) where

import Data.Word
import Data.Typeable hiding (Proxy)
import qualified Data.Map as M
import Data.Map (Map)
import Data.List
import Control.Monad.Reader
import Data.Generics.SYB.WithClass.Basics
import Data.Columbia.Internal.SeekableStream
import Data.Columbia.Internal.IntegralTypes
import Data.Columbia.Internal.Headers
import Data.Columbia.Internal.Orphans
import Data.Columbia.CompoundData
import Data.Columbia.WithAddress
import Data.Columbia.FRecord

import Unsafe.Coerce

-- | A data type that is representation-equivalent to the 'Map' data type.
data StrictMap k v = BinPrimePrime !Int !k v !(StrictMap k v) !(StrictMap k v) | TipPrimePrime

unsafeStrictMapToMap :: StrictMap k v -> Map k v
unsafeStrictMapToMap = unsafeCoerce

lazyMapToStrictMap :: LazyMap k v -> StrictMap k v
lazyMapToStrictMap (BinPrime sz k v bin bin2) = BinPrimePrime sz k v(lazyMapToStrictMap(copoint bin)) (lazyMapToStrictMap(copoint bin2))
lazyMapToStrictMap TipPrime = TipPrimePrime

-- | It forces a lazy dictionary into a dictionary. This takes O(n) time as opposed to the
--   O(n log n) that it would take to build the 'Map' via its interface. It is
--   still very wrong and this conversion should only be used as a step in reading
--   dictionaries from a stream representation.
lazyMapToMap :: LazyMap k v -> Map k v
lazyMapToMap = unsafeStrictMapToMap. lazyMapToStrictMap

unsafeMapToStrictMap :: Map k v -> StrictMap k v
unsafeMapToStrictMap = unsafeCoerce

strictMapToLazyMap :: StrictMap k v -> LazyMap k v
strictMapToLazyMap (BinPrimePrime sz k v bin bin2) = BinPrime sz k v(point(strictMapToLazyMap bin)) (point(strictMapToLazyMap bin2))
strictMapToLazyMap TipPrimePrime = TipPrime

mapToLazyMap :: Map k v -> LazyMap k v
mapToLazyMap = strictMapToLazyMap. unsafeMapToStrictMap

lazyMapToAscList :: LazyMap k v -> [(k,v)]
lazyMapToAscList (BinPrime _ k v bin bin2) = lazyMapToAscList(copoint bin) ++ (k,v) : lazyMapToAscList(copoint bin2)
lazyMapToAscList TipPrime = []

lazyMapSize :: LazyMap k v -> Int
lazyMapSize (BinPrime sz _ _ _ _) = sz
lazyMapSize TipPrime = 0

-- | 'lazyMapRange' is for computing a specific range in a 'LazyMap' -- no balancing is done.
lazyMapRange :: (Ord k) => k -> k -> WithAddress(LazyMap k v) -> WithAddress(LazyMap k v)
lazyMapRange k k2 bin = case copoint bin of
	BinPrime _ k3 v bin' bin2' -> let
		bin'' = lazyMapRange k k2 bin'
		bin2'' = lazyMapRange k k2 bin2' in
		if k3 < k then
		bin2''
		else if k3 > k2 then
		bin''
		else 
		point$BinPrime(succ(lazyMapSize(copoint bin'')+lazyMapSize(copoint bin2''))) k3 v bin'' bin2''
	TipPrime -> bin

lazyMapOpenRange :: (Ord k) => k -> k -> WithAddress(LazyMap k v) -> WithAddress(LazyMap k v)
lazyMapOpenRange k k2 bin = case copoint bin of
	BinPrime _ k3 v bin' bin2' -> let
		bin'' = lazyMapOpenRange k k2 bin'
		bin2'' = lazyMapOpenRange k k2 bin2' in
		if k3 <= k then
		bin2''
		else if k3 >= k2 then
		bin''
		else 
		point$BinPrime(succ(lazyMapSize(copoint bin'')+lazyMapSize(copoint bin2''))) k3 v bin'' bin2''
	TipPrime -> bin

-- | These find the minimum and maximum keys resp. of 'LazyMap's provided the keys
--   are ordered.
lazyMapMinKey :: LazyMap k v -> Maybe k
lazyMapMinKey (BinPrime _ k _ bin _) = lazyMapMinKey(copoint bin) `mplus` return k
lazyMapMinKey TipPrime = mzero

lazyMapMaxKey :: LazyMap k v -> Maybe k
lazyMapMaxKey (BinPrime _ k _ _ bin2) = lazyMapMaxKey(copoint bin2) `mplus` return k
lazyMapMaxKey TipPrime = mzero

-- | It merges two 'LazyMap's in O(n) time, provided the /lazyMapMaxKey lm <= lazyMapMinKey lm2/.
--   No attempt is made to balance the resulting tree structure.
lazyMapMerge :: WithAddress(LazyMap k v) -> WithAddress(LazyMap k v) -> WithAddress(LazyMap k v)
lazyMapMerge bin bin2 = case copoint bin of
	BinPrime sz k v bin' bin2' -> point$BinPrime(sz+lazyMapSize(copoint bin2)) k v bin'(lazyMapMerge bin2' bin2)
	TipPrime -> bin2

----------------------------------------------

_findSplitAddresses :: (Ord k, Data ctx k, Sat(ctx Int), HasField ctx RWCtx, Monad m)
	=> Proxy ctx
	-> k
	-> k
	-> ReaderT(SeekableStream m Word8) m [(k,Either Word32 Word32)]
_findSplitAddresses proxy k k2 = do
	n <- getPosition
	seekByPointer
	hdr@(_, ctor, _) <- readHeader
	seek n
	if ctor == 1 then do
		seekToField 1
		sz :: Int <- fixT proxy readOneLayer
		seek n
		seekToField 2
		k3 <- fixT proxy readOneLayer
		if k3 < k then do
				seek n
				seekToField 5
				_findSplitAddresses proxy k k2
			else if k3 > k2 then do
				seek n
				seekToField 4
				_findSplitAddresses proxy k k2
			else if sz == 1 then
				return$![(k3,Right n)]
			else do
				seek n
				seekToField 4
				n1 <- getPosition
				ls <- _findSplitAddresses proxy k k2
				seek n
				seekToField 5
				n2 <- getPosition
				ls2 <- _findSplitAddresses proxy k k2
				let ls' = snd <$> ls
				let ls2' = snd <$> ls2
				return$!if ls' == [Right n1] && ls2' == [Right n2] then
					-- It coalesces two included subtrees as a single subtree.
					[(k3,Right n)]
					else
					-- If only part of the subtrees are included, it returns the root key
					-- as a singleton map.
					ls ++ (k3,Left n) : ls2
		else
			return$![]

-- | 'seekInRange' is a seeking operation. It is applicable to 'LazyMap's in a stream representation;
--   it requires a stream to be seeked at a 'LazyMap'. Its function is to locate the entries of a
--   dictionary that are bounded above and below by certain key values. The key values located
--   are partitioned into subtrees, in such a way as to maximize the number of subtrees that can
--   be directly referenced in the file (and minimize the number of new nodes constructed
--   to structure the resulting tree).
seekInRange :: forall ctx k v m. (Ord k,
	Data ctx k,
	Data ctx v,
	Sat(ctx Int),
	Sat(ctx(LazyMap k v)),
	Sat(ctx Word32),
	Sat(ctx(WithAddress(LazyMap k v))),
	HasField ctx RWCtx,
	Monad m)
	=> k
	-> k
	-> PolyTraversal ctx m(WithAddress(LazyMap k v))
seekInRange k k2 proxy m = do
	ls <- _findSplitAddresses proxy k k2
	-- 'lazyMapMerge' is a right-biased merge; by doing left-biased merging at this line it
	-- achieves rough (not perfect) balance.
	liftM(foldl' lazyMapMerge(point TipPrime))$
		mapM(\(k,ei)->either
		(\n->do { seek n; seekToField 3; liftM(point.mapToLazyMap. M.singleton k) m })
		(\n->do { seek n; m })
		ei)
		ls