-- | Generic hashing on trees. We recursively compute hashes of all subtrees,
-- giving fast inequality testing, and a fast, but meaningless (more-or-less random)
-- ordering on the set of trees (so that we can put them into Map-s).
--
-- The way it works is that when we compute the hash of a node, we use the hashes of the 
-- children directly; this way, you can also incrementally build up a hashed tree.
--
module Data.Generics.Fixplate.Hash
  ( -- * Hashed tree type
    HashAnn(..) , getHash , unHashAnn
  , HashMu , topHash
  , forgetHash
    -- * Interface to the user's hash functions
  , HashValue(..)    
    -- * Hashing tres
  , hashTree , hashTreeWith
  , hashNode , hashNodeWith
  ) where

--------------------------------------------------------------------------------

-- import Data.Generics.Fixplate.Hash.Class

import Prelude as Prelude

import Control.Monad ( liftM )
import Control.Applicative ()

import Data.Generics.Fixplate
import Data.Foldable    as F
import Data.Traversable as T

import Text.Show ()

--------------------------------------------------------------------------------

-- | Hash annotation (question: should the Hash field be strict? everything else in the library is lazy...)
--
-- This is custom datatype instead of reusing 'Ann' because of the different Eq\/Ord instances we need.
--
data HashAnn hash f a = HashAnn hash (f a) deriving Show

getHash :: HashAnn hash f a -> hash
getHash (HashAnn hash _) = hash

unHashAnn :: HashAnn hash f a -> f a
unHashAnn (HashAnn _ x) = x

--------------------------------------------------------------------------------

-- | A tree annotated with hashes of all subtrees. This gives us fast inequality testing,
-- and fast (but meaningless!) ordering for 'Map'-s.
type HashMu hash f = Mu (HashAnn hash f) 

-- | The hash of the complete tree.
topHash :: HashMu hash f -> hash
topHash (Fix (HashAnn hash _)) = hash

--------------------------------------------------------------------------------

instance Functor f => Functor (HashAnn hash f) where
  fmap f (HashAnn attr t) = HashAnn attr (fmap f t)

instance Foldable f => Foldable (HashAnn hash f) where
  foldl f x (HashAnn _ t) = F.foldl f x t
  foldr f x (HashAnn _ t) = F.foldr f x t

instance Traversable f => Traversable (HashAnn hash f) where
  traverse f (HashAnn x t) = HashAnn x <$> T.traverse f t
  mapM f (HashAnn x t) = liftM (HashAnn x) (T.mapM f t)

--------------------------------------------------------------------------------

instance (Eq hash, EqF f) => EqF (HashAnn hash f) where
  equalF (HashAnn h1 x1) (HashAnn h2 x2) = if h1 /= h2 then False else equalF x1 x2 

instance (Ord hash, OrdF f) => OrdF (HashAnn hash f) where
  compareF (HashAnn h1 x1) (HashAnn h2 x2) = case compare h1 h2 of
    LT -> LT
    GT -> GT
    EQ -> compareF x1 x2

instance (ShowF f, Show hash) => ShowF (HashAnn hash f) where
  showsPrecF d (HashAnn hash x) = showParen (d>app_prec) 
    $ showString "HashAnn " 
    . showsPrec  (app_prec+1) hash 
    . showChar ' '
    . showsPrecF (app_prec+1) x
    where
      app_prec = 10

--------------------------------------------------------------------------------

forgetHash :: Functor f => HashMu hash f -> Mu f
forgetHash = go where
  go = Fix . fmap go . unHashAnn . unFix 

--------------------------------------------------------------------------------

data Void = Void ; instance Show Void where show _ = "_"

{-
showDigest :: (Functor f, ShowF f, HashValue hash) => f a -> hash -> hash
showDigest t = hashDigest $ showF (fmap (const Void) t) 
-}

{-# INLINE showDigest #-}
showDigest :: (Functor f, ShowF f) => HashValue hash -> f a -> hash -> hash
showDigest hashv t = _hashString hashv $ showF (fmap (const Void) t) 

--------------------------------------------------------------------------------

-- | This function uses the 'ShowF' instance to compute
-- the hash of a node; this way you always have a working
-- version without writing any additional code.
--
-- However, you can also supply your own hash implementation 
-- (which can be more efficient, for example), if you use 'hashTreeWith' instead.
hashTree :: (Foldable f, Functor f, ShowF f) => HashValue hash -> Mu f -> HashMu hash f 
hashTree hashv = hashTreeWith hashv (showDigest hashv) 

hashTreeWith :: (Foldable f, Functor f) => HashValue hash -> (f Hole -> hash -> hash) -> Mu f -> HashMu hash f 
hashTreeWith hashv user = go where
  go (Fix x) = worker (fmap go x)
  worker = hashNodeWith hashv user

--------------------------------------------------------------------------------

-- | A concrete hash implementation. We don't use type classes since 
-- 
--  * a hash type class does not belong to this library;
--
--  * we don't want to restrict the user's design space
--
-- Thus we simulate type classes with record types.
--
data HashValue hash = HashValue
  { _emptyHash  :: hash                    -- ^ the hash of an empty byte sequence
{-
  , _hashWord8  :: Word8  -> hash -> hash    -- ^ digest a byte
  , _hashWord16 :: Word16 -> hash -> hash    -- ^ digest two bytes
  , _hashWord32 :: Word32 -> hash -> hash    -- ^ digest four bytes
  , _hashWord64 :: Word64 -> hash -> hash    -- ^ digest eight bytes
-}
  , _hashChar   :: Char -> hash -> hash    -- ^ digest a (unicode) character
  , _hashHash   :: hash -> hash -> hash    -- ^ digest a hash value 
  }

{-# INLINE _hashString #-}
_hashString :: HashValue hash -> String -> hash -> hash
_hashString hashv xs e = Prelude.foldr f e xs where 
  f = _hashChar hashv

{-# INLINE _computeHash #-}
_computeHash :: HashValue hash -> [hash] -> hash 
_computeHash hashv hs = Prelude.foldr f e hs where 
  e = _emptyHash hashv
  f = _hashHash  hashv

{- 

-- | A minimal hash implementation. For efficiency reasons, we make a distinction between
-- this and 'HashValue' (for example if a hash function can readily digest 32 bit words, 
-- it will be probably faster than if we feed bytes to it). 
--
-- The function 'makeHashValue' can be used to convert between the two.
data ByteHashValue = ByteHashValue
  { _minEmptyHash  :: hash                     -- ^ the hash of an empty byte sequence
  , _minHashWord8  :: Word8 -> hash -> hash    -- ^ digest a byte
  , _minHashBytes  :: hash -> [Word8]          -- ^ convert a hash value to a sequence of bytes
  }

makeHashValue :: ByteHashValue hash -> HashValue hash
makeHashValue (ByteHashable empty hashWord8 hashBytes) = 
  HashValue
    { _emptyHash  = empty
{-
    , _hashWord8  = hashWord8
    , _hashWord16 = hashWord16
    , _hashWord32 = hashWord32
    , _hashWord64 = hashWord64
-}  
    , _hashChar c = hashChar c
    , _hashHash h = foldr (.) id (map hashWord8 $ hashBytes h)
  }
  where
    hashWord32 w = hashWord8 a . hashWord8 b . hashWord8 c . hashWord8 d where
      a = fromIntegral (255 .&. (       w   ))
      b = fromIntegral (255 .&. (shiftR w  8))
      c = fromIntegral (255 .&. (shiftR w 16))
      d = fromIntegral (255 .&. (shiftR w 24)) 
    hashWord16 w = hashWord8 a . hashWord8 b where
      a = fromIntegral (255 .&. (       w   ))
      b = fromIntegral (255 .&. (shiftR w  8))
    hashWord64 w = hashWord32 a . hashWord32 b where
      a = fromIntegral (0xffffffff .&. (       w   ))
      b = fromIntegral (0xffffffff .&. (shiftR w 32))

    -- We only use the lowest 16 bits here. This is questionable,
    -- but typical use case is ASCII, 16 bits cover a big part of Unicode, and for byte based
    -- hashes it is twice as fast as the more correct 32 bit version would be.
    hashChar c = hashWord16 (fromIntegral $ ord c)

-}

--------------------------------------------------------------------------------

-- | Build a hashed node from the children.
hashNode :: (Foldable f, Functor f, ShowF f) => HashValue hash -> f (HashMu hash f) -> HashMu hash f 
hashNode hashv = hashNodeWith hashv (showDigest hashv) 

hashNodeWith :: (Foldable f, Functor f) => HashValue hash -> (f Hole -> hash -> hash) -> f (HashMu hash f) -> HashMu hash f 
hashNodeWith hashv user x = Fix (HashAnn h x) where
  h  = user (fmap (const Hole) x) h0
  h0 = _computeHash hashv $ toList $ fmap (getHash . unFix) x
--  h0 = foldl' (flip hashHash) emptyHash $ toList $ fmap (getHash . unFix) x

--------------------------------------------------------------------------------