-- | Generic hashing on trees. We recursively compute hashes of all subtrees, -- giving fast inequality testing, and a fast, but meaningless (more-or-less random) -- ordering on the set of trees (so that we can put them into Map-s). -- -- The way it works is that when we compute the hash of a node, we use the hashes of the -- children directly; this way, you can also incrementally build up a hashed tree. -- module Data.Generics.Fixplate.Hash ( -- * Hashed tree type HashAnn(..) , getHash , unHashAnn , HashMu , topHash , forgetHash -- * Interface to the user's hash functions , HashValue(..) -- * Hashing tres , hashTree , hashTreeWith , hashNode , hashNodeWith ) where -------------------------------------------------------------------------------- -- import Data.Generics.Fixplate.Hash.Class import Prelude as Prelude import Control.Monad ( liftM ) import Control.Applicative () import Data.Generics.Fixplate import Data.Foldable as F import Data.Traversable as T import Text.Show () -------------------------------------------------------------------------------- -- | Hash annotation (question: should the Hash field be strict? everything else in the library is lazy...) -- -- This is custom datatype instead of reusing 'Ann' because of the different Eq\/Ord instances we need. -- data HashAnn hash f a = HashAnn hash (f a) deriving Show getHash :: HashAnn hash f a -> hash getHash (HashAnn hash _) = hash unHashAnn :: HashAnn hash f a -> f a unHashAnn (HashAnn _ x) = x -------------------------------------------------------------------------------- -- | A tree annotated with hashes of all subtrees. This gives us fast inequality testing, -- and fast (but meaningless!) ordering for 'Map'-s. type HashMu hash f = Mu (HashAnn hash f) -- | The hash of the complete tree. topHash :: HashMu hash f -> hash topHash (Fix (HashAnn hash _)) = hash -------------------------------------------------------------------------------- instance Functor f => Functor (HashAnn hash f) where fmap f (HashAnn attr t) = HashAnn attr (fmap f t) instance Foldable f => Foldable (HashAnn hash f) where foldl f x (HashAnn _ t) = F.foldl f x t foldr f x (HashAnn _ t) = F.foldr f x t instance Traversable f => Traversable (HashAnn hash f) where traverse f (HashAnn x t) = HashAnn x <$> T.traverse f t mapM f (HashAnn x t) = liftM (HashAnn x) (T.mapM f t) -------------------------------------------------------------------------------- instance (Eq hash, EqF f) => EqF (HashAnn hash f) where equalF (HashAnn h1 x1) (HashAnn h2 x2) = if h1 /= h2 then False else equalF x1 x2 instance (Ord hash, OrdF f) => OrdF (HashAnn hash f) where compareF (HashAnn h1 x1) (HashAnn h2 x2) = case compare h1 h2 of LT -> LT GT -> GT EQ -> compareF x1 x2 instance (ShowF f, Show hash) => ShowF (HashAnn hash f) where showsPrecF d (HashAnn hash x) = showParen (d>app_prec) $ showString "HashAnn " . showsPrec (app_prec+1) hash . showChar ' ' . showsPrecF (app_prec+1) x where app_prec = 10 -------------------------------------------------------------------------------- forgetHash :: Functor f => HashMu hash f -> Mu f forgetHash = go where go = Fix . fmap go . unHashAnn . unFix -------------------------------------------------------------------------------- data Void = Void ; instance Show Void where show _ = "_" {- showDigest :: (Functor f, ShowF f, HashValue hash) => f a -> hash -> hash showDigest t = hashDigest $ showF (fmap (const Void) t) -} {-# INLINE showDigest #-} showDigest :: (Functor f, ShowF f) => HashValue hash -> f a -> hash -> hash showDigest hashv t = _hashString hashv $ showF (fmap (const Void) t) -------------------------------------------------------------------------------- -- | This function uses the 'ShowF' instance to compute -- the hash of a node; this way you always have a working -- version without writing any additional code. -- -- However, you can also supply your own hash implementation -- (which can be more efficient, for example), if you use 'hashTreeWith' instead. hashTree :: (Foldable f, Functor f, ShowF f) => HashValue hash -> Mu f -> HashMu hash f hashTree hashv = hashTreeWith hashv (showDigest hashv) hashTreeWith :: (Foldable f, Functor f) => HashValue hash -> (f Hole -> hash -> hash) -> Mu f -> HashMu hash f hashTreeWith hashv user = go where go (Fix x) = worker (fmap go x) worker = hashNodeWith hashv user -------------------------------------------------------------------------------- -- | A concrete hash implementation. We don't use type classes since -- -- * a hash type class does not belong to this library; -- -- * we don't want to restrict the user's design space -- -- Thus we simulate type classes with record types. -- data HashValue hash = HashValue { _emptyHash :: hash -- ^ the hash of an empty byte sequence {- , _hashWord8 :: Word8 -> hash -> hash -- ^ digest a byte , _hashWord16 :: Word16 -> hash -> hash -- ^ digest two bytes , _hashWord32 :: Word32 -> hash -> hash -- ^ digest four bytes , _hashWord64 :: Word64 -> hash -> hash -- ^ digest eight bytes -} , _hashChar :: Char -> hash -> hash -- ^ digest a (unicode) character , _hashHash :: hash -> hash -> hash -- ^ digest a hash value } {-# INLINE _hashString #-} _hashString :: HashValue hash -> String -> hash -> hash _hashString hashv xs e = Prelude.foldr f e xs where f = _hashChar hashv {-# INLINE _computeHash #-} _computeHash :: HashValue hash -> [hash] -> hash _computeHash hashv hs = Prelude.foldr f e hs where e = _emptyHash hashv f = _hashHash hashv {- -- | A minimal hash implementation. For efficiency reasons, we make a distinction between -- this and 'HashValue' (for example if a hash function can readily digest 32 bit words, -- it will be probably faster than if we feed bytes to it). -- -- The function 'makeHashValue' can be used to convert between the two. data ByteHashValue = ByteHashValue { _minEmptyHash :: hash -- ^ the hash of an empty byte sequence , _minHashWord8 :: Word8 -> hash -> hash -- ^ digest a byte , _minHashBytes :: hash -> [Word8] -- ^ convert a hash value to a sequence of bytes } makeHashValue :: ByteHashValue hash -> HashValue hash makeHashValue (ByteHashable empty hashWord8 hashBytes) = HashValue { _emptyHash = empty {- , _hashWord8 = hashWord8 , _hashWord16 = hashWord16 , _hashWord32 = hashWord32 , _hashWord64 = hashWord64 -} , _hashChar c = hashChar c , _hashHash h = foldr (.) id (map hashWord8 $ hashBytes h) } where hashWord32 w = hashWord8 a . hashWord8 b . hashWord8 c . hashWord8 d where a = fromIntegral (255 .&. ( w )) b = fromIntegral (255 .&. (shiftR w 8)) c = fromIntegral (255 .&. (shiftR w 16)) d = fromIntegral (255 .&. (shiftR w 24)) hashWord16 w = hashWord8 a . hashWord8 b where a = fromIntegral (255 .&. ( w )) b = fromIntegral (255 .&. (shiftR w 8)) hashWord64 w = hashWord32 a . hashWord32 b where a = fromIntegral (0xffffffff .&. ( w )) b = fromIntegral (0xffffffff .&. (shiftR w 32)) -- We only use the lowest 16 bits here. This is questionable, -- but typical use case is ASCII, 16 bits cover a big part of Unicode, and for byte based -- hashes it is twice as fast as the more correct 32 bit version would be. hashChar c = hashWord16 (fromIntegral $ ord c) -} -------------------------------------------------------------------------------- -- | Build a hashed node from the children. hashNode :: (Foldable f, Functor f, ShowF f) => HashValue hash -> f (HashMu hash f) -> HashMu hash f hashNode hashv = hashNodeWith hashv (showDigest hashv) hashNodeWith :: (Foldable f, Functor f) => HashValue hash -> (f Hole -> hash -> hash) -> f (HashMu hash f) -> HashMu hash f hashNodeWith hashv user x = Fix (HashAnn h x) where h = user (fmap (const Hole) x) h0 h0 = _computeHash hashv $ toList $ fmap (getHash . unFix) x -- h0 = foldl' (flip hashHash) emptyHash $ toList $ fmap (getHash . unFix) x --------------------------------------------------------------------------------