{-|
License : CC0 1.0 Universal Public Domain Dedication
Maintainer : niswegmann@gmail.com
Stability : provisional
Portability : portable (Haskell 2010)
For details of the implementation of MurmurHash3, see the following webpages:
*
*
-}
module Data.Digest.Murmur
( Hash
, hash
, Hashable (..)
, HashGen
, salt
, combine
) where
import Data.Array
import Data.Bits
import Data.Char
import Data.Complex
import Data.Int
import Data.List
import Data.Ratio
import Data.Word
--------------------------------------------------------------------------------
-- MurmurHash3 uses a mixer and a finalizer. The mixer maps a block (the data we
-- want to hash) and a hash state into a new hash state, where both blocks and
-- hash states are represented by 32-bit words. The finalizer forces all bits
-- in the hash state to avalanche.
-- Mixes two blocks.
{-# INLINE mix #-}
mix :: Word32 -> Word32 -> Word32
mix b0 h0 =
let
b1 = b0 * c1
b2 = b1 `rotateL` r1
b3 = b2 * c2
h1 = h0 `xor` b3
h2 = h1 `rotateL` r2
h3 = h2 * m1 + m2
in
h3
where
c1 = 0xcc9e2d51
c2 = 0x1b873593
r1 = 15
r2 = 13
m1 = 5
m2 = 0xe6546b64
-- Forces all bits of a hash block to avalanche.
{-# INLINE finalize #-}
finalize :: Word32 -> Word32
finalize h0 =
let
h1 = h0 `xor` (h0 `shiftR` r1)
h2 = h1 * c1
h3 = h2 `xor` (h2 `shiftR` r2)
h4 = h3 * c2
h5 = h4 `xor` (h4 `shiftR` r3)
in
h5
where
c1 = 0x85ebca6b
c2 = 0xc2b2ae35
r1 = 16
r2 = 13
r3 = 16
--------------------------------------------------------------------------------
-- Hash generators.
-- | A hash generator is a function that maps a hash state into a new hash state.
-- The internal representation of hash states is kept transparent.
newtype HashGen = HashGen (Word32 -> Word32)
-- Runs a hash generator on a seed.
runHashGen :: HashGen -> Word32 -> Hash
runHashGen (HashGen f) = finalize . f
-- | Returns a hash generator that mixes its input with a 32-bit word.
-- Is typically used for enumerating constructors when deriving 'Hashable'.
{-# INLINE salt #-}
salt :: Word32 -> HashGen
salt = HashGen . mix
-- | Combines two hash generators such that the output of the first generator is
-- piped into the next. This works similar to function composition.
-- Indeed, for all /f/, /g/, /h/, we have that
--
-- > f `combine` (g `combine` h) == (f `combine` g) `combine` h
{-# INLINE combine #-}
combine :: HashGen -> HashGen -> HashGen
combine (HashGen f) (HashGen g) = HashGen (g . f)
--------------------------------------------------------------------------------
-- | Type class for computing hash generators from values.
--
-- Making custom data types instantiate 'Hashable' is straightforward; given
-- the following tree data structure:
--
-- > data Tree a
-- > = Tip
-- > | Bin a (Tree a) (Tree a)
--
-- ...we make it instantiate 'Hashable' like this:
--
-- > instance Hashable a => Hashable (Tree a) where
-- > hashGen Tip = salt 0x0
-- > hashGen (Bin x l r) = hashGen x `combine` hashGen l `combine` hashGen r
--
-- For sum data types such as 'Either' we typically want to avoid that
--
-- > Left "foo"
--
-- hashes to the same hash as
--
-- > Right "foo"
--
-- ...hence we add some 'salt' for each constructor:
--
-- > instance (Hashable a, Hashable b) => Hashable (Either a b) where
-- > hashGen (Left x) = salt 0x1 `combine` hashGen x
-- > hashGen (Right y) = salt 0x2 `combine` hashGen y
--
class Hashable a where
-- | Returns a hash generator for the argument.
hashGen :: a -> HashGen
-- | A 32-bit hash.
type Hash = Word32
-- | Computes a 32-bit hash from a /hashable/ value.
hash :: Hashable a => a -> Hash
hash = flip runHashGen defaultSeed . hashGen
where
-- The default seed is an arbitrary 32-bit word.
defaultSeed :: Word32
defaultSeed = 4294967291
--------------------------------------------------------------------------------
-- Integral hash generators:
{-# INLINE hashWord8 #-}
hashWord8 :: Word8 -> HashGen
hashWord8 = hashWord32 . fromIntegral
{-# INLINE hashWord16 #-}
hashWord16 :: Word16 -> HashGen
hashWord16 = hashWord32 . fromIntegral
{-# INLINE hashWord32 #-}
hashWord32 :: Word32 -> HashGen
hashWord32 = HashGen . mix
{-# INLINE hashWord64 #-}
hashWord64 :: Word64 -> HashGen
hashWord64 x = hashWord32 lo `combine` hashWord32 hi
where
lo = fromIntegral x
hi = fromIntegral (x `shiftR` 32)
{-# INLINE hashInt #-}
hashInt :: Int -> HashGen
hashInt =
if bitSize (undefined :: Int) <= 32
then hashWord32 . fromIntegral
else hashWord64 . fromIntegral
{-# INLINE hashInteger #-}
hashInteger :: Integer -> HashGen
hashInteger k
| k < 0 = foldl' combine (salt 0x1) . blocks $ abs k
| otherwise = foldl' combine (salt 0x0) . blocks $ k
where
blocks 0 = []
blocks x = hashWord32 (fromInteger x) : blocks (x `shiftR` 32)
--------------------------------------------------------------------------------
-- Instances:
instance Hashable Char where
hashGen = hashGen . ord
instance Hashable Word where
hashGen = hashInt . fromIntegral
instance Hashable Int where
hashGen = hashInt
instance Hashable Word8 where
hashGen = hashWord8
instance Hashable Word16 where
hashGen = hashWord16
instance Hashable Word32 where
hashGen = hashWord32
instance Hashable Word64 where
hashGen = hashWord64
instance Hashable Int8 where
hashGen = hashWord8 . fromIntegral
instance Hashable Int16 where
hashGen = hashWord16 . fromIntegral
instance Hashable Int32 where
hashGen = hashWord32 . fromIntegral
instance Hashable Int64 where
hashGen = hashWord64 . fromIntegral
instance Hashable Integer where
hashGen = hashInteger
instance Hashable () where
hashGen () = salt 0x0
instance Hashable Bool where
hashGen False = salt 0x0
hashGen True = salt 0x1
instance Hashable a => Hashable (Maybe a) where
hashGen Nothing = salt 0x0
hashGen (Just x) = hashGen x
instance (Hashable a, Hashable b) => Hashable (Either a b) where
hashGen (Left x) = salt 0x0 `combine` hashGen x
hashGen (Right y) = salt 0x1 `combine` hashGen y
instance Hashable a => Hashable [a] where
hashGen xs = foldl' combine (salt 0x0) (fmap hashGen xs)
instance (Hashable a, Ix i) => Hashable (Array i a) where
hashGen = hashGen . elems
instance (Hashable a, Hashable b) => Hashable (a, b) where
hashGen (x, y) = hashGen x `combine` hashGen y
instance (Hashable a, Hashable b, Hashable c) => Hashable (a, b, c) where
hashGen (x, y, z) = hashGen x `combine` hashGen y `combine` hashGen z
instance (Hashable a, Hashable b, Hashable c, Hashable d)
=> Hashable (a, b, c, d) where
hashGen (x, y, z, w) =
hashGen x `combine` hashGen y `combine` hashGen z `combine` hashGen w
instance (Hashable a, Integral a) => Hashable (Ratio a) where
hashGen x = hashGen (numerator x) `combine` hashGen (denominator x)
instance Hashable Float where
hashGen = hashGen . toRational
instance Hashable Double where
hashGen = hashGen . toRational
instance (Hashable a, RealFloat a) => Hashable (Complex a) where
hashGen x = hashGen (realPart x) `combine` hashGen (imagPart x)