{-| License : CC0 1.0 Universal Public Domain Dedication Maintainer : niswegmann@gmail.com Stability : provisional Portability : portable (Haskell 2010) For details of the implementation of MurmurHash3, see the following webpages: * * -} module Data.Digest.Murmur ( Hash , hash , Hashable (..) , HashGen , salt , combine ) where import Data.Array import Data.Bits import Data.Char import Data.Complex import Data.Int import Data.List import Data.Ratio import Data.Word -------------------------------------------------------------------------------- -- MurmurHash3 uses a mixer and a finalizer. The mixer maps a block (the data we -- want to hash) and a hash state into a new hash state, where both blocks and -- hash states are represented by 32-bit words. The finalizer forces all bits -- in the hash state to avalanche. -- Mixes two blocks. {-# INLINE mix #-} mix :: Word32 -> Word32 -> Word32 mix b0 h0 = let b1 = b0 * c1 b2 = b1 `rotateL` r1 b3 = b2 * c2 h1 = h0 `xor` b3 h2 = h1 `rotateL` r2 h3 = h2 * m1 + m2 in h3 where c1 = 0xcc9e2d51 c2 = 0x1b873593 r1 = 15 r2 = 13 m1 = 5 m2 = 0xe6546b64 -- Forces all bits of a hash block to avalanche. {-# INLINE finalize #-} finalize :: Word32 -> Word32 finalize h0 = let h1 = h0 `xor` (h0 `shiftR` r1) h2 = h1 * c1 h3 = h2 `xor` (h2 `shiftR` r2) h4 = h3 * c2 h5 = h4 `xor` (h4 `shiftR` r3) in h5 where c1 = 0x85ebca6b c2 = 0xc2b2ae35 r1 = 16 r2 = 13 r3 = 16 -------------------------------------------------------------------------------- -- Hash generators. -- | A hash generator is a function that maps a hash state into a new hash state. -- The internal representation of hash states is kept transparent. newtype HashGen = HashGen (Word32 -> Word32) -- Runs a hash generator on a seed. runHashGen :: HashGen -> Word32 -> Hash runHashGen (HashGen f) = finalize . f -- | Returns a hash generator that mixes its input with a 32-bit word. -- Is typically used for enumerating constructors when deriving 'Hashable'. {-# INLINE salt #-} salt :: Word32 -> HashGen salt = HashGen . mix -- | Combines two hash generators such that the output of the first generator is -- piped into the next. This works similar to function composition. -- Indeed, for all /f/, /g/, /h/, we have that -- -- > f `combine` (g `combine` h) == (f `combine` g) `combine` h {-# INLINE combine #-} combine :: HashGen -> HashGen -> HashGen combine (HashGen f) (HashGen g) = HashGen (g . f) -------------------------------------------------------------------------------- -- | Type class for computing hash generators from values. -- -- Making custom data types instantiate 'Hashable' is straightforward; given -- the following tree data structure: -- -- > data Tree a -- > = Tip -- > | Bin a (Tree a) (Tree a) -- -- ...we make it instantiate 'Hashable' like this: -- -- > instance Hashable a => Hashable (Tree a) where -- > hashGen Tip = salt 0x0 -- > hashGen (Bin x l r) = hashGen x `combine` hashGen l `combine` hashGen r -- -- For sum data types such as 'Either' we typically want to avoid that -- -- > Left "foo" -- -- hashes to the same hash as -- -- > Right "foo" -- -- ...hence we add some 'salt' for each constructor: -- -- > instance (Hashable a, Hashable b) => Hashable (Either a b) where -- > hashGen (Left x) = salt 0x1 `combine` hashGen x -- > hashGen (Right y) = salt 0x2 `combine` hashGen y -- class Hashable a where -- | Returns a hash generator for the argument. hashGen :: a -> HashGen -- | A 32-bit hash. type Hash = Word32 -- | Computes a 32-bit hash from a /hashable/ value. hash :: Hashable a => a -> Hash hash = flip runHashGen defaultSeed . hashGen where -- The default seed is an arbitrary 32-bit word. defaultSeed :: Word32 defaultSeed = 4294967291 -------------------------------------------------------------------------------- -- Integral hash generators: {-# INLINE hashWord8 #-} hashWord8 :: Word8 -> HashGen hashWord8 = hashWord32 . fromIntegral {-# INLINE hashWord16 #-} hashWord16 :: Word16 -> HashGen hashWord16 = hashWord32 . fromIntegral {-# INLINE hashWord32 #-} hashWord32 :: Word32 -> HashGen hashWord32 = HashGen . mix {-# INLINE hashWord64 #-} hashWord64 :: Word64 -> HashGen hashWord64 x = hashWord32 lo `combine` hashWord32 hi where lo = fromIntegral x hi = fromIntegral (x `shiftR` 32) {-# INLINE hashInt #-} hashInt :: Int -> HashGen hashInt = if bitSize (undefined :: Int) <= 32 then hashWord32 . fromIntegral else hashWord64 . fromIntegral {-# INLINE hashInteger #-} hashInteger :: Integer -> HashGen hashInteger k | k < 0 = foldl' combine (salt 0x1) . blocks $ abs k | otherwise = foldl' combine (salt 0x0) . blocks $ k where blocks 0 = [] blocks x = hashWord32 (fromInteger x) : blocks (x `shiftR` 32) -------------------------------------------------------------------------------- -- Instances: instance Hashable Char where hashGen = hashGen . ord instance Hashable Word where hashGen = hashInt . fromIntegral instance Hashable Int where hashGen = hashInt instance Hashable Word8 where hashGen = hashWord8 instance Hashable Word16 where hashGen = hashWord16 instance Hashable Word32 where hashGen = hashWord32 instance Hashable Word64 where hashGen = hashWord64 instance Hashable Int8 where hashGen = hashWord8 . fromIntegral instance Hashable Int16 where hashGen = hashWord16 . fromIntegral instance Hashable Int32 where hashGen = hashWord32 . fromIntegral instance Hashable Int64 where hashGen = hashWord64 . fromIntegral instance Hashable Integer where hashGen = hashInteger instance Hashable () where hashGen () = salt 0x0 instance Hashable Bool where hashGen False = salt 0x0 hashGen True = salt 0x1 instance Hashable a => Hashable (Maybe a) where hashGen Nothing = salt 0x0 hashGen (Just x) = hashGen x instance (Hashable a, Hashable b) => Hashable (Either a b) where hashGen (Left x) = salt 0x0 `combine` hashGen x hashGen (Right y) = salt 0x1 `combine` hashGen y instance Hashable a => Hashable [a] where hashGen xs = foldl' combine (salt 0x0) (fmap hashGen xs) instance (Hashable a, Ix i) => Hashable (Array i a) where hashGen = hashGen . elems instance (Hashable a, Hashable b) => Hashable (a, b) where hashGen (x, y) = hashGen x `combine` hashGen y instance (Hashable a, Hashable b, Hashable c) => Hashable (a, b, c) where hashGen (x, y, z) = hashGen x `combine` hashGen y `combine` hashGen z instance (Hashable a, Hashable b, Hashable c, Hashable d) => Hashable (a, b, c, d) where hashGen (x, y, z, w) = hashGen x `combine` hashGen y `combine` hashGen z `combine` hashGen w instance (Hashable a, Integral a) => Hashable (Ratio a) where hashGen x = hashGen (numerator x) `combine` hashGen (denominator x) instance Hashable Float where hashGen = hashGen . toRational instance Hashable Double where hashGen = hashGen . toRational instance (Hashable a, RealFloat a) => Hashable (Complex a) where hashGen x = hashGen (realPart x) `combine` hashGen (imagPart x)