{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, MagicHash #-} ------------------------------------------------------------------------ -- | -- Module : Data.Hash -- Copyright : (c) Milan Straka 2010 -- (c) Johan Tibell 2011 -- License : BSD-style -- Maintainer : fox@ucw.cz -- Stability : provisional -- Portability : portable -- -- This module defines a class, 'Hashable', for types that can be -- converted to a hash value. This class exists for the benefit of -- hashing-based data structures. The module provides instances for -- basic types and a way to combine hash values. -- -- The 'hash' function should be as collision-free as possible, which -- means that the 'hash' function must map the inputs to the hash -- values as evenly as possible. module Data.Hashable ( -- * Computing hash values Hashable(..) -- * Creating new instances -- $blocks , hashPtr #if defined(__GLASGOW_HASKELL__) , hashByteArray #endif , combine ) where import Data.Bits (bitSize, shiftL, shiftR, xor) import Data.Int (Int8, Int16, Int32, Int64) import Data.Word (Word, Word8, Word16, Word32, Word64) import Data.List (foldl') import qualified Data.ByteString as B import qualified Data.ByteString.Internal as B import qualified Data.ByteString.Unsafe as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Internal as BL import Foreign.C (CInt, CString) import Foreign.Ptr (Ptr, castPtr) #if defined(__GLASGOW_HASKELL__) import GHC.Base (ByteArray#, Int(..), indexWord8Array#) import GHC.Word (Word8(..)) #endif ------------------------------------------------------------------------ -- * Computing hash values -- | The class of types that can be converted to a hash value. class Hashable a where -- | Return a hash value for the argument. -- -- The general contract of 'hash' is: -- -- * This integer need not remain consistent from one execution -- of an application to another execution of the same -- application. -- -- * If two values are equal according to the '==' method, then -- applying the 'hash' method on each of the two values must -- produce the same integer result. -- -- * It is /not/ required that if two values are unequal -- according to the '==' method, then applying the 'hash' -- method on each of the two values must produce distinct -- integer results. However, the programmer should be aware -- that producing distinct integer results for unequal values -- may improve the performance of hashing-based data -- structures. hash :: a -> Int instance Hashable () where hash _ = 0 instance Hashable Bool where hash x = case x of { True -> 1; False -> 0 } instance Hashable Int where hash = id instance Hashable Int8 where hash = fromIntegral instance Hashable Int16 where hash = fromIntegral instance Hashable Int32 where hash = fromIntegral instance Hashable Int64 where hash n | bitSize n == 64 = fromIntegral n | otherwise = fromIntegral (fromIntegral n `xor` (fromIntegral n `shiftR` 32 :: Word64)) instance Hashable Word where hash = fromIntegral instance Hashable Word8 where hash = fromIntegral instance Hashable Word16 where hash = fromIntegral instance Hashable Word32 where hash = fromIntegral instance Hashable Word64 where hash n | bitSize n == 64 = fromIntegral n | otherwise = fromIntegral (n `xor` (n `shiftR` 32)) instance Hashable Char where hash = fromEnum instance Hashable a => Hashable (Maybe a) where hash Nothing = 0 hash (Just a) = 1 `combine` hash a instance (Hashable a, Hashable b) => Hashable (Either a b) where hash (Left a) = 0 `combine` hash a hash (Right b) = 1 `combine` hash b instance (Hashable a1, Hashable a2) => Hashable (a1, a2) where hash (a1, a2) = hash a1 `combine` hash a2 instance (Hashable a1, Hashable a2, Hashable a3) => Hashable (a1, a2, a3) where hash (a1, a2, a3) = hash a1 `combine` hash a2 `combine` hash a3 instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4) => Hashable (a1, a2, a3, a4) where hash (a1, a2, a3, a4) = hash a1 `combine` hash a2 `combine` hash a3 `combine` hash a4 instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4, Hashable a5) => Hashable (a1, a2, a3, a4, a5) where hash (a1, a2, a3, a4, a5) = hash a1 `combine` hash a2 `combine` hash a3 `combine` hash a4 `combine` hash a5 instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4, Hashable a5, Hashable a6) => Hashable (a1, a2, a3, a4, a5, a6) where hash (a1, a2, a3, a4, a5, a6) = hash a1 `combine` hash a2 `combine` hash a3 `combine` hash a4 `combine` hash a5 `combine` hash a6 instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4, Hashable a5, Hashable a6, Hashable a7) => Hashable (a1, a2, a3, a4, a5, a6, a7) where hash (a1, a2, a3, a4, a5, a6, a7) = hash a1 `combine` hash a2 `combine` hash a3 `combine` hash a4 `combine` hash a5 `combine` hash a6 `combine` hash a7 instance Hashable a => Hashable [a] where {-# SPECIALIZE instance Hashable [Char] #-} hash = foldl' hashAndCombine 0 hashAndCombine :: Hashable h => Int -> h -> Int hashAndCombine acc h = acc `combine` hash h instance Hashable B.ByteString where hash bs = B.inlinePerformIO $ B.unsafeUseAsCStringLen bs $ \(p, len) -> hashPtr p (fromIntegral len) instance Hashable BL.ByteString where hash = BL.foldlChunks hashAndCombine 0 ------------------------------------------------------------------------ -- * Creating new instances -- $blocks -- -- The functions below can be used when creating new instances of -- 'Hashable'. For example, the 'hash' method for many string-like -- types can be defined in terms of either 'hashPtr' or -- 'hashByteArray'. Here's how you could implement an instance for -- the 'B.ByteString' data type, from the @bytestring@ package: -- -- > import qualified Data.ByteString as B -- > import qualified Data.ByteString.Internal as B -- > import qualified Data.ByteString.Unsafe as B -- > import Data.Hashable -- > import Foreign.Ptr (castPtr) -- > -- > instance Hashable B.ByteString where -- > hash bs = B.inlinePerformIO $ -- > B.unsafeUseAsCStringLen bs $ \ (p, len) -> -- > hashPtr p (fromIntegral len) -- -- The 'combine' function can be used to implement 'Hashable' -- instances for data types with more than one field, using this -- recipe: -- -- > instance (Hashable a, Hashable b) => Hashable (Foo a b) where -- > hash (Foo a b) = 17 `combine` hash a `combine` hash b -- -- A nonzero seed is used so the hash value will be affected by -- initial fields whose hash value is zero. If no seed was provided, -- the overall hash value would be unaffected by any such initial -- fields, which could increase collisions. The value 17 is -- arbitrary. -- | Compute a hash value for the content of this pointer. hashPtr :: Ptr a -- ^ pointer to the data to hash -> Int -- ^ length, in bytes -> IO Int -- ^ hash value hashPtr p len = fromIntegral `fmap` hashByteString (castPtr p) (fromIntegral len) #if defined(__GLASGOW_HASKELL__) -- | Compute a hash value for the content of this 'ByteArray#', -- beginning at the specified offset, using specified number of bytes. -- Availability: GHC. hashByteArray :: ByteArray# -- ^ data to hash -> Int -- ^ offset, in bytes -> Int -- ^ length, in bytes -> Int -- ^ hash value hashByteArray ba0 off len = go ba0 off len 0 where -- Bernstein's hash go :: ByteArray# -> Int -> Int -> Int -> Int go !ba !i !n !h | i < n = let h' = (h + h `shiftL` 5) `xor` (fromIntegral $ unsafeIndexWord8 ba i) in go ba (i + 1) n h' | otherwise = h -- | Unchecked read of an immutable array. May return garbage or -- crash on an out-of-bounds access. unsafeIndexWord8 :: ByteArray# -> Int -> Word8 unsafeIndexWord8 ba (I# i#) = case indexWord8Array# ba i# of r# -> (W8# r#) {-# INLINE unsafeIndexWord8 #-} #endif -- | Combine two given hash values. 'combine' has zero as a left -- identity. combine :: Int -> Int -> Int combine h1 h2 = (h1 + h1 `shiftL` 5) `xor` h2 ------------------------------------------------------------------------ -- * Foreign imports foreign import ccall unsafe hashByteString :: CString -> CInt -> IO CInt