{-# LANGUAGE CPP, ForeignFunctionInterface, TypeOperators #-}

-- |
-- Module: Data.BloomFilter.Hash
-- Copyright: Bryan O'Sullivan
-- License: BSD3
--
-- Maintainer: Bryan O'Sullivan <bos@serpentine.com>
-- Stability: unstable
-- Portability: portable
--
-- Fast hashing of Haskell values.  The hash functions used are Bob
-- Jenkins's public domain functions, which combine high performance
-- with excellent mixing properties.  For more details, see
-- <http://burtleburtle.net/bob/hash/>.
--
-- In addition to the usual "one input, one output" hash functions,
-- this module provides multi-output hash functions, suitable for use
-- in applications that need multiple hashes, such as Bloom filtering.

module Data.BloomFilter.Hash
    (
    -- * Basic hash functionality
      Hashable(..)
    , hash32
    , hash64
    , hashSalt32
    , hashSalt64
    -- * Compute a family of hash values
    , hashes
    , cheapHashes
    -- * Hash functions for 'Storable' instances
    , hashOne32
    , hashOne64
    , hashList32
    , hashList64
    ) where

import Control.Monad (foldM)
import Data.Bits ((.&.), (.|.), xor)
import Data.BloomFilter.Util (FastShift(..))
import Data.List (unfoldr)
import Data.Int (Int8, Int16, Int32, Int64)
import Data.Word (Word8, Word16, Word32, Word64)
import Foreign.C.Types (CInt, CSize)
import Foreign.Marshal.Array (withArrayLen)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.Storable (Storable, peek, sizeOf)
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.ByteString as SB
import qualified Data.ByteString.Lazy as LB

#include "HsBaseConfig.h"

-- Make sure we're not performing any expensive arithmetic operations.
-- import Prelude hiding ((/), (*), div, divMod, mod, rem)

foreign import ccall unsafe "_jenkins_hashword" hashWord
    :: Ptr CInt -> CSize -> CInt -> IO CInt

foreign import ccall unsafe "_jenkins_hashword2" hashWord2
    :: Ptr CInt -> CSize -> Ptr CInt -> Ptr CInt -> IO ()

foreign import ccall unsafe "_jenkins_hashlittle" hashLittle
    :: Ptr a -> CSize -> CInt -> IO CInt

foreign import ccall unsafe "_jenkins_hashlittle2" hashLittle2
    :: Ptr a -> CSize -> Ptr CInt -> Ptr CInt -> IO ()

class Hashable a where
    -- | Compute a 32-bit hash of a value.  The salt value perturbs
    -- the result.
    hashIO32 :: a               -- ^ value to hash
             -> Word32          -- ^ salt
             -> IO Word32

    -- | Compute a 64-bit hash of a value.  The first salt value
    -- perturbs the first element of the result, and the second salt
    -- perturbs the second.
    hashIO64 :: a               -- ^ value to hash
             -> Word64           -- ^ salt
             -> IO Word64
    hashIO64 v salt = do
                   let s1 = fromIntegral (salt `shiftR` 32) .&. maxBound
                       s2 = fromIntegral salt
                   h1 <- hashIO32 v s1
                   h2 <- hashIO32 v s2
                   return $ (fromIntegral h1 `shiftL` 32) .|. fromIntegral h2
      
-- | Compute a 32-bit hash.
hash32 :: Hashable a => a -> Word32
hash32 = hashSalt32 0x106fc397

hash64 :: Hashable a => a -> Word64
hash64 = hashSalt64 0x106fc397cf62f64d3

-- | Compute a salted 32-bit hash.
hashSalt32 :: Hashable a => Word32  -- ^ salt
           -> a                 -- ^ value to hash
           -> Word32
{-# INLINE hashSalt32 #-}
hashSalt32 salt k =
    let !r = unsafePerformIO $ hashIO32 k salt
    in r

-- | Compute a salted 64-bit hash.
hashSalt64 :: Hashable a => Word64  -- ^ salt
           -> a                 -- ^ value to hash
           -> Word64
{-# INLINE hashSalt64 #-}
hashSalt64 salt k =
    let !r = unsafePerformIO $ hashIO64 k salt
    in r

-- | Compute a list of 32-bit hashes.  The value to hash may be
-- inspected as many times as there are hashes requested.
hashes :: Hashable a => Int     -- ^ number of hashes to compute
       -> a                     -- ^ value to hash
       -> [Word32]
hashes n v = unfoldr go (n,0x3f56da2d3ddbb9f6)
    where go (k,s) | k <= 0    = Nothing
                   | otherwise = let s' = hashSalt32 s v
                                 in Just (s', (k-1,s'))

-- | Compute a list of 32-bit hashes relatively cheaply.  The value to
-- hash is inspected at most twice, regardless of the number of hashes
-- requested.
--
-- We use a variant of Kirsch and Mitzenmacher's technique from \"Less
-- Hashing, Same Performance: Building a Better Bloom Filter\",
-- <http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf>.
--
-- Where Kirsch and Mitzenmacher multiply the second hash by a
-- coefficient, we shift right by the coefficient.  This offers better
-- performance (as a shift is much cheaper than a multiply), and the
-- low order bits of the final hash stay well mixed.
cheapHashes :: Hashable a => Int -- ^ number of hashes to compute
            -> a                 -- ^ value to hash
            -> [Word32]
{-# SPECIALIZE cheapHashes :: Int -> SB.ByteString -> [Word32] #-}
{-# SPECIALIZE cheapHashes :: Int -> LB.ByteString -> [Word32] #-}
{-# SPECIALIZE cheapHashes :: Int -> String -> [Word32] #-}
cheapHashes k v = [h1 + (h2 `shiftR` i) | i <- [0..j]]
    where h = hashSalt64 0x9150a946c4a8966e v
          h1 = fromIntegral (h `shiftR` 32) .&. maxBound
          h2 = fromIntegral h
          j = fromIntegral k - 1

instance Hashable () where
    hashIO32 _ salt = return salt

instance Hashable Integer where
    hashIO32 k salt | k < 0 = hashIO32 (unfoldr go (-k))
                                   (salt `xor` 0x3ece731e9c1c64f8)
                  | otherwise = hashIO32 (unfoldr go k) salt
        where go 0 = Nothing
              go i = Just (fromIntegral i :: Word32, i `shiftR` 32)

instance Hashable Bool where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Ordering where
    hashIO32 = hashIO32 . fromEnum
    hashIO64 = hashIO64 . fromEnum

instance Hashable Char where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Int where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Float where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Double where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Int8 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Int16 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Int32 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Int64 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Word8 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Word16 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Word32 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

instance Hashable Word64 where
    hashIO32 = hashOne32
    hashIO64 = hashOne64

-- | A fast unchecked shift.  Nasty, but otherwise GHC 6.8.2 does a
-- test and branch on every shift.
div4 :: CSize -> CSize
div4 k = fromIntegral ((fromIntegral k :: HTYPE_SIZE_T) `shiftR` 2)

alignedHash :: Ptr a -> CSize -> Word32 -> IO Word32
alignedHash ptr bytes salt
    | bytes .&. 3 == 0 = hashWord (castPtr ptr) (div4 bytes) salt' >>= cast32
    | otherwise        = hashLittle ptr bytes salt' >>= cast32
  where salt' = fromIntegral salt

cast32 :: CInt -> IO Word32
cast32 = return . fromIntegral

alignedHash2 :: Ptr a -> CSize -> Word64 -> IO Word64
alignedHash2 ptr bytes salt =
    with (fromIntegral salt) $ \sp -> do
      let p1 = castPtr sp
          p2 = castPtr sp `plusPtr` 4
      go p1 p2
      peek sp
  where go p1 p2
          | bytes .&. 3 == 0 = hashWord2 (castPtr ptr) (div4 bytes) p1 p2
          | otherwise        = hashLittle2 ptr bytes p1 p2

instance Hashable SB.ByteString where
    hashIO32 bs salt = SB.useAsCStringLen bs $ \(ptr, len) -> do
                     alignedHash ptr (fromIntegral len) salt

    {-# INLINE hashIO64 #-}
    hashIO64 bs salt = SB.useAsCStringLen bs $ \(ptr, len) -> do
                      alignedHash2 ptr (fromIntegral len) salt

instance Hashable LB.ByteString where
    hashIO32 bs salt = foldM (flip hashIO32) salt (LB.toChunks bs)

    {-# INLINE hashIO64 #-}
    hashIO64 bs salt = foldM go salt (LB.toChunks bs)
        where go a s = hashIO64 s a

instance Hashable a => Hashable (Maybe a) where
    hashIO32 Nothing salt = return salt
    hashIO32 (Just k) salt = hashIO32 k salt
    hashIO64 Nothing salt = return salt
    hashIO64 (Just k) salt = hashIO64 k salt

instance (Hashable a, Hashable b) => Hashable (Either a b) where
    hashIO32 (Left a) salt = hashIO32 a salt
    hashIO32 (Right b) salt = hashIO32 b (salt + 1)
    hashIO64 (Left a) salt = hashIO64 a salt
    hashIO64 (Right b) salt = hashIO64 b (salt + 1)

instance (Hashable a, Hashable b) => Hashable (a, b) where
    hashIO32 (a,b) salt = hashIO32 a salt >>= hashIO32 b
    hashIO64 (a,b) salt = hashIO64 a salt >>= hashIO64 b

instance (Hashable a, Hashable b, Hashable c) => Hashable (a, b, c) where
    hashIO32 (a,b,c) salt = hashIO32 a salt >>= hashIO32 b >>= hashIO32 c

instance (Hashable a, Hashable b, Hashable c, Hashable d) =>
    Hashable (a, b, c, d) where
    hashIO32 (a,b,c,d) salt =
        hashIO32 a salt >>= hashIO32 b >>= hashIO32 c >>= hashIO32 d

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e) =>
    Hashable (a, b, c, d, e) where
    hashIO32 (a,b,c,d,e) salt =
        hashIO32 a salt >>= hashIO32 b >>= hashIO32 c >>= hashIO32 d >>= hashIO32 e

instance Storable a => Hashable [a] where
    hashIO32 = hashList32

    {-# INLINE hashIO64 #-}
    hashIO64 = hashList64

-- | Compute a 32-bit hash of a 'Storable' instance.
hashOne32 :: Storable a => a -> Word32 -> IO Word32
hashOne32 k salt = with k $ \ptr ->
                 alignedHash ptr (fromIntegral (sizeOf k)) salt

-- | Compute a 64-bit hash of a 'Storable' instance.
hashOne64 :: Storable a => a -> Word64 -> IO Word64
hashOne64 k salt = with k $ \ptr ->
                   alignedHash2 ptr (fromIntegral (sizeOf k)) salt

-- | Compute a 32-bit hash of a list of 'Storable' instances.
hashList32 :: Storable a => [a] -> Word32 -> IO Word32
hashList32 xs salt =
    withArrayLen xs $ \len ptr ->
        alignedHash ptr (fromIntegral (len * sizeOf (head xs))) salt

-- | Compute a 64-bit hash of a list of 'Storable' instances.
hashList64 :: Storable a => [a] -> Word64 -> IO Word64
hashList64 xs salt =
    withArrayLen xs $ \len ptr ->
        alignedHash2 ptr (fromIntegral (len * sizeOf (head xs))) salt