src/Data/Hashabler/Internal.hs

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash , UnliftedFFITypes #-}
-- For mixType:
{-# LANGUAGE KindSignatures, PolyKinds #-}
module Data.Hashabler.Internal where

-- To avoid circular dependencies.


import Data.Word
import Data.Int
import Data.Bits
import Data.Char
import Data.List

-- For ByteString & Text instances:
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy.Internal as BL (foldlChunks, ByteString)
#if MIN_VERSION_bytestring(0,10,4)
import qualified Data.ByteString.Short.Internal as BSh
#endif
import qualified Data.Text as T
import qualified Data.Text.Internal as T
import qualified Data.Text.Array as T (Array(..))
import qualified Data.Primitive as P
import qualified Data.Text.Lazy as TL (foldlChunks, Text)
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Storable (peekByteOff)

import Control.Exception(assert)

import Data.Version(Version, versionBranch)
import Data.Unique(Unique, hashUnique)

-- For getting our Int from ThreadId:
import Foreign.C (CInt(..))
import GHC.Conc(ThreadId(..))
import GHC.Prim(ThreadId#)

-- For TypeRep
import Data.Typeable
import GHC.Fingerprint.Type(Fingerprint(..))
#if  __GLASGOW_HASKELL__ >= 710
#else 
-- __GLASGOW_HASKELL__ >= 702
import Data.Typeable.Internal(TypeRep(..))
#endif

import System.Mem.StableName
import Data.Ratio (Ratio, denominator, numerator)

-- For Integer:
#ifdef MIN_VERSION_integer_gmp
import GHC.Exts (Int(..))
import GHC.Integer.GMP.Internals (Integer(..))
# if MIN_VERSION_integer_gmp(1,0,0)
import GHC.Integer.GMP.Internals (BigNat(BN#))
# endif
#endif

-- For GHC 7.10 Natural and Void:
#if MIN_VERSION_base(4,8,0)
import Data.Void (Void, absurd)
import GHC.Natural (Natural(..))
# ifdef MIN_VERSION_integer_gmp
import GHC.Exts (Word(..))
# endif
#endif

-- For WORD_SIZE_IN_BITS constant:
-- TODO Use Data.Primitive.MachDeps ?
#include "MachDeps.h"
import MachDeps

-- Do error just once, and assume 32 else 64 below:
#if WORD_SIZE_IN_BITS == 32
-- for fast div by power of two:
#define LOG_SIZEOF_WORD 2
#elif WORD_SIZE_IN_BITS == 64
#define LOG_SIZEOF_WORD 3
#else
#error We only know how to support 32-bit and 64-bit systems, sorry.
#endif

import Unsafe.Coerce

-- for reading the bytes of ByteStrings:
# if MIN_VERSION_bytestring(0,10,6)
-- This eliminates ~5ns overhead vs unsafeDupablePerformIO. We don't understand
-- the implications but will use it in the same way it's used in ByteString.
import Data.ByteString.Internal(accursedUnutterablePerformIO)
# else
import System.IO.Unsafe (unsafeDupablePerformIO)

----------------------------- END IMPORTS --------------------------------

accursedUnutterablePerformIO :: IO a -> a
accursedUnutterablePerformIO = unsafeDupablePerformIO
# endif


-- COMMENTED BELOW, WHEN FOUND NOT BENEFICIAL:
-- These should be fine in all cases:
coerceInt32Word32 :: Int32 -> Word32
coerceInt32Word32 = 
#  if WORD_SIZE_IN_BITS == 32
    unsafeCoerce
#  else
    fromIntegral -- TODO or is unsafeCoerce okay on 64-bit?
#  endif
coerceInt64Word64 :: Int64 -> Word64
coerceInt64Word64 = unsafeCoerce
-- coerceIntWord :: Int -> Word
-- coerceIntWord = unsafeCoerce

-- For when argument is known in-bounds:
-- unsafeCoerceWord8 :: Word -> Word8
-- unsafeCoerceWord8 = unsafeCoerce

-- For 32-bit:
#if WORD_SIZE_IN_BITS == 32
unsafeCoerceIntWord32 :: Int -> Word32
unsafeCoerceIntWord32 = unsafeCoerce
-- unsafeCoerceWordWord32 :: Word -> Word32
-- unsafeCoerceWordWord32 = unsafeCoerce
#endif
  -- But why is that? The unsafeCoerce version simply has more instructions AFAICT!


#if MIN_VERSION_base(4,7,0)
-- Exported from Data.Word in base >= 4.7
#else
byteSwap32 :: Word32 -> Word32
byteSwap32 = _byteSwap32
# if WORD_SIZE_IN_BITS == 64
byteSwap64 :: Word64 -> Word64
byteSwap64 = _byteSwap64
# endif
#endif

-- TODO This is probably so slow it deserves a warning...
_byteSwap32 :: Word32 -> Word32
_byteSwap32 = \w-> 
    let mask0 = 0xFF000000
        mask1 = 0x00FF0000
        mask2 = 0x0000FF00
        mask3 = 0x000000FF
     in (unsafeShiftR (w .&. mask0) 24) .|.
        (unsafeShiftR (w .&. mask1) 8)  .|.
        (unsafeShiftL (w .&. mask2) 8)  .|.
        (unsafeShiftL (w .&. mask3) 24)

_byteSwap64 :: Word64 -> Word64
_byteSwap64 = \w-> 
    let mask0 = 0xFF00000000000000
        mask1 = 0x00FF000000000000
        mask2 = 0x0000FF0000000000
        mask3 = 0x000000FF00000000
        mask4 = 0x00000000FF000000
        mask5 = 0x0000000000FF0000
        mask6 = 0x000000000000FF00
        mask7 = 0x00000000000000FF
     in (unsafeShiftR (w .&. mask0) 56) .|.
        (unsafeShiftR (w .&. mask1) 40) .|.
        (unsafeShiftR (w .&. mask2) 24) .|.
        (unsafeShiftR (w .&. mask3) 8)  .|.
        (unsafeShiftL (w .&. mask4) 8)  .|.
        (unsafeShiftL (w .&. mask5) 24) .|.
        (unsafeShiftL (w .&. mask6) 40) .|.
        (unsafeShiftL (w .&. mask7) 56)

-- TODO BENCHMARKING for 'abs' see: http://graphics.stanford.edu/~seander/bithacks.html#IntegerAbs  and  http://stackoverflow.com/q/22445019/176841


foreign import ccall unsafe "rts_getThreadId" getThreadId :: ThreadId# -> CInt 


{-
-- see also the non-powers of two mapping methods outlined:
--  http://www.isthe.com/chongo/tech/comp/fnv/#FNV-1a
-}







-- EXTRACTING BYTES FROM DIFFERENT TYPES ----------------------------
-- NOTE we're to hash the resulting Word8s from left to right

-- TODO check inlining on these:

bytes16 :: Word16 -> (Word8, Word8)
{-# INLINE bytes16 #-}
bytes16 wd = (shifted 8, fromIntegral wd)
     where shifted = fromIntegral . unsafeShiftR wd

bytes32 :: Word32 -> (Word8,Word8,Word8,Word8)
{-# INLINE bytes32 #-}
bytes32 wd = (shifted 24, shifted 16, shifted 8, fromIntegral wd)
     where shifted = fromIntegral . unsafeShiftR wd

bytes64 :: Word64 -> (Word8,Word8,Word8,Word8,Word8,Word8,Word8,Word8)
{-# INLINE bytes64 #-}
bytes64 = \wd64->
#  if WORD_SIZE_IN_BITS == 32
    _bytes64_32 wd64
#  else
    _bytes64_64 wd64
#  endif

_bytes64_64 :: Word64 -> (Word8,Word8,Word8,Word8,Word8,Word8,Word8,Word8)
{-# INLINE _bytes64_64 #-}
_bytes64_64 wd = ( shifted 56, shifted 48, shifted 40, shifted 32
                 , shifted 24, shifted 16, shifted 8, fromIntegral wd)
     where shifted = fromIntegral . unsafeShiftR wd

-- faster for 32-bit archs
_bytes64_32 :: Word64 -> (Word8,Word8,Word8,Word8,Word8,Word8,Word8,Word8)
{-# INLINE _bytes64_32 #-}
_bytes64_32 wd = 
    let (wd0, wd1) = words32 wd
        (b0,b1,b2,b3) = bytes32 wd0
        (b4,b5,b6,b7) = bytes32 wd1
     in (b0,b1,b2,b3,b4,b5,b6,b7)

-- TODO this more efficiently on 32-bit platforms where this is I think a noop.
words32 :: Word64 -> (Word32, Word32)
{-# INLINE words32 #-}
words32 wd64 = (fromIntegral $ unsafeShiftR wd64 32, fromIntegral wd64)


-- HASHABLE CLASS AND INSTANCES -------------------------------------

newtype Hash32 a = Hash32 { hashWord32 :: Word32 }
    deriving (Show, Read, Eq)
newtype Hash64 a = Hash64 { hashWord64 :: Word64 }
    deriving (Show, Read, Eq)
data Hash128 a = Hash128 { hashWord128_0 :: !Word64, hashWord128_1 :: !Word64 }
    deriving (Show, Read, Eq)


-- | A class of types that can be converted into a hash value.  We expect all
-- instances to display "good" hashing properties (w/r/t avalanche, bit
-- independence, etc.) when passed to an ideal hash function.
--
-- We try to ensure that bytes are extracted from values in a way that is
-- portable across architectures (where possible), and straightforward to
-- replicate on other platforms and in other languages. Portable instances are
-- also instances of 'StableHashable', and non-portable instances are
-- __NOTE__-ed in instance docs here as well.
--
-- See the section <#principled "Defining Hashable instances"> for details of
-- what we expect from instances.
class Hashable a where
    -- | Add the bytes from the second argument into the hash, producing a new
    -- hash value. This is essentially a left fold of the methods of
    -- 'HashState' over individual bytes extracted from @a@.
    --
    -- For some instances of 'HashState', this method might be a complete
    -- hashing algorithm, or might comprise the core of a hashing algorithm
    -- (perhaps with some final mixing), or might do something completely apart
    -- from hashing (e.g. simply cons bytes into a list for debugging).
    --
    -- Implementations must ensure that, for the same data: 
    --    
    -- - @Word16\/32\/64@ arguments passed into the methods of 'HashState', and...
    --
    -- - the choice of @mix@ function itself...
    --
    -- ...are consistent across architectures of different word size and
    -- endianness. For example do not define an instance which conditionally
    -- implements 'mix64' only on 64-bit architectures.
    hash :: (HashState h)=> h -> a -> h


-- | A class for defining how a hash function consumes input data. Bytes are
-- fed to these methods in our 'Hashable' instances, which promise to call
-- these methods in a platform-independent way.
--
-- Instances of 'HashState' only need to define 'mix8', but may additionally
-- handle @mix@-ing in larger word chunks for performance reasons. For instance
-- a hash function which operates on four bytes at a time might make use of
-- 'mix32', and perhaps in 'mix8' pad with three additional 0s.
--
-- Endianness is normalized in 'Hashable' instances, so these mix methods can
-- expect to receive identical words across platforms.
class HashState h where
    -- | Mix in one byte.
    mix8 :: h -> Word8 -> h
    -- | Mix in a 2-byte word. Defaults to two 'mix8' on bytes from most to
    -- least significant.
    mix16 :: h -> Word16 -> h
    -- | Mix in a 4-byte word. Defaults to four 'mix8' on bytes from most to
    -- least significant.
    mix32 :: h -> Word32 -> h
    -- | Mix in a 8-byte word. Defaults to two 'mix32' on 32-byte words from
    -- most to least significant.
    mix64 :: h -> Word64 -> h

    -- Hash functions are likely to take individual bytes, or chunks of 32 or
    -- 64 bits, so I think these defaults make sense.
    {-# INLINE mix16 #-}
    mix16 h = \wd16-> 
       let (wd8_0,wd8_1) = bytes16 wd16
        in h `mix8` wd8_0 `mix8` wd8_1

    {-# INLINE mix32 #-}
    mix32 h = \wd32->
       let (b0,b1,b2,b3) = bytes32 wd32
        in h `mix8` b0 `mix8` b1 `mix8` b2 `mix8` b3 

    {-# INLINE mix64 #-}
    mix64 h = \wd64->
       let (w32_0, w32_1) = words32 wd64
        in h `mix32` w32_0 `mix32` w32_1




-- FNV HASH -------------------------------------------------

-- The special FNV primes required for different hash output sizes:

fnvPrime32 :: Word32
fnvPrime64 :: Word64
{-# INLINE fnvPrime32 #-}
{-# INLINE fnvPrime64 #-}
fnvPrime32 = 16777619
fnvPrime64 = 1099511628211
-- fnvPrime128 = 309485009821345068724781371
-- fnvPrime256 = 374144419156711147060143317175368453031918731002211


fnvOffsetBasis32 :: FNV32
fnvOffsetBasis64 :: FNV64
{-# INLINE fnvOffsetBasis32 #-}
{-# INLINE fnvOffsetBasis64 #-}
fnvOffsetBasis32 = FNV32 2166136261
fnvOffsetBasis64 = FNV64 14695981039346656037
-- fnvOffsetBasis128 = FNV128 144066263297769815596495629667062367629
-- fnvOffsetBasis256 = FNV256 100029257958052580907070968620625704837092796014241193945225284501741471925557


newtype FNV32 = FNV32 { fnv32 :: Word32 }
    deriving (Eq, Ord, Read, Show)

newtype FNV64 = FNV64 { fnv64 :: Word64 }
    deriving (Eq, Ord, Read, Show)


-- | @
-- 'mix8' ('FNV32' h32) b = 'FNV32' $ (h32 ``xor`` fromIntegral b) * 'fnvPrime32'
-- @
instance HashState FNV32 where
    {-# INLINE mix8 #-}
    mix8 (FNV32 h32) = \b-> FNV32 $ (h32 `xor` fromIntegral b) * fnvPrime32


-- | Hash a value using the standard spec-prescribed 32-bit seed value.
--
-- @
--   hashFNV32 = 'Hash32' . fnv32 . 'hash' 'fnvOffsetBasis32'
-- @
hashFNV32 :: Hashable a=> a -> Hash32 a
{-# INLINE hashFNV32 #-}
hashFNV32 = Hash32 . fnv32 . hash fnvOffsetBasis32


-- | @
-- 'mix8' ('FNV64' h64) b = 'FNV64' $ (h64 ``xor`` fromIntegral b) * 'fnvPrime64'
-- @
instance HashState FNV64 where
    {-# INLINE mix8 #-}
    mix8 (FNV64 h64) = \b-> FNV64 $ (h64 `xor` fromIntegral b) * fnvPrime64


-- | Hash a value using the standard spec-prescribed 64-bit seed value. This
-- may be slow on 32-bit machines.
--
-- @
--   hashFNV64 = 'Hash64' . fnv64 . 'hash' 'fnvOffsetBasis64'
-- @
hashFNV64 :: Hashable a=> a -> Hash64 a
{-# INLINE hashFNV64 #-}
hashFNV64 = Hash64 . fnv64 . hash fnvOffsetBasis64


-- ------------------------------------------------------------------
-- NUMERIC TYPES:

-- TODO TESTING: for 7.8 and below, see if we can get a small value into J#, and then test that it hashes to the same as the literal small value
--                (look at code; simple */div or +/- don't seem to do it)

-- NOTE: non-obviously, but per our rule about variable-width values, this must
-- also be wrapped in a `mixConstructor`; consider the hashes of (0xDEAD,
-- 0xBEEF) and (0xDE, 0xADBEEF). The way we mix in the sign handles this.
-- 
-- I would rather truncate to 8-bit "limbs" but using 32-bit limbs seems like a
-- good tradeoff: on 64-bit platforms we just do a conditional instead of on
-- avg 4 extra hash ops, and on 32-bit no extra work is required.
--
-- | Arbitrary-precision integers are hashed as follows: the magnitude is
-- represented with 32-bit chunks (at least one, for zero; but no more than
-- necessary), then bytes are added to the hash from most to least significant
-- (including all initial padding 0s). Finally 'mixConstructor' is called on
-- the resulting hash value, with @(1::Word8)@ if the @Integer@ was negative,
-- otherwise with @0@.
instance Hashable Integer where
    {-# INLINE hash #-}
-- integer-gmp implementation: --------------------------------------
#ifdef MIN_VERSION_integer_gmp
    hash h = \i-> case i of
      (S# n#) ->
        let magWord = magnitudeAsWord (I# n#)
            sign = _signByte (I# n#)
         in mixConstructor sign $ 
#           if WORD_SIZE_IN_BITS == 32
              h `mix32` magWord
#           else
              -- only hash enough 32-bit chunks as needed to represent magnitude
              h `mixSignificantMachWord64` magWord
              -- TODO benchmark and try unsafeCoerce on 64-bit
#           endif

-- GHC 7.10: ------------------------
--
#   if MIN_VERSION_integer_gmp(1,0,0)
        -- NOTE: these used only when out of range of Int:
      (Jp# bn) -> mixConstructor 0 $ hash32BigNatBytes h bn
      (Jn# bn) -> mixConstructor 1 $ hash32BigNatBytes h bn

-- GHC 7.8 and below: ---------------
--
-- J# is more or less directly the gmp arbitrary precision int type, where:
--     1) sz# is number of limbs, or negative of that for negative
--     2) limbs stored little endian (i.e. i[0] is least significant limb)
--     3) whenever sz# is non-zero , the most significant limb is non-zero; the
--        value 0 is represented by sz# == 0, in which case ba# is ignored
--     4) a limb is machine Word size.
-- And some Integer-specific caveats/notes:
--     5) J# may be used for even small integers
--     6) ba# may be over-allocated, so size should be ignored
#   else
--    Note, 5 and 3 together mean that we have to special case for sz == 0,
--    even though I can't get that case to occur in practice:
      (J# 0# _) -> mixConstructor 0 (h `mix32` 0)
      (J# sz# ba#) -> 
             -- Note, (abs minBound == minBound) but I don't think that value
             -- is possible since we wouldn't even be able to specify the size
             -- (maxBound+1) as an Int value.
         let numLimbs = abs (I# sz#)
             sign = _signByte (I# sz#)
          in assert ((I# sz#) /= minBound) $
              mixConstructor sign $ 
               hash32BigNatByteArrayBytes h numLimbs (P.ByteArray ba#)
#   endif

-- other Integer implementations: -----------------------------------
#else
    -- For non-gmp Integer; quite slow.
    hash = _hash32Integer
#endif


-- TODO benchmark against conditional
-- Helper to quickly (hopefully) extract sign bit (1 for negative, 0 otherwise)
-- from Int. Assumes two's complement.
_signByte :: Int -> Word8
{-# INLINE _signByte #-}
_signByte n = fromIntegral ((fromIntegral n :: Word) 
                              `unsafeShiftR` (WORD_SIZE_IN_BITS - 1))

-- Exposed for testing. In particular we ensure that magnitudeAsWord minBound
-- is correct.
-- TODO make Int -> Word, and use fromIntegral at usage site, maybe
magnitudeAsWord :: Int 
#             if WORD_SIZE_IN_BITS == 32
                -> Word32
#             else
                -> Word64
#             endif
magnitudeAsWord = fromIntegral . abs

#if WORD_SIZE_IN_BITS == 64
-- Helper for hashing a 64-bit word, possibly omiting the first 32-bit chunk
-- (if 0). We use this when normalizing big natural representations.
mixSignificantMachWord64 :: (HashState h)=> h -> Word64 -> h
{-# INLINE mixSignificantMachWord64 #-}
mixSignificantMachWord64 h w64 = 
     let (word32_0, word32_1) = words32 w64
      in if word32_0 == 0 
          then h `mix32` word32_1
          else h `mix32` word32_0 `mix32`  word32_1
#endif


-- Very slow Integer-implementation-agnostic hashing:
_hash32Integer :: (HashState h)=> h -> Integer -> h
_hash32Integer h i = 
    let (sgn, limbs) = _integerWords i
     in mixConstructor sgn $ 
         foldl' hash h limbs

-- Convert an opaque Integer into a gmp-like format, except that we order our
-- list of limbs returned from most to least significant:
_integerWords :: Integer -> (Word8, [Word32])
_integerWords nSigned = (sign , go (abs nSigned) []) where
    sign = if nSigned < 0 then 1 else 0
           -- we will hash at least one limb (even if zero):
    go nMag acc = let (nMag', w32) = splitAtLastWord nMag
                   in (if nMag' == 0 then id else go nMag') (w32:acc)

    splitAtLastWord :: Integer -> (Integer, Word32)
    splitAtLastWord x = 
      assert (x >= 0) $
        (x `shiftR` 32, fromIntegral x)


#ifdef MIN_VERSION_integer_gmp
-- GHC 7.10:
# if MIN_VERSION_integer_gmp(1,0,0)
-- Internal. Hashable instances will require a 'mixConstructor'. We use the same
-- chunks-of-32-bits scheme as in the Integer instance.
--
-- Invariants of BigNat (from docs):
--   - ByteArray# size is an exact multiple of Word# size
--   - limbs are stored in least-significant-limb-first order,
--   - the most-significant limb must be non-zero, except for
--      0 which is represented as a 1-limb.
--      - NOTE, though: Jp#/Jn# in Integer on GHC 7.10 guarantee that contained
--        BigNat are non-zero
hash32BigNatBytes :: (HashState h)=> h -> BigNat -> h
{-# INLINE hash32BigNatBytes #-}
hash32BigNatBytes h (BN# ba#) = 
    let ba = P.ByteArray ba#
        szBytes = P.sizeofByteArray ba
        numLimbs = szBytes `unsafeShiftR` LOG_SIZEOF_WORD
     in assert (numLimbs >= 1 && (numLimbs * SIZEOF_HSWORD) == szBytes) $
         hash32BigNatByteArrayBytes h numLimbs ba


-- | The @BigNat@'s value is represented in 32-bit chunks (at least one, for
-- zero; but no more than necessary), then bytes are added to the hash from
-- most to least significant (including all initial padding 0s). Finally
-- @'mixConstructor' 0@ is called on the resulting hash value.
--
-- Exposed only in GHC 7.10.
instance Hashable BigNat where
    {-# INLINE hash #-}
    hash h = mixConstructor 0 . hash32BigNatBytes h

# endif


-- Hashing of internals of BigNat-format ByteArrays of at least 1 limb, for old
-- and new style Integer from integer-gmp.
hash32BigNatByteArrayBytes :: (HashState h)=> h -> Int -> P.ByteArray -> h
{-# INLINE hash32BigNatByteArrayBytes #-}
hash32BigNatByteArrayBytes h numLimbs ba = 
  assert (numLimbs > 0) $
    let mostSigLimbIx = numLimbs - 1
        -- NOTE: to correctly handle small-endian, we must read in Word-size
        -- chunks (not just Word32 size)
        go !h' (-1) = h'
        go !h' !ix = let wd = P.indexByteArray ba ix
                      in go (h' `mixWord` wd) (ix - 1)
#  if WORD_SIZE_IN_BITS == 32
        mixWord = mix32
     in go h mostSigLimbIx
#  else
        mixWord h' wd = let (wd32_0, wd32_1) = words32 wd
                         in h' `mix32` wd32_0 `mix32` wd32_1
        -- handle dropping possibly-empty most-significant Word32, before
        -- processing remaining limbs:
        h0 = let mostSigLimb = P.indexByteArray ba mostSigLimbIx
              in h `mixSignificantMachWord64` mostSigLimb
        ix0 = mostSigLimbIx - 1
     in go h0 ix0
#  endif

#endif

-- Also GHC 7.10:
#if MIN_VERSION_base(4,8,0)
-- | The @Natural@'s value is represented in 32-bit chunks (at least one, for
-- zero; but no more than necessary), then bytes are added to the hash from
-- most to least significant (including all initial padding 0s). Finally
-- @'mixConstructor' 0@ is called on the resulting hash value.
--
-- Exposed only in GHC 7.10
instance Hashable Natural where
    {-# INLINE hash #-}
    hash h nat = case nat of
# if defined (MIN_VERSION_integer_gmp)
#   if MIN_VERSION_integer_gmp(1,0,0)
        -- For Word-size natural
        (NatS# wd#) -> mixConstructor 0 $
#         if WORD_SIZE_IN_BITS == 32
            h `mix32` (fromIntegral $ W# wd#)  -- TODO benchmark unsafeCoerce
#         else
            h `mixSignificantMachWord64` (fromIntegral $ W# wd#)  -- TODO benchmark unsafeCoerce on 64-bit
#         endif
        -- Else using a BigNat (which instance calls required mixConstructor):
        (NatJ# bn)  -> hash h bn
#   else
        -- Natural represented with non-negative Integer:
        n -> hash h $ toInteger n
#   endif
# else
        -- Natural represented with non-negative Integer:
        n -> hash h $ toInteger n
# endif

-- This is the instance in void-0.7:
--
-- | > hash _ _ = absurd
--
-- Exposed only in GHC 7.10
instance Hashable Void where
    hash _ = absurd

#endif


-- | > hash s a = s `hash` numerator a `hash` denominator a
instance (Integral a, Hashable a) => Hashable (Ratio a) where
    {-# INLINE hash #-}
    hash s a = s `hash` numerator a `hash` denominator a


-- ---------
-- Architecture-dependent types, with special handling.

-- | __NOTE__: @Int@ has platform-dependent size. When hashing on 64-bit machines
-- if the @Int@ value to be hashed falls in the 32-bit Int range, we first cast
-- it to an Int32. This should help ensure that programs that are correct
-- across architectures will also produce the same hash values.
instance Hashable Int where
    {-# INLINE hash #-}
    hash h i =
#     if WORD_SIZE_IN_BITS == 32
        mix32 h $ unsafeCoerceIntWord32 i
#     else
        _hash32_Int_64 h (fromIntegral i)
#     endif

-- | __NOTE__: @Word@ has platform-dependent size. When hashing on 64-bit
-- machines if the @Word@ value to be hashed falls in the 32-bit Word range, we
-- first cast it to a Word32. This should help ensure that programs that are
-- correct across architectures will also produce the same hash values.
instance Hashable Word where
    {-# INLINE hash #-}
    hash h w =
#     if WORD_SIZE_IN_BITS == 32
        hash h (fromIntegral w :: Word32)
#     else
        _hash32_Word_64 h (fromIntegral w) -- TODO benchmarking unsafeCoerce on 64-bit
#     endif


-- TODO Benchmarking + try unsafeCoerce on 64-bit
-- NOTE: the expressions in the conditionals alone make these quite slow on
--       32-bit machines, so don't worry about benchmarking this directly.
_hash32_Int_64 :: (HashState h)=> h -> Int64 -> h
{-# INLINE _hash32_Int_64 #-}
_hash32_Int_64 h = \i->
    -- Can we losslessly cast to 32-bit representation?
    if i <= (fromIntegral (maxBound :: Int32)) && 
       i >= (fromIntegral (minBound :: Int32)) -- TODO benchmark and maybe use (.&.), and check ==0
        then hash h (fromIntegral i :: Int32)
        else hash h i

_hash32_Word_64 :: (HashState h)=> h -> Word64 -> h
{-# INLINE _hash32_Word_64 #-}
_hash32_Word_64 h = \w->
    -- Can we losslessly cast to 32-bit representation?
    if w <= (fromIntegral (maxBound :: Word32))
        then hash h (fromIntegral w :: Word32)
        else hash h w



-- | Hash a Float by way of 'decodeFloat'. __NOTE__: this means 0 and -0 are
-- considered equal, among other things. In general floating point values
-- cannot be reasonably compared for absolute equality, so be careful if you
-- make use of this instance. 
instance Hashable Float where
    {-# INLINE hash #-}
    hash h = hash h . (\x-> decodeFloat x)

-- | Hash a Double by way of 'decodeFloat'. __NOTE__: this means 0 and -0 are
-- considered equal, among other things. In general floating point values
-- cannot be reasonably compared for absolute equality, so be careful if you
-- make use of this instance. 
instance Hashable Double where
    {-# INLINE hash #-}
    hash h = hash h . (\x-> decodeFloat x)


-- GHC uses two's complement representation for signed ints; C has this
-- undefined, I guess; just cast to Word and hash.

instance Hashable Int8 where
    {-# INLINE hash #-}
    hash h = mix8 h . fromIntegral

instance Hashable Int16 where
    {-# INLINE hash #-}
    hash h = mix16 h . fromIntegral

instance Hashable Int32 where
    {-# INLINE hash #-}
    hash h = mix32 h . coerceInt32Word32


instance Hashable Int64 where
    {-# INLINE hash #-}
    hash h = mix64 h . coerceInt64Word64

-- Straightforward hashing of different Words and byte arrays:

instance Hashable Word8 where
    {-# INLINE hash #-}
    hash = mix8

instance Hashable Word16 where
    {-# INLINE hash #-}
    hash = mix16

instance Hashable Word32 where
    {-# INLINE hash #-}
    hash = mix32

instance Hashable Word64 where
    {-# INLINE hash #-}
    hash = mix64


-- ------------------------------------------------------------------
-- ARRAYS AND LIST:


-- Since below have variable-length, we'll use this helper (which is also
-- useful for multi-constructor types):

-- | > mixConstructor n h = h `mix8` (0xFF - n)
mixConstructor :: (HashState h)
               => Word8  -- ^ Constructor number. We recommend starting from 0 and incrementing.
               -> h      -- ^ Hash state value to mix our byte into
               -> h      -- ^ New hash state
{-# INLINE mixConstructor #-}
mixConstructor n = \h-> h `mix8` (0xFF - n)

-- | Strict @ByteString@
instance Hashable B.ByteString where
    {-# INLINE hash #-}
    hash h = mixConstructor 0 .
        hashByteString h

-- TODO benchmarks for fusion:
-- | Lazy @ByteString@
instance Hashable BL.ByteString where
    {-# INLINE hash #-}
    hash h = mixConstructor 0 .
        BL.foldlChunks hashByteString h

#if MIN_VERSION_bytestring(0,10,4)
-- | Exposed only in bytestring >= v0.10.4
instance Hashable BSh.ShortByteString where
    {-# INLINE hash #-}
    hash h  = 
      \(BSh.SBS ba_) -> -- when MIN_VERSION_base(4,3,0)
        let ba = P.ByteArray ba_
         in mixConstructor 0 $
              hashByteArray h (P.sizeofByteArray ba) ba
#endif

-- | Strict @Text@, hashed as big endian UTF-16.
instance Hashable T.Text where
    {-# INLINE hash #-}
    hash h = mixConstructor 0 .
        hashText h

-- TODO benchmarks for fusion:
-- | Lazy @Text@, hashed as big endian UTF-16.
instance Hashable TL.Text where
    {-# INLINE hash #-}
    hash h = mixConstructor 0 .
        TL.foldlChunks hashText h

-- | Here we hash each byte of the array in turn. If using this to hash some
-- data stored internally as a @ByteArray#@, be aware that depending on the
-- size and alignment requirements of that data, as well as the endianness of
-- your machine, this might result in different hash values across different
-- architectures.
instance Hashable P.ByteArray where
    {-# INLINE hash #-}
    hash h = \ba-> mixConstructor 0 $
        hashByteArray h (P.sizeofByteArray ba) ba

-- ------------------------------------------------------------------
-- MISC THINGS:



-- TODO look at core
-- | Hash a @Char@ as big endian UTF-16. Note that Char permits values in the
-- reserved unicode range U+D800 to U+DFFF; these Char values are added to the
-- hash just as if they were valid 16-bit characters.
instance Hashable Char where
    {-# INLINE hash #-}
    hash h = go where
      -- Encoding a unicode code point in UTF-16. adapted from
      -- Data.Text.Internal.Unsafe.Char.unsafeWrite:
    --go c | n .&. complement 0xFFFF == 0 =  -- TODO try this, etc. TODO try look at core & try unsafeCoerce
      go c | n < 0x10000 = h `mix16` fromIntegral n
              -- TODO MODIFY lo AND CALL mix32, 
           | otherwise = h `mix16` lo `mix16` hi

        where n = ord c
              m = n - 0x10000
              lo = fromIntegral $ (m `unsafeShiftR` 10) + 0xD800
              hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00


-- | __NOTE__: no promise of consistency across runs or platforms.
instance Hashable ThreadId where
    {-# INLINE hash #-}
    hash h = \(ThreadId tid)-> 
        hash h (fromIntegral $ getThreadId tid :: Word)

-- | __NOTE__: no promise of consistency across platforms or GHC versions.
instance Hashable TypeRep where
    {-# INLINE hash #-}
    hash h = hash h . typeRepInt32

typeRepInt32 :: TypeRep -> Int32
{-# INLINE typeRepInt32 #-}
typeRepInt32 = 
# if __GLASGOW_HASKELL__ >= 710
    -- Fingerprint is just the MD5, so taking any Int from it is fine
    (\(Fingerprint i64 _) -> fromIntegral i64) . typeRepFingerprint
# else
-- __GLASGOW_HASKELL__ >= 702
    -- Fingerprint is just the MD5, so taking any Int from it is fine
    \(TypeRep (Fingerprint i64 _) _ _) -> fromIntegral i64
# endif


-- | __NOTE__: No promise of stability across runs or platforms. Implemented via
-- 'hashStableName'.
instance Hashable (StableName a) where
    {-# INLINE hash #-}
    hash h = \x-> hash h $ hashStableName x

-- | The (now deprecated) @versionTags@ field is ignored, and we follow the
-- 'Eq' instance which does not ignore trailing zeros.
instance Hashable Version where
    {-# INLINE hash #-}
    hash h = \x-> hash h $ versionBranch x

-- | __NOTE__: No promise of stability across runs or platforms. Implemented via
-- 'hashUnique'.
instance Hashable Unique where
    {-# INLINE hash #-}
    hash h = \x-> hash h $ hashUnique x

-- ------------------------------------------------------------------
-- ALGEBRAIC DATA TYPES:


-- ---------
-- Sum types

-- | > hash h = hash h . \b-> if b then (1::Word8) else 0
instance Hashable Bool where
    {-# INLINE hash #-}
    hash h = hash h . \b-> if b then (1::Word8) else 0


instance Hashable Ordering where
    {-# INLINE hash #-}
    hash h = flip mixConstructor h . fromIntegral . fromEnum

instance Hashable a => Hashable [a] where
    -- TODO OPTIMIZE (see notes below)
    {-# INLINE hash #-}
    hash h = mixConstructor 0 .
        hashFoldl' h

instance Hashable a => Hashable (Maybe a) where
    {-# INLINE hash #-}
    hash h Nothing  = mixConstructor 0 h
    hash h (Just a) = mixConstructor 1 $ hash h a

instance (Hashable a, Hashable b) => Hashable (Either a b) where
    {-# INLINE hash #-}
    hash h = either (mx 0) (mx 1) where
        mx n = mixConstructor n . hash h


-- ---------
-- Tuples (product types)

-- Per our rules, this must perturb the hash value by at least a byte, even
-- though its value is entirely "fixed" by its type. Consider [()]; the
-- instance relies on () following the rule.
--
-- | > hash = const . mixConstructor 0
instance Hashable () where
    {-# INLINE hash #-}
    hash = const . mixConstructor 0

instance (Hashable a1, Hashable a2) => Hashable (a1, a2) where
    {-# INLINE hash #-}
    hash h (a,b) = h `hash` a `hash` b

instance (Hashable a1, Hashable a2, Hashable a3) => Hashable (a1, a2, a3) where
    {-# INLINE hash #-}
    hash h (a,b,c) = h `hash` a `hash` b `hash` c

instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4) => Hashable (a1, a2, a3, a4) where
    {-# INLINE hash #-}
    hash h (a,b,c,d) = h `hash` a `hash` b `hash` c `hash` d

instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4, Hashable a5) => Hashable (a1, a2, a3, a4, a5) where
    {-# INLINE hash #-}
    hash h (a,b,c,d,e) = h `hash` a `hash` b `hash` c `hash` d `hash` e

instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4, Hashable a5, Hashable a6) => Hashable (a1, a2, a3, a4, a5, a6) where
    {-# INLINE hash #-}
    hash h (a,b,c,d,e,f) = h `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f

instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4, Hashable a5, Hashable a6, Hashable a7) => Hashable (a1, a2, a3, a4, a5, a6, a7) where
    {-# INLINE hash #-}
    hash h (a,b,c,d,e,f,g) = h `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g

instance (Hashable a1, Hashable a2, Hashable a3, Hashable a4, Hashable a5, Hashable a6, Hashable a7, Hashable a8) => Hashable (a1, a2, a3, a4, a5, a6, a7, a8) where
    {-# INLINE hash #-}
    hash hsh (a,b,c,d,e,f,g,h) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e, Hashable f, Hashable g, Hashable h, Hashable i)=> Hashable (a, b, c, d, e, f, g, h, i) where
    {-# INLINE hash #-}
    hash hsh (a, b, c, d, e, f, g, h, i) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h `hash` i

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e, Hashable f, Hashable g, Hashable h, Hashable i, Hashable j)=> Hashable (a, b, c, d, e, f, g, h, i, j) where
    {-# INLINE hash #-}
    hash hsh (a, b, c, d, e, f, g, h, i, j) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h `hash` i `hash` j

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e, Hashable f, Hashable g, Hashable h, Hashable i, Hashable j, Hashable k)=> Hashable (a, b, c, d, e, f, g, h, i, j, k) where
    {-# INLINE hash #-}
    hash hsh (a, b, c, d, e, f, g, h, i, j, k) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h `hash` i `hash` j `hash` k

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e, Hashable f, Hashable g, Hashable h, Hashable i, Hashable j, Hashable k, Hashable l)=> Hashable (a, b, c, d, e, f, g, h, i, j, k, l) where
    {-# INLINE hash #-}
    hash hsh (a, b, c, d, e, f, g, h, i, j, k, l) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h `hash` i `hash` j `hash` k `hash` l

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e, Hashable f, Hashable g, Hashable h, Hashable i, Hashable j, Hashable k, Hashable l, Hashable m)=> Hashable (a, b, c, d, e, f, g, h, i, j, k, l, m) where
    {-# INLINE hash #-}
    hash hsh (a, b, c, d, e, f, g, h, i, j, k, l, m) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h `hash` i `hash` j `hash` k `hash` l `hash` m

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e, Hashable f, Hashable g, Hashable h, Hashable i, Hashable j, Hashable k, Hashable l, Hashable m, Hashable n)=> Hashable (a, b, c, d, e, f, g, h, i, j, k, l, m, n) where
    {-# INLINE hash #-}
    hash hsh (a, b, c, d, e, f, g, h, i, j, k, l, m, n) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h `hash` i `hash` j `hash` k `hash` l `hash` m `hash` n

instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e, Hashable f, Hashable g, Hashable h, Hashable i, Hashable j, Hashable k, Hashable l, Hashable m, Hashable n, Hashable o)=> Hashable (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) where
    {-# INLINE hash #-}
    hash hsh (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) = hsh `hash` a `hash` b `hash` c `hash` d `hash` e `hash` f `hash` g `hash` h `hash` i `hash` j `hash` k `hash` l `hash` m `hash` n `hash` o




-- | A helper for implementing 'typeHash' by 'xor'-ing type parameters with a
-- new random hash value. E.g.:
--
-- @
--  instance (StableHashable a, StableHashable b) => StableHashable (a, b) where
--      typeHash = mixType (mixType (TypeHash 12071780118071628513))
--                   \         \                \__ a new random value for (,)
--                    \         \____ mix in the type hash for 'a'
--                     \__ mix in the type hash for 'b'
-- @
mixType :: forall a t. StableHashable a=> TypeHash t -> TypeHash (t a)
mixType (TypeHash t)= TypeHash (t `xor` (typeHashWord (typeHash :: TypeHash a)))


-- | A value that uniquely identifies a 'StableHashable' type. This serves to
-- both version a type with respect to its 'Hashable' instance, and distinguish
-- types from each other (similar to 'TypeRep') across program runs, platforms
-- and library versions.
newtype TypeHash (a :: k) = TypeHash { typeHashWord :: Word64 }
    deriving (Eq, Read, Show, Bits)

-- > typeHashOf _ = typeHash
typeHashOf :: StableHashable a=> a -> TypeHash a
typeHashOf _ = typeHash

-- > typeHashOfProxy _ = typeHash
typeHashOfProxy :: StableHashable a=> proxy a -> TypeHash a
typeHashOfProxy _ = typeHash

-- | Types whose hashes can be compared across platforms. This is somewhat like
-- a limited, but cross-platform 'Typeable'.
--
-- Instances are expected to be universally-unique, and should be generated
-- randomly. Type parameters can be hashed together using 'mixType', like:
--
-- > instance (StableHashable b) => StableHashable (A b) where
-- >     typeHash = mixType (TypeHash 530184177609460980)
--
-- When 'Hashable' instances change, the 'TypeHash' must be changed to a new
-- random value. This lets us \"version\" a set of hashes; if we store a
-- 'TypeHash' along with a set of hashes in program /A/, in program /B/ we can
-- compare the stored value with our own 'TypeHash' and verify that hashes we
-- generate in program /B/ can be meaningfully compared.
--
-- Note, obviously this doesn't ensure that values were hashed with the same
-- hashing algorithm, and you should come up with your own means to serialize
-- that information if you need to.
class Hashable a=> StableHashable a where
    typeHash :: TypeHash a

-- | The value here depends on whether we're on a 32 or 64-bit platform. See
-- also the instance documentation for 'Hashable'.
instance StableHashable Int where
#  if WORD_SIZE_IN_BITS == 32
    typeHash = TypeHash 14906715774445347101
#  else
    typeHash = TypeHash 13382803217769822997
#  endif
-- | The value here depends on whether we're on a 32 or 64-bit platform. See
-- also the instance documentation for 'Hashable'.
instance StableHashable Word where
#  if WORD_SIZE_IN_BITS == 32
    typeHash = TypeHash 10996918434311873249
#  else
    typeHash = TypeHash 943142231655442729
#  endif

instance StableHashable Integer where
    typeHash = TypeHash 96690694942656444

#ifdef MIN_VERSION_integer_gmp
# if MIN_VERSION_integer_gmp(1,0,0)
instance StableHashable BigNat where
    typeHash = TypeHash 2111364012200171327
# endif
#endif
#if MIN_VERSION_base(4,8,0)
instance StableHashable Natural where
    typeHash = TypeHash 11915819290390802320
instance StableHashable Void where
    typeHash = TypeHash 13639848524738715571
#endif

instance (Integral a, StableHashable a) => StableHashable (Ratio a) where
    typeHash = mixType (TypeHash 330184177609460989)
instance StableHashable Float where
    typeHash = TypeHash 4320606117681153984
instance StableHashable Double where
    typeHash = TypeHash 8849395475307970354
instance StableHashable Int8 where
    typeHash = TypeHash 17471749136236681265
instance StableHashable Int16 where
    typeHash = TypeHash 14440046210595456836
instance StableHashable Int32 where
    typeHash = TypeHash 13431688382274668720
instance StableHashable Int64 where
    typeHash = TypeHash 14806970576519879451
instance StableHashable Word8 where
    typeHash = TypeHash 6643259480050182665
instance StableHashable Word16 where
    typeHash = TypeHash 3765569608911963661
instance StableHashable Word32 where
    typeHash = TypeHash 6402686280864807547
instance StableHashable Word64 where
    typeHash = TypeHash 14652873008202722152
instance StableHashable B.ByteString where
    typeHash = TypeHash 171314019417081845
instance StableHashable BL.ByteString where
    typeHash = TypeHash 10099361054646539018
#if MIN_VERSION_bytestring(0,10,4)
instance StableHashable BSh.ShortByteString where
    typeHash = TypeHash 15680327781389053206
#endif
instance StableHashable T.Text where
    typeHash = TypeHash 14746544807555826150
instance StableHashable TL.Text where
    typeHash = TypeHash 10657741718622626930
instance StableHashable P.ByteArray where
    typeHash = TypeHash 9976019413528454024
instance StableHashable Char where
    typeHash = TypeHash 4949001641339870281
instance StableHashable Version where
    typeHash = TypeHash 702645064027678737
instance StableHashable Bool where
    typeHash = TypeHash 2172478990419421580
instance StableHashable Ordering where
    typeHash = TypeHash 9293112338546135338
instance StableHashable a => StableHashable [a] where
    typeHash = mixType (TypeHash 8959911929979074606)
instance StableHashable a => StableHashable (Maybe a) where
    typeHash = mixType (TypeHash 17404804613103585388)
instance (StableHashable a, StableHashable b) => StableHashable (Either a b) where
    typeHash = mixType (TypeHash 2275317158072284048)
instance StableHashable () where
    typeHash = TypeHash 6095166973227743591
instance (StableHashable a, StableHashable b) => StableHashable (a, b) where
    typeHash = mixType $ mixType (TypeHash 12071780118071628513)
instance (StableHashable a, StableHashable b, StableHashable c) => StableHashable (a, b, c) where
    typeHash = mixType $ mixType $ (TypeHash 4618299809208311661)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d) => StableHashable (a, b, c, d) where
    typeHash = mixType $mixType $mixType $   (TypeHash 2134528412514930125)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e) => StableHashable (a, b, c, d, e) where
    typeHash = mixType $mixType $mixType $mixType $    (TypeHash 6145113094462899758)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f) => StableHashable (a, b, c, d, e, f) where
    typeHash = mixType $mixType $mixType $mixType $mixType $     (TypeHash 44771254230381456)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g) => StableHashable (a, b, c, d, e, f, g) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $      (TypeHash 9917360176431723073)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h) => StableHashable (a, b, c, d, e, f, g, h) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $       (TypeHash 2303481052083416811)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h, StableHashable i)=> StableHashable (a, b, c, d, e, f, g, h, i) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $        (TypeHash 6307505215888440984)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h, StableHashable i, StableHashable j)=> StableHashable (a, b, c, d, e, f, g, h, i, j) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $         (TypeHash 16862409449834578942)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h, StableHashable i, StableHashable j, StableHashable k)=> StableHashable (a, b, c, d, e, f, g, h, i, j, k) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $          (TypeHash 12571504671032409264)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h, StableHashable i, StableHashable j, StableHashable k, StableHashable l)=> StableHashable (a, b, c, d, e, f, g, h, i, j, k, l) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $           (TypeHash 18057402240390888799)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h, StableHashable i, StableHashable j, StableHashable k, StableHashable l, StableHashable m)=> StableHashable (a, b, c, d, e, f, g, h, i, j, k, l, m) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $            (TypeHash 1509508551579382043)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h, StableHashable i, StableHashable j, StableHashable k, StableHashable l, StableHashable m, StableHashable n)=> StableHashable (a, b, c, d, e, f, g, h, i, j, k, l, m, n) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $             (TypeHash 17220521008040723494)
instance (StableHashable a, StableHashable b, StableHashable c, StableHashable d, StableHashable e, StableHashable f, StableHashable g, StableHashable h, StableHashable i, StableHashable j, StableHashable k, StableHashable l, StableHashable m, StableHashable n, StableHashable o)=> StableHashable (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) where
    typeHash = mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $mixType $               (TypeHash 17803228377227705691)



-- WISHLIST:
--   - :: Word64 -> (Word32,Word32)  for 32-bit machines.

-- TODO PERFORMANCE:
--   x try using peek, where: peekByteOff addr off = peek (addr `plusPtr` off)
--     (if it works add elsewhere)
--   - when we have branch pred test:
--     x try doing single conditional check at beginning to determine code path to take (whether we can skip hashRemainingBytes)
--     x can we do branchless be using a function pointer (or a haskell array of functions)?
--     - also make preferential use of mix16/mix32
--       - we can enumerate all possibilities with a case, matching all rem bytes 0-7.
--   X try moving unsafe calls closer into branches make things not in monad
--    X  (then test again with unsafeDupablePerformIO
--   - look again at core
--   - consider Text improvements
--   - try again making hash8ByteLoop a totally self-recursive function, or combine with hashRemainingBytes


-- This is about twice as fast as a loop with single byte peeks:
hashByteString :: (HashState h)=> h -> B.ByteString -> h
{-# INLINE hashByteString #-}
hashByteString h = \(B.PS fp off lenBytes) ->
  -- similar to: https://github.com/haskell/bytestring/blob/dd3c07d115840d13482426a0084a39201eb6b6d4/Data/ByteString/Unsafe.hs#L78
  -- 'hashable' also uses this.
  accursedUnutterablePerformIO $
      withForeignPtr fp $ \base ->
        let !bytesRem = lenBytes .&. 7  -- lenBytes `mod` 8
            !ixOutOfBounds = off+lenBytes
            -- index where we begin to read (bytesRem < 8) individual bytes:
            !bytesIx = ixOutOfBounds-bytesRem

            hash8ByteLoop !hAcc !ix 
                | ix == bytesIx = hashRemainingBytes hAcc bytesIx
                | otherwise     = assert (ix < bytesIx) $ do
                    -- TODO do we need to worry about alignment constraints for peek here?
                    -- TODO PERFORMANCE: especially if above is necessary:
                    --    try eliminating a branch advancing ix by 0 for remaining byts of a word, so we replicate last byte
                    w64Dirty <- peekByteOff base ix
                    let w64 = clean8ByteChunk w64Dirty
                    hash8ByteLoop (hAcc `mix64` w64) (ix + 8)

            hashRemainingBytes !hAcc !ix 
                | ix == ixOutOfBounds = return hAcc
                | otherwise = assert (ix < ixOutOfBounds) $ do
                    byt <- peekByteOff base ix
                    hashRemainingBytes (hAcc `mix8` byt) (ix+1)

         in hash8ByteLoop h off 



{- 
import Foreign.Ptr (castPtr, plusPtr)
import Foreign.Storable (peek)

-- This is about twice as fast as a loop with single byte peeks:
hashByteString :: (HashState h)=> h -> B.ByteString -> h
{-# INLINE hashByteString #-}
hashByteString h = \(B.PS fp off lenBytes) ->
  -- similar to: https://github.com/haskell/bytestring/blob/dd3c07d115840d13482426a0084a39201eb6b6d4/Data/ByteString/Unsafe.hs#L78
  accursedUnutterablePerformIO $
      withForeignPtr fp $ \base ->
        let !bytesRem = lenBytes .&. 7  -- lenBytes `mod` 8
            !ix0 = base `plusPtr` off
            !ixOutOfBounds = ix0 `plusPtr` lenBytes
            -- index where we begin to read (bytesRem < 8) individual bytes:
            !bytesIx = ixOutOfBounds `plusPtr` negate bytesRem

            hash8ByteLoop !hAcc !ix 
                | ix == bytesIx = hashRemainingBytes hAcc (castPtr bytesIx)
                | otherwise     = assert (ix < bytesIx) $ do
                    w64Dirty <- peek ix -- (base `plusPtr` ix)
                    let w64 = if littleEndian
                                then byteSwap64 w64Dirty
                                else w64Dirty

                    hash8ByteLoop (hAcc `mix64` w64) (ix `plusPtr` 8)

            hashRemainingBytes !hAcc !ix 
                | ix == ixOutOfBounds = return hAcc
                | otherwise = assert (ix < ixOutOfBounds) $ do
                    byt <- peek ix
                    hashRemainingBytes (hAcc `mix8` byt) (ix `plusPtr` 1)

         in hash8ByteLoop h ix0
-}
{- FAILED UNFOLDING EXPERIMENT #1
-- import GHC.Prim(tagToEnum#)

-- This is about twice as fast as a loop with single byte peeks:
hashByteString :: (HashState h)=> h -> B.ByteString -> h
{-# INLINE hashByteString #-}
hashByteString h = \(B.PS fp off lenBytes) ->
  -- similar to: https://github.com/haskell/bytestring/blob/dd3c07d115840d13482426a0084a39201eb6b6d4/Data/ByteString/Unsafe.hs#L78
  accursedUnutterablePerformIO $
    withForeignPtr fp $ \base ->
        let !bytesRem = lenBytes .&. 7  -- lenBytes `mod` 8
            !word64Chunks = lenBytes `unsafeShiftR` 3 -- `div` 8
            !ixOutOfBounds = off+lenBytes
            -- index where we begin to read (bytesRem < 8) individual bytes:
            !bytesIx = ixOutOfBounds-bytesRem

            hash8ByteLoop !hAcc !ix 
                | ix == bytesIx = return hAcc
             -- | ix == bytesIx = hashRemainingBytes hAcc bytesIx
                | otherwise     = assert (ix < bytesIx) $ do
                    w64Dirty <- peekByteOff base ix
                    let w64 = if littleEndian
                                then byteSwap64 w64Dirty
                                else w64Dirty

                    hash8ByteLoop (hAcc `mix64` w64) (ix + 8)

            hashRemainingBytes !hAcc !ix 
                | ix == ixOutOfBounds = return hAcc
                | otherwise = assert (ix < ixOutOfBounds) $ do
                    byt <- peekByteOff base ix
                    hashRemainingBytes (hAcc `mix8` byt) (ix+1)

    -- TODO - Inline,
    --      - try nested, non-tuple case
    --      - move accursed... into here
    --      - tagToEnum to get jump table
         in case tagToEnum word64Chunks of
            N0-> case tagToEnum bytesRem of
            -- small, no need for hash8ByteLoop
                  N1 -> mix8 h <$> peekByteOff base off
                  N2 -> mix16 h <$> peekByteOff base off
                  N3 -> do
                    hAcc <- mix16 h <$> peekByteOff base off
                    mix8 hAcc <$> peekByteOff base (off+2)
                  N4 -> mix32 h <$> peekByteOff base off
                  N5 -> do
                    hAcc <- mix32 h <$> peekByteOff base off
                    mix8 hAcc <$> peekByteOff base (off+4)
                  N6 -> do
                    hAcc <- mix32 h <$> peekByteOff base off
                    mix16 hAcc <$> peekByteOff base (off+4)
                  _ -> assert (bytesRem == 7) $ do
                    hAcc <- mix32 h <$> peekByteOff base off
                    hAcc' <- mix16 hAcc <$> peekByteOff base (off+4)
                    mix8 hAcc' <$> peekByteOff base (off+6)
              -- no need for hashRemainingBytes
            _ -> case bytesRem of
                   0 -> hash8ByteLoop h off
                   _ -> error "TODO"
              -- for some word64 chunks:
              -- (1, _) -> 
              -- (2, _) -> 
              -- (3, _) ->
              -- (4, _) ->
              -- (5, _) ->
              -- (6, _) ->
              -- (7, _) ->

tagToEnum :: Int -> N
{-# INLINE tagToEnum #-}
tagToEnum (I# i#) = tagToEnum# i# :: N
data N = N0 | N1 | N2 | N3 | N4 | N5 | N6 | N7 | N8
-}


-- TODO PERFORMANCE:
--   copy ByteString benchmark
--   try out an incorrect version with mix64
--     if faster try out mix64 with proper endian adjustments.

-- NOTE: we can't simply call hashByteArray here; Text is stored as
-- machine-endian UTF-16 (as promised by public Data.Text.Foreign), so we need
-- to read Word16 here in order to hash as Big-Endian UTF-16.
hashText :: (HashState h)=> h -> T.Text -> h
{-# INLINE hashText #-}
hashText h = \(T.Text (T.Array ba_) off lenWord16) -> 
    let ba = P.ByteArray ba_
        !word16sRem = lenWord16 .&. 3         -- lenWord16 `mod` 4
        -- index where we begin to read (word16sRem < 4) individual Word16s:
        !word16sIx = off+lenWord16-word16sRem
        !ixFinal = off+lenWord16-1

        hash4Word16sLoop !hAcc !ix 
            | ix == word16sIx = hashRemainingWord16s hAcc word16sIx
            | otherwise     = assert (ix < word16sIx) $
                -- CAREFUL: Word16s are stored in machine-endian, so we must
                -- read them out as Word16:
                let w0 = P.indexByteArray ba ix
                    w1 = P.indexByteArray ba (ix+1)
                    w2 = P.indexByteArray ba (ix+2)
                    w3 = P.indexByteArray ba (ix+3)
                 in hash4Word16sLoop (hAcc `mix16` w0 `mix16` w1 `mix16` w2 `mix16` w3) (ix + 4)
        
        hashRemainingWord16s !hAcc !ix 
            | ix > ixFinal  = hAcc 
            | otherwise     = assert (ix <= ixFinal) $
                let w0 = P.indexByteArray ba ix
                 in hashRemainingWord16s (hAcc `mix16` w0) (ix+1)
     in hash4Word16sLoop h off 

hashByteArray :: (HashState h)=> h -> Int -> P.ByteArray -> h
{-# INLINE hashByteArray #-}
hashByteArray h !lenBytes ba = 
    let !bytesRem = lenBytes .&. 7         -- lenBytes `mod` 8
        -- index where we begin to read (bytesRem < 8) individual bytes:
        !bytesIx = lenBytes-bytesRem
        !ixFinal = lenBytes-1
        -- bytesIx in elements of Word32:
        !bytesIxWd = bytesIx `unsafeShiftR` 3  -- `div` 8

        -- Index `ix` in terms of elements of Word32 or Word64, depending on
        -- WORD_SIZE_IN_BITS
        hash8ByteLoop !hAcc !ix 
            | ix == bytesIxWd = hashRemainingBytes hAcc bytesIx
            | otherwise     = assert (ix < bytesIxWd) $
                    let w64Dirty = P.indexByteArray ba ix
                        w64 = clean8ByteChunk w64Dirty
                     in hash8ByteLoop (hAcc `mix64` w64) (ix + 1)
        
        hashRemainingBytes !hAcc !ix 
            | ix > ixFinal  = hAcc 
            | otherwise     = assert (ix <= ixFinal) $
                let b0 = P.indexByteArray ba ix
                 in hashRemainingBytes (hAcc `mix8` b0) (ix+1)
     in hash8ByteLoop h 0 

-- Take the result of doing a read of a Word64 (e.g. by indexByteArray) and
-- convert it into the value of that memory location in big endian. This must
-- take into account endianness as well as understanding of how ghc/ghcjs
-- stores a Word64 on a 32-bit platform.
clean8ByteChunk :: Word64 -> Word64
{-# INLINE clean8ByteChunk #-}
clean8ByteChunk w64Dirty 
  | littleEndian = 
#    if WORD_SIZE_IN_BITS == 64
      byteSwap64 w64Dirty
#    else
      let (w32Dirty_0, w32Dirty_1) = words32 w64Dirty
          w64_0 = (fromIntegral $ byteSwap32 w32Dirty_0) `unsafeShiftL` 32
          w64_1 =  fromIntegral $ byteSwap32 w32Dirty_1
       in w64_0 .|. w64_1
#    endif
  | otherwise = w64Dirty
-- TODO test on both bytearray and bytestrings [1..8], and check on 


---------------- LIST INSTANCE SCRATCH WORK:
-- 
-- We need to look at how inlining progresses and figure out a way to have our
-- list instance be optimal. See scratch work below.



-- TODO more different benchmarks of different types of lists, and ways of
-- constructing, and examine which of these two to use (and when):
--   We might be able to NOINLINE hashLeftUnfolded version (if performance
--   unaffected), and then re-write to hashFoldl' version based on argument
--   TODO :
--     or use our own rules so that we can get both fusion and unrolling?
--     (or would that not be helpful, since values already in a register?)

-- 7.10
--   APPLIED TO (take 250 $ iterate (+1) (1::Word8))  339.4 ns  !! MATCHING BASELINE
--   APPLIED TO ([1.. 250 :: Word8])                  1.766 μs
-- 7.8
--   APPLIED TO (take 250 $ iterate (+1) (1::Word8))  8.938 μs  -- NOTE: in general, 7.8 seems to do poorly applying folds to this in the context of criterion benchmarks
--   APPLIED TO ([1.. 250 :: Word8])                  846.5 ns
hashFoldl' :: (Hashable a, HashState h)=> h -> [a] -> h
-- hashFoldl' :: Word32 -> [Word8] -> Word32  -- NOTE: tested above w/ this monomorphic sig
{-# INLINE hashFoldl' #-}
hashFoldl' = foldl' (\h' a-> h' `hash` a)

-- 7.10
--   APPLIED TO ([1.. 250 :: Word8])                  675.6 ns
-- 7.8
--   APPLIED TO ([1.. 250 :: Word8])                  729.6 ns
hashLeftUnfolded :: (Hashable a, HashState h)=> h -> [a] -> h
-- hashLeftUnfolded :: Word32 -> [Word8] -> Word32  -- NOTE: tested above w/ this monomorphic sig
{-# INLINE hashLeftUnfolded #-}
hashLeftUnfolded = go
    where go !h [] = h
          -- This seems to be sweet spot on my machine:
          go !h (a1:a2:a3:a4:a5:a6:as) = go (h `hash` a1 `hash` a2 `hash` a3 `hash` a4 `hash` a5 `hash` a6) as
          go !h (a1:as) = go (h `hash` a1) as