{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UnboxedSums #-}
{-# LANGUAGE UnboxedTuples #-}

-- | Implementation of static hash map data structure.
module Data.Bytes.HashMap
  ( Map
  , empty
  , lookup
  , fromList
  , fromTrustedList
  , fromListWith
  , elements

    -- * Used for testing
  , HashMapException (..)
  , distribution
  , distinctEntropies
  ) where

-- Implementation notes. This module uses a variant of the technique
-- described in http://stevehanov.ca/blog/?id=119 with the big difference
-- being that we do not throw away the keys. You can only throw away the
-- keys in very specific problem domains where you somehow control
-- everything that is going to be looked up.
--
-- General implementation thoughts. It would be really nice to figure
-- out how to parallelize hashing. We currently go one byte at a time.
-- Processing more bytes at a time would cut down on memory loads.
-- However, doing more than one byte at a time is tricky. When you
-- get to the end of a string, you end up having to do some extra
-- finagling to make sure you do not read past the end. I have tried
-- to do this in the past, and it is difficult to do it correctly.
--
-- Other thought: Using a random 64-bit word for each byte is pretty
-- heavy handed. 64-bit words give us 32-bit hashes, but in most cases,
-- we are not building maps that are that big. We really only need
-- 16-bit hashes most of the time (maps with less than 64K values).
-- Switching to 32-bit words would save space. Plus, if we did this,
-- we could also use SSE _mm_add_epi32 and _mm_add_epi32 to process
-- four bytes at a time.

import Prelude hiding (lookup)

import Control.Exception (Exception, throw)
import Control.Monad (when)
import Control.Monad.ST (ST, runST, stToIO)
import Control.Monad.Trans.Except (ExceptT (ExceptT), runExceptT)
import Data.Bytes.HashMap.Internal (Map (Map))
import Data.Bytes.Types (Bytes (Bytes))
import Data.Foldable (foldlM, for_)
import Data.Int (Int32)
import Data.Ord (Down (Down))
import Data.Primitive (ByteArray (..), PrimArray (..))
import Data.Primitive.SmallArray (SmallArray (..))
import Data.Primitive.Unlifted.Array (UnliftedArray, UnliftedArray_ (UnliftedArray))
import Data.Primitive.Unlifted.Array.Primops (UnliftedArray#)
import Data.STRef (STRef, newSTRef, readSTRef, writeSTRef)
import Foreign.Ptr (plusPtr)
import GHC.Exts (ByteArray#, Int (I#), Int#, Ptr, RealWorld, SmallArray#)
import GHC.IO (ioToST)
import GHC.Word (Word (W#), Word32, Word8)
import System.Entropy (CryptHandle, hGetEntropy)

import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Unsafe as ByteString
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Hash as Hash
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Primitive as PM
import qualified Data.Primitive.Unlifted.Array as PM
import qualified GHC.Exts as Exts

{- | Recover the elements of the hashmap. These are ordered
lexicographically by their corresponding keys. That is, this
function returns the same output regardless of the entropy used
to build the hashmap.
-}
elements :: Map v -> [v]
elements :: forall v. Map v -> [v]
elements (Map ByteArray
_ UnliftedArray ByteArray
_ PrimArray Int32
_ UnliftedArray ByteArray
keys SmallArray v
vals) = Map ByteArray v -> [v]
forall k a. Map k a -> [a]
Map.elems (Map ByteArray v -> Int -> Map ByteArray v
go Map ByteArray v
forall k a. Map k a
Map.empty (SmallArray v -> Int
forall a. SmallArray a -> Int
PM.sizeofSmallArray SmallArray v
vals Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))
 where
  go :: Map ByteArray v -> Int -> Map ByteArray v
go !Map ByteArray v
acc !Int
ix = case Int
ix of
    (-1) -> Map ByteArray v
acc
    Int
_ ->
      let !k :: ByteArray
k = UnliftedArray ByteArray -> Int -> ByteArray
forall a. PrimUnlifted a => UnliftedArray a -> Int -> a
PM.indexUnliftedArray UnliftedArray ByteArray
keys Int
ix
          !v :: v
v = SmallArray v -> Int -> v
forall a. SmallArray a -> Int -> a
PM.indexSmallArray SmallArray v
vals Int
ix
       in Map ByteArray v -> Int -> Map ByteArray v
go (ByteArray -> v -> Map ByteArray v -> Map ByteArray v
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert ByteArray
k v
v Map ByteArray v
acc) (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)

{- | Build a static hash map. This may be used on input that comes
from an adversarial user. It always produces a perfect hash map.
-}
fromList :: CryptHandle -> [(Bytes, v)] -> IO (Map v)
fromList :: forall v. CryptHandle -> [(Bytes, v)] -> IO (Map v)
fromList CryptHandle
h = CryptHandle -> (v -> v -> v) -> [(Bytes, v)] -> IO (Map v)
forall v.
CryptHandle -> (v -> v -> v) -> [(Bytes, v)] -> IO (Map v)
fromListWith CryptHandle
h v -> v -> v
forall a b. a -> b -> a
const

{- | Build a map from keys that are known at compile time.
All keys must be 64 bytes or less. This uses a built-in source
of entropy and is entirely deterministic. An adversarial user
could feed this function keys that cause it to error out rather
than completing.
-}
fromTrustedList :: [(Bytes, v)] -> Map v
fromTrustedList :: forall v. [(Bytes, v)] -> Map v
fromTrustedList [(Bytes, v)]
xs = (forall s. ST s (Map v)) -> Map v
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Map v)) -> Map v)
-> (forall s. ST s (Map v)) -> Map v
forall a b. (a -> b) -> a -> b
$ do
  STRef s Int
ref <- Int -> ST s (STRef s Int)
forall a s. a -> ST s (STRef s a)
newSTRef Int
0
  STRef s Int
-> (STRef s Int -> Int -> ST s ByteArray)
-> (v -> v -> v)
-> [(Bytes, v)]
-> ST s (Map v)
forall a s v.
a
-> (a -> Int -> ST s ByteArray)
-> (v -> v -> v)
-> [(Bytes, v)]
-> ST s (Map v)
fromListWithGen STRef s Int
ref STRef s Int -> Int -> ST s ByteArray
forall s. STRef s Int -> Int -> ST s ByteArray
askForEntropyST v -> v -> v
forall a b. a -> b -> a
const [(Bytes, v)]
xs

-- | An empty @Map@.
empty :: Map v
empty :: forall v. Map v
empty = ByteArray
-> UnliftedArray ByteArray
-> PrimArray Int32
-> UnliftedArray ByteArray
-> SmallArray v
-> Map v
forall v.
ByteArray
-> UnliftedArray ByteArray
-> PrimArray Int32
-> UnliftedArray ByteArray
-> SmallArray v
-> Map v
Map ByteArray
forall a. Monoid a => a
mempty UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
forall a. Monoid a => a
mempty PrimArray Int32
forall a. Monoid a => a
mempty UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
forall a. Monoid a => a
mempty SmallArray v
forall a. Monoid a => a
mempty

-- | Returns the value associated with the key in the map.
lookup :: Bytes -> Map v -> Maybe v
{-# INLINE lookup #-}
lookup :: forall v. Bytes -> Map v -> Maybe v
lookup
  (Bytes (ByteArray ByteArray#
keyArr) (I# Int#
keyOff) (I# Int#
keyLen))
  (Map (ByteArray ByteArray#
entropyA) (UnliftedArray UnliftedArray# (Unlifted ByteArray)
entropies) (PrimArray ByteArray#
offsets) (UnliftedArray UnliftedArray# (Unlifted ByteArray)
keys) (SmallArray SmallArray# v
vals)) =
    case (# ByteArray#, Int#, Int# #)
-> (# ByteArray#, UnliftedArray# ByteArray#, ByteArray#,
      UnliftedArray# ByteArray#, SmallArray# v #)
-> (# (# #) | v #)
forall v.
(# ByteArray#, Int#, Int# #)
-> (# ByteArray#, UnliftedArray# ByteArray#, ByteArray#,
      UnliftedArray# ByteArray#, SmallArray# v #)
-> (# (# #) | v #)
lookup# (# ByteArray#
keyArr, Int#
keyOff, Int#
keyLen #) (# ByteArray#
entropyA, UnliftedArray# ByteArray#
UnliftedArray# (Unlifted ByteArray)
entropies, ByteArray#
offsets, UnliftedArray# ByteArray#
UnliftedArray# (Unlifted ByteArray)
keys, SmallArray# v
vals #) of
      (# (# #) | #) -> Maybe v
forall a. Maybe a
Nothing
      (# | v
v #) -> v -> Maybe v
forall a. a -> Maybe a
Just v
v

-- One compelling optimization done here is that we use sameByteArray
-- to check if the sources of entropy are pointer-wise equal. This is
-- a very inexpensive check, and it ends up being true close to 50%
-- of the time. If it is true, we can avoid hashing a second time.
-- which avoids reading from a place in memory that is essentially
-- random. One way to further improve the performance of this library
-- would be to try to get doubleton buckets to use entropyA by searching
-- for a suitable offset.
lookup# ::
  (# ByteArray#, Int#, Int# #) ->
  (# ByteArray#, UnliftedArray# ByteArray#, ByteArray#, UnliftedArray# ByteArray#, SmallArray# v #) ->
  (# (# #) | v #)
{-# NOINLINE lookup# #-}
lookup# :: forall v.
(# ByteArray#, Int#, Int# #)
-> (# ByteArray#, UnliftedArray# ByteArray#, ByteArray#,
      UnliftedArray# ByteArray#, SmallArray# v #)
-> (# (# #) | v #)
lookup# (# ByteArray#
keyArr#, Int#
keyOff#, Int#
keyLen# #) (# ByteArray#
entropyA#, UnliftedArray# ByteArray#
entropies#, ByteArray#
offsets#, UnliftedArray# ByteArray#
keys#, SmallArray# v
vals# #)
  | Int
sz Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = (# (# #) | #)
  | ByteArray -> Int
PM.sizeofByteArray ByteArray
entropyA Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
reqEntropy = (# (# #) | #)
  | Int
ixA <- Word -> Int
w2i (Word -> Word -> Word
unsafeRem (Word32 -> Word
upW32 (ByteArray -> Bytes -> Word32
Hash.bytes ByteArray
entropyA Bytes
key)) (Int -> Word
i2w Int
sz))
  , ByteArray
entropyB <- UnliftedArray ByteArray -> Int -> ByteArray
forall a. PrimUnlifted a => UnliftedArray a -> Int -> a
PM.indexUnliftedArray UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
entropies Int
ixA
  , Int
offset <- forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int32 @Int (PrimArray Int32 -> Int -> Int32
forall a. Prim a => PrimArray a -> Int -> a
PM.indexPrimArray PrimArray Int32
offsets Int
ixA) =
      case ByteArray -> ByteArray -> Int#
sameByteArray ByteArray
entropyA ByteArray
entropyB of
        Int#
1#
          | Int
ix <- Int
ixA
          , Int
offsetIx <- Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ix
          , Bytes -> ByteArray -> Bool
bytesEqualsByteArray Bytes
key (UnliftedArray ByteArray -> Int -> ByteArray
forall a. PrimUnlifted a => UnliftedArray a -> Int -> a
PM.indexUnliftedArray UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
keys Int
offsetIx)
          , !(# v
v #) <- SmallArray v -> Int -> (# v #)
forall a. SmallArray a -> Int -> (# a #)
PM.indexSmallArray## SmallArray v
vals Int
offsetIx ->
              (# | v
v #)
          | Bool
otherwise -> (# (# #) | #)
        Int#
_
          | ByteArray -> Int
PM.sizeofByteArray ByteArray
entropyB Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
reqEntropy
          , Int
ix <- Word -> Int
w2i (Word -> Word -> Word
unsafeRem (Word32 -> Word
upW32 (ByteArray -> Bytes -> Word32
Hash.bytes ByteArray
entropyB Bytes
key)) (Int -> Word
i2w Int
sz))
          , Int
offsetIx <- Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ix
          , Bytes -> ByteArray -> Bool
bytesEqualsByteArray Bytes
key (UnliftedArray ByteArray -> Int -> ByteArray
forall a. PrimUnlifted a => UnliftedArray a -> Int -> a
PM.indexUnliftedArray UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
keys Int
offsetIx)
          , !(# v
v #) <- SmallArray v -> Int -> (# v #)
forall a. SmallArray a -> Int -> (# a #)
PM.indexSmallArray## SmallArray v
vals Int
offsetIx ->
              (# | v
v #)
          | Bool
otherwise -> (# (# #) | #)
 where
  sz :: Int
sz = UnliftedArray ByteArray -> Int
forall e. UnliftedArray e -> Int
PM.sizeofUnliftedArray UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
entropies
  reqEntropy :: Int
reqEntropy = Word -> Int
w2i (Word -> Word
requiredEntropy (Int -> Word
i2w (Bytes -> Int
Bytes.length Bytes
key)))
  key :: Bytes
key = ByteArray -> Int -> Int -> Bytes
Bytes (ByteArray# -> ByteArray
ByteArray ByteArray#
keyArr#) (Int# -> Int
I# Int#
keyOff#) (Int# -> Int
I# Int#
keyLen#)
  entropyA :: ByteArray
entropyA = ByteArray# -> ByteArray
ByteArray ByteArray#
entropyA#
  entropies :: UnliftedArray ByteArray
entropies = UnliftedArray# ByteArray# -> UnliftedArray_ ByteArray# ByteArray
forall (unlifted_a :: UnliftedType) a.
UnliftedArray# unlifted_a -> UnliftedArray_ unlifted_a a
UnliftedArray UnliftedArray# ByteArray#
entropies# :: UnliftedArray ByteArray
  keys :: UnliftedArray ByteArray
keys = UnliftedArray# ByteArray# -> UnliftedArray_ ByteArray# ByteArray
forall (unlifted_a :: UnliftedType) a.
UnliftedArray# unlifted_a -> UnliftedArray_ unlifted_a a
UnliftedArray UnliftedArray# ByteArray#
keys# :: UnliftedArray ByteArray
  vals :: SmallArray v
vals = SmallArray# v -> SmallArray v
forall a. SmallArray# a -> SmallArray a
SmallArray SmallArray# v
vals#
  offsets :: PrimArray Int32
offsets = ByteArray# -> PrimArray Int32
forall a. ByteArray# -> PrimArray a
PrimArray ByteArray#
offsets# :: PrimArray Int32

unsafeRem :: Word -> Word -> Word
unsafeRem :: Word -> Word -> Word
unsafeRem (W# Word#
a) (W# Word#
b) = Word# -> Word
W# (Word# -> Word# -> Word#
Exts.remWord# Word#
a Word#
b)

fromListWithGen ::
  forall a s v.
  -- | Source of randomness
  a ->
  (a -> Int -> ST s ByteArray) ->
  (v -> v -> v) ->
  [(Bytes, v)] ->
  ST s (Map v)
fromListWithGen :: forall a s v.
a
-> (a -> Int -> ST s ByteArray)
-> (v -> v -> v)
-> [(Bytes, v)]
-> ST s (Map v)
fromListWithGen a
h a -> Int -> ST s ByteArray
ask v -> v -> v
combine [(Bytes, v)]
xs
  | Int
count Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Map v -> ST s (Map v)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteArray
-> UnliftedArray ByteArray
-> PrimArray Int32
-> UnliftedArray ByteArray
-> SmallArray v
-> Map v
forall v.
ByteArray
-> UnliftedArray ByteArray
-> PrimArray Int32
-> UnliftedArray ByteArray
-> SmallArray v
-> Map v
Map ByteArray
forall a. Monoid a => a
mempty UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
forall a. Monoid a => a
mempty PrimArray Int32
forall a. Monoid a => a
mempty UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
forall a. Monoid a => a
mempty SmallArray v
forall a. Monoid a => a
mempty)
  | Bool
otherwise = do
      let maxLen' :: Int
maxLen' =
            Word -> Int
w2i (Word -> Int) -> Word -> Int
forall a b. (a -> b) -> a -> b
$
              Word -> Word
requiredEntropy (Word -> Word) -> Word -> Word
forall a b. (a -> b) -> a -> b
$
                Int -> Word
i2w (Int -> Word) -> Int -> Word
forall a b. (a -> b) -> a -> b
$
                  (Int -> (Bytes, v) -> Int) -> Int -> [(Bytes, v)] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' (\Int
acc (Bytes
b, v
_) -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Bytes -> Int
Bytes.length Bytes
b) Int
acc) Int
0 [(Bytes, v)]
xs'
          allowedCollisions :: Int
allowedCollisions = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
ceiling (Double -> Double
forall a. Floating a => a -> a
sqrt (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Double (Int
count Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1))) :: Int
      ByteArray
entropyA <- a
-> (a -> Int -> ST s ByteArray)
-> Int
-> Int
-> Int
-> [(Bytes, v)]
-> ST s ByteArray
forall s a v.
a
-> (a -> Int -> ST s ByteArray)
-> Int
-> Int
-> Int
-> [(Bytes, v)]
-> ST s ByteArray
findInitialEntropy a
h a -> Int -> ST s ByteArray
ask Int
maxLen' Int
count Int
allowedCollisions [(Bytes, v)]
xs'
      let groups :: [[(Word, (Bytes, v))]]
          groups :: [[(Word, (Bytes, v))]]
groups =
            ([(Word, (Bytes, v))] -> Down Int)
-> [[(Word, (Bytes, v))]] -> [[(Word, (Bytes, v))]]
forall b a. Ord b => (a -> b) -> [a] -> [a]
List.sortOn
              (Int -> Down Int
forall a. a -> Down a
Down (Int -> Down Int)
-> ([(Word, (Bytes, v))] -> Int)
-> [(Word, (Bytes, v))]
-> Down Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Int
List.length @[])
              ( ((Word, (Bytes, v)) -> (Word, (Bytes, v)) -> Bool)
-> [(Word, (Bytes, v))] -> [[(Word, (Bytes, v))]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
List.groupBy
                  (\(Word
x, (Bytes, v)
_) (Word
y, (Bytes, v)
_) -> Word
x Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
y)
                  ( ((Word, (Bytes, v)) -> Word)
-> [(Word, (Bytes, v))] -> [(Word, (Bytes, v))]
forall b a. Ord b => (a -> b) -> [a] -> [a]
List.sortOn
                      (Word, (Bytes, v)) -> Word
forall a b. (a, b) -> a
fst
                      ( ((Bytes, v) -> (Word, (Bytes, v)))
-> [(Bytes, v)] -> [(Word, (Bytes, v))]
forall a b. (a -> b) -> [a] -> [b]
List.map
                          (\(Bytes
b, v
v) -> (Word -> Word -> Word
forall a. Integral a => a -> a -> a
rem (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word32 @Word (ByteArray -> Bytes -> Word32
Hash.bytes ByteArray
entropyA Bytes
b)) (Int -> Word
i2w Int
count), (Bytes
b, v
v)))
                          [(Bytes, v)]
xs'
                      )
                  )
              )
      SmallMutableArray s Bool
used <- Int -> Bool -> ST s (SmallMutableArray (PrimState (ST s)) Bool)
forall (m :: * -> *) a.
PrimMonad m =>
Int -> a -> m (SmallMutableArray (PrimState m) a)
PM.newSmallArray Int
count Bool
False
      MutableUnliftedArray_ ByteArray# s ByteArray
keys <- Int
-> ByteArray
-> ST s (MutableUnliftedArray (PrimState (ST s)) ByteArray)
forall (m :: * -> *) a.
(PrimMonad m, PrimUnlifted a) =>
Int -> a -> m (MutableUnliftedArray (PrimState m) a)
PM.newUnliftedArray Int
count (ByteArray
forall a. Monoid a => a
mempty :: ByteArray)
      SmallMutableArray s v
values <- Int -> v -> ST s (SmallMutableArray (PrimState (ST s)) v)
forall (m :: * -> *) a.
PrimMonad m =>
Int -> a -> m (SmallMutableArray (PrimState m) a)
PM.newSmallArray Int
count (forall a. a
errorThunk @v)
      MutableUnliftedArray_ ByteArray# s ByteArray
entropies <- Int
-> ByteArray
-> ST s (MutableUnliftedArray (PrimState (ST s)) ByteArray)
forall (m :: * -> *) a.
(PrimMonad m, PrimUnlifted a) =>
Int -> a -> m (MutableUnliftedArray (PrimState m) a)
PM.newUnliftedArray Int
count (ByteArray
forall a. Monoid a => a
mempty :: ByteArray)
      MutablePrimArray s Int32
offsets <- Int -> ST s (MutablePrimArray (PrimState (ST s)) Int32)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
PM.newPrimArray Int
count
      MutablePrimArray (PrimState (ST s)) Int32
-> Int -> Int -> Int32 -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> Int -> a -> m ()
PM.setPrimArray MutablePrimArray s Int32
MutablePrimArray (PrimState (ST s)) Int32
offsets Int
0 Int
count (Int32
0 :: Int32)
      let {-# SCC goB #-}
          goB :: [ByteArray] -> [[(Word, (Bytes, v))]] -> ST s ()
          goB :: [ByteArray] -> [[(Word, (Bytes, v))]] -> ST s ()
goB ![ByteArray]
_ [] = () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
          goB ![ByteArray]
cache ([(Word, (Bytes, v))]
x : [[(Word, (Bytes, v))]]
ps) = case [(Word, (Bytes, v))]
x of
            [(Word
w, (Bytes
key, v
val))] -> do
              -- Space optimization for singleton buckets. If only one key
              -- hashed to a bucket, we just use entropyA as the entropy
              -- since it is guaranteed to be big enough. Then we use the
              -- offset field to correct the hash. This avoid creating any
              -- additional entropy byte arrays for singleton buckets.
              -- Technically, it should be possible to do this for some
              -- of the doubletons as well. It is just a little more
              -- difficult.
              let ix :: Int
ix = Word -> Int
w2i ([(Word, (Bytes, v))] -> Word
forall a b. [(a, b)] -> a
unsafeHeadFst [(Word, (Bytes, v))]
x)
              Int
j <- SmallMutableArray s Bool -> ST s Int
forall s. SmallMutableArray s Bool -> ST s Int
findUnused SmallMutableArray s Bool
used
              MutableUnliftedArray (PrimState (ST s)) ByteArray
-> Int -> ByteArray -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, PrimUnlifted a) =>
MutableUnliftedArray (PrimState m) a -> Int -> a -> m ()
PM.writeUnliftedArray MutableUnliftedArray_ ByteArray# s ByteArray
MutableUnliftedArray (PrimState (ST s)) ByteArray
entropies Int
ix ByteArray
entropyA
              MutablePrimArray (PrimState (ST s)) Int32
-> Int -> Int32 -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> a -> m ()
PM.writePrimArray MutablePrimArray s Int32
MutablePrimArray (PrimState (ST s)) Int32
offsets Int
ix (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Int32 (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
w))
              SmallMutableArray (PrimState (ST s)) Bool -> Int -> Bool -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> Int -> a -> m ()
PM.writeSmallArray SmallMutableArray s Bool
SmallMutableArray (PrimState (ST s)) Bool
used Int
j Bool
True
              MutableUnliftedArray (PrimState (ST s)) ByteArray
-> Int -> ByteArray -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, PrimUnlifted a) =>
MutableUnliftedArray (PrimState m) a -> Int -> a -> m ()
PM.writeUnliftedArray MutableUnliftedArray_ ByteArray# s ByteArray
MutableUnliftedArray (PrimState (ST s)) ByteArray
keys Int
j (Bytes -> ByteArray
Bytes.toByteArray Bytes
key)
              SmallMutableArray (PrimState (ST s)) v -> Int -> v -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> Int -> a -> m ()
PM.writeSmallArray SmallMutableArray s v
SmallMutableArray (PrimState (ST s)) v
values Int
j v
val
              [ByteArray] -> [[(Word, (Bytes, v))]] -> ST s ()
goB [ByteArray]
cache [[(Word, (Bytes, v))]]
ps
            [(Word, (Bytes, v))]
_ -> do
              let ix :: Int
ix = Word -> Int
w2i ([(Word, (Bytes, v))] -> Word
forall a b. [(a, b)] -> a
unsafeHeadFst [(Word, (Bytes, v))]
x)
                  keyVals :: [(Bytes, v)]
keyVals = ((Word, (Bytes, v)) -> (Bytes, v))
-> [(Word, (Bytes, v))] -> [(Bytes, v)]
forall a b. (a -> b) -> [a] -> [b]
map (Word, (Bytes, v)) -> (Bytes, v)
forall a b. (a, b) -> b
snd [(Word, (Bytes, v))]
x
                  !maxGroupLen :: Int
maxGroupLen = (Int -> (Bytes, v) -> Int) -> Int -> [(Bytes, v)] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' (\Int
acc (Bytes
b, v
_) -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Bytes -> Int
Bytes.length Bytes
b) Int
acc) Int
0 [(Bytes, v)]
keyVals
                  reqEntrSz :: Int
reqEntrSz = Word -> Int
w2i (Word -> Word
requiredEntropy (Int -> Word
i2w Int
maxGroupLen))
              -- As a space optimization, we try out all options from the cache.
              -- If we can reuse random bytes that were used for a different key,
              -- we can save a lot of space. Reuse is frequently possible.
              Either () ()
e <- ExceptT () (ST s) () -> ST s (Either () ())
forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT (ExceptT () (ST s) () -> ST s (Either () ()))
-> ExceptT () (ST s) () -> ST s (Either () ())
forall a b. (a -> b) -> a -> b
$ [ByteArray]
-> (ByteArray -> ExceptT () (ST s) ()) -> ExceptT () (ST s) ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ (ByteArray
entropyA ByteArray -> [ByteArray] -> [ByteArray]
forall a. a -> [a] -> [a]
: [ByteArray]
cache) ((ByteArray -> ExceptT () (ST s) ()) -> ExceptT () (ST s) ())
-> (ByteArray -> ExceptT () (ST s) ()) -> ExceptT () (ST s) ()
forall a b. (a -> b) -> a -> b
$ \ByteArray
entropy ->
                if ByteArray -> Int
PM.sizeofByteArray ByteArray
entropy Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
reqEntrSz
                  then
                    ST s (Either () ()) -> ExceptT () (ST s) ()
forall e (m :: * -> *) a. m (Either e a) -> ExceptT e m a
ExceptT (ST s (Either () ()) -> ExceptT () (ST s) ())
-> ST s (Either () ()) -> ExceptT () (ST s) ()
forall a b. (a -> b) -> a -> b
$
                      ByteArray -> Int -> [(Bytes, v)] -> ST s Bool
attempt ByteArray
entropy Int
ix [(Bytes, v)]
keyVals ST s Bool -> (Bool -> ST s (Either () ())) -> ST s (Either () ())
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
                        Bool
True -> Either () () -> ST s (Either () ())
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (() -> Either () ()
forall a b. a -> Either a b
Left ())
                        Bool
False -> Either () () -> ST s (Either () ())
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (() -> Either () ()
forall a b. b -> Either a b
Right ())
                  else ST s (Either () ()) -> ExceptT () (ST s) ()
forall e (m :: * -> *) a. m (Either e a) -> ExceptT e m a
ExceptT (Either () () -> ST s (Either () ())
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (() -> Either () ()
forall a b. b -> Either a b
Right ()))
              case Either () ()
e of
                Left () -> [ByteArray] -> [[(Word, (Bytes, v))]] -> ST s ()
goB [ByteArray]
cache [[(Word, (Bytes, v))]]
ps
                Right () -> do
                  [ByteArray]
-> Int
-> Int
-> [(Bytes, v)]
-> Int
-> [[(Word, (Bytes, v))]]
-> ST s ()
goD [ByteArray]
cache Int
100000 Int
ix [(Bytes, v)]
keyVals Int
reqEntrSz [[(Word, (Bytes, v))]]
ps
          updateSlots :: ByteArray -> Int -> [(Bytes, v)] -> ST s ()
          updateSlots :: ByteArray -> Int -> [(Bytes, v)] -> ST s ()
updateSlots !ByteArray
entropy !Int
ix [(Bytes, v)]
keyVals = do
            MutableUnliftedArray (PrimState (ST s)) ByteArray
-> Int -> ByteArray -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, PrimUnlifted a) =>
MutableUnliftedArray (PrimState m) a -> Int -> a -> m ()
PM.writeUnliftedArray MutableUnliftedArray_ ByteArray# s ByteArray
MutableUnliftedArray (PrimState (ST s)) ByteArray
entropies Int
ix ByteArray
entropy
            [(Bytes, v)] -> ((Bytes, v) -> ST s ()) -> ST s ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [(Bytes, v)]
keyVals (((Bytes, v) -> ST s ()) -> ST s ())
-> ((Bytes, v) -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \(Bytes
key, v
val) -> do
              let j :: Int
j = forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word @Int (Word -> Word -> Word
forall a. Integral a => a -> a -> a
rem (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word32 @Word (ByteArray -> Bytes -> Word32
Hash.bytes ByteArray
entropy Bytes
key)) (Int -> Word
i2w Int
count))
              SmallMutableArray (PrimState (ST s)) Bool -> Int -> Bool -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> Int -> a -> m ()
PM.writeSmallArray SmallMutableArray s Bool
SmallMutableArray (PrimState (ST s)) Bool
used Int
j Bool
True
              MutableUnliftedArray (PrimState (ST s)) ByteArray
-> Int -> ByteArray -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, PrimUnlifted a) =>
MutableUnliftedArray (PrimState m) a -> Int -> a -> m ()
PM.writeUnliftedArray MutableUnliftedArray_ ByteArray# s ByteArray
MutableUnliftedArray (PrimState (ST s)) ByteArray
keys Int
j (Bytes -> ByteArray
Bytes.toByteArray Bytes
key)
              SmallMutableArray (PrimState (ST s)) v -> Int -> v -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> Int -> a -> m ()
PM.writeSmallArray SmallMutableArray s v
SmallMutableArray (PrimState (ST s)) v
values Int
j v
val
          attempt :: ByteArray -> Int -> [(Bytes, v)] -> ST s Bool
          attempt :: ByteArray -> Int -> [(Bytes, v)] -> ST s Bool
attempt !ByteArray
entropy !Int
ix [(Bytes, v)]
keyVals = do
            SmallMutableArray s Bool
tmpUsed <- SmallMutableArray (PrimState (ST s)) Bool
-> Int -> Int -> ST s (SmallMutableArray (PrimState (ST s)) Bool)
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a
-> Int -> Int -> m (SmallMutableArray (PrimState m) a)
PM.cloneSmallMutableArray SmallMutableArray s Bool
SmallMutableArray (PrimState (ST s)) Bool
used Int
0 Int
count
            Bool
allGood <-
              (Bool -> (Bytes, v) -> ST s Bool)
-> Bool -> [(Bytes, v)] -> ST s Bool
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldlM
                ( \Bool
good (Bytes
key, v
_) ->
                    if Bool
good
                      then do
                        let j :: Int
j = forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word @Int (Word -> Word -> Word
forall a. Integral a => a -> a -> a
rem (Word32 -> Word
upW32 (ByteArray -> Bytes -> Word32
Hash.bytes ByteArray
entropy Bytes
key)) (Int -> Word
i2w Int
count))
                        SmallMutableArray (PrimState (ST s)) Bool -> Int -> ST s Bool
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> Int -> m a
PM.readSmallArray SmallMutableArray s Bool
SmallMutableArray (PrimState (ST s)) Bool
tmpUsed Int
j ST s Bool -> (Bool -> ST s Bool) -> ST s Bool
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
                          Bool
True -> Bool -> ST s Bool
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
                          Bool
False -> do
                            SmallMutableArray (PrimState (ST s)) Bool -> Int -> Bool -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> Int -> a -> m ()
PM.writeSmallArray SmallMutableArray s Bool
SmallMutableArray (PrimState (ST s)) Bool
tmpUsed Int
j Bool
True
                            Bool -> ST s Bool
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
                      else Bool -> ST s Bool
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
                )
                Bool
True
                [(Bytes, v)]
keyVals
            if Bool
allGood
              then do
                ByteArray -> Int -> [(Bytes, v)] -> ST s ()
updateSlots ByteArray
entropy Int
ix [(Bytes, v)]
keyVals
                Bool -> ST s Bool
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
              else Bool -> ST s Bool
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
          {-# SCC goD #-}
          goD :: [ByteArray] -> Int -> Int -> [(Bytes, v)] -> Int -> [[(Word, (Bytes, v))]] -> ST s ()
          goD :: [ByteArray]
-> Int
-> Int
-> [(Bytes, v)]
-> Int
-> [[(Word, (Bytes, v))]]
-> ST s ()
goD ![ByteArray]
cache !Int
counter !Int
ix [(Bytes, v)]
keyVals !Int
entropySz [[(Word, (Bytes, v))]]
zs = do
            ByteArray
entropy <- a -> Int -> ST s ByteArray
ask a
h Int
entropySz
            ByteArray -> Int -> [(Bytes, v)] -> ST s Bool
attempt ByteArray
entropy Int
ix [(Bytes, v)]
keyVals ST s Bool -> (Bool -> ST s ()) -> ST s ()
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
              Bool
True -> [ByteArray] -> [[(Word, (Bytes, v))]] -> ST s ()
goB (ByteArray
entropy ByteArray -> [ByteArray] -> [ByteArray]
forall a. a -> [a] -> [a]
: [ByteArray]
cache) [[(Word, (Bytes, v))]]
zs
              Bool
False -> case Int
counter of
                Int
0 ->
                  HashMapException -> ST s ()
forall a e. Exception e => e -> a
throw (HashMapException -> ST s ()) -> HashMapException -> ST s ()
forall a b. (a -> b) -> a -> b
$
                    Int -> [Bytes] -> [[(Word, Bytes)]] -> HashMapException
HashMapException
                      Int
count
                      (((Bytes, v) -> Bytes) -> [(Bytes, v)] -> [Bytes]
forall a b. (a -> b) -> [a] -> [b]
map (Bytes, v) -> Bytes
forall a b. (a, b) -> a
fst [(Bytes, v)]
keyVals)
                      ((([(Word, (Bytes, v))] -> [(Word, Bytes)])
-> [[(Word, (Bytes, v))]] -> [[(Word, Bytes)]]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (([(Word, (Bytes, v))] -> [(Word, Bytes)])
 -> [[(Word, (Bytes, v))]] -> [[(Word, Bytes)]])
-> (((Bytes, v) -> Bytes)
    -> [(Word, (Bytes, v))] -> [(Word, Bytes)])
-> ((Bytes, v) -> Bytes)
-> [[(Word, (Bytes, v))]]
-> [[(Word, Bytes)]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Word, (Bytes, v)) -> (Word, Bytes))
-> [(Word, (Bytes, v))] -> [(Word, Bytes)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (((Word, (Bytes, v)) -> (Word, Bytes))
 -> [(Word, (Bytes, v))] -> [(Word, Bytes)])
-> (((Bytes, v) -> Bytes) -> (Word, (Bytes, v)) -> (Word, Bytes))
-> ((Bytes, v) -> Bytes)
-> [(Word, (Bytes, v))]
-> [(Word, Bytes)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Bytes, v) -> Bytes) -> (Word, (Bytes, v)) -> (Word, Bytes)
forall a b. (a -> b) -> (Word, a) -> (Word, b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap) (Bytes, v) -> Bytes
forall a b. (a, b) -> a
fst [[(Word, (Bytes, v))]]
groups)
                Int
_ -> [ByteArray]
-> Int
-> Int
-> [(Bytes, v)]
-> Int
-> [[(Word, (Bytes, v))]]
-> ST s ()
goD [ByteArray]
cache (Int
counter Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int
ix [(Bytes, v)]
keyVals Int
entropySz [[(Word, (Bytes, v))]]
zs
      -- Notice that we do not start out with entropyA. We manually cons that
      -- onto the top every time, so that if it can get reused, it does. We
      -- would rather it get reused than anything else since there is an
      -- optimization in the lookup function that avoids computing the hash
      -- twice if this entropy gets used.
      [ByteArray] -> [[(Word, (Bytes, v))]] -> ST s ()
goB [] [[(Word, (Bytes, v))]]
groups
      SmallArray v
vals' <- SmallMutableArray (PrimState (ST s)) v -> ST s (SmallArray v)
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> m (SmallArray a)
PM.unsafeFreezeSmallArray SmallMutableArray s v
SmallMutableArray (PrimState (ST s)) v
values
      UnliftedArray ByteArray
keys' <- MutableUnliftedArray (PrimState (ST s)) ByteArray
-> ST s (UnliftedArray ByteArray)
forall (m :: * -> *) a.
PrimMonad m =>
MutableUnliftedArray (PrimState m) a -> m (UnliftedArray a)
PM.unsafeFreezeUnliftedArray MutableUnliftedArray_ ByteArray# s ByteArray
MutableUnliftedArray (PrimState (ST s)) ByteArray
keys
      UnliftedArray ByteArray
entropies' <- MutableUnliftedArray (PrimState (ST s)) ByteArray
-> ST s (UnliftedArray ByteArray)
forall (m :: * -> *) a.
PrimMonad m =>
MutableUnliftedArray (PrimState m) a -> m (UnliftedArray a)
PM.unsafeFreezeUnliftedArray MutableUnliftedArray_ ByteArray# s ByteArray
MutableUnliftedArray (PrimState (ST s)) ByteArray
entropies
      PrimArray Int32
offsets' <- MutablePrimArray (PrimState (ST s)) Int32 -> ST s (PrimArray Int32)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
PM.unsafeFreezePrimArray MutablePrimArray s Int32
MutablePrimArray (PrimState (ST s)) Int32
offsets
      Map v -> ST s (Map v)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteArray
-> UnliftedArray ByteArray
-> PrimArray Int32
-> UnliftedArray ByteArray
-> SmallArray v
-> Map v
forall v.
ByteArray
-> UnliftedArray ByteArray
-> PrimArray Int32
-> UnliftedArray ByteArray
-> SmallArray v
-> Map v
Map ByteArray
entropyA UnliftedArray ByteArray
entropies' PrimArray Int32
offsets' UnliftedArray ByteArray
keys' SmallArray v
vals')
 where
  -- Combine duplicates upfront.
  xs' :: [(Bytes, v)]
  xs' :: [(Bytes, v)]
xs' =
    ([(Bytes, v)] -> (Bytes, v)) -> [[(Bytes, v)]] -> [(Bytes, v)]
forall a b. (a -> b) -> [a] -> [b]
map
      ( \[(Bytes, v)]
rs ->
          ( [(Bytes, v)] -> Bytes
forall a b. [(a, b)] -> a
unsafeHeadFst [(Bytes, v)]
rs
          , (v -> v -> v) -> [v] -> v
forall a. HasCallStack => (a -> a -> a) -> [a] -> a
List.foldl1' v -> v -> v
combine (((Bytes, v) -> v) -> [(Bytes, v)] -> [v]
forall a b. (a -> b) -> [a] -> [b]
map (Bytes, v) -> v
forall a b. (a, b) -> b
snd [(Bytes, v)]
rs)
          )
      )
      (((Bytes, v) -> (Bytes, v) -> Bool)
-> [(Bytes, v)] -> [[(Bytes, v)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
List.groupBy (\(Bytes
x, v
_) (Bytes
y, v
_) -> Bytes
x Bytes -> Bytes -> Bool
forall a. Eq a => a -> a -> Bool
== Bytes
y) (((Bytes, v) -> Bytes) -> [(Bytes, v)] -> [(Bytes, v)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
List.sortOn (Bytes, v) -> Bytes
forall a b. (a, b) -> a
fst [(Bytes, v)]
xs))
  count :: Int
count = forall (t :: * -> *) a. Foldable t => t a -> Int
List.length @[] [(Bytes, v)]
xs' :: Int

findUnused :: PM.SmallMutableArray s Bool -> ST s Int
findUnused :: forall s. SmallMutableArray s Bool -> ST s Int
findUnused SmallMutableArray s Bool
xs = SmallMutableArray (PrimState (ST s)) Bool -> ST s Int
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> m Int
PM.getSizeofSmallMutableArray SmallMutableArray s Bool
SmallMutableArray (PrimState (ST s)) Bool
xs ST s Int -> (Int -> ST s Int) -> ST s Int
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Int -> ST s Int
forall {m :: * -> *}.
(PrimState m ~ s, PrimMonad m) =>
Int -> Int -> m Int
go Int
0
 where
  go :: Int -> Int -> m Int
go !Int
ix !Int
len =
    if Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
len
      then do
        SmallMutableArray (PrimState m) Bool -> Int -> m Bool
forall (m :: * -> *) a.
PrimMonad m =>
SmallMutableArray (PrimState m) a -> Int -> m a
PM.readSmallArray SmallMutableArray s Bool
SmallMutableArray (PrimState m) Bool
xs Int
ix m Bool -> (Bool -> m Int) -> m Int
forall a b. m a -> (a -> m b) -> m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
          Bool
True -> Int -> Int -> m Int
go (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
len
          Bool
False -> Int -> m Int
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
ix
      else [Char] -> m Int
forall a. HasCallStack => [Char] -> a
error [Char]
"findUnused: could not find unused slot"

fromListWith ::
  forall v.
  -- | Source of randomness
  CryptHandle ->
  (v -> v -> v) ->
  [(Bytes, v)] ->
  IO (Map v)
fromListWith :: forall v.
CryptHandle -> (v -> v -> v) -> [(Bytes, v)] -> IO (Map v)
fromListWith CryptHandle
h v -> v -> v
combine [(Bytes, v)]
xs =
  ST RealWorld (Map v) -> IO (Map v)
forall a. ST RealWorld a -> IO a
stToIO
    (CryptHandle
-> (CryptHandle -> Int -> ST RealWorld ByteArray)
-> (v -> v -> v)
-> [(Bytes, v)]
-> ST RealWorld (Map v)
forall a s v.
a
-> (a -> Int -> ST s ByteArray)
-> (v -> v -> v)
-> [(Bytes, v)]
-> ST s (Map v)
fromListWithGen CryptHandle
h CryptHandle -> Int -> ST RealWorld ByteArray
askForEntropy v -> v -> v
combine [(Bytes, v)]
xs)

findInitialEntropy ::
  forall s a v.
  a ->
  (a -> Int -> ST s ByteArray) ->
  Int ->
  Int ->
  Int ->
  [(Bytes, v)] ->
  ST s ByteArray
{-# SCC findInitialEntropy #-}
findInitialEntropy :: forall s a v.
a
-> (a -> Int -> ST s ByteArray)
-> Int
-> Int
-> Int
-> [(Bytes, v)]
-> ST s ByteArray
findInitialEntropy !a
h a -> Int -> ST s ByteArray
ask !Int
maxLen' !Int
count !Int
allowedCollisions [(Bytes, v)]
xs = Int -> ST s ByteArray
go Int
40
 where
  go :: Int -> ST s ByteArray
  go :: Int -> ST s ByteArray
go !Int
ix = do
    ByteArray
entropy <- a -> Int -> ST s ByteArray
ask a
h Int
maxLen'
    let maxCollisions :: Int
maxCollisions =
          (Int -> [Word] -> Int) -> Int -> [[Word]] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl'
            (\Int
acc [Word]
zs -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
acc (forall (t :: * -> *) a. Foldable t => t a -> Int
List.length @[] [Word]
zs))
            Int
0
            ( [Word] -> [[Word]]
forall a. Eq a => [a] -> [[a]]
List.group
                ( [Word] -> [Word]
forall a. Ord a => [a] -> [a]
List.sort
                    (((Bytes, v) -> Word) -> [(Bytes, v)] -> [Word]
forall a b. (a -> b) -> [a] -> [b]
map (\(Bytes
b, v
_) -> Word -> Word -> Word
forall a. Integral a => a -> a -> a
rem (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word32 @Word (ByteArray -> Bytes -> Word32
Hash.bytes ByteArray
entropy Bytes
b)) (Int -> Word
i2w Int
count)) [(Bytes, v)]
xs)
                )
            )
    if Int
maxCollisions Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
allowedCollisions
      then ByteArray -> ST s ByteArray
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteArray
entropy
      else case Int
ix of
        Int
0 -> HashMapException -> ST s ByteArray
forall a e. Exception e => e -> a
throw (Int -> [Bytes] -> [[(Word, Bytes)]] -> HashMapException
HashMapException (-Int
1) [] [])
        Int
_ -> Int -> ST s ByteArray
go (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)

askForEntropyST :: STRef s Int -> Int -> ST s ByteArray
askForEntropyST :: forall s. STRef s Int -> Int -> ST s ByteArray
askForEntropyST !STRef s Int
ref !Int
n = do
  Int
counter <- STRef s Int -> ST s Int
forall s a. STRef s a -> ST s a
readSTRef STRef s Int
ref
  STRef s Int -> Int -> ST s ()
forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s Int
ref (Int -> ST s ()) -> Int -> ST s ()
forall a b. (a -> b) -> a -> b
$! Int -> Int -> Int
forall a. Integral a => a -> a -> a
mod (Int
counter Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
8192
  let (Int
_, Int
r) = Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
divMod Int
n Int
8
  if
    | Int
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 -> [Char] -> ST s ByteArray
forall a. HasCallStack => [Char] -> a
error [Char]
"bytehash: askForEntropyST, request does not divide 8"
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
8192 -> [Char] -> ST s ByteArray
forall a. HasCallStack => [Char] -> a
error [Char]
"bytehash: askForEntropyST, requested more than 8192"
    | Bool
otherwise -> do
        MutablePrimArray s Word8
dst <- Int -> ST s (MutablePrimArray (PrimState (ST s)) Word8)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
PM.newPrimArray Int
n
        MutablePrimArray (PrimState (ST s)) Word8
-> Int -> Ptr Word8 -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> Int -> Ptr a -> Int -> m ()
PM.copyPtrToMutablePrimArray
          MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
dst
          Int
0
          (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
Hash.entropy Int
counter :: Ptr Word8)
          Int
n
        PM.PrimArray ByteArray#
x <- MutablePrimArray (PrimState (ST s)) Word8 -> ST s (PrimArray Word8)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
PM.unsafeFreezePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
dst
        ByteArray -> ST s ByteArray
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteArray# -> ByteArray
ByteArray ByteArray#
x)

askForEntropy :: CryptHandle -> Int -> ST RealWorld ByteArray
askForEntropy :: CryptHandle -> Int -> ST RealWorld ByteArray
askForEntropy !CryptHandle
h !Int
n = IO ByteArray -> ST RealWorld ByteArray
forall a. IO a -> ST RealWorld a
ioToST (IO ByteArray -> ST RealWorld ByteArray)
-> IO ByteArray -> ST RealWorld ByteArray
forall a b. (a -> b) -> a -> b
$ do
  ByteString
entropy <- CryptHandle -> Int -> IO ByteString
hGetEntropy CryptHandle
h Int
n
  Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when
    (ByteString -> Int
ByteString.length ByteString
entropy Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
n)
    ([Char] -> IO ()
forall a. [Char] -> IO a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"bytehash: askForEntropy failed, blame entropy")
  MutableByteArray RealWorld
dst <- Int -> IO (MutableByteArray (PrimState IO))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray Int
n
  ByteString -> (CStringLen -> IO ()) -> IO ()
forall a. ByteString -> (CStringLen -> IO a) -> IO a
ByteString.unsafeUseAsCStringLen ByteString
entropy ((CStringLen -> IO ()) -> IO ()) -> (CStringLen -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
ptr, Int
len) -> do
    let !(PM.MutableByteArray MutableByteArray# RealWorld
primDst) = MutableByteArray RealWorld
dst
    MutablePrimArray (PrimState IO) CChar
-> Int -> Ptr CChar -> Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> Int -> Ptr a -> Int -> m ()
PM.copyPtrToMutablePrimArray (MutableByteArray# RealWorld -> MutablePrimArray RealWorld CChar
forall s a. MutableByteArray# s -> MutablePrimArray s a
PM.MutablePrimArray MutableByteArray# RealWorld
primDst) Int
0 Ptr CChar
ptr Int
len
  MutableByteArray (PrimState IO) -> IO ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray RealWorld
MutableByteArray (PrimState IO)
dst

requiredEntropy :: Word -> Word
requiredEntropy :: Word -> Word
requiredEntropy Word
n = Word
8 Word -> Word -> Word
forall a. Num a => a -> a -> a
* Word
n Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
8

errorThunk :: a
errorThunk :: forall a. a
errorThunk = [Char] -> a
forall a. HasCallStack => [Char] -> a
error [Char]
"Data.Bytes.HashMap: mistake"

unsafeHeadFst :: [(a, b)] -> a
unsafeHeadFst :: forall a b. [(a, b)] -> a
unsafeHeadFst ((a
x, b
_) : [(a, b)]
_) = a
x
unsafeHeadFst [] = [Char] -> a
forall a. HasCallStack => [Char] -> a
error [Char]
"Data.Bytes.HashMap: bad use of unsafeHeadFst"

w2i :: Word -> Int
w2i :: Word -> Int
w2i = Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral

i2w :: Int -> Word
i2w :: Int -> Word
i2w = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral

upW32 :: Word32 -> Word
upW32 :: Word32 -> Word
upW32 = Word32 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral

bytesEqualsByteArray :: Bytes -> ByteArray -> Bool
bytesEqualsByteArray :: Bytes -> ByteArray -> Bool
bytesEqualsByteArray (Bytes ByteArray
arr1 Int
off1 Int
len1) ByteArray
arr2
  | Int
len1 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= ByteArray -> Int
PM.sizeofByteArray ByteArray
arr2 = Bool
False
  | Bool
otherwise = ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering
compareByteArrays ByteArray
arr1 Int
off1 ByteArray
arr2 Int
0 Int
len1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
EQ

compareByteArrays :: ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering
compareByteArrays :: ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering
compareByteArrays (ByteArray ByteArray#
ba1#) (I# Int#
off1#) (ByteArray ByteArray#
ba2#) (I# Int#
off2#) (I# Int#
n#) =
  Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int# -> Int
I# (ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
Exts.compareByteArrays# ByteArray#
ba1# Int#
off1# ByteArray#
ba2# Int#
off2# Int#
n#)) Int
0

data HashMapException = HashMapException !Int [Bytes] [[(Word, Bytes)]]
  deriving stock (Int -> HashMapException -> ShowS
[HashMapException] -> ShowS
HashMapException -> [Char]
(Int -> HashMapException -> ShowS)
-> (HashMapException -> [Char])
-> ([HashMapException] -> ShowS)
-> Show HashMapException
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> HashMapException -> ShowS
showsPrec :: Int -> HashMapException -> ShowS
$cshow :: HashMapException -> [Char]
show :: HashMapException -> [Char]
$cshowList :: [HashMapException] -> ShowS
showList :: [HashMapException] -> ShowS
Show, HashMapException -> HashMapException -> Bool
(HashMapException -> HashMapException -> Bool)
-> (HashMapException -> HashMapException -> Bool)
-> Eq HashMapException
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: HashMapException -> HashMapException -> Bool
== :: HashMapException -> HashMapException -> Bool
$c/= :: HashMapException -> HashMapException -> Bool
/= :: HashMapException -> HashMapException -> Bool
Eq)
  deriving anyclass (Show HashMapException
Typeable HashMapException
(Typeable HashMapException, Show HashMapException) =>
(HashMapException -> SomeException)
-> (SomeException -> Maybe HashMapException)
-> (HashMapException -> [Char])
-> Exception HashMapException
SomeException -> Maybe HashMapException
HashMapException -> [Char]
HashMapException -> SomeException
forall e.
(Typeable e, Show e) =>
(e -> SomeException)
-> (SomeException -> Maybe e) -> (e -> [Char]) -> Exception e
$ctoException :: HashMapException -> SomeException
toException :: HashMapException -> SomeException
$cfromException :: SomeException -> Maybe HashMapException
fromException :: SomeException -> Maybe HashMapException
$cdisplayException :: HashMapException -> [Char]
displayException :: HashMapException -> [Char]
Exception)

{- | For each slot, gives the number of keys that hash to it
after the first hash function has been applied.
-}
distribution :: Map v -> [(Int, Int)]
distribution :: forall v. Map v -> [(Int, Int)]
distribution (Map ByteArray
entropy UnliftedArray ByteArray
entropies PrimArray Int32
_ UnliftedArray ByteArray
keys SmallArray v
_) =
  let counts :: PrimArray Int
counts = (forall s. ST s (PrimArray Int)) -> PrimArray Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (PrimArray Int)) -> PrimArray Int)
-> (forall s. ST s (PrimArray Int)) -> PrimArray Int
forall a b. (a -> b) -> a -> b
$ do
        let sz :: Int
sz = UnliftedArray ByteArray -> Int
forall e. UnliftedArray e -> Int
PM.sizeofUnliftedArray UnliftedArray ByteArray
entropies
        MutablePrimArray (PrimState (ST s)) Int
dst <- Int -> ST s (MutablePrimArray (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
PM.newPrimArray Int
sz
        MutablePrimArray (PrimState (ST s)) Int
-> Int -> Int -> Int -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> Int -> a -> m ()
PM.setPrimArray MutablePrimArray (PrimState (ST s)) Int
dst Int
0 Int
sz (Int
0 :: Int)
        let go :: Int -> ST s ()
go !Int
ix = case Int
ix of
              (-1) -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
              Int
_ -> do
                let key :: ByteArray
key = UnliftedArray ByteArray -> Int -> ByteArray
forall a. PrimUnlifted a => UnliftedArray a -> Int -> a
PM.indexUnliftedArray UnliftedArray ByteArray
keys Int
ix
                let ixA :: Int
ixA = Word -> Int
w2i (Word -> Word -> Word
unsafeRem (Word32 -> Word
upW32 (ByteArray -> ByteArray -> Word32
Hash.byteArray ByteArray
entropy ByteArray
key)) (Int -> Word
i2w Int
sz))
                Int
old <- MutablePrimArray (PrimState (ST s)) Int -> Int -> ST s Int
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> m a
PM.readPrimArray MutablePrimArray (PrimState (ST s)) Int
dst Int
ixA
                MutablePrimArray (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> a -> m ()
PM.writePrimArray MutablePrimArray (PrimState (ST s)) Int
dst Int
ixA (Int
old Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                Int -> ST s ()
go (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        Int -> ST s ()
go (Int
sz Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        MutablePrimArray (PrimState (ST s)) Int -> ST s (PrimArray Int)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
PM.unsafeFreezePrimArray MutablePrimArray (PrimState (ST s)) Int
dst
   in [(Int, Int)] -> [(Int, Int)]
forall a. Ord a => [a] -> [a]
List.sort ([(Int, Int)] -> [(Int, Int)]) -> [(Int, Int)] -> [(Int, Int)]
forall a b. (a -> b) -> a -> b
$
        ([Int] -> (Int, Int)) -> [[Int]] -> [(Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
List.map
          ( \[Int]
xs -> case [Int]
xs of
              [] -> [Char] -> (Int, Int)
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"bytehash: distribution impl error"
              Int
y : [Int]
_ -> (Int
y, [Int] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
List.length [Int]
xs)
          )
          ([Int] -> [[Int]]
forall a. Eq a => [a] -> [[a]]
List.group ([Int] -> [Int]
forall a. Ord a => [a] -> [a]
List.sort (PrimArray Int -> [Item (PrimArray Int)]
forall l. IsList l => l -> [Item l]
Exts.toList PrimArray Int
counts)))

-- | The number of non-matching entropies being used.
distinctEntropies :: Map v -> Int
distinctEntropies :: forall v. Map v -> Int
distinctEntropies (Map ByteArray
entropy UnliftedArray ByteArray
entropies PrimArray Int32
_ UnliftedArray ByteArray
_ SmallArray v
_) =
  [[ByteArray]] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
List.length ([ByteArray] -> [[ByteArray]]
forall a. Eq a => [a] -> [[a]]
List.group ([ByteArray] -> [ByteArray]
forall a. Ord a => [a] -> [a]
List.sort (ByteArray
entropy ByteArray -> [ByteArray] -> [ByteArray]
forall a. a -> [a] -> [a]
: UnliftedArray_ ByteArray# ByteArray
-> [Item (UnliftedArray_ ByteArray# ByteArray)]
forall l. IsList l => l -> [Item l]
Exts.toList UnliftedArray_ ByteArray# ByteArray
UnliftedArray ByteArray
entropies)))

sameByteArray :: ByteArray -> ByteArray -> Int#
sameByteArray :: ByteArray -> ByteArray -> Int#
sameByteArray (ByteArray ByteArray#
x) (ByteArray ByteArray#
y) =
  MutableByteArray# Any -> MutableByteArray# Any -> Int#
forall s. MutableByteArray# s -> MutableByteArray# s -> Int#
Exts.sameMutableByteArray# (ByteArray# -> MutableByteArray# Any
forall a b. a -> b
Exts.unsafeCoerce# ByteArray#
x) (ByteArray# -> MutableByteArray# Any
forall a b. a -> b
Exts.unsafeCoerce# ByteArray#
y)