-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Mutable hash tables in the ST monad -- -- This package provides a couple of different implementations of mutable -- hash tables in the ST monad, as well as a typeclass abstracting their -- common operations, and a set of wrappers to use the hash tables in the -- IO monad. -- -- QUICK START: documentation for the hash table operations is -- provided in the Data.HashTable.Class module, and the IO -- wrappers (which most users will probably prefer) are located in the -- Data.HashTable.IO module. -- -- This package currently contains three hash table implementations: -- --
    --
  1. Data.HashTable.ST.Cuckoo contains an implementation of -- "cuckoo hashing" as introduced by Pagh and Rodler in 2001 (see -- http://en.wikipedia.org/wiki/Cuckoo_hashing). Cuckoo hashing -- has worst-case O(1) lookups and can reach a high "load factor", -- in which the table can perform acceptably well even when approaching -- 90% full. Randomized testing shows this implementation of cuckoo -- hashing to be slightly faster on insert and slightly slower on lookup -- than Data.HashTable.ST.Basic, while being more space efficient -- by about a half-word per key-value mapping. Cuckoo hashing, like the -- basic hash table implementation using linear probing, can suffer from -- long delays when the table is resized.
  2. --
  3. Data.HashTable.ST.Basic contains a basic open-addressing -- hash table using linear probing as the collision strategy. On a pure -- speed basis it should currently be the fastest available Haskell hash -- table implementation for lookups, although it has a higher memory -- overhead than the other tables and can suffer from long delays when -- the table is resized because all of the elements in the table need to -- be rehashed.
  4. --
  5. Data.HashTable.ST.Linear contains a linear hash table (see -- http://en.wikipedia.org/wiki/Linear_hashing), which trades some -- insert and lookup performance for higher space efficiency and much -- shorter delays when expanding the table. In most cases, benchmarks -- show this table to be currently slightly faster than -- Data.HashTable from the Haskell base library.
  6. --
-- -- It is recommended to create a concrete type alias in your code when -- using this package, i.e.: -- --
--   import qualified Data.HashTable.IO as H
--   
--   type HashTable k v = H.BasicHashTable k v
--   
--   foo :: IO (HashTable Int Int)
--   foo = do
--       ht <- H.new
--       H.insert ht 1 1
--       return ht
--   
-- -- Firstly, this makes it easy to switch to a different hash table -- implementation, and secondly, using a concrete type rather than -- leaving your functions abstract in the HashTable class should allow -- GHC to optimize away the typeclass dictionaries. -- -- This package accepts a couple of different cabal flags: -- -- -- -- Please send bug reports to -- https://github.com/gregorycollins/hashtables/issues. @package hashtables @version 1.3 -- | This module contains a HashTable typeclass for the hash table -- implementations in this package. This allows you to provide functions -- which will work for any hash table implementation in this collection. -- -- It is recommended to create a concrete type alias in your code when -- using this package, i.e.: -- --
--   import qualified Data.HashTable.IO as H
--   
--   type HashTable k v = H.BasicHashTable k v
--   
--   foo :: IO (HashTable Int Int)
--   foo = do
--       ht <- H.new
--       H.insert ht 1 1
--       return ht
--   
-- -- or -- --
--   import qualified Data.HashTable.ST.Cuckoo as C
--   import qualified Data.HashTable.Class as H
--   
--   type HashTable s k v = C.HashTable s k v
--   
--   foo :: ST s (HashTable s k v)
--   foo = do
--       ht <- H.new
--       H.insert ht 1 1
--       return ht
--   
-- -- Firstly, this makes it easy to switch to a different hash table -- implementation, and secondly, using a concrete type rather than -- leaving your functions abstract in the HashTable class should -- allow GHC to optimize away the typeclass dictionaries. -- -- Note that the functions in this typeclass are in the ST monad; -- if you want hash tables in IO, use the convenience wrappers in -- Data.HashTable.IO. module Data.HashTable.Class -- | A typeclass for hash tables in the ST monad. The operations on -- these hash tables are typically both key- and value-strict. class HashTable h -- | Creates a new, default-sized hash table. O(1). new :: HashTable h => ST s (h s k v) -- | Creates a new hash table sized to hold n elements. -- O(n). newSized :: HashTable h => Int -> ST s (h s k v) -- | Generalized update. Given a key k, and a user function -- f, calls: -- -- -- -- If the user function returns (Nothing, _), then the value is -- deleted from the hash table. Otherwise the mapping for k is -- inserted or replaced with the provided value. -- -- Returns the second part of the tuple returned by f. mutate :: (HashTable h, Eq k, Hashable k) => h s k v -> k -> (Maybe v -> (Maybe v, a)) -> ST s a -- | As mutate, except that the action can perform additional side -- effects. mutateST :: (HashTable h, Eq k, Hashable k) => h s k v -> k -> (Maybe v -> ST s (Maybe v, a)) -> ST s a -- | Inserts a key/value mapping into a hash table, replacing any existing -- mapping for that key. -- -- O(n) worst case, O(1) amortized. insert :: (HashTable h, Eq k, Hashable k) => h s k v -> k -> v -> ST s () -- | Deletes a key-value mapping from a hash table. O(n) worst case, -- O(1) amortized. delete :: (HashTable h, Eq k, Hashable k) => h s k v -> k -> ST s () -- | Looks up a key-value mapping in a hash table. O(n) worst case, -- (O(1) for cuckoo hash), O(1) amortized. lookup :: (HashTable h, Eq k, Hashable k) => h s k v -> k -> ST s (Maybe v) -- | A strict fold over the key-value records of a hash table in the -- ST monad. O(n). foldM :: HashTable h => (a -> (k, v) -> ST s a) -> a -> h s k v -> ST s a -- | A side-effecting map over the key-value records of a hash table. -- O(n). mapM_ :: HashTable h => ((k, v) -> ST s b) -> h s k v -> ST s () -- | Looks up the index of a key-value mapping in a hash table suitable for -- passing to nextByIndex. lookupIndex :: (HashTable h, Eq k, Hashable k) => h s k v -> k -> ST s (Maybe Word) -- | Returns the next key-value mapping stored at the given index or at a -- greater index. The index, key, and value of the next record are -- returned. nextByIndex :: HashTable h => h s k v -> Word -> ST s (Maybe (Word, k, v)) -- | Computes the overhead (in words) per key-value mapping. Used for -- debugging, etc; time complexity depends on the underlying hash table -- implementation. O(n). computeOverhead :: HashTable h => h s k v -> ST s Double -- | Create a hash table from a list of key-value pairs. O(n). fromList :: (HashTable h, Eq k, Hashable k) => [(k, v)] -> ST s (h s k v) -- | Create a hash table from a list of key-value pairs, with a size hint. -- O(n). fromListWithSizeHint :: (HashTable h, Eq k, Hashable k) => Int -> [(k, v)] -> ST s (h s k v) -- | Given a hash table, produce a list of key-value pairs. O(n). toList :: HashTable h => h s k v -> ST s [(k, v)] -- | A basic open-addressing hash table using linear probing. Use this hash -- table if you... -- -- -- -- Of the hash tables in this collection, this hash table has the best -- lookup performance, while maintaining competitive insert performance. -- -- Space overhead -- -- This table is not especially memory-efficient; firstly, the table has -- a maximum load factor of 0.83 and will be resized if load exceeds this -- value. Secondly, to improve insert and lookup performance, we store a -- 16-bit hash code for each key in the table. -- -- Each hash table entry requires at least 2.25 words (on a 64-bit -- machine), two for the pointers to the key and value and one quarter -- word for the hash code. We don't count key and value pointers as -- overhead, because they have to be there -- so the overhead for a full -- slot is at least one quarter word -- but empty slots in the hash table -- count for a full 2.25 words of overhead. Define m as the -- number of slots in the table, n as the number of key value -- mappings, and ws as the machine word size in bytes. If -- the load factor is k=n/m, the amount of space wasted -- per mapping in words is: -- --
--   w(n) = (m*(2*ws + 2) - n*(2*ws)) / ws
--   
-- -- Since m=n/k, -- --
--   w(n) = n/k * (2*ws + 2) - n*(2*ws)
--        = (n * (2 + 2*ws*(1-k))  k)  ws
--   
-- -- Solving for k=0.83, the maximum load factor, gives a -- minimum overhead of 0.71 words per mapping on a 64-bit machine, -- or 1.01 words per mapping on a 32-bit machine. If k=0.5, -- which should be under normal usage the maximum overhead -- situation, then the overhead would be 2.5 words per mapping on a -- 64-bit machine, or 3.0 words per mapping on a 32-bit machine. -- -- Space overhead: experimental results -- -- In randomized testing on a 64-bit machine (see -- test/compute-overhead/ComputeOverhead.hs in the source -- distribution), mean overhead (that is, the number of words needed to -- store the key-value mapping over and above the two words necessary for -- the key and the value pointers) is approximately 1.24 machine words -- per key-value mapping with a standard deviation of about 0.30 words, -- and 1.70 words per mapping at the 95th percentile. -- -- Expensive resizes -- -- If enough elements are inserted into the table to make it exceed the -- maximum load factor, the table is resized. A resize involves a -- complete rehash of all the elements in the table, which means that any -- given call to insert might take O(n) time in the size of -- the table, with a large constant factor. If a long pause waiting for -- the table to resize is unacceptable for your application, you should -- choose the included linear hash table instead. -- -- References: -- -- module Data.HashTable.ST.Basic -- | An open addressing hash table using linear probing. data HashTable s k v -- | See the documentation for this function in new. new :: ST s (HashTable s k v) -- | See the documentation for this function in newSized. newSized :: Int -> ST s (HashTable s k v) -- | Returns the number of mappings currently stored in this table. -- O(1) size :: HashTable s k v -> ST s Int -- | See the documentation for this function in delete. delete :: (Hashable k, Eq k) => HashTable s k v -> k -> ST s () -- | See the documentation for this function in lookup. lookup :: (Eq k, Hashable k) => HashTable s k v -> k -> ST s (Maybe v) -- | See the documentation for this function in insert. insert :: (Eq k, Hashable k) => HashTable s k v -> k -> v -> ST s () -- | See the documentation for this function in mutate. mutate :: (Eq k, Hashable k) => HashTable s k v -> k -> (Maybe v -> (Maybe v, a)) -> ST s a -- | See the documentation for this function in mutateST. mutateST :: (Eq k, Hashable k) => HashTable s k v -> k -> (Maybe v -> ST s (Maybe v, a)) -> ST s a -- | See the documentation for this function in mapM_. mapM_ :: ((k, v) -> ST s b) -> HashTable s k v -> ST s () -- | See the documentation for this function in foldM. foldM :: (a -> (k, v) -> ST s a) -> a -> HashTable s k v -> ST s a -- | See the documentation for this function in computeOverhead. computeOverhead :: HashTable s k v -> ST s Double instance GHC.Show.Show Data.HashTable.ST.Basic.Slot instance GHC.Show.Show Data.HashTable.ST.Basic.SlotFindResponse instance GHC.Base.Semigroup Data.HashTable.ST.Basic.Slot instance GHC.Base.Monoid Data.HashTable.ST.Basic.Slot instance Data.HashTable.Class.HashTable Data.HashTable.ST.Basic.HashTable instance GHC.Show.Show (Data.HashTable.ST.Basic.HashTable s k v) -- | A hash table using the cuckoo strategy. (See -- http://en.wikipedia.org/wiki/Cuckoo_hashing). Use this hash -- table if you... -- -- -- -- Details: -- -- The basic idea of cuckoo hashing, first introduced by Pagh and Rodler -- in 2001, is to use d hash functions instead of only one; in -- this implementation d=2 and the strategy we use is to split up a flat -- array of slots into k buckets, each cache-line-sized: -- --
--   +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+----------+
--   |x0|x1|x2|x3|x4|x5|x6|x7|y0|y1|y2|y3|y4|y5|y6|y7|z0|z1|z2........|
--   +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+----------+
--   [  ^^^  bucket 0  ^^^  ][  ^^^  bucket 1  ^^^  ]...
--   
-- -- There are actually three parallel arrays: one unboxed array of -- Ints for hash codes, one boxed array for keys, and one boxed -- array for values. When looking up a key-value mapping, we hash the key -- using two hash functions and look in both buckets in the hash code -- array for the key. Each bucket is cache-line sized, with its keys in -- no particular order. Because the hash code array is unboxed, we can -- search it for the key using a highly-efficient branchless strategy in -- C code, using SSE instructions if available. -- -- On insert, if both buckets are full, we knock out a randomly-selected -- entry from one of the buckets (using a random walk ensures that "key -- cycles" are broken with maximum probability) and try to repeat the -- insert procedure. This process may not succeed; if all items have not -- successfully found a home after some number of tries, we give up and -- rehash all of the elements into a larger table. -- -- Space overhead: experimental results -- -- The implementation of cuckoo hash given here is almost as fast for -- lookups as the basic open-addressing hash table using linear probing, -- and on average is more space-efficient: in randomized testing on my -- 64-bit machine (see test/compute-overhead/ComputeOverhead.hs -- in the source distribution), mean overhead is 0.77 machine words per -- key-value mapping, with a standard deviation of 0.29 words, and 1.23 -- words per mapping at the 95th percentile. -- -- References: -- -- module Data.HashTable.ST.Cuckoo -- | A cuckoo hash table. data HashTable s k v -- | See the documentation for this function in new. new :: ST s (HashTable s k v) -- | See the documentation for this function in newSized. newSized :: Int -> ST s (HashTable s k v) -- | See the documentation for this function in delete. delete :: (Hashable k, Eq k) => HashTable s k v -> k -> ST s () -- | See the documentation for this function in lookup. lookup :: (Eq k, Hashable k) => HashTable s k v -> k -> ST s (Maybe v) -- | See the documentation for this function in insert. insert :: (Eq k, Hashable k) => HashTable s k v -> k -> v -> ST s () mutate :: (Eq k, Hashable k) => HashTable s k v -> k -> (Maybe v -> (Maybe v, a)) -> ST s a mutateST :: (Eq k, Hashable k) => HashTable s k v -> k -> (Maybe v -> ST s (Maybe v, a)) -> ST s a -- | See the documentation for this function in mapM_. mapM_ :: ((k, v) -> ST s a) -> HashTable s k v -> ST s () -- | See the documentation for this function in foldM. foldM :: (a -> (k, v) -> ST s a) -> a -> HashTable s k v -> ST s a -- | Find index of given key in the hashtable. lookupIndex :: (Hashable k, Eq k) => HashTable s k v -> k -> ST s (Maybe Word) -- | Find the next entry in the hashtable starting at the given index. nextByIndex :: HashTable s k v -> Word -> ST s (Maybe (Word, k, v)) instance Data.HashTable.Class.HashTable Data.HashTable.ST.Cuckoo.HashTable instance GHC.Show.Show (Data.HashTable.ST.Cuckoo.HashTable s k v) -- | An implementation of linear hash tables. (See -- http://en.wikipedia.org/wiki/Linear_hashing). Use this hash -- table if you... -- -- -- -- Details: -- -- Linear hashing allows for the expansion of the hash table one slot at -- a time, by moving a "split" pointer across an array of pointers to -- buckets. The number of buckets is always a power of two, and the -- bucket to look in is defined as: -- --
--   bucket(level,key) = hash(key) mod (2^level)
--   
-- -- The "split pointer" controls the expansion of the hash table. If the -- hash table is at level k (i.e. 2^k buckets have been -- allocated), we first calculate b=bucket(level-1,key). If -- b < splitptr, the destination bucket is calculated as -- b'=bucket(level,key), otherwise the original value b -- is used. -- -- The split pointer is incremented once an insert causes some bucket to -- become fuller than some predetermined threshold; the bucket at the -- split pointer (*not* the bucket which triggered the split!) is then -- rehashed, and half of its keys can be expected to be rehashed into the -- upper half of the table. -- -- When the split pointer reaches the middle of the bucket array, the -- size of the bucket array is doubled, the level increases, and the -- split pointer is reset to zero. -- -- Linear hashing, although not quite as fast for inserts or lookups as -- the implementation of linear probing included in this package, is well -- suited for interactive applications because it has much better worst -- case behaviour on inserts. Other hash table implementations can suffer -- from long pauses, because it is occasionally necessary to rehash all -- of the keys when the table grows. Linear hashing, on the other hand, -- only ever rehashes a bounded (effectively constant) number of keys -- when an insert forces a bucket split. -- -- Space overhead: experimental results -- -- In randomized testing (see -- test/compute-overhead/ComputeOverhead.hs in the source -- distribution), mean overhead is approximately 1.51 machine words per -- key-value mapping with a very low standard deviation of about 0.06 -- words, 1.60 words per mapping at the 95th percentile. -- -- Unsafe tricks -- -- Then the unsafe-tricks flag is on when this package is built -- (and it is on by default), we use some unsafe tricks (namely -- unsafeCoerce# and reallyUnsafePtrEquality#) to save -- indirections in this table. These techniques rely on assumptions about -- the behaviour of the GHC runtime system and, although they've been -- tested and should be safe under normal conditions, are slightly -- dangerous. Caveat emptor. In particular, these techniques are -- incompatible with HPC code coverage reports. -- -- References: -- -- module Data.HashTable.ST.Linear -- | A linear hash table. data HashTable s k v -- | See the documentation for this function in -- "Data.HashTable.Class#v:new". new :: ST s (HashTable s k v) -- | See the documentation for this function in -- "Data.HashTable.Class#v:newSized". newSized :: Int -> ST s (HashTable s k v) -- | See the documentation for this function in -- "Data.HashTable.Class#v:delete". delete :: (Hashable k, Eq k) => HashTable s k v -> k -> ST s () -- | See the documentation for this function in -- "Data.HashTable.Class#v:lookup". lookup :: (Eq k, Hashable k) => HashTable s k v -> k -> ST s (Maybe v) -- | See the documentation for this function in -- "Data.HashTable.Class#v:insert". insert :: (Eq k, Hashable k) => HashTable s k v -> k -> v -> ST s () mutate :: (Eq k, Hashable k) => HashTable s k v -> k -> (Maybe v -> (Maybe v, a)) -> ST s a mutateST :: (Eq k, Hashable k) => HashTable s k v -> k -> (Maybe v -> ST s (Maybe v, a)) -> ST s a -- | See the documentation for this function in -- "Data.HashTable.Class#v:mapM_". mapM_ :: ((k, v) -> ST s b) -> HashTable s k v -> ST s () -- | See the documentation for this function in -- "Data.HashTable.Class#v:foldM". foldM :: (a -> (k, v) -> ST s a) -> a -> HashTable s k v -> ST s a -- | See the documentation for this function in -- "Data.HashTable.Class#v:computeOverhead". computeOverhead :: HashTable s k v -> ST s Double instance Data.HashTable.Class.HashTable Data.HashTable.ST.Linear.HashTable instance GHC.Show.Show (Data.HashTable.ST.Linear.HashTable s k v) -- | This module provides wrappers in IO around the functions from -- Data.HashTable.Class. -- -- This module exports three concrete hash table types, one for each hash -- table implementation in this package: -- --
--   type BasicHashTable  k v = IOHashTable (B.HashTable)  k v
--   type CuckooHashTable k v = IOHashTable (Cu.HashTable) k v
--   type LinearHashTable k v = IOHashTable (L.HashTable)  k v
--   
-- -- The IOHashTable type can be thought of as a wrapper around a -- concrete hashtable type, which sets the ST monad state type -- to PrimState IO, a.k.a. RealWorld: -- --
--   type IOHashTable tabletype k v = tabletype (PrimState IO) k v
--   
-- -- This module provides stToIO wrappers around the hashtable -- functions (which are in ST) to make it convenient to use them -- in IO. It is intended to be imported qualified and used with a -- user-defined type alias, i.e.: -- --
--   import qualified Data.HashTable.IO as H
--   
--   type HashTable k v = H.CuckooHashTable k v
--   
--   foo :: IO (HashTable Int Int)
--   foo = do
--       ht <- H.new
--       H.insert ht 1 1
--       return ht
--   
-- -- Essentially, anywhere you see IOHashTable h k v in the -- type signatures below, you can plug in any of -- BasicHashTable k v, CuckooHashTable k -- v, or LinearHashTable k v. module Data.HashTable.IO -- | A type alias for a basic open addressing hash table using linear -- probing. See Data.HashTable.ST.Basic. type BasicHashTable k v = IOHashTable (HashTable) k v -- | A type alias for the cuckoo hash table. See -- Data.HashTable.ST.Cuckoo. type CuckooHashTable k v = IOHashTable (HashTable) k v -- | A type alias for the linear hash table. See -- Data.HashTable.ST.Linear. type LinearHashTable k v = IOHashTable (HashTable) k v -- | A type alias for our hash tables, which run in ST, to set the -- state token type to PrimState IO (aka -- RealWorld) so that we can use them in IO. type IOHashTable tabletype k v = tabletype (PrimState IO) k v -- | See the documentation for this function in new. new :: HashTable h => IO (IOHashTable h k v) -- | See the documentation for this function in newSized. newSized :: HashTable h => Int -> IO (IOHashTable h k v) -- | See the documentation for this function in insert. insert :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> k -> v -> IO () -- | See the documentation for this function in delete. delete :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> k -> IO () -- | See the documentation for this function in lookup. lookup :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> k -> IO (Maybe v) -- | See the documentation for this function in mutate. mutate :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> k -> (Maybe v -> (Maybe v, a)) -> IO a -- | See the documentation for this function in mutateST. mutateIO :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> k -> (Maybe v -> IO (Maybe v, a)) -> IO a -- | See the documentation for this function in fromList. fromList :: (HashTable h, Eq k, Hashable k) => [(k, v)] -> IO (IOHashTable h k v) -- | See the documentation for this function in -- fromListWithSizeHint. fromListWithSizeHint :: (HashTable h, Eq k, Hashable k) => Int -> [(k, v)] -> IO (IOHashTable h k v) -- | See the documentation for this function in toList. toList :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> IO [(k, v)] -- | See the documentation for this function in mapM_. mapM_ :: HashTable h => ((k, v) -> IO a) -> IOHashTable h k v -> IO () -- | See the documentation for this function in foldM. foldM :: HashTable h => (a -> (k, v) -> IO a) -> a -> IOHashTable h k v -> IO a -- | See the documentation for this function in computeOverhead. computeOverhead :: HashTable h => IOHashTable h k v -> IO Double -- | See the documentation for this function in lookupIndex. lookupIndex :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> k -> IO (Maybe Word) -- | See the documentation for this function in nextByIndex. nextByIndex :: (HashTable h, Eq k, Hashable k) => IOHashTable h k v -> Word -> IO (Maybe (Word, k, v))