-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | "Advanced" Data Structures -- -- This package is a playground for working with several types of -- advanced data structures including wavelet trees and cache oblivious -- lookahead arrays. @package structures @version 0.2 module Data.Vector.Slow type IterST s = IterT (ST s) data Partial a Stop :: a -> Partial a Step :: (Partial a) -> Partial a -- | Adds an extra layer to a free monad value. -- -- In particular, for the iterative monad Iter, this makes the -- computation require one more step, without changing its final result. -- --
--   runIter (delay ma) == Right ma
--   
delay :: (Monad f, MonadFree f m) => m a -> m a walkST :: (forall s. IterST s a) -> Partial a streamST :: Stream Id a -> Stream (ST s) a munstream :: MVector v a => Stream (ST s) a -> IterST s (v s a) unstreamM :: Vector v a => Stream (ST s) a -> IterST s (v a) foldM' :: (a -> b -> ST s a) -> a -> Stream (ST s) b -> IterST s a -- | Left fold with a monadic operator foldM :: (a -> b -> ST s a) -> a -> Stream (ST s) b -> IterST s a instance Show a => Show (Partial a) instance Read a => Read (Partial a) instance Eq a => Eq (Partial a) instance Ord a => Ord (Partial a) module Data.Vector.Set.Fusion -- | This is the internal stream fusion combinator used to merge streams -- for addition. merge :: (Monad m, Ord k) => Stream m k -> Stream m k -> Stream m k -- | COLA fusion internals module Data.Vector.Map.Fusion -- | This is the internal stream fusion combinator used to merge streams -- for addition. merge :: (Monad m, Ord k) => Stream m (k, a) -> Stream m (k, a) -> Stream m (k, a) insert :: (Monad m, Ord k) => k -> a -> Stream m (k, a) -> Stream m (k, a) module Data.Vector.Bloom.Util -- | Compute several hashes using a variant of Kirsch and -- Mitzenmacher's double hashing. rehash :: Hashable a => Int -> a -> [Int] -- | optimalHashes n m calculates the optimal number of hash -- functions for a given number of entries n in a Bloom filter -- that is m bits wide. -- --
--   k = m/n log 2
--   
optimalHashes :: Int -> Int -> Int -- | optimalWidth n p calculate the optimal width m of a -- bloom filter given the expected number of entries n and -- target failure rate p at capacity. -- --
--   m = -n log p / (log 2)^2
--   
optimalWidth :: Int -> Double -> Int module Data.Vector.Bloom.Mutable data MBloom s MBloom :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> !(MVector s Word64) -> MBloom s mbloom :: Int -> Int -> ST s (MBloom s) width :: MBloom s -> Int insert :: (PrimMonad m, Hashable a) => a -> MBloom (PrimState m) -> m () instance Typeable1 MBloom -- | Hierarchical Bloom filters module Data.Vector.Bloom data Bloom Bloom :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> !(Vector Word64) -> Bloom -- | Number of bits set entries :: Bloom -> Int -- | The number of bits in our Bloom filter. Always an integral -- multiple of 64. width :: Bloom -> Int -- | bloom k m builds an m-bit wide Bloom -- filter that uses k hashes. bloom :: (Foldable f, Hashable a) => Int -> Int -> f a -> Bloom -- | Given an action on a mutable Bloom filter, modify this one. modify :: (forall s. MBloom s -> ST s ()) -> Bloom -> Bloom -- | Insert an element into a Bloom filter. insert :: Hashable a => a -> Bloom -> Bloom -- | Check if an element is a member of a Bloom filter. -- -- This may return false positives, but never a false negative. elem :: Hashable a => a -> Bloom -> Bool -- | Compute the union of two Bloom filters. union :: Bloom -> Bloom -> Bloom -- | Compute the intersection of two Bloom filters. intersection :: Bloom -> Bloom -> Bloom -- | O(m) freeze :: PrimMonad m => MBloom (PrimState m) -> m Bloom -- | O(m) thaw :: PrimMonad m => Bloom -> m (MBloom (PrimState m)) -- | O(1) unsafeFreeze :: PrimMonad m => MBloom (PrimState m) -> m Bloom -- | O(1) unsafeThaw :: PrimMonad m => Bloom -> m (MBloom (PrimState m)) instance Typeable Bloom instance Eq Bloom instance Ord Bloom instance Show Bloom instance Read Bloom instance Data Bloom instance Semigroup Bloom -- | This module provides a choice of a best vector type for a given -- type that is as unboxed as possible. module Data.Vector.Array class (Vector (Arr a) a, Monoid (Arr a a)) => Arrayed a where type family Arr a :: * -> * type instance Arr a = Vector -- | A vector of product-like data types that know how to store themselves -- in a Vector optimally, maximizing the level of unboxing provided, but -- not guaranteeing to unbox it all. type Array a = Arr a a type MArray s a = Mutable (Arr a) s a data V_Complex :: * -> * V_Complex :: {-# UNPACK #-} !Int -> !(Array a) -> !(Array a) -> V_Complex (Complex a) data MV_Complex :: * -> * -> * MV_Complex :: {-# UNPACK #-} !Int -> !(Mutable (Arr a) s a) -> !(Mutable (Arr a) s a) -> MV_Complex s (Complex a) data V_Pair :: * -> * V_Pair :: {-# UNPACK #-} !Int -> !(Array a) -> !(Array b) -> V_Pair (a, b) data MV_Pair :: * -> * -> * MV_Pair :: {-# UNPACK #-} !Int -> !(Mutable (Arr a) s a) -> !(Mutable (Arr b) s b) -> MV_Pair s (a, b) instance (Arrayed a, RealFloat a) => Arrayed (Complex a) instance (Arrayed a, RealFloat a, b ~ Complex a) => Monoid (V_Complex b) instance (Arrayed a, RealFloat a, Eq a, b ~ Complex a) => Eq (V_Complex b) instance (Arrayed a, RealFloat a, Read a, b ~ Complex a) => Read (V_Complex b) instance (Arrayed a, RealFloat a, Show a, b ~ Complex a) => Show (V_Complex b) instance (Arrayed a, RealFloat a) => Vector V_Complex (Complex a) instance (Arrayed a, RealFloat a) => MVector MV_Complex (Complex a) instance (Arrayed a, Arrayed b) => Arrayed (a, b) instance (Arrayed a, Arrayed b, c ~ (a, b)) => Monoid (V_Pair c) instance (Arrayed a, Arrayed b, Eq a, Eq b, c ~ (a, b)) => Eq (V_Pair c) instance (Arrayed a, Arrayed b, Read a, Read b, c ~ (a, b)) => Read (V_Pair c) instance (Arrayed a, Arrayed b, Show a, Show b, c ~ (a, b)) => Show (V_Pair c) instance (Arrayed a, Arrayed b) => Vector V_Pair (a, b) instance (Arrayed a, Arrayed b) => MVector MV_Pair (a, b) instance Arrayed (IO a) instance Arrayed (Either a b) instance Arrayed (Maybe a) instance Arrayed [a] instance Arrayed Integer instance Arrayed Word64 instance Arrayed Word32 instance Arrayed Word16 instance Arrayed Word8 instance Arrayed Word instance Arrayed Int64 instance Arrayed Int32 instance Arrayed Int16 instance Arrayed Int8 instance Arrayed Int instance Arrayed Float instance Arrayed Double instance Arrayed () module Data.Vector.Bit -- | A simple newtype around a Bool -- -- The principal use of this is that a Vector Bit is -- densely packed into individual bits rather than stored as one entry -- per Word8. newtype Bit Bit :: Bool -> Bit getBit :: Bit -> Bool -- | Bit and Bool are isomorphic. _Bit :: Iso' Bit Bool -- | A BitVector support for naïve O(1) rank. data BitVector BitVector :: {-# UNPACK #-} !Int -> !(Array Bit) -> !(Vector Int) -> BitVector -- | O(n) embedding A BitVector is isomorphic to a vector of -- bits. It just carries extra information. _BitVector :: Iso' BitVector (Array Bit) -- | O(1). rank i v counts the number of True -- bits up through and including the position i rank :: BitVector -> Int -> Int -- | O(1). Is the BitVector empty? null :: BitVector -> Bool -- | O(1). Return the size of the BitVector. size :: BitVector -> Int -- | Construct a BitVector with a single element. singleton :: Bool -> BitVector -- | The empty BitVector empty :: BitVector instance Typeable Bit instance Show Bit instance Read Bit instance Eq Bit instance Ord Bit instance Enum Bit instance Bounded Bit instance Data Bit instance Eq BitVector instance Ord BitVector instance Show BitVector instance Read BitVector instance Vector Vector Bit instance MVector MVector Bit instance Unbox Bit instance Arrayed Bit module Data.Vector.Heap type Heap s a = MArray s a heapify :: (MVector v a, Ord a) => v s a -> ST s () sift :: (MVector v a, Ord a) => v s a -> Int -> ST s () findMin :: MVector v a => v s a -> ST s a deleteMin :: (MVector v a, Ord a) => v s a -> ST s (v s a) updateMin :: (MVector v a, Ord a) => a -> v s a -> ST s () -- | Check whether the vector is empty null :: MVector v a => v s a -> Bool -- | Length of the mutable vector. length :: MVector v a => v s a -> Int -- | This module provides a Vector-based Map that is -- loosely based on the Cache Oblivious Lookahead Array (COLA) by Bender -- et al. from "Cache-Oblivious Streaming B-Trees", but with -- inserts converted from ephemerally amortized to persisently amortized -- using a technique from Overmars and van Leeuwen. -- -- Currently this Map is implemented in an insert-only fashion. -- Deletions are left to future work or to another derived structure in -- case they prove expensive. -- -- Currently, we also do not use fractional cascading, as it affects the -- constant factors badly enough to not pay for itself at the scales we -- are interested in. The naive O(log^2 n) lookup consistently -- outperforms the alternative. -- -- Compared to the venerable Data.Map, this data structure -- currently consumes more memory, but it provides a more limited palette -- of operations with different asymptotics (~10x faster inserts at a -- million entries) and enables us to utilize contiguous storage. -- -- NB: when used with boxed data this structure may hold onto -- references to old versions of things for many updates to come until -- sufficient operations have happened to merge them out of the COLA. module Data.Vector.Map -- | This Map is implemented as an insert-only Cache Oblivious Lookahead -- Array (COLA) with amortized complexity bounds that are equal to those -- of a B-Tree, except for an extra log factor slowdown on lookups due to -- the lack of fractional cascading. It uses a traditional Data.Map as a -- nursery. data Map k a -- | O(1) The empty LA. empty :: Map k v -- | O(1). Identify if a LA is the empty LA. null :: Map k v -> Bool -- | O(1) Construct a LA from a single key/value pair. singleton :: (Arrayed k, Arrayed v) => k -> v -> Map k v -- | O(log^2 N) worst-case. Lookup an element. lookup :: (Ord k, Arrayed k, Arrayed v) => k -> Map k v -> Maybe v -- | O((log N)/B) amortized loads for each cache. Insert an element. insert :: (Ord k, Arrayed k, Arrayed v) => k -> v -> Map k v -> Map k v fromList :: (Ord k, Arrayed k, Arrayed v) => [(k, v)] -> Map k v instance (Show (Arr k k), Show (Arr a a)) => Show (LA k a) instance (Show (Arr k k), Show (Arr a a)) => Show (Chunk k a) -- | This module provides a Vector-based Map that is -- loosely based on the Cache Oblivious Lookahead Array (COLA) by Bender -- et al. from "Cache-Oblivious Streaming B-Trees", but with -- inserts deamortized by using a varant of a technique from Overmars and -- van Leeuwen. -- -- Currently this Map is implemented in an insert-only fashion. -- Deletions are left to future work or to another derived structure in -- case they prove expensive. -- -- Currently, we also do not use fractional cascading, as it affects the -- constant factors badly enough to not pay for itself at the scales we -- are interested in. The naive O(log^2 n) lookup consistently -- outperforms the alternative. -- -- Compared to the venerable Data.Map, this data structure -- currently consumes more memory, but it provides a more limited palette -- of operations with different asymptotics (~10x faster inserts at a -- million entries) and enables us to utilize contiguous storage. -- -- NB: when used with boxed data this structure may hold onto -- references to old versions of things for many updates to come until -- sufficient operations have happened to merge them out of the COLA. module Data.Vector.Map.Deamortized -- | This Map is implemented as an insert-only Cache Oblivious Lookahead -- Array (COLA) with amortized complexity bounds that are equal to those -- of a B-Tree, except for an extra log factor slowdown on lookups due to -- the lack of fractional cascading. It uses a traditional Data.Map as a -- nursery. data Map k a -- | O(1) The empty Map. empty :: Map k v -- | O(1). Identify if a Map is the empty Map. null :: Map k v -> Bool -- | O(1) Construct a Map from a single key/value pair. singleton :: (Arrayed k, Arrayed v) => k -> v -> Map k v -- | O(log^2 N) worst-case. Lookup an element. lookup :: (Ord k, Arrayed k, Arrayed v) => k -> Map k v -> Maybe v -- | O((log N)/B) worst-case loads for each cache. Insert an element. insert :: (Ord k, Arrayed k, Arrayed v) => k -> v -> Map k v -> Map k v fromList :: (Ord k, Arrayed k, Arrayed v) => [(k, v)] -> Map k v instance (Show (Arr v v), Show (Arr k k), Show k, Show v) => Show (Map k v) instance (Show (Arr v v), Show (Arr k k)) => Show (LA k v) -- | This module provides a Vector-based Map that is -- loosely based on the Cache Oblivious Lookahead Array (COLA) by Bender -- et al. from "Cache-Oblivious Streaming B-Trees". -- -- Currently this Map is implemented in an insert-only fashion. -- Deletions are left to future work or to another derived structure in -- case they prove expensive. -- -- Unlike the COLA, this version merely provides amortized complexity -- bounds as this permits us to provide a fully functional API. However, -- even those asymptotics are only guaranteed if you do not modify the -- "old" versions of the Map. If you do, then while correctness is -- preserved, the asymptotic analysis becomes inaccurate. -- -- Reading from "old" versions of the Map does not affect the -- asymptotic analysis and is fine. -- -- Fractional cascading was originally replaced with the use of a -- hierarchical bloom filter per level containing the elements for that -- level, with the false positive rate tuned to balance the lookup cost -- against the costs of the cache misses for a false positive at that -- depth. This avoids the need to collect forwarding pointers from the -- next level, reducing pressure on the cache dramatically, while -- providing the same asymptotic complexity. -- -- With either of these two techniques when used ephemerally, this -- Map had asymptotic performance equal to that of a B-Tree tuned -- to the parameters of your caches with requiring such parameter tuning. -- -- However, the constants were still bad enough that the naive O(log^2 -- n) version of the COLA actually wins at lookups in benchmarks at -- the scale this data structure is interesting, say around a few million -- entries, by a factor of 10x! Consequently, we're currently not even -- Bloom filtering. -- -- Compared to the venerable Data.Map, this data structure -- currently consumes more memory, but it provides a more limited palette -- of operations with different asymptotics (~10x faster inserts at a -- million entries) and enables us to utilize contiguous storage. -- -- NB: when used with boxed data this structure may hold onto -- references to old versions of things for many updates to come until -- sufficient operations have happened to merge them out of the COLA. -- -- TODO: track actual percentage of occupancy for each vector compared to -- the source vector it was based on. This would permit split -- and other operations that trim a Map to properly reason about -- space usage by borrowing the 1/3rd occupancy rule from a Stratified -- Doubling Array. module Data.Vector.Map.Ephemeral -- | This Map is implemented as an insert-only Cache Oblivious Lookahead -- Array (COLA) with amortized complexity bounds that are equal to those -- of a B-Tree when it is used ephemerally, using Bloom filters to -- replace the fractional cascade. data Map k v Nil :: Map k v One :: !k -> v -> !(Map k v) -> Map k v Map :: !Int -> !(Array k) -> !(Array v) -> !(Map k v) -> Map k v -- | O(1) The empty Map. empty :: Map k v -- | O(1). Identify if a Map is the empty Map. null :: Map k v -> Bool -- | O(1) Construct a Map from a single key/value pair. singleton :: Arrayed v => k -> v -> Map k v -- | O(log^2 N) persistently amortized, O(N) worst case. -- Lookup an element. lookup :: (Ord k, Arrayed k, Arrayed v) => k -> Map k v -> Maybe v -- | O((log N)/B) ephemerally amortized loads for each cache, O(N/B) worst -- case. Insert an element. insert :: (Ord k, Arrayed k, Arrayed v) => k -> v -> Map k v -> Map k v fromList :: (Ord k, Arrayed k, Arrayed v) => [(k, v)] -> Map k v shape :: Map k v -> [Int] instance (Read (Arr v v), Read (Arr k k), Read k, Read v) => Read (Map k v) instance (Show (Arr v v), Show (Arr k k), Show k, Show v) => Show (Map k v) -- | A non-standard zeroless-binary deamortized cache-oblivious Set module Data.Vector.Set data Set a S0 :: Set a S1 :: !(Array a) -> Set a S2 :: !(Array a) -> !(Array a) -> !(Partial (Array a)) -> !(Set a) -> Set a S3 :: !(Array a) -> !(Array a) -> !(Array a) -> !(Partial (Array a)) -> !(Set a) -> Set a empty :: Set a null :: Set a -> Bool -- | O(log n) gives a conservative upper bound on size, assuming no -- collisions size :: Set a -> Int -- | O(log n) worst case insert :: (Arrayed a, Ord a) => a -> Set a -> Set a -- | O(log^n) worst and amortized member :: (Arrayed a, Ord a) => a -> Set a -> Bool member1 :: (Arrayed a, Ord a) => a -> Array a -> Bool merge :: (Arrayed a, Ord a) => Array a -> Array a -> Partial (Array a) step :: Partial a -> Partial a -- | Offset binary search -- -- Assuming l <= h. Returns h if the predicate is -- never True over [l..h) search :: (Int -> Bool) -> Int -> Int -> Int instance Show (Array a) => Show (Set a)