-- | -- Module: Data.Chimera -- Copyright: (c) 2018-2019 Andrew Lelechenko -- Licence: MIT -- Maintainer: Andrew Lelechenko -- -- Lazy infinite streams with O(1) indexing. {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} module Data.Chimera ( -- * Memoization memoize , memoizeFix -- * Chimera , Chimera , VChimera , UChimera -- * Construction , tabulate , tabulateFix , iterate , cycle -- * Elimination , index , toList -- * Monadic construction -- $monadic , tabulateM , tabulateFixM , iterateM -- * Subvectors -- $subvectors , mapSubvectors , zipSubvectors ) where import Prelude hiding ((^), (*), div, fromIntegral, not, and, or, cycle, iterate, drop) import Control.Applicative import Data.Bits import Data.Function (fix) import Data.Functor.Identity import qualified Data.Vector as V import qualified Data.Vector.Generic as G import qualified Data.Vector.Unboxed as U import Data.Chimera.Compat import Data.Chimera.FromIntegral -- $monadic -- Be careful: the stream is infinite, so -- monadic effects must be lazy -- in order to be executed in a finite time. -- -- For instance, lazy state monad works fine: -- -- >>> import Control.Monad.State.Lazy -- >>> ch = evalState (tabulateM (\i -> do modify (+ i); get)) 0 :: UChimera Word -- >>> take 10 (toList ch) -- [0,1,3,6,10,15,21,28,36,45] -- -- But the same computation in the strict state -- monad "Control.Monad.State.Strict" diverges. -- $subvectors -- Internally 'Chimera' consists of a number of subvectors. -- Following functions provide a low-level access to them. -- This ability is especially important for streams of booleans. -- -- Let us use 'Chimera' to memoize predicates @f1@, @f2@ @::@ 'Word' @->@ 'Bool'. -- Imagine them both already -- caught in amber as @ch1@, @ch2@ @::@ 'UChimera' 'Bool', -- and now we want to memoize @f3 x = f1 x && f2 x@ as @ch3@. -- One can do it in as follows: -- -- > ch3 = tabulate (\i -> index ch1 i && index ch2 i) -- -- There are two unsatisfactory things here. Firstly, -- even unboxed vectors store only one boolean per byte. -- We would rather reach out for 'Data.Bit.Bit' wrapper, -- which provides an instance of unboxed vector -- with one boolean per bit. Secondly, combining -- existing predicates by indexing them and tabulating again -- becomes relatively expensive, given how small and simple -- our data is. Fortunately, there is an ultra-fast 'Data.Bit.zipBits' -- to zip bit vectors. We can combine it altogether like this: -- -- > import Data.Bit -- > import Data.Bits -- > ch1 = tabulate (Bit . f1) -- > ch2 = tabulate (Bit . f2) -- > ch3 = zipSubvectors (zipBits (.&.)) ch1 ch2 -- | Lazy infinite streams with elements from @a@, -- backed by a 'G.Vector' @v@ (boxed, unboxed, storable, etc.). -- Use 'tabulate', 'tabulateFix', etc. to create a stream -- and 'index' to access its arbitrary elements -- in constant time. newtype Chimera v a = Chimera { _unChimera :: V.Vector (v a) } deriving (Functor, Foldable, Traversable) -- | Streams backed by boxed vectors. type VChimera = Chimera V.Vector -- | Streams backed by unboxed vectors. type UChimera = Chimera U.Vector -- | 'pure' creates a constant stream. instance Applicative (Chimera V.Vector) where pure = tabulate . const (<*>) = zipSubvectors (<*>) #if __GLASGOW_HASKELL__ > 801 liftA2 f = zipSubvectors (liftA2 f) #endif bits :: Int bits = fbs (0 :: Word) -- | Create a stream of values of a given function. -- Once created it can be accessed via 'index' or 'toList'. -- -- >>> ch = tabulate (^ 2) :: UChimera Word -- >>> index ch 9 -- 81 -- >>> take 10 (toList ch) -- [0,1,4,9,16,25,36,49,64,81] tabulate :: G.Vector v a => (Word -> a) -> Chimera v a tabulate f = runIdentity $ tabulateM (pure . f) -- | Monadic version of 'tabulate'. tabulateM :: forall m v a. (Monad m, G.Vector v a) => (Word -> m a) -> m (Chimera v a) tabulateM f = do z <- f 0 zs <- V.generateM bits tabulateSubVector pure $ Chimera $ G.singleton z `V.cons` zs where tabulateSubVector :: Int -> m (v a) tabulateSubVector i = G.generateM ii (\j -> f (int2word (ii + j))) where ii = 1 `shiftL` i {-# SPECIALIZE tabulateM :: G.Vector v a => (Word -> Identity a) -> Identity (Chimera v a) #-} -- | For a given @f@ create a stream of values of a recursive function 'fix' @f@. -- Once created it can be accessed via 'index' or 'toList'. -- -- For example, imagine that we want to tabulate -- : -- -- >>> catalan n = if n == 0 then 1 else sum [ catalan i * catalan (n - 1 - i) | i <- [0 .. n - 1] ] -- -- Can we find @catalanF@ such that @catalan@ = 'fix' @catalanF@? -- Just replace all recursive calls to @catalan@ with @f@: -- -- >>> catalanF f n = if n == 0 then 1 else sum [ f i * f (n - 1 - i) | i <- [0 .. n - 1] ] -- -- Now we are ready to use 'tabulateFix': -- -- >>> ch = tabulateFix catalanF :: VChimera Integer -- >>> index ch 9 -- 4862 -- >>> take 10 (toList ch) -- [1,1,2,5,14,42,132,429,1430,4862] tabulateFix :: G.Vector v a => ((Word -> a) -> Word -> a) -> Chimera v a tabulateFix uf = runIdentity $ tabulateFixM ((pure .) . uf . (runIdentity .)) -- | Monadic version of 'tabulateFix'. -- There are no particular guarantees about the order of recursive calls: -- they may be executed more than once or executed in different order. -- That said, monadic effects must be idempotent and commutative. tabulateFixM :: forall m v a. (Monad m, G.Vector v a) => ((Word -> m a) -> Word -> m a) -> m (Chimera v a) tabulateFixM f = result where result :: m (Chimera v a) result = do z <- fix f 0 zs <- V.generateM bits tabulateSubVector pure $ Chimera $ G.singleton z `V.cons` zs tabulateSubVector :: Int -> m (v a) tabulateSubVector i = subResult where subResult = G.generateM ii (\j -> f fixF (int2word (ii + j))) subResultBoxed = V.generateM ii (\j -> f fixF (int2word (ii + j))) ii = 1 `shiftL` i fixF :: Word -> m a fixF k | k < int2word ii = flip index k <$> result | k < int2word ii `shiftL` 1 = (`V.unsafeIndex` (word2int k - ii)) <$> subResultBoxed | otherwise = f fixF k {-# SPECIALIZE tabulateFixM :: G.Vector v a => ((Word -> Identity a) -> Word -> Identity a) -> Identity (Chimera v a) #-} -- | 'iterate' @f@ @x@ returns an infinite stream -- of repeated applications of @f@ to @x@. -- -- >>> ch = iterate (+ 1) 0 :: UChimera Int -- >>> take 10 (toList ch) -- [0,1,2,3,4,5,6,7,8,9] iterate :: G.Vector v a => (a -> a) -> a -> Chimera v a iterate f = runIdentity . iterateM (pure . f) -- | Monadic version of 'iterate'. iterateM :: forall m v a. (Monad m, G.Vector v a) => (a -> m a) -> a -> m (Chimera v a) iterateM f seed = do nextSeed <- f seed let z = G.singleton seed zs <- V.iterateNM bits go (G.singleton nextSeed) pure $ Chimera $ z `V.cons` zs where go :: v a -> m (v a) go vec = do nextSeed <- f (G.unsafeLast vec) G.iterateNM (G.length vec `shiftL` 1) f nextSeed {-# SPECIALIZE iterateM :: G.Vector v a => (a -> Identity a) -> a -> Identity (Chimera v a) #-} -- | Index a stream in a constant time. -- -- >>> ch = tabulate (^ 2) :: UChimera Word -- >>> index ch 9 -- 81 index :: G.Vector v a => Chimera v a -> Word -> a index (Chimera vs) 0 = G.unsafeHead (V.unsafeHead vs) index (Chimera vs) i = G.unsafeIndex (vs `V.unsafeIndex` (sgm + 1)) (word2int $ i - 1 `shiftL` sgm) where sgm :: Int sgm = fbs i - 1 - word2int (clz i) -- | Convert a stream to an infinite list. -- -- >>> ch = tabulate (^ 2) :: UChimera Word -- >>> take 10 (toList ch) -- [0,1,4,9,16,25,36,49,64,81] toList :: G.Vector v a => Chimera v a -> [a] toList (Chimera vs) = foldMap G.toList vs -- | Return an infinite repetion of a given vector. -- Throw an error on an empty vector. -- -- >>> ch = cycle (Data.Vector.fromList [4, 2]) :: VChimera Int -- >>> take 10 (toList ch) -- [4,2,4,2,4,2,4,2,4,2] cycle :: G.Vector v a => v a -> Chimera v a cycle vec = case l of 0 -> error "Data.Chimera.cycle: empty list" _ -> tabulate (G.unsafeIndex vec . word2int . (`rem` l)) where l = int2word $ G.length vec -- | Memoize a function: -- repeating calls to 'memoize' @f@ @n@ -- would compute @f@ @n@ only once -- and cache the result in 'VChimera'. -- This is just a shortcut for 'index' '.' 'tabulate'. -- -- prop> memoize f n = f n memoize :: (Word -> a) -> (Word -> a) memoize = index @V.Vector . tabulate -- | For a given @f@ memoize a recursive function 'fix' @f@, -- caching results in 'VChimera'. -- This is just a shortcut for 'index' '.' 'tabulateFix'. -- -- prop> memoizeFix f n = fix f n -- -- For example, imagine that we want to memoize -- : -- -- >>> fibo n = if n < 2 then fromIntegral n else fibo (n - 1) + fibo (n - 2) -- -- Can we find @fiboF@ such that @fibo@ = 'fix' @fiboF@? -- Just replace all recursive calls to @fibo@ with @f@: -- -- >>> fiboF f n = if n < 2 then fromIntegral n else f (n - 1) + f (n - 2) -- -- Now we are ready to use 'memoizeFix': -- -- >>> memoizeFix fiboF 10 -- 55 -- >>> memoizeFix fiboF 100 -- 354224848179261915075 memoizeFix :: ((Word -> a) -> Word -> a) -> (Word -> a) memoizeFix = index @V.Vector . tabulateFix -- | Map subvectors of a stream, using a given length-preserving function. mapSubvectors :: (G.Vector u a, G.Vector v b) => (u a -> v b) -> Chimera u a -> Chimera v b mapSubvectors f (Chimera bs) = Chimera (V.map f bs) -- | Zip subvectors from two streams, using a given length-preserving function. zipSubvectors :: (G.Vector u a, G.Vector v b, G.Vector w c) => (u a -> v b -> w c) -> Chimera u a -> Chimera v b -> Chimera w c zipSubvectors f (Chimera bs1) (Chimera bs2) = Chimera (V.zipWith f bs1 bs2)