{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-} -- -- Module : StorableVector -- Copyright : (c) The University of Glasgow 2001, -- (c) David Roundy 2003-2005, -- (c) Simon Marlow 2005 -- (c) Don Stewart 2005-2006 -- (c) Bjorn Bringert 2006 -- (c) Spencer Janssen 2006 -- (c) Henning Thielemann 2008 -- -- -- License : BSD-style -- -- Maintainer : sjanssen@cse.unl.edu -- Stability : experimental -- Portability : portable, requires ffi and cpp -- Tested with : GHC 6.4.1 and Hugs March 2005 -- -- -- | A time and space-efficient implementation of vectors using -- packed arrays, suitable for high performance use, both in terms -- of large data quantities, or high speed requirements. Vectors -- are encoded as strict arrays, held in a 'ForeignPtr', -- and can be passed between C and Haskell with little effort. -- -- This module is intended to be imported @qualified@, to avoid name -- clashes with "Prelude" functions. eg. -- -- > import qualified Data.StorableVector as V -- -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use -- UArray by Simon Marlow. Rewritten to support slices and use -- ForeignPtr by David Roundy. Polished and extended by Don Stewart. -- Generalized to any Storable value by Spencer Janssen. -- Chunky lazy stream and mutable access in ST monad by Henning Thieleman. module Data.StorableVector ( -- * The @Vector@ type Vector, -- * Introducing and eliminating 'Vector's empty, singleton, pack, unpack, packWith, unpackWith, -- * Basic interface cons, snoc, append, head, last, tail, init, null, length, viewL, viewR, -- * Transformating 'Vector's map, reverse, intersperse, transpose, -- * Reducing 'Vector's (folds) foldl, foldl', foldl1, foldl1', foldr, foldr1, -- ** Special folds concat, concatMap, any, all, maximum, minimum, -- * Building 'Vector's -- ** Scans scanl, scanl1, scanr, scanr1, -- ** Accumulating maps mapAccumL, mapAccumR, mapIndexed, -- ** Unfolding 'Vector's replicate, unfoldr, unfoldrN, -- * Substrings -- ** Breaking strings take, drop, splitAt, takeWhile, dropWhile, span, spanEnd, break, breakEnd, group, groupBy, inits, tails, -- ** Breaking into many substrings split, splitWith, tokens, -- ** Joining strings join, -- * Predicates isPrefixOf, isSuffixOf, -- * Searching 'Vector's -- ** Searching by equality elem, notElem, -- ** Searching with a predicate find, filter, -- * Indexing 'Vector's index, elemIndex, elemIndices, elemIndexEnd, findIndex, findIndices, count, findIndexOrEnd, -- * Zipping and unzipping 'Vector's zip, zipWith, unzip, copy, -- * IO hGet, hPut, readFile, writeFile, appendFile, ) where import qualified Prelude as P import Prelude hiding (reverse,head,tail,last,init,null ,length,map,lines,foldl,foldr,unlines ,concat,any,take,drop,splitAt,takeWhile ,dropWhile,span,break,elem,filter,maximum ,minimum,all,concatMap,foldl1,foldr1 ,scanl,scanl1,scanr,scanr1 ,readFile,writeFile,appendFile,replicate ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) import Data.StorableVector.Base import qualified Data.List as List import Control.Exception (assert, bracket, ) import Foreign.ForeignPtr (withForeignPtr, ) import Foreign.Marshal.Array (advancePtr, copyArray, ) import Foreign.Ptr (Ptr, minusPtr, ) import Foreign.Storable (Storable(..)) import Data.Monoid (Monoid, mempty, mappend, mconcat, ) import System.IO (openBinaryFile, hClose, hFileSize, hGetBuf, hPutBuf, Handle, IOMode(..), ) import System.IO.Unsafe -- import GHC.IOBase -- ----------------------------------------------------------------------------- -- -- Useful macros, until we have bang patterns -- #define STRICT1(f) f _a | _a `seq` False = undefined #define STRICT2(f) f _a _b | _a `seq` _b `seq` False = undefined #define STRICT3(f) f _a _b _c | _a `seq` _b `seq` _c `seq` False = undefined #define STRICT4(f) f _a _b _c _d | _a `seq` _b `seq` _c `seq` _d `seq` False = undefined #define STRICT5(f) f _a _b _c _d e | _a `seq` _b `seq` _c `seq` _d `seq` _e `seq` False = undefined -- ----------------------------------------------------------------------------- instance (Storable a, Eq a) => Eq (Vector a) where (==) = eq instance (Storable a) => Monoid (Vector a) where mempty = empty mappend = append mconcat = concat -- | /O(n)/ Equality on the 'Vector' type. eq :: (Storable a, Eq a) => Vector a -> Vector a -> Bool eq a@(SV p s l) b@(SV p' s' l') | l /= l' = False -- short cut on length | p == p' && s == s' = True -- short cut for the same string | otherwise = unpack a == unpack b {-# INLINE eq #-} -- ----------------------------------------------------------------------------- -- Introducing and eliminating 'Vector's -- | /O(1)/ The empty 'Vector' empty :: (Storable a) => Vector a empty = unsafeCreate 0 $ const $ return () {-# NOINLINE empty #-} -- | /O(1)/ Construct a 'Vector' containing a single element singleton :: (Storable a) => a -> Vector a singleton c = unsafeCreate 1 $ \p -> poke p c {-# INLINE singleton #-} -- | /O(n)/ Convert a '[a]' into a 'Vector a'. -- pack :: (Storable a) => [a] -> Vector a pack str = unsafeCreate (P.length str) $ \p -> go p str where go _ [] = return () go p (x:xs) = poke p x >> go (p `advancePtr` 1) xs -- | /O(n)/ Converts a 'Vector a' to a '[a]'. unpack :: (Storable a) => Vector a -> [a] unpack = foldr (:) [] {-# INLINE unpack #-} ------------------------------------------------------------------------ -- | /O(n)/ Convert a list into a 'Vector' using a conversion function packWith :: (Storable b) => (a -> b) -> [a] -> Vector b packWith k str = unsafeCreate (P.length str) $ \p -> go p str where STRICT2(go) go _ [] = return () go p (x:xs) = poke p (k x) >> go (p `advancePtr` 1) xs -- less space than pokeElemOff {-# INLINE packWith #-} -- | /O(n)/ Convert a 'Vector' into a list using a conversion function unpackWith :: (Storable a) => (a -> b) -> Vector a -> [b] unpackWith _ (SV _ _ 0) = [] unpackWith k (SV ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> go (p `advancePtr` s) (l - 1) [] where STRICT3(go) go p 0 acc = peek p >>= \e -> return (k e : acc) go p n acc = peekElemOff p n >>= \e -> go p (n-1) (k e : acc) {-# INLINE unpackWith #-} -- --------------------------------------------------------------------- -- Basic interface -- | /O(1)/ Test whether a 'Vector' is empty. null :: Vector a -> Bool null (SV _ _ l) = assert (l >= 0) $ l <= 0 {-# INLINE null #-} -- --------------------------------------------------------------------- -- | /O(1)/ 'length' returns the length of a 'Vector' as an 'Int'. length :: Vector a -> Int length (SV _ _ l) = assert (l >= 0) $ l -- -- length/loop fusion. When taking the length of any fuseable loop, -- rewrite it as a foldl', and thus avoid allocating the result buffer -- worth around 10% in speed testing. -- #if defined(__GLASGOW_HASKELL__) {-# INLINE [1] length #-} #endif ------------------------------------------------------------------------ -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different -- complexity, as it requires a memcpy. cons :: (Storable a) => a -> Vector a -> Vector a cons c (SV x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do poke p c copyArray (p `advancePtr` 1) (f `advancePtr` s) (fromIntegral l) {-# INLINE cons #-} -- | /O(n)/ Append an element to the end of a 'Vector' snoc :: (Storable a) => Vector a -> a -> Vector a snoc (SV x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do copyArray p (f `advancePtr` s) l pokeElemOff p l c {-# INLINE snoc #-} -- | /O(1)/ Extract the first element of a 'Vector', which must be non-empty. -- An exception will be thrown in the case of an empty 'Vector'. head :: (Storable a) => Vector a -> a head (SV x s l) | l <= 0 = errorEmptyList "head" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekElemOff p s {-# INLINE head #-} -- | /O(1)/ Extract the elements after the head of a 'Vector', which must be non-empty. -- An exception will be thrown in the case of an empty 'Vector'. tail :: (Storable a) => Vector a -> Vector a tail (SV p s l) | l <= 0 = errorEmptyList "tail" | otherwise = SV p (s+1) (l-1) {-# INLINE tail #-} -- | /O(1)/ Extract the last element of a 'Vector', which must be finite and non-empty. -- An exception will be thrown in the case of an empty 'Vector'. last :: (Storable a) => Vector a -> a last ps@(SV x s l) | null ps = errorEmptyList "last" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekElemOff p (s+l-1) {-# INLINE last #-} -- | /O(1)/ Return all the elements of a 'Vector' except the last one. -- An exception will be thrown in the case of an empty 'Vector'. init :: Vector a -> Vector a init ps@(SV p s l) | null ps = errorEmptyList "init" | otherwise = SV p s (l-1) {-# INLINE init #-} -- | /O(n)/ Append two Vectors append :: (Storable a) => Vector a -> Vector a -> Vector a append xs ys | null xs = ys | null ys = xs | otherwise = concat [xs,ys] {-# INLINE append #-} -- --------------------------------------------------------------------- -- Transformations -- | /O(n)/ 'map' @f xs@ is the 'Vector' obtained by applying @f@ to each -- element of @xs@. map :: (Storable a, Storable b) => (a -> b) -> Vector a -> Vector b map f (SV fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> create len $ map_ 0 (a `advancePtr` s) where STRICT3(map_) map_ n p1 p2 | n >= len = return () | otherwise = do x <- peekElemOff p1 n pokeElemOff p2 n (f x) map_ (n+1) p1 p2 {-# INLINE map #-} -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. reverse :: (Storable a) => Vector a -> Vector a reverse (SV x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> sequence_ [peekElemOff (f `advancePtr` s) i >>= pokeElemOff p (l - i - 1) | i <- [0 .. l - 1]] -- | /O(n)/ The 'intersperse' function takes a element and a -- 'Vector' and \`intersperses\' that element between the elements of -- the 'Vector'. It is analogous to the intersperse function on -- Lists. intersperse :: (Storable a) => a -> Vector a -> Vector a intersperse c = pack . List.intersperse c . unpack -- | The 'transpose' function transposes the rows and columns of its -- 'Vector' argument. transpose :: (Storable a) => [Vector a] -> [Vector a] transpose ps = P.map pack (List.transpose (P.map unpack ps)) -- --------------------------------------------------------------------- -- Reducing 'Vector's -- | 'foldl', applied to a binary operator, a starting value (typically -- the left-identity of the operator), and a Vector, reduces the -- 'Vector' using the binary operator, from left to right. -- This function is subject to array fusion. foldl :: (Storable a) => (b -> a -> b) -> b -> Vector a -> b foldl f v (SV x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> lgo v (ptr `advancePtr` s) (ptr `advancePtr` (s+l)) where STRICT3(lgo) lgo z p q | p == q = return z | otherwise = do c <- peek p lgo (f z c) (p `advancePtr` 1) q {-# INLINE foldl #-} -- | 'foldl\'' is like 'foldl', but strict in the accumulator. -- Though actually foldl is also strict in the accumulator. foldl' :: (Storable a) => (b -> a -> b) -> b -> Vector a -> b foldl' = foldl {-# INLINE foldl' #-} -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a 'Vector', -- reduces the 'Vector' using the binary operator, from right to left. foldr :: (Storable a) => (a -> b -> b) -> b -> Vector a -> b foldr k z = let recurse = switchL z (\h t -> k h (recurse t)) in recurse {-# INLINE foldr #-} -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'Vector's. -- This function is subject to array fusion. -- An exception will be thrown in the case of an empty 'Vector'. foldl1 :: (Storable a) => (a -> a -> a) -> Vector a -> a foldl1 f = switchL (errorEmptyList "foldl1") (foldl f) {-# INLINE foldl1 #-} -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. -- An exception will be thrown in the case of an empty 'Vector'. foldl1' :: (Storable a) => (a -> a -> a) -> Vector a -> a foldl1' f = switchL (errorEmptyList "foldl1'") (foldl' f) {-# INLINE foldl1' #-} -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'Vector's -- An exception will be thrown in the case of an empty 'Vector'. foldr1 :: (Storable a) => (a -> a -> a) -> Vector a -> a foldr1 f = switchR (errorEmptyList "foldr1") (flip (foldr f)) {-# INLINE foldr1 #-} -- --------------------------------------------------------------------- -- Special folds -- | /O(n)/ Concatenate a list of 'Vector's. concat :: (Storable a) => [Vector a] -> Vector a concat [] = empty concat [ps] = ps concat xs = unsafeCreate len $ \ptr -> go xs ptr where len = P.sum . P.map length $ xs STRICT2(go) go [] _ = return () go (SV p s l:ps) ptr = do withForeignPtr p $ \fp -> copyArray ptr (fp `advancePtr` s) l go ps (ptr `advancePtr` l) -- | Map a function over a 'Vector' and concatenate the results concatMap :: (Storable a, Storable b) => (a -> Vector b) -> Vector a -> Vector b concatMap f = concat . foldr ((:) . f) [] {-# INLINE concatMap #-} -- | /O(n)/ Applied to a predicate and a 'Vector', 'any' determines if -- any element of the 'Vector' satisfies the predicate. any :: (Storable a) => (a -> Bool) -> Vector a -> Bool any _ (SV _ _ 0) = False any f (SV x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> go (ptr `advancePtr` s) (ptr `advancePtr` (s+l)) where STRICT2(go) go p q | p == q = return False | otherwise = do c <- peek p if f c then return True else go (p `advancePtr` 1) q {-# INLINE any #-} -- | /O(n)/ Applied to a predicate and a 'Vector', 'all' determines -- if all elements of the 'Vector' satisfy the predicate. all :: (Storable a) => (a -> Bool) -> Vector a -> Bool all _ (SV _ _ 0) = True all f (SV x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> go (ptr `advancePtr` s) (ptr `advancePtr` (s+l)) where STRICT2(go) go p q | p == q = return True -- end of list | otherwise = do c <- peek p if f c then go (p `advancePtr` 1) q else return False {-# INLINE all #-} ------------------------------------------------------------------------ -- | /O(n)/ 'maximum' returns the maximum value from a 'Vector' -- This function will fuse. -- An exception will be thrown in the case of an empty 'Vector'. maximum :: (Storable a, Ord a) => Vector a -> a maximum = foldl1' max -- | /O(n)/ 'minimum' returns the minimum value from a 'Vector' -- This function will fuse. -- An exception will be thrown in the case of an empty 'Vector'. minimum :: (Storable a, Ord a) => Vector a -> a minimum = foldl1' min ------------------------------------------------------------------------ switchL :: Storable a => b -> (a -> Vector a -> b) -> Vector a -> b switchL n j x = if null x then n else j (unsafeHead x) (unsafeTail x) {-# INLINE switchL #-} switchR :: Storable a => b -> (Vector a -> a -> b) -> Vector a -> b switchR n j x = if null x then n else j (unsafeInit x) (unsafeLast x) {-# INLINE switchR #-} viewL :: Storable a => Vector a -> Maybe (a, Vector a) viewL = switchL Nothing (curry Just) {-# INLINE viewL #-} viewR :: Storable a => Vector a -> Maybe (Vector a, a) viewR = switchR Nothing (curry Just) {-# INLINE viewR #-} -- | The 'mapAccumL' function behaves like a combination of 'map' and -- 'foldl'; it applies a function to each element of a 'Vector', -- passing an accumulating parameter from left to right, and returning a -- final value of this accumulator together with the new list. mapAccumL :: (Storable a, Storable b) => (acc -> a -> (acc, b)) -> acc -> Vector a -> (acc, Vector b) mapAccumL f acc0 as0 = let (bs, Just (acc2, _)) = unfoldrN (length as0) (\(acc,as) -> fmap (\(asHead,asTail) -> let (acc1,b) = f acc asHead in (b, (acc1, asTail))) (viewL as)) (acc0,as0) in (acc2, bs) {-# INLINE mapAccumL #-} -- | The 'mapAccumR' function behaves like a combination of 'map' and -- 'foldr'; it applies a function to each element of a 'Vector', -- passing an accumulating parameter from right to left, and returning a -- final value of this accumulator together with the new 'Vector'. mapAccumR :: (Storable a, Storable b) => (acc -> a -> (acc, b)) -> acc -> Vector a -> (acc, Vector b) mapAccumR f acc0 as0 = let (bs, Just (acc2, _)) = unfoldlN (length as0) (\(acc,as) -> fmap (\(asInit,asLast) -> let (acc1,b) = f acc asLast in (b, (acc1, asInit))) (viewR as)) (acc0,as0) in (acc2, bs) {-# INLINE mapAccumR #-} -- | /O(n)/ map functions, provided with the index at each position mapIndexed :: (Storable a, Storable b) => (Int -> a -> b) -> Vector a -> Vector b mapIndexed f = snd . mapAccumL (\i e -> (i + 1, f i e)) 0 {-# INLINE mapIndexed #-} -- --------------------------------------------------------------------- -- Building 'Vector's -- | 'scanl' is similar to 'foldl', but returns a list of successive -- reduced values from the left. This function will fuse. -- -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] -- -- Note that -- -- > last (scanl f z xs) == foldl f z xs. scanl :: (Storable a, Storable b) => (a -> b -> a) -> a -> Vector b -> Vector a scanl f acc0 as0 = fst $ unfoldrN (succ (length as0)) (fmap $ \(acc,as) -> (acc, fmap (\(asHead,asTail) -> (f acc asHead, asTail)) (viewL as))) (Just (acc0, as0)) -- less efficient but much more comprehensible -- scanl f z ps = -- cons z (snd (mapAccumL (\acc a -> let b = f acc a in (b,b)) z ps)) -- n.b. haskell's List scan returns a list one bigger than the -- input, so we need to snoc here to get some extra space, however, -- it breaks map/up fusion (i.e. scanl . map no longer fuses) {-# INLINE scanl #-} -- | 'scanl1' is a variant of 'scanl' that has no starting value argument. -- This function will fuse. -- -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] scanl1 :: (Storable a) => (a -> a -> a) -> Vector a -> Vector a scanl1 f = switchL empty (scanl f) {-# INLINE scanl1 #-} -- | scanr is the right-to-left dual of scanl. scanr :: (Storable a, Storable b) => (a -> b -> b) -> b -> Vector a -> Vector b scanr f acc0 as0 = fst $ unfoldlN (succ (length as0)) (fmap $ \(acc,as) -> (acc, fmap (\(asInit,asLast) -> (f asLast acc, asInit)) (viewR as))) (Just (acc0, as0)) {-# INLINE scanr #-} -- | 'scanr1' is a variant of 'scanr' that has no starting value argument. scanr1 :: (Storable a) => (a -> a -> a) -> Vector a -> Vector a scanr1 f = switchR empty (flip (scanl f)) {-# INLINE scanr1 #-} -- --------------------------------------------------------------------- -- Unfolds and replicates -- | /O(n)/ 'replicate' @n x@ is a 'Vector' of length @n@ with @x@ -- the value of every element. -- replicate :: (Storable a) => Int -> a -> Vector a replicate w c | w <= 0 = empty | otherwise = fst $ unfoldrN w (const $ return (c, ())) () -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a -- 'Vector' from a seed value. The function takes the element and -- returns 'Nothing' if it is done producing the 'Vector or returns -- 'Just' @(a,b)@, in which case, @a@ is the next element in the 'Vector', -- and @b@ is the seed value for further production. -- -- Examples: -- -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 -- > == pack [0, 1, 2, 3, 4, 5] -- unfoldr :: (Storable b) => (a -> Maybe (b, a)) -> a -> Vector b unfoldr f = concat . unfoldChunk 32 64 where unfoldChunk n n' x = case unfoldrN n f x of (s, Nothing) -> s : [] (s, Just x') -> s : unfoldChunk n' (n+n') x' {-# INLINE unfoldr #-} -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Vector' from a seed -- value. However, the length of the result is limited by the first -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' -- when the maximum length of the result is known. -- -- The following equation relates 'unfoldrN' and 'unfoldr': -- -- > fst (unfoldrN n f s) == take n (unfoldr f s) -- unfoldrN :: (Storable b) => Int -> (a -> Maybe (b, a)) -> a -> (Vector b, Maybe a) unfoldrN i f x0 | i < 0 = (empty, Just x0) | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 where STRICT3(go) go p x n = if n == i then return (0, n, Just x) else case f x of Nothing -> return (0, n, Nothing) Just (w,x') -> do poke p w go (p `advancePtr` 1) x' (n+1) {-# INLINE unfoldrN #-} unfoldlN :: (Storable b) => Int -> (a -> Maybe (b, a)) -> a -> (Vector b, Maybe a) unfoldlN i f x0 | i < 0 = (empty, Just x0) | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go (p `advancePtr` i) x0 i where STRICT3(go) go p x n = if n == 0 then return (n, i, Just x) else case f x of Nothing -> return (n, i, Nothing) Just (w,x') -> let p' = p `advancePtr` (-1) in do poke p' w go p' x' (n-1) {-# INLINE unfoldlN #-} -- --------------------------------------------------------------------- -- Substrings -- | /O(1)/ 'take' @n@, applied to a 'Vector' @xs@, returns the prefix -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. take :: (Storable a) => Int -> Vector a -> Vector a take n ps@(SV x s l) | n <= 0 = empty | n >= l = ps | otherwise = SV x s n {-# INLINE take #-} -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ -- elements, or @[]@ if @n > 'length' xs@. drop :: (Storable a) => Int -> Vector a -> Vector a drop n ps@(SV x s l) | n <= 0 = ps | n >= l = empty | otherwise = SV x (s+n) (l-n) {-# INLINE drop #-} -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. splitAt :: (Storable a) => Int -> Vector a -> (Vector a, Vector a) splitAt n ps@(SV x s l) | n <= 0 = (empty, ps) | n >= l = (ps, empty) | otherwise = (SV x s n, SV x (s+n) (l-n)) {-# INLINE splitAt #-} -- | 'takeWhile', applied to a predicate @p@ and a 'Vector' @xs@, -- returns the longest prefix (possibly empty) of @xs@ of elements that -- satisfy @p@. takeWhile :: (Storable a) => (a -> Bool) -> Vector a -> Vector a takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps {-# INLINE takeWhile #-} -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. dropWhile :: (Storable a) => (a -> Bool) -> Vector a -> Vector a dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps {-# INLINE dropWhile #-} -- | 'break' @p@ is equivalent to @'span' ('not' . p)@. break :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) {-# INLINE break #-} -- | 'breakEnd' behaves like 'break' but from the end of the 'Vector' -- -- breakEnd p == spanEnd (not.p) breakEnd :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) breakEnd p ps = splitAt (findFromEndUntil p ps) ps -- | 'span' @p xs@ breaks the 'Vector' into two segments. It is -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ span :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) span p ps = break (not . p) ps {-# INLINE span #-} -- | 'spanEnd' behaves like 'span' but from the end of the 'Vector'. -- We have -- -- > spanEnd (not.isSpace) "x y z" == ("x y ","z") -- -- and -- -- > spanEnd (not . isSpace) ps -- > == -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) -- spanEnd :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps -- | /O(n)/ Splits a 'Vector' into components delimited by -- separators, where the predicate returns True for a separator element. -- The resulting components do not contain the separators. Two adjacent -- separators result in an empty component in the output. eg. -- -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] -- > splitWith (=='a') [] == [] -- splitWith :: (Storable a) => (a -> Bool) -> Vector a -> [Vector a] splitWith _ (SV _ _ 0) = [] splitWith p ps = loop p ps where STRICT2(loop) loop q qs = chunk : switchL [] (\ _ t -> loop q t) rest where (chunk,rest) = break q qs {-# INLINE splitWith #-} -- | /O(n)/ Break a 'Vector' into pieces separated by the -- argument, consuming the delimiter. I.e. -- -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] -- > split 'a' "aXaXaXa" == ["","X","X","X"] -- > split 'x' "x" == ["",""] -- -- and -- -- > join [c] . split c == id -- > split == splitWith . (==) -- -- As for all splitting functions in this library, this function does -- not copy the substrings, it just constructs new 'Vector's that -- are slices of the original. -- split :: (Storable a, Eq a) => a -> Vector a -> [Vector a] split w v = splitWith (w==) v {-# INLINE split #-} -- | Like 'splitWith', except that sequences of adjacent separators are -- treated as a single separator. eg. -- -- > tokens (=='a') "aabbaca" == ["bb","c"] -- tokens :: (Storable a) => (a -> Bool) -> Vector a -> [Vector a] tokens f = P.filter (not.null) . splitWith f {-# INLINE tokens #-} -- | The 'group' function takes a 'Vector' and returns a list of -- 'Vector's such that the concatenation of the result is equal to the -- argument. Moreover, each sublist in the result contains only equal -- elements. For example, -- -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] -- -- It is a special case of 'groupBy', which allows the programmer to -- supply their own equality test. It is about 40% faster than -- /groupBy (==)/ group :: (Storable a, Eq a) => Vector a -> [Vector a] group xs = switchL [] (\ h _ -> let (ys, zs) = span (== h) xs in ys : group zs) xs -- | The 'groupBy' function is the non-overloaded version of 'group'. groupBy :: (Storable a) => (a -> a -> Bool) -> Vector a -> [Vector a] groupBy k xs = switchL [] (\ h t -> let n = 1 + findIndexOrEnd (not . k h) t in unsafeTake n xs : groupBy k (unsafeDrop n xs)) xs {-# INLINE groupBy #-} -- | /O(n)/ The 'join' function takes a 'Vector' and a list of -- 'Vector's and concatenates the list after interspersing the first -- argument between each element of the list. join :: (Storable a) => Vector a -> [Vector a] -> Vector a join s = concat . List.intersperse s {-# INLINE join #-} -- --------------------------------------------------------------------- -- Indexing 'Vector's -- | /O(1)/ 'Vector' index (subscript) operator, starting from 0. index :: (Storable a) => Vector a -> Int -> a index ps n | n < 0 = moduleError "index" ("negative index: " ++ show n) | n >= length ps = moduleError "index" ("index too large: " ++ show n ++ ", length = " ++ show (length ps)) | otherwise = ps `unsafeIndex` n {-# INLINE index #-} -- | /O(n)/ The 'elemIndex' function returns the index of the first -- element in the given 'Vector' which is equal to the query -- element, or 'Nothing' if there is no such element. -- This implementation uses memchr(3). elemIndex :: (Storable a, Eq a) => a -> Vector a -> Maybe Int elemIndex c (SV x s l) = inlinePerformIO $ withForeignPtr x $ \p -> go p (s + l) 0 where STRICT3(go) go p end i | i == end = return Nothing | otherwise = do e <- peekElemOff p i if c == e then return $ Just (i - s) else go p end (i + 1) {-# INLINE elemIndex #-} -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the -- element in the given 'Vector' which is equal to the query -- element, or 'Nothing' if there is no such element. The following -- holds: -- -- > elemIndexEnd c xs == -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) -- elemIndexEnd :: (Storable a, Eq a) => a -> Vector a -> Maybe Int elemIndexEnd ch (SV x s l) = inlinePerformIO $ withForeignPtr x $ \p -> go (p `advancePtr` s) (l-1) where STRICT2(go) go p i | i < 0 = return Nothing | otherwise = do ch' <- peekElemOff p i if ch == ch' then return $ Just i else go p (i-1) {-# INLINE elemIndexEnd #-} -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. -- This implementation uses memchr(3). elemIndices :: (Storable a, Eq a) => a -> Vector a -> [Int] elemIndices c ps = loop 0 ps where STRICT2(loop) loop n ps' = switchL [] (\ h t -> if c == h then n : loop (n+1) t else loop (n+1) t) ps' {-# INLINE elemIndices #-} -- | count returns the number of times its argument appears in the 'Vector' -- -- > count = length . elemIndices -- -- But more efficiently than using length on the intermediate list. count :: (Storable a, Eq a) => a -> Vector a -> Int count w sv = List.length $ elemIndices w sv {-# INLINE count #-} -- | The 'findIndex' function takes a predicate and a 'Vector' and -- returns the index of the first element in the 'Vector' -- satisfying the predicate. findIndex :: (Storable a) => (a -> Bool) -> Vector a -> Maybe Int findIndex k (SV x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `advancePtr` s) 0 where STRICT2(go) go ptr n | n >= l = return Nothing | otherwise = do w <- peek ptr if k w then return (Just n) else go (ptr `advancePtr` 1) (n+1) {-# INLINE findIndex #-} -- | The 'findIndices' function extends 'findIndex', by returning the -- indices of all elements satisfying the predicate, in ascending order. findIndices :: (Storable a) => (a -> Bool) -> Vector a -> [Int] findIndices p ps = loop 0 ps where STRICT2(loop) loop n qs = switchL [] (\ h t -> if p h then n : loop (n+1) t else loop (n+1) t) $ qs {-# INLINE findIndices #-} -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length -- of the string if no element is found, rather than Nothing. findIndexOrEnd :: (Storable a) => (a -> Bool) -> Vector a -> Int findIndexOrEnd k (SV x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `advancePtr` s) 0 where STRICT2(go) go ptr n | n >= l = return l | otherwise = do w <- peek ptr if k w then return n else go (ptr `advancePtr` 1) (n+1) {-# INLINE findIndexOrEnd #-} -- --------------------------------------------------------------------- -- Searching Vectors -- | /O(n)/ 'elem' is the 'Vector' membership predicate. elem :: (Storable a, Eq a) => a -> Vector a -> Bool elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True {-# INLINE elem #-} -- | /O(n)/ 'notElem' is the inverse of 'elem' notElem :: (Storable a, Eq a) => a -> Vector a -> Bool notElem c ps = not (elem c ps) {-# INLINE notElem #-} -- | /O(n)/ 'filter', applied to a predicate and a 'Vector', -- returns a 'Vector' containing those elements that satisfy the -- predicate. This function is subject to array fusion. filter :: (Storable a) => (a -> Bool) -> Vector a -> Vector a filter k ps@(SV x s l) | null ps = ps | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> let STRICT3(go) go end i j | i == end = return j | otherwise = do w <- peekElemOff f i if k w then do pokeElemOff p j w go end (i+1) (j + 1) else go end (i+1) j in go (s + l) s 0 {-# INLINE filter #-} -- | /O(n)/ The 'find' function takes a predicate and a 'Vector', -- and returns the first element in matching the predicate, or 'Nothing' -- if there is no such element. -- -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing -- find :: (Storable a) => (a -> Bool) -> Vector a -> Maybe a find f p = fmap (unsafeIndex p) (findIndex f p) {-# INLINE find #-} -- --------------------------------------------------------------------- -- Searching for substrings -- | /O(n)/ The 'isPrefixOf' function takes two 'Vector' and returns 'True' -- iff the first is a prefix of the second. isPrefixOf :: (Storable a, Eq a) => Vector a -> Vector a -> Bool isPrefixOf x@(SV _ _ l1) y@(SV _ _ l2) = l1 <= l2 && x `eq` unsafeTake l1 y -- | /O(n)/ The 'isSuffixOf' function takes two 'Vector's and returns 'True' -- iff the first is a suffix of the second. -- -- The following holds: -- -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y -- isSuffixOf :: (Storable a, Eq a) => Vector a -> Vector a -> Bool isSuffixOf x@(SV _ _ l1) y@(SV _ _ l2) = l1 <= l2 && x `eq` unsafeDrop (l2 - l1) y -- --------------------------------------------------------------------- -- Zipping -- | /O(n)/ 'zip' takes two 'Vector's and returns a list of -- corresponding pairs of elements. If one input 'Vector' is short, -- excess elements of the longer 'Vector' are discarded. This is -- equivalent to a pair of 'unpack' operations. zip :: (Storable a, Storable b) => Vector a -> Vector b -> [(a, b)] zip ps qs = maybe [] id $ do (ph,pt) <- viewL ps (qh,qt) <- viewL qs return ((ph,qh) : zip pt qt) -- | 'zipWith' generalises 'zip' by zipping with the function given as -- the first argument, instead of a tupling function. For example, -- @'zipWith' (+)@ is applied to two 'Vector's to produce the list of -- corresponding sums. zipWith :: (Storable a, Storable b, Storable c) => (a -> b -> c) -> Vector a -> Vector b -> Vector c zipWith f ps0 qs0 = fst $ unfoldrN (min (length ps0) (length qs0)) (\(ps,qs) -> do (ph,pt) <- viewL ps (qh,qt) <- viewL qs return (f ph qh, (pt,qt))) (ps0,qs0) -- zipWith f ps qs = pack $ List.zipWith f (unpack ps) (unpack qs) {-# INLINE zipWith #-} -- | /O(n)/ 'unzip' transforms a list of pairs of elements into a pair of -- 'Vector's. Note that this performs two 'pack' operations. unzip :: (Storable a, Storable b) => [(a, b)] -> (Vector a, Vector b) unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) {-# INLINE unzip #-} -- --------------------------------------------------------------------- -- Special lists -- | /O(n)/ Return all initial segments of the given 'Vector', shortest first. inits :: (Storable a) => Vector a -> [Vector a] inits (SV x s l) = [SV x s n | n <- [0..l]] -- | /O(n)/ Return all final segments of the given 'Vector', longest first. tails :: (Storable a) => Vector a -> [Vector a] tails p = switchL [empty] (\ _ t -> p : tails t) p -- --------------------------------------------------------------------- -- ** Ordered 'Vector's -- --------------------------------------------------------------------- -- Low level constructors -- | /O(n)/ Make a copy of the 'Vector' with its own storage. -- This is mainly useful to allow the rest of the data pointed -- to by the 'Vector' to be garbage collected, for example -- if a large string has been read in, and only a small part of it -- is needed in the rest of the program. copy :: (Storable a) => Vector a -> Vector a copy (SV x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> copyArray p (f `advancePtr` s) (fromIntegral l) -- --------------------------------------------------------------------- -- IO -- | Outputs a 'Vector' to the specified 'Handle'. hPut :: (Storable a) => Handle -> Vector a -> IO () hPut h v = if null v then return () else let (fptr, s, l) = toForeignPtr v in withForeignPtr fptr $ \ ptr -> let ptrS = advancePtr ptr s ptrE = advancePtr ptrS l -- use advancePtr and minusPtr in order to respect alignment in hPutBuf h ptrS (minusPtr ptrE ptrS) -- | Read a 'Vector' directly from the specified 'Handle'. This -- is far more efficient than reading the characters into a list -- and then using 'pack'. -- hGet :: (Storable a) => Handle -> Int -> IO (Vector a) hGet _ 0 = return empty hGet h i = createAndTrim i $ \p -> let elemType :: Ptr a -> a elemType _ = undefined sizeOfElem = sizeOf (elemType p) in fmap (flip div sizeOfElem) $ hGetBuf h p (i * sizeOfElem) -- | Read an entire file strictly into a 'Vector'. This is far more -- efficient than reading the characters into a 'String' and then using -- 'pack'. It also may be more efficient than opening the file and -- reading it using hGet. Files are read using 'binary mode' on Windows. -- readFile :: (Storable a) => FilePath -> IO (Vector a) readFile f = bracket (openBinaryFile f ReadMode) hClose (\h -> hGet h . fromIntegral =<< hFileSize h) -- | Write a 'Vector' to a file. writeFile :: (Storable a) => FilePath -> Vector a -> IO () writeFile f txt = bracket (openBinaryFile f WriteMode) hClose (\h -> hPut h txt) -- | Append a 'Vector' to a file. appendFile :: (Storable a) => FilePath -> Vector a -> IO () appendFile f txt = bracket (openBinaryFile f AppendMode) hClose (\h -> hPut h txt) -- --------------------------------------------------------------------- -- Internal utilities -- Common up near identical calls to `error' to reduce the number -- constant strings created when compiled: errorEmptyList :: String -> a errorEmptyList fun = moduleError fun "empty Vector" {-# NOINLINE errorEmptyList #-} moduleError :: String -> String -> a moduleError fun msg = error ("Data.StorableVector." ++ fun ++ ':':' ':msg) {-# NOINLINE moduleError #-} -- Find from the end of the string using predicate findFromEndUntil :: (Storable a) => (a -> Bool) -> Vector a -> Int STRICT2(findFromEndUntil) findFromEndUntil f ps@(SV x s l) = if null ps then 0 else if f (last ps) then l else findFromEndUntil f (SV x s (l-1))