{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-} -- -- Module : StorableVector -- Copyright : (c) The University of Glasgow 2001, -- (c) David Roundy 2003-2005, -- (c) Simon Marlow 2005 -- (c) Don Stewart 2005-2006 -- (c) Bjorn Bringert 2006 -- (c) Spencer Janssen 2006 -- (c) Henning Thielemann 2008 -- -- -- License : BSD-style -- -- Maintainer : sjanssen@cse.unl.edu -- Stability : experimental -- Portability : portable, requires ffi and cpp -- Tested with : GHC 6.4.1 and Hugs March 2005 -- -- -- | A time and space-efficient implementation of vectors using -- packed arrays, suitable for high performance use, both in terms -- of large data quantities, or high speed requirements. Vectors -- are encoded as strict arrays, held in a 'ForeignPtr', -- and can be passed between C and Haskell with little effort. -- -- This module is intended to be imported @qualified@, to avoid name -- clashes with "Prelude" functions. eg. -- -- > import qualified Data.StorableVector as V -- -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use -- UArray by Simon Marlow. Rewritten to support slices and use -- ForeignPtr by David Roundy. Polished and extended by Don Stewart. -- Generalized to any Storable value by Spencer Janssen. module Data.StorableVector ( -- * The @Vector@ type Vector, -- * Introducing and eliminating 'Vector's empty, singleton, pack, unpack, packWith, unpackWith, -- * Basic interface cons, snoc, append, head, last, tail, init, null, length, viewL, viewR, -- * Transformating 'Vector's map, reverse, intersperse, transpose, -- * Reducing 'Vector's (folds) foldl, foldl', foldl1, foldl1', foldr, foldr1, -- ** Special folds concat, concatMap, any, all, maximum, minimum, -- * Building 'Vector's -- ** Scans scanl, scanl1, scanr, scanr1, -- ** Accumulating maps mapAccumL, mapAccumR, mapIndexed, -- ** Unfolding 'Vector's replicate, unfoldr, unfoldrN, -- * Substrings -- ** Breaking strings take, drop, splitAt, takeWhile, dropWhile, span, spanEnd, break, breakEnd, group, groupBy, inits, tails, -- ** Breaking into many substrings split, splitWith, tokens, -- ** Joining strings join, -- * Predicates isPrefixOf, isSuffixOf, -- * Searching 'Vector's -- ** Searching by equality elem, notElem, -- ** Searching with a predicate find, filter, -- * Indexing 'Vector's index, elemIndex, elemIndices, elemIndexEnd, findIndex, findIndices, count, findIndexOrEnd, -- * Zipping and unzipping 'Vector's zip, zipWith, unzip, copy, ) where import qualified Prelude as P import Prelude hiding (reverse,head,tail,last,init,null ,length,map,lines,foldl,foldr,unlines ,concat,any,take,drop,splitAt,takeWhile ,dropWhile,span,break,elem,filter,maximum ,minimum,all,concatMap,foldl1,foldr1 ,scanl,scanl1,scanr,scanr1 ,readFile,writeFile,appendFile,replicate ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) import Data.StorableVector.Base import qualified Data.List as List import Control.Exception (assert) import Foreign.ForeignPtr import Foreign.Marshal.Array import Foreign.Ptr import Foreign.Storable (Storable(..)) import Data.Monoid (Monoid, mempty, mappend, mconcat) #if !defined(__GLASGOW_HASKELL__) import System.IO.Unsafe #endif #if defined(__GLASGOW_HASKELL__) import GHC.IOBase #endif -- ----------------------------------------------------------------------------- -- -- Useful macros, until we have bang patterns -- #define STRICT1(f) f _a | _a `seq` False = undefined #define STRICT2(f) f _a _b | _a `seq` _b `seq` False = undefined #define STRICT3(f) f _a _b _c | _a `seq` _b `seq` _c `seq` False = undefined #define STRICT4(f) f _a _b _c _d | _a `seq` _b `seq` _c `seq` _d `seq` False = undefined #define STRICT5(f) f _a _b _c _d e | _a `seq` _b `seq` _c `seq` _d `seq` _e `seq` False = undefined -- ----------------------------------------------------------------------------- instance (Storable a, Eq a) => Eq (Vector a) where (==) = eq instance (Storable a) => Monoid (Vector a) where mempty = empty mappend = append mconcat = concat -- | /O(n)/ Equality on the 'Vector' type. eq :: (Storable a, Eq a) => Vector a -> Vector a -> Bool eq a@(SV p s l) b@(SV p' s' l') | l /= l' = False -- short cut on length | p == p' && s == s' = True -- short cut for the same string | otherwise = unpack a == unpack b {-# INLINE eq #-} -- ----------------------------------------------------------------------------- -- Introducing and eliminating 'Vector's -- | /O(1)/ The empty 'Vector' empty :: (Storable a) => Vector a empty = unsafeCreate 0 $ const $ return () {-# NOINLINE empty #-} -- | /O(1)/ Construct a 'Vector' containing a single element singleton :: (Storable a) => a -> Vector a singleton c = unsafeCreate 1 $ \p -> poke p c {-# INLINE singleton #-} -- | /O(n)/ Convert a '[a]' into a 'Vector a'. -- pack :: (Storable a) => [a] -> Vector a pack str = unsafeCreate (P.length str) $ \p -> go p str where go _ [] = return () go p (x:xs) = poke p x >> go (p `advancePtr` 1) xs -- | /O(n)/ Converts a 'Vector a' to a '[a]'. unpack :: (Storable a) => Vector a -> [a] unpack = foldr (:) [] {-# INLINE unpack #-} ------------------------------------------------------------------------ -- | /O(n)/ Convert a list into a 'Vector' using a conversion function packWith :: (Storable b) => (a -> b) -> [a] -> Vector b packWith k str = unsafeCreate (P.length str) $ \p -> go p str where STRICT2(go) go _ [] = return () go p (x:xs) = poke p (k x) >> go (p `advancePtr` 1) xs -- less space than pokeElemOff {-# INLINE packWith #-} -- | /O(n)/ Convert a 'Vector' into a list using a conversion function unpackWith :: (Storable a) => (a -> b) -> Vector a -> [b] unpackWith _ (SV _ _ 0) = [] unpackWith k (SV ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> go (p `advancePtr` s) (l - 1) [] where STRICT3(go) go p 0 acc = peek p >>= \e -> return (k e : acc) go p n acc = peekElemOff p n >>= \e -> go p (n-1) (k e : acc) {-# INLINE unpackWith #-} -- --------------------------------------------------------------------- -- Basic interface -- | /O(1)/ Test whether a 'Vector' is empty. null :: Vector a -> Bool null (SV _ _ l) = assert (l >= 0) $ l <= 0 {-# INLINE null #-} -- --------------------------------------------------------------------- -- | /O(1)/ 'length' returns the length of a 'Vector' as an 'Int'. length :: Vector a -> Int length (SV _ _ l) = assert (l >= 0) $ l -- -- length/loop fusion. When taking the length of any fuseable loop, -- rewrite it as a foldl', and thus avoid allocating the result buffer -- worth around 10% in speed testing. -- #if defined(__GLASGOW_HASKELL__) {-# INLINE [1] length #-} #endif ------------------------------------------------------------------------ -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different -- complexity, as it requires a memcpy. cons :: (Storable a) => a -> Vector a -> Vector a cons c (SV x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do poke p c copyArray (p `advancePtr` 1) (f `advancePtr` s) (fromIntegral l) {-# INLINE cons #-} -- | /O(n)/ Append an element to the end of a 'Vector' snoc :: (Storable a) => Vector a -> a -> Vector a snoc (SV x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do copyArray (castPtr p) (f `advancePtr` s) l pokeElemOff p l c {-# INLINE snoc #-} -- | /O(1)/ Extract the first element of a 'Vector', which must be non-empty. -- An exception will be thrown in the case of an empty 'Vector'. head :: (Storable a) => Vector a -> a head (SV x s l) | l <= 0 = errorEmptyList "head" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekElemOff p s {-# INLINE head #-} -- | /O(1)/ Extract the elements after the head of a 'Vector', which must be non-empty. -- An exception will be thrown in the case of an empty 'Vector'. tail :: (Storable a) => Vector a -> Vector a tail (SV p s l) | l <= 0 = errorEmptyList "tail" | otherwise = SV p (s+1) (l-1) {-# INLINE tail #-} -- | /O(1)/ Extract the last element of a 'Vector', which must be finite and non-empty. -- An exception will be thrown in the case of an empty 'Vector'. last :: (Storable a) => Vector a -> a last ps@(SV x s l) | null ps = errorEmptyList "last" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekElemOff p (s+l-1) {-# INLINE last #-} -- | /O(1)/ Return all the elements of a 'Vector' except the last one. -- An exception will be thrown in the case of an empty 'Vector'. init :: Vector a -> Vector a init ps@(SV p s l) | null ps = errorEmptyList "init" | otherwise = SV p s (l-1) {-# INLINE init #-} -- | /O(n)/ Append two Vectors append :: (Storable a) => Vector a -> Vector a -> Vector a append xs ys | null xs = ys | null ys = xs | otherwise = concat [xs,ys] {-# INLINE append #-} -- --------------------------------------------------------------------- -- Transformations -- | /O(n)/ 'map' @f xs@ is the 'Vector' obtained by applying @f@ to each -- element of @xs@. map :: (Storable a, Storable b) => (a -> b) -> Vector a -> Vector b map f (SV fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> create len $ map_ 0 (a `advancePtr` s) where STRICT3(map_) map_ n p1 p2 | n >= len = return () | otherwise = do x <- peekElemOff p1 n pokeElemOff p2 n (f x) map_ (n+1) p1 p2 {-# INLINE map #-} -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. reverse :: (Storable a) => Vector a -> Vector a reverse (SV x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> sequence_ [peekElemOff (f `plusPtr` s) i >>= pokeElemOff p (l - i - 1) | i <- [0 .. l - 1]] -- | /O(n)/ The 'intersperse' function takes a element and a -- 'Vector' and \`intersperses\' that element between the elements of -- the 'Vector'. It is analogous to the intersperse function on -- Lists. intersperse :: (Storable a) => a -> Vector a -> Vector a intersperse c = pack . List.intersperse c . unpack -- | The 'transpose' function transposes the rows and columns of its -- 'Vector' argument. transpose :: (Storable a) => [Vector a] -> [Vector a] transpose ps = P.map pack (List.transpose (P.map unpack ps)) -- --------------------------------------------------------------------- -- Reducing 'Vector's -- | 'foldl', applied to a binary operator, a starting value (typically -- the left-identity of the operator), and a Vector, reduces the -- 'Vector' using the binary operator, from left to right. -- This function is subject to array fusion. foldl :: (Storable a) => (b -> a -> b) -> b -> Vector a -> b foldl f v (SV x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> lgo v (ptr `advancePtr` s) (ptr `advancePtr` (s+l)) where STRICT3(lgo) lgo z p q | p == q = return z | otherwise = do c <- peek p lgo (f z c) (p `advancePtr` 1) q {-# INLINE foldl #-} -- | 'foldl\'' is like 'foldl', but strict in the accumulator. -- Though actually foldl is also strict in the accumulator. foldl' :: (Storable a) => (b -> a -> b) -> b -> Vector a -> b foldl' = foldl {-# INLINE foldl' #-} -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a 'Vector', -- reduces the 'Vector' using the binary operator, from right to left. foldr :: (Storable a) => (a -> b -> b) -> b -> Vector a -> b foldr k z = let recurse = switchL z (\h t -> k h (recurse t)) in recurse {-# INLINE foldr #-} -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'Vector's. -- This function is subject to array fusion. -- An exception will be thrown in the case of an empty 'Vector'. foldl1 :: (Storable a) => (a -> a -> a) -> Vector a -> a foldl1 f = switchL (errorEmptyList "foldl1") (foldl f) {-# INLINE foldl1 #-} -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. -- An exception will be thrown in the case of an empty 'Vector'. foldl1' :: (Storable a) => (a -> a -> a) -> Vector a -> a foldl1' f = switchL (errorEmptyList "foldl1'") (foldl' f) {-# INLINE foldl1' #-} -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'Vector's -- An exception will be thrown in the case of an empty 'Vector'. foldr1 :: (Storable a) => (a -> a -> a) -> Vector a -> a foldr1 f = switchR (errorEmptyList "foldr1") (flip (foldr f)) {-# INLINE foldr1 #-} -- --------------------------------------------------------------------- -- Special folds -- | /O(n)/ Concatenate a list of 'Vector's. concat :: (Storable a) => [Vector a] -> Vector a concat [] = empty concat [ps] = ps concat xs = unsafeCreate len $ \ptr -> go xs ptr where len = P.sum . P.map length $ xs STRICT2(go) go [] _ = return () go (SV p s l:ps) ptr = do withForeignPtr p $ \fp -> copyArray ptr (fp `advancePtr` s) l go ps (ptr `advancePtr` l) -- | Map a function over a 'Vector' and concatenate the results concatMap :: (Storable a, Storable b) => (a -> Vector b) -> Vector a -> Vector b concatMap f = concat . foldr ((:) . f) [] {-# INLINE concatMap #-} -- | /O(n)/ Applied to a predicate and a 'Vector', 'any' determines if -- any element of the 'Vector' satisfies the predicate. any :: (Storable a) => (a -> Bool) -> Vector a -> Bool any _ (SV _ _ 0) = False any f (SV x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> go (ptr `advancePtr` s) (ptr `advancePtr` (s+l)) where STRICT2(go) go p q | p == q = return False | otherwise = do c <- peek p if f c then return True else go (p `advancePtr` 1) q {-# INLINE any #-} -- | /O(n)/ Applied to a predicate and a 'Vector', 'all' determines -- if all elements of the 'Vector' satisfy the predicate. all :: (Storable a) => (a -> Bool) -> Vector a -> Bool all _ (SV _ _ 0) = True all f (SV x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> go (ptr `advancePtr` s) (ptr `advancePtr` (s+l)) where STRICT2(go) go p q | p == q = return True -- end of list | otherwise = do c <- peek p if f c then go (p `advancePtr` 1) q else return False {-# INLINE all #-} ------------------------------------------------------------------------ -- | /O(n)/ 'maximum' returns the maximum value from a 'Vector' -- This function will fuse. -- An exception will be thrown in the case of an empty 'Vector'. maximum :: (Storable a, Ord a) => Vector a -> a maximum = foldl1' max -- | /O(n)/ 'minimum' returns the minimum value from a 'Vector' -- This function will fuse. -- An exception will be thrown in the case of an empty 'Vector'. minimum :: (Storable a, Ord a) => Vector a -> a minimum = foldl1' min ------------------------------------------------------------------------ switchL :: Storable a => b -> (a -> Vector a -> b) -> Vector a -> b switchL n j x = if null x then n else j (unsafeHead x) (unsafeTail x) {-# INLINE switchL #-} switchR :: Storable a => b -> (Vector a -> a -> b) -> Vector a -> b switchR n j x = if null x then n else j (unsafeInit x) (unsafeLast x) {-# INLINE switchR #-} viewL :: Storable a => Vector a -> Maybe (a, Vector a) viewL = switchL Nothing (curry Just) {-# INLINE viewL #-} viewR :: Storable a => Vector a -> Maybe (Vector a, a) viewR = switchR Nothing (curry Just) {-# INLINE viewR #-} -- | The 'mapAccumL' function behaves like a combination of 'map' and -- 'foldl'; it applies a function to each element of a 'Vector', -- passing an accumulating parameter from left to right, and returning a -- final value of this accumulator together with the new list. mapAccumL :: (Storable a, Storable b) => (acc -> a -> (acc, b)) -> acc -> Vector a -> (acc, Vector b) mapAccumL f acc0 as0 = let (bs, Just (acc2, _)) = unfoldrN (length as0) (\(acc,as) -> fmap (\(asHead,asTail) -> let (acc1,b) = f acc asHead in (b, (acc1, asTail))) (viewL as)) (acc0,as0) in (acc2, bs) {-# INLINE mapAccumL #-} -- | The 'mapAccumR' function behaves like a combination of 'map' and -- 'foldr'; it applies a function to each element of a 'Vector', -- passing an accumulating parameter from right to left, and returning a -- final value of this accumulator together with the new 'Vector'. mapAccumR :: (Storable a, Storable b) => (acc -> a -> (acc, b)) -> acc -> Vector a -> (acc, Vector b) mapAccumR f acc0 as0 = let (bs, Just (acc2, _)) = unfoldlN (length as0) (\(acc,as) -> fmap (\(asInit,asLast) -> let (acc1,b) = f acc asLast in (b, (acc1, asInit))) (viewR as)) (acc0,as0) in (acc2, bs) {-# INLINE mapAccumR #-} -- | /O(n)/ map functions, provided with the index at each position mapIndexed :: (Storable a, Storable b) => (Int -> a -> b) -> Vector a -> Vector b mapIndexed f = snd . mapAccumL (\i e -> (i + 1, f i e)) 0 {-# INLINE mapIndexed #-} -- --------------------------------------------------------------------- -- Building 'Vector's -- | 'scanl' is similar to 'foldl', but returns a list of successive -- reduced values from the left. This function will fuse. -- -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] -- -- Note that -- -- > last (scanl f z xs) == foldl f z xs. scanl :: (Storable a, Storable b) => (a -> b -> a) -> a -> Vector b -> Vector a scanl f acc0 as0 = fst $ unfoldrN (succ (length as0)) (fmap $ \(acc,as) -> (acc, fmap (\(asHead,asTail) -> (f acc asHead, asTail)) (viewL as))) (Just (acc0, as0)) -- less efficient but much more comprehensible -- scanl f z ps = -- cons z (snd (mapAccumL (\acc a -> let b = f acc a in (b,b)) z ps)) -- n.b. haskell's List scan returns a list one bigger than the -- input, so we need to snoc here to get some extra space, however, -- it breaks map/up fusion (i.e. scanl . map no longer fuses) {-# INLINE scanl #-} -- | 'scanl1' is a variant of 'scanl' that has no starting value argument. -- This function will fuse. -- -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] scanl1 :: (Storable a) => (a -> a -> a) -> Vector a -> Vector a scanl1 f = switchL empty (scanl f) {-# INLINE scanl1 #-} -- | scanr is the right-to-left dual of scanl. scanr :: (Storable a, Storable b) => (a -> b -> b) -> b -> Vector a -> Vector b scanr f acc0 as0 = fst $ unfoldlN (succ (length as0)) (fmap $ \(acc,as) -> (acc, fmap (\(asInit,asLast) -> (f asLast acc, asInit)) (viewR as))) (Just (acc0, as0)) {-# INLINE scanr #-} -- | 'scanr1' is a variant of 'scanr' that has no starting value argument. scanr1 :: (Storable a) => (a -> a -> a) -> Vector a -> Vector a scanr1 f = switchR empty (flip (scanl f)) {-# INLINE scanr1 #-} -- --------------------------------------------------------------------- -- Unfolds and replicates -- | /O(n)/ 'replicate' @n x@ is a 'Vector' of length @n@ with @x@ -- the value of every element. -- replicate :: (Storable a) => Int -> a -> Vector a replicate w c | w <= 0 = empty | otherwise = fst $ unfoldrN w (const $ return (c, ())) () -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a -- 'Vector' from a seed value. The function takes the element and -- returns 'Nothing' if it is done producing the 'Vector or returns -- 'Just' @(a,b)@, in which case, @a@ is the next element in the 'Vector', -- and @b@ is the seed value for further production. -- -- Examples: -- -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 -- > == pack [0, 1, 2, 3, 4, 5] -- unfoldr :: (Storable b) => (a -> Maybe (b, a)) -> a -> Vector b unfoldr f = concat . unfoldChunk 32 64 where unfoldChunk n n' x = case unfoldrN n f x of (s, Nothing) -> s : [] (s, Just x') -> s : unfoldChunk n' (n+n') x' {-# INLINE unfoldr #-} -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Vector' from a seed -- value. However, the length of the result is limited by the first -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' -- when the maximum length of the result is known. -- -- The following equation relates 'unfoldrN' and 'unfoldr': -- -- > fst (unfoldrN n f s) == take n (unfoldr f s) -- unfoldrN :: (Storable b) => Int -> (a -> Maybe (b, a)) -> a -> (Vector b, Maybe a) unfoldrN i f x0 | i < 0 = (empty, Just x0) | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 where STRICT3(go) go p x n = if n == i then return (0, n, Just x) else case f x of Nothing -> return (0, n, Nothing) Just (w,x') -> do poke p w go (p `advancePtr` 1) x' (n+1) {-# INLINE unfoldrN #-} unfoldlN :: (Storable b) => Int -> (a -> Maybe (b, a)) -> a -> (Vector b, Maybe a) unfoldlN i f x0 | i < 0 = (empty, Just x0) | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go (p `advancePtr` i) x0 i where STRICT3(go) go p x n = if n == 0 then return (n, i, Just x) else case f x of Nothing -> return (n, i, Nothing) Just (w,x') -> let p' = p `advancePtr` (-1) in do poke p' w go p' x' (n-1) {-# INLINE unfoldlN #-} -- --------------------------------------------------------------------- -- Substrings -- | /O(1)/ 'take' @n@, applied to a 'Vector' @xs@, returns the prefix -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. take :: (Storable a) => Int -> Vector a -> Vector a take n ps@(SV x s l) | n <= 0 = empty | n >= l = ps | otherwise = SV x s n {-# INLINE take #-} -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ -- elements, or @[]@ if @n > 'length' xs@. drop :: (Storable a) => Int -> Vector a -> Vector a drop n ps@(SV x s l) | n <= 0 = ps | n >= l = empty | otherwise = SV x (s+n) (l-n) {-# INLINE drop #-} -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. splitAt :: (Storable a) => Int -> Vector a -> (Vector a, Vector a) splitAt n ps@(SV x s l) | n <= 0 = (empty, ps) | n >= l = (ps, empty) | otherwise = (SV x s n, SV x (s+n) (l-n)) {-# INLINE splitAt #-} -- | 'takeWhile', applied to a predicate @p@ and a 'Vector' @xs@, -- returns the longest prefix (possibly empty) of @xs@ of elements that -- satisfy @p@. takeWhile :: (Storable a) => (a -> Bool) -> Vector a -> Vector a takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps {-# INLINE takeWhile #-} -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. dropWhile :: (Storable a) => (a -> Bool) -> Vector a -> Vector a dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps {-# INLINE dropWhile #-} -- | 'break' @p@ is equivalent to @'span' ('not' . p)@. break :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) {-# INLINE break #-} -- | 'breakEnd' behaves like 'break' but from the end of the 'Vector' -- -- breakEnd p == spanEnd (not.p) breakEnd :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) breakEnd p ps = splitAt (findFromEndUntil p ps) ps -- | 'span' @p xs@ breaks the 'Vector' into two segments. It is -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ span :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) span p ps = break (not . p) ps {-# INLINE span #-} -- | 'spanEnd' behaves like 'span' but from the end of the 'Vector'. -- We have -- -- > spanEnd (not.isSpace) "x y z" == ("x y ","z") -- -- and -- -- > spanEnd (not . isSpace) ps -- > == -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) -- spanEnd :: (Storable a) => (a -> Bool) -> Vector a -> (Vector a, Vector a) spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps -- | /O(n)/ Splits a 'Vector' into components delimited by -- separators, where the predicate returns True for a separator element. -- The resulting components do not contain the separators. Two adjacent -- separators result in an empty component in the output. eg. -- -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] -- > splitWith (=='a') [] == [] -- splitWith :: (Storable a) => (a -> Bool) -> Vector a -> [Vector a] splitWith _ (SV _ _ 0) = [] splitWith p ps = loop p ps where STRICT2(loop) loop q qs = chunk : switchL [] (\ _ t -> loop q t) rest where (chunk,rest) = break q qs {-# INLINE splitWith #-} -- | /O(n)/ Break a 'Vector' into pieces separated by the -- argument, consuming the delimiter. I.e. -- -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] -- > split 'a' "aXaXaXa" == ["","X","X","X"] -- > split 'x' "x" == ["",""] -- -- and -- -- > join [c] . split c == id -- > split == splitWith . (==) -- -- As for all splitting functions in this library, this function does -- not copy the substrings, it just constructs new 'Vector's that -- are slices of the original. -- split :: (Storable a, Eq a) => a -> Vector a -> [Vector a] split w v = splitWith (w==) v {-# INLINE split #-} -- | Like 'splitWith', except that sequences of adjacent separators are -- treated as a single separator. eg. -- -- > tokens (=='a') "aabbaca" == ["bb","c"] -- tokens :: (Storable a) => (a -> Bool) -> Vector a -> [Vector a] tokens f = P.filter (not.null) . splitWith f {-# INLINE tokens #-} -- | The 'group' function takes a 'Vector' and returns a list of -- 'Vector's such that the concatenation of the result is equal to the -- argument. Moreover, each sublist in the result contains only equal -- elements. For example, -- -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] -- -- It is a special case of 'groupBy', which allows the programmer to -- supply their own equality test. It is about 40% faster than -- /groupBy (==)/ group :: (Storable a, Eq a) => Vector a -> [Vector a] group xs = switchL [] (\ h _ -> let (ys, zs) = span (== h) xs in ys : group zs) xs -- | The 'groupBy' function is the non-overloaded version of 'group'. groupBy :: (Storable a) => (a -> a -> Bool) -> Vector a -> [Vector a] groupBy k xs = switchL [] (\ h t -> let n = 1 + findIndexOrEnd (not . k h) t in unsafeTake n xs : groupBy k (unsafeDrop n xs)) xs {-# INLINE groupBy #-} -- | /O(n)/ The 'join' function takes a 'Vector' and a list of -- 'Vector's and concatenates the list after interspersing the first -- argument between each element of the list. join :: (Storable a) => Vector a -> [Vector a] -> Vector a join s = concat . List.intersperse s {-# INLINE join #-} -- --------------------------------------------------------------------- -- Indexing 'Vector's -- | /O(1)/ 'Vector' index (subscript) operator, starting from 0. index :: (Storable a) => Vector a -> Int -> a index ps n | n < 0 = moduleError "index" ("negative index: " ++ show n) | n >= length ps = moduleError "index" ("index too large: " ++ show n ++ ", length = " ++ show (length ps)) | otherwise = ps `unsafeIndex` n {-# INLINE index #-} -- | /O(n)/ The 'elemIndex' function returns the index of the first -- element in the given 'Vector' which is equal to the query -- element, or 'Nothing' if there is no such element. -- This implementation uses memchr(3). elemIndex :: (Storable a, Eq a) => a -> Vector a -> Maybe Int elemIndex c (SV x s l) = inlinePerformIO $ withForeignPtr x $ \p -> go p (s + l) 0 where STRICT3(go) go p end i | i == end = return Nothing | otherwise = do e <- peekElemOff p i if c == e then return $ Just (i - s) else go p end (i + 1) {-# INLINE elemIndex #-} -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the -- element in the given 'Vector' which is equal to the query -- element, or 'Nothing' if there is no such element. The following -- holds: -- -- > elemIndexEnd c xs == -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) -- elemIndexEnd :: (Storable a, Eq a) => a -> Vector a -> Maybe Int elemIndexEnd ch (SV x s l) = inlinePerformIO $ withForeignPtr x $ \p -> go (p `advancePtr` s) (l-1) where STRICT2(go) go p i | i < 0 = return Nothing | otherwise = do ch' <- peekElemOff p i if ch == ch' then return $ Just i else go p (i-1) {-# INLINE elemIndexEnd #-} -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. -- This implementation uses memchr(3). elemIndices :: (Storable a, Eq a) => a -> Vector a -> [Int] elemIndices c ps = loop 0 ps where STRICT2(loop) loop n ps' = switchL [] (\ h t -> if c == h then n : loop (n+1) t else loop (n+1) t) ps' {-# INLINE elemIndices #-} -- | count returns the number of times its argument appears in the 'Vector' -- -- > count = length . elemIndices -- -- But more efficiently than using length on the intermediate list. count :: (Storable a, Eq a) => a -> Vector a -> Int count w sv = List.length $ elemIndices w sv {-# INLINE count #-} -- | The 'findIndex' function takes a predicate and a 'Vector' and -- returns the index of the first element in the 'Vector' -- satisfying the predicate. findIndex :: (Storable a) => (a -> Bool) -> Vector a -> Maybe Int findIndex k (SV x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `advancePtr` s) 0 where STRICT2(go) go ptr n | n >= l = return Nothing | otherwise = do w <- peek ptr if k w then return (Just n) else go (ptr `advancePtr` 1) (n+1) {-# INLINE findIndex #-} -- | The 'findIndices' function extends 'findIndex', by returning the -- indices of all elements satisfying the predicate, in ascending order. findIndices :: (Storable a) => (a -> Bool) -> Vector a -> [Int] findIndices p ps = loop 0 ps where STRICT2(loop) loop n qs = switchL [] (\ h t -> if p h then n : loop (n+1) t else loop (n+1) t) $ qs {-# INLINE findIndices #-} -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length -- of the string if no element is found, rather than Nothing. findIndexOrEnd :: (Storable a) => (a -> Bool) -> Vector a -> Int findIndexOrEnd k (SV x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `advancePtr` s) 0 where STRICT2(go) go ptr n | n >= l = return l | otherwise = do w <- peek ptr if k w then return n else go (ptr `advancePtr` 1) (n+1) {-# INLINE findIndexOrEnd #-} -- --------------------------------------------------------------------- -- Searching Vectors -- | /O(n)/ 'elem' is the 'Vector' membership predicate. elem :: (Storable a, Eq a) => a -> Vector a -> Bool elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True {-# INLINE elem #-} -- | /O(n)/ 'notElem' is the inverse of 'elem' notElem :: (Storable a, Eq a) => a -> Vector a -> Bool notElem c ps = not (elem c ps) {-# INLINE notElem #-} -- | /O(n)/ 'filter', applied to a predicate and a 'Vector', -- returns a 'Vector' containing those elements that satisfy the -- predicate. This function is subject to array fusion. filter :: (Storable a) => (a -> Bool) -> Vector a -> Vector a filter k ps@(SV x s l) | null ps = ps | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> let STRICT3(go) go end i j | i == end = return j | otherwise = do w <- peekElemOff f i if k w then do pokeElemOff p j w go end (i+1) (j + 1) else go end (i+1) j in go (s + l) s 0 {-# INLINE filter #-} -- | /O(n)/ The 'find' function takes a predicate and a 'Vector', -- and returns the first element in matching the predicate, or 'Nothing' -- if there is no such element. -- -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing -- find :: (Storable a) => (a -> Bool) -> Vector a -> Maybe a find f p = fmap (unsafeIndex p) (findIndex f p) {-# INLINE find #-} -- --------------------------------------------------------------------- -- Searching for substrings -- | /O(n)/ The 'isPrefixOf' function takes two 'Vector' and returns 'True' -- iff the first is a prefix of the second. isPrefixOf :: (Storable a, Eq a) => Vector a -> Vector a -> Bool isPrefixOf x@(SV _ _ l1) y@(SV _ _ l2) = l1 <= l2 && x `eq` unsafeTake l1 y -- | /O(n)/ The 'isSuffixOf' function takes two 'Vector's and returns 'True' -- iff the first is a suffix of the second. -- -- The following holds: -- -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y -- isSuffixOf :: (Storable a, Eq a) => Vector a -> Vector a -> Bool isSuffixOf x@(SV _ _ l1) y@(SV _ _ l2) = l1 <= l2 && x `eq` unsafeDrop (l2 - l1) y -- --------------------------------------------------------------------- -- Zipping -- | /O(n)/ 'zip' takes two 'Vector's and returns a list of -- corresponding pairs of elements. If one input 'Vector' is short, -- excess elements of the longer 'Vector' are discarded. This is -- equivalent to a pair of 'unpack' operations. zip :: (Storable a, Storable b) => Vector a -> Vector b -> [(a, b)] zip ps qs = maybe [] id $ do (ph,pt) <- viewL ps (qh,qt) <- viewL qs return ((ph,qh) : zip pt qt) -- | 'zipWith' generalises 'zip' by zipping with the function given as -- the first argument, instead of a tupling function. For example, -- @'zipWith' (+)@ is applied to two 'Vector's to produce the list of -- corresponding sums. zipWith :: (Storable a, Storable b, Storable c) => (a -> b -> c) -> Vector a -> Vector b -> Vector c zipWith f ps0 qs0 = fst $ unfoldrN (min (length ps0) (length qs0)) (\(ps,qs) -> do (ph,pt) <- viewL ps (qh,qt) <- viewL qs return (f ph qh, (pt,qt))) (ps0,qs0) -- zipWith f ps qs = pack $ List.zipWith f (unpack ps) (unpack qs) {-# INLINE zipWith #-} -- | /O(n)/ 'unzip' transforms a list of pairs of elements into a pair of -- 'Vector's. Note that this performs two 'pack' operations. unzip :: (Storable a, Storable b) => [(a, b)] -> (Vector a, Vector b) unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) {-# INLINE unzip #-} -- --------------------------------------------------------------------- -- Special lists -- | /O(n)/ Return all initial segments of the given 'Vector', shortest first. inits :: (Storable a) => Vector a -> [Vector a] inits (SV x s l) = [SV x s n | n <- [0..l]] -- | /O(n)/ Return all final segments of the given 'Vector', longest first. tails :: (Storable a) => Vector a -> [Vector a] tails p = switchL [empty] (\ _ t -> p : tails t) p -- --------------------------------------------------------------------- -- ** Ordered 'Vector's -- --------------------------------------------------------------------- -- Low level constructors -- | /O(n)/ Make a copy of the 'Vector' with its own storage. -- This is mainly useful to allow the rest of the data pointed -- to by the 'Vector' to be garbage collected, for example -- if a large string has been read in, and only a small part of it -- is needed in the rest of the program. copy :: (Storable a) => Vector a -> Vector a copy (SV x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> copyArray p (f `advancePtr` s) (fromIntegral l) -- --------------------------------------------------------------------- -- Internal utilities -- Common up near identical calls to `error' to reduce the number -- constant strings created when compiled: errorEmptyList :: String -> a errorEmptyList fun = moduleError fun "empty Vector" {-# NOINLINE errorEmptyList #-} moduleError :: String -> String -> a moduleError fun msg = error ("Data.Vector." ++ fun ++ ':':' ':msg) {-# NOINLINE moduleError #-} -- Find from the end of the string using predicate findFromEndUntil :: (Storable a) => (a -> Bool) -> Vector a -> Int STRICT2(findFromEndUntil) findFromEndUntil f ps@(SV x s l) = if null ps then 0 else if f (last ps) then l else findFromEndUntil f (SV x s (l-1))