Data/Vector/Storable/ByteString.hs

{-# LANGUAGE CPP
           , NoImplicitPrelude
           , BangPatterns
           , NamedFieldPuns
           , MagicHash
           , UnboxedTuples
  #-}

#if __GLASGOW_HASKELL__ >= 704
{-# LANGUAGE Trustworthy #-}
#endif

{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

-- |
-- Module      : Data.Vector.Storable.ByteString
-- Copyright   : (c) The University of Glasgow 2001,
--               (c) David Roundy 2003-2005,
--               (c) Simon Marlow 2005
--               (c) Bjorn Bringert 2006
--               (c) Don Stewart 2005-2008
--               (c) Bas van Dijk 2011
--
--               Array fusion code:
--               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
--               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
--
-- License     : BSD-style
--
-- Maintainer  : Bas van Dijk <v.dijk.bas@gmail.com>
-- Stability   : experimental
--
-- A time and space-efficient implementation of byte vectors using
-- vectors of Word8, suitable for high performance use, both in terms
-- of large data quantities, or high speed requirements. Byte vectors
-- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
-- and can be passed between C and Haskell with little effort.
--
-- This module is intended to be imported @qualified@, to avoid name
-- clashes with "Prelude" functions.  eg.
--
-- > import qualified Data.Vector.Storable.ByteString as B
--
-- Original GHC implementation by Bryan O\'Sullivan.
-- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
-- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
-- Polished and extended by Don Stewart.
-- Redefined ByteString as a Vector (from the vector package) of Word8s
-- by Bas van Dijk
--

module Data.Vector.Storable.ByteString (

        -- * The ByteString type
        ByteString,             -- instances: Eq, Ord, Show, Read, Data, Typeable, Monoid

        -- * Introducing and eliminating ByteStrings
        empty,                  -- :: ByteString
        singleton,              -- :: Word8   -> ByteString
        pack,                   -- :: [Word8] -> ByteString
        unpack,                 -- :: ByteString -> [Word8]

        -- * Basic interface
        cons,                   -- :: Word8 -> ByteString -> ByteString
        snoc,                   -- :: ByteString -> Word8 -> ByteString
        append,                 -- :: ByteString -> ByteString -> ByteString

        head,                   -- :: ByteString -> Word8
        uncons,                 -- :: ByteString -> Maybe (Word8, ByteString)

        last,                   -- :: ByteString -> Word8
        tail,                   -- :: ByteString -> ByteString
        init,                   -- :: ByteString -> ByteString
        null,                   -- :: ByteString -> Bool
        length,                 -- :: ByteString -> Int

        -- * Transforming ByteStrings
        map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
        reverse,                -- :: ByteString -> ByteString
        intersperse,            -- :: Word8 -> ByteString -> ByteString

        intercalate,            -- :: ByteString -> [ByteString] -> ByteString
        transpose,              -- :: [ByteString] -> [ByteString]

        -- * Reducing ByteStrings (folds)
        foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
        foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
        foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
        foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8

        foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
        foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
        foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
        foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8

        -- ** Special folds
        concat,                 -- :: [ByteString] -> ByteString
        concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
        any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
        all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
        maximum,                -- :: ByteString -> Word8
        minimum,                -- :: ByteString -> Word8

        -- * Building ByteStrings
        -- ** Scans
        scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
        scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
        scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
        scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString

        -- ** Accumulating maps
        mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
        mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)

        -- ** Generating and unfolding ByteStrings
        replicate,              -- :: Int -> Word8 -> ByteString
        unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
        unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)

        -- * Substrings

        -- ** Breaking strings
        take,                   -- :: Int -> ByteString -> ByteString
        drop,                   -- :: Int -> ByteString -> ByteString
        splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
        takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
        dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
        span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
        spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
        break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
        breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
        group,                  -- :: ByteString -> [ByteString]
        groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
        inits,                  -- :: ByteString -> [ByteString]
        tails,                  -- :: ByteString -> [ByteString]

        -- ** Breaking into many substrings
        split,                  -- :: Word8 -> ByteString -> [ByteString]
        splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]

        -- * Predicates
        isPrefixOf,             -- :: ByteString -> ByteString -> Bool
        isSuffixOf,             -- :: ByteString -> ByteString -> Bool
        isInfixOf,              -- :: ByteString -> ByteString -> Bool

        -- ** Search for arbitrary substrings
        breakSubstring,         -- :: ByteString -> ByteString -> (ByteString,ByteString)
        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
        findSubstrings,         -- :: ByteString -> ByteString -> [Int]

        -- * Searching ByteStrings

        -- ** Searching by equality
        elem,                   -- :: Word8 -> ByteString -> Bool
        notElem,                -- :: Word8 -> ByteString -> Bool

        -- ** Searching with a predicate
        find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
        filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
        partition,              -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

        -- * Indexing ByteStrings
        index,                  -- :: ByteString -> Int -> Word8
        elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
        elemIndices,            -- :: Word8 -> ByteString -> [Int]
        elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
        findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
        findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
        count,                  -- :: Word8 -> ByteString -> Int

        -- * Zipping and unzipping ByteStrings
        zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
        zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
        unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)

        -- * Ordered ByteStrings
        sort,                   -- :: ByteString -> ByteString

        -- * Low level conversions
        -- ** Copying ByteStrings
        copy,                   -- :: ByteString -> ByteString

        -- ** Packing CStrings and pointers
        packCString,            -- :: CString -> IO ByteString
        packCStringLen,         -- :: CStringLen -> IO ByteString

        -- ** Using ByteStrings as CStrings
        useAsCString,           -- :: ByteString -> (CString    -> IO a) -> IO a
        useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a

        -- * I\/O with ByteStrings

        -- ** Standard input and output
        getLine,                -- :: IO ByteString
        getContents,            -- :: IO ByteString
        putStr,                 -- :: ByteString -> IO ()
        putStrLn,               -- :: ByteString -> IO ()
        interact,               -- :: (ByteString -> ByteString) -> IO ()

        -- ** Files
        readFile,               -- :: FilePath -> IO ByteString
        writeFile,              -- :: FilePath -> ByteString -> IO ()
        appendFile,             -- :: FilePath -> ByteString -> IO ()

        -- ** I\/O with Handles
        hGetLine,               -- :: Handle -> IO ByteString
        hGetContents,           -- :: Handle -> IO ByteString
        hGet,                   -- :: Handle -> Int -> IO ByteString
        hGetSome,               -- :: Handle -> Int -> IO ByteString
        hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
        hPut,                   -- :: Handle -> ByteString -> IO ()
        hPutNonBlocking,        -- :: Handle -> ByteString -> IO ByteString
        hPutStr,                -- :: Handle -> ByteString -> IO ()
        hPutStrLn,              -- :: Handle -> ByteString -> IO ()

        breakByte
  ) where


--------------------------------------------------------------------------------
-- Imports
--------------------------------------------------------------------------------

-- from base:
import Control.Exception     ( finally )
import Control.Monad         ( (>>=), (=<<), (>>), return, void, when )
import Data.Bool             ( Bool(False, True), not, otherwise, (||) )
import Data.Char             ( ord )
import Data.Eq               ( (==), (/=) )
import Data.Function         ( (.), flip )
import Data.Functor          ( fmap )
import Data.IORef            ( readIORef, writeIORef )
import Data.Maybe            ( Maybe(Nothing, Just), isJust, listToMaybe )
import Data.Ord              ( min, (<), (>), (>=) )
import Data.Tuple            ( fst, snd )
import Data.Word             ( Word8 )
import Foreign.C.String      ( CString, CStringLen )
import Foreign.C.Types       ( CSize )
import Foreign.ForeignPtr    ( newForeignPtr, withForeignPtr )
import Foreign.Marshal.Alloc ( allocaBytes, mallocBytes
                             , reallocBytes, finalizerFree
                             )
import Foreign.Marshal.Array ( allocaArray )
import Foreign.Marshal.Utils ( copyBytes )
import Foreign.Ptr           ( nullPtr, plusPtr, minusPtr, castPtr )
import Foreign.Storable      ( peek, poke
                             , peekElemOff, pokeElemOff
                             , peekByteOff, pokeByteOff
                             , sizeOf
                             )
import Prelude               ( (+),(-),(*), ($), ($!)
                             , Int, fromIntegral, String, error, undefined
                             )
import System.IO             ( IO, FilePath, Handle
                             , IOMode(ReadMode, WriteMode, AppendMode)
                             , stdin, stdout
                             , hGetBuf, hGetBufSome, hGetBufNonBlocking
                             , hPutBuf, hPutBufNonBlocking, hFileSize
                             , withBinaryFile, hClose
                             )
import System.IO.Unsafe      ( unsafePerformIO )
import System.IO.Error       ( ioError, mkIOError, illegalOperationErrorType )
import Text.Show             ( show, showsPrec )

import qualified Data.List as L
    ( intersperse, transpose, map, reverse )
import Data.List ( (++) )

import GHC.IO.Handle.Internals ( wantReadableHandle_, flushCharReadBuffer
                               , ioe_EOF
                               )
import GHC.IO.Handle.Types     ( Handle__(..) )
import GHC.IO.Buffer           ( RawBuffer, Buffer(Buffer), bufRaw, bufL, bufR
                               , withRawBuffer, isEmptyBuffer, readWord8Buf
                               )
import GHC.IO.BufferedIO as Buffered ( fillReadBuffer )

-- from primitive:
import Control.Monad.Primitive ( unsafeInlineIO )

-- from vector:
import qualified Data.Vector.Storable as VS

-- from vector-bytestring (this package):
import Data.Vector.Storable.ByteString.Internal
    ( ByteString
    , create, unsafeCreate, createAndTrim, createAndTrim'
    , mallocByteString
    , memcpy, memset, memchr, memcmp
    , c_strlen, c_count, c_intersperse
    )


--------------------------------------------------------------------------------
-- * Introducing and eliminating 'ByteString's
--------------------------------------------------------------------------------

-- | /O(1)/ The empty 'ByteString'
empty :: ByteString
empty = VS.empty
{-# INLINE empty #-}

-- | /O(1)/ Convert a 'Word8' into a 'ByteString'
singleton :: Word8 -> ByteString
singleton = VS.singleton
{-# INLINE [1] singleton #-} -- Inline [1] for intercalate rule

-- | /O(n)/ Convert a @['Word8']@ into a 'ByteString'.
--
-- For applications with large numbers of string literals, pack can be a
-- bottleneck. In such cases, consider using packAddress (GHC only).
pack :: [Word8] -> ByteString
pack = VS.fromList
{-# INLINE pack #-}

-- | /O(n)/ Converts a 'ByteString' to a @['Word8']@.
unpack :: ByteString -> [Word8]
unpack = VS.toList
{-# INLINE unpack #-}


--------------------------------------------------------------------------------
--  * Basic interface
--------------------------------------------------------------------------------

-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
-- complexity, as it requires a memcpy.
cons :: Word8 -> ByteString -> ByteString
cons = VS.cons
{-# INLINE cons #-}

-- | /O(n)/ Append a byte to the end of a 'ByteString'
snoc :: ByteString -> Word8 -> ByteString
snoc = VS.snoc
{-# INLINE snoc #-}

-- | /O(n)/ Append two ByteStrings
append :: ByteString -> ByteString -> ByteString
append = (VS.++)
{-# INLINE append #-}

-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
-- An exception will be thrown in the case of an empty ByteString.
head :: ByteString -> Word8
head = VS.head
{-# INLINE head #-}

-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
-- An exception will be thrown in the case of an empty ByteString.
tail :: ByteString -> ByteString
tail = VS.tail
{-# INLINE tail #-}

-- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
-- if it is empty.
uncons :: ByteString -> Maybe (Word8, ByteString)
uncons v
    | VS.length v == 0 = Nothing
    | otherwise        = Just (VS.unsafeHead v, VS.unsafeTail v)
{-# INLINE uncons #-}

-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
-- An exception will be thrown in the case of an empty ByteString.
last :: ByteString -> Word8
last = VS.last
{-# INLINE last #-}

-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
-- An exception will be thrown in the case of an empty ByteString.
init :: ByteString -> ByteString
init = VS.init
{-# INLINE init #-}

-- | /O(1)/ Test whether a ByteString is empty.
null :: ByteString -> Bool
null = VS.null
{-# INLINE null #-}

-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
length :: ByteString -> Int
length = VS.length
{-# INLINE length #-}


--------------------------------------------------------------------------------
-- * Transforming ByteStrings
--------------------------------------------------------------------------------

-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
-- element of @xs@. This function is subject to array fusion.
map :: (Word8 -> Word8) -> ByteString -> ByteString
map = VS.map
{-# INLINE map #-}

-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
reverse :: ByteString -> ByteString
reverse = VS.reverse
{-# INLINE reverse #-}

-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
-- 'ByteString' and \`intersperses\' that byte between the elements of
-- the 'ByteString'.  It is analogous to the intersperse function on
-- Lists.
intersperse :: Word8 -> ByteString -> ByteString
intersperse c v
    | l < 2     = v
    | otherwise = unsafeCreate (2*l-1) $ \p' -> withForeignPtr fp $ \p ->
                    c_intersperse p' p (fromIntegral l) c
    where
      (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE intersperse #-}

-- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
-- 'ByteString's and concatenates the list after interspersing the first
-- argument between each element of the list.
intercalate :: ByteString -> [ByteString] -> ByteString
intercalate s = VS.concat . L.intersperse s
{-# INLINE [1] intercalate #-}

{-# RULES
"ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 .
    intercalate (singleton c) (s1 : s2 : []) = intercalateWithByte c s1 s2
  #-}

-- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings
-- with a char. Around 4 times faster than the generalised join.
intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString
intercalateWithByte c v1 v2 =
    unsafeCreate (l1 + l2 + 1) $ \ptr ->
      withForeignPtr fp1 $ \p1 ->
        withForeignPtr fp2 $ \p2 -> do
          memcpy ptr p1 (fromIntegral l1)
          poke (ptr `plusPtr` l1) c
          memcpy (ptr `plusPtr` (l1 + 1)) p2 (fromIntegral l2)
        where
          (fp1, l1) = VS.unsafeToForeignPtr0 v1
          (fp2, l2) = VS.unsafeToForeignPtr0 v2
{-# INLINE intercalateWithByte #-}

-- | The 'transpose' function transposes the rows and columns of its
-- 'ByteString' argument.
transpose :: [ByteString] -> [ByteString]
transpose = L.map VS.fromList
          . L.transpose
          . L.map VS.toList
{-# INLINE transpose #-}


--------------------------------------------------------------------------------
-- * Reducing 'ByteString's (folds)
--------------------------------------------------------------------------------

foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
foldl = VS.foldl
{-# INLINE foldl #-}

foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
foldl' = VS.foldl'
{-# INLINE foldl' #-}

foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1 = VS.foldl1
{-# INLINE foldl1 #-}

foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1' = VS.foldl1'
{-# INLINE foldl1' #-}

foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
foldr = VS.foldr
{-# INLINE foldr #-}

foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
foldr' = VS.foldr'
{-# INLINE foldr' #-}

foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldr1 = VS.foldr1
{-# INLINE foldr1 #-}

foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldr1' = VS.foldr1'
{-# INLINE foldr1' #-}

--------------------------------------------------------------------------------
-- ** Special folds

-- | /O(n)/ Concatenate a list of ByteStrings.
concat :: [ByteString] -> ByteString
concat = VS.concat
{-# INLINE concat #-}

-- | Map a function over a 'ByteString' and concatenate the results
concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
concatMap = VS.concatMap
{-# INLINE concatMap #-}

-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
-- any element of the 'ByteString' satisfies the predicate.
any :: (Word8 -> Bool) -> ByteString -> Bool
any = VS.any
{-# INLINE any #-}

-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
-- if all elements of the 'ByteString' satisfy the predicate.
all :: (Word8 -> Bool) -> ByteString -> Bool
all = VS.all
{-# INLINE all #-}

-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
-- This function will fuse.
-- An exception will be thrown in the case of an empty ByteString.
maximum :: ByteString -> Word8
maximum = VS.maximum
{-# INLINE maximum #-}

-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
-- This function will fuse.
-- An exception will be thrown in the case of an empty ByteString.
minimum :: ByteString -> Word8
minimum = VS.minimum
{-# INLINE minimum #-}


--------------------------------------------------------------------------------
-- * Building ByteStrings
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
-- ** Scans

-- | 'scanl' is similar to 'foldl', but returns a list of successive
-- reduced values from the left. This function will fuse.
--
-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
--
-- Note that
--
-- > last (scanl f z xs) == foldl f z xs.
--
scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
scanl = VS.scanl
{-# INLINE scanl #-}

-- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
-- This function will fuse.
--
-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
scanl1 = VS.scanl1
{-# INLINE scanl1 #-}

-- | scanr is the right-to-left dual of scanl.
scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
scanr = VS.scanr
{-# INLINE scanr #-}

-- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
scanr1 = VS.scanr1
{-# INLINE scanr1 #-}

--------------------------------------------------------------------------------
-- ** Accumulating maps

-- | The 'mapAccumL' function behaves like a combination of 'map' and
-- 'foldl'; it applies a function to each element of a ByteString,
-- passing an accumulating parameter from left to right, and returning a
-- final value of this accumulator together with the new list.
mapAccumL :: (acc -> Word8 -> (acc, Word8))
          -> acc -> ByteString -> (acc, ByteString)
mapAccumL f acc v = unsafeInlineIO $ withForeignPtr fp $ \p -> do
    fp' <- mallocByteString l
    withForeignPtr fp' $ \p' ->
      let go !a !m
            | m >= l = return (a, VS.unsafeFromForeignPtr0 fp' l)
            | otherwise = do
                x <- peekByteOff p m
                let (a', y) = f a x
                pokeByteOff p' m y
                go a' (m+1)
      in go acc 0
          where
            (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE mapAccumL #-}

-- | The 'mapAccumR' function behaves like a combination of 'map' and
-- 'foldr'; it applies a function to each element of a ByteString,
-- passing an accumulating parameter from right to left, and returning a
-- final value of this accumulator together with the new ByteString.
mapAccumR :: (acc -> Word8 -> (acc, Word8))
          -> acc -> ByteString -> (acc, ByteString)
mapAccumR f acc v = unsafeInlineIO $ withForeignPtr fp $ \p -> do
    fp' <- mallocByteString l
    withForeignPtr fp' $ \p' ->
      let go !a !m
            | m < 0     = return (a, VS.unsafeFromForeignPtr0 fp' l)
            | otherwise = do
                x <- peekByteOff p m
                let (a', y) = f a x
                pokeByteOff p' m y
                go a' (m-1)
      in go acc (l-1)
          where
            (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE mapAccumR #-}

--------------------------------------------------------------------------------
-- ** Generating and unfolding ByteStrings

-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
-- the value of every element. The following holds:
--
-- > replicate n x = unfoldr n (\u -> Just (u,u)) x
--
replicate :: Int -> Word8 -> ByteString
replicate = VS.replicate
{-# INLINE replicate #-}

-- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
-- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
-- ByteString from a seed value.  The function takes the element and
-- returns 'Nothing' if it is done producing the ByteString or returns
-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
-- and @b@ is the seed value for further production.
--
-- Examples:
--
-- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
-- > == pack [0, 1, 2, 3, 4, 5]
--
unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
unfoldr = VS.unfoldr
{-# INLINE unfoldr #-}

-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
-- value.  However, the length of the result is limited by the first
-- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
-- when the maximum length of the result is known.
--
-- The following equation relates 'unfoldrN' and 'unfoldr':
--
-- > snd (unfoldrN n f s) == take n (unfoldr f s)
--
-- /Note: this function has a different type than @Data.Vector.Storable.'VS.unfoldrN'@!/
unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
unfoldrN i f x0
    | i < 0     = (VS.empty, Just x0)
    | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
  where
    go !p !x !n =
        case f x of
          Nothing      -> return (0, n, Nothing)
          Just (w, x')
              | n == i    -> return (0, n, Just x)
              | otherwise -> do
                  poke p w
                  go (p `plusPtr` 1) x' (n+1)
{-# INLINE unfoldrN #-}


--------------------------------------------------------------------------------
-- * Substrings
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
-- ** Breaking strings

-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
take :: Int -> ByteString -> ByteString
take = VS.take
{-# INLINE take #-}

-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
-- elements, or @[]@ if @n > 'length' xs@.
drop  :: Int -> ByteString -> ByteString
drop = VS.drop
{-# INLINE drop #-}

-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
splitAt :: Int -> ByteString -> (ByteString, ByteString)
splitAt = VS.splitAt
{-# INLINE splitAt #-}

-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
-- returns the longest prefix (possibly empty) of @xs@ of elements that
-- satisfy @p@.
takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhile f v = VS.unsafeTake (findIndexOrEnd (not . f) v) v
{-# INLINE takeWhile #-}

-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhile f v = VS.unsafeDrop (findIndexOrEnd (not . f) v) v
{-# INLINE dropWhile #-}

-- | 'span' @p xs@ breaks the ByteString into two segments. It is
-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span = VS.span
{-# INLINE [1] span #-}

{-# RULES
"ByteString specialise span (x==)" forall x.
    span ((==) x) = spanByte x
"ByteString specialise span (==x)" forall x.
    span (==x) = spanByte x
  #-}

-- | 'spanByte' breaks its ByteString argument at the first
-- occurence of a byte other than its argument. It is more efficient
-- than 'span (==)'
--
-- > span  (=='c') "abcd" == spanByte 'c' "abcd"
--
spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
spanByte c v = unsafeInlineIO $ withForeignPtr fp $ \p ->
  let go !i | i >= l    = return (v, VS.empty)
            | otherwise = do
                c' <- peekByteOff p i
                if c /= c'
                  then return (VS.unsafeTake i v, VS.unsafeDrop i v)
                  else go (i+1)
  in go 0
      where
        (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE spanByte #-}

-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
-- We have
--
-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
--
-- and
--
-- > spanEnd (not . isSpace) v
-- >    ==
-- > let (x,y) = span (not.isSpace) (reverse v) in (reverse y, reverse x)
--
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p v = VS.splitAt (findFromEndUntil (not . p) v) v
{-# INLINE spanEnd #-}

-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
--
-- Under GHC, a rewrite rule will transform break (==) into a
-- call to the specialised breakByte:
--
-- > break ((==) x) = breakByte x
-- > break (==x) = breakByte x
--
break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
break p v = (VS.unsafeTake n v, VS.unsafeDrop n v)
    where
      !n = findIndexOrEnd p v
{-# INLINE [1] break #-}

{-# RULES
"ByteString specialise break (x==)" forall x.
    break ((==) x) = breakByte x
"ByteString specialise break (==x)" forall x.
    break (==x) = breakByte x
  #-}

-- | 'breakByte' breaks its ByteString argument at the first occurence
-- of the specified byte. It is more efficient than 'break' as it is
-- implemented with @memchr(3)@. I.e.
--
-- > break (=='c') "abcd" == breakByte 'c' "abcd"
--
breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
breakByte x v = case VS.elemIndex x v of
    Nothing -> (v, VS.empty)
    Just n  -> (VS.unsafeTake n v, VS.unsafeDrop n v)
{-# INLINE breakByte #-}

-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
--
-- breakEnd p == spanEnd (not.p)
breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd p v = VS.splitAt (findFromEndUntil p v) v
{-# INLINE breakEnd #-}

-- | The 'group' function takes a ByteString and returns a list of
-- ByteStrings such that the concatenation of the result is equal to the
-- argument.  Moreover, each sublist in the result contains only equal
-- elements.  For example,
--
-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
--
-- It is a special case of 'groupBy', which allows the programmer to
-- supply their own equality test. It is about 40% faster than
-- /groupBy (==)/
group :: ByteString -> [ByteString]
group v
    | VS.null v = []
    | otherwise = ys : group zs
    where
      (ys, zs) = spanByte (VS.unsafeHead v) v
{-# INLINE group #-}

-- | The 'groupBy' function is the non-overloaded version of 'group'.
groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
groupBy  k v
    | VS.null v = []
    | otherwise = VS.unsafeTake n v : groupBy k (VS.unsafeDrop n v)
    where
      n = 1 + findIndexOrEnd (not . k (VS.unsafeHead v)) (VS.unsafeTail v)
{-# INLINE groupBy #-}

-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
inits :: ByteString -> [ByteString]
inits v = [VS.unsafeTake s v | s <- [0..VS.length v]]
{-# INLINE inits #-}

-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
tails :: ByteString -> [ByteString]
tails v | VS.null v = [VS.empty]
        | otherwise = v : tails (VS.unsafeTail v)
{-# INLINE tails #-}

--------------------------------------------------------------------------------
-- ** Breaking into many substrings

-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
-- argument, consuming the delimiter. I.e.
--
-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
-- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
-- > split 'x'  "x"          == ["",""]
--
-- and
--
-- > intercalate [c] . split c == id
-- > split == splitWith . (==)
--
-- As for all splitting functions in this library, this function does
-- not copy the substrings, it just constructs new 'ByteStrings' that
-- are slices of the original.
--
split :: Word8 -> ByteString -> [ByteString]
split w v | l == 0    = []
          | otherwise = go 0
    where
      (fp, l) = VS.unsafeToForeignPtr0 v

      withFP = unsafeInlineIO . withForeignPtr fp

      go !n | q == nullPtr = vec l'    : []
            | otherwise    = vec (i-n) : go (i+1)
          where
            vec = VS.unsafeFromForeignPtr fp n

            q = withFP $ \p -> memchr (p `plusPtr` n) w (fromIntegral l')
            i = withFP $ \p -> return (q `minusPtr` p)

            l' = l - n
{-# INLINE split #-}

-- | /O(n)/ Splits a 'ByteString' into components delimited by
-- separators, where the predicate returns True for a separator element.
-- The resulting components do not contain the separators.  Two adjacent
-- separators result in an empty component in the output.  eg.
--
-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
-- > splitWith (=='a') []        == []
--
splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
splitWith pred v
    | l == 0    = []
    | otherwise = splitWith0 0 l
    where
      (fp, l) = VS.unsafeToForeignPtr0 v

      splitWith0 !off !len = unsafeInlineIO $ withForeignPtr fp $ \p ->
        let vec = VS.unsafeFromForeignPtr fp off
            go !idx
                | idx >= len = return [vec idx]
                | otherwise = do
                    let sepIx = off + idx
                    w <- peekElemOff p sepIx
                    if pred w
                      then return (vec idx : splitWith0 (sepIx+1) (len-idx-1))
                      else go (idx+1)
        in go 0
{-# INLINE [1] splitWith #-}

{-# RULES
"ByteString specialise splitWith (x==)" forall x.
    splitWith ((==) x) = split x
"ByteString specialise splitWith (==x)" forall x.
    splitWith (==x) = split x
  #-}

--------------------------------------------------------------------------------
-- * Predicates
--------------------------------------------------------------------------------

-- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
-- iff the first is a prefix of the second.
isPrefixOf :: ByteString -> ByteString -> Bool
isPrefixOf v1 v2
    | l1 == 0   = True
    | l2 < l1   = False
    | otherwise = unsafeInlineIO $
                    withForeignPtr fp1 $ \p1 ->
                      withForeignPtr fp2 $ \p2 -> do
                        i <- memcmp p1 p2 (fromIntegral l1)
                        return $! i == 0
    where
      (fp1, l1) = VS.unsafeToForeignPtr0 v1
      (fp2, l2) = VS.unsafeToForeignPtr0 v2
{-# INLINE isPrefixOf #-}

-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
-- iff the first is a suffix of the second.
--
-- The following holds:
--
-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
--
-- However, the real implemenation uses memcmp to compare the end of the
-- string only, with no reverse required..
isSuffixOf :: ByteString -> ByteString -> Bool
isSuffixOf v1 v2
    | l1 == 0   = True
    | l2 < l1   = False
    | otherwise = unsafeInlineIO $
                    withForeignPtr fp1 $ \p1 ->
                      withForeignPtr fp2 $ \p2 -> do
        i <- memcmp p1 (p2 `plusPtr` (l2 - l1)) (fromIntegral l1)
        return $! i == 0
    where
      (fp1, l1) = VS.unsafeToForeignPtr0 v1
      (fp2, l2) = VS.unsafeToForeignPtr0 v2
{-# INLINE isSuffixOf #-}

-- | Check whether one string is a substring of another. @isInfixOf
-- p s@ is equivalent to @not (null (findSubstrings p s))@.
isInfixOf :: ByteString -> ByteString -> Bool
isInfixOf v1 v2 = isJust (findSubstring v1 v2)
{-# INLINE isInfixOf #-}

--------------------------------------------------------------------------------
--  ** Search for arbitrary substrings

-- | Break a string on a substring, returning a pair of the part of the
-- string prior to the match, and the rest of the string.
--
-- The following relationships hold:
--
-- > break (== c) l == breakSubstring (singleton c) l
--
-- and:
--
-- > findSubstring s l ==
-- >    if null s then Just 0
-- >              else case breakSubstring s l of
-- >                       (x,y) | null y    -> Nothing
-- >                             | otherwise -> Just (length x)
--
-- For example, to tokenise a string, dropping delimiters:
--
-- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
-- >     where (h,t) = breakSubstring x y
--
-- To skip to the first occurence of a string:
--
-- > snd (breakSubstring x y)
--
-- To take the parts of a string before a delimiter:
--
-- > fst (breakSubstring x y)
--
breakSubstring :: ByteString -- ^ String to search for
               -> ByteString -- ^ String to search in
               -> (ByteString, ByteString)
                             -- ^ Head and tail of string broken at substring
breakSubstring pat src = search 0 src
  where
    search !n !s
        | VS.null s          = (src, VS.empty) -- not found
        | pat `isPrefixOf` s = (VS.take n src, s)
        | otherwise          = search (n+1) (VS.unsafeTail s)
{-# INLINE breakSubstring #-}

-- | Get the first index of a substring in another string,
--   or 'Nothing' if the string is not found.
--   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
findSubstring :: ByteString -- ^ String to search for.
              -> ByteString -- ^ String to seach in.
              -> Maybe Int
findSubstring f i = listToMaybe (findSubstrings f i)
{-# INLINE findSubstring #-}

{-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-}

-- | Find the indexes of all (possibly overlapping) occurances of a
-- substring in a string.
--
findSubstrings :: ByteString -- ^ String to search for.
               -> ByteString -- ^ String to seach in.
               -> [Int]
findSubstrings pat str
    | VS.null pat = [0 .. VS.length str]
    | otherwise   = search 0 str
  where
    search !ix !s
        | VS.null s          = []
        | pat `isPrefixOf` s = ix : ixs
        | otherwise          =      ixs
        where
          ixs = search (ix+1) (VS.unsafeTail s)
{-# INLINE findSubstrings #-}

{-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-}


--------------------------------------------------------------------------------
-- * Searching ByteStrings
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
-- ** Searching by equality

-- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
elem :: Word8 -> ByteString -> Bool
elem = VS.elem
{-# INLINE elem #-}

-- | /O(n)/ 'notElem' is the inverse of 'elem'
notElem :: Word8 -> ByteString -> Bool
notElem = VS.notElem
{-# INLINE notElem #-}

--------------------------------------------------------------------------------
-- ** Searching with a predicate

-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
-- and returns the first element in matching the predicate, or 'Nothing'
-- if there is no such element.
--
-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
--
find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
find = VS.find
{-# INLINE find #-}

-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
-- returns a ByteString containing those characters that satisfy the
-- predicate. This function is subject to array fusion.
filter :: (Word8 -> Bool) -> ByteString -> ByteString
filter = VS.filter
{-# INLINE filter #-}

-- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns
-- the pair of ByteStrings with elements which do and do not satisfy the
-- predicate, respectively; i.e.,
--
-- > partition p bs == (filter p xs, filter (not . p) xs)
--
partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
partition = VS.partition
{-# INLINE partition #-}


--------------------------------------------------------------------------------
-- * Indexing ByteStrings
--------------------------------------------------------------------------------

-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
index :: ByteString -> Int -> Word8
index = (VS.!)
{-# INLINE index #-}

-- | /O(n)/ The 'elemIndex' function returns the index of the first
-- element in the given 'ByteString' which is equal to the query
-- element, or 'Nothing' if there is no such element.
elemIndex :: Word8 -> ByteString -> Maybe Int
elemIndex = VS.elemIndex
{-# INLINE elemIndex #-}

-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
-- the indices of all elements equal to the query element, in ascending order.
-- This implementation uses memchr(3).
elemIndices :: Word8 -> ByteString -> [Int]
elemIndices x v = VS.toList $ VS.elemIndices x v
{-# INLINE elemIndices #-}

-- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
-- element in the given 'ByteString' which is equal to the query
-- element, or 'Nothing' if there is no such element. The following
-- holds:
--
-- > elemIndexEnd c xs ==
-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
--
elemIndexEnd :: Word8 -> ByteString -> Maybe Int
elemIndexEnd x v = unsafeInlineIO $ withForeignPtr fp $ \p ->
    let go !i | i < 0     = return Nothing
              | otherwise = do
                  x' <- peekByteOff p i
                  if x == x'
                    then return $ Just i
                    else go (i-1)
    in go (l - 1)
        where
          (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE elemIndexEnd #-}

-- | The 'findIndex' function takes a predicate and a 'ByteString' and
-- returns the index of the first element in the ByteString
-- satisfying the predicate.
findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
findIndex = VS.findIndex
{-# INLINE findIndex #-}

-- | The 'findIndices' function extends 'findIndex', by returning the
-- indices of all elements satisfying the predicate, in ascending order.
findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
findIndices pred v = VS.toList $ VS.findIndices pred v
{-# INLINE findIndices #-}

-- | count returns the number of times its argument appears in the ByteString
--
-- > count = length . elemIndices
--
-- But more efficiently than using length on the intermediate list.
count :: Word8 -> ByteString -> Int
count x v = unsafeInlineIO $ withForeignPtr fp $ \p ->
    fmap fromIntegral $ c_count p (fromIntegral l) x
        where
          (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE count #-}


--------------------------------------------------------------------------------
-- * Zipping and unzipping ByteStrings
--------------------------------------------------------------------------------

-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
-- corresponding pairs of bytes. If one input ByteString is short,
-- excess elements of the longer ByteString are discarded. This is
-- equivalent to a pair of 'unpack' operations.
zip :: ByteString -> ByteString -> [(Word8,Word8)]
zip v1 v2
    | VS.null v1 || VS.null v2 = []
    | otherwise = (VS.unsafeHead v1, VS.unsafeHead v2)
                : zip (VS.unsafeTail v1) (VS.unsafeTail v2)
{-# INLINE zip #-}

-- | 'zipWith' generalises 'zip' by zipping with the function given as
-- the first argument, instead of a tupling function.  For example,
-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
-- corresponding sums.
zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
zipWith f = go
    where
      go v1 v2
        | VS.null v1 || VS.null v2 = []
        | otherwise = f (VS.unsafeHead v1) (VS.unsafeHead v2)
                    : go (VS.unsafeTail v1) (VS.unsafeTail v2)
{-# INLINE [1] zipWith #-}

{-# RULES
"ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
    zipWith f p q = unpack (zipWith' f p q)
  #-}

-- | A specialised version of zipWith for the common case of a
-- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
-- are used to automatically covert zipWith into zipWith' when a pack is
-- performed on the result of zipWith.
zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
zipWith' f v1 v2 =
    unsafeInlineIO $
      withForeignPtr fp1 $ \p1 ->
         withForeignPtr fp2 $ \p2 ->
           create len $ \p ->
             let go !n
                     | n >= len = return ()
                     | otherwise = do
                         x <- peekByteOff p1 n
                         y <- peekByteOff p2 n
                         pokeByteOff p n (f x y)
                         go (n+1)
             in go 0
    where
      len = min l1 l2

      (fp1, l1) = VS.unsafeToForeignPtr0 v1
      (fp2, l2) = VS.unsafeToForeignPtr0 v2
{-# INLINE zipWith' #-}

-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
-- ByteStrings. Note that this performs two 'pack' operations.
unzip :: [(Word8, Word8)] -> (ByteString, ByteString)
unzip ls = ( VS.fromList $ L.map fst ls
           , VS.fromList $ L.map snd ls
           )
{-# INLINE unzip #-}


--------------------------------------------------------------------------------
-- * Ordered ByteStrings
--------------------------------------------------------------------------------

-- | /O(n)/ Sort a ByteString efficiently, using counting sort.
sort :: ByteString -> ByteString
sort v = unsafeCreate l $ \p' -> allocaArray 256 $ \counts -> do

    -- Initialize counts array to all 0s:
    void $ memset (castPtr counts)
                  0
                  (256 * fromIntegral (sizeOf (undefined :: CSize)))

    -- Count occurrences:
    withForeignPtr fp $ \p ->
        let go !i | i == l    = return ()
                  | otherwise = do
                      k <- fromIntegral `fmap` peekElemOff p i
                      x <- peekElemOff counts k
                      pokeElemOff counts k (x + 1)
                      go (i + 1)
        in go 0

    -- Fill result array:
    let go 256 _   = return ()
        go !i !ptr = do
          n <- peekElemOff counts i
          when (n /= 0) $ void $ memset ptr (fromIntegral i) n
          go (i + 1) (ptr `plusPtr` fromIntegral n)
    go 0 p'
  where
    (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE sort #-}


--------------------------------------------------------------------------------
-- * Low level conversions
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
-- ** Copying ByteStrings

-- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
-- This is mainly useful to allow the rest of the data pointed
-- to by the 'ByteString' to be garbage collected, for example
-- if a large string has been read in, and only a small part of it
-- is needed in the rest of the program.
--
copy :: ByteString -> ByteString
copy v = unsafeCreate l $ \p' ->
            withForeignPtr fp $ \p ->
                memcpy p' p (fromIntegral l)
    where
      (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE copy #-}

--------------------------------------------------------------------------------
--  ** Packing 'CString's and pointers

-- | /O(n)./ Construct a new @ByteString@ from a @CString@. The
-- resulting @ByteString@ is an immutable copy of the original
-- @CString@, and is managed on the Haskell heap. The original
-- @CString@ must be null terminated.
packCString :: CString -> IO ByteString
packCString cstr = do
    len <- c_strlen cstr
    packCStringLen (cstr, fromIntegral len)
{-# INLINE packCString #-}

-- | /O(n)./ Construct a new @ByteString@ from a @CStringLen@. The
-- resulting @ByteString@ is an immutable copy of the original @CStringLen@.
-- The @ByteString@ is a normal Haskell value and will be managed on the
-- Haskell heap.
packCStringLen :: CStringLen -> IO ByteString
packCStringLen (cstr, len) | len >= 0 = create len $ \p ->
    memcpy p (castPtr cstr) (fromIntegral len)
packCStringLen (_, len) =
    moduleError "packCStringLen" ("negative length: " ++ show len)
{-# INLINE packCStringLen #-}

--------------------------------------------------------------------------------
-- ** Using ByteStrings as 'CString's

-- | /O(n) construction/ Use a @ByteString@ with a function requiring a
-- null-terminated @CString@.  The @CString@ will be freed
-- automatically. This is a memcpy(3).
useAsCString :: ByteString -> (CString -> IO a) -> IO a
useAsCString v action = do
 allocaBytes (l+1) $ \buf ->
    withForeignPtr fp $ \p -> do
      memcpy buf p (fromIntegral l)
      pokeByteOff buf l (0::Word8)
      action (castPtr buf)
    where
      (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE useAsCString #-}

-- | /O(n) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
-- As for @useAsCString@ this function makes a copy of the original @ByteString@.
useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
useAsCStringLen v f = useAsCString v $ \cstr -> f (cstr, VS.length v)
{-# INLINE useAsCStringLen #-}


--------------------------------------------------------------------------------
--  * I\/O with 'ByteString's
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
-- ** Standard input and output

-- | Read a line from stdin.
getLine :: IO ByteString
getLine = hGetLine stdin

-- | getContents. Read stdin strictly. Equivalent to hGetContents stdin
-- The 'Handle' is closed after the contents have been read.
--
getContents :: IO ByteString
getContents = hGetContents stdin

-- | Write a ByteString to stdout
putStr :: ByteString -> IO ()
putStr = hPut stdout

-- | Write a ByteString to stdout, appending a newline byte
putStrLn :: ByteString -> IO ()
putStrLn = hPutStrLn stdout

-- | The interact function takes a function of type @ByteString -> ByteString@
-- as its argument. The entire input from the standard input device is passed
-- to this function as its argument, and the resulting string is output on the
-- standard output device.
--
interact :: (ByteString -> ByteString) -> IO ()
interact transformer = putStr . transformer =<< getContents

--------------------------------------------------------------------------------
-- ** Files

-- | Read an entire file strictly into a 'ByteString'.  This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'pack'.  It also may be more efficient than opening the file and
-- reading it using hGet. Files are read using 'binary mode' on Windows,
-- for 'text mode' use the Char8 version of this function.
--
readFile :: FilePath -> IO ByteString
readFile f = withBinaryFile f ReadMode $ \h ->
               hFileSize h >>= hGet h . fromIntegral

-- | Write a 'ByteString' to a file.
writeFile :: FilePath -> ByteString -> IO ()
writeFile f txt = withBinaryFile f WriteMode $ \h -> hPut h txt

-- | Append a 'ByteString' to a file.
appendFile :: FilePath -> ByteString -> IO ()
appendFile f txt = withBinaryFile f AppendMode $ \h -> hPut h txt

--------------------------------------------------------------------------------
-- ** I\/O with Handles

-- | Read a line from a handle

hGetLine :: Handle -> IO ByteString
hGetLine h =
  wantReadableHandle_ "Data.Vector.Storable.ByteString.hGetLine" h $
    \ h_@Handle__{haByteBuffer} -> do
      flushCharReadBuffer h_
      buf <- readIORef haByteBuffer
      if isEmptyBuffer buf
        then fill    h_ buf 0 []
        else haveBuf h_ buf 0 []
 where
  fill h_@Handle__{haByteBuffer, haDevice} buf !len xss = do
    (r, buf') <- Buffered.fillReadBuffer haDevice buf
    if r == 0
       then do writeIORef haByteBuffer buf{ bufR=0, bufL=0 }
               if len > 0
                  then mkBigPS len xss
                  else ioe_EOF
       else haveBuf h_ buf' len xss

  haveBuf h_@Handle__{haByteBuffer}
          buf@Buffer{ bufRaw=raw, bufR=w, bufL=r }
          len xss = do
    off <- findEOL r w raw
    let new_len = len + off - r
    xs <- mkPS raw r off

    -- if eol == True, then off is the offset of the '\n'
    -- otherwise off == w and the buffer is now empty.
    if off /= w
      then do if (w == off + 1)
                then writeIORef haByteBuffer buf{ bufL=0, bufR=0 }
                else writeIORef haByteBuffer buf{ bufL = off + 1 }
              mkBigPS new_len (xs:xss)
      else fill h_ buf{ bufL=0, bufR=0 } new_len (xs:xss)

  -- find the end-of-line character, if there is one
  findEOL r w raw
      | r == w = return w
      | otherwise =  do
          c <- readWord8Buf raw r
          if c == fromIntegral (ord '\n')
            then return r -- NB. not r+1: don't include the '\n'
            else findEOL (r+1) w raw

mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString
mkPS buf start end =
 create len $ \p ->
   withRawBuffer buf $ \pbuf -> do
   copyBytes p (pbuf `plusPtr` start) len
 where
   len = end - start

mkBigPS :: Int -> [ByteString] -> IO ByteString
mkBigPS _ [v] = return v
mkBigPS _ vs  = return $! VS.concat (L.reverse vs)

-- | Outputs a 'ByteString' to the specified 'Handle'.
hPut :: Handle -> ByteString -> IO ()
hPut h v
    | l == 0    = return ()
    | otherwise = withForeignPtr fp $ \p -> hPutBuf h p l
    where
      (fp, l) = VS.unsafeToForeignPtr0 v

-- | Similar to 'hPut' except that it will never block. Instead it returns
-- any tail that did not get written. This tail may be 'empty' in the case that
-- the whole string was written, or the whole original string if nothing was
-- written. Partial writes are also possible.
--
-- Note: on Windows and with Haskell implementation other than GHC, this
-- function does not work correctly; it behaves identically to 'hPut'.
--
hPutNonBlocking :: Handle -> ByteString -> IO ByteString
hPutNonBlocking h v = do
  bytesWritten <- withForeignPtr fp $ \p-> hPutBufNonBlocking h p l
  return $! VS.drop bytesWritten v
      where
        (fp, l) = VS.unsafeToForeignPtr0 v

-- | A synonym for @hPut@, for compatibility
hPutStr :: Handle -> ByteString -> IO ()
hPutStr = hPut

-- | Write a ByteString to a handle, appending a newline byte
hPutStrLn :: Handle -> ByteString -> IO ()
hPutStrLn h v
    | VS.length v < 1024 = hPut h (v `snoc` 0x0a)
    | otherwise          = hPut h v >> hPut h (singleton (0x0a)) -- don't copy

{-# DEPRECATED hPutStrLn
    "Use Data.Vector.Storable.ByteString.Char8.hPutStrLn instead. (Functions that rely on ASCII encodings belong in Data.Vector.Storable.ByteString.Char8)"
  #-}
{-# DEPRECATED putStrLn
    "Use Data.Vector.Storable.ByteString.Char8.putStrLn instead. (Functions that rely on ASCII encodings belong in Data.Vector.Storable.ByteString.Char8)"
  #-}

-- | Read a 'ByteString' directly from the specified 'Handle'.  This
-- is far more efficient than reading the characters into a 'String'
-- and then using 'pack'. First argument is the Handle to read from,
-- and the second is the number of bytes to read. It returns the bytes
-- read, up to n, or 'null' if EOF has been reached.
--
-- 'hGet' is implemented in terms of 'hGetBuf'.
--
-- If the handle is a pipe or socket, and the writing end
-- is closed, 'hGet' will behave as if EOF was reached.
--
hGet :: Handle -> Int -> IO ByteString
hGet h i
    | i >  0    = createAndTrim i $ \p -> hGetBuf h p i
    | i == 0    = return VS.empty
    | otherwise = illegalBufferSize h "hGet" i

-- | hGetNonBlocking is identical to 'hGet', except that it will never
-- block waiting for data to become available.  If there is no data
-- available to be read, 'hGetNonBlocking' returns 'null'.
--
hGetNonBlocking :: Handle -> Int -> IO ByteString
hGetNonBlocking h i
    | i >  0    = createAndTrim i $ \p -> hGetBufNonBlocking h p i
    | i == 0    = return VS.empty
    | otherwise = illegalBufferSize h "hGetNonBlocking" i

-- | Like 'hGet', except that a shorter 'ByteString' may be returned
-- if there are not enough bytes immediately available to satisfy the
-- whole request.  'hGetSome' only blocks if there is no data
-- available, and EOF has not yet been reached.
--
hGetSome :: Handle -> Int -> IO ByteString
hGetSome hh i
    | i > 0     = createAndTrim i $ \p -> hGetBufSome hh p i
    | i == 0    = return VS.empty
    | otherwise = illegalBufferSize hh "hGetSome" i

illegalBufferSize :: Handle -> String -> Int -> IO a
illegalBufferSize handle fn sz =
    ioError (mkIOError illegalOperationErrorType msg (Just handle) Nothing)
    --TODO: System.IO uses InvalidArgument here, but it's not exported :-(
    where
      msg = fn ++ ": illegal ByteString size " ++ showsPrec 9 sz []

-- | Read entire handle contents strictly into a 'ByteString'.
--
-- This function reads chunks at a time, doubling the chunksize on each
-- read. The final buffer is then realloced to the appropriate size. For
-- files > half of available memory, this may lead to memory exhaustion.
-- Consider using 'readFile' in this case.
--
-- As with 'hGet', the string representation in the file is assumed to
-- be ISO-8859-1.
--
-- The Handle is closed once the contents have been read,
-- or if an exception is thrown.
--
hGetContents :: Handle -> IO ByteString
hGetContents h = always (hClose h) $ do -- strict, so hClose
    let start_size = 1024
    p <- mallocBytes start_size
    i <- hGetBuf h p start_size
    if i < start_size
        then do p' <- reallocBytes p i
                fp <- newForeignPtr finalizerFree p'
                return $! VS.unsafeFromForeignPtr0 fp i
        else f p start_size
    where
        always = flip finally
        f p s = do
            let s' = 2 * s
            p' <- reallocBytes p s'
            i  <- hGetBuf h (p' `plusPtr` s) s
            if i < s
                then do let i' = s + i
                        p'' <- reallocBytes p' i'
                        fp  <- newForeignPtr finalizerFree p''
                        return $! VS.unsafeFromForeignPtr0 fp i'
                else f p' s'


--------------------------------------------------------------------------------
-- Utils
--------------------------------------------------------------------------------

-- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
-- of the string if no element is found, rather than Nothing.
findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
findIndexOrEnd k v = unsafeInlineIO $  withForeignPtr fp $ \p ->
  let end = p `plusPtr` l
      go !ptr | ptr == end = return l
              | otherwise = do
                  w <- peek ptr
                  if k w
                    then return (ptr `minusPtr` p)
                    else go (ptr `plusPtr` 1)
  in go p
    where
      (fp, l) = VS.unsafeToForeignPtr0 v
{-# INLINE findIndexOrEnd #-}

-- | Find from the end of the string using predicate
findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
findFromEndUntil pred = go
    where
      go v | VS.null v              = 0
           | pred (VS.unsafeLast v) = l
           | otherwise              = go (VS.unsafeTake (l-1) v)
           where
             l = VS.length v
{-# INLINE findFromEndUntil #-}

moduleError :: String -> String -> a
moduleError fun msg = error $ "Data.Vector.Storable.ByteString." ++
                               fun ++ ':':' ':msg
{-# NOINLINE moduleError #-}