{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-} {-# LANGUAGE DeriveDataTypeable #-} #if __GLASGOW_HASKELL__ >= 703 {-# LANGUAGE Unsafe #-} #endif {-# OPTIONS_HADDOCK hide #-} -- | -- Module : Data.ByteString.Lazy.Internal -- Copyright : (c) Don Stewart 2006-2008 -- (c) Duncan Coutts 2006-2011 -- License : BSD-style -- Maintainer : dons00@gmail.com, duncan@community.haskell.org -- Stability : unstable -- Portability : non-portable -- -- A module containing semi-public 'ByteString' internals. This exposes -- the 'ByteString' representation and low level construction functions. -- Modules which extend the 'ByteString' system will need to use this module -- while ideally most users will be able to make do with the public interface -- modules. -- module Data.ByteString.Lazy.Internal ( -- * The lazy @ByteString@ type and representation ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable chunk, foldrChunks, foldlChunks, -- * Data type invariant and abstraction function invariant, checkInvariant, -- * Chunk allocation sizes defaultChunkSize, smallChunkSize, chunkOverhead, -- * Conversion with lists: packing and unpacking packBytes, packChars, unpackBytes, unpackChars, ) where import Prelude hiding (concat) import qualified Data.ByteString.Internal as S import qualified Data.ByteString as S (length, take, drop) import Data.Word (Word8) import Foreign.Storable (Storable(sizeOf)) #if MIN_VERSION_base(4,9,0) import Data.Semigroup (Semigroup((<>))) #endif #if !(MIN_VERSION_base(4,8,0)) import Data.Monoid (Monoid(..)) #endif import Control.DeepSeq (NFData, rnf) import Data.String (IsString(..)) import Data.Typeable (Typeable) import Data.Data (Data(..), mkNoRepType) -- | A space-efficient representation of a 'Word8' vector, supporting many -- efficient operations. -- -- A lazy 'ByteString' contains 8-bit bytes, or by using the operations -- from "Data.ByteString.Lazy.Char8" it can be interpreted as containing -- 8-bit characters. -- data ByteString = Empty | Chunk {-# UNPACK #-} !S.ByteString ByteString deriving (Typeable) instance Eq ByteString where (==) = eq instance Ord ByteString where compare = cmp #if MIN_VERSION_base(4,9,0) instance Semigroup ByteString where (<>) = append #endif instance Monoid ByteString where mempty = Empty #if MIN_VERSION_base(4,9,0) mappend = (<>) #else mappend = append #endif mconcat = concat instance NFData ByteString where rnf Empty = () rnf (Chunk _ b) = rnf b instance Show ByteString where showsPrec p ps r = showsPrec p (unpackChars ps) r instance Read ByteString where readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ] instance IsString ByteString where fromString = packChars instance Data ByteString where gfoldl f z txt = z packBytes `f` unpackBytes txt toConstr _ = error "Data.ByteString.Lazy.ByteString.toConstr" gunfold _ _ = error "Data.ByteString.Lazy.ByteString.gunfold" dataTypeOf _ = mkNoRepType "Data.ByteString.Lazy.ByteString" ------------------------------------------------------------------------ -- Packing and unpacking from lists packBytes :: [Word8] -> ByteString packBytes cs0 = packChunks 32 cs0 where packChunks n cs = case S.packUptoLenBytes n cs of (bs, []) -> chunk bs Empty (bs, cs') -> Chunk bs (packChunks (min (n * 2) smallChunkSize) cs') packChars :: [Char] -> ByteString packChars cs0 = packChunks 32 cs0 where packChunks n cs = case S.packUptoLenChars n cs of (bs, []) -> chunk bs Empty (bs, cs') -> Chunk bs (packChunks (min (n * 2) smallChunkSize) cs') unpackBytes :: ByteString -> [Word8] unpackBytes Empty = [] unpackBytes (Chunk c cs) = S.unpackAppendBytesLazy c (unpackBytes cs) unpackChars :: ByteString -> [Char] unpackChars Empty = [] unpackChars (Chunk c cs) = S.unpackAppendCharsLazy c (unpackChars cs) ------------------------------------------------------------------------ -- | The data type invariant: -- Every ByteString is either 'Empty' or consists of non-null 'S.ByteString's. -- All functions must preserve this, and the QC properties must check this. -- invariant :: ByteString -> Bool invariant Empty = True invariant (Chunk (S.PS _ _ len) cs) = len > 0 && invariant cs -- | In a form that checks the invariant lazily. checkInvariant :: ByteString -> ByteString checkInvariant Empty = Empty checkInvariant (Chunk c@(S.PS _ _ len) cs) | len > 0 = Chunk c (checkInvariant cs) | otherwise = error $ "Data.ByteString.Lazy: invariant violation:" ++ show (Chunk c cs) ------------------------------------------------------------------------ -- | Smart constructor for 'Chunk'. Guarantees the data type invariant. chunk :: S.ByteString -> ByteString -> ByteString chunk c@(S.PS _ _ len) cs | len == 0 = cs | otherwise = Chunk c cs {-# INLINE chunk #-} -- | Consume the chunks of a lazy ByteString with a natural right fold. foldrChunks :: (S.ByteString -> a -> a) -> a -> ByteString -> a foldrChunks f z = go where go Empty = z go (Chunk c cs) = f c (go cs) {-# INLINE foldrChunks #-} -- | Consume the chunks of a lazy ByteString with a strict, tail-recursive, -- accumulating left fold. foldlChunks :: (a -> S.ByteString -> a) -> a -> ByteString -> a foldlChunks f z = go z where go a _ | a `seq` False = undefined go a Empty = a go a (Chunk c cs) = go (f a c) cs {-# INLINE foldlChunks #-} ------------------------------------------------------------------------ -- The representation uses lists of packed chunks. When we have to convert from -- a lazy list to the chunked representation, then by default we use this -- chunk size. Some functions give you more control over the chunk size. -- -- Measurements here: -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png -- -- indicate that a value around 0.5 to 1 x your L2 cache is best. -- The following value assumes people have something greater than 128k, -- and need to share the cache with other programs. -- | The chunk size used for I\/O. Currently set to 32k, less the memory management overhead defaultChunkSize :: Int defaultChunkSize = 32 * k - chunkOverhead where k = 1024 -- | The recommended chunk size. Currently set to 4k, less the memory management overhead smallChunkSize :: Int smallChunkSize = 4 * k - chunkOverhead where k = 1024 -- | The memory management overhead. Currently this is tuned for GHC only. chunkOverhead :: Int chunkOverhead = 2 * sizeOf (undefined :: Int) ------------------------------------------------------------------------ -- Implementations for Eq, Ord and Monoid instances eq :: ByteString -> ByteString -> Bool eq Empty Empty = True eq Empty _ = False eq _ Empty = False eq (Chunk a as) (Chunk b bs) = case compare (S.length a) (S.length b) of LT -> a == (S.take (S.length a) b) && eq as (Chunk (S.drop (S.length a) b) bs) EQ -> a == b && eq as bs GT -> (S.take (S.length b) a) == b && eq (Chunk (S.drop (S.length b) a) as) bs cmp :: ByteString -> ByteString -> Ordering cmp Empty Empty = EQ cmp Empty _ = LT cmp _ Empty = GT cmp (Chunk a as) (Chunk b bs) = case compare (S.length a) (S.length b) of LT -> case compare a (S.take (S.length a) b) of EQ -> cmp as (Chunk (S.drop (S.length a) b) bs) result -> result EQ -> case compare a b of EQ -> cmp as bs result -> result GT -> case compare (S.take (S.length b) a) b of EQ -> cmp (Chunk (S.drop (S.length b) a) as) bs result -> result append :: ByteString -> ByteString -> ByteString append xs ys = foldrChunks Chunk ys xs concat :: [ByteString] -> ByteString concat css0 = to css0 where go Empty css = to css go (Chunk c cs) css = Chunk c (go cs css) to [] = Empty to (cs:css) = go cs css