Ticket #4515: Internal.hs

File Internal.hs, 19.4 KB (added by dsf, 3 years ago)
Line 
1{-# LANGUAGE BangPatterns #-}
2-- |
3-- Module      : Blaze.ByteString.Builder.Internal
4-- Copyright   : (c) 2010 Simon Meier
5-- License     : BSD3-style (see LICENSE)
6--
7-- Maintainer  : Simon Meier <iridcode@gmail.com>
8-- Stability   : experimental
9-- Portability : tested on GHC only
10--
11-- Implementation of the 'Builder' monoid.
12--
13-- A standard library user must never import this module directly. Instead, he
14-- should import "Blaze.ByteString.Builder", which re-exports the 'Builder' type and
15-- its associated public functions defined in this module.
16--
17-- Developers of other libraries may import this module to gain access to the
18-- internal representation of builders. For example, in some cases, creating a
19-- 'Builder' with a custom low-level 'BuildStep' may improve performance
20-- considerably compared to the creating it using the public 'Builder'
21-- combinators (e.g., @'fromWrite1List'@ in "Blaze.ByteString.Builder.Write").
22-- Another example, is the use of 'ModifyChunks' to efficiently wire the
23-- 'Builder' type with another library that generates lazy bytestrings.
24--
25-- In any case, whenever you import this module you /must/ reference the full
26-- version of the 'blaze-builder' package in your cabal file, as the
27-- implementation and the guarantees given in this file may change in any
28-- version! The release notes will tell, if this was the case.
29--
30module Internal
31    ( 
32    -- * The @Builder@ type
33      Builder(..)
34    , BuildStep
35    , BuildSignal(..)
36
37    -- * Flushing the buffer
38    , flush
39
40    -- * Executing builders
41    , toLazyByteStringWith
42    , toLazyByteString
43    , toByteString
44    , toByteStringIOWith
45    , toByteStringIO
46
47    -- * Default sizes
48    , defaultBufferSize
49    , defaultMinimalBufferSize
50    , defaultMaximalCopySize
51    ) where
52
53
54import Foreign
55import Data.Monoid
56import Control.Monad (unless)
57import qualified Data.ByteString      as S
58import qualified Data.ByteString.Lazy as L
59
60import Data.ByteString.Internal (inlinePerformIO)
61import qualified Data.ByteString.Internal as S
62import qualified Data.ByteString.Lazy.Internal as L
63
64
65------------------------------------------------------------------------------
66-- The Builder type
67------------------------------------------------------------------------------
68
69-- | Intuitively, a builder denotes the construction of a lazy bytestring.
70--
71-- Builders can be created from primitive buffer manipulations using the
72-- @'Write'@ abstraction provided by in "Blaze.ByteString.Builder.Write". However for
73-- many Haskell values, there exist predefined functions doing that already.
74-- For example, UTF-8 encoding 'Char' and 'String' values is provided by the
75-- functions in "Blaze.ByteString.Builder.Char.Utf8". Concatenating builders is done
76-- using their 'Monoid' instance.
77--
78-- Semantically, builders are nothing special. They just denote a sequence of
79-- bytes. However, their representation is chosen such that this sequence of
80-- bytes can be efficiently (in terms of CPU cycles) computed in an
81-- incremental, chunk-wise fashion such that the average chunk-size is large.
82-- Note that the large average chunk size allows to make good use of cache
83-- prefetching in later processing steps (e.g. compression) or to reduce the
84-- sytem call overhead when writing the resulting lazy bytestring to a file or
85-- sending it over the network.
86--
87-- For precisely understanding the performance of a specific 'Builder',
88-- benchmarking is unavoidable. Moreover, it also helps to understand the
89-- implementation of builders and the predefined combinators. This should be
90-- amenable to the average Haskell programmer by reading the source code of
91-- "Blaze.ByteString.Builder.Internal" and the other modules of this library.
92--
93-- The guiding implementation principle was to reduce the abstraction cost per
94-- output byte. We use continuation passing to achieve a constant time append.
95-- The output buffer is filled by the individual builders as long as possible.
96-- They call each other directly when they are done and control is returned to
97-- the driver (e.g., 'toLazyByteString') only when the buffer is full, a
98-- bytestring needs to be inserted directly, or no more bytes can be written.
99-- We also try to take the pressure off the cache by moving variables as far
100-- out of loops as possible. This leads to some duplication of code, but
101-- results in sometimes dramatic increases in performance. For example, see the
102-- @'fromWord8s'@ function in "Blaze.ByteString.Builder.Word".
103--
104newtype Builder = Builder (BuildStep -> BuildStep)
105
106-- | A 'BuildSignal' signals to the driver of the 'Builder' execution the next
107-- step to be taken.
108--
109data BuildSignal =
110  -- | @Done pf@ signals that the 'BuildStep' is finished and data has been
111  -- written up to the next free byte @pf@.
112    Done {-# UNPACK #-} !(Ptr Word8) 
113  -- | @BufferFull newSize pf nextStep@ signals that the buffer is full and
114  -- data has been written up to the next free byte @pf@. Moreover, the next
115  -- build step to be executed @nextStep@ requires a buffer of at least size
116  -- @newSize@ to execute successfully.
117  --
118  -- A driver /must/ guarantee that the buffer used to call @nextStep@ is at
119  -- least of size @newSize@.
120  | BufferFull
121      {-# UNPACK #-} !Int
122      {-# UNPACK #-} !(Ptr Word8)
123                     !BuildStep
124  -- | @ModifyChunks pf fbs nextStep@ signals that the data written up to the
125  -- next free byte @pf@ must be output and the remaining lazy bytestring that
126  -- is produced by executing @nextStep@ must be modified using the function
127  -- @fbs@.
128  --
129  -- This signal is used to insert bytestrings directly into the output stream.
130  -- It can also be used to efficiently hand over control to another library
131  -- for generating streams of strict bytestrings.
132  | ModifyChunks
133      {-# UNPACK #-} !(Ptr Word8) 
134                     !(L.ByteString -> L.ByteString) 
135                     !BuildStep
136
137-- | A 'BuildStep' fills a buffer from the given start pointer as long as
138-- possible and returns control to the caller using a 'BuildSignal', once it is
139-- required.
140--
141type BuildStep =  Ptr Word8       -- ^ Pointer to the next free byte in the
142                                  -- buffer. A 'BuildStep' must start writing
143                                  -- its data from this address.
144               -> Ptr Word8       -- ^ Pointer to the first byte /after/ the
145                                  -- buffer.  A 'BuildStep' must never write
146                                  -- data at or after this address.
147               -> IO BuildSignal  -- ^ Signal to the driver about the next step
148                                  -- to be taken.
149
150instance Monoid Builder where
151    mempty = Builder id
152    {-# INLINE mempty #-}
153    mappend (Builder f) (Builder g) = Builder $ f . g
154    {-# INLINE mappend #-}
155    mconcat = foldr mappend mempty
156    {-# INLINE mconcat #-}
157
158------------------------------------------------------------------------------
159-- Internal global constants.
160------------------------------------------------------------------------------
161
162-- | Default size (~32kb) for the buffer that becomes a chunk of the output
163-- stream once it is filled.
164--
165defaultBufferSize :: Int
166defaultBufferSize = 32 * 1024 - overhead -- Copied from Data.ByteString.Lazy.
167    where overhead = 2 * sizeOf (undefined :: Int)
168
169-- | The minimal length (~4kb) a buffer must have before filling it and
170-- outputting it as a chunk of the output stream.
171--
172-- This size determines when a buffer is spilled after a 'flush' or a direct
173-- bytestring insertion. It is also the size of the first chunk generated by
174-- 'toLazyByteString'.
175defaultMinimalBufferSize :: Int
176defaultMinimalBufferSize = 4 * 1024 - overhead
177    where overhead = 2 * sizeOf (undefined :: Int)
178
179-- | The default length (64) for the first buffer to be allocated when
180-- converting a 'Builder' to a lazy bytestring.
181--
182-- See 'toLazyByteStringWith' for further explanation.
183defaultFirstBufferSize :: Int
184defaultFirstBufferSize = 64
185
186-- | The maximal number of bytes for that copying is cheaper than direct
187-- insertion into the output stream. This takes into account the fragmentation
188-- that may occur in the output buffer due to the early 'flush' implied by the
189-- direct bytestring insertion.
190--
191-- @'defaultMaximalCopySize' = 2 * 'defaultMinimalBufferSize'@
192--
193defaultMaximalCopySize :: Int
194defaultMaximalCopySize = 2 * defaultMinimalBufferSize
195
196------------------------------------------------------------------------------
197-- Flushing and running a Builder
198------------------------------------------------------------------------------
199
200
201-- | Output all data written in the current buffer and start a new chunk.
202--
203-- The use uf this function depends on how the resulting bytestrings are
204-- consumed. 'flush' is possibly not very useful in non-interactive scenarios.
205-- However, it is kept for compatibility with the builder provided by
206-- Data.Binary.Builder.
207--
208-- When using 'toLazyByteString' to extract a lazy 'L.ByteString' from a
209-- 'Builder', this means that a new chunk will be started in the resulting lazy
210-- 'L.ByteString'. The remaining part of the buffer is spilled, if the
211-- reamining free space is smaller than the minimal desired buffer size.
212--
213flush :: Builder
214flush = Builder $ \k pf _ -> return $ ModifyChunks pf id k
215
216-- | Run a 'Builder' with the given buffer sizes.
217--
218-- Use this function for integrating the 'Builder' type with other libraries
219-- that generate lazy bytestrings.
220--
221-- Note that the builders should guarantee that on average the desired chunk
222-- size is attained. Builders may decide to start a new buffer and not
223-- completely fill the existing buffer, if this is faster. However, they should
224-- not spill too much of the buffer, if they cannot compensate for it.
225--
226-- A call @toLazyByteStringWith bufSize minBufSize firstBufSize@ will generate
227-- a lazy bytestring according to the following strategy. First, we allocate
228-- a buffer of size @firstBufSize@ and start filling it. If it overflows, we
229-- allocate a buffer of size @minBufSize@ and copy the first buffer to it in
230-- order to avoid generating a too small chunk. Finally, every next buffer will
231-- be of size @bufSize@. This, slow startup strategy is required to achieve
232-- good speed for short (<200 bytes) resulting bytestrings, as for them the
233-- allocation cost is of a large buffer cannot be compensated. Moreover, this
234-- strategy also allows us to avoid spilling too much memory for short
235-- resulting bytestrings.
236--
237-- Note that setting @firstBufSize >= minBufSize@ implies that the first buffer
238-- is no longer copied but allocated and filled directly. Hence, setting
239-- @firstBufSize = bufSize@ means that all chunks will use an underlying buffer
240-- of size @bufSize@. This is recommended, if you know that you always output
241-- more than @minBufSize@ bytes.
242toLazyByteStringWith 
243    :: Int           -- ^ Buffer size (upper-bounds the resulting chunk size).
244    -> Int           -- ^ Minimal free buffer space for continuing filling
245                     -- the same buffer after a 'flush' or a direct bytestring
246                     -- insertion. This corresponds to the minimal desired
247                     -- chunk size.
248    -> Int           -- ^ Size of the first buffer to be used and copied for
249                     -- larger resulting sequences
250    -> Builder       -- ^ Builder to run.
251    -> L.ByteString  -- ^ Lazy bytestring to output after the builder is
252                     -- finished.
253    -> L.ByteString  -- ^ Resulting lazy bytestring
254toLazyByteStringWith bufSize minBufSize firstBufSize (Builder b) k = 
255    inlinePerformIO $ fillFirstBuffer (b finalStep)
256  where
257    finalStep pf _ = return $ Done pf
258    -- fill a first very small buffer, if we need more space then copy it
259    -- to the new buffer of size 'minBufSize'. This way we don't pay the
260    -- allocation cost of the big 'bufSize' buffer, when outputting only
261    -- small sequences.
262    fillFirstBuffer !step0
263      | minBufSize <= firstBufSize = fillNewBuffer firstBufSize step0
264      | otherwise                  = do
265          fpbuf <- S.mallocByteString firstBufSize
266          withForeignPtr fpbuf $ \pf -> do
267              let !pe      = pf `plusPtr` firstBufSize
268                  mkbs pf' = S.PS fpbuf 0 (pf' `minusPtr` pf)
269                  {-# INLINE mkbs #-}
270              next <- step0 pf pe
271              case next of
272                  Done pf'
273                    | pf' == pf -> return k
274                    | otherwise -> return $ L.Chunk (mkbs pf') k
275
276                  BufferFull newSize pf' nextStep  -> do
277                      let != pf' `minusPtr` pf
278                      fillNewBuffer (max (l + newSize) minBufSize) $
279                          \pfNew peNew -> do
280                              copyBytes pfNew pf l
281                              nextStep (pfNew `plusPtr` l) peNew
282                     
283                  ModifyChunks pf' bsk nextStep
284                      | pf' == pf ->
285                          return $ bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep)
286                      | otherwise ->
287                          return $ L.Chunk (mkbs pf')
288                              (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep))
289                   
290    -- allocate and fill a new buffer
291    fillNewBuffer !size !step0 = do
292        fpbuf <- S.mallocByteString size
293        withForeignPtr fpbuf $ fillBuffer fpbuf
294      where
295        fillBuffer fpbuf !pbuf = fill pbuf step0
296          where
297            !pe = pbuf `plusPtr` size
298            fill !pf !step = do
299                next <- step pf pe
300                let mkbs pf' = S.PS fpbuf (pf `minusPtr` pbuf) (pf' `minusPtr` pf)
301                    {-# INLINE mkbs #-}
302                case next of
303                    Done pf'
304                      | pf' == pf -> return k
305                      | otherwise -> return $ L.Chunk (mkbs pf') k
306
307                    BufferFull newSize pf' nextStep ->
308                        return $ L.Chunk (mkbs pf')
309                            (inlinePerformIO $ 
310                                fillNewBuffer (max newSize bufSize) nextStep)
311                       
312                    ModifyChunks  pf' bsk nextStep
313                      | pf' == pf                      ->
314                          return $ bsk (inlinePerformIO $ fill pf' nextStep)
315                      | minBufSize < pe `minusPtr` pf' ->
316                          return $ L.Chunk (mkbs pf')
317                              (bsk (inlinePerformIO $ fill pf' nextStep))
318                      | otherwise                      ->
319                          return $ L.Chunk (mkbs pf')
320                              (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep))
321
322
323-- | Extract the lazy 'L.ByteString' from the builder by running it with default
324-- buffer sizes. Use this function, if you do not have any special
325-- considerations with respect to buffer sizes.
326--
327-- @ 'toLazyByteString' b = 'toLazyByteStringWith' 'defaultBufferSize' 'defaultMinimalBufferSize' 'defaultFirstBufferSize' b L.empty@
328--
329-- Note that @'toLazyByteString'@ is a 'Monoid' homomorphism.
330--
331-- > toLazyByteString mempty          == mempty
332-- > toLazyByteString (x `mappend` y) == toLazyByteString x `mappend` toLazyByteString y
333--
334-- However, in the second equation, the left-hand-side is generally faster to
335-- execute.
336--
337toLazyByteString :: Builder -> L.ByteString
338toLazyByteString b = toLazyByteStringWith
339    defaultBufferSize defaultMinimalBufferSize defaultFirstBufferSize b L.empty
340{-# INLINE toLazyByteString #-}
341
342-- | Pack the chunks of a lazy bytestring into a single strict bytestring.
343packChunks :: L.ByteString -> S.ByteString
344packChunks lbs = do
345    S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs)
346  where
347    copyChunks !L.Empty                         !_pf = return ()
348    copyChunks !(L.Chunk (S.PS fpbuf o l) lbs') !pf  = do
349        withForeignPtr fpbuf $ \pbuf ->
350            copyBytes pf (pbuf `plusPtr` o) l
351        copyChunks lbs' (pf `plusPtr` l)
352
353-- | Run the builder to construct a strict bytestring containing the sequence
354-- of bytes denoted by the builder. This is done by first serializing to a lazy bytestring and then packing its
355-- chunks to a appropriately sized strict bytestring.
356--
357-- > toByteString = packChunks . toLazyByteString
358--
359-- Note that @'toByteString'@ is a 'Monoid' homomorphism.
360--
361-- > toByteString mempty          == mempty
362-- > toByteString (x `mappend` y) == toByteString x `mappend` toByteString y
363--
364-- However, in the second equation, the left-hand-side is generally faster to
365-- execute.
366--
367toByteString :: Builder -> S.ByteString
368toByteString = packChunks . toLazyByteString
369
370
371-- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of
372-- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the
373-- buffer is full.
374--
375-- Compared to 'toLazyByteStringWith' this function requires less allocation,
376-- as the output buffer is only allocated once at the start of the
377-- serialization and whenever something bigger than the current buffer size has
378-- to be copied into the buffer, which should happen very seldomly for the
379-- default buffer size of 32kb. Hence, the pressure on the garbage collector is
380-- reduced, which can be an advantage when building long sequences of bytes.
381--
382toByteStringIOWith :: Int                      -- ^ Buffer size (upper bounds
383                                               -- the number of bytes forced
384                                               -- per call to the 'IO' action).
385                   -> (S.ByteString -> IO ())  -- ^ 'IO' action to execute per
386                                               -- full buffer, which is
387                                               -- referenced by a strict
388                                               -- 'S.ByteString'.
389                   -> Builder                  -- ^ 'Builder' to run.
390                   -> IO ()                    -- ^ Resulting 'IO' action.
391toByteStringIOWith bufSize io (Builder b) = 
392    fillNewBuffer bufSize (b finalStep)
393  where
394    finalStep pf _ = return $ Done pf
395
396    fillNewBuffer !size !step0 = do
397        S.mallocByteString size >>= fillBuffer
398      where
399        fillBuffer fpbuf = fill step0
400          where
401            -- safe because the constructed ByteString references the foreign
402            -- pointer AFTER its buffer was filled.
403            pf = unsafeForeignPtrToPtr fpbuf
404            fill !step = do
405                next <- step pf (pf `plusPtr` size)
406                case next of
407                    Done pf' ->
408                        unless (pf' == pf) (io $  S.PS fpbuf 0 (pf' `minusPtr` pf))
409
410                    BufferFull newSize pf' nextStep  -> do
411                        io $ S.PS fpbuf 0 (pf' `minusPtr` pf)
412                        if bufSize < newSize
413                          then fillNewBuffer newSize nextStep
414                          else fill nextStep
415                       
416                    ModifyChunks  pf' bsk nextStep  -> do
417                        unless (pf' == pf) (io $  S.PS fpbuf 0 (pf' `minusPtr` pf))
418                        -- was: mapM_ io $ L.toChunks (bsk L.empty)
419                        L.foldrChunks (\bs -> (io bs >>)) (return ()) (bsk L.empty)
420                        fill nextStep
421
422-- | Run the builder with a 'defaultBufferSize'd buffer and execute the given
423-- 'IO' action whenever the buffer is full or gets flushed.
424--
425-- @ 'toByteStringIO' = 'toByteStringIOWith' 'defaultBufferSize'@
426--
427-- This is a 'Monoid' homomorphism in the following sense.
428--
429-- > toByteStringIO io mempty          == return ()
430-- > toByteStringIO io (x `mappend` y) == toByteStringIO io x >> toByteStringIO io y
431--
432toByteStringIO :: (S.ByteString -> IO ()) -> Builder -> IO ()
433toByteStringIO = toByteStringIOWith defaultBufferSize
434{-# INLINE toByteStringIO #-}