| 1 | {-# LANGUAGE BangPatterns #-} |
|---|
| 2 | -- | |
|---|
| 3 | -- Module : Blaze.ByteString.Builder.Internal |
|---|
| 4 | -- Copyright : (c) 2010 Simon Meier |
|---|
| 5 | -- License : BSD3-style (see LICENSE) |
|---|
| 6 | -- |
|---|
| 7 | -- Maintainer : Simon Meier <iridcode@gmail.com> |
|---|
| 8 | -- Stability : experimental |
|---|
| 9 | -- Portability : tested on GHC only |
|---|
| 10 | -- |
|---|
| 11 | -- Implementation of the 'Builder' monoid. |
|---|
| 12 | -- |
|---|
| 13 | -- A standard library user must never import this module directly. Instead, he |
|---|
| 14 | -- should import "Blaze.ByteString.Builder", which re-exports the 'Builder' type and |
|---|
| 15 | -- its associated public functions defined in this module. |
|---|
| 16 | -- |
|---|
| 17 | -- Developers of other libraries may import this module to gain access to the |
|---|
| 18 | -- internal representation of builders. For example, in some cases, creating a |
|---|
| 19 | -- 'Builder' with a custom low-level 'BuildStep' may improve performance |
|---|
| 20 | -- considerably compared to the creating it using the public 'Builder' |
|---|
| 21 | -- combinators (e.g., @'fromWrite1List'@ in "Blaze.ByteString.Builder.Write"). |
|---|
| 22 | -- Another example, is the use of 'ModifyChunks' to efficiently wire the |
|---|
| 23 | -- 'Builder' type with another library that generates lazy bytestrings. |
|---|
| 24 | -- |
|---|
| 25 | -- In any case, whenever you import this module you /must/ reference the full |
|---|
| 26 | -- version of the 'blaze-builder' package in your cabal file, as the |
|---|
| 27 | -- implementation and the guarantees given in this file may change in any |
|---|
| 28 | -- version! The release notes will tell, if this was the case. |
|---|
| 29 | -- |
|---|
| 30 | module Internal |
|---|
| 31 | ( |
|---|
| 32 | -- * The @Builder@ type |
|---|
| 33 | Builder(..) |
|---|
| 34 | , BuildStep |
|---|
| 35 | , BuildSignal(..) |
|---|
| 36 | |
|---|
| 37 | -- * Flushing the buffer |
|---|
| 38 | , flush |
|---|
| 39 | |
|---|
| 40 | -- * Executing builders |
|---|
| 41 | , toLazyByteStringWith |
|---|
| 42 | , toLazyByteString |
|---|
| 43 | , toByteString |
|---|
| 44 | , toByteStringIOWith |
|---|
| 45 | , toByteStringIO |
|---|
| 46 | |
|---|
| 47 | -- * Default sizes |
|---|
| 48 | , defaultBufferSize |
|---|
| 49 | , defaultMinimalBufferSize |
|---|
| 50 | , defaultMaximalCopySize |
|---|
| 51 | ) where |
|---|
| 52 | |
|---|
| 53 | |
|---|
| 54 | import Foreign |
|---|
| 55 | import Data.Monoid |
|---|
| 56 | import Control.Monad (unless) |
|---|
| 57 | import qualified Data.ByteString as S |
|---|
| 58 | import qualified Data.ByteString.Lazy as L |
|---|
| 59 | |
|---|
| 60 | import Data.ByteString.Internal (inlinePerformIO) |
|---|
| 61 | import qualified Data.ByteString.Internal as S |
|---|
| 62 | import qualified Data.ByteString.Lazy.Internal as L |
|---|
| 63 | |
|---|
| 64 | |
|---|
| 65 | ------------------------------------------------------------------------------ |
|---|
| 66 | -- The Builder type |
|---|
| 67 | ------------------------------------------------------------------------------ |
|---|
| 68 | |
|---|
| 69 | -- | Intuitively, a builder denotes the construction of a lazy bytestring. |
|---|
| 70 | -- |
|---|
| 71 | -- Builders can be created from primitive buffer manipulations using the |
|---|
| 72 | -- @'Write'@ abstraction provided by in "Blaze.ByteString.Builder.Write". However for |
|---|
| 73 | -- many Haskell values, there exist predefined functions doing that already. |
|---|
| 74 | -- For example, UTF-8 encoding 'Char' and 'String' values is provided by the |
|---|
| 75 | -- functions in "Blaze.ByteString.Builder.Char.Utf8". Concatenating builders is done |
|---|
| 76 | -- using their 'Monoid' instance. |
|---|
| 77 | -- |
|---|
| 78 | -- Semantically, builders are nothing special. They just denote a sequence of |
|---|
| 79 | -- bytes. However, their representation is chosen such that this sequence of |
|---|
| 80 | -- bytes can be efficiently (in terms of CPU cycles) computed in an |
|---|
| 81 | -- incremental, chunk-wise fashion such that the average chunk-size is large. |
|---|
| 82 | -- Note that the large average chunk size allows to make good use of cache |
|---|
| 83 | -- prefetching in later processing steps (e.g. compression) or to reduce the |
|---|
| 84 | -- sytem call overhead when writing the resulting lazy bytestring to a file or |
|---|
| 85 | -- sending it over the network. |
|---|
| 86 | -- |
|---|
| 87 | -- For precisely understanding the performance of a specific 'Builder', |
|---|
| 88 | -- benchmarking is unavoidable. Moreover, it also helps to understand the |
|---|
| 89 | -- implementation of builders and the predefined combinators. This should be |
|---|
| 90 | -- amenable to the average Haskell programmer by reading the source code of |
|---|
| 91 | -- "Blaze.ByteString.Builder.Internal" and the other modules of this library. |
|---|
| 92 | -- |
|---|
| 93 | -- The guiding implementation principle was to reduce the abstraction cost per |
|---|
| 94 | -- output byte. We use continuation passing to achieve a constant time append. |
|---|
| 95 | -- The output buffer is filled by the individual builders as long as possible. |
|---|
| 96 | -- They call each other directly when they are done and control is returned to |
|---|
| 97 | -- the driver (e.g., 'toLazyByteString') only when the buffer is full, a |
|---|
| 98 | -- bytestring needs to be inserted directly, or no more bytes can be written. |
|---|
| 99 | -- We also try to take the pressure off the cache by moving variables as far |
|---|
| 100 | -- out of loops as possible. This leads to some duplication of code, but |
|---|
| 101 | -- results in sometimes dramatic increases in performance. For example, see the |
|---|
| 102 | -- @'fromWord8s'@ function in "Blaze.ByteString.Builder.Word". |
|---|
| 103 | -- |
|---|
| 104 | newtype Builder = Builder (BuildStep -> BuildStep) |
|---|
| 105 | |
|---|
| 106 | -- | A 'BuildSignal' signals to the driver of the 'Builder' execution the next |
|---|
| 107 | -- step to be taken. |
|---|
| 108 | -- |
|---|
| 109 | data BuildSignal = |
|---|
| 110 | -- | @Done pf@ signals that the 'BuildStep' is finished and data has been |
|---|
| 111 | -- written up to the next free byte @pf@. |
|---|
| 112 | Done {-# UNPACK #-} !(Ptr Word8) |
|---|
| 113 | -- | @BufferFull newSize pf nextStep@ signals that the buffer is full and |
|---|
| 114 | -- data has been written up to the next free byte @pf@. Moreover, the next |
|---|
| 115 | -- build step to be executed @nextStep@ requires a buffer of at least size |
|---|
| 116 | -- @newSize@ to execute successfully. |
|---|
| 117 | -- |
|---|
| 118 | -- A driver /must/ guarantee that the buffer used to call @nextStep@ is at |
|---|
| 119 | -- least of size @newSize@. |
|---|
| 120 | | BufferFull |
|---|
| 121 | {-# UNPACK #-} !Int |
|---|
| 122 | {-# UNPACK #-} !(Ptr Word8) |
|---|
| 123 | !BuildStep |
|---|
| 124 | -- | @ModifyChunks pf fbs nextStep@ signals that the data written up to the |
|---|
| 125 | -- next free byte @pf@ must be output and the remaining lazy bytestring that |
|---|
| 126 | -- is produced by executing @nextStep@ must be modified using the function |
|---|
| 127 | -- @fbs@. |
|---|
| 128 | -- |
|---|
| 129 | -- This signal is used to insert bytestrings directly into the output stream. |
|---|
| 130 | -- It can also be used to efficiently hand over control to another library |
|---|
| 131 | -- for generating streams of strict bytestrings. |
|---|
| 132 | | ModifyChunks |
|---|
| 133 | {-# UNPACK #-} !(Ptr Word8) |
|---|
| 134 | !(L.ByteString -> L.ByteString) |
|---|
| 135 | !BuildStep |
|---|
| 136 | |
|---|
| 137 | -- | A 'BuildStep' fills a buffer from the given start pointer as long as |
|---|
| 138 | -- possible and returns control to the caller using a 'BuildSignal', once it is |
|---|
| 139 | -- required. |
|---|
| 140 | -- |
|---|
| 141 | type BuildStep = Ptr Word8 -- ^ Pointer to the next free byte in the |
|---|
| 142 | -- buffer. A 'BuildStep' must start writing |
|---|
| 143 | -- its data from this address. |
|---|
| 144 | -> Ptr Word8 -- ^ Pointer to the first byte /after/ the |
|---|
| 145 | -- buffer. A 'BuildStep' must never write |
|---|
| 146 | -- data at or after this address. |
|---|
| 147 | -> IO BuildSignal -- ^ Signal to the driver about the next step |
|---|
| 148 | -- to be taken. |
|---|
| 149 | |
|---|
| 150 | instance Monoid Builder where |
|---|
| 151 | mempty = Builder id |
|---|
| 152 | {-# INLINE mempty #-} |
|---|
| 153 | mappend (Builder f) (Builder g) = Builder $ f . g |
|---|
| 154 | {-# INLINE mappend #-} |
|---|
| 155 | mconcat = foldr mappend mempty |
|---|
| 156 | {-# INLINE mconcat #-} |
|---|
| 157 | |
|---|
| 158 | ------------------------------------------------------------------------------ |
|---|
| 159 | -- Internal global constants. |
|---|
| 160 | ------------------------------------------------------------------------------ |
|---|
| 161 | |
|---|
| 162 | -- | Default size (~32kb) for the buffer that becomes a chunk of the output |
|---|
| 163 | -- stream once it is filled. |
|---|
| 164 | -- |
|---|
| 165 | defaultBufferSize :: Int |
|---|
| 166 | defaultBufferSize = 32 * 1024 - overhead -- Copied from Data.ByteString.Lazy. |
|---|
| 167 | where overhead = 2 * sizeOf (undefined :: Int) |
|---|
| 168 | |
|---|
| 169 | -- | The minimal length (~4kb) a buffer must have before filling it and |
|---|
| 170 | -- outputting it as a chunk of the output stream. |
|---|
| 171 | -- |
|---|
| 172 | -- This size determines when a buffer is spilled after a 'flush' or a direct |
|---|
| 173 | -- bytestring insertion. It is also the size of the first chunk generated by |
|---|
| 174 | -- 'toLazyByteString'. |
|---|
| 175 | defaultMinimalBufferSize :: Int |
|---|
| 176 | defaultMinimalBufferSize = 4 * 1024 - overhead |
|---|
| 177 | where overhead = 2 * sizeOf (undefined :: Int) |
|---|
| 178 | |
|---|
| 179 | -- | The default length (64) for the first buffer to be allocated when |
|---|
| 180 | -- converting a 'Builder' to a lazy bytestring. |
|---|
| 181 | -- |
|---|
| 182 | -- See 'toLazyByteStringWith' for further explanation. |
|---|
| 183 | defaultFirstBufferSize :: Int |
|---|
| 184 | defaultFirstBufferSize = 64 |
|---|
| 185 | |
|---|
| 186 | -- | The maximal number of bytes for that copying is cheaper than direct |
|---|
| 187 | -- insertion into the output stream. This takes into account the fragmentation |
|---|
| 188 | -- that may occur in the output buffer due to the early 'flush' implied by the |
|---|
| 189 | -- direct bytestring insertion. |
|---|
| 190 | -- |
|---|
| 191 | -- @'defaultMaximalCopySize' = 2 * 'defaultMinimalBufferSize'@ |
|---|
| 192 | -- |
|---|
| 193 | defaultMaximalCopySize :: Int |
|---|
| 194 | defaultMaximalCopySize = 2 * defaultMinimalBufferSize |
|---|
| 195 | |
|---|
| 196 | ------------------------------------------------------------------------------ |
|---|
| 197 | -- Flushing and running a Builder |
|---|
| 198 | ------------------------------------------------------------------------------ |
|---|
| 199 | |
|---|
| 200 | |
|---|
| 201 | -- | Output all data written in the current buffer and start a new chunk. |
|---|
| 202 | -- |
|---|
| 203 | -- The use uf this function depends on how the resulting bytestrings are |
|---|
| 204 | -- consumed. 'flush' is possibly not very useful in non-interactive scenarios. |
|---|
| 205 | -- However, it is kept for compatibility with the builder provided by |
|---|
| 206 | -- Data.Binary.Builder. |
|---|
| 207 | -- |
|---|
| 208 | -- When using 'toLazyByteString' to extract a lazy 'L.ByteString' from a |
|---|
| 209 | -- 'Builder', this means that a new chunk will be started in the resulting lazy |
|---|
| 210 | -- 'L.ByteString'. The remaining part of the buffer is spilled, if the |
|---|
| 211 | -- reamining free space is smaller than the minimal desired buffer size. |
|---|
| 212 | -- |
|---|
| 213 | flush :: Builder |
|---|
| 214 | flush = Builder $ \k pf _ -> return $ ModifyChunks pf id k |
|---|
| 215 | |
|---|
| 216 | -- | Run a 'Builder' with the given buffer sizes. |
|---|
| 217 | -- |
|---|
| 218 | -- Use this function for integrating the 'Builder' type with other libraries |
|---|
| 219 | -- that generate lazy bytestrings. |
|---|
| 220 | -- |
|---|
| 221 | -- Note that the builders should guarantee that on average the desired chunk |
|---|
| 222 | -- size is attained. Builders may decide to start a new buffer and not |
|---|
| 223 | -- completely fill the existing buffer, if this is faster. However, they should |
|---|
| 224 | -- not spill too much of the buffer, if they cannot compensate for it. |
|---|
| 225 | -- |
|---|
| 226 | -- A call @toLazyByteStringWith bufSize minBufSize firstBufSize@ will generate |
|---|
| 227 | -- a lazy bytestring according to the following strategy. First, we allocate |
|---|
| 228 | -- a buffer of size @firstBufSize@ and start filling it. If it overflows, we |
|---|
| 229 | -- allocate a buffer of size @minBufSize@ and copy the first buffer to it in |
|---|
| 230 | -- order to avoid generating a too small chunk. Finally, every next buffer will |
|---|
| 231 | -- be of size @bufSize@. This, slow startup strategy is required to achieve |
|---|
| 232 | -- good speed for short (<200 bytes) resulting bytestrings, as for them the |
|---|
| 233 | -- allocation cost is of a large buffer cannot be compensated. Moreover, this |
|---|
| 234 | -- strategy also allows us to avoid spilling too much memory for short |
|---|
| 235 | -- resulting bytestrings. |
|---|
| 236 | -- |
|---|
| 237 | -- Note that setting @firstBufSize >= minBufSize@ implies that the first buffer |
|---|
| 238 | -- is no longer copied but allocated and filled directly. Hence, setting |
|---|
| 239 | -- @firstBufSize = bufSize@ means that all chunks will use an underlying buffer |
|---|
| 240 | -- of size @bufSize@. This is recommended, if you know that you always output |
|---|
| 241 | -- more than @minBufSize@ bytes. |
|---|
| 242 | toLazyByteStringWith |
|---|
| 243 | :: Int -- ^ Buffer size (upper-bounds the resulting chunk size). |
|---|
| 244 | -> Int -- ^ Minimal free buffer space for continuing filling |
|---|
| 245 | -- the same buffer after a 'flush' or a direct bytestring |
|---|
| 246 | -- insertion. This corresponds to the minimal desired |
|---|
| 247 | -- chunk size. |
|---|
| 248 | -> Int -- ^ Size of the first buffer to be used and copied for |
|---|
| 249 | -- larger resulting sequences |
|---|
| 250 | -> Builder -- ^ Builder to run. |
|---|
| 251 | -> L.ByteString -- ^ Lazy bytestring to output after the builder is |
|---|
| 252 | -- finished. |
|---|
| 253 | -> L.ByteString -- ^ Resulting lazy bytestring |
|---|
| 254 | toLazyByteStringWith bufSize minBufSize firstBufSize (Builder b) k = |
|---|
| 255 | inlinePerformIO $ fillFirstBuffer (b finalStep) |
|---|
| 256 | where |
|---|
| 257 | finalStep pf _ = return $ Done pf |
|---|
| 258 | -- fill a first very small buffer, if we need more space then copy it |
|---|
| 259 | -- to the new buffer of size 'minBufSize'. This way we don't pay the |
|---|
| 260 | -- allocation cost of the big 'bufSize' buffer, when outputting only |
|---|
| 261 | -- small sequences. |
|---|
| 262 | fillFirstBuffer !step0 |
|---|
| 263 | | minBufSize <= firstBufSize = fillNewBuffer firstBufSize step0 |
|---|
| 264 | | otherwise = do |
|---|
| 265 | fpbuf <- S.mallocByteString firstBufSize |
|---|
| 266 | withForeignPtr fpbuf $ \pf -> do |
|---|
| 267 | let !pe = pf `plusPtr` firstBufSize |
|---|
| 268 | mkbs pf' = S.PS fpbuf 0 (pf' `minusPtr` pf) |
|---|
| 269 | {-# INLINE mkbs #-} |
|---|
| 270 | next <- step0 pf pe |
|---|
| 271 | case next of |
|---|
| 272 | Done pf' |
|---|
| 273 | | pf' == pf -> return k |
|---|
| 274 | | otherwise -> return $ L.Chunk (mkbs pf') k |
|---|
| 275 | |
|---|
| 276 | BufferFull newSize pf' nextStep -> do |
|---|
| 277 | let !l = pf' `minusPtr` pf |
|---|
| 278 | fillNewBuffer (max (l + newSize) minBufSize) $ |
|---|
| 279 | \pfNew peNew -> do |
|---|
| 280 | copyBytes pfNew pf l |
|---|
| 281 | nextStep (pfNew `plusPtr` l) peNew |
|---|
| 282 | |
|---|
| 283 | ModifyChunks pf' bsk nextStep |
|---|
| 284 | | pf' == pf -> |
|---|
| 285 | return $ bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep) |
|---|
| 286 | | otherwise -> |
|---|
| 287 | return $ L.Chunk (mkbs pf') |
|---|
| 288 | (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep)) |
|---|
| 289 | |
|---|
| 290 | -- allocate and fill a new buffer |
|---|
| 291 | fillNewBuffer !size !step0 = do |
|---|
| 292 | fpbuf <- S.mallocByteString size |
|---|
| 293 | withForeignPtr fpbuf $ fillBuffer fpbuf |
|---|
| 294 | where |
|---|
| 295 | fillBuffer fpbuf !pbuf = fill pbuf step0 |
|---|
| 296 | where |
|---|
| 297 | !pe = pbuf `plusPtr` size |
|---|
| 298 | fill !pf !step = do |
|---|
| 299 | next <- step pf pe |
|---|
| 300 | let mkbs pf' = S.PS fpbuf (pf `minusPtr` pbuf) (pf' `minusPtr` pf) |
|---|
| 301 | {-# INLINE mkbs #-} |
|---|
| 302 | case next of |
|---|
| 303 | Done pf' |
|---|
| 304 | | pf' == pf -> return k |
|---|
| 305 | | otherwise -> return $ L.Chunk (mkbs pf') k |
|---|
| 306 | |
|---|
| 307 | BufferFull newSize pf' nextStep -> |
|---|
| 308 | return $ L.Chunk (mkbs pf') |
|---|
| 309 | (inlinePerformIO $ |
|---|
| 310 | fillNewBuffer (max newSize bufSize) nextStep) |
|---|
| 311 | |
|---|
| 312 | ModifyChunks pf' bsk nextStep |
|---|
| 313 | | pf' == pf -> |
|---|
| 314 | return $ bsk (inlinePerformIO $ fill pf' nextStep) |
|---|
| 315 | | minBufSize < pe `minusPtr` pf' -> |
|---|
| 316 | return $ L.Chunk (mkbs pf') |
|---|
| 317 | (bsk (inlinePerformIO $ fill pf' nextStep)) |
|---|
| 318 | | otherwise -> |
|---|
| 319 | return $ L.Chunk (mkbs pf') |
|---|
| 320 | (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep)) |
|---|
| 321 | |
|---|
| 322 | |
|---|
| 323 | -- | Extract the lazy 'L.ByteString' from the builder by running it with default |
|---|
| 324 | -- buffer sizes. Use this function, if you do not have any special |
|---|
| 325 | -- considerations with respect to buffer sizes. |
|---|
| 326 | -- |
|---|
| 327 | -- @ 'toLazyByteString' b = 'toLazyByteStringWith' 'defaultBufferSize' 'defaultMinimalBufferSize' 'defaultFirstBufferSize' b L.empty@ |
|---|
| 328 | -- |
|---|
| 329 | -- Note that @'toLazyByteString'@ is a 'Monoid' homomorphism. |
|---|
| 330 | -- |
|---|
| 331 | -- > toLazyByteString mempty == mempty |
|---|
| 332 | -- > toLazyByteString (x `mappend` y) == toLazyByteString x `mappend` toLazyByteString y |
|---|
| 333 | -- |
|---|
| 334 | -- However, in the second equation, the left-hand-side is generally faster to |
|---|
| 335 | -- execute. |
|---|
| 336 | -- |
|---|
| 337 | toLazyByteString :: Builder -> L.ByteString |
|---|
| 338 | toLazyByteString b = toLazyByteStringWith |
|---|
| 339 | defaultBufferSize defaultMinimalBufferSize defaultFirstBufferSize b L.empty |
|---|
| 340 | {-# INLINE toLazyByteString #-} |
|---|
| 341 | |
|---|
| 342 | -- | Pack the chunks of a lazy bytestring into a single strict bytestring. |
|---|
| 343 | packChunks :: L.ByteString -> S.ByteString |
|---|
| 344 | packChunks lbs = do |
|---|
| 345 | S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs) |
|---|
| 346 | where |
|---|
| 347 | copyChunks !L.Empty !_pf = return () |
|---|
| 348 | copyChunks !(L.Chunk (S.PS fpbuf o l) lbs') !pf = do |
|---|
| 349 | withForeignPtr fpbuf $ \pbuf -> |
|---|
| 350 | copyBytes pf (pbuf `plusPtr` o) l |
|---|
| 351 | copyChunks lbs' (pf `plusPtr` l) |
|---|
| 352 | |
|---|
| 353 | -- | Run the builder to construct a strict bytestring containing the sequence |
|---|
| 354 | -- of bytes denoted by the builder. This is done by first serializing to a lazy bytestring and then packing its |
|---|
| 355 | -- chunks to a appropriately sized strict bytestring. |
|---|
| 356 | -- |
|---|
| 357 | -- > toByteString = packChunks . toLazyByteString |
|---|
| 358 | -- |
|---|
| 359 | -- Note that @'toByteString'@ is a 'Monoid' homomorphism. |
|---|
| 360 | -- |
|---|
| 361 | -- > toByteString mempty == mempty |
|---|
| 362 | -- > toByteString (x `mappend` y) == toByteString x `mappend` toByteString y |
|---|
| 363 | -- |
|---|
| 364 | -- However, in the second equation, the left-hand-side is generally faster to |
|---|
| 365 | -- execute. |
|---|
| 366 | -- |
|---|
| 367 | toByteString :: Builder -> S.ByteString |
|---|
| 368 | toByteString = packChunks . toLazyByteString |
|---|
| 369 | |
|---|
| 370 | |
|---|
| 371 | -- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of |
|---|
| 372 | -- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the |
|---|
| 373 | -- buffer is full. |
|---|
| 374 | -- |
|---|
| 375 | -- Compared to 'toLazyByteStringWith' this function requires less allocation, |
|---|
| 376 | -- as the output buffer is only allocated once at the start of the |
|---|
| 377 | -- serialization and whenever something bigger than the current buffer size has |
|---|
| 378 | -- to be copied into the buffer, which should happen very seldomly for the |
|---|
| 379 | -- default buffer size of 32kb. Hence, the pressure on the garbage collector is |
|---|
| 380 | -- reduced, which can be an advantage when building long sequences of bytes. |
|---|
| 381 | -- |
|---|
| 382 | toByteStringIOWith :: Int -- ^ Buffer size (upper bounds |
|---|
| 383 | -- the number of bytes forced |
|---|
| 384 | -- per call to the 'IO' action). |
|---|
| 385 | -> (S.ByteString -> IO ()) -- ^ 'IO' action to execute per |
|---|
| 386 | -- full buffer, which is |
|---|
| 387 | -- referenced by a strict |
|---|
| 388 | -- 'S.ByteString'. |
|---|
| 389 | -> Builder -- ^ 'Builder' to run. |
|---|
| 390 | -> IO () -- ^ Resulting 'IO' action. |
|---|
| 391 | toByteStringIOWith bufSize io (Builder b) = |
|---|
| 392 | fillNewBuffer bufSize (b finalStep) |
|---|
| 393 | where |
|---|
| 394 | finalStep pf _ = return $ Done pf |
|---|
| 395 | |
|---|
| 396 | fillNewBuffer !size !step0 = do |
|---|
| 397 | S.mallocByteString size >>= fillBuffer |
|---|
| 398 | where |
|---|
| 399 | fillBuffer fpbuf = fill step0 |
|---|
| 400 | where |
|---|
| 401 | -- safe because the constructed ByteString references the foreign |
|---|
| 402 | -- pointer AFTER its buffer was filled. |
|---|
| 403 | pf = unsafeForeignPtrToPtr fpbuf |
|---|
| 404 | fill !step = do |
|---|
| 405 | next <- step pf (pf `plusPtr` size) |
|---|
| 406 | case next of |
|---|
| 407 | Done pf' -> |
|---|
| 408 | unless (pf' == pf) (io $ S.PS fpbuf 0 (pf' `minusPtr` pf)) |
|---|
| 409 | |
|---|
| 410 | BufferFull newSize pf' nextStep -> do |
|---|
| 411 | io $ S.PS fpbuf 0 (pf' `minusPtr` pf) |
|---|
| 412 | if bufSize < newSize |
|---|
| 413 | then fillNewBuffer newSize nextStep |
|---|
| 414 | else fill nextStep |
|---|
| 415 | |
|---|
| 416 | ModifyChunks pf' bsk nextStep -> do |
|---|
| 417 | unless (pf' == pf) (io $ S.PS fpbuf 0 (pf' `minusPtr` pf)) |
|---|
| 418 | -- was: mapM_ io $ L.toChunks (bsk L.empty) |
|---|
| 419 | L.foldrChunks (\bs -> (io bs >>)) (return ()) (bsk L.empty) |
|---|
| 420 | fill nextStep |
|---|
| 421 | |
|---|
| 422 | -- | Run the builder with a 'defaultBufferSize'd buffer and execute the given |
|---|
| 423 | -- 'IO' action whenever the buffer is full or gets flushed. |
|---|
| 424 | -- |
|---|
| 425 | -- @ 'toByteStringIO' = 'toByteStringIOWith' 'defaultBufferSize'@ |
|---|
| 426 | -- |
|---|
| 427 | -- This is a 'Monoid' homomorphism in the following sense. |
|---|
| 428 | -- |
|---|
| 429 | -- > toByteStringIO io mempty == return () |
|---|
| 430 | -- > toByteStringIO io (x `mappend` y) == toByteStringIO io x >> toByteStringIO io y |
|---|
| 431 | -- |
|---|
| 432 | toByteStringIO :: (S.ByteString -> IO ()) -> Builder -> IO () |
|---|
| 433 | toByteStringIO = toByteStringIOWith defaultBufferSize |
|---|
| 434 | {-# INLINE toByteStringIO #-} |
|---|