----------------------------------------------------------------------------- -- | -- Module: Data.Enumerator.Binary -- Copyright: 2010 John Millikin -- License: MIT -- -- Maintainer: jmillikin@gmail.com -- Portability: portable -- -- Byte-oriented alternatives to "Data.Enumerator.List". Note that the -- enumeratees in this module must unpack their inputs to work properly. If -- you do not need to handle leftover input on a byte-by-byte basis, the -- chunk-oriented versions will be much faster. -- -- This module is intended to be imported qualified: -- -- @ -- import qualified Data.Enumerator.Binary as EB -- @ -- -- Since: 0.4.5 -- ----------------------------------------------------------------------------- module Data.Enumerator.Binary ( -- * IO enumHandle , enumHandleRange , enumFile , enumFileRange , iterHandle -- * List analogues -- ** Folds , fold , foldM -- ** Maps , Data.Enumerator.Binary.map , Data.Enumerator.Binary.mapM , Data.Enumerator.Binary.mapM_ , Data.Enumerator.Binary.concatMap , concatMapM -- ** Accumulating maps , mapAccum , mapAccumM , concatMapAccum , concatMapAccumM -- ** Infinite streams , Data.Enumerator.Binary.iterate , iterateM , Data.Enumerator.Binary.repeat , repeatM -- ** Bounded streams , Data.Enumerator.Binary.replicate , replicateM , generateM , unfold , unfoldM -- ** Filters , Data.Enumerator.Binary.filter , filterM -- ** Consumers , Data.Enumerator.Binary.take , takeWhile , consume -- ** Unsorted , Data.Enumerator.Binary.head , Data.Enumerator.Binary.drop , Data.Enumerator.Binary.dropWhile , require , isolate , splitWhen ) where import Prelude hiding (head, drop, takeWhile, mapM_) import Data.Enumerator hiding ( head, drop, iterateM, repeatM, replicateM , generateM, filterM, consume, foldM , concatMapM) import Control.Monad.IO.Class (MonadIO) import qualified Data.ByteString as B import qualified System.IO as IO import qualified Control.Exception as Exc import System.IO.Error (isEOFError) import Data.Word (Word8) import qualified Data.Enumerator.List as EL import qualified Control.Monad as CM import qualified Data.ByteString.Lazy as BL import Control.Monad.Trans.Class (lift) import Control.Monad (liftM) -- | Consume the entire input stream with a strict left fold, one byte -- at a time. -- -- Since: 0.4.8 fold :: Monad m => (b -> Word8 -> b) -> b -> Iteratee B.ByteString m b fold step = EL.fold (B.foldl' step) -- | Consume the entire input stream with a strict monadic left fold, one -- byte at a time. -- -- Since: 0.4.8 foldM :: Monad m => (b -> Word8 -> m b) -> b -> Iteratee B.ByteString m b foldM step = EL.foldM (\b bytes -> CM.foldM step b (B.unpack bytes)) -- | Enumerates a stream of bytes by repeatedly applying a function to -- some state. -- -- Similar to 'iterate'. -- -- Since: 0.4.8 unfold :: Monad m => (s -> Maybe (Word8, s)) -> s -> Enumerator B.ByteString m b unfold f = checkContinue1 $ \loop s k -> case f s of Nothing -> continue k Just (b, s') -> k (Chunks [B.singleton b]) >>== loop s' -- | Enumerates a stream of bytes by repeatedly applying a computation to -- some state. -- -- Similar to 'iterateM'. -- -- Since: 0.4.8 unfoldM :: Monad m => (s -> m (Maybe (Word8, s))) -> s -> Enumerator B.ByteString m b unfoldM f = checkContinue1 $ \loop s k -> do fs <- lift (f s) case fs of Nothing -> continue k Just (b, s') -> k (Chunks [B.singleton b]) >>== loop s' -- | @'map' f@ applies /f/ to each input byte and feeds the -- resulting outputs to the inner iteratee. -- -- Since: 0.4.8 map :: Monad m => (Word8 -> Word8) -> Enumeratee B.ByteString B.ByteString m b map f = Data.Enumerator.Binary.concatMap (\x -> B.singleton (f x)) -- | @'mapM' f@ applies /f/ to each input byte and feeds the -- resulting outputs to the inner iteratee. -- -- Since: 0.4.8 mapM :: Monad m => (Word8 -> m Word8) -> Enumeratee B.ByteString B.ByteString m b mapM f = Data.Enumerator.Binary.concatMapM (\x -> liftM B.singleton (f x)) -- | @'mapM_' f@ applies /f/ to each input byte, and discards the results. -- -- Since: 0.4.11 mapM_ :: Monad m => (Word8 -> m ()) -> Iteratee B.ByteString m () mapM_ f = foldM (\_ x -> f x >> return ()) () -- | @'concatMap' f@ applies /f/ to each input byte and feeds the -- resulting outputs to the inner iteratee. -- -- Since: 0.4.8 concatMap :: Monad m => (Word8 -> B.ByteString) -> Enumeratee B.ByteString B.ByteString m b concatMap f = Data.Enumerator.Binary.concatMapM (return . f) -- | @'concatMapM' f@ applies /f/ to each input byte and feeds the -- resulting outputs to the inner iteratee. -- -- Since: 0.4.8 concatMapM :: Monad m => (Word8 -> m B.ByteString) -> Enumeratee B.ByteString B.ByteString m b concatMapM f = checkDone (continue . step) where step k EOF = yield (Continue k) EOF step k (Chunks xs) = loop k (BL.unpack (BL.fromChunks xs)) loop k [] = continue (step k) loop k (x:xs) = do fx <- lift (f x) k (Chunks [fx]) >>== checkDoneEx (Chunks [B.pack xs]) (\k' -> loop k' xs) -- | Similar to 'concatMap', but with a stateful step function. -- -- Since: 0.4.11 concatMapAccum :: Monad m => (s -> Word8 -> (s, B.ByteString)) -> s -> Enumeratee B.ByteString B.ByteString m b concatMapAccum f s0 = checkDone (continue . step s0) where step _ k EOF = yield (Continue k) EOF step s k (Chunks xs) = loop s k xs loop s k [] = continue (step s k) loop s k (x:xs) = case B.uncons x of Nothing -> loop s k xs Just (b, x') -> case f s b of (s', ai) -> k (Chunks [ai]) >>== checkDoneEx (Chunks (x':xs)) (\k' -> loop s' k' (x':xs)) -- | Similar to 'concatMapM', but with a stateful step function. -- -- Since: 0.4.11 concatMapAccumM :: Monad m => (s -> Word8 -> m (s, B.ByteString)) -> s -> Enumeratee B.ByteString B.ByteString m b concatMapAccumM f s0 = checkDone (continue . step s0) where step _ k EOF = yield (Continue k) EOF step s k (Chunks xs) = loop s k xs loop s k [] = continue (step s k) loop s k (x:xs) = case B.uncons x of Nothing -> loop s k xs Just (b, x') -> do (s', ai) <- lift (f s b) k (Chunks [ai]) >>== checkDoneEx (Chunks (x':xs)) (\k' -> loop s' k' (x':xs)) -- | Similar to 'map', but with a stateful step function. -- -- Since: 0.4.9 mapAccum :: Monad m => (s -> Word8 -> (s, Word8)) -> s -> Enumeratee B.ByteString B.ByteString m b mapAccum f = concatMapAccum (\s w -> case f s w of (s', w') -> (s', B.singleton w')) -- | Similar to 'mapM', but with a stateful step function. -- -- Since: 0.4.9 mapAccumM :: Monad m => (s -> Word8 -> m (s, Word8)) -> s -> Enumeratee B.ByteString B.ByteString m b mapAccumM f = concatMapAccumM (\s w -> do (s', w') <- f s w return (s', B.singleton w')) -- | @'iterate' f x@ enumerates an infinite stream of repeated applications -- of /f/ to /x/. -- -- Analogous to 'Prelude.iterate'. -- -- Since: 0.4.8 iterate :: Monad m => (Word8 -> Word8) -> Word8 -> Enumerator B.ByteString m b iterate f = checkContinue1 $ \loop s k -> k (Chunks [B.singleton s]) >>== loop (f s) -- | Similar to 'iterate', except the iteration function is monadic. -- -- Since: 0.4.8 iterateM :: Monad m => (Word8 -> m Word8) -> Word8 -> Enumerator B.ByteString m b iterateM f base = worker (return base) where worker = checkContinue1 $ \loop m_byte k -> do byte <- lift m_byte k (Chunks [B.singleton byte]) >>== loop (f byte) -- | Enumerates an infinite stream of a single byte. -- -- Analogous to 'Prelude.repeat'. -- -- Since: 0.4.8 repeat :: Monad m => Word8 -> Enumerator B.ByteString m b repeat byte = EL.repeat (B.singleton byte) -- | Enumerates an infinite stream of byte. Each byte is computed by the -- underlying monad. -- -- Since: 0.4.8 repeatM :: Monad m => m Word8 -> Enumerator B.ByteString m b repeatM next = EL.repeatM (liftM B.singleton next) -- | @'replicate' n x@ enumerates a stream containing /n/ copies of /x/. -- -- Since: 0.4.8 replicate :: Monad m => Integer -> Word8 -> Enumerator B.ByteString m b replicate n byte = EL.replicate n (B.singleton byte) -- | @'replicateM' n m_x@ enumerates a stream of /n/ bytes, with each byte -- computed by /m_x/. -- -- Since: 0.4.8 replicateM :: Monad m => Integer -> m Word8 -> Enumerator B.ByteString m b replicateM n next = EL.replicateM n (liftM B.singleton next) -- | Like 'repeatM', except the computation may terminate the stream by -- returning 'Nothing'. -- -- Since: 0.4.8 generateM :: Monad m => m (Maybe Word8) -> Enumerator B.ByteString m b generateM next = EL.generateM (liftM (liftM B.singleton) next) -- | Applies a predicate to the stream. The inner iteratee only receives -- characters for which the predicate is @True@. -- -- Since: 0.4.8 filter :: Monad m => (Word8 -> Bool) -> Enumeratee B.ByteString B.ByteString m b filter p = Data.Enumerator.Binary.concatMap (\x -> B.pack [x | p x]) -- | Applies a monadic predicate to the stream. The inner iteratee only -- receives bytes for which the predicate returns @True@. -- -- Since: 0.4.8 filterM :: Monad m => (Word8 -> m Bool) -> Enumeratee B.ByteString B.ByteString m b filterM p = Data.Enumerator.Binary.concatMapM (\x -> liftM B.pack (CM.filterM p [x])) -- | @'take' n@ extracts the next /n/ bytes from the stream, as a lazy -- ByteString. -- -- Since: 0.4.5 take :: Monad m => Integer -> Iteratee B.ByteString m BL.ByteString take n | n <= 0 = return BL.empty take n = continue (loop id n) where loop acc n' (Chunks xs) = iter where lazy = BL.fromChunks xs len = toInteger (BL.length lazy) iter = if len < n' then continue (loop (acc . (BL.append lazy)) (n' - len)) else let (xs', extra) = BL.splitAt (fromInteger n') lazy in yield (acc xs') (toChunks extra) loop acc _ EOF = yield (acc BL.empty) EOF -- | @'takeWhile' p@ extracts input from the stream until the first byte which -- does not match the predicate. -- -- Since: 0.4.5 takeWhile :: Monad m => (Word8 -> Bool) -> Iteratee B.ByteString m BL.ByteString takeWhile p = continue (loop id) where loop acc (Chunks []) = continue (loop acc) loop acc (Chunks xs) = iter where lazy = BL.fromChunks xs (xs', extra) = BL.span p lazy iter = if BL.null extra then continue (loop (acc . (BL.append lazy))) else yield (acc xs') (toChunks extra) loop acc EOF = yield (acc BL.empty) EOF -- | @'consume' = 'takeWhile' (const True)@ -- -- Since: 0.4.5 consume :: Monad m => Iteratee B.ByteString m BL.ByteString consume = continue (loop id) where loop acc (Chunks []) = continue (loop acc) loop acc (Chunks xs) = iter where lazy = BL.fromChunks xs iter = continue (loop (acc . (BL.append lazy))) loop acc EOF = yield (acc BL.empty) EOF -- | Get the next byte from the stream, or 'Nothing' if the stream has -- ended. -- -- Since: 0.4.5 head :: Monad m => Iteratee B.ByteString m (Maybe Word8) head = continue loop where loop (Chunks xs) = case BL.uncons (BL.fromChunks xs) of Just (char, extra) -> yield (Just char) (toChunks extra) Nothing -> head loop EOF = yield Nothing EOF -- | @'drop' n@ ignores /n/ bytes of input from the stream. -- -- Since: 0.4.5 drop :: Monad m => Integer -> Iteratee B.ByteString m () drop n | n <= 0 = return () drop n = continue (loop n) where loop n' (Chunks xs) = iter where lazy = BL.fromChunks xs len = toInteger (BL.length lazy) iter = if len < n' then drop (n' - len) else yield () (toChunks (BL.drop (fromInteger n') lazy)) loop _ EOF = yield () EOF -- | @'dropWhile' p@ ignores input from the stream until the first byte -- which does not match the predicate. -- -- Since: 0.4.5 dropWhile :: Monad m => (Word8 -> Bool) -> Iteratee B.ByteString m () dropWhile p = continue loop where loop (Chunks xs) = iter where lazy = BL.dropWhile p (BL.fromChunks xs) iter = if BL.null lazy then continue loop else yield () (toChunks lazy) loop EOF = yield () EOF -- | @'require' n@ buffers input until at least /n/ bytes are available, or -- throws an error if the stream ends early. -- -- Since: 0.4.5 require :: Monad m => Integer -> Iteratee B.ByteString m () require n | n <= 0 = return () require n = continue (loop id n) where loop acc n' (Chunks xs) = iter where lazy = BL.fromChunks xs len = toInteger (BL.length lazy) iter = if len < n' then continue (loop (acc . (BL.append lazy)) (n' - len)) else yield () (toChunks (acc lazy)) loop _ _ EOF = throwError (Exc.ErrorCall "require: Unexpected EOF") -- | @'isolate' n@ reads at most /n/ bytes from the stream, and passes them -- to its iteratee. If the iteratee finishes early, bytes continue to be -- consumed from the outer stream until /n/ have been consumed. -- -- Since: 0.4.5 isolate :: Monad m => Integer -> Enumeratee B.ByteString B.ByteString m b isolate n step | n <= 0 = return step isolate n (Continue k) = continue loop where loop (Chunks []) = continue loop loop (Chunks xs) = iter where lazy = BL.fromChunks xs len = toInteger (BL.length lazy) iter = if len <= n then k (Chunks xs) >>== isolate (n - len) else let (s1, s2) = BL.splitAt (fromInteger n) lazy in k (toChunks s1) >>== (\step -> yield step (toChunks s2)) loop EOF = k EOF >>== (\step -> yield step EOF) isolate n step = drop n >> return step -- | Split on bytes satisfying a given predicate. -- -- Since: 0.4.8 splitWhen :: Monad m => (Word8 -> Bool) -> Enumeratee B.ByteString B.ByteString m b splitWhen p = loop where loop = checkDone step step k = isEOF >>= \eof -> if eof then yield (Continue k) EOF else do lazy <- takeWhile (not . p) let bytes = B.concat (BL.toChunks lazy) eof <- isEOF drop 1 if BL.null lazy && eof then yield (Continue k) EOF else k (Chunks [bytes]) >>== loop -- | Read bytes (in chunks of the given buffer size) from the handle, and -- stream them to an 'Iteratee'. If an exception occurs during file IO, -- enumeration will stop and 'Error' will be returned. Exceptions from the -- iteratee are not caught. -- -- This enumerator blocks until at least one byte is available from the -- handle, and might read less than the maximum buffer size in some -- cases. -- -- The handle should be opened with no encoding, and in 'IO.ReadMode' or -- 'IO.ReadWriteMode'. -- -- Since: 0.4.5 enumHandle :: MonadIO m => Integer -- ^ Buffer size -> IO.Handle -> Enumerator B.ByteString m b enumHandle bufferSize h = checkContinue0 $ \loop k -> do let intSize = fromInteger bufferSize bytes <- tryIO (getBytes h intSize) if B.null bytes then continue k else k (Chunks [bytes]) >>== loop -- | Read bytes (in chunks of the given buffer size) from the handle, and -- stream them to an 'Iteratee'. If an exception occurs during file IO, -- enumeration will stop and 'Error' will be returned. Exceptions from the -- iteratee are not caught. -- -- This enumerator blocks until at least one byte is available from the -- handle, and might read less than the maximum buffer size in some -- cases. -- -- The handle should be opened with no encoding, and in 'IO.ReadMode' or -- 'IO.ReadWriteMode'. -- -- If an offset is specified, the handle will be seeked to that offset -- before reading. If the handle cannot be seeked, an error will be -- thrown. -- -- If a maximum count is specified, the number of bytes read will not -- exceed that count. -- -- Since: 0.4.8 enumHandleRange :: MonadIO m => Integer -- ^ Buffer size -> Maybe Integer -- ^ Offset -> Maybe Integer -- ^ Maximum count -> IO.Handle -> Enumerator B.ByteString m b enumHandleRange bufferSize offset count h s = seek >> enum where seek = case offset of Nothing -> return () Just off -> tryIO (IO.hSeek h IO.AbsoluteSeek off) enum = case count of Just n -> enumRange n s Nothing -> enumHandle bufferSize h s enumRange = checkContinue1 $ \loop n k -> let rem = fromInteger (min bufferSize n) keepGoing = do bytes <- tryIO (getBytes h rem) if B.null bytes then continue k else feed bytes feed bs = k (Chunks [bs]) >>== loop (n - (toInteger (B.length bs))) in if rem <= 0 then continue k else keepGoing getBytes :: IO.Handle -> Int -> IO B.ByteString getBytes h n = do hasInput <- Exc.catch (IO.hWaitForInput h (-1)) (\err -> if isEOFError err then return False else Exc.throwIO err) if hasInput then B.hGetNonBlocking h n else return B.empty -- | Opens a file path in binary mode, and passes the handle to -- 'enumHandle'. The file will be closed when enumeration finishes. -- -- Since: 0.4.5 enumFile :: FilePath -> Enumerator B.ByteString IO b enumFile path = enumFileRange path Nothing Nothing -- | Opens a file path in binary mode, and passes the handle to -- 'enumHandleRange'. The file will be closed when enumeration finishes. -- -- Since: 0.4.8 enumFileRange :: FilePath -> Maybe Integer -- ^ Offset -> Maybe Integer -- ^ Maximum count -> Enumerator B.ByteString IO b enumFileRange path offset count step = do h <- tryIO (IO.openBinaryFile path IO.ReadMode) let iter = enumHandleRange 4096 offset count h step Iteratee (Exc.finally (runIteratee iter) (IO.hClose h)) -- | Read bytes from a stream and write them to a handle. If an exception -- occurs during file IO, enumeration will stop and 'Error' will be -- returned. -- -- The handle should be opened with no encoding, and in 'IO.WriteMode' or -- 'IO.ReadWriteMode'. -- -- Since: 0.4.5 iterHandle :: MonadIO m => IO.Handle -> Iteratee B.ByteString m () iterHandle h = continue step where step EOF = yield () EOF step (Chunks []) = continue step step (Chunks bytes) = do tryIO (CM.mapM_ (B.hPut h) bytes) continue step toChunks :: BL.ByteString -> Stream B.ByteString toChunks = Chunks . BL.toChunks