-- |Monadic and General Iteratees: -- incremental input parsers, processors and transformers module Data.Iteratee.Base ( -- * Types StreamG (..), IterateeG (..), IterateeGM (..), EnumeratorN, EnumeratorGM, EnumeratorGMM, -- * Iteratees -- ** Iteratee Combinators liftI, (>>==), (==<<), joinI, stream2list, -- ** Error handling iterErr, iterReportError, -- ** Basic Iteratees break, dropWhile, drop, head, peek, skipToEof, seek, -- ** Advanced iteratee combinators take, takeR, mapStream, convStream, -- * Enumerators enumEof, enumErr, (>.), enumPure1Chunk, enumPureNChunk, -- * Misc. FileOffset, bindm ) where import Prelude hiding (head, drop, dropWhile, take, break) import qualified Prelude as P import qualified Data.Iteratee.Base.StreamChunk as SC import Data.Iteratee.IO.Base import Control.Monad.Trans import Control.Monad.Identity import System.IO -- |A useful combinator. bindm :: Monad m => m (Maybe a) -> (a -> m (Maybe b)) -> m (Maybe b) bindm m f = m >>= maybe (return Nothing) f -- |A stream is a (continuing) sequence of elements bundled in Chunks. -- The first two variants indicate termination of the stream. -- Chunk a gives the currently available part of the stream. -- The stream is not terminated yet. -- The case (null Chunk) signifies a stream with no currently available -- data but which is still continuing. A stream processor should, -- informally speaking, ``suspend itself'' and wait for more data -- to arrive. data (SC.StreamChunk c el) => StreamG c el = EOF | Error String | Chunk (c el) -- |Iteratee -- a generic stream processor, what is being folded over -- a stream -- When Iteratee is in the 'done' state, it contains the computed -- result and the remaining part of the stream. -- In the 'cont' state, the iteratee has not finished the computation -- and needs more input. -- We assume that all iteratees are `good' -- given bounded input, -- they do the bounded amount of computation and take the bounded amount -- of resources. The monad m describes the sort of computations done -- by the iteratee as it processes the stream. The monad m could be -- the identity monad (for pure computations) or the IO monad -- (to let the iteratee store the stream processing results as they -- are computed). -- We also assume that given a terminated stream, an iteratee -- moves to the done state, so the results computed so far could be returned. data IterateeG s el m a = Done a (StreamG s el) | Cont (StreamG s el -> IterateeGM s el m a) | Seek FileOffset (StreamG s el -> IterateeGM s el m a) instance (Show a) => Show (IterateeG s el m a) where show (Done a _) = "Iteratee done: " ++ P.show a show (Cont _k) = "Iteratee: incomplete" show (Seek f _k) = "Iteratee: seek to " ++ P.show f ++ "requested" newtype IterateeGM s el m a = IM {unIM :: m (IterateeG s el m a)} -- Useful combinators for implementing iteratees and enumerators -- |Lift an 'IterateeG' into an 'IterateeGM'. liftI :: Monad m => IterateeG s el m a -> IterateeGM s el m a liftI = IM . return {-# INLINE liftI #-} -- |Just like bind (at run-time, this is indeed exactly bind) infixl 1 >>== (>>==) :: Monad m => IterateeGM s el m a -> (IterateeG s el m a -> IterateeGM s' el' m b) -> IterateeGM s' el' m b m >>== f = IM (unIM m >>= unIM . f) {-# INLINE (>>==) #-} -- |Just like an application -- a call-by-value-like application infixr 1 ==<< (==<<) :: Monad m => (IterateeG s el m a -> IterateeGM s' el' m b) -> IterateeGM s el m a -> IterateeGM s' el' m b (==<<) = flip (>>==) -- |The following is a `variant' of join in the IterateeGM s el m monad -- When el' is the same as el, the type of joinI is indeed that of -- true monadic join. However, joinI is subtly different: since -- generally el' is different from el, it makes no sense to -- continue using the internal, IterateeG el' m a: we no longer -- have elements of the type el' to feed to that iteratee. -- We thus send EOF to the internal Iteratee and propagate its result. -- This join function is useful when dealing with `derived iteratees' -- for embedded/nested streams. In particular, joinI is useful to -- process the result of take, mapStream, or convStream below. joinI :: (SC.StreamChunk s el, SC.StreamChunk s' el', Monad m) => IterateeGM s el m (IterateeG s' el' m a) -> IterateeGM s el m a joinI m = m >>= (\iter -> enumEof iter >>== check) where check (Done x (Error str)) = liftI $ Done x (Error str) check (Done x _) = liftI $ Done x EOF check (Cont _) = error "joinI: can't happen: EOF didn't terminate" check (Seek _ _) = error "joinI: can't happen: EOF didn't terminate" -- It turns out, IterateeGM form a monad. We can use the familiar do -- notation for composing Iteratees instance (SC.StreamChunk s el, Monad m) => Monad (IterateeGM s el m) where return x = liftI $ Done x (Chunk SC.empty) m >>= f = iter_bind m f iter_bind :: (SC.StreamChunk s el, Monad m ) => IterateeGM s el m a -> (a -> IterateeGM s el m b) -> IterateeGM s el m b iter_bind m f = m >>== docase where docase (Done a (Chunk vec)) | SC.null vec = f a docase (Done a stream) = f a >>== (\r -> case r of Done x _ -> liftI $ Done x stream Cont k -> k stream iter -> liftI iter) docase (Cont k) = liftI $ Cont ((>>= f) . k) docase (Seek off k) = liftI $ Seek off ((>>= f) . k) {-# SPECIALIZE iter_bind :: SC.StreamChunk s el => IterateeGM s el IO a -> (a -> IterateeGM s el IO b) -> IterateeGM s el IO b #-} instance (Monad m, Functor m) => Functor (IterateeGM s el m) where fmap f m = m >>== docase where docase (Done a stream) = liftI $ Done (f a) stream docase (Cont k) = liftI $ Cont (fmap f . k) docase (Seek off k) = liftI $ Seek off (fmap f . k) instance (SC.StreamChunk s el) => MonadTrans (IterateeGM s el) where lift m = IM (m >>= unIM . return) instance (SC.StreamChunk s el, MonadIO m) => MonadIO (IterateeGM s el m) where liftIO = lift . liftIO -- ------------------------------------------------------------------------ -- Primitive iteratees -- |Read a stream to the end and return all of its elements as a list stream2list :: (SC.StreamChunk s el, Monad m) => IterateeGM s el m [el] stream2list = liftI $ Cont (step SC.empty) where step acc (Chunk ls) | SC.null ls = liftI $ Cont (step acc) | otherwise = liftI $ Cont (step $ acc `SC.append` ls) step acc stream = liftI $ Done (SC.toList acc) stream -- |Report and propagate an error. Disregard the input first and then -- propagate the error. iterErr :: (SC.StreamChunk s el, Monad m) => String -> IterateeGM s el m () iterErr err = liftI $ Cont step where step _ = liftI $ Done () (Error err) -- |Check to see if the stream is in error iterReportError :: (SC.StreamChunk s el, Monad m) => IterateeGM s el m (Maybe String) iterReportError = liftI $ Cont step where step s@(Error str) = liftI $ Done (Just str) s step s = liftI $ Done Nothing s -- ------------------------------------------------------------------------ -- Parser combinators -- |The analogue of List.break -- It takes an element predicate and returns a pair: -- (str, Just c) -- the element 'c' is the first element of the stream -- satisfying the break predicate; -- The chunk str is the prefix of the stream up -- to but including 'c' -- (str,Nothing) -- The stream is terminated with EOF or error before -- any element satisfying the break predicate was found. -- str is the scanned part of the stream. -- None of the element in str satisfy the break predicate. break :: (SC.StreamChunk s el, Monad m) => (el -> Bool) -> IterateeGM s el m (s el, Maybe el) break cpred = liftI $ Cont (liftI . step SC.empty) where step before (Chunk str) | SC.null str = Cont (liftI . step before) | otherwise = case SC.findIndex cpred str of Nothing -> Cont (liftI . step (before `SC.append` str)) Just ix -> let (str', tail') = SC.splitAt ix str in done (before `SC.append` str') (Just $ SC.head tail') (Chunk $ SC.tail tail') step before stream = done before Nothing stream done line' char = Done (line', char) -- |A particular optimized case of 'drop': skip all elements of the stream -- satisfying the given predicate - until the first element -- that does not satisfy the predicate, or the end of the stream. -- This is the analogue of List.dropWhile dropWhile :: (SC.StreamChunk s el, Monad m) => (el -> Bool) -> IterateeGM s el m () dropWhile cpred = liftI $ Cont step where step (Chunk str) | SC.null str = dropWhile cpred | otherwise = let remm = SC.dropWhile cpred str in case SC.null remm of True -> dropWhile cpred False -> liftI $ Done () (Chunk remm) step stream = liftI $ Done () stream -- |Attempt to read the next element of the stream -- Return (Just c) if successful, return Nothing if the stream is -- terminated (by EOF or an error) head :: (SC.StreamChunk s el, Monad m) => IterateeGM s el m (Maybe el) head = liftI $ Cont step where step (Chunk vec) | SC.null vec = head | otherwise = liftI $ Done (Just $ SC.head vec) (Chunk $ SC.tail vec) step stream = liftI $ Done Nothing stream -- |Look ahead at the next element of the stream, without removing -- it from the stream. -- Return (Just c) if successful, return Nothing if the stream is -- terminated (by EOF or an error) peek :: (SC.StreamChunk s el, Monad m) => IterateeGM s el m (Maybe el) peek = liftI $ Cont step where step s@(Chunk vec) | SC.null vec = peek | otherwise = liftI $ Done (Just $ SC.head vec) s step stream = liftI $ Done Nothing stream -- |Skip the rest of the stream skipToEof :: (SC.StreamChunk s el, Monad m) => IterateeGM s el m () skipToEof = liftI $ Cont step where step (Chunk _) = skipToEof step _ = return () -- |Skip n elements of the stream, if there are that many -- This is the analogue of List.drop drop :: (SC.StreamChunk s el, Monad m) => Int -> IterateeGM s el m () drop 0 = return () drop n = liftI $ Cont step where step (Chunk str) | SC.length str <= n = drop (n - SC.length str) step (Chunk str) = liftI $ Done () (Chunk s2) where (_s1,s2) = SC.splitAt n str step stream = liftI $ Done () stream -- |Create a request to seek within an input stream. This will result in -- an error if the enumerator is not capable of responding to a seek request. seek :: (SC.StreamChunk s el, Monad m) => FileOffset -> IterateeGM s el m () seek off = liftI (Seek off step) where step = liftI . Done () -- --------------------------------------------------- -- The converters show a different way of composing two iteratees: -- `vertical' rather than `horizontal' -- |The type of the converter from the stream with elements el_outer -- to the stream with element el_inner. The result is the iteratee -- for the outer stream that uses an `IterateeG el_inner m a' -- to process the embedded, inner stream as it reads the outer stream. type EnumeratorN s_outer el_outer s_inner el_inner m a = IterateeG s_inner el_inner m a -> IterateeGM s_outer el_outer m (IterateeG s_inner el_inner m a) -- |Read n elements from a stream and apply the given iteratee to the -- stream of the read elements. Unless the stream is terminated early, we -- read exactly n elements (even if the iteratee has accepted fewer). take :: (SC.StreamChunk s el, Monad m) => Int -> EnumeratorN s el s el m a take 0 iter = return iter take n iter@Done{} = drop n >> return iter take n (Seek _off k) = liftI $ Cont step where step chunk@(Chunk str) | SC.null str = liftI $ Cont step | SC.length str <= n = take (n - SC.length str) ==<< k chunk step (Chunk str) = done (Chunk s1) (Chunk s2) where (s1,s2) = SC.splitAt n str step stream = done stream stream done s1 s2 = k s1 >>== \r -> liftI $ Done r s2 take n (Cont k) = liftI $ Cont step where step chunk@(Chunk str) | SC.null str = liftI $ Cont step | SC.length str <= n = take (n - SC.length str) ==<< k chunk step (Chunk str) = done (Chunk s1) (Chunk s2) where (s1,s2) = SC.splitAt n str step stream = done stream stream done s1 s2 = k s1 >>== \r -> liftI $ Done r s2 -- |Read n elements from a stream and apply the given iteratee to the -- stream of the read elements. If the given iteratee accepted fewer -- elements, we stop. -- This is the variation of `take' with the early termination -- of processing of the outer stream once the processing of the inner stream -- finished early. This variation is particularly useful for randomIO, -- where we do not have to care to `drain the input stream'. takeR :: (SC.StreamChunk s el, Monad m) => Int -> EnumeratorN s el s el m a takeR 0 iter = return iter takeR _n iter@Done{} = return iter takeR _n iter@Seek{} = return iter takeR n (Cont k) = liftI $ Cont step where step chunk@(Chunk str) | SC.null str = liftI $ Cont step | SC.length str <= n = takeR (n - SC.length str) ==<< k chunk step (Chunk str) = done (Chunk s1) (Chunk s2) where (s1,s2) = SC.splitAt n str step stream = done stream stream done s1 s2 = k s1 >>== \r -> liftI $ Done r s2 -- |Map the stream: yet another iteratee transformer -- Given the stream of elements of the type el and the function el->el', -- build a nested stream of elements of the type el' and apply the -- given iteratee to it. -- Note the contravariance mapStream :: (SC.StreamChunk s el, SC.StreamChunk s el', Monad m) => (el -> el') -> EnumeratorN s el s el' m a mapStream _f iter@Done{} = return iter mapStream f (Cont k) = liftI $ Cont step where step (Chunk str) | SC.null str = liftI $ Cont step step (Chunk str) = k (Chunk (SC.cMap f str)) >>== mapStream f step EOF = k EOF >>== \r -> liftI $ Done r EOF step (Error err) = k (Error err) >>== \r -> liftI $ Done r (Error err) mapStream f (Seek off k) = liftI $ Seek off step where step (Chunk str) | SC.null str = liftI $ Cont step step (Chunk str) = k (Chunk (SC.cMap f str)) >>== mapStream f step EOF = k EOF >>== \r -> liftI $ Done r EOF step (Error err) = k (Error err) >>== \r -> liftI $ Done r (Error err) -- |Convert one stream into another, not necessarily in `lockstep' -- The transformer mapStream maps one element of the outer stream -- to one element of the nested stream. The transformer below is more -- general: it may take several elements of the outer stream to produce -- one element of the inner stream, or the other way around. -- The transformation from one stream to the other is specified as -- IterateeGM s el m (Maybe [el']). The `Maybe' type reflects the -- possibility of the conversion error. convStream :: (SC.StreamChunk s el, SC.StreamChunk s' el', Monad m) => IterateeGM s el m (Maybe (s' el')) -> EnumeratorN s el s' el' m a convStream _fi iter@Done{} = return iter convStream fi (Cont k) = fi >>= (convStream fi ==<<) . k . maybe (Error "conv: stream error") Chunk convStream fi (Seek _off k) = fi >>= (convStream fi ==<<) . k . maybe (Error "conv: stream error") Chunk {-# SPECIALIZE convStream :: (SC.StreamChunk s el, SC.StreamChunk s' el') => IterateeGM s el IO (Maybe (s' el')) -> EnumeratorN s el s' el' IO a #-} -- ------------------------------------------------------------------------ -- Enumerators -- |Each enumerator takes an iteratee and returns an iteratee -- an Enumerator is an iteratee transformer. -- The enumerator normally stops when the stream is terminated -- or when the iteratee moves to the done state, whichever comes first. -- When to stop is of course up to the enumerator... -- We have two choices of composition: compose iteratees or compose -- enumerators. The latter is useful when one iteratee -- reads from the concatenation of two data sources. type EnumeratorGM s el m a = IterateeG s el m a -> IterateeGM s el m a -- |More general enumerator type: enumerator that maps -- streams (not necessarily in lock-step). This is -- a flattened (`joinI-ed') EnumeratorN sfrom elfrom sto elto m a type EnumeratorGMM sfrom elfrom sto elto m a = IterateeG sto elto m a -> IterateeGM sfrom elfrom m a -- |The most primitive enumerator: applies the iteratee to the terminated -- stream. The result is the iteratee usually in the done state. enumEof :: (SC.StreamChunk s el, Monad m) => EnumeratorGM s el m a enumEof (Done x _) = liftI $ Done x EOF enumEof (Cont k) = k EOF enumEof (Seek _off k) = k EOF -- |Another primitive enumerator: report an error enumErr :: (SC.StreamChunk s el, Monad m) => String -> EnumeratorGM s el m a enumErr str (Done x _) = liftI $ Done x (Error str) enumErr str (Cont k) = k (Error str) enumErr str (Seek _off k) = k (Error str) -- |The composition of two enumerators: essentially the functional composition -- It is convenient to flip the order of the arguments of the composition -- though: in e1 >. e2, e1 is executed first (>.):: (SC.StreamChunk s el, Monad m) => EnumeratorGM s el m a -> EnumeratorGM s el m a -> EnumeratorGM s el m a e1 >. e2 = (e2 ==<<) . e1 -- |The pure 1-chunk enumerator -- It passes a given list of elements to the iteratee in one chunk -- This enumerator does no IO and is useful for testing of base parsing enumPure1Chunk :: (SC.StreamChunk s el, Monad m) => s el -> EnumeratorGM s el m a enumPure1Chunk _str iter@Done{} = liftI iter enumPure1Chunk str (Cont k) = k (Chunk str) enumPure1Chunk _str (Seek _off _k) = fail "enumPure1Chunk cannot handle random IO" -- |The pure n-chunk enumerator -- It passes a given lift of elements to the iteratee in n chunks -- This enumerator does no IO and is useful for testing of base parsing -- and handling of chunk boundaries enumPureNChunk :: (SC.StreamChunk s el, Monad m) => s el -> Int -> EnumeratorGM s el m a enumPureNChunk _str _n iter@Done{} = liftI iter enumPureNChunk str _n iter | SC.null str = liftI iter enumPureNChunk str n (Cont k) = enumPureNChunk s2 n ==<< k (Chunk s1) where (s1,s2) = SC.splitAt n str enumPureNChunk _str _n (Seek _off _k) = fail "enumPureNChunk cannot handle ranom IO"