----------------------------------------------------------------------------- -- | -- Module: Data.Enumerator -- Copyright: 2010 John Millikin -- License: MIT -- -- Maintainer: jmillikin@gmail.com -- Portability: portable -- -- An implementation of Oleg Kiselyov’s left-fold enumerators -- ----------------------------------------------------------------------------- module Data.Enumerator ( -- * Types Stream (..) , Step (..) , Iteratee (..) , Enumerator , Enumeratee -- * Primitives -- ** Combinators -- | These are common patterns which occur whenever iteratees are -- being defined. , returnI , yield , continue , throwError , catchError , mapError , liftI , (>>==) , (==<<) , ($$) , (>==>) , (<==<) -- ** Iteratees , run , consume , isEOF , liftTrans , liftFoldL , liftFoldL' , liftFoldM , printChunks -- ** Enumerators , enumEOF , enumList , concatEnums -- ** Enumeratees , checkDone , Data.Enumerator.map , Data.Enumerator.sequence , joinI -- * Parser combinators -- | Oleg’s original @IterateeM.hs@ includes some basic iteratees -- for parsing, so this section ports them to the new interface. However, -- in practice most parsing will be performed with enumerator-based -- interfaces to existing parser libraries (such as Parsec or Attoparsec). , Data.Enumerator.head , peek , Data.Enumerator.last , Data.Enumerator.length , Data.Enumerator.drop , Data.Enumerator.dropWhile , span , Data.Enumerator.break ) where import Data.List (genericDrop, genericLength, genericSplitAt) import Data.Monoid (Monoid, mempty, mappend, mconcat) import qualified Control.Applicative as A import Control.Monad (liftM, ap) import qualified Control.Monad.IO.Class as MIO import qualified Control.Monad.Trans.Class as MT import Control.Monad.Fix (fix) import qualified Data.List as DataList import Control.Monad (foldM) import qualified Control.Exception as E import Prelude hiding (span) import qualified Prelude as Prelude -- | Not to be confused with types from the @Stream@ or -- @stream-fusion@ packages, a 'Stream' is a sequence of chunks -- generated by an 'Enumerator'. In contrast to Oleg’s implementation, -- this stream does not support error handling -- errors encountered -- while generating a stream are reported in the 'Step' type instead. -- -- @(Chunks [])@ is used to indicate that a stream is still active, but -- currently has no available data. Iteratees should ignore empty chunks. data Stream a = Chunks [a] | EOF deriving (Show, Eq) data Step e a m b -- | The 'Iteratee' is capable of accepting more input. Note that more input -- is not necessarily required; the 'Iteratee' might be able to generate a -- value immediately if it receives 'EOF'. = Continue (Stream a -> Iteratee e a m b) -- | The 'Iteratee' has received enough input to generate a result. -- Included in this value is left-over input, which can be passed to -- composed 'Iteratee's. | Yield b (Stream a) -- | The 'Iteratee' encountered an error which prevents it from proceeding -- further. The type of error will depend on the 'Enumerator' and/or -- 'Iteratee' -- common choices are 'String' and 'E.SomeException'. | Error e -- | The primary data type for this library, which consumes -- input from a 'Stream' until it either generates a value or encounters -- an error. Rather than requiring all input at once, an iteratee will -- return 'Continue' when it is capable of processing more data. -- -- In general, iteratees begin in the 'Continue' state. As each chunk is -- passed to the continuation, the iteratee returns the next step: -- 'Continue' for more data, 'Yield' when it's finished, or 'Error' to -- abort processing. newtype Iteratee e a m b = Iteratee { runIteratee :: m (Step e a m b) } -- | While 'Iteratee's consume data, enumerators generate it. Since -- @'Iteratee'@ is an alias for @m ('Step' e a m b)@, 'Enumerator's can -- be considered step transformers of type -- @'Step' e a m b -> m ('Step' e a m b)@. -- -- 'Enumerator's typically read from an external source (parser, handle, -- random generator, etc). They feed chunks into an 'Iteratee' until the -- source runs out of data (triggering 'EOF') or the iteratee finishes -- processing ('Yield's a value). type Enumerator e a m b = Step e a m b -> Iteratee e a m b -- | In cases where an enumerator acts as both a source and sink, the resulting -- type is named an 'Enumeratee'. Enumeratees have two input types, -- “outer a” (@aOut@) and “inner a” (@aIn@). type Enumeratee e aOut aIn m b = Step e aIn m b -> Iteratee e aOut m (Step e aIn m b) instance Monoid (Stream a) where mempty = Chunks mempty mappend (Chunks xs) (Chunks ys) = Chunks $ mappend xs ys mappend _ _ = EOF instance Functor Stream where fmap f (Chunks xs) = Chunks $ fmap f xs fmap _ EOF = EOF instance Monad Stream where return = Chunks . return Chunks xs >>= f = mconcat $ fmap f xs EOF >>= _ = EOF instance Monad m => Monad (Iteratee e a m) where return x = Iteratee . return $ Yield x $ Chunks [] {-# INLINE return #-} m >>= f = Iteratee $ runIteratee m >>= \r1 -> case r1 of Continue k -> return $ Continue ((>>= f) . k) Error err -> return $ Error err Yield x (Chunks []) -> runIteratee $ f x Yield x chunk -> runIteratee (f x) >>= \r2 -> case r2 of Continue k -> runIteratee $ k chunk Error err -> return $ Error err Yield x' _ -> return $ Yield x' chunk instance Monad m => Functor (Iteratee e a m) where fmap = liftM {-# INLINE fmap #-} instance Monad m => A.Applicative (Iteratee e a m) where pure = return {-# INLINE pure #-} (<*>) = ap {-# INLINE (<*>) #-} instance MT.MonadTrans (Iteratee e a) where lift m = Iteratee $ m >>= runIteratee . return {-# INLINE lift #-} instance MIO.MonadIO m => MIO.MonadIO (Iteratee e a m) where liftIO = MT.lift . MIO.liftIO {-# INLINE liftIO #-} -- | Lift an 'Iteratee' onto a monad transformer, re-wrapping the -- 'Iteratee'’s inner monadic values. liftTrans :: (Monad m, MT.MonadTrans t, Monad (t m)) => Iteratee e a m b -> Iteratee e a (t m) b liftTrans iter = Iteratee $ do step <- MT.lift $ runIteratee iter return $ case step of Yield x cs -> Yield x cs Error err -> Error err Continue k -> Continue (liftTrans . k) -- | @returnI x = Iteratee (return x)@ returnI :: Monad m => Step e a m b -> Iteratee e a m b returnI = Iteratee . return {-# INLINE returnI #-} -- | @yield x chunk = returnI (Yield x chunk)@ yield :: Monad m => b -> Stream a -> Iteratee e a m b yield x chunk = returnI (Yield x chunk) {-# INLINE yield #-} -- | @continue k = returnI (Continue k)@ continue :: Monad m => (Stream a -> Iteratee e a m b) -> Iteratee e a m b continue = returnI . Continue {-# INLINE continue #-} -- | @throwError err = returnI (Error err)@ throwError :: Monad m => e -> Iteratee e a m b throwError = returnI . Error {-# INLINE throwError #-} -- | @liftI f = continue (returnI . f)@ liftI :: Monad m => (Stream a -> Step e a m b) -> Iteratee e a m b liftI k = continue $ returnI . k {-# INLINE liftI #-} catchError :: Monad m => Iteratee e a m b -> (e -> Iteratee e a m b) -> Iteratee e a m b catchError iter h = Iteratee $ do step <- runIteratee iter case step of Error err -> runIteratee (h err) _ -> return step -- | A pseudo-'Enumerator' which alters the error representation of its -- iteratee. Use this to compose enumerators and iteratees with differing -- or sub-optimal error types. mapError :: Monad m => (e1 -> e2) -> Step e1 a m b -> Iteratee e2 a m b mapError conv = fix $ \loop s -> case s of Yield x chunks -> yield x chunks Continue k -> continue ((loop $$) . k) Error err -> throwError (conv err) infixl 1 >>== -- | Equivalent to (>>=), but allows 'Iteratee's with different input types -- to be composed. (>>==) :: Monad m => Iteratee e a m b -> (Step e a m b -> Iteratee e' a' m b') -> Iteratee e' a' m b' i >>== f = Iteratee $ runIteratee i >>= runIteratee . f {-# INLINE (>>==) #-} infixr 1 ==<< -- | @(==\<\<) = flip (\>\>==)@ (==<<):: Monad m => (Step e a m b -> Iteratee e' a' m b') -> Iteratee e a m b -> Iteratee e' a' m b' (==<<) = flip (>>==) {-# INLINE (==<<) #-} infixr 0 $$ -- | @($$) = (==\<\<)@ -- -- This might be easier to read when passing a chain of iteratees to an -- enumerator. ($$):: Monad m => (Step e a m b -> Iteratee e' a' m b') -> Iteratee e a m b -> Iteratee e' a' m b' ($$) = (==<<) {-# INLINE ($$) #-} infixr 1 >==> -- | @(>==>) e1 e2 s = e1 s >>== e2@ (>==>) :: Monad m => Enumerator e a m b -> (Step e a m b -> Iteratee e' a' m b') -> Step e a m b -> Iteratee e' a' m b' (>==>) e1 e2 s = e1 s >>== e2 {-# INLINE (>==>) #-} infixr 1 <==< -- | @(\<==\<) = flip (>==>)@ (<==<) :: Monad m => (Step e a m b -> Iteratee e' a' m b') -> Enumerator e a m b -> Step e a m b -> Iteratee e' a' m b' (<==<) = flip (>==>) {-# INLINE (<==<) #-} -- | Consume all input until 'EOF', then return consumed input as a list. consume :: Monad m => Iteratee e a m [a] consume = liftI $ step id where step acc chunk = case chunk of Chunks [] -> Continue $ returnI . step acc Chunks xs -> Continue $ returnI . (step $ acc . (xs ++)) EOF -> Yield (acc []) EOF -- | Return 'True' if the next 'Stream' is 'EOF'. isEOF :: Monad m => Iteratee e a m Bool isEOF = liftI $ \c -> case c of EOF -> Yield True c _ -> Yield False c -- | Lifts a pure left fold into an iteratee. liftFoldL :: Monad m => (b -> a -> b) -> b -> Iteratee e a m b liftFoldL f = liftI . step where step acc chunk = case chunk of Chunks [] -> Continue $ returnI . step acc Chunks xs -> Continue $ returnI . step (Prelude.foldl f acc xs) EOF -> Yield acc EOF -- | As 'liftFoldL', but strict in its accumulator. liftFoldL' :: Monad m => (b -> a -> b) -> b -> Iteratee e a m b liftFoldL' f = liftI . step where fold = DataList.foldl' f step acc chunk = case chunk of Chunks [] -> Continue $ returnI . step acc Chunks xs -> Continue $ returnI . (step $! fold acc xs) EOF -> Yield acc EOF -- | Lifts a monadic left fold into an iteratee. liftFoldM :: Monad m => (b -> a -> m b) -> b -> Iteratee e a m b liftFoldM f = continue . step where step acc chunk = case chunk of Chunks [] -> continue $ step acc Chunks xs -> Iteratee $ liftM (Continue . step) (foldM f acc xs) EOF -> yield acc EOF -- | Run an iteratee until it finishes, and return either the final value -- (if it succeeded) or the error (if it failed). run :: Monad m => Iteratee e a m b -> m (Either e b) run i = do mStep <- runIteratee $ enumEOF ==<< i case mStep of Error err -> return $ Left err Yield x _ -> return $ Right x Continue _ -> error "run: divergent iteratee" -- | Print chunks as they're received from the enumerator, optionally -- printing empty chunks. printChunks :: (MIO.MonadIO m, Show a) => Bool -> Iteratee e a m () printChunks printEmpty = continue step where step (Chunks []) | not printEmpty = continue step step (Chunks xs) = MIO.liftIO (print xs) >> continue step step EOF = MIO.liftIO (putStrLn "EOF") >> yield () EOF -- | The most primitive enumerator; simply sends 'EOF'. The iteratee must -- either yield a value or throw an error continuing receiving 'EOF' will -- not terminate with any useful value. enumEOF :: Monad m => Enumerator e a m b enumEOF (Yield x _) = yield x EOF enumEOF (Error err) = throwError err enumEOF (Continue k) = k EOF >>== check where check (Continue _) = error "enumEOF: divergent iteratee" check s = enumEOF s -- | Another small, useful enumerator separates an input list into chunks, -- and sends them to the iteratee. This is useful for testing iteratees in pure -- code. enumList :: Monad m => Integer -> [a] -> Enumerator e a m b enumList n xs (Continue k) | not (null xs) = k chunk >>== loop where (s1, s2) = genericSplitAt n xs chunk = Chunks s1 loop = enumList n s2 enumList _ _ step = returnI step -- | Compose a list of 'Enumerator's using '(>>==)' concatEnums :: Monad m => [Enumerator e a m b] -> Enumerator e a m b concatEnums = foldl (>==>) returnI {-# INLINE concatEnums #-} -- | 'joinI' is used to “flatten” 'Enumeratee's into an -- 'Iteratee'. joinI :: Monad m => Iteratee e a m (Step e a' m b) -> Iteratee e a m b joinI outer = outer >>= check where check (Continue k) = k EOF >>== \s -> case s of Continue _ -> error "joinI: divergent iteratee" _ -> check s check (Yield x _) = return x check (Error e) = throwError e -- | A common pattern in 'Enumeratee' implementations is to check whether -- the inner 'Iteratee' has finished, and if so, to return its output. -- 'checkDone' passes its parameter a continuation if the 'Iteratee' -- can still consume input, or yields otherwise. checkDone :: Monad m => ((Stream a -> Iteratee e a m b) -> Iteratee e a' m (Step e a m b)) -> Enumeratee e a' a m b checkDone _ (Yield x chunk) = return $ Yield x chunk checkDone f (Continue k) = f k checkDone _ (Error err) = throwError err {-# INLINE checkDone #-} map :: Monad m => (ao -> ai) -> Enumeratee e ao ai m b map f = loop where loop = checkDone $ continue . step step k EOF = yield (Continue k) EOF step k (Chunks []) = continue $ step k step k (Chunks xs) = k (Chunks (Prelude.map f xs)) >>== loop sequence :: Monad m => Iteratee e ao m ai -> Enumeratee e ao ai m b sequence i = loop where loop = checkDone check check k = isEOF >>= \f -> if f then yield (Continue k) EOF else step k step k = i >>= \v -> k (Chunks [v]) >>== loop head :: Monad m => Iteratee e a m (Maybe a) head = liftI step where step (Chunks []) = Continue $ returnI . step step (Chunks (x:xs)) = Yield (Just x) (Chunks xs) step EOF = Yield Nothing EOF peek :: Monad m => Iteratee e a m (Maybe a) peek = liftI step where step (Chunks []) = Continue $ returnI . step step chunk@(Chunks (x:_)) = Yield (Just x) chunk step chunk = Yield Nothing chunk last :: Monad m => Iteratee e a m (Maybe a) last = liftI $ step Nothing where step ret (Chunks xs) = let ret' = case xs of [] -> ret _ -> Just $ Prelude.last xs in Continue $ returnI . step ret' step ret EOF = Yield ret EOF length :: Monad m => Iteratee e a m Integer length = liftI $ step 0 where step n (Chunks xs) = Continue $ returnI . step (n + genericLength xs) step n EOF = Yield n EOF drop :: Monad m => Integer -> Iteratee e a m () drop 0 = return () drop n = liftI $ step n where step n' (Chunks xs) | len xs < n' = Continue $ returnI . step (n' - len xs) | otherwise = Yield () $ Chunks $ genericDrop n' xs step _ EOF = Yield () EOF len = genericLength dropWhile :: Monad m => (a -> Bool) -> Iteratee e a m () dropWhile p = liftI step where step (Chunks xs) = case Prelude.dropWhile p xs of [] -> Continue $ returnI . step xs' -> Yield () $ Chunks xs' step EOF = Yield () EOF span :: Monad m => (a -> Bool) -> Iteratee e a m [a] span f = liftI $ step [] where step acc (Chunks xs) = case Prelude.span f xs of (_, []) -> Continue $ returnI . step (acc ++ xs) (head', tail') -> Yield (acc ++ head') (Chunks tail') step acc EOF = Yield acc EOF -- | @break p = 'span' (not . p)@ break :: Monad m => (a -> Bool) -> Iteratee e a m [a] break p = span $ not . p