-- Haskell98! -- |Utilties for Char-based iteratee processing. -- The running example, parts 1 and 2 -- Part 1 is reading the headers, the sequence of lines terminated by an -- empty line. Each line is terminated by CR, LF, or CRLF. -- We should return the headers in order. In the case of error, -- we should return the headers read so far and the description of the error. -- Part 2 is reading the headers and reading all the lines from the -- HTTP-chunk-encoded content that follows the headers. Part 2 thus -- verifies layering of streams, and processing of one stream -- embedded (chunk encoded) into another stream. module Data.Iteratee.Char ( -- * Type synonyms Stream, Iteratee, EnumeratorM, Line, -- * Word and Line processors line, printLines, readLines, enumLines, enumWords, module Data.Iteratee.Base ) where import qualified Data.Iteratee.Base as Iter import Data.Iteratee.Base hiding (break) import Data.Char import Control.Monad.Trans import qualified Data.ListLike as LL import Data.Monoid -- |A particular instance of StreamG: the stream of characters. -- This stream is used by many input parsers. type Stream = StreamG [] Char type Iteratee = IterateeG [] Char -- Useful combinators for implementing iteratees and enumerators type Line = String -- The line of text, terminators are not included -- |Read the line of text from the stream -- The line can be terminated by CR, LF or CRLF. -- Return (Right Line) if successful. Return (Left Line) if EOF or -- a stream error were encountered before the terminator is seen. -- The returned line is the string read so far. -- The code is the same as that of pure Iteratee, only the signature -- has changed. -- Compare the code below with GHCBufferIO.line_lazy line :: Monad m => IterateeG [] Char m (Either Line Line) line = Iter.break (\c -> c == '\r' || c == '\n') >>= \l -> terminators >>= check l where check l 0 = return . Left $ l check l _ = return . Right $ l terminators = heads "\r\n" >>= \l -> if l == 0 then heads "\n" else return l -- Line iteratees: processors of a stream whose elements are made of Lines -- Collect all read lines and return them as a list -- see stream2list -- |Print lines as they are received. This is the first `impure' iteratee -- with non-trivial actions during chunk processing printLines :: IterateeG [] Char IO () printLines = lines' where lines' = Iter.break (\c -> c == '\r' || c == '\n') >>= \l -> terminators >>= check l check _ 0 = return () check "" _ = return () check l _ = liftIO (putStrLn l) >> lines' terminators = heads "\r\n" >>= \l -> if l == 0 then heads "\n" else return l -- |Read a sequence of lines from the stream up to the empty lin -- The line can be terminated by CR, LF, or CRLF -- or by EOF or stream error. -- Return the read lines, in order, not including the terminating empty line -- Upon EOF or stream error, return the complete, terminated lines accumulated -- so far. readLines :: (Monad m) => IterateeG [] Char m (Either [Line] [Line]) readLines = lines' [] where lines' acc = Iter.break (\c -> c == '\r' || c == '\n') >>= \l -> terminators >>= check acc l check acc _ 0 = return . Left . reverse $ acc -- no terminator found check acc "" _ = return . Right . reverse $ acc check acc l _ = lines' (l:acc) terminators = heads "\r\n" >>= \l -> if l == 0 then heads "\n" else return l -- |Convert the stream of characters to the stream of lines, and -- apply the given iteratee to enumerate the latter. -- The stream of lines is normally terminated by the empty line. -- When the stream of characters is terminated, the stream of lines -- is also terminated, abnormally. -- This is the first proper iteratee-enumerator: it is the iteratee of the -- character stream and the enumerator of the line stream. enumLines :: (LL.ListLike (s el) el, LL.StringLike (s el), Functor m, Monad m) => IterateeG [] (s el) m a -> IterateeG s el m (IterateeG [] (s el) m a) enumLines = convStream getter where getter = IterateeG step lChar = (== '\n') . last . LL.toString step (Chunk xs) | LL.null xs = return $ Cont getter Nothing | lChar xs = return $ Done (Just $ LL.lines xs) (Chunk mempty) | True = return $ Cont (IterateeG (step' xs)) Nothing step str = return $ Done Nothing str step' xs (Chunk ys) | LL.null ys = return $ Cont (IterateeG (step' xs)) Nothing | lChar ys = return $ Done (Just . LL.lines . mappend xs $ ys) (Chunk mempty) | True = let w' = LL.lines $ mappend xs ys ws = init w' ck = last w' in return $ Done (Just ws) (Chunk ck) step' xs str = return $ Done (Just $ LL.lines xs) str -- |Convert the stream of characters to the stream of words, and -- apply the given iteratee to enumerate the latter. -- Words are delimited by white space. -- This is the analogue of List.words -- One should keep in mind that enumWords is a more general, monadic -- function. enumWords :: (LL.ListLike (s el) el , LL.StringLike (s el) , Functor m, Monad m) => IterateeG [] (s el) m a -> IterateeG s el m (IterateeG [] (s el) m a) enumWords = convStream getter where getter = IterateeG step lChar = isSpace . last . LL.toString step (Chunk xs) | LL.null xs = return $ Cont getter Nothing step (Chunk xs) | LL.null xs = return $ Cont getter Nothing | lChar xs = return $ Done (Just $ LL.words xs) (Chunk mempty) | True = return $ Cont (IterateeG (step' xs)) Nothing step str = return $ Done Nothing str step' xs (Chunk ys) | LL.null ys = return $ Cont (IterateeG (step' xs)) Nothing | lChar ys = return $ Done (Just . LL.words . mappend xs $ ys) (Chunk mempty) | True = let w' = LL.words $ mappend xs ys ws = init w' ck = last w' in return $ Done (Just ws) (Chunk ck) step' xs str = return $ Done (Just $ LL.words xs) str {-# INLINE enumWords #-} -- ------------------------------------------------------------------------ -- Enumerators type EnumeratorM m a = EnumeratorGM [] Char m a