{-# LANGUAGE OverloadedStrings #-} ------------------------------------------------------------------------------ -- | Module: Main -- Copyright: Martin Grabmueller -- License: BSD3 -- -- Maintainer: martin@grabmueller.de -- Stability: provisional -- Portability: GHC and Linux only -- -- This file contains an example on using the -- pipes-attoparsec-streaming package. -- -- Part of this code was adapted from Bryan O'Sullivan's HTTP parser -- example on his blog. -- -- Currently, this parser only recognizes headers (like in e-mails), -- the end-of-header empty line and the parser chunks the remainder of -- the input into suitable pieces. ------------------------------------------------------------------------------ module Main(main) where import Control.Pipe.Attoparsec.Stream import Data.Word import Control.Pipe import Control.Pipe.Combinators as Combinators import Data.Attoparsec(Parser) import qualified Data.Attoparsec as A import qualified Data.Attoparsec.Char8 as A8 import Data.ByteString(ByteString) import qualified Data.ByteString as B import Control.Applicative import Control.Monad.Trans.Class(lift) ------------------------------------------------------------------------------ -- First, we start with some useful utility parsers. ------------------------------------------------------------------------------ -- Predicate for testing whether a byte may appear in a MIME token. -- isToken :: Word8 -> Bool isToken w = w <= 127 && A.notInClass "\0-\31()<>@,;:\\\"/[]?={} \t" w -- Skip horizontal space. At least one space must be -- available. Return the skipped whitespace. -- takeSpaces :: Parser ByteString takeSpaces = B.cons <$> A.satisfy A8.isHorizontalSpace <*> A.takeWhile A8.isHorizontalSpace -- Recognize and return an end-of-line sequence, which must be either -- a single newline or a carriage-newline combination. -- endOfLine :: Parser ByteString endOfLine = (A.word8 10 >> return "\n") <|> A.string "\r\n" -- Return @n@ bytes, or less if the end of input is reached. takeAtMost :: Int -> Parser ByteString takeAtMost n = A.scan n (\ n' _ -> if n' == 0 then Nothing else (Just (n' - 1))) ------------------------------------------------------------------------------ -- Events. These are the parse results that are streamed. ------------------------------------------------------------------------------ -- | The generated events. -- -- Each event contains exactly the bytes which were parsed, so that -- the original input can be reconstructed from the events. -- data Event = Header ByteString ByteString [(ByteString, ByteString, ByteString)] | EndOfHeader ByteString | BodyChunk ByteString | EndOfInput deriving (Show) ------------------------------------------------------------------------------ -- The parser. ------------------------------------------------------------------------------ -- | Parse a single message header, including continuation lines. -- messageHeader :: Parser (PartialResult Event) messageHeader = do -- Header name. header <- B.cons <$> A.satisfy isToken <*> A.takeWhile isToken -- Delimiter. delim <- B.snoc <$> A.takeWhile A8.isHorizontalSpace <*> A8.char8 ':' -- Parse the first line of the header value. body <- (,,) <$> A.takeWhile A8.isHorizontalSpace <*> A.takeTill A8.isEndOfLine <*> endOfLine -- Parse the remaining lines of the header value. Each one must -- begin with at least one horizontal space. bodies <- many $ (,,) <$> takeSpaces <*> A.takeTill A8.isEndOfLine <*> endOfLine -- Return the parsed header and return ourself as the continuation, -- to parse more headers. return (PartialResult (Just (Header header delim (body:bodies))) (Just messageHeader)) <|> -- When no header can be parsed, return no result and set the -- @headerEnd@ parser as the continuation. return (PartialResult Nothing (Just headerEnd)) -- | Parse the end-of-header marker (an empty line). -- headerEnd :: Parser (PartialResult Event) headerEnd = do -- This one is easy. s <- endOfLine -- Continue with a body chunk. return $! PartialResult (Just (EndOfHeader s)) (Just bodyChunk) -- | Parse a chunk of message body data. -- bodyChunk :: Parser (PartialResult Event) bodyChunk = do -- Parse a small chunk. s <- takeAtMost 10 if B.null s -- When end-of-input is reached, indicate that with the correct -- event and don't return a continuation. then return $ PartialResult (Just EndOfInput) Nothing -- Return the chunk and try again. else return $ PartialResult (Just (BodyChunk s)) (Just bodyChunk) ------------------------------------------------------------------------------ -- Example invocation. ------------------------------------------------------------------------------ -- | Tiny example message. -- msg :: ByteString msg = "Received : foo\r\n cont'd\r\nDate: now\r\n\r\nblabla\r\nsecond line - longer" -- | Small helper for printing the parsed events. -- dump :: Pipe Event Void IO () dump = go where go = do e <- await lift $ print e go -- | Main program. Run the parser on a small example and print the -- streamed parsing results. -- main :: IO () main = do runPipe $ fromList [msg] >+> parse messageHeader >+> dump