{-# LANGUAGE ViewPatterns #-} ----------------------------------------------------------------------------- {- | Module : Data.MBox Copyright : (c) Gershom Bazerman, 2009; ported to Text by Alexander Jerneck, 2012 License : BSD 3 Clause Maintainer : gershomb@gmail.com Stability : experimental Reads and writes mboxrd files as per . This module uses Lazy Text pervasively, and should be able to operate as a streaming parser. That is to say, given a lazy stream of Text, and a streaming processing function, you should be able to analyze large mbox files in constant space. -} ------------------------------------------------------------------------- module Data.MBox (MBox, Message(..), Header, parseMBox, parseForward, parseDateHeader, showMessage, showMBox, getHeader, isID, isDate) where import Prelude hiding (tail, init, last, minimum, maximum, foldr1, foldl1, (!!), read) import Control.Arrow import Data.Char import System.Locale import Data.Time import Safe import qualified Data.Text.Lazy as T type MBox = [Message] data Message = Message {fromLine :: T.Text, headers :: [Header], body :: T.Text} deriving (Read, Show) type Header = (T.Text, T.Text) -- | Reads a date header as a UTCTime parseDateHeader :: T.Text -> Maybe UTCTime parseDateHeader = parseTime defaultTimeLocale "%A, %B %e, %Y %l:%M %p" . T.unpack -- | Attempts to retrieve the contents of a forwarded message from an enclosing message. parseForward :: Message -> Message parseForward origMsg@(Message f _ b) = case drop 1 $ dropWhile (/= T.pack "-----Original Message-----") (T.lines b) of [] -> origMsg xs -> headDef origMsg . parseMBox . T.unlines $ f:xs -- | Parses Text as an mbox file. parseMBox :: T.Text -> MBox parseMBox = go . T.lines where go [] = [] go (x:xs) = uncurry (:) . (readMsg x *** go) . break ((T.pack "From ") `T.isPrefixOf`) $ xs readMsg :: T.Text -> [T.Text] -> Message readMsg x xs = uncurry (Message x) . second (T.unlines . map unquoteFrom). readHeaders $ xs readHeaders :: [T.Text] -> ([Header], [T.Text]) readHeaders [] = ([],[]) readHeaders (x:xs) | T.null x || T.all isSpace x || not (T.any (==':') x) = ([],xs) | otherwise = first ((second (T.strip . sanHeader . (`T.append` headerCont) . T.drop 1) . T.break (==':') $ x):) $ readHeaders xs' where (headerCont, xs') = first ((T.pack " " `T.append`) . T.unlines . map T.strip) . break notCont $ xs notCont :: T.Text -> Bool notCont s = doesNotStartSpace s || allSpace s allSpace = T.all isSpace doesNotStartSpace s = case T.length s of 0 -> True _ -> not (isSpace $ T.head s) unquoteFrom :: T.Text -> T.Text unquoteFrom xs'@(T.stripPrefix (T.pack ">") -> Just suf) = if (T.pack "From ") `T.isPrefixOf` T.dropWhile (=='>') suf then suf else xs' unquoteFrom xs = xs sanHeader :: T.Text -> T.Text sanHeader = T.replace (T.pack "\n") (T.pack " ") -- | Renders an MBox into Text showMBox :: MBox -> T.Text showMBox = T.concat . map showMessage -- | Renders an individual message into Text. showMessage :: Message -> T.Text showMessage (Message f hs b) = T.unlines $ f : formatHeaders hs ++ [(T.pack "\n")] ++ formatBody b where formatHeaders = map (\(x,y) -> x `T.append` (T.pack ": ") `T.append` y) formatBody = map unFrom . T.lines unFrom x | isFrom x = '>' `T.cons` x | otherwise = x isFrom x = (T.pack "From ") `T.isPrefixOf` T.dropWhile (=='>') x -- | Return True if header is a Message-ID header. isID :: Header -> Bool isID (x, _) = x == T.pack "Message-ID" -- | Return True if header is a Date header. isDate :: Header -> Bool isDate (x, _) = x == T.pack "Date" -- | Return the values of headers for which predicate is True getHeader :: (Header -> Bool) -> Message -> T.Text getHeader predFunc = snd . head . filter predFunc . headers