----------------------------------------------------------------------------- {- | Module : Data.MBox Copyright : (c) Gershom Bazerman, 2009 License : BSD 3 Clause Maintainer : gershomb@gmail.com Stability : experimental Reads and writes mboxrd files as per . This parser is written to be a streaming parser. Given a lazy source of data and a streaming consumer, you should be able to analyze arbitrary mbox files in constant space. -} ------------------------------------------------------------------------- module Data.MBox.String (MBox, Message(..), Header, parseMBox, parseForward, parseDateHeader, showMessage, showMBox, getHeader, isID, isDate) where import Prelude hiding (tail, init, last, minimum, maximum, foldr1, foldl1, (!!), read) import Control.Arrow import Data.List (isPrefixOf) import Data.Char import System.Locale import Data.Time import Safe type MBox = [Message] data Message = Message {fromLine :: String, headers :: [Header], body :: String} deriving (Read, Show) type Header = (String, String) -- | Reads a date header as a UTCTime parseDateHeader :: String -> Maybe UTCTime parseDateHeader = parseTime defaultTimeLocale "%A, %B %e, %Y %l:%M %p" -- | Attempts to retrieve the contents of a forwarded message from an enclosing message. parseForward :: Message -> Message parseForward origMsg@(Message f _ b) = case drop 1 $ dropWhile (/= "-----Original Message-----") (lines b) of [] -> origMsg xs -> headDef origMsg . parseMBox . unlines $ f:xs -- | Reads a string as an mbox file. parseMBox :: String -> MBox parseMBox = go . lines where go [] = [] go (x:xs) = uncurry (:) . (readMsg x *** go) . break ("From " `isPrefixOf`) $ xs readMsg :: String -> [String] -> Message readMsg x xs = uncurry (Message x) . second (unlines . map unquoteFrom). readHeaders $ xs readHeaders :: [String] -> ([Header], [String]) readHeaders [] = ([],[]) readHeaders (x:xs) | null x || all isSpace x || not (any (==':') x) = ([],xs) | otherwise = first ((second (killSpace . sanHeader . (++ headerCont) . drop 1) . break (==':') $ x):) $ readHeaders xs' where (headerCont, xs') = first ((" " ++) . unlines . map killSpace) . break notCont $ xs notCont [] = True notCont (c:cs) = not (isSpace c) || (all isSpace cs) unquoteFrom :: String -> String unquoteFrom xs'@('>':xs) = if "From " `isPrefixOf` dropWhile (=='>') xs then xs else xs' unquoteFrom xs = xs sanHeader :: String -> String sanHeader = map (\x -> if x == '\n' then ' ' else x) -- | Renders an MBox into a String showMBox :: MBox -> String showMBox = concatMap showMessage -- | Renders an individual message into a String. showMessage :: Message -> String showMessage (Message f hs b) = unlines $ f : map (\(x,y) -> (x ++ ": " ++ y)) hs ++ ["\n"] ++ map unFrom (lines b) where unFrom x | isFrom x = '>':x | otherwise = x isFrom x = "From " `isPrefixOf` dropWhile (=='>') x killSpace :: String -> String killSpace = dropWhile isSpace . dropEndWhile isSpace dropEndWhile :: (a -> Bool) -> [a] -> [a] dropEndWhile p = foldr (\x xs -> if p x && null xs then [] else x:xs) [] -- | Header accessors isID :: Header -> Bool isID (x, _y) = x == "Message-ID" isDate :: Header -> Bool isDate (x, _y) = x == "Date" getHeader :: (Header -> Bool) -> Message -> String getHeader p = snd . head . filter p . headers