module Codec.Mbox
(
Mbox(..)
, MboxMessage(..)
, Direction(..)
, parseMboxFile
, parseMboxFiles
, parseMbox
, safeParseMbox
, parseOneMboxMessage
, showMbox
, showMboxMessage
, showMboxFromLine
, mboxMsgSender
, mboxMsgTime
, mboxMsgBody
, mboxMsgFile
, mboxMsgOffset
, Month(..)
, fromQuoting
, msgYear
, msgMonthYear
, opposite
) where
import Control.Arrow (first,second)
import Control.Applicative ((<$>))
import qualified Data.ByteString.Lazy.Char8 as C
import Data.ByteString.Lazy (ByteString)
import Data.Int (Int64)
import Data.Maybe (listToMaybe)
import System.IO
import System.IO.Unsafe (unsafeInterleaveIO)
data P a b = !a :*: !b
first' :: (a -> b) -> P a c -> P b c
first' f !(a :*: c) = f a :*: c
uncurry' :: (a -> b -> c) -> P a b -> c
uncurry' f (x :*: y) = f x y
newtype Mbox s = Mbox { mboxMessages :: [MboxMessage s] }
deriving (Eq, Ord, Show)
data MboxMessage s = MboxMessage { _mboxMsgSender :: s
, _mboxMsgTime :: s
, _mboxMsgBody :: s
, _mboxMsgFile :: FilePath
, _mboxMsgOffset :: Int64 }
deriving (Eq, Ord, Show)
mboxMsgSender :: Functor f => (a -> f a) -> MboxMessage a -> f (MboxMessage a)
mboxMsgSender f (MboxMessage s t b p o) = (\x -> MboxMessage x t b p o) <$> f s
mboxMsgTime :: Functor f => (a -> f a) -> MboxMessage a -> f (MboxMessage a)
mboxMsgTime f (MboxMessage s t b p o) = (\x -> MboxMessage s x b p o) <$> f t
mboxMsgBody :: Functor f => (a -> f a) -> MboxMessage a -> f (MboxMessage a)
mboxMsgBody f (MboxMessage s t b p o) = (\x -> MboxMessage s t x p o) <$> f b
mboxMsgFile :: Functor f => (FilePath -> f FilePath) -> MboxMessage a -> f (MboxMessage a)
mboxMsgFile f (MboxMessage s t b p o) = (\x -> MboxMessage s t b x o) <$> f p
mboxMsgOffset :: Functor f => (Int64-> f Int64) -> MboxMessage a -> f (MboxMessage a)
mboxMsgOffset f (MboxMessage s t b p o) = (\x -> MboxMessage s t b p x) <$> f o
readYear :: MboxMessage C.ByteString -> C.ByteString -> Int
readYear m s =
case reads $ C.unpack s of
[(i, "")] -> i
_ -> error ("readYear: badly formatted date (year) in " ++ show (C.unpack $ _mboxMsgTime m))
data Month = Jan
| Feb
| Mar
| Apr
| May
| Jun
| Jul
| Aug
| Sep
| Oct
| Nov
| Dec
deriving (Read, Show, Eq)
readMonth :: MboxMessage C.ByteString -> C.ByteString -> Month
readMonth m s =
case reads $ C.unpack s of
[(month, "")] -> month
_ -> error ("readMonth: badly formatted date (month) in " ++ show (C.unpack $ _mboxMsgTime m))
msgYear :: MboxMessage C.ByteString -> Int
msgYear m = readYear m . last . C.split ' ' . _mboxMsgTime $ m
msgMonthYear :: MboxMessage C.ByteString -> (Month,Int)
msgMonthYear m =
case filter (not . C.null) . C.split ' ' . _mboxMsgTime $ m of
[_wday, month, _mday, _hour, year] -> (readMonth m month, readYear m year)
_ -> error ("msgMonthYear: badly formatted date in " ++ show (C.unpack $ _mboxMsgTime m))
nextFrom :: ByteString -> Maybe (Int64, ByteString, ByteString)
nextFrom !orig = goNextFrom 0 orig
where goNextFrom !count !input = do
off <- (+1) <$> C.elemIndex '\n' input
let (nls, i') = first C.length $ C.span (=='\n') $ C.drop off input
if C.take 5 i' == bFrom
then let off' = off + count + (nls 1) in
Just (off', C.take off' orig, C.drop 5 i')
else goNextFrom (off + count + nls) i'
fromQuoting :: (Int64 -> Int64) -> C.ByteString -> C.ByteString
fromQuoting onLevel = C.tail . nextQuotedFrom . C.cons '\n'
where nextQuotedFrom !orig = goNextQuotedFrom 0 orig
where goNextQuotedFrom !count !input =
case C.elemIndex '\n' input of
Nothing ->
if C.null input then orig else orig `C.snoc` '\n'
Just off ->
let (!level, i') = first C.length $ C.span (=='>') $ C.drop (off + 1) input in
if C.take 5 i' == bFrom
then C.take (off + count) orig `C.append`
mkQuotedFrom (onLevel level) `C.append`
nextQuotedFrom (C.drop 5 i')
else goNextQuotedFrom (off + level + count + 1) i'
mkQuotedFrom :: Int64 -> C.ByteString
mkQuotedFrom n | n < 0 = error "mkQuotedFrom: negative quoting"
| otherwise = '\n' `C.cons` C.replicate n '>' `C.append` bFrom
bFrom :: ByteString
bFrom = C.pack "From "
skipFirstFrom :: ByteString -> Either String ByteString
skipFirstFrom xs | bFrom == C.take 5 xs = Right $ C.drop 5 xs
| otherwise = Left "skipFirstFrom: badly formatted mbox: 'From ' expected at the beginning"
safeParseMbox :: FilePath -> Int64 -> ByteString -> Either String (Mbox ByteString)
safeParseMbox fp offset s | C.null s = Right $ Mbox []
| otherwise = Mbox . map (uncurry' $ finishMboxMessageParsing fp) . splitMboxMessages offset
<$> skipFirstFrom s
parseMbox :: ByteString -> Mbox ByteString
parseMbox = either error id . safeParseMbox "" 0
splitMboxMessages :: Int64 -> ByteString -> [P Int64 ByteString]
splitMboxMessages !offset !input =
case nextFrom input of
Nothing | C.null input -> []
| otherwise -> [(offset :*: input)]
Just (!offset', !msg, rest) -> (offset :*: msg) : splitMboxMessages (6 + offset + offset') rest
finishMboxMessageParsing :: FilePath -> Int64 -> ByteString -> MboxMessage ByteString
finishMboxMessageParsing fp !offset !inp = MboxMessage sender time (fromQuoting pred body) fp offset
where ((sender,time),body) = first (breakAt ' ') $ breakAt '\n' inp
breakAt c = second (C.drop 1 ) . C.break (==c)
showMbox :: Mbox ByteString -> ByteString
showMbox = C.intercalate (C.singleton '\n') . map showMboxMessage . mboxMessages
showMboxFromLine :: MboxMessage ByteString -> ByteString
showMboxFromLine (MboxMessage sender time _ _ _) =
C.append bFrom
. C.append sender
. C.cons ' '
. C.append time
. C.cons '\n'
$ C.empty
showMboxMessage :: MboxMessage ByteString -> ByteString
showMboxMessage msg = showMboxFromLine msg `C.append` fromQuoting (+1) (_mboxMsgBody msg)
parseOneMboxMessage :: FilePath -> Handle -> Integer -> IO (MboxMessage C.ByteString)
parseOneMboxMessage fp fh offset = do
hSeek fh AbsoluteSeek offset
s <- C.hGetContents fh
a <- either fail return $ safeParseMbox fp (fromInteger offset) s
(maybe (fail "parseOneMboxMessage: end of file") return . listToMaybe . mboxMessages) a
readRevMboxFile :: FilePath -> IO (Mbox ByteString)
readRevMboxFile fp = readRevMboxHandle fp =<< openFile fp ReadMode
readRevMboxHandle :: FilePath -> Handle -> IO (Mbox ByteString)
readRevMboxHandle fp fh = do siz <- hFileSize fh
readRevMbox fp siz <$> readHandleBackward mboxChunkSize siz fh
readRevMbox :: FilePath -> Integer -> [ByteString] -> Mbox ByteString
readRevMbox fp filesize chunks = Mbox $ go (fromInteger filesize+1) (filter (not . C.null) chunks)
where go _ [] = []
go !siz (chunk1:cs) =
case nextFrom chunk1 of
Nothing -> kont siz cs chunk1
Just (!_backoffset, !msg, rest) ->
let siz' = siz C.length rest 6 in
(map finishmmp . reverse . map (first' (+siz')) . splitMboxMessages 0 $ rest)
++ kont siz' cs msg
kont !_ [] = (:[]) . finishLast
kont !siz (chunk2:cs2) = \k -> go siz (chunk2 `C.append` k : cs2)
finishLast =
finishMboxMessageParsing fp 0 . either (error . ("readRevMbox: impossible: " ++)) id . skipFirstFrom
finishmmp = uncurry' $ finishMboxMessageParsing fp
readHandleBackward :: Integer -> Integer -> Handle -> IO [ByteString]
readHandleBackward maxChunkSize siz0 fh = go siz0
where go 0 = return []
go siz = unsafeInterleaveIO $
do let delta = min maxChunkSize siz
siz' = siz delta
hSeek fh AbsoluteSeek siz'
s <- C.hGet fh $ fromInteger delta
(s :) <$> go siz'
data Direction = Backward | Forward
opposite :: Direction -> Direction
opposite Forward = Backward
opposite Backward = Forward
parseMboxFile :: Direction -> FilePath -> IO (Mbox ByteString)
parseMboxFile Forward fp = (either fail return =<<) . (safeParseMbox fp 0 <$>) . C.readFile $ fp
parseMboxFile Backward fp = readRevMboxFile fp
parseMboxFiles :: Direction -> [FilePath] -> IO [Mbox ByteString]
parseMboxFiles Forward [] = (:[]) . parseMbox <$> C.getContents
parseMboxFiles Backward [] = fail "reading backward on standard input does not make sense"
parseMboxFiles dir xs = mapM (unsafeInterleaveIO . parseMboxFile dir) xs
mboxChunkSize :: Integer
mboxChunkSize = 10*oneMegabyte
oneMegabyte :: Integer
oneMegabyte = 2 ^ (20 :: Int)