{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Network.Email.Header.Parser
(
fws
, cfws
, lexeme
, symbol
, commaSep
, dateTime
, address
, mailbox
, mailboxList
, recipient
, recipientList
, messageID
, messageIDList
, phrase
, phraseList
, unstructured
, mimeVersion
, contentType
, contentTransferEncoding
) where
import Control.Applicative
import Control.Monad
import Data.Attoparsec.ByteString (Parser)
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.Attoparsec.Combinator
import Data.Bits
import qualified Data.ByteString as B
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.Internal (w2c)
import qualified Data.ByteString.Lazy as L (toStrict)
import Data.ByteString.Lazy.Builder
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI
import Data.List
import qualified Data.Map.Strict as Map
import Data.Maybe
import qualified Data.Text as T
import Data.Text.Encoding
import qualified Data.Text.Lazy as L (Text, fromChunks)
import Data.Time
import Data.Time.Calendar.WeekDate
import Data.Word
import Network.Email.Charset
import Network.Email.Header.Types hiding (mimeType)
infixl 3 <+>
(<+>) :: (Applicative f, Monoid a) => f a -> f a -> f a
(<+>) = liftA2 mappend
concatMany :: (Alternative f, Monoid a) => f a -> f a
concatMany p = mconcat <$> many p
parseMaybe :: Monad m => String -> Maybe a -> m a
parseMaybe s = maybe (fail s) return
parseEither :: Monad m => Either String a -> m a
parseEither = either fail return
toByteString :: Builder -> B.ByteString
toByteString = L.toStrict . toLazyByteString
fws :: Parser ()
fws = A8.skipSpace
comment :: Parser B.ByteString
comment = A8.char '(' *> A.scan (0 :: Int, False) f <* A8.char ')'
where
f (!n, True ) _ = Just (n, False)
f (!n, False) w = case w2c w of
'(' -> Just (n + 1, False)
')' -> if n == 0 then Nothing else Just (n - 1, False)
'\\' -> Just (n, True)
_ -> Just (n, False)
cfws :: Parser ()
cfws = () <$ fws `sepBy` comment
lexeme :: Parser a -> Parser a
lexeme p = p <* cfws
symbol :: Char -> Parser Char
symbol = lexeme . A8.char
fromDigit :: Integral a => Word8 -> a
fromDigit w = fromIntegral (w - 48)
digits :: Integral a => Int -> Parser a
digits 0 = return 0
digits 1 = fromDigit <$> A.satisfy A8.isDigit_w8
digits n = do
s <- A.take n
unless (B.all A8.isDigit_w8 s) $
fail $ "expected " ++ show n ++ " digits"
return $ B.foldl' (\a w -> 10*a + fromDigit w) 0 s
number :: Integral a => Int -> Parser a
number = lexeme . digits
hexPair :: Parser Word8
hexPair = decode <$> hexDigit <*> hexDigit
where
decode a b = shiftL a 4 .|. b
hexDigit = fromHexDigit <$> A.satisfy isHexDigit
isHexDigit w = w >= 48 && w <= 57
|| w >= 64 && w <= 70
|| w >= 97 && w <= 102
fromHexDigit w
| w >= 97 = w - 87
| w >= 64 = w - 55
| otherwise = w - 48
tokenWith :: String -> Parser B.ByteString
tokenWith specials = lexeme (A.takeWhile1 isAtom)
where
isAtom w = w <= 126 && w >= 33 && A.notInClass specials w
atom :: Parser B.ByteString
atom = tokenWith "()<>[]:;@\\\",."
dotAtom :: Parser B.ByteString
dotAtom = tokenWith "()<>[]:;@\\\","
token :: Parser B.ByteString
token = tokenWith "()<>@,;:\\\"/[]?="
tokenCI :: Parser (CI B.ByteString)
tokenCI = CI.mk <$> token
quotedString :: Parser B.ByteString
quotedString = lexeme $
toByteString <$ A8.char '"' <*> concatMany quotedChar <* A8.char '"'
where
quotedChar = mempty <$ A.string "\r\n"
<|> word8 <$ A8.char '\\' <*> A.anyWord8
<|> char8 <$> A8.satisfy (/= '"')
encodedWord :: Parser T.Text
encodedWord = do
_ <- A.string "=?"
name <- B8.unpack <$> tokenWith "()<>@,;:\"/[]?.="
_ <- A8.char '?'
method <- decodeMethod
_ <- A8.char '?'
enc <- method
_ <- A.string "?="
charset <- parseMaybe "charset not found" $ lookupCharset name
return $ toUnicode charset enc
where
decodeMethod = quoted <$ A.satisfy (`B.elem` "Qq")
<|> base64String <$ A.satisfy (`B.elem` "Bb")
quoted = toByteString <$> concatMany quotedChar
quotedChar = char8 ' ' <$ A8.char '_'
<|> word8 <$ A8.char '=' <*> hexPair
<|> char8 <$> A8.satisfy (not . isBreak)
isBreak c = A8.isSpace c || c == '?'
base64String = do
s <- A8.takeWhile (/= '?')
parseEither (Base64.decode s)
scanString :: Char -> Char -> Parser Builder
scanString start end = lexeme $ do
s <- byteString <$ A8.char start <*> A8.scan False f <* A8.char end
return (char8 start <> s <> char8 end)
where
f True _ = Just False
f False c
| c == end = Nothing
| c == '\\' = Just True
| otherwise = Just False
addrSpec :: Parser B.ByteString
addrSpec = toByteString <$> (localPart <+> at <+> domain)
where
at = char8 <$> symbol '@'
dot = char8 <$> symbol '.'
dotSep p = p <+> concatMany (dot <+> p)
addrAtom = byteString <$> atom
addrQuote = scanString '"' '"'
domainLiteral = scanString '[' ']'
localPart = dotSep (addrAtom <|> addrQuote)
domain = dotSep addrAtom <|> domainLiteral
angleAddrSpec :: Parser B.ByteString
angleAddrSpec = symbol '<' *> addrSpec <* symbol '>'
sepBy2 :: Alternative f => f a -> f b -> f [a]
sepBy2 p sep = (:) <$> p <*> many1 (sep *> p)
optionalSepBy1 :: Alternative f => f a -> f b -> f [a]
optionalSepBy1 p sep = catMaybes <$> sepBy2 (optional p) sep
<|> return <$> p
commaSep :: Parser a -> Parser [a]
commaSep p = optionalSepBy1 p (symbol ',')
dateTime :: Parser ZonedTime
dateTime = do
wday <- optional dayOfWeek
zoned <- zonedTime
let (_, _, expected) =
toWeekDate . localDay . zonedTimeToLocalTime $ zoned
case wday of
Just actual | actual /= expected
-> fail "day of week does not match date"
_ -> return zoned
where
dayOfWeek = dayName <* symbol ','
localTime = LocalTime <$> date <*> timeOfDay
zonedTime = ZonedTime <$> localTime <*> timeZone
date = do
d <- lexeme A8.decimal
m <- month
y <- year
parseMaybe "invalid date" $ fromGregorianValid y m d
year = number 4
<|> (+ 1900) <$> number 3
<|> adjust <$> number 2
where
adjust n | n < 50 = 2000 + n
| otherwise = 1900 + n
timeOfDay = do
h <- number 2
m <- symbol ':' *> number 2
s <- option (0 :: Int) (symbol ':' *> number 2)
parseMaybe "invalid time of day" $
makeTimeOfDayValid h m (fromIntegral s)
timeZone = minutesToTimeZone <$> timeZoneOffset
<|> return utc
timeZoneOffset = lexeme . A8.signed $ do
hh <- digits 2
mm <- digits 2
if mm >= 60
then fail "invalid time zone"
else return $ hh * 60 + mm
listIndex = lexeme . choice . map (\(n, s) -> n <$ A.string s) . zip [1..]
dayName = listIndex [ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" ]
month = listIndex [ "Jan", "Feb", "Mar", "Apr", "May", "Jun"
, "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
]
angleAddr :: Parser Address
angleAddr = Address <$> angleAddrSpec
address :: Parser Address
address = Address <$> addrSpec
mailbox :: Parser Mailbox
mailbox = Mailbox <$> optional phrase <*> angleAddr
<|> Mailbox Nothing <$> address
mailboxList :: Parser [Mailbox]
mailboxList = commaSep mailbox
recipient :: Parser Recipient
recipient = Group <$> phrase <* symbol ':' <*> mailboxList <* symbol ';'
<|> Individual <$> mailbox
recipientList :: Parser [Recipient]
recipientList = commaSep recipient
messageID :: Parser MessageID
messageID = MessageID <$> angleAddrSpec
messageIDList :: Parser [MessageID]
messageIDList = many1 messageID
fromElements :: [T.Text] -> L.Text
fromElements = L.fromChunks . intersperse (T.singleton ' ')
phrase :: Parser L.Text
phrase = fromElements <$> many1 element
where
element = T.concat <$> many1 (lexeme encodedWord)
<|> decodeLatin1 <$> quotedString
<|> decodeLatin1 <$> dotAtom
phraseList :: Parser [L.Text]
phraseList = commaSep phrase
unstructured :: Parser L.Text
unstructured = fromElements <$ fws <*> many element <* A.endOfInput
where
element = T.concat <$> many1 (encodedWord <* fws)
<|> decodeLatin1 <$> word <* fws
word = A.takeWhile1 (not . A8.isSpace_w8)
mimeVersion :: Parser (Int, Int)
mimeVersion = (,) <$> number 1 <* symbol '.' <*> number 1
contentType :: Parser (MimeType, Parameters)
contentType = (,) <$> mimeType <*> parameters
where
mimeType = MimeType <$> tokenCI <* symbol '/' <*> tokenCI
parameters = Map.fromList <$> many (symbol ';' *> parameter)
parameter = (,) <$> tokenCI <* symbol '=' <*> (token <|> quotedString)
contentTransferEncoding :: Parser (CI B.ByteString)
contentTransferEncoding = tokenCI