-- We assume we have US-ASCII characters module Codec.MIME.String.Headers ( Domain(Domain, LiteralDomain), Mailbox(Mailbox), RoutedEmailAddress(RoutedEmailAddress, NormalEmailAddress), EmailAddress(EmailAddress), get_addr_spec, Address(Address, Group), ContentType(ContentType), get_content_type, ContentDescription(ContentDescription), get_content_description, ContentTransferEncoding(ContentTransferEncoding), get_content_transfer_encoding, ContentID(ContentID), get_content_id, MessageID(MessageID), MIMEVersion(MIMEVersion), get_mime_version, Parameter(Parameter), p_parameter, From(From), To(To), Subject(Subject), get_from, get_to, get_subject, get_boundary, p_extension_token, p_value, p_quoted_string, cws, p_ci_string, ) where import Prelude hiding ( (<*>), (<$>), (<*), (<$) ) import Codec.MIME.String.Internal.ABNF ( Parser, parse, pSucceed, pFail, (<$>), (<$), (<*>), (<*), (<|>), (<|), (), pEOI, pPred, pChar, pMany, pAtLeast, pMaybe, pOptDef, pString, ) import qualified Codec.Binary.Base64.String as Base64 (decode) import qualified Codec.Binary.EncodingQ.String as EncodingQ (decode) import Codec.MIME.String.Internal.Utils import Codec.Text.IConv import qualified Data.ByteString.Lazy.Char8 as BS import Data.Char import Data.List ----------------------- -- Utils ignore :: Parser inp a -> Parser inp () ignore p = () <$ p boxp :: Parser inp a -> Parser inp [a] boxp p = box <$> p ----------------------- -- RFC 2234 p_CTL :: Parser Char Char p_CTL = pPred (\c -> ord c < 32 || ord c == 127) p_SP :: Parser Char Char p_SP = pChar ' ' p_HTAB :: Parser Char Char p_HTAB = pChar '\t' p_WSP :: Parser Char Char p_WSP = p_SP <|> p_HTAB ----------------------- -- RFC 2822 -- Case insensitive strings, written "Foo" p_ci_string :: String -> Parser Char String p_ci_string s = s <$ f s where f "" = pSucceed () f (c:cs) = let p = if isAsciiAlpha c then pChar (toLower c) <| pChar (toUpper c) else pChar c in () <$ p <* f cs p_NO_WS_CTL :: Parser Char Char p_NO_WS_CTL = pPred (\c -> let o = ord c in 1 <= o && o <= 8 || o == 11 || o == 12 || 14 <= o && o <= 31 || o == 127) -- If we follow the spec precisely then we get pMany (pMany), and hence -- non-termination, so we merge the definition of p_obs_text in. p_text :: Parser Char String p_text = concat <$> pMany ( p_encoded_words <| boxp (pPred (\c -> let o = ord c in 0 <= o && o <= 9 || o == 11 || o == 12 || 14 <= o && o <= 127)) ) -- We are lax about checking they have any necessary surrounding -- whitespace p_encoded_words :: Parser Char String p_encoded_words = (\x xs -> x ++ concat xs) <$> p_encoded_word <*> pMany (id <$ cws <*> p_encoded_word) -- XXX What happens if iconv doesn't understand the charset "cs"? p_encoded_word :: Parser Char String p_encoded_word = (\cs dec text -> BS.unpack $ convertFuzzy Transliterate cs "utf8" $ BS.pack $ dec text) <$ pString "=?" <*> p_charset <* pChar '?' <*> p_encoding <* pChar '?' <*> p_encoded_text <* pString "?=" -- token definition inlined as they use a different one to p_token. p_charset :: Parser Char String p_charset = pAtLeast 1 (pPred isAscii (p_SP <|> p_CTL <|> p_especials)) p_especials :: Parser Char Char p_especials = pPred (`elem` "()<>@,;:\\\"/[]?.=") -- This is much stricter than specified, but if it's not [qQbB] then -- we'd want to fall back to showing it as a string anyway. p_encoding :: Parser Char (String -> String) p_encoding = EncodingQ.decode <$ (pChar 'Q' <|> pChar 'q') <|> Base64.decode <$ (pChar 'B' <|> pChar 'b') p_encoded_text :: Parser Char String p_encoded_text = pMany (pPred (\c -> isAsciiPrint c && c /= '?' && c /= ' ')) p_quoted_pair :: Parser Char String p_quoted_pair = id <$ pChar '\\' <*> p_text <|> boxp p_obs_qp p_obs_qp :: Parser Char Char p_obs_qp = id <$ pChar '\\' <*> pPred isAscii -- Done differently as the newlines are already gone p_FWS :: Parser Char String p_FWS = pMany p_WSP p_ctext :: Parser Char Char p_ctext = p_NO_WS_CTL <|> pPred (\c -> let o = ord c in 33 <= o && o <= 39 || 42 <= o && o <= 91 || 93 <= o && o <= 126) p_ccontent :: Parser Char () p_ccontent = ignore p_ctext <|> ignore p_quoted_pair <|> p_comment p_comment :: Parser Char () p_comment = () <$ pChar '(' <* pMany (() <$ pMany p_NO_WS_CTL <* p_ccontent) <* pMany p_NO_WS_CTL <* pChar ')' -- We might want to keep the result. If we do then we also need to -- handle encoded words properly. -- This isn't quite CFWS as we need to be able to accept "1.0" -- as a MIME version with cws between all the characters. -- Also, we've already removed all the newlines in the headers. cws :: Parser Char () cws = ignore $ pMany (ignore (pAtLeast 1 p_WSP) <|> p_comment) p_qtext :: Parser Char Char p_qtext = p_NO_WS_CTL <|> pPred (\c -> let o = ord c in o == 33 || 35 <= o && o <= 91 || 93 <= o && o <= 126) p_qcontent :: Parser Char String p_qcontent = boxp p_qtext <|> p_quoted_pair p_quoted_string :: Parser Char String p_quoted_string = (++) <$ cws <* pChar '"' <*> (concat <$> pMany ((++) <$> pOptDef "" p_FWS <*> p_qcontent)) <*> pOptDef "" p_FWS <* pChar '"' p_dcontent :: Parser Char String p_dcontent = boxp p_dtext <|> p_quoted_pair p_dtext :: Parser Char Char p_dtext = p_NO_WS_CTL <|> pPred (\c -> let o = ord c in 33 <= o && o <= 90 || 94 <= o && o <= 126) data MessageID = MessageID String Domain deriving (Show, Read) p_msg_id :: Parser Char MessageID p_msg_id = MessageID <$ cws <* pChar '<' <*> p_id_left <* pChar '@' <*> p_id_right <* pChar '>' <* cws p_atom :: Parser Char String p_atom = id <$ cws <*> pAtLeast 1 p_atext <* cws p_atext :: Parser Char Char p_atext = pPred (\c -> isAsciiAlphaNum c || c `elem` "!#$%&'+-/=?^_`{|}~") p_dot_atom :: Parser Char String p_dot_atom = id <$ cws <*> p_dot_atom_text <* cws p_word :: Parser Char String p_word = p_atom <|> p_quoted_string -- This incorporates obs-phrase p_phrase :: Parser Char [String] p_phrase = (:) <$> (p_encoded_words <| p_word) <*> pMany (id <$ cws <*> (p_encoded_words <| p_word <| pString ".")) <|> boxp p_quoted_string p_dot_atom_text :: Parser Char String p_dot_atom_text = (\x xs -> x ++ concat xs) <$> pAtLeast 1 p_atext <*> pMany ((:) <$> pChar '.' <*> pAtLeast 1 p_atext) p_id_left :: Parser Char String p_id_left = p_dot_atom_text <|> p_no_fold_quote <|> p_obs_id_left p_id_right :: Parser Char Domain p_id_right = Domain <$> p_dot_atom_text <|> p_no_fold_literal <|> p_obs_id_right p_obs_id_left :: Parser Char String p_obs_id_left = p_local_part p_local_part :: Parser Char String p_local_part = p_dot_atom <|> p_quoted_string <|> p_obs_local_part p_obs_local_part :: Parser Char String p_obs_local_part = (\x xs -> x ++ concat xs) <$> p_word <*> pMany ((:) <$> pChar '.' <*> p_word) p_domain :: Parser Char Domain p_domain = Domain <$> p_dot_atom <|> p_domain_literal <|> p_obs_domain p_domain_literal :: Parser Char Domain p_domain_literal = (LiteralDomain . concat) <$ cws <* pChar '[' <*> pMany ( id <$ p_FWS <*> p_dcontent) <* p_FWS <* pChar ']' <* cws p_obs_domain :: Parser Char Domain p_obs_domain = (\x xs -> Domain (x ++ concat xs)) <$> p_atom <*> pMany ((:) <$> pChar '.' <*> p_atom) p_obs_id_right :: Parser Char Domain p_obs_id_right = p_domain p_no_fold_quote :: Parser Char String p_no_fold_quote = concat <$ pChar '"' <*> pMany (boxp p_qtext <|> p_quoted_pair) <* pChar '"' data Domain = Domain String | LiteralDomain String deriving (Show, Read, Eq) p_no_fold_literal :: Parser Char Domain p_no_fold_literal = LiteralDomain . concat <$ pChar '[' <*> pMany (boxp p_dtext <|> p_quoted_pair) <* pChar ']' newtype Subject = Subject String deriving (Show, Read) get_subject :: String -> Maybe Subject get_subject xs = case parse ph_subject xs of Left cd -> Just cd Right _ -> Nothing -- This is actually the RFC822 definition, as otherwise things get very -- confusing. -- Would be pMany, but p_text already does that for us ph_subject :: Parser Char Subject ph_subject = Subject <$> p_text <* pEOI newtype From = From [Mailbox] deriving (Show, Read, Eq) get_from :: String -> Maybe From get_from xs = case parse ph_from xs of Left f -> Just f Right _ -> Nothing ph_from :: Parser Char From ph_from = From <$ cws <*> p_mailbox_list <* cws <* pEOI newtype To = To [Address] deriving (Show, Read) data Address = Address Mailbox | Group String [Mailbox] deriving (Show, Read) get_to :: String -> Maybe To get_to xs = case parse ph_to xs of Left t -> Just t Right _ -> Nothing ph_to :: Parser Char To ph_to = To <$ cws <*> p_address_list <* cws <* pEOI -- obs-addr-list merged in p_address_list :: Parser Char [Address] p_address_list = (:) <$ pMany (() <$ pChar ',' <* cws) <*> p_address <*> pMany ( id <$ pAtLeast 1 (() <$ cws <* pChar ',') <* cws <*> p_address) <* pMany (() <$ cws <* pChar ',') p_address :: Parser Char Address p_address = Address <$> p_mailbox <|> p_group p_group :: Parser Char Address p_group = Group <$> p_display_name <* cws <* pChar ':' <* cws <*> pOptDef [] p_mailbox_list <* cws <* pChar ';' -- obs-mbox-list merged in p_mailbox_list :: Parser Char [Mailbox] p_mailbox_list = (:) <$ pMany (() <$ pChar ',' <* cws) <*> p_mailbox <*> pMany ( id <$ pAtLeast 1 (() <$ cws <* pChar ',') <* cws <*> p_mailbox) <* pMany (() <$ cws <* pChar ',') data Mailbox = Mailbox (Maybe String) RoutedEmailAddress deriving (Show, Read, Eq) p_mailbox :: Parser Char Mailbox p_mailbox = p_name_addr <|> (Mailbox Nothing . NormalEmailAddress) <$> p_addr_spec p_name_addr :: Parser Char Mailbox p_name_addr = Mailbox <$> pMaybe p_display_name <* cws <*> p_angle_addr data EmailAddress = EmailAddress String Domain deriving (Show, Read, Eq) data RoutedEmailAddress = NormalEmailAddress EmailAddress | RoutedEmailAddress [Domain] EmailAddress deriving (Show, Read, Eq) p_angle_addr :: Parser Char RoutedEmailAddress p_angle_addr = ($) <$ pChar '<' <* cws -- This next makes us also satisfy obs-angle-addr <*> pOptDef NormalEmailAddress (RoutedEmailAddress <$> p_obs_route <* cws) <*> p_addr_spec <* cws <* pChar '>' get_addr_spec :: String -> Maybe EmailAddress get_addr_spec xs = case parse p_addr_spec xs of Left e -> Just e Right _ -> Nothing p_addr_spec :: Parser Char EmailAddress p_addr_spec = EmailAddress <$> p_local_part <* cws <* pChar '@' <* cws <*> p_domain p_display_name :: Parser Char String p_display_name = (concat . intersperse " ") <$> p_phrase p_obs_route :: Parser Char [Domain] p_obs_route = id <$> p_obs_domain_list <* pChar ':' p_obs_domain_list :: Parser Char [Domain] p_obs_domain_list = (:) <$ pChar '@' <* cws <*> p_domain <*> pMany ( id <$ pMaybe (() <$ cws <* pChar ',') <* cws <* pChar '@' <* cws <*> p_domain) ----------------------- -- RFC 2045 data MIMEVersion = MIMEVersion Integer Integer deriving (Show, Read) get_mime_version :: String -> Maybe MIMEVersion get_mime_version xs = case parse ph_mime_version xs of Left ct -> Just ct Right _ -> Nothing ph_mime_version :: Parser Char MIMEVersion ph_mime_version = MIMEVersion <$ cws <*> (read <$> pMany (pPred isAsciiDigit)) <* cws <* pChar '.' <* cws <*> (read <$> pMany (pPred isAsciiDigit)) <* cws <* pEOI data ContentType = ContentType String -- Case insensitive: lower-cased String -- Case insensitive: lower-cased [Parameter] deriving (Show, Read) data Parameter = Parameter String -- Case insensitive: lower-cased String deriving (Show, Read) get_content_type :: String -> Maybe ContentType get_content_type xs = case parse ph_content_type xs of Left ct -> Just ct Right _ -> Nothing ph_content_type :: Parser Char ContentType ph_content_type = ContentType <$ cws <*> p_type <* cws <* pChar '/' <* cws <*> p_subtype <*> pMany (id <$ cws <* pChar ';' <* cws <*> p_parameter) <* cws <* pEOI -- For type and subtypes, allow anything that matches a regexp that -- subsumes the currently allowed values p_type :: Parser Char String p_type = pAtLeast 1 (pPred (\c -> isAsciiAlphaNum c || c `elem` "-.+")) p_subtype :: Parser Char String p_subtype = pAtLeast 1 (pPred (\c -> isAsciiAlphaNum c || c `elem` "-.+")) {- p_type :: Parser Char String p_type = p_discrete_type <|> p_composite_type p_subtype :: Parser Char String p_subtype = map asciiToLower <$> (p_extension_token <|> p_iana_token) p_discrete_type :: Parser Char String p_discrete_type = p_ci_string "text" <|> p_ci_string "image" <|> p_ci_string "audio" <|> p_ci_string "video" <|> p_ci_string "application" <|> map asciiToLower <$> p_extension_token p_composite_type :: Parser Char String p_composite_type = p_ci_string "message" <|> p_ci_string "multipart" <|> map asciiToLower <$> p_extension_token p_iana_token :: Parser Char String p_iana_token = pFail -} p_extension_token :: Parser Char String p_extension_token = p_ietf_token <|> p_x_token p_ietf_token :: Parser Char String p_ietf_token = pFail p_x_token :: Parser Char String p_x_token = (\x t -> x:'-':t) <$> (pChar 'X' <|> pChar 'x') <* pChar '-' <*> p_token p_parameter :: Parser Char Parameter p_parameter = Parameter <$> p_attribute <* cws <* pChar '=' <* cws <*> p_value p_attribute :: Parser Char String p_attribute = map asciiToLower <$> p_token p_value :: Parser Char String p_value = p_token <|> p_quoted_string p_token :: Parser Char String p_token = pAtLeast 1 (pPred isAscii (p_SP <|> p_CTL <|> p_tspecials)) p_tspecials :: Parser Char Char p_tspecials = pPred (`elem` "()<>@,;:\\\"/[]?=") ----- newtype ContentTransferEncoding = ContentTransferEncoding String -- Case insensitive: lower-cased deriving (Show, Read) get_content_transfer_encoding :: String -> Maybe ContentTransferEncoding get_content_transfer_encoding xs = case parse ph_content_transfer_encoding xs of Left cte -> Just cte Right _ -> Nothing ph_content_transfer_encoding :: Parser Char ContentTransferEncoding ph_content_transfer_encoding = ContentTransferEncoding <$ cws <*> p_mechanism <* cws <* pEOI p_mechanism :: Parser Char String p_mechanism = p_ci_string "7bit" <|> p_ci_string "8bit" <|> p_ci_string "binary" <|> p_ci_string "quoted-printable" <|> p_ci_string "base64" <|> map asciiToLower <$> p_ietf_token <|> map asciiToLower <$> p_x_token newtype ContentID = ContentID MessageID deriving (Show, Read) get_content_id :: String -> Maybe ContentID get_content_id xs = case parse ph_content_id xs of Left ci -> Just ci Right _ -> Nothing ph_content_id :: Parser Char ContentID ph_content_id = ContentID <$ cws <*> p_msg_id <* cws <* pEOI newtype ContentDescription = ContentDescription String deriving (Show, Read) get_content_description :: String -> Maybe ContentDescription get_content_description xs = case parse ph_content_description xs of Left cd -> Just cd Right _ -> Nothing ph_content_description :: Parser Char ContentDescription ph_content_description = ContentDescription <$ cws <*> p_text -- would be pMany, but p_text already does that for us <* cws <* pEOI ----------------------- -- RFC 2046 -- Not really a header as such get_boundary :: String -> Maybe String get_boundary xs = case parse p_boundary xs of Left b -> Just b Right _ -> Nothing -- We are very flexible here p_boundary :: Parser Char String p_boundary = (\ss b bs -> dropFromEndWhile (' ' ==) (ss ++ [b] ++ bs)) <$> pMany (pChar ' ') <*> p_bchars <*> pMany p_bchars p_bchars :: Parser Char Char p_bchars = p_bcharsnospace <|> pChar ' ' p_bcharsnospace :: Parser Char Char p_bcharsnospace = pPred (\c -> isAsciiAlphaNum c || c `elem` "'()+_,-./:=?")