-- | Decode text messages that have a Content-Transfer-Encoding and/or -- a foreign character encoding module DecodeText where import Dqp import Base64 import Dew import MimeMessage import Utils2(strToLower,apSnd,pairwith) import HeaderNames hiding (contentType) -- * Main functions -- | Decode a MIME message, both the message body and encoded words in headers. -- Convert the character set to Unicode. decodeTextMsg msg@(MimeMsg hdrs _) = if getheader hdrs mimeVersion /= "" then decodeTextMsg' msg else msg -- | Decode a message like 'decodeTextMsg' even if there is no -- @Mime-Version@ header. decodeTextMsg' msg@(MimeMsg hdrs body) = if istext mimetype then case get_decoder hdrs of (cte,Just decode) -> MimeMsg (changeType cte hdrs') (convert (decode body)) _ -> MimeMsg hdrs' (convert body) else MimeMsg hdrs' body -- not mime, or not plain text, leave encoded... where convert body = cbody where (_,(ocs,cbody0)) = convertMessageBody hdrs body cbody = maybe (note++cbody0) (const cbody0) ocs note = "[no/unknown charset parameter, no conversion]" mimetype = mimeContentType hdrs hdrs' = decodeHeaders hdrs -- | Decode the Content-Transfer-Encoding of any type of message -- (i.e. without checking the Mime-Version or Content-Type headers). decodeMsg msg@(MimeMsg hdrs body) = case get_decoder hdrs of (cte,Just decode) -> MimeMsg (changeType cte hdrs) (decode body) _ -> msg -- * Helper functions -- | If the message body uses a foreign character encoding -- (UTF-8 or Windows-1252), convert it native Haskell 'Char'acters (Unicode). -- The returned value includes the mime type, the name of character encoding -- (if recognized) and the converted message body. convertMessageBody hdrs body = (mimetype,cbody) where (mimetype,params) = apSnd parseParams (contentType hdrs) cbody = if istext mimetype then maybe nb apconv (convertCharset' =<< lookup "charset" params) else nb nb = (Nothing,body) apconv (cs,conv) = (Just cs,conv body) -- | Update the Content-Transfer-Encoding header. Keep the original -- encoding in a nonstandard X-Content-Transfer-Encoding-Was header. changeType cte = updateHdr (cte_hdr,if strToLower cte=="7bit" then cte else "8BIT") . renameHdr cte_hdr cte_was_hdr cte_hdr = contentTransferEncoding cte_was_hdr = hn ("X-"++orig cte_hdr++"-Was") -- | Recognise a few standard text mime types istext ct = strToLower ct `elem` ["text/plain","text/richtext","text/html"] -- | Get the Content-Transfer-Encoding and the corresponding decoder (if any) get_decoder = pairwith (flip lookup encodings) . strToLower . flip getheader cte_hdr -- | Decoding functions for standard Content-Tranfer-Encoding (there are only -- two of them: quoted-printable and base64) encodings = [("quoted-printable",decodeQuotedPrintable), ("base64",decodeBase64)] -- | Decode encoded words in selected headers decodeHeaders = modHdr subject decodeEncodedWords . modHdr from decodeEncodedWords . modHdr to decodeEncodedWords -- should decode some other hdrs too!! -- * Obsolete functions -- | Old version of 'decodeTextMsg', kept for backwards compatibility decodeText msg = let (hdrs,bodylines)=splitmsg msg body = unlines bodylines hdrs' = decodeHeaders hdrs original = hdrlines hdrs' ++ [""] ++ bodylines in if getheader hdrs mimeVersion /= "" && istext (mimeContentType hdrs) then case get_decoder hdrs of (cte,Just decode) -> hdrlines (changeType cte hdrs') ++ [""] ++ lines (decode body) _ -> original else original -- not mime, or not plain text, leave encoded... -- | Old version of 'get_decoder', kept for backwards compatibility decode e = maybe id id (lookup e encodings)