{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
module Data.MIME.EncodedWord
(
decodeEncodedWords
) where
import Control.Applicative ((<|>), liftA2, optional)
import Data.Bifunctor (first)
import Data.Semigroup ((<>))
import Control.Lens (to, clonePrism, review, view)
import Data.Attoparsec.ByteString
import Data.Attoparsec.ByteString.Char8 (char8)
import qualified Data.ByteString as B
import qualified Data.CaseInsensitive as CI
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.MIME.Charset
import Data.MIME.TransferEncoding
import Data.MIME.QuotedPrintable
import Data.RFC5322.Internal
data EncodedWord = EncodedWord
{ _encodedWordCharset :: CI.CI B.ByteString
, _encodedWordLanguage :: Maybe (CI.CI B.ByteString)
, _encodedWordEncoding :: CI.CI B.ByteString
, _encodedWordText :: B.ByteString
}
instance HasTransferEncoding EncodedWord where
type TransferDecoded EncodedWord = TransferDecodedEncodedWord
transferEncodingName = to _encodedWordEncoding
transferEncodedData = to _encodedWordText
transferDecoded = to $ \a@(EncodedWord charset lang _ _) ->
TransferDecodedEncodedWord charset lang <$> view transferDecodedBytes a
data TransferDecodedEncodedWord = TransferDecodedEncodedWord
{ _transDecWordCharset :: CI.CI B.ByteString
, _transDecWordLanguage :: Maybe (CI.CI B.ByteString)
, _transDecWordText :: B.ByteString
}
instance HasCharset TransferDecodedEncodedWord where
type Decoded TransferDecodedEncodedWord = T.Text
charsetName = to (pure . _transDecWordCharset)
charsetData = to _transDecWordText
charsetDecoded = charsetText
charsetEncode s =
let
bs = T.encodeUtf8 s
charset = if B.all (< 0x80) bs then "us-ascii" else "utf-8"
in TransferDecodedEncodedWord charset Nothing bs
encodedWord :: Parser EncodedWord
encodedWord =
EncodedWord
<$> ci token
<*> optional (char8 '*' *> ci token)
<*> (qmark *> ci token)
<*> (qmark *> encodedText <* qmark <* eq)
where
eq = char8 '='
qmark = char8 '?'
token = takeWhile1 (\c -> c >= 33 && c <= 126 && notInClass "()<>@,;:\\\"/[]?.=*" c)
encodedText = takeWhile1 (\c -> (c >= 33 && c < 63) || (c > 63 && c <= 127))
serialiseEncodedWord :: EncodedWord -> B.ByteString
serialiseEncodedWord (EncodedWord charset lang enc s) =
"=?" <> CI.original charset
<> maybe "" (\l -> "*" <> CI.original l) lang
<> "?" <> CI.original enc <>
"?" <> s <> "?="
transferEncodeEncodedWord :: TransferDecodedEncodedWord -> EncodedWord
transferEncodeEncodedWord (TransferDecodedEncodedWord charset lang s) =
EncodedWord charset lang "Q" (review (clonePrism q) s)
decodeEncodedWords :: B.ByteString -> T.Text
decodeEncodedWords s =
either (const $ decodeLenient s) merge $ fmap (g . f) <$> parseOnly tokens s
where
tokens = liftA2 (:) (Left <$> takeTillString "=?") more
<|> ((:[]) . Left <$> takeByteString)
more = liftA2 (:) (Right <$> encodedWord <|> pure (Left "=?")) tokens
f (Left t) = Left t
f (Right w) = first
(const $ serialiseEncodedWord w :: TransferEncodingError -> B.ByteString)
(view transferDecoded w)
g (Left t) = Left t
g (Right w) = first
(const $ serialiseEncodedWord $ transferEncodeEncodedWord w :: CharsetError -> B.ByteString)
(view charsetDecoded w)
merge = foldMap (either decodeLenient id)