-- This file is part of purebred-email
-- Copyright (C) 2018-2021  Fraser Tweedale
--
-- purebred-email is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU Affero General Public License for more details.
--
-- You should have received a copy of the GNU Affero General Public License
-- along with this program.  If not, see <http://www.gnu.org/licenses/>.

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}

{- |

@encoded-word@s for representing non-7bit ASCII data in headers
(RFC 2047 and RFC 2231).

-}
module Data.MIME.EncodedWord
  (
    decodeEncodedWord
  , decodeEncodedWords
  , EncodedWord
  , encodedWord
  , buildEncodedWord
  , encodeEncodedWords
  , chooseEncodedWordEncoding
  ) where

import Control.Applicative ((<|>), liftA2, optional)
import Data.Maybe (fromMaybe)
import Data.Monoid (Sum(Sum), Any(Any))

import Control.Lens (to, clonePrism, review, view, foldMapOf)
import Data.Attoparsec.ByteString
import Data.Attoparsec.ByteString.Char8 (char8)
import Data.ByteString.Lens (bytes)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Builder as Builder
import qualified Data.CaseInsensitive as CI
import qualified Data.Text as T
import qualified Data.Text.Encoding as T

import Data.MIME.Charset
import Data.MIME.Error (EncodingError)
import Data.MIME.TransferEncoding
import Data.MIME.Base64
import Data.MIME.QuotedPrintable
import Data.IMF.Syntax (ci, takeTillString)

{-# ANN module ("HLint: ignore Eta reduce" :: String) #-}

data EncodedWord = EncodedWord
  { EncodedWord -> TransferEncodingName
_encodedWordCharset :: CI.CI B.ByteString
  , EncodedWord -> Maybe TransferEncodingName
_encodedWordLanguage :: Maybe (CI.CI B.ByteString)
  , EncodedWord -> TransferEncodingName
_encodedWordEncoding :: CI.CI B.ByteString
  , EncodedWord -> ByteString
_encodedWordText :: B.ByteString
  }

instance HasTransferEncoding EncodedWord where
  type TransferDecoded EncodedWord = TransferDecodedEncodedWord
  transferEncodingName :: Getter EncodedWord TransferEncodingName
transferEncodingName = forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to EncodedWord -> TransferEncodingName
_encodedWordEncoding
  transferEncodedData :: Getter EncodedWord ByteString
transferEncodedData = forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to EncodedWord -> ByteString
_encodedWordText
  transferDecoded :: forall e (p :: * -> * -> *) (f :: * -> *).
(AsTransferEncodingError e, Profunctor p, Contravariant f) =>
Optic' p f EncodedWord (Either e (TransferDecoded EncodedWord))
transferDecoded = forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to forall a b. (a -> b) -> a -> b
$ \a :: EncodedWord
a@(EncodedWord TransferEncodingName
charset Maybe TransferEncodingName
lang TransferEncodingName
_ ByteString
_) ->
    TransferEncodingName
-> Maybe TransferEncodingName
-> ByteString
-> TransferDecodedEncodedWord
TransferDecodedEncodedWord TransferEncodingName
charset Maybe TransferEncodingName
lang forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view forall a e (p :: * -> * -> *) (f :: * -> *).
(HasTransferEncoding a, AsTransferEncodingError e, Profunctor p,
 Contravariant f) =>
Optic' p f a (Either e ByteString)
transferDecodedBytes EncodedWord
a
  transferEncode :: TransferDecoded EncodedWord -> EncodedWord
transferEncode = TransferDecodedEncodedWord -> EncodedWord
transferEncodeEncodedWord


data TransferDecodedEncodedWord = TransferDecodedEncodedWord
  { TransferDecodedEncodedWord -> TransferEncodingName
_transDecWordCharset :: CI.CI B.ByteString
  , TransferDecodedEncodedWord -> Maybe TransferEncodingName
_transDecWordLanguage :: Maybe (CI.CI B.ByteString)
  , TransferDecodedEncodedWord -> ByteString
_transDecWordText :: B.ByteString
  }

-- | 'charsetEncode' uses UTF-8, but sets the charset to @us-ascii@
-- if the text only contains ASCII characters.  No language is set
-- upon encoding.
--
instance HasCharset TransferDecodedEncodedWord where
  type Decoded TransferDecodedEncodedWord = T.Text
  charsetName :: Getter TransferDecodedEncodedWord (Maybe TransferEncodingName)
charsetName = forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to (forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. TransferDecodedEncodedWord -> TransferEncodingName
_transDecWordCharset)
  charsetData :: Getter TransferDecodedEncodedWord ByteString
charsetData = forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to TransferDecodedEncodedWord -> ByteString
_transDecWordText
  charsetDecoded :: forall e.
AsCharsetError e =>
CharsetLookup
-> forall (p :: * -> * -> *) (f :: * -> *).
   (Profunctor p, Contravariant f) =>
   Optic'
     p
     f
     TransferDecodedEncodedWord
     (Either e (Decoded TransferDecodedEncodedWord))
charsetDecoded CharsetLookup
charsets = forall a e (p :: * -> * -> *) (f :: * -> *).
(HasCharset a, AsCharsetError e, Profunctor p, Contravariant f) =>
CharsetLookup -> Optic' p f a (Either e Text)
charsetText CharsetLookup
charsets

  charsetEncode :: Decoded TransferDecodedEncodedWord -> TransferDecodedEncodedWord
charsetEncode Decoded TransferDecodedEncodedWord
s =
    let
      bs :: ByteString
bs = Text -> ByteString
T.encodeUtf8 Decoded TransferDecodedEncodedWord
s
      charset :: TransferEncodingName
charset = if (Word8 -> Bool) -> ByteString -> Bool
B.all (forall a. Ord a => a -> a -> Bool
< Word8
0x80) ByteString
bs then TransferEncodingName
"us-ascii" else TransferEncodingName
"utf-8"
    in TransferEncodingName
-> Maybe TransferEncodingName
-> ByteString
-> TransferDecodedEncodedWord
TransferDecodedEncodedWord TransferEncodingName
charset forall a. Maybe a
Nothing ByteString
bs

-- NOTE: may not be > 75 chars long
--
-- NOTE: DOES NOT PARSE THE LEADING "=?"
--
encodedWord :: Parser EncodedWord
encodedWord :: Parser EncodedWord
encodedWord =
  TransferEncodingName
-> Maybe TransferEncodingName
-> TransferEncodingName
-> ByteString
-> EncodedWord
EncodedWord
  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s. FoldCase s => Parser s -> Parser (CI s)
ci Parser ByteString
token
  forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Char -> Parser Word8
char8 Char
'*' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s. FoldCase s => Parser s -> Parser (CI s)
ci Parser ByteString
token)
  forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Word8
qmark forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s. FoldCase s => Parser s -> Parser (CI s)
ci Parser ByteString
token)
  forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Word8
qmark forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString
encodedText forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Word8
qmark forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Word8
eq)
  where
    eq :: Parser Word8
eq = Char -> Parser Word8
char8 Char
'='
    qmark :: Parser Word8
qmark = Char -> Parser Word8
char8 Char
'?'
    token :: Parser ByteString
token = (Word8 -> Bool) -> Parser ByteString
takeWhile1 (\Word8
c -> Word8
c forall a. Ord a => a -> a -> Bool
>= Word8
33 Bool -> Bool -> Bool
&& Word8
c forall a. Ord a => a -> a -> Bool
<= Word8
126 Bool -> Bool -> Bool
&& String -> Word8 -> Bool
notInClass String
"()<>@,;:\\\"/[]?.=*" Word8
c)

    -- any printable ascii char other than ? or SPACE
    encodedText :: Parser ByteString
encodedText = (Word8 -> Bool) -> Parser ByteString
takeWhile1 (\Word8
c -> (Word8
c forall a. Ord a => a -> a -> Bool
>= Word8
33 Bool -> Bool -> Bool
&& Word8
c forall a. Ord a => a -> a -> Bool
< Word8
63) Bool -> Bool -> Bool
|| (Word8
c forall a. Ord a => a -> a -> Bool
> Word8
63 Bool -> Bool -> Bool
&& Word8
c forall a. Ord a => a -> a -> Bool
<= Word8
127))

serialiseEncodedWord :: EncodedWord -> B.ByteString
serialiseEncodedWord :: EncodedWord -> ByteString
serialiseEncodedWord = ByteString -> ByteString
L.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
Builder.toLazyByteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. EncodedWord -> Builder
buildEncodedWord

buildEncodedWord :: EncodedWord -> Builder.Builder
buildEncodedWord :: EncodedWord -> Builder
buildEncodedWord (EncodedWord TransferEncodingName
charset Maybe TransferEncodingName
lang TransferEncodingName
enc ByteString
s) =
  Builder
"=?" forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
Builder.byteString (forall s. CI s -> s
CI.original TransferEncodingName
charset)
  forall a. Semigroup a => a -> a -> a
<> forall b a. b -> (a -> b) -> Maybe a -> b
maybe Builder
"" (\TransferEncodingName
l -> Builder
"*" forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
Builder.byteString (forall s. CI s -> s
CI.original TransferEncodingName
l)) Maybe TransferEncodingName
lang
  forall a. Semigroup a => a -> a -> a
<> Builder
"?" forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
Builder.byteString (forall s. CI s -> s
CI.original TransferEncodingName
enc)
  forall a. Semigroup a => a -> a -> a
<> Builder
"?" forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
Builder.byteString ByteString
s forall a. Semigroup a => a -> a -> a
<> Builder
"?="


transferEncodeEncodedWord :: TransferDecodedEncodedWord -> EncodedWord
transferEncodeEncodedWord :: TransferDecodedEncodedWord -> EncodedWord
transferEncodeEncodedWord (TransferDecodedEncodedWord TransferEncodingName
charset Maybe TransferEncodingName
lang ByteString
s) =
  let (TransferEncodingName
enc, EncodedWordEncoding
p) = forall a. a -> Maybe a -> a
fromMaybe (TransferEncodingName
"Q", EncodedWordEncoding
q) (ByteString -> Maybe (TransferEncodingName, EncodedWordEncoding)
chooseEncodedWordEncoding ByteString
s)
  in TransferEncodingName
-> Maybe TransferEncodingName
-> TransferEncodingName
-> ByteString
-> EncodedWord
EncodedWord TransferEncodingName
charset Maybe TransferEncodingName
lang TransferEncodingName
enc (forall b (m :: * -> *) t. MonadReader b m => AReview t b -> m t
review (forall s t a b. APrism s t a b -> Prism s t a b
clonePrism EncodedWordEncoding
p) ByteString
s)

-- | RFC 2047 and RFC 2231 define the /encoded-words/ mechanism for
-- embedding non-ASCII data in headers.  This function locates
-- encoded-words and decodes them.
--
-- @
-- λ> T.putStrLn $ decodeEncodedWords defaultCharsets "hello =?utf-8?B?5LiW55WM?=!"
-- hello 世界!
-- @
--
-- If parsing fails or the encoding is unrecognised the encoded-word
-- is left unchanged in the result.
--
-- @
-- λ> T.putStrLn $ decodeEncodedWords defaultCharsets "=?utf-8?B?bogus?="
-- =?utf-8?B?bogus?=
--
-- λ> T.putStrLn $ decodeEncodedWords defaultCharsets "=?utf-8?X?unrecognised_encoding?="
-- =?utf-8?X?unrecognised_encoding?=
-- @
--
-- Language specification is supported (the datum is discarded).
--
-- @
-- λ> T.putStrLn $ decodeEncodedWords defaultCharsets "=?utf-8*es?Q?hola_mundo!?="
-- hola mundo!
-- @
--
decodeEncodedWords :: CharsetLookup -> B.ByteString -> T.Text
decodeEncodedWords :: CharsetLookup -> ByteString -> Text
decodeEncodedWords CharsetLookup
charsets ByteString
s =
  forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const (ByteString -> Text
decodeLenient ByteString
s)) (forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Either ByteString EncodedWord -> Text
conv) (forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Either ByteString EncodedWord]
tokens ByteString
s)
  where
    tokens :: Parser [Either B.ByteString EncodedWord]
    tokens :: Parser [Either ByteString EncodedWord]
tokens = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 (:) (forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Parser ByteString
takeTillString ByteString
"=?") Parser [Either ByteString EncodedWord]
more
          forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
takeByteString)
    more :: Parser [Either ByteString EncodedWord]
more = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 (:) (forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser EncodedWord
encodedWord forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall a b. a -> Either a b
Left ByteString
"=?")) Parser [Either ByteString EncodedWord]
tokens
    conv :: Either ByteString EncodedWord -> Text
conv = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ByteString -> Text
decodeLenient (CharsetLookup -> EncodedWord -> Text
decodeEncodedWord CharsetLookup
charsets)

-- | Decode an 'EncodedWord'.  If transfer or charset decoding fails,
-- returns the serialised encoded word.
decodeEncodedWord :: CharsetLookup -> EncodedWord -> T.Text
decodeEncodedWord :: CharsetLookup -> EncodedWord -> Text
decodeEncodedWord CharsetLookup
charsets EncodedWord
w =
  forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
    (ByteString -> Text
decodeLenient forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. a -> b -> a
const (EncodedWord -> ByteString
serialiseEncodedWord EncodedWord
w) :: EncodingError -> T.Text)
    forall a. a -> a
id
    (forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view forall a e (p :: * -> * -> *) (f :: * -> *).
(HasTransferEncoding a, AsTransferEncodingError e, Profunctor p,
 Contravariant f) =>
Optic' p f a (Either e (TransferDecoded a))
transferDecoded EncodedWord
w forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view (forall a e.
(HasCharset a, AsCharsetError e) =>
CharsetLookup
-> forall (p :: * -> * -> *) (f :: * -> *).
   (Profunctor p, Contravariant f) =>
   Optic' p f a (Either e (Decoded a))
charsetDecoded CharsetLookup
charsets))

-- | This function encodes necessary parts of some text
--
-- Currently turns the whole text into a single encoded word, if necessary.
encodeEncodedWords :: T.Text -> B.ByteString
encodeEncodedWords :: Text -> ByteString
encodeEncodedWords Text
t = forall b a. b -> (a -> b) -> Maybe a -> b
maybe ByteString
utf8 (forall a b. a -> b -> a
const ByteString
ew) (ByteString -> Maybe (TransferEncodingName, EncodedWordEncoding)
chooseEncodedWordEncoding ByteString
utf8)
  where
    ew :: ByteString
ew = ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
" " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> ByteString
encOrNot forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> ByteString -> [ByteString]
B.split Word8
32 forall a b. (a -> b) -> a -> b
$ ByteString
utf8
    encOrNot :: ByteString -> ByteString
encOrNot ByteString
s = forall b a. b -> (a -> b) -> Maybe a -> b
maybe ByteString
s (forall {b}.
b
-> (TransferEncodingName, APrism ByteString ByteString b b)
-> ByteString
g ByteString
s) (ByteString -> Maybe (TransferEncodingName, EncodedWordEncoding)
chooseEncodedWordEncoding ByteString
s)
    g :: b
-> (TransferEncodingName, APrism ByteString ByteString b b)
-> ByteString
g b
s (TransferEncodingName
enc, APrism ByteString ByteString b b
p) = EncodedWord -> ByteString
serialiseEncodedWord forall a b. (a -> b) -> a -> b
$
      TransferEncodingName
-> Maybe TransferEncodingName
-> TransferEncodingName
-> ByteString
-> EncodedWord
EncodedWord TransferEncodingName
"utf-8" forall a. Maybe a
Nothing TransferEncodingName
enc (forall b (m :: * -> *) t. MonadReader b m => AReview t b -> m t
review (forall s t a b. APrism s t a b -> Prism s t a b
clonePrism APrism ByteString ByteString b b
p) b
s)
    utf8 :: ByteString
utf8 = Text -> ByteString
T.encodeUtf8 Text
t

chooseEncodedWordEncoding :: B.ByteString -> Maybe (TransferEncodingName, TransferEncoding)
chooseEncodedWordEncoding :: ByteString -> Maybe (TransferEncodingName, EncodedWordEncoding)
chooseEncodedWordEncoding ByteString
s
  | Bool -> Bool
not Bool
doEnc = forall a. Maybe a
Nothing
  | Int
nQP forall a. Ord a => a -> a -> Bool
< Int
nB64 = forall a. a -> Maybe a
Just (TransferEncodingName
"Q", EncodedWordEncoding
q)
  | Bool
otherwise = forall a. a -> Maybe a
Just (TransferEncodingName
"B", EncodedWordEncoding
b)
  where
    -- https://tools.ietf.org/html/rfc5322#section-3.5 'text'
    needEnc :: a -> Bool
needEnc a
c = a
c forall a. Ord a => a -> a -> Bool
> a
127 Bool -> Bool -> Bool
|| a
c forall a. Eq a => a -> a -> Bool
== a
0
    qpBytes :: Word8 -> a
qpBytes Word8
c
      | QuotedPrintableMode -> Word8 -> Bool
encodingRequiredNonEOL QuotedPrintableMode
Q Word8
c = a
3
      | Bool
otherwise = a
1
    (Any Bool
doEnc, Sum Int
nQP) = forall r s a. Getting r s a -> (a -> r) -> s -> r
foldMapOf forall t. IsByteString t => IndexedTraversal' Int t Word8
bytes (\Word8
c -> (Bool -> Any
Any (forall {a}. (Ord a, Num a) => a -> Bool
needEnc Word8
c), forall a. a -> Sum a
Sum (forall {a}. Num a => Word8 -> a
qpBytes Word8
c))) ByteString
s
    nB64 :: Int
nB64 = ((ByteString -> Int
B.length ByteString
s forall a. Num a => a -> a -> a
+ Int
2) forall a. Integral a => a -> a -> a
`div` Int
3) forall a. Num a => a -> a -> a
* Int
4