{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Data.MIME.Charset
(
HasCharset(..)
, CharsetName
, charsetText
, charsetText'
, CharsetError(..)
, AsCharsetError(..)
, charsetPrism
, CharsetLookup
, defaultCharsets
, decodeLenient
) where
import Control.Lens
import qualified Data.ByteString as B
import qualified Data.CaseInsensitive as CI
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
{-# ANN module ("HLint: ignore Use camelCase" :: String) #-}
type CharsetName = CI.CI B.ByteString
type Charset = B.ByteString -> T.Text
data CharsetError
= CharsetUnspecified
| CharsetUnsupported CharsetName
| CharsetDecodeError CharsetName
deriving (Int -> CharsetError -> ShowS
[CharsetError] -> ShowS
CharsetError -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CharsetError] -> ShowS
$cshowList :: [CharsetError] -> ShowS
show :: CharsetError -> String
$cshow :: CharsetError -> String
showsPrec :: Int -> CharsetError -> ShowS
$cshowsPrec :: Int -> CharsetError -> ShowS
Show)
class AsCharsetError s where
_CharsetError :: Prism' s CharsetError
_CharsetUnspecified :: Prism' s ()
_CharsetUnsupported :: Prism' s CharsetName
_CharsetDecodeError :: Prism' s CharsetName
_CharsetUnspecified = forall s. AsCharsetError s => Prism' s CharsetError
_CharsetError forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. AsCharsetError s => Prism' s ()
_CharsetUnspecified
_CharsetUnsupported = forall s. AsCharsetError s => Prism' s CharsetError
_CharsetError forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. AsCharsetError s => Prism' s (CI ByteString)
_CharsetUnsupported
_CharsetDecodeError = forall s. AsCharsetError s => Prism' s CharsetError
_CharsetError forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. AsCharsetError s => Prism' s (CI ByteString)
_CharsetDecodeError
instance AsCharsetError CharsetError where
_CharsetError :: Prism' CharsetError CharsetError
_CharsetError = forall a. a -> a
id
_CharsetUnspecified :: Prism' CharsetError ()
_CharsetUnspecified = forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' (forall a b. a -> b -> a
const CharsetError
CharsetUnspecified) forall a b. (a -> b) -> a -> b
$ \case
CharsetError
CharsetUnspecified -> forall a. a -> Maybe a
Just () ; CharsetError
_ -> forall a. Maybe a
Nothing
_CharsetUnsupported :: Prism' CharsetError (CI ByteString)
_CharsetUnsupported = forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' CI ByteString -> CharsetError
CharsetUnsupported forall a b. (a -> b) -> a -> b
$ \case
CharsetUnsupported CI ByteString
k -> forall a. a -> Maybe a
Just CI ByteString
k ; CharsetError
_ -> forall a. Maybe a
Nothing
_CharsetDecodeError :: Prism' CharsetError (CI ByteString)
_CharsetDecodeError = forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' CI ByteString -> CharsetError
CharsetDecodeError forall a b. (a -> b) -> a -> b
$ \case
CharsetDecodeError CI ByteString
k -> forall a. a -> Maybe a
Just CI ByteString
k ; CharsetError
_ -> forall a. Maybe a
Nothing
class HasCharset a where
type Decoded a
charsetName :: Getter a (Maybe CharsetName)
charsetData :: Getter a B.ByteString
charsetDecoded
:: AsCharsetError e
=> CharsetLookup
-> ( forall p f. (Profunctor p, Contravariant f)
=> Optic' p f a (Either e (Decoded a)) )
charsetDecoded'
:: CharsetLookup
-> ( forall p f. (Profunctor p, Contravariant f)
=> Optic' p f a (Either CharsetError (Decoded a)) )
charsetDecoded' = forall a e.
(HasCharset a, AsCharsetError e) =>
CharsetLookup
-> forall (p :: * -> * -> *) (f :: * -> *).
(Profunctor p, Contravariant f) =>
Optic' p f a (Either e (Decoded a))
charsetDecoded
charsetEncode :: Decoded a -> a
charsetText
:: (HasCharset a, AsCharsetError e, Profunctor p, Contravariant f)
=> CharsetLookup -> Optic' p f a (Either e T.Text)
charsetText :: forall a e (p :: * -> * -> *) (f :: * -> *).
(HasCharset a, AsCharsetError e, Profunctor p, Contravariant f) =>
CharsetLookup -> Optic' p f a (Either e Text)
charsetText CharsetLookup
lookupCharset = forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to forall a b. (a -> b) -> a -> b
$ \a
a ->
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall b (m :: * -> *) t. MonadReader b m => AReview t b -> m t
review forall s. AsCharsetError s => Prism' s ()
_CharsetUnspecified ()) forall a b. b -> Either a b
Right (forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view forall a. HasCharset a => Getter a (Maybe (CI ByteString))
charsetName a
a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CI ByteString
k -> forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall b (m :: * -> *) t. MonadReader b m => AReview t b -> m t
review forall s. AsCharsetError s => Prism' s (CI ByteString)
_CharsetUnsupported CI ByteString
k) forall a b. b -> Either a b
Right (CharsetLookup
lookupCharset CI ByteString
k)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Charset
f -> forall (f :: * -> *) a. Applicative f => a -> f a
pure (Charset
f (forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view forall a. HasCharset a => Getter a ByteString
charsetData a
a))
charsetText'
:: (HasCharset a, Profunctor p, Contravariant f)
=> CharsetLookup
-> Optic' p f a (Either CharsetError T.Text)
charsetText' :: forall a (p :: * -> * -> *) (f :: * -> *).
(HasCharset a, Profunctor p, Contravariant f) =>
CharsetLookup -> Optic' p f a (Either CharsetError Text)
charsetText' = forall a e (p :: * -> * -> *) (f :: * -> *).
(HasCharset a, AsCharsetError e, Profunctor p, Contravariant f) =>
CharsetLookup -> Optic' p f a (Either e Text)
charsetText
charsetPrism :: forall a. (HasCharset a) => CharsetLookup -> Prism' a (Decoded a)
charsetPrism :: forall a. HasCharset a => CharsetLookup -> Prism' a (Decoded a)
charsetPrism CharsetLookup
m = forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' forall a. HasCharset a => Decoded a -> a
charsetEncode (forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall {x}. CharsetError -> Maybe x
err forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view (forall a e.
(HasCharset a, AsCharsetError e) =>
CharsetLookup
-> forall (p :: * -> * -> *) (f :: * -> *).
(Profunctor p, Contravariant f) =>
Optic' p f a (Either e (Decoded a))
charsetDecoded CharsetLookup
m))
where
err :: CharsetError -> Maybe x
err = forall a b. a -> b -> a
const forall a. Maybe a
Nothing :: CharsetError -> Maybe x
charsets :: [(CI.CI B.ByteString, Charset)]
charsets :: [(CI ByteString, Charset)]
charsets =
[ (CI ByteString
"us-ascii", Charset
us_ascii)
, (CI ByteString
"utf-8", Charset
utf_8)
, (CI ByteString
"iso-8859-1", Charset
iso_8859_1)
, (CI ByteString
"ISO646-US", Charset
us_ascii)
, (CI ByteString
"ANSI_X3.4-1968", Charset
us_ascii)
, (CI ByteString
"iso-ir-6", Charset
us_ascii)
, (CI ByteString
"ANSI_X3.4-1986", Charset
us_ascii)
, (CI ByteString
"ISO_646.irv:1991", Charset
us_ascii)
, (CI ByteString
"us", Charset
us_ascii)
, (CI ByteString
"IBM367", Charset
us_ascii)
, (CI ByteString
"cp367", Charset
us_ascii)
, (CI ByteString
"csASCII.4-1968", Charset
us_ascii)
, (CI ByteString
"ascii", Charset
us_ascii)
, (CI ByteString
"iso-ir-100", Charset
iso_8859_1)
, (CI ByteString
"ISO_8859-1", Charset
iso_8859_1)
, (CI ByteString
"latin1", Charset
iso_8859_1)
, (CI ByteString
"l1", Charset
iso_8859_1)
, (CI ByteString
"IBM819", Charset
iso_8859_1)
, (CI ByteString
"CP819", Charset
iso_8859_1)
, (CI ByteString
"csISOLatin1", Charset
iso_8859_1)
, (CI ByteString
"csUTF8", Charset
utf_8)
, (CI ByteString
"UTF-16BE", Charset
utf16be)
, (CI ByteString
"UTF-16LE", Charset
utf16le)
, (CI ByteString
"UTF-16", Charset
utf16)
, (CI ByteString
"csUTF16BE", Charset
utf16be)
, (CI ByteString
"csUTF16LE", Charset
utf16le)
, (CI ByteString
"csUTF16", Charset
utf16)
, (CI ByteString
"UTF-32BE", Charset
utf32be)
, (CI ByteString
"UTF-32LE", Charset
utf32le)
, (CI ByteString
"UTF-32", Charset
utf32)
, (CI ByteString
"csUTF32BE", Charset
utf32be)
, (CI ByteString
"csUTF32LE", Charset
utf32le)
, (CI ByteString
"csUTF32", Charset
utf32)
]
us_ascii, utf_8, iso_8859_1 :: Charset
us_ascii :: Charset
us_ascii = Charset
decodeLenient
utf_8 :: Charset
utf_8 = Charset
decodeLenient
iso_8859_1 :: Charset
iso_8859_1 = Charset
T.decodeLatin1
utf16be, utf16le, utf16 :: Charset
utf16be :: Charset
utf16be = OnDecodeError -> Charset
T.decodeUtf16BEWith OnDecodeError
T.lenientDecode
utf16le :: Charset
utf16le = OnDecodeError -> Charset
T.decodeUtf16LEWith OnDecodeError
T.lenientDecode
utf16 :: Charset
utf16 ByteString
b = case Int -> ByteString -> ByteString
B.take Int
2 ByteString
b of
ByteString
"\xff\xfe" -> Charset
utf16le ByteString
b
ByteString
_ -> Charset
utf16be ByteString
b
utf32be, utf32le, utf32 :: Charset
utf32be :: Charset
utf32be = OnDecodeError -> Charset
T.decodeUtf32BEWith OnDecodeError
T.lenientDecode
utf32le :: Charset
utf32le = OnDecodeError -> Charset
T.decodeUtf32LEWith OnDecodeError
T.lenientDecode
utf32 :: Charset
utf32 ByteString
b = case Int -> ByteString -> ByteString
B.take Int
4 ByteString
b of
ByteString
"\xff\xfe\x00\x00" -> Charset
utf32le ByteString
b
ByteString
_ -> Charset
utf32be ByteString
b
type CharsetLookup = CI.CI B.ByteString -> Maybe Charset
defaultCharsets :: CharsetLookup
defaultCharsets :: CharsetLookup
defaultCharsets CI ByteString
k = forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup CI ByteString
k [(CI ByteString, Charset)]
charsets
decodeLenient :: B.ByteString -> T.Text
decodeLenient :: Charset
decodeLenient = OnDecodeError -> Charset
T.decodeUtf8With OnDecodeError
T.lenientDecode