{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Data.MIME.Charset
(
HasCharset(..)
, CharsetName
, charsetText
, charsetText'
, CharsetError(..)
, AsCharsetError(..)
, charsetPrism
, CharsetLookup
, defaultCharsets
, decodeLenient
) where
import Control.Lens (Getter, Prism', prism', review, to, view)
import qualified Data.ByteString as B
import qualified Data.CaseInsensitive as CI
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
{-# ANN module ("HLint: ignore Use camelCase" :: String) #-}
type CharsetName = CI.CI B.ByteString
type Charset = B.ByteString -> T.Text
data CharsetError
= CharsetUnspecified
| CharsetUnsupported CharsetName
| CharsetDecodeError CharsetName
deriving (Show)
class AsCharsetError s where
_CharsetError :: Prism' s CharsetError
_CharsetUnspecified :: Prism' s ()
_CharsetUnsupported :: Prism' s CharsetName
_CharsetDecodeError :: Prism' s CharsetName
_CharsetUnspecified = _CharsetError . _CharsetUnspecified
_CharsetUnsupported = _CharsetError . _CharsetUnsupported
_CharsetDecodeError = _CharsetError . _CharsetDecodeError
instance AsCharsetError CharsetError where
_CharsetError = id
_CharsetUnspecified = prism' (const CharsetUnspecified) $ \case
CharsetUnspecified -> Just () ; _ -> Nothing
_CharsetUnsupported = prism' CharsetUnsupported $ \case
CharsetUnsupported k -> Just k ; _ -> Nothing
_CharsetDecodeError = prism' CharsetDecodeError $ \case
CharsetDecodeError k -> Just k ; _ -> Nothing
class HasCharset a where
type Decoded a
charsetName :: Getter a (Maybe CharsetName)
charsetData :: Getter a B.ByteString
charsetDecoded :: AsCharsetError e => CharsetLookup -> Getter a (Either e (Decoded a))
charsetEncode :: Decoded a -> a
charsetText
:: (HasCharset a, AsCharsetError e)
=> CharsetLookup -> Getter a (Either e T.Text)
charsetText lookupCharset = to $ \a ->
maybe (Left $ review _CharsetUnspecified ()) Right (view charsetName a)
>>= \k -> maybe (Left $ review _CharsetUnsupported k) Right (lookupCharset k)
>>= \f -> pure (f (view charsetData a))
charsetText'
:: (HasCharset a)
=> CharsetLookup
-> Getter a (Either CharsetError T.Text)
charsetText' = charsetText
charsetPrism :: forall a. (HasCharset a) => CharsetLookup -> Prism' a (Decoded a)
charsetPrism m = prism' charsetEncode (either (const Nothing) Just . view l)
where
l = charsetDecoded m :: Getter a (Either CharsetError (Decoded a))
charsets :: [(CI.CI B.ByteString, Charset)]
charsets =
[ ("us-ascii", us_ascii)
, ("utf-8", utf_8)
, ("iso-8859-1", iso_8859_1)
, ("ISO646-US", us_ascii)
, ("ANSI_X3.4-1968", us_ascii)
]
us_ascii, utf_8, iso_8859_1 :: Charset
us_ascii = decodeLenient
utf_8 = decodeLenient
iso_8859_1 = T.decodeLatin1
type CharsetLookup = CI.CI B.ByteString -> Maybe Charset
defaultCharsets :: CharsetLookup
defaultCharsets k = lookup k charsets
decodeLenient :: B.ByteString -> T.Text
decodeLenient = T.decodeUtf8With T.lenientDecode