{-# LANGUAGE OverloadedStrings #-}

-- | Handles server-specified text decoding.
module Network.URI.Charset(resolveCharset, resolveCharset', convertCharset, charsets) where
import           Data.Text (Text)
import           Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import           Data.Text.Encoding
import           Debug.Trace (trace)
import           Data.List (intercalate)

-- | If the MIMEtype specifies a charset parameter, apply it.
resolveCharset :: [String] -- ^ The MIMEtype, split by ';'
    -> ByteString -- ^ The bytes received from the server
    -> (String, Either Text ByteString) -- ^ The MIMEtype (minus parameters) & possibly decoded text, to be returned from protocol handlers.
resolveCharset :: [String] -> ByteString -> (String, Either Text ByteString)
resolveCharset (String
mime:(Char
'c':Char
'h':Char
'a':Char
'r':Char
's':Char
'e':Char
't':Char
'=':String
charset):[String]
params) ByteString
response =
    (String -> [String] -> String
parameterizedMIME String
mime [String]
params, Text -> Either Text ByteString
forall a b. a -> Either a b
Left (Text -> Either Text ByteString) -> Text -> Either Text ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString -> Text
convertCharset String
charset (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.toStrict ByteString
response)
resolveCharset (String
mime:String
param:[String]
params) ByteString
response =
    [String] -> ByteString -> (String, Either Text ByteString)
resolveCharset (String -> [String] -> String
parameterizedMIME String
mime [String
param]String -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
params) ByteString
response
resolveCharset [String
mime] ByteString
response = (String
mime, ByteString -> Either Text ByteString
forall a b. b -> Either a b
Right (ByteString -> Either Text ByteString)
-> ByteString -> Either Text ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
response)
-- NOTE I can't localize this error string because resolveCharset doesn't know the locale.
--      I don't think this is worth fixing, because hitting this indicates the server is badly misbehaving.
resolveCharset [] ByteString
response = (String
"text/x-error\t", Text -> Either Text ByteString
forall a b. a -> Either a b
Left Text
"Filetype unspecified")

parameterizedMIME :: String -> [String] -> String
parameterizedMIME String
mime [String]
params = String
mime String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
";" [String]
params

-- | As per `resolveCharset`, but also returns given URI (or other type).
resolveCharset' :: a -> [String] -> ByteString -> (a, String, Either Text ByteString)
resolveCharset' :: a -> [String] -> ByteString -> (a, String, Either Text ByteString)
resolveCharset' a
a [String]
mimes ByteString
resp = let (String
mime, Either Text ByteString
resp') = [String] -> ByteString -> (String, Either Text ByteString)
resolveCharset [String]
mimes ByteString
resp in (a
a, String
mime, Either Text ByteString
resp')

-- | Decodes bytes according to a charset identified by it's IANA-assigned name(s).
convertCharset :: String -> ByteString -> Text
convertCharset String
"iso-8859-1" = ByteString -> Text
decodeLatin1
convertCharset String
"latin1" = ByteString -> Text
decodeLatin1
convertCharset String
"us-ascii" = OnDecodeError -> ByteString -> Text
decodeUtf8With OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
"utf-8" = OnDecodeError -> ByteString -> Text
decodeUtf8With OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
"utf-16be" = OnDecodeError -> ByteString -> Text
decodeUtf16BEWith OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
"utf-16le" = OnDecodeError -> ByteString -> Text
decodeUtf16LEWith OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
"utf-16" = OnDecodeError -> ByteString -> Text
decodeUtf16LEWith OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
"utf-32be" = OnDecodeError -> ByteString -> Text
decodeUtf32BEWith OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
"utf-32le" = OnDecodeError -> ByteString -> Text
decodeUtf32LEWith OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
"utf-32" = OnDecodeError -> ByteString -> Text
decodeUtf32LEWith OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar
convertCharset String
charset = -- FIXME Is this the best fallback for unsupported charsets?
    String -> (ByteString -> Text) -> ByteString -> Text
forall a. String -> a -> a
trace (String
"Unsupported text encoding" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
charset) ((ByteString -> Text) -> ByteString -> Text)
-> (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ OnDecodeError -> ByteString -> Text
decodeUtf8With OnDecodeError
forall p p. p -> p -> Maybe Char
replaceChar

replaceChar :: p -> p -> Maybe Char
replaceChar p
_ p
_ = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'�'

-- | Lists all charsets supported by convertCharset
charsets :: [Text]
charsets :: [Text]
charsets = [Text
"iso-8859-1", Text
"latin1", Text
"us-ascii", Text
"utf-8", Text
"utf-16be", Text
"utf-16le", Text
"utf-16", Text
"utf-32be", Text
"utf-32le", Text
"utf-32"]