{-# LANGUAGE OverloadedStrings #-} -- | Handles server-specified text decoding. module Network.URI.Charset(resolveCharset, resolveCharset', convertCharset, charsets) where import Data.Text (Text) import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import Data.Text.Encoding import Debug.Trace (trace) -- | If the MIMEtype specifies a charset parameter, apply it. resolveCharset :: [String] -- ^ The MIMEtype, split by ';' -> ByteString -- ^ The bytes received from the server -> (String, Either Text ByteString) -- ^ The MIMEtype (minus parameters) & possibly decoded text, to be returned from protocol handlers. resolveCharset (mime:('c':'h':'a':'r':'s':'e':'t':'=':charset):_) response = (mime, Left $ convertCharset charset $ B.toStrict response) resolveCharset (mime:_:params) response = resolveCharset (mime:params) response resolveCharset [mime] response = (mime, Right $ response) -- NOTE I can't localize this error string because resolveCharset doesn't know the locale. -- I don't think this is worth fixing, because hitting this indicates the server is badly misbehaving. resolveCharset [] response = ("text/x-error\t", Left "Filetype unspecified") -- | As per `resolveCharset`, but also returns given URI (or other type). resolveCharset' :: a -> [String] -> ByteString -> (a, String, Either Text ByteString) resolveCharset' a mimes resp = let (mime, resp') = resolveCharset mimes resp in (a, mime, resp') -- | Decodes bytes according to a charset identified by it's IANA-assigned name(s). convertCharset "iso-8859-1" = decodeLatin1 convertCharset "latin1" = decodeLatin1 convertCharset "us-ascii" = decodeUtf8With replaceChar convertCharset "utf-8" = decodeUtf8With replaceChar convertCharset "utf-16be" = decodeUtf16BEWith replaceChar convertCharset "utf-16le" = decodeUtf16LEWith replaceChar convertCharset "utf-16" = decodeUtf16LEWith replaceChar convertCharset "utf-32be" = decodeUtf32BEWith replaceChar convertCharset "utf-32le" = decodeUtf32LEWith replaceChar convertCharset "utf-32" = decodeUtf32LEWith replaceChar convertCharset charset = -- FIXME Is this the best fallback for unsupported charsets? trace ("Unsupported text encoding" ++ charset) $ decodeUtf8With replaceChar replaceChar _ _ = Just '�' -- | Lists all charsets supported by convertCharset charsets :: [Text] charsets = ["iso-8859-1", "latin1", "us-ascii", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32be", "utf-32le", "utf-32"]