{-# LANGUAGE OverloadedStrings #-}
-- | Handles server-specified text decoding.
module Network.URI.Charset(resolveCharset, convertCharset, charsets) where

import Data.Char (isSpace, toLower)
import Data.Maybe (mapMaybe)
import Data.Text (Text)
import qualified Data.ByteString as BS
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import Data.Text.Encoding
import Data.Text.Encoding.Error (OnDecodeError)
import Debug.Trace (trace)

-- | If the MIMEtype specifies a charset parameter, apply it.
--
-- The first element of the list is taken as the MIMEtype proper, the remaining
-- elements as its parameters. The first parameter named @charset@ (matched
-- case-insensitively, ignoring leading whitespace) selects the decoder; if
-- there is none the bytes are returned undecoded. An empty list yields
-- @text/plain@ with a fixed error message as the text.
resolveCharset :: [String] -- ^ The MIMEtype, split by ';'
    -> ByteString -- ^ The bytes received from the server
    -> (String, Either Text ByteString)
    -- ^ The MIMEtype (minus parameters) & possibly decoded text,
    -- to be returned from protocol handlers.
resolveCharset [] _ = ("text/plain", Left "Filetype unspecified")
resolveCharset (mime:params) response =
    case mapMaybe charsetParam params of
        charset:_ -> (mime, Left $ convertCharset charset $ B.toStrict response)
        [] -> (mime, Right response)

-- | Extracts the value of a @charset=@ MIMEtype parameter, if that's what it is.
-- Splitting on ';' usually leaves a leading space ("; charset=utf-8"), and
-- parameter names are case-insensitive, so both are tolerated here.
charsetParam :: String -> Maybe String
charsetParam param
    | map toLower key == "charset=" = Just value
    | otherwise = Nothing
  where
    -- 8 == length "charset="
    (key, value) = splitAt 8 $ dropWhile isSpace param

-- | Decodes bytes according to a charset identified by its IANA-assigned name(s).
--
-- Names are compared case-insensitively, ignoring whitespace and double quotes
-- (so a quoted parameter value like @"UTF-8"@ is accepted). Undecodable input
-- is replaced by U+FFFD rather than raising an exception. Unsupported charsets
-- fall back to UTF-8.
convertCharset :: String -> BS.ByteString -> Text
convertCharset charset = case normalizeName charset of
    "iso-8859-1" -> decodeLatin1
    "latin1" -> decodeLatin1
    -- ASCII is a strict subset of UTF-8, so the UTF-8 decoder covers it.
    "us-ascii" -> decodeUtf8With replaceChar
    "utf-8" -> decodeUtf8With replaceChar
    "utf-16be" -> decodeUtf16BEWith replaceChar
    "utf-16le" -> decodeUtf16LEWith replaceChar
    "utf-16" -> sniffUtf16
    "utf-32be" -> decodeUtf32BEWith replaceChar
    "utf-32le" -> decodeUtf32LEWith replaceChar
    "utf-32" -> sniffUtf32
    -- FIXME Is this the best fallback for unsupported charsets?
    _ -> trace ("Unsupported text encoding: " ++ charset) $
        decodeUtf8With replaceChar

-- | Canonicalizes a charset name for lookup: lowercased, with whitespace and
-- double quotes removed (neither may occur inside a registered charset name).
normalizeName :: String -> String
normalizeName = map toLower . filter (\c -> c /= '"' && not (isSpace c))

-- | Decodes endian-unspecified UTF-16: big-endian if the data starts with a
-- big-endian byte-order mark, otherwise little-endian. The mark is not stripped.
sniffUtf16 :: BS.ByteString -> Text
sniffUtf16 bytes
    | BS.pack [0xFE, 0xFF] `BS.isPrefixOf` bytes = decodeUtf16BEWith replaceChar bytes
    | otherwise = decodeUtf16LEWith replaceChar bytes

-- | Decodes endian-unspecified UTF-32: big-endian if the data starts with a
-- big-endian byte-order mark, otherwise little-endian. The mark is not stripped.
sniffUtf32 :: BS.ByteString -> Text
sniffUtf32 bytes
    | BS.pack [0x00, 0x00, 0xFE, 0xFF] `BS.isPrefixOf` bytes = decodeUtf32BEWith replaceChar bytes
    | otherwise = decodeUtf32LEWith replaceChar bytes

-- | Decode-error handler: logs the decoder's description of the problem via
-- 'trace' and substitutes the Unicode replacement character (U+FFFD).
replaceChar :: OnDecodeError
replaceChar description _ = trace description $ Just '\xFFFD'

-- | Lists all charsets supported by convertCharset
charsets :: [Text]
charsets =
    [ "iso-8859-1"
    , "latin1"
    , "us-ascii"
    , "utf-8"
    , "utf-16be"
    , "utf-16le"
    , "utf-16"
    , "utf-32be"
    , "utf-32le"
    , "utf-32"
    ]