-- | Detection and of character encodings of HTTP message bodies module Network.HTTP.Encoding.Character (getContentTypeAndCharacterEncoding ,setCharacterEncoding ,tryAsUTF8) where import Network.HTTP import Codec.Text.IConv import Codec.MIME.Parse import Codec.MIME.Type import Control.Applicative hiding (many, (<|>)) import Data.Char (toLower, ord) import Data.ByteString.Lazy.UTF8 (uncons) import Data.ByteString.Lazy (ByteString) import Control.Monad -- | Looks for and parses the ContentType header. Returns the -- (optional) content-type and (optional) the character encoding name. getContentTypeAndCharacterEncoding :: [Header] -> (Maybe Type, Maybe EncodingName) getContentTypeAndCharacterEncoding [] = (Nothing, Nothing) getContentTypeAndCharacterEncoding (Header HdrContentType str:_) = parseContentTypeHdr str getContentTypeAndCharacterEncoding (_:hs) = getContentTypeAndCharacterEncoding hs -- | Sets the given character encoding name in the given header. If -- there is no content type header in the header list, it defaults to -- the text/plain content type setCharacterEncoding :: EncodingName -> [Header] -> [Header] setCharacterEncoding enc [] = [plainText enc] setCharacterEncoding enc ((Header HdrContentType str):rest) = let (mtype, _) = parseContentTypeHdr str newhdr = case mtype of Nothing -> plainText enc Just ty -> Header HdrContentType $ showType $ setMIMEEncoding ty enc in newhdr:rest plainText :: EncodingName -> Header plainText enc = Header HdrContentType $ showType $ Type {mimeType = (Text "plain") ,mimeParams = [("charset", enc)]} setMIMEEncoding :: Type -> EncodingName -> Type setMIMEEncoding ty enc = ty {mimeParams = map replaceEnc $ mimeParams ty} where replaceEnc (pname, _) | pname == "charset" = (pname, enc) replaceEnc x = x parseContentTypeHdr :: String -> (Maybe Type, Maybe EncodingName) parseContentTypeHdr str = case parseContentType str of Nothing -> (Nothing, Nothing) Just ctype -> (Just ctype, (map toLower) <$> lookup "charset" (mimeParams ctype)) -- | Tries to decode a bytestring as UTF-8. Returns nothing if any -- illegal characters are encountered tryAsUTF8 :: ByteString -> Maybe String tryAsUTF8 bs = uncons bs >>= \(c, bs') -> if (ord c == 0xFFFD) then mzero else tryAsUTF8 bs' >>= \s -> return (c:s)