-- | Detection and of character encodings of HTTP message bodies
module Network.HTTP.Encoding.Character 
       (getContentTypeAndCharacterEncoding
       ,setCharacterEncoding
       ,tryAsUTF8) where

import Network.HTTP
import Codec.Text.IConv
import Codec.MIME.Parse
import Codec.MIME.Type
import Control.Applicative hiding (many, (<|>))
import Data.Char (toLower, ord)
import Data.ByteString.Lazy.UTF8 (uncons)
import Data.ByteString.Lazy (ByteString)
import Control.Monad

-- | Looks for and parses the ContentType header. Returns the
-- (optional) content-type and (optional) the character encoding name.
getContentTypeAndCharacterEncoding :: [Header] -> 
                                      (Maybe Type, Maybe EncodingName)
getContentTypeAndCharacterEncoding [] = (Nothing, Nothing)
getContentTypeAndCharacterEncoding (Header HdrContentType str:_) = 
  parseContentTypeHdr str
getContentTypeAndCharacterEncoding (_:hs) = getContentTypeAndCharacterEncoding hs

-- | Sets the given character encoding name in the given header. If
-- there is no content type header in the header list, it defaults to
-- the text/plain content type
setCharacterEncoding :: EncodingName -> [Header] -> [Header]
setCharacterEncoding enc [] = [plainText enc]
setCharacterEncoding enc ((Header HdrContentType str):rest) =
  let (mtype, _) = parseContentTypeHdr str
      newhdr = case mtype of
        Nothing -> plainText enc
        Just ty -> Header HdrContentType $ showType $ setMIMEEncoding ty enc
  in newhdr:rest

plainText :: EncodingName -> Header
plainText enc = Header HdrContentType $ showType $
                Type {mimeType = (Text "plain")
                     ,mimeParams = [("charset", enc)]}

setMIMEEncoding :: Type -> EncodingName -> Type
setMIMEEncoding ty enc = ty {mimeParams = map replaceEnc $ mimeParams ty}
  where replaceEnc (pname, _) | pname == "charset" = (pname, enc)
        replaceEnc x = x

parseContentTypeHdr :: String -> (Maybe Type, Maybe EncodingName)
parseContentTypeHdr str = 
  case parseContentType str of
    Nothing    -> (Nothing, Nothing)
    Just ctype -> (Just ctype, 
                   (map toLower) <$> lookup "charset" (mimeParams ctype))

-- | Tries to decode a bytestring as UTF-8. Returns nothing if any
-- illegal characters are encountered
tryAsUTF8 :: ByteString -> Maybe String
tryAsUTF8 bs = uncons bs >>= \(c, bs') ->
  if (ord c == 0xFFFD) then mzero
  else tryAsUTF8 bs' >>= \s -> return (c:s)