-- ------------------------------------------------------------

{-
   Module     : Text.XML.HXT.Parser.ProtocolHandlerUtil
   Copyright  : Copyright (C) 2008 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   Protocol handler utility functions

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Parser.ProtocolHandlerUtil
    ( parseContentType
    )

where

import Text.XML.HXT.DOM.XmlKeywords

import Text.XML.HXT.DOM.Util    ( stringToUpper
                                , stringTrim
                                )

import qualified Text.ParserCombinators.Parsec as P

-- ------------------------------------------------------------

-- |
-- Extract the mime type and, if present, the charset spec from the value
-- of a Content-Type header, e.g. \"text\/html; charset=ISO-8859-1\"
--
-- The result always contains an entry for 'transferMimeType': the text in
-- front of the first @;@ with surrounding whitespace removed.
-- If one of the @;@ separated parameters following the mime type is a
-- @charset@ parameter, an entry for 'transferEncoding' with the charset
-- name converted to upper case is appended.
--
-- * The parameter name @charset@ is matched case insensitively.
--
-- * The @charset@ parameter does not need to be the first parameter,
--   other parameters in front of it are skipped.
--
-- * Sometimes servers deliver the charset spec in single or double quotes,
--   these are removed.
--
-- * The charset name ends at a quote, a @;@, a blank or a tab, so
--   parameters following the charset are not taken as part of its name.
--
-- The parser never fails: when no charset parameter can be found,
-- only the mime type entry is returned.

parseContentType        :: P.Parser [(String, String)]
parseContentType
    = do
      mt <- P.many (P.noneOf ";")
      -- the whole parameter search is wrapped in try, so a header without
      -- a usable charset parameter still yields the mime type entry
      cs <- P.option [] (P.try charsetParam)
      return ( (transferMimeType, stringTrim mt) : cs )
    where
    -- search the list of ";" separated parameters for the charset parameter
    charsetParam        :: P.Parser [(String, String)]
    charsetParam
        = do
          _ <- P.char ';'
          -- a stray apostrophe in front of the parameter name is tolerated,
          -- this keeps the set of accepted headers backward compatible
          _ <- P.many (P.oneOf " \t'")
          P.try charsetValue
            P.<|>
            -- not the charset parameter: skip it and inspect the next one
            ( P.many (P.noneOf ";") >> charsetParam )

    -- parse "charset=" (in any letter case) followed by the charset name
    charsetValue        :: P.Parser [(String, String)]
    charsetValue
        = do
          mapM_ P.oneOf ["cC", "hH", "aA", "rR", "sS", "eE", "tT"]
          _ <- P.char '='
          _ <- P.option '"' (P.oneOf "\"'")
          cs <- P.many1 (P.noneOf "\"'; \t")
          return [ (transferEncoding, stringToUpper cs) ]

-- ------------------------------------------------------------