module Text.XML.HXT.Parser.XmlParsec
    ( charData
    , charData'
    , comment
    , pI
    , cDSect
    , document
    , document'
    , prolog
    , xMLDecl
    , xMLDecl'
    , versionInfo
    , misc
    , doctypedecl
    , markupdecl
    , sDDecl
    , element
    , content
    , contentWithTextDecl
    , textDecl
    , encodingDecl
    , xread
    , parseXmlAttrValue
    , parseXmlContent
    , parseXmlDocEncodingSpec
    , parseXmlDocument
    , parseXmlDTDPart
    , parseXmlEncodingSpec
    , parseXmlEntityEncodingSpec
    , parseXmlGeneralEntityValue
    , parseXmlPart
    , parseXmlText
    , parseNMToken
    , parseName
    , removeEncodingSpec
    )
where
import Text.ParserCombinators.Parsec
    ( GenParser
    , Parser
    , parse
    , (<?>), (<|>)
    , char
    , string
    , eof 
    , between
    , many, many1
    , option
    , try
    , unexpected
    , getPosition
    , getInput
    , sourceName
    )
import Text.XML.HXT.DOM.ShowXml
    ( xshow
    )
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.XmlNode
    ( mkElement
    , mkAttr
    , mkRoot
    , mkDTDElem
    , mkText
    , mkCmt
    , mkCdata
    , mkError
    , mkPi
    , isText
    , isRoot
    , getText
    , getChildren
    , getAttrl
    , getAttrName
    , changeAttrl
    , mergeAttrl
    )
import Text.XML.HXT.Parser.XmlCharParser
    ( xmlChar
    )
import qualified Text.XML.HXT.Parser.XmlTokenParser	as XT
import qualified Text.XML.HXT.Parser.XmlDTDTokenParser	as XD
import Data.Char	(toLower)
import Data.Maybe
charData		:: GenParser Char state XmlTrees
charData
    = many (charData' <|> XT.referenceT)
charData'		:: GenParser Char state XmlTree
charData'
    = try ( do
	    t <- XT.allBut1 many1 (\ c -> not (c `elem` "<&")) "]]>"
	    return (mkText $! t)
	  )
comment		:: GenParser Char state XmlTree
comment
    = ( do
	c <- between (try $ string "<!--") (string "-->") (XT.allBut many "--")
	return (mkCmt $! c)
      ) <?> "comment"
pI		:: GenParser Char state XmlTree
pI
    = between (try $ string "<?") (string "?>")
      ( do
	n <- pITarget
	p <- option "" (do
			_ <- XT.sPace
			XT.allBut many "?>"
		       )
	return $ mkPi (mkName n) [mkAttr (mkName a_value) [mkText p]]
      ) <?> "processing instruction"
      where
      pITarget	:: GenParser Char state String
      pITarget = ( do
		   n <- XT.name
		   if map toLower n == t_xml
		      then unexpected n
		      else return n
		 )
cDSect		:: GenParser Char state XmlTree
cDSect
    = do
      t <- between ( try $ string "<![CDATA[") (string "]]>") (XT.allBut many "]]>")
      return (mkCdata $! t)
      <?> "CDATA section"
document	:: GenParser Char state XmlTree
document
    = do
      pos <- getPosition
      dl <- document'
      return $ mkRoot [ mkAttr (mkName a_source) [mkText (sourceName pos)]
		      , mkAttr (mkName a_status) [mkText (show c_ok)]
		      ] dl
document'	:: GenParser Char state XmlTrees
document'
    = do
      pl <- prolog
      el <- element
      ml <- many misc
      eof
      return (pl ++ [el] ++ ml)
prolog		:: GenParser Char state XmlTrees
prolog
    = do
      xml     <- option [] xMLDecl'
      misc1   <- many misc
      dtdPart <- option [] doctypedecl
      misc2   <- many misc
      return (xml ++ misc1 ++ dtdPart ++ misc2)
xMLDecl		:: GenParser Char state XmlTrees
xMLDecl
    = between (try $ string "<?xml") (string "?>")
      ( do
	vi <- versionInfo
	ed <- option [] encodingDecl
	sd <- option [] sDDecl
	XT.skipS0
	return (vi ++ ed ++ sd)
      )
      <?> "xml declaration"
xMLDecl'	:: GenParser Char state XmlTrees
xMLDecl'
    = do
      al <- xMLDecl
      return [mkPi (mkName t_xml) al]
xMLDecl''	:: GenParser Char state XmlTree
xMLDecl''
    = do
      al     <- option [] (try xMLDecl)
      return (mkRoot al [])
versionInfo	:: GenParser Char state XmlTrees
versionInfo
    = ( do
	_ <- try ( do
		   XT.skipS
		   XT.keyword a_version
		 )
	XT.eq
	vi <- XT.quoted XT.versionNum
	return [mkAttr (mkName a_version) [mkText vi]]
      )
      <?> "version info (with quoted version number)"
misc		:: GenParser Char state XmlTree
misc
    = comment
      <|>
      pI
      <|>
      ( ( do
	  ws <- XT.sPace
	  return (mkText ws)
	) <?> ""
      )
doctypedecl	:: GenParser Char state XmlTrees
doctypedecl
    = between (try $ string "<!DOCTYPE") (char '>')
      ( do
	XT.skipS
	n <- XT.name
	exId <- option [] ( try ( do
				  XT.skipS
				  externalID
				)
			  )
	XT.skipS0
	markup <- option []
	          ( do
		    m <- between (char '[' ) (char ']') markupOrDeclSep
		    XT.skipS0
		    return m
		  )
	return [mkDTDElem DOCTYPE ((a_name, n) : exId) markup]
      )
markupOrDeclSep	:: GenParser Char state XmlTrees
markupOrDeclSep
    = ( do
	ll <- many ( markupdecl
		     <|>
		     declSep
		     <|>
		     XT.mkList conditionalSect
		   )
	return (concat ll)
      )
declSep		:: GenParser Char state XmlTrees
declSep
    = XT.mkList XT.peReferenceT
      <|>
      ( do
	XT.skipS
	return []
      )
markupdecl	:: GenParser Char state XmlTrees
markupdecl
    = XT.mkList
      ( pI
	<|>
	comment
	<|>
	XD.dtdDeclTokenizer
      )
sDDecl		:: GenParser Char state XmlTrees
sDDecl
    = do
      _ <- try (do
		XT.skipS
		XT.keyword a_standalone
	       )
      XT.eq
      sd <- XT.quoted (XT.keywords [v_yes, v_no])
      return [mkAttr (mkName a_standalone) [mkText sd]]
element		:: GenParser Char state XmlTree
element
    = ( do
	e <- elementStart
	elementRest e
      ) <?> "element"
elementStart		:: GenParser Char state (String, [(String, XmlTrees)])
elementStart
    = do
      n <- ( try ( do
		   _ <- char '<'
		   n <- XT.name
		   return n
		 )
	     <?> "start tag"
	   )
      ass <- attrList
      XT.skipS0
      return (n, ass)
      where
      attrList
	  = option [] ( do
			XT.skipS
			attrList'
		      )
      attrList'
	  = option [] ( do
			a1 <- attribute
			al <- attrList
			let (n, _v) = a1
			if isJust . lookup n $ al
			  then unexpected ("attribute name " ++ show n ++ " occurs twice in attribute list")
			  else return (a1 : al)
		      )
elementRest	:: (String, [(String, XmlTrees)]) -> GenParser Char state XmlTree
elementRest (n, al)
    = ( do
	_ <- try $ string "/>"
	return $! (mkElement (mkName n) (map (mkA $!) al) [])
      )
      <|>
      ( do
	_ <- XT.gt
	c <- content
	eTag n
	return $! (mkElement (mkName n) (map (mkA $!) al) $! c)
      )
      <?> "proper attribute list followed by \"/>\" or \">\""
    where
    mkA (n', ts') = mkAttr (mkName n') ts'
eTag		:: String -> GenParser Char state ()
eTag n'
    = do
      _ <- try ( string "</" ) <?> ""
      n <- XT.name
      XT.skipS0
      _ <- XT.gt
      if n == n'
	 then return ()
	 else unexpected ("illegal end tag </" ++ n ++ "> found, </" ++ n' ++ "> expected")
attribute	:: GenParser Char state (String, XmlTrees)
attribute
    = do
      n <- XT.name
      XT.eq
      v <- XT.attrValueT
      return (n, v)
content		:: GenParser Char state XmlTrees
content
    = do
      c1 <- charData
      cl <- many
	    ( do
	      l <- ( element
		     <|>
		     cDSect
		     <|>
		     pI
		     <|>
		     comment
		   )
	      c <- charData
	      return (l : c)
	    )
      return (c1 ++ concat cl)
contentWithTextDecl	:: GenParser Char state XmlTrees
contentWithTextDecl
    = do
      _ <- option [] textDecl
      content
conditionalSect		:: GenParser Char state XmlTree
conditionalSect
    = do
      _ <- try $ string "<!["
      cs <- many XD.dtdToken
      _ <- char '['
      sect <- condSectCont
      return (mkDTDElem CONDSECT [(a_value, sect)] cs)
    where
    condSectCont	:: GenParser Char state String
    condSectCont
	= ( do
	    _ <- try $ string "]]>"
	    return ""
	  )
          <|>
	  ( do
	    _ <- try $ string "<!["
	    cs1 <- condSectCont
	    cs2 <- condSectCont
	    return ("<![" ++ cs1 ++ "]]>" ++ cs2)
	  )
	  <|>
	  ( do
	    c  <- xmlChar
	    cs <- condSectCont
	    return (c : cs)
	  )
externalID	:: GenParser Char state Attributes
externalID
    = ( do
	_ <- XT.keyword k_system
	XT.skipS
	lit <- XT.systemLiteral
	return [(k_system, lit)]
      )
      <|>
      ( do
	_ <- XT.keyword k_public
	XT.skipS
	pl <- XT.pubidLiteral
	XT.skipS
	sl <- XT.systemLiteral
	return [ (k_system, sl)
	       , (k_public, pl) ]
      )
      <?> "SYSTEM or PUBLIC declaration"
textDecl	:: GenParser Char state XmlTrees
textDecl
    = between (try $ string "<?xml") (string "?>")
      ( do
	vi <- option [] versionInfo
	ed <- encodingDecl
	XT.skipS0
	return (vi ++ ed)
      )
      <?> "text declaration"
textDecl''	:: GenParser Char state XmlTree
textDecl''
    = do
      al    <- option [] (try textDecl)
      return (mkRoot al [])
encodingDecl	:: GenParser Char state XmlTrees
encodingDecl
    = do
      _ <- try ( do
		 XT.skipS
		 XT.keyword a_encoding
	       )
      XT.eq
      ed <- XT.quoted XT.encName
      return [mkAttr (mkName a_encoding) [mkText ed]]
xread			:: String -> XmlTrees
xread str
    = parseXmlFromString parser loc str
    where
    loc = "string: " ++ show (if length str > 40 then take 40 str ++ "..." else str)
    parser = do
	     res <- content		
	     eof			
	     return res
parseXmlContent		:: XmlTree -> XmlTrees
parseXmlContent
    = xread . xshow . (:[])
parseXmlText		:: Parser XmlTrees -> String -> XmlTree -> XmlTrees
parseXmlText p loc	= parseXmlFromString p loc . xshow . (:[])
parseXmlDocument	:: String -> String -> XmlTrees
parseXmlDocument	= parseXmlFromString document'
parseXmlFromString	:: Parser XmlTrees -> String -> String -> XmlTrees
parseXmlFromString parser loc
    = either ((:[]) . mkError c_err . (++ "\n") . show) id . parse parser loc
removeEncodingSpec	:: XmlTree -> XmlTrees
removeEncodingSpec t
    | isText t
	= ( either ((:[]) . mkError c_err . (++ "\n") . show) ((:[]) . mkText)
	    . parse parser "remove encoding spec"
	    . fromMaybe ""
	    . getText
	  ) t
    | otherwise
	= [t]
    where
    parser :: Parser String
    parser = do
	     _ <- option [] textDecl
	     getInput
parseXmlPart	:: Parser XmlTrees -> String -> String -> XmlTree -> XmlTrees
parseXmlPart parser expected context t
    = parseXmlText ( do
		     res <- parser
		     eof <?> expected
		     return res
		   ) context
      $ t
parseXmlDTDPart	:: String -> XmlTree -> XmlTrees
parseXmlDTDPart
    = parseXmlPart markupOrDeclSep "markup declaration"
parseXmlGeneralEntityValue	:: String -> XmlTree -> XmlTrees
parseXmlGeneralEntityValue
    = parseXmlPart content "general entity value"
parseXmlAttrValue	:: String -> XmlTree -> XmlTrees
parseXmlAttrValue
    = parseXmlPart (XT.attrValueT' "<&") "attribute value"
parseNMToken		:: String -> XmlTree -> XmlTrees
parseNMToken
    = parseXmlPart (many1 XT.nmtokenT) "nmtoken"
parseName		:: String -> XmlTree -> XmlTrees
parseName
    = parseXmlPart (many1 XT.nameT) "name"
parseXmlEncodingSpec	:: Parser XmlTree -> XmlTree -> XmlTrees
parseXmlEncodingSpec encDecl x
    = (:[]) .
      ( if isRoot x
	then parseEncSpec
	else id
      ) $ x
    where
    parseEncSpec r
	= case ( parse encDecl source . xshow . getChildren $ r ) of
	  Right t
	      -> changeAttrl (mergeAttrl . fromMaybe [] . getAttrl $ t) r
	  Left _
	      -> r
	where
	
	source = xshow . concat . map getChildren . filter ((== a_source) . maybe "" qualifiedName . getAttrName) . fromMaybe [] . getAttrl $ r
parseXmlEntityEncodingSpec	:: XmlTree -> XmlTrees
parseXmlEntityEncodingSpec	= parseXmlEncodingSpec textDecl''
parseXmlDocEncodingSpec		:: XmlTree -> XmlTrees
parseXmlDocEncodingSpec		= parseXmlEncodingSpec xMLDecl''