module Text.XML.HaXml.Parse
  (
  
    xmlParse, xmlParse'
  
  , dtdParse, dtdParse'
  ) where
import Prelude hiding (either,maybe,sequence)
import qualified Prelude (either)
import Maybe hiding (maybe)
import List (intersperse)       
import Char (isSpace,isDigit,isHexDigit)
import Monad hiding (sequence)
import Numeric (readDec,readHex)
import Text.XML.HaXml.Types
import Text.XML.HaXml.Lex
import Text.ParserCombinators.HuttonMeijerWallace
#if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \
    ( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__)
import System.IO.Unsafe (unsafePerformIO)
#elif defined(__GLASGOW_HASKELL__)
import IOExts (unsafePerformIO)
#elif defined(__NHC__)
import IOExtras (unsafePerformIO)
#elif defined(__HBC__)
import UnsafePerformIO
#endif
#if defined(DEBUG)
#  if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \
      ( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__)
import Debug.Trace(trace)
#  elif defined(__GLASGOW_HASKELL__)
import IOExts(trace)
#  elif defined(__NHC__) || defined(__HBC__)
import NonStdTrace
#  endif
debug :: a -> String -> a
v `debug` s = trace s v
#else
v `debug` s = v
#endif
xmlParse :: String -> String -> Document
xmlParse' :: String -> String -> Either String Document
dtdParse  :: String -> String -> Maybe DocTypeDecl
dtdParse' :: String -> String -> Either String (Maybe DocTypeDecl)
xmlParse  name  = Prelude.either error id . xmlParse' name
dtdParse  name  = Prelude.either error id . dtdParse' name
xmlParse' name  = sanitycheck . papply' (toEOF document) emptySTs . xmlLex name
dtdParse' name  = sanitycheck . papply' justDTD  emptySTs . xmlLex name
sanitycheck :: Show p => Either String [(a,s,[Either String (p,t)])]
                         -> Either String a
sanitycheck (Left err)          = Left err
sanitycheck (Right [])          = Left "***Error, no parse: document not XML?"
sanitycheck (Right ((x,_,_):_)) = Right x
type SymTabs = (SymTab PEDef, SymTab EntityDef)
emptySTs :: SymTabs
emptySTs = (emptyST, emptyST)
addPE :: String -> PEDef -> SymTabs -> SymTabs
addPE n v (pe,ge) = (addST n v pe, ge)
addGE :: String -> EntityDef -> SymTabs -> SymTabs
addGE n v (pe,ge) = let newge = addST n v ge in newge `seq` (pe, newge)
lookupPE :: String -> SymTabs -> Maybe PEDef
lookupPE s (pe,ge) = lookupST s pe
flattenEV :: EntityValue -> String
flattenEV (EntityValue evs) = concatMap flatten evs
  where
    flatten (EVString s)          = s
    flatten (EVRef (RefEntity r)) = "&" ++r++";"
    flatten (EVRef (RefChar r))   = "&#"++show r++";"
fst3 (a,_,_) = a
snd3 (_,a,_) = a
thd3 (_,_,a) = a
type XParser a = Parser SymTabs (Posn,TokenT) String a
name :: XParser Name
name = do {(p,TokName s) <- item; return s}
string, freetext :: XParser String
string   = do {(p,TokName s) <- item; return s}
freetext = do {(p,TokFreeText s) <- item; return s}
maybe :: XParser a -> XParser (Maybe a)
maybe p =
    ( p >>= return . Just) +++
    ( return Nothing)
either :: XParser a -> XParser b -> XParser (Either a b)
either p q =
    ( p >>= return . Left) +++
    ( q >>= return . Right)
word :: String -> XParser ()
word s = P (\st inp-> case inp of {
               (Left err: _) -> Left err;
               (Right (p,TokName n):ts) -> if s==n then Right [((),st,ts)]
                                                   else Right [];
               (Right (p,TokFreeText n):ts) -> if s==n then Right [((),st,ts)]
                                                       else Right [];
               ts -> Right [] } )
posn :: XParser Posn
posn = P (\st inp-> case inp of {
                     (Left err:_)    -> Left  err;
                     (Right (p,_):_) -> Right [(p,st,inp)];
                     []              -> Right [] } )
nmtoken :: XParser NmToken
nmtoken = (string +++ freetext)
peRef :: XParser a -> XParser a
peRef p =
    p +++
    ( do pn <- posn
         n <- pereference
         tr <- stquery (lookupPE n) `debug` ("Looking up %"++n)
         case tr of
           (Just (PEDefEntityValue ev)) ->
                      do reparse (xmlReLex (posInNewCxt ("macro %"++n++";")
                                                        (Just pn))
                                           (flattenEV ev))
                               `debug` ("  defn:  "++flattenEV ev)
                         peRef p
           (Just (PEDefExternalID (PUBLIC _ (SystemLiteral f)))) ->
                      do let val = unsafePerformIO (readFile f)
                         reparse (xmlReLex (posInNewCxt ("file "++f)
                                                        (Just pn)) val)
                               `debug` ("  reading from file "++f)
                         peRef p
           (Just (PEDefExternalID (SYSTEM (SystemLiteral f)))) ->
                      do let val = unsafePerformIO (readFile f)
                         reparse (xmlReLex (posInNewCxt ("file "++f)
                                                        (Just pn)) val)
                               `debug` ("  reading from file "++f)
                         peRef p
           Nothing -> mzero `elserror` "PEReference use before definition" )
blank :: XParser a -> XParser a
blank p =
    p +++
    ( do n <- pereference
         tr <- stquery (lookupPE n) `debug` ("Looking up %"++n++" (is blank?)")
         case tr of
           (Just (PEDefEntityValue ev))
                    | all isSpace (flattenEV ev)  ->
                            do blank p `debug` "Empty macro definition"
           (Just _) -> mzero
           Nothing  -> mzero `elserror` "PEReference use before definition" )
justDTD :: XParser (Maybe DocTypeDecl)
justDTD =
  ( do (ExtSubset _ ds) <- extsubset `debug` "Trying external subset"
       if null ds then mzero
         else return (Just (DTD "extsubset" Nothing (concatMap extract ds)))
  ) +++
  ( do (Prolog _ _ dtd _) <- prolog
       return dtd )
 where extract (ExtMarkupDecl m) = [m]
       extract (ExtConditionalSect (IncludeSect i)) = concatMap extract i
       extract (ExtConditionalSect (IgnoreSect i)) = []
document :: XParser Document
document = do
    p <- prolog `elserror` "unrecognisable XML prolog"
    e <- element `elserror` "no toplevel document element"
    ms <- many misc
    (_,ge) <- stget
    return (Document p ge e ms)
comment :: XParser Comment
comment = do
    bracket (tok TokCommentOpen) freetext (tok TokCommentClose)
processinginstruction :: XParser ProcessingInstruction
processinginstruction = do
    tok TokPIOpen
    n <- string  `elserror` "processing instruction has no target"
    f <- freetext
    tok TokPIClose `elserror` "missing ?>"
    return (n, f)
cdsect :: XParser CDSect
cdsect = do
    tok TokSectionOpen
    bracket (tok (TokSection CDATAx)) chardata (tok TokSectionClose)
prolog :: XParser Prolog
prolog = do
    x <- maybe xmldecl
    m1 <- many misc
    dtd <- maybe doctypedecl
    m2 <- many misc
    return (Prolog x m1 dtd m2)
xmldecl :: XParser XMLDecl
xmldecl = do
    tok TokPIOpen
    (word "xml" +++ word "XML")
    p <- posn
    s <- freetext
    tok TokPIClose `elserror` "missing ?> in <?xml ...?>"
    raise ((papply' aux emptySTs . xmlReLex p) s)
  where
    aux = do
      v <- versioninfo  `elserror` "missing XML version info"
      e <- maybe encodingdecl
      s <- maybe sddecl
      return (XMLDecl v e s)
    raise (Left err) = mzero `elserror` err
    raise (Right ok) = (return . fst3 . head) ok
versioninfo :: XParser VersionInfo
versioninfo = do
    (word "version" +++ word "VERSION")
    tok TokEqual
    bracket (tok TokQuote) freetext (tok TokQuote)
misc :: XParser Misc
misc =
    ( comment >>= return . Comment) +++
    ( processinginstruction >>= return . PI)
doctypedecl :: XParser DocTypeDecl
doctypedecl = do
    tok TokSpecialOpen
    tok (TokSpecial DOCTYPEx)
    n <- name
    eid <- maybe externalid
    es <- maybe (bracket (tok TokSqOpen)
                         (many (peRef markupdecl))
                         (tok TokSqClose))
    blank (tok TokAnyClose)  `elserror` "missing > in DOCTYPE decl"
    return (DTD n eid (case es of { Nothing -> []; Just e -> e }))
markupdecl :: XParser MarkupDecl
markupdecl =
    ( elementdecl >>= return . Element) +++
    ( attlistdecl >>= return . AttList) +++
    ( entitydecl >>= return . Entity) +++
    ( notationdecl >>= return . Notation) +++
    ( misc >>= return . MarkupMisc)
extsubset :: XParser ExtSubset
extsubset = do
    td <- maybe textdecl
    ds <- many (peRef extsubsetdecl)
    return (ExtSubset td ds)
extsubsetdecl :: XParser ExtSubsetDecl
extsubsetdecl =
    ( markupdecl >>= return . ExtMarkupDecl) +++
    ( conditionalsect >>= return . ExtConditionalSect)
sddecl :: XParser SDDecl
sddecl = do
    (word "standalone" +++ word "STANDALONE")
    tok TokEqual `elserror` "missing = in 'standalone' decl"
    bracket (tok TokQuote)
            ( (word "yes" >> return True) +++
              (word "no" >> return False) `elserror`
              "'standalone' decl requires 'yes' or 'no' value" )
            (tok TokQuote)
element :: XParser Element
element = do
    tok TokAnyOpen
    (ElemTag n as) <- elemtag
    (( do tok TokEndClose
          return (Elem n as [])) +++
     ( do tok TokAnyClose
          cs <- many content
          p <- posn
          m <- bracket (tok TokEndOpen) name (tok TokAnyClose)
          checkmatch p n m
          return (Elem n as cs))
     `elserror` "missing > or /> in element tag")
checkmatch :: Posn -> Name -> Name -> XParser ()
checkmatch p n m =
  if n == m then return ()
  else mzero `elserror` ("tag <"++n++"> terminated by </"++m++">")
elemtag :: XParser ElemTag
elemtag = do
    n <- name `elserror` "malformed element tag"
    as <- many attribute
    return (ElemTag n as)
attribute :: XParser Attribute
attribute = do
    n <- name
    tok TokEqual `elserror` "missing = in attribute"
    v <- attvalue `elserror` "missing attvalue"
    return (n,v)
content :: XParser Content
content =
    ( element >>= return . CElem) +++
    ( chardata >>= return . CString False) +++
    ( reference >>= return . CRef) +++
    ( cdsect >>= return . CString True) +++
    ( misc >>= return . CMisc)
elementdecl :: XParser ElementDecl
elementdecl = do
    tok TokSpecialOpen
    tok (TokSpecial ELEMENTx)
    n <- peRef name `elserror` "missing identifier in ELEMENT decl"
    c <- peRef contentspec `elserror` "missing content spec in ELEMENT decl"
    blank (tok TokAnyClose) `elserror`
       ("expected > terminating ELEMENT decl"
       ++"\n    element name was "++show n
       ++"\n    contentspec was "++(\(ContentSpec p)-> show p)c)
    return (ElementDecl n c)
contentspec :: XParser ContentSpec
contentspec =
    ( peRef (word "EMPTY") >> return EMPTY) +++
    ( peRef (word "ANY") >> return ANY) +++
    ( peRef mixed >>= return . Mixed) +++
    ( peRef cp >>= return . ContentSpec)
choice :: XParser [CP]
choice = do
    bracket (tok TokBraOpen `debug` "Trying choice")
            (peRef cp `sepby1` blank (tok TokPipe))
            (blank (tok TokBraClose `debug` "Succeeded with choice"))
sequence :: XParser [CP]
sequence = do
    bracket (tok TokBraOpen `debug` "Trying sequence")
            (peRef cp `sepby1` blank (tok TokComma))
            (blank (tok TokBraClose `debug` "Succeeded with sequence"))
cp :: XParser CP
cp =
    ( do n <- name
         m <- modifier
         let c = TagName n m
         return c `debug` ("ContentSpec: name "++show c)) +++
    ( do ss <- sequence
         m <- modifier
         let c = Seq ss m
         return c `debug` ("ContentSpec: sequence "++show c)) +++
    ( do cs <- choice
         m <- modifier
         let c = Choice cs m
         return c `debug` ("ContentSpec: choice "++show c))
modifier :: XParser Modifier
modifier =
    ( tok TokStar >> return Star) +++
    ( tok TokQuery >> return Query) +++
    ( tok TokPlus >> return Plus) +++
    ( return None)
instance Show CP where
    show (TagName n m) = n++show m
    show (Choice cps m) = '(': concat (intersperse "|" (map show cps))
                          ++")"++show m
    show (Seq cps m) = '(': concat (intersperse "," (map show cps))
                          ++")"++show m
instance Show Modifier where
    show None = ""
    show Query = "?"
    show Star = "*"
    show Plus = "+"
mixed :: XParser Mixed
mixed = do
    tok TokBraOpen
    peRef (do tok TokHash
              word "PCDATA")
    cont
  where
    cont = ( do cs <- many (peRef (do tok TokPipe
                                      peRef name))
                blank (tok TokBraClose >> tok TokStar)
                return (PCDATAplus cs)) +++
           ( blank (tok TokBraClose >> tok TokStar) >> return PCDATA) +++
           ( blank (tok TokBraClose) >> return PCDATA)
attlistdecl :: XParser AttListDecl
attlistdecl = do
    tok TokSpecialOpen
    tok (TokSpecial ATTLISTx)
    n <- peRef name `elserror` "missing identifier in ATTLIST"
    ds <- peRef (many (peRef attdef))
    blank (tok TokAnyClose) `elserror` "missing > terminating ATTLIST"
    return (AttListDecl n ds)
attdef :: XParser AttDef
attdef =
  do n <- peRef name
     t <- peRef atttype `elserror` "missing attribute type in attlist defn"
     d <- peRef defaultdecl
     return (AttDef n t d)
atttype :: XParser AttType
atttype =
    ( word "CDATA" >> return StringType) +++
    ( tokenizedtype >>= return . TokenizedType) +++
    ( enumeratedtype >>= return . EnumeratedType)
tokenizedtype :: XParser TokenizedType
tokenizedtype =
    ( word "ID" >> return ID) +++
    ( word "IDREF" >> return IDREF) +++
    ( word "IDREFS" >> return IDREFS) +++
    ( word "ENTITY" >> return ENTITY) +++
    ( word "ENTITIES" >> return ENTITIES) +++
    ( word "NMTOKEN" >> return NMTOKEN) +++
    ( word "NMTOKENS" >> return NMTOKENS)
enumeratedtype :: XParser EnumeratedType
enumeratedtype =
    ( notationtype >>= return . NotationType) +++
    ( enumeration >>= return . Enumeration)
notationtype :: XParser NotationType
notationtype = do
    word "NOTATION"
    bracket (tok TokBraOpen)
            (peRef name `sepby1` peRef (tok TokPipe))
            (blank (tok TokBraClose))
enumeration :: XParser Enumeration
enumeration =
    bracket (tok TokBraOpen)
            (peRef nmtoken `sepby1` peRef ((tok TokPipe)))
            (blank (tok TokBraClose))
defaultdecl :: XParser DefaultDecl
defaultdecl =
    ( tok TokHash >> word "REQUIRED" >> return REQUIRED) +++
    ( tok TokHash >> word "IMPLIED" >> return IMPLIED) +++
    ( do f <- maybe (tok TokHash >> word "FIXED" >> return FIXED)
         a <- peRef attvalue
         return (DefaultTo a f))
conditionalsect :: XParser ConditionalSect
conditionalsect =
    ( do tok TokSectionOpen
         peRef (tok (TokSection INCLUDEx))
         tok TokSqOpen `elserror` "missing [ after INCLUDE"
         i <- many (peRef extsubsetdecl)
         tok TokSectionClose `elserror` "missing ]]> for INCLUDE section"
         return (IncludeSect i)) +++
    ( do tok TokSectionOpen
         peRef (tok (TokSection IGNOREx))
         tok TokSqOpen `elserror` "missing [ after IGNORE"
         i <- many newIgnore  
         tok TokSectionClose `elserror` "missing ]]> for IGNORE section"
         return (IgnoreSect []))
newIgnore :: XParser Ignore
newIgnore =
    ( do tok TokSectionOpen
         many newIgnore `debug` "IGNORING conditional section"
         tok TokSectionClose
         return Ignore `debug` "end of IGNORED conditional section") +++
    ( do t <- nottok [TokSectionOpen,TokSectionClose]
         return Ignore  `debug` ("ignoring: "++show t))
ignoresectcontents :: XParser IgnoreSectContents
ignoresectcontents = do
    i <- ignore
    is <- many (do tok TokSectionOpen
                   ic <- ignoresectcontents
                   tok TokSectionClose
                   ig <- ignore
                   return (ic,ig))
    return (IgnoreSectContents i is)
ignore :: XParser Ignore
ignore = do
  is <- many1 (nottok [TokSectionOpen,TokSectionClose])
  return Ignore  `debug` ("ignored all of: "++show is)
reference :: XParser Reference
reference = do
    bracket (tok TokAmp) (freetext >>= val) (tok TokSemi)
  where
    val ('#':'x':i) | all isHexDigit i
                    = return . RefChar . fst . head . readHex $ i
    val ('#':i)     | all isDigit i
                    = return . RefChar . fst . head . readDec $ i
    val name        = return . RefEntity $ name
pereference :: XParser PEReference
pereference = do
    bracket (tok TokPercent) nmtoken (tok TokSemi)
entitydecl :: XParser EntityDecl
entitydecl =
    ( gedecl >>= return . EntityGEDecl) +++
    ( pedecl >>= return . EntityPEDecl)
gedecl :: XParser GEDecl
gedecl = do
    tok TokSpecialOpen
    tok (TokSpecial ENTITYx)
    n <- name
    e <- entitydef `elserror` "missing entity defn in G ENTITY decl"
    tok TokAnyClose `elserror` "expected > terminating G ENTITY decl"
    stupd (addGE n e)
    return (GEDecl n e)
pedecl :: XParser PEDecl
pedecl = do
    tok TokSpecialOpen
    tok (TokSpecial ENTITYx)
    tok TokPercent
    n <- name
    e <- pedef `elserror` "missing entity defn in P ENTITY decl"
    tok TokAnyClose `elserror` "expected > terminating P ENTITY decl"
    stupd (addPE n e)
    return (PEDecl n e)
entitydef :: XParser EntityDef
entitydef =
    ( entityvalue >>= return . DefEntityValue) +++
    ( do eid <- externalid
         ndd <- maybe ndatadecl
         return (DefExternalID eid ndd))
pedef :: XParser PEDef
pedef =
    ( entityvalue >>= return . PEDefEntityValue) +++
    ( externalid  >>= return . PEDefExternalID)
externalid :: XParser ExternalID
externalid =
    ( do word "SYSTEM"
         s <- systemliteral
         return (SYSTEM s)) +++
    ( do word "PUBLIC"
         p <- pubidliteral
         s <- systemliteral
         return (PUBLIC p s))
ndatadecl :: XParser NDataDecl
ndatadecl = do
    word "NDATA"
    n <- name
    return (NDATA n)
textdecl :: XParser TextDecl
textdecl = do
    tok TokPIOpen
    (word "xml" +++ word "XML")
    v <- maybe versioninfo
    e <- encodingdecl
    tok TokPIClose `elserror` "expected ?> terminating text decl"
    return (TextDecl v e)
extparsedent :: XParser ExtParsedEnt
extparsedent = do
    t <- maybe textdecl
    c <- content
    return (ExtParsedEnt t c)
extpe :: XParser ExtPE
extpe = do
    t <- maybe textdecl
    e <- many (peRef extsubsetdecl)
    return (ExtPE t e)
encodingdecl :: XParser EncodingDecl
encodingdecl = do
    (word "encoding" +++ word "ENCODING")
    tok TokEqual `elserror` "expected = in 'encoding' decl"
    f <- bracket (tok TokQuote) freetext (tok TokQuote)
    return (EncodingDecl f)
notationdecl :: XParser NotationDecl
notationdecl = do
    tok TokSpecialOpen
    tok (TokSpecial NOTATIONx)
    n <- name
    e <- either externalid publicid
    tok TokAnyClose `elserror` "expected > terminating NOTATION decl"
    return (NOTATION n e)
publicid :: XParser PublicID
publicid = do
    word "PUBLIC"
    p <- pubidliteral
    return (PUBLICID p)
entityvalue :: XParser EntityValue
entityvalue = do
 
    tok TokQuote
    evs <- many (peRef ev)
    tok TokQuote `elserror` "expected quote to terminate entityvalue"
    return (EntityValue evs)
ev :: XParser EV
ev =
    ( (string+++freetext) >>= return . EVString) +++
    ( reference >>= return . EVRef)
attvalue :: XParser AttValue
attvalue = do
    avs <- bracket (tok TokQuote)
                   (many (either freetext reference))
                   (tok TokQuote)
    return (AttValue avs)
systemliteral :: XParser SystemLiteral
systemliteral = do
    s <- bracket (tok TokQuote) freetext (tok TokQuote)
    return (SystemLiteral s)            
pubidliteral :: XParser PubidLiteral
pubidliteral = do
    s <- bracket (tok TokQuote) freetext (tok TokQuote)
    return (PubidLiteral s)             
chardata :: XParser CharData
chardata = freetext