-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.ProcessDocument
   Copyright  : Copyright (C) 2011 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   Compound arrows for reading, parsing, validating and writing XML documents

   All arrows use IO and a global state for options, errorhandling, ...
-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.ProcessDocument
    ( parseXmlDocument
    , parseXmlDocumentWithExpat
    , parseHtmlDocument
    , validateDocument
    , propagateAndValidateNamespaces
    , andValidateNamespaces
    , getDocumentContents
    )
where

import           Control.Arrow
import           Control.Arrow.ArrowIf
import           Control.Arrow.ArrowList
import           Control.Arrow.ArrowTree
import           Control.Arrow.ListArrow                      (fromLA)
import           Control.Arrow.NTreeEdit

import           Text.XML.HXT.DOM.Interface

import           Text.XML.HXT.Arrow.XmlArrow
import           Text.XML.HXT.Arrow.XmlState
import           Text.XML.HXT.Arrow.XmlState.TypeDefs

import           Text.XML.HXT.Arrow.ParserInterface           (parseHtmlDoc,
                                                               parseXmlDoc)

import           Text.XML.HXT.Arrow.Edit                      (substAllXHTMLEntityRefs,
                                                               transfAllCharRef)

import           Text.XML.HXT.Arrow.GeneralEntitySubstitution (processGeneralEntities)

import           Text.XML.HXT.Arrow.DTDProcessing             (processDTD)

import           Text.XML.HXT.Arrow.DocumentInput             (getXmlContents)

import           Text.XML.HXT.Arrow.Namespace                 (propagateNamespaces, validateNamespaces)
import           Text.XML.HXT.DTDValidation.Validation        (generalEntitiesDefined,
                                                               getDTDSubset,
                                                               transform,
                                                               validate)

-- ------------------------------------------------------------

{- |
XML parser

Input tree must be a root tree with a text tree as child containing the document to be parsed.
The parser generates from the input string a tree of a wellformed XML document,
processes the DTD (parameter substitution, conditional DTD parts, ...) and
substitutes all general entity references. Next step is character reference substitution.
Last step is the document validation.
Validation can be controlled by an extra parameter.

Example:

> parseXmlDocument True    -- parse and validate document
>
> parseXmlDocument False   -- only parse document, don't validate

This parser is useful for applications processing correct XML documents.
-}

parseXmlDocument        :: Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree
parseXmlDocument :: Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree
parseXmlDocument Bool
validateD Bool
substDTD Bool
substHTML Bool
validateRX
    = ( IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren ( ( String -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
                            IOSLA (XIOState s) XmlTree String
-> IOSLA (XIOState s) XmlTree String
-> IOSLA (XIOState s) XmlTree (String, String)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&&
                            IOStateArrow s XmlTree XmlTree -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *) n.
ArrowXml a =>
a n XmlTree -> a n String
xshow IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren
                          )
                          IOSLA (XIOState s) XmlTree (String, String)
-> IOSLA (XIOState s) (String, String) XmlTree
-> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                          IOSLA (XIOState s) (String, String) XmlTree
forall (a :: * -> * -> *). ArrowXml a => a (String, String) XmlTree
parseXmlDoc
                          IOSLA (XIOState s) (String, String) XmlTree
-> IOStateArrow s XmlTree XmlTree
-> IOSLA (XIOState s) (String, String) XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                          IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
                        )
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        String -> IOStateArrow s XmlTree XmlTree
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"parse XML document"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        ( IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c d.
ArrowIf a =>
a b c -> a b d -> a b d -> a b d
ifA (LA XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA LA XmlTree XmlTree
getDTDSubset)
          ( IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
processDTDandEntities
            IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
            ( if Bool
validate'                      -- validation only possible if there is a DTD
              then IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
validateDocument
              else IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b. ArrowList a => a b b
this
            )
          )
          ( if Bool
validate'                        -- validation only consists of checking
                                                -- for undefined entity refs
                                                -- predefined XML entity refs are substituted
                                                -- in the XML parser into char refs
                                                -- so there is no need for an entity substitution
            then Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 String
"checkUndefinedEntityRefs: looking for undefined entity refs"
                 IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                 IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
checkUndefinedEntityRefs
                 IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                 Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 String
"checkUndefinedEntityRefs: looking for undefined entity refs done"
                 IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                 String -> IOStateArrow s XmlTree XmlTree
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"decoding document"
            else IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b. ArrowList a => a b b
this
          )
        )
      )
      IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when` IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
    where
      validate' :: Bool
validate'
          = Bool
validateD Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
validateRX

      processDTDandEntities :: IOSLA (XIOState s) XmlTree XmlTree
processDTDandEntities
          = ( if Bool
validateD Bool -> Bool -> Bool
|| Bool
substDTD
              then IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
processDTD
              else IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) b. ArrowList a => a b b
this
            )
            IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
            ( if Bool
substDTD
              then ( IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
processGeneralEntities             -- DTD contains general entity definitions
                     IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
                     LA XmlTree XmlTree -> IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA LA XmlTree XmlTree
generalEntitiesDefined
                   )
              else if Bool
substHTML
                   then IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
substAllXHTMLEntityRefs
                   else IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) b. ArrowList a => a b b
this
            )
            IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
            IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
transfAllCharRef

checkUndefinedEntityRefs        :: IOStateArrow s XmlTree XmlTree
checkUndefinedEntityRefs :: IOStateArrow s XmlTree XmlTree
checkUndefinedEntityRefs
    = IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) (t :: * -> *) b c.
(ArrowTree a, Tree t) =>
a (t b) c -> a (t b) c
deep IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isEntityRef
      IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *). ArrowXml a => a XmlTree String
getEntityRef
      IOSLA (XIOState s) XmlTree String
-> IOSLA (XIOState s) String XmlTree
-> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      (String -> String) -> IOSLA (XIOState s) String String
forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr (\ String
en -> String
"general entity reference \"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
en String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";\" is undefined")
      IOSLA (XIOState s) String String
-> IOSLA (XIOState s) String XmlTree
-> IOSLA (XIOState s) String XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      Int -> IOSLA (XIOState s) String XmlTree
forall (a :: * -> * -> *). ArrowXml a => Int -> a String XmlTree
mkError Int
c_err
      IOSLA (XIOState s) String XmlTree
-> IOStateArrow s XmlTree XmlTree
-> IOSLA (XIOState s) String XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg

-- ------------------------------------------------------------

parseXmlDocumentWithExpat        :: IOStateArrow s XmlTree XmlTree
parseXmlDocumentWithExpat :: IOStateArrow s XmlTree XmlTree
parseXmlDocumentWithExpat
    = ( IOSArrow XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState (IOSArrow XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree (IOSArrow XmlTree XmlTree)
-> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree (IOSArrow XmlTree XmlTree)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theExpatParser
      )
      IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when` IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk

-- ------------------------------------------------------------

{- |
HTML parser

Input tree must be a root tree with a text tree as child containing the document to be parsed.
The parser tries to parse everything as HTML, if the HTML document is not wellformed XML or if
errors occur, warnings are generated. The warnings can be issued, or suppressed.

Example: @ parseHtmlDocument True @ : parse document and issue warnings

This parser is useful for applications like web crawlers, where the pages may contain
arbitray errors, but the application is only interested in parts of the document, e.g. the plain text.

-}

parseHtmlDocument       :: IOStateArrow s XmlTree XmlTree
parseHtmlDocument :: IOStateArrow s XmlTree XmlTree
parseHtmlDocument
    = ( IOSLA (XIOState s) XmlTree String -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( String -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
                  IOSLA (XIOState s) XmlTree String
-> IOSLA (XIOState s) String String
-> IOSLA (XIOState s) XmlTree String
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                  Int -> (String -> String) -> IOSLA (XIOState s) String String
forall b s. Int -> (b -> String) -> IOStateArrow s b b
traceValue Int
1 ((String
"parseHtmlDoc: parse HTML document " String -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. Show a => a -> String
show)
                )
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        ( (Bool, Bool) -> IOStateArrow s XmlTree XmlTree
forall s0. (Bool, Bool) -> IOSLA (XIOState s0) XmlTree XmlTree
parseHtml      ((Bool, Bool) -> IOStateArrow s XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
-> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Bool, Bool)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theTagSoup  Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&. Selector XIOSysState Bool
theExpat) )
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        ( (Bool, Bool) -> IOStateArrow s XmlTree XmlTree
forall s0. (Bool, Bool) -> IOSLA (XIOState s0) XmlTree XmlTree
removeWarnings ((Bool, Bool) -> IOStateArrow s XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
-> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Bool, Bool)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theWarnings Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&. Selector XIOSysState Bool
theTagSoup) )
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        String -> IOStateArrow s XmlTree XmlTree
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"parse HTML document"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
traceTree
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
traceSource
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOSLA (XIOState s) XmlTree String -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( String -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
                  IOSLA (XIOState s) XmlTree String
-> IOSLA (XIOState s) String String
-> IOSLA (XIOState s) XmlTree String
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                  Int -> (String -> String) -> IOSLA (XIOState s) String String
forall b s. Int -> (b -> String) -> IOStateArrow s b b
traceValue Int
1 (\ String
src -> String
"parse HTML document " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
src String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" finished")
                )
      )
      IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when` IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
    where
    parseHtml :: (Bool, Bool) -> IOSLA (XIOState s0) XmlTree XmlTree
parseHtml (Bool
withTagSoup', Bool
withExpat')
        | Bool
withExpat'    = IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree
forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState (IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree XmlTree
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theExpatParser

        | Bool
withTagSoup'  = IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree
forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState (IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree XmlTree
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theTagSoupParser

        | Bool
otherwise     = Int -> String -> IOSLA (XIOState s0) XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 (String
"parse document with parsec HTML parser")
                          IOSLA (XIOState s0) XmlTree XmlTree
-> IOSLA (XIOState s0) XmlTree XmlTree
-> IOSLA (XIOState s0) XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                          IOSLA (XIOState s0) XmlTree XmlTree
-> IOSLA (XIOState s0) XmlTree XmlTree
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren
                          ( ( String -> IOSLA (XIOState s0) XmlTree String
forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source             -- get source name
                              IOSLA (XIOState s0) XmlTree String
-> IOSLA (XIOState s0) XmlTree String
-> IOSLA (XIOState s0) XmlTree (String, String)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&&
                              IOSLA (XIOState s0) XmlTree XmlTree
-> IOSLA (XIOState s0) XmlTree String
forall (a :: * -> * -> *) n.
ArrowXml a =>
a n XmlTree -> a n String
xshow IOSLA (XIOState s0) XmlTree XmlTree
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren
                            )                                   -- get string to be parsed
                            IOSLA (XIOState s0) XmlTree (String, String)
-> IOSLA (XIOState s0) (String, String) XmlTree
-> IOSLA (XIOState s0) XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                            IOSLA (XIOState s0) (String, String) XmlTree
forall (a :: * -> * -> *).
ArrowList a =>
a (String, String) XmlTree
parseHtmlDoc                        -- run parser, entity substituion is done in parser
                          )

    removeWarnings :: (Bool, Bool) -> IOSLA (XIOState s) XmlTree XmlTree
removeWarnings (Bool
warnings, Bool
withTagSoup')
        | Bool
warnings      = IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *).
ArrowXml a =>
a XmlTree XmlTree -> a XmlTree XmlTree
processTopDownWithAttrl               -- remove warnings inserted by parser and entity subst
                          IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
        | Bool
withTagSoup'  = IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) b. ArrowList a => a b b
this                                  -- warnings are not generated in tagsoup

        | Bool
otherwise     = LA XmlTree XmlTree -> IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA (LA XmlTree XmlTree -> IOSLA (XIOState s) XmlTree XmlTree)
-> LA XmlTree XmlTree -> IOSLA (XIOState s) XmlTree XmlTree
forall a b. (a -> b) -> a -> b
$
                          [IfThen (LA XmlTree XmlTree) (LA XmlTree XmlTree)]
-> LA XmlTree XmlTree
forall b c.
[IfThen (LA (NTree b) c) (LA (NTree b) (NTree b))]
-> LA (NTree b) (NTree b)
editNTreeA [LA XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isError LA XmlTree XmlTree
-> LA XmlTree XmlTree
-> IfThen (LA XmlTree XmlTree) (LA XmlTree XmlTree)
forall a b. a -> b -> IfThen a b
:-> LA XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => a b c
none]         -- remove all warnings from document


-- ------------------------------------------------------------

{- | Document validation

Input must be a complete document tree. The document
is validated with respect to the DTD spec.
Only useful for XML documents containing a DTD.

If the document is valid, it is transformed with respect to the DTD,
normalization of attribute values, adding default values, sorting attributes by name,...

If no error was found, result is the normalized tree,
else the error status is set in the list of attributes
of the root node \"\/\" and the document content is removed from the tree.

-}

validateDocument        :: IOStateArrow s XmlTree XmlTree
validateDocument :: IOStateArrow s XmlTree XmlTree
validateDocument
    = ( Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"validating document"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
validateDoc
                  IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                  IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
                )
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        String -> IOStateArrow s XmlTree XmlTree
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"document validation"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"document validated, transforming doc with respect to DTD"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
transformDoc
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"document transformed"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
traceSource
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
traceTree
      )
      IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
      IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk

-- ------------------------------------------------------------

{- | Namespace propagation

Input must be a complete document tree. The namespace declarations
are evaluated and all element and attribute names are processed by
splitting the name into prefix, local part and namespace URI.

Naames are checked with respect to the XML namespace definition

If no error was found, result is the unchanged input tree,
else the error status is set in the list of attributes
of the root node \"\/\" and the document content is removed from the tree.


-}

propagateAndValidateNamespaces  :: IOStateArrow s XmlTree XmlTree
propagateAndValidateNamespaces :: IOStateArrow s XmlTree XmlTree
propagateAndValidateNamespaces
    = ( Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"propagating namespaces"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
propagateNamespaces
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        String -> IOStateArrow s XmlTree XmlTree
forall s. String -> IOStateArrow s XmlTree XmlTree
traceDoc String
"propagating namespaces done"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
andValidateNamespaces
      )
      IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
      IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk

andValidateNamespaces  :: IOStateArrow s XmlTree XmlTree
andValidateNamespaces :: IOStateArrow s XmlTree XmlTree
andValidateNamespaces
    = ( Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"validating namespaces"
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        ( String -> IOStateArrow s XmlTree XmlTree
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"namespace propagation"
          IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
          ( IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
validateNamespaces IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg )
        )
        IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        Int -> String -> IOStateArrow s XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"namespace validation finished"
      )
      IOStateArrow s XmlTree XmlTree
-> IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
      IOStateArrow s XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk

-- ------------------------------------------------------------

{- |
   creates a new document root, adds all options
   as attributes to the document root and calls 'getXmlContents'.

   If the document name is the empty string, the document will be read
   from standard input.

   For supported protocols see 'Text.XML.HXT.Arrow.DocumentInput.getXmlContents'
-}

getDocumentContents     :: String -> IOStateArrow s b XmlTree
getDocumentContents :: String -> IOStateArrow s b XmlTree
getDocumentContents String
src
    = [IOStateArrow s b XmlTree]
-> [IOStateArrow s b XmlTree] -> IOStateArrow s b XmlTree
forall (a :: * -> * -> *) n.
ArrowXml a =>
[a n XmlTree] -> [a n XmlTree] -> a n XmlTree
root [] []
      IOStateArrow s b XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree -> IOStateArrow s b XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      String -> String -> IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
a_source String
src
      IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      Int -> String -> IOSLA (XIOState s) XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 (String
"readDocument: start processing document " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
src)
      IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
getXmlContents

-- ------------------------------------------------------------

validateDoc                     :: ArrowList a => a XmlTree XmlTree
validateDoc :: a XmlTree XmlTree
validateDoc                     = LA XmlTree XmlTree -> a XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA ( LA XmlTree XmlTree
validate
                                           LA XmlTree XmlTree -> LA XmlTree XmlTree -> LA XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
                                           LA XmlTree XmlTree
getDTDSubset      -- validate only when DTD decl is present
                                         )

transformDoc                    :: ArrowList a => a XmlTree XmlTree
transformDoc :: a XmlTree XmlTree
transformDoc                    = LA XmlTree XmlTree -> a XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA LA XmlTree XmlTree
transform

-- ------------------------------------------------------------