{-# LANGUAGE CPP                #-}

-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Parser.HtmlParsec
   Copyright  : Copyright (C) 2005 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   This parser tries to interprete everything as HTML
   no errors are emitted during parsing. If something looks
   weired, warning messages are inserted in the document tree.

   All filter are pure XmlFilter,
   errror handling and IO is done in 'Text.XML.HXT.Parser.HtmlParser'
   or other modules

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Parser.HtmlParsec
    ( parseHtmlText
    , parseHtmlDocument
    , parseHtmlContent
    , isEmptyHtmlTag
    , isInnerHtmlTagOf
    , closesHtmlTag
    , emptyHtmlTags
    )

where

#if MIN_VERSION_base(4,8,2)
#else
import Control.Applicative                      ((<$>))
#endif

import Data.Char                                ( toLower
                                                , toUpper
                                                )
import Data.Char.Properties.XMLCharProps        ( isXmlChar
                                                )
import Data.Maybe                               ( fromMaybe
                                                , fromJust
                                                )
import qualified Data.Map                       as M

import Text.ParserCombinators.Parsec            ( SourcePos
                                                , anyChar
                                                , between
                                                -- , char
                                                , eof
                                                , getPosition
                                                , many
                                                , many1
                                                , noneOf
                                                , option
                                                , runParser
                                                , satisfy
                                                , string
                                                , try
                                                , (<|>)
                                                )

import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.XmlNode                 ( mkText'
                                                , mkError'
                                                , mkCdata'
                                                , mkCmt'
                                                , mkCharRef'
                                                , mkElement'
                                                , mkAttr'
                                                , mkDTDElem'
                                                , mkPi'
                                                , isEntityRef
                                                , getEntityRef
                                                )
import Text.XML.HXT.Parser.XmlTokenParser       ( allBut
                                                , amp
                                                , dq
                                                , eq
                                                , gt
                                                , lt
                                                , name
                                                , pubidLiteral
                                                , skipS
                                                , skipS0
                                                , sPace
                                                , sq
                                                , systemLiteral
                                                , checkString
                                                , singleCharsT
                                                , referenceT
                                                , mergeTextNodes
                                                )
import Text.XML.HXT.Parser.XmlParsec            ( misc
                                                , parseXmlText
                                                , xMLDecl'
                                                )
import Text.XML.HXT.Parser.XmlCharParser        ( xmlChar
                                                , SimpleXParser
                                                , withNormNewline
                                                )
import Text.XML.HXT.Parser.XhtmlEntities        ( xhtmlEntities
                                                )

-- ------------------------------------------------------------

parseHtmlText           :: String -> XmlTree -> XmlTrees
parseHtmlText :: String -> XmlTree -> XmlTrees
parseHtmlText String
loc XmlTree
t     = SimpleXParser XmlTrees
-> XPState () -> String -> XmlTree -> XmlTrees
parseXmlText SimpleXParser XmlTrees
htmlDocument (() -> XPState ()
forall a. a -> XPState a
withNormNewline ()) String
loc (XmlTree -> XmlTrees) -> XmlTree -> XmlTrees
forall a b. (a -> b) -> a -> b
$ XmlTree
t

-- ------------------------------------------------------------

parseHtmlFromString     :: SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString :: SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString SimpleXParser XmlTrees
parser String
loc
    = (ParseError -> XmlTrees)
-> (XmlTrees -> XmlTrees) -> Either ParseError XmlTrees -> XmlTrees
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ((XmlTree -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
:[]) (XmlTree -> XmlTrees)
-> (ParseError -> XmlTree) -> ParseError -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> XmlTree
mkError' Int
c_err (String -> XmlTree)
-> (ParseError -> String) -> ParseError -> XmlTree
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\n") (String -> String)
-> (ParseError -> String) -> ParseError -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParseError -> String
forall a. Show a => a -> String
show) XmlTrees -> XmlTrees
forall a. a -> a
id (Either ParseError XmlTrees -> XmlTrees)
-> (String -> Either ParseError XmlTrees) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SimpleXParser XmlTrees
-> XPState () -> String -> String -> Either ParseError XmlTrees
forall tok st a.
GenParser tok st a -> st -> String -> [tok] -> Either ParseError a
runParser SimpleXParser XmlTrees
parser (() -> XPState ()
forall a. a -> XPState a
withNormNewline ()) String
loc

parseHtmlDocument       :: String -> String -> XmlTrees
parseHtmlDocument :: String -> String -> XmlTrees
parseHtmlDocument       = SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString SimpleXParser XmlTrees
htmlDocument

parseHtmlContent        :: String -> XmlTrees
parseHtmlContent :: String -> XmlTrees
parseHtmlContent        = SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString SimpleXParser XmlTrees
htmlContent String
"string"

-- ------------------------------------------------------------

type Context    = (XmlTreeFl, OpenTags)

type XmlTreeFl  = XmlTrees -> XmlTrees

type OpenTags   = [(String, XmlTrees, XmlTreeFl)]

-- ------------------------------------------------------------

htmlDocument    :: SimpleXParser XmlTrees
htmlDocument :: SimpleXParser XmlTrees
htmlDocument
    = do
      XmlTrees
pl <- SimpleXParser XmlTrees
htmlProlog
      XmlTrees
el <- SimpleXParser XmlTrees
htmlContent
      ParsecT String (XPState ()) Identity ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof
      XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTrees
pl XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
el)

htmlProlog      :: SimpleXParser XmlTrees
htmlProlog :: SimpleXParser XmlTrees
htmlProlog
    = do
      XmlTrees
xml <- XmlTrees -> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
             ( SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall tok st a. GenParser tok st a -> GenParser tok st a
try SimpleXParser XmlTrees
forall s. XParser s XmlTrees
xMLDecl'
               SimpleXParser XmlTrees
-> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
               ( do
                 SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
                 String -> ParsecT String (XPState ()) Identity ()
forall s. String -> XParser s ()
checkString String
"<?"
                 XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTrees -> SimpleXParser XmlTrees)
-> XmlTrees -> SimpleXParser XmlTrees
forall a b. (a -> b) -> a -> b
$ [Int -> String -> XmlTree
mkError' Int
c_warn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" wrong XML declaration")]
               )
             )
      XmlTrees
misc1   <- ParsecT String (XPState ()) Identity XmlTree
-> SimpleXParser XmlTrees
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT String (XPState ()) Identity XmlTree
forall s. XParser s XmlTree
misc
      XmlTrees
dtdPart <- XmlTrees -> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
                 ( SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall tok st a. GenParser tok st a -> GenParser tok st a
try SimpleXParser XmlTrees
doctypedecl
                   SimpleXParser XmlTrees
-> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
                   ( do
                     SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
                     String -> ParsecT String (XPState ()) Identity ()
upperCaseString String
"<!DOCTYPE"
                     XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTrees -> SimpleXParser XmlTrees)
-> XmlTrees -> SimpleXParser XmlTrees
forall a b. (a -> b) -> a -> b
$ [Int -> String -> XmlTree
mkError' Int
c_warn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" HTML DOCTYPE declaration ignored")]
                   )
                 )
      XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTrees
xml XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
misc1 XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
dtdPart)

doctypedecl     :: SimpleXParser XmlTrees
doctypedecl :: SimpleXParser XmlTrees
doctypedecl
    = ParsecT String (XPState ()) Identity ()
-> ParsecT String (XPState ()) Identity ()
-> SimpleXParser XmlTrees
-> SimpleXParser XmlTrees
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (String -> ParsecT String (XPState ()) Identity ()
upperCaseString String
"<!DOCTYPE") ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
gt
      ( do
        ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS
        String
n <- XParser () String
forall s. XParser s String
name
        [(String, String)]
exId <- ( do
                  ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS
                  [(String, String)]
-> ParsecT String (XPState ()) Identity [(String, String)]
-> ParsecT String (XPState ()) Identity [(String, String)]
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option [] ParsecT String (XPState ()) Identity [(String, String)]
externalID
                )
        ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS0
        XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a. Monad m => a -> m a
return [DTDElem -> [(String, String)] -> XmlTrees -> XmlTree
mkDTDElem' DTDElem
DOCTYPE ((String
a_name, String
n) (String, String) -> [(String, String)] -> [(String, String)]
forall a. a -> [a] -> [a]
: [(String, String)]
exId) []]
      )

externalID      :: SimpleXParser Attributes
externalID :: ParsecT String (XPState ()) Identity [(String, String)]
externalID
    = do
      String -> ParsecT String (XPState ()) Identity ()
upperCaseString String
k_public
      ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS
      String
pl <- XParser () String
forall s. XParser s String
pubidLiteral
      String
sl <- String -> XParser () String -> XParser () String
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option String
"" (XParser () String -> XParser () String)
-> XParser () String -> XParser () String
forall a b. (a -> b) -> a -> b
$ XParser () String -> XParser () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
                              ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS
                              XParser () String
forall s. XParser s String
systemLiteral
                            )
      [(String, String)]
-> ParsecT String (XPState ()) Identity [(String, String)]
forall (m :: * -> *) a. Monad m => a -> m a
return ([(String, String)]
 -> ParsecT String (XPState ()) Identity [(String, String)])
-> [(String, String)]
-> ParsecT String (XPState ()) Identity [(String, String)]
forall a b. (a -> b) -> a -> b
$ (String
k_public, String
pl) (String, String) -> [(String, String)] -> [(String, String)]
forall a. a -> [a] -> [a]
: if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
sl then [] else [(String
k_system, String
sl)]

htmlContent     :: SimpleXParser XmlTrees
htmlContent :: SimpleXParser XmlTrees
htmlContent
    = XmlTrees -> XmlTrees
mergeTextNodes (XmlTrees -> XmlTrees)
-> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> SimpleXParser XmlTrees
htmlContent'

htmlContent'    :: SimpleXParser XmlTrees
htmlContent' :: SimpleXParser XmlTrees
htmlContent'
    = XmlTrees -> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
      ( do
        Context
context <- Context -> SimpleXParser Context
hContent (XmlTrees -> XmlTrees
forall a. a -> a
id, [])
        SourcePos
pos     <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
        XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTrees -> SimpleXParser XmlTrees)
-> XmlTrees -> SimpleXParser XmlTrees
forall a b. (a -> b) -> a -> b
$ SourcePos -> Context -> XmlTrees
forall a. Show a => a -> Context -> XmlTrees
closeTags SourcePos
pos Context
context
      )
      where
      closeTags :: a -> Context -> XmlTrees
closeTags a
_pos (XmlTrees -> XmlTrees
body, [])
          = XmlTrees -> XmlTrees
body []
      closeTags a
pos' (XmlTrees -> XmlTrees
body, ((String
tn, XmlTrees
al, XmlTrees -> XmlTrees
body1) : [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen))
          = a -> Context -> XmlTrees
closeTags a
pos'
                      ( String -> Context -> Context
addHtmlWarn (a -> String
forall a. Show a => a -> String
show a
pos' String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
": no closing tag found for \"<" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
tn String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" ...>\"")
                        (Context -> Context) -> (Context -> Context) -> Context -> Context
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                        String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al XmlTrees -> XmlTrees
body
                        (Context -> Context) -> Context -> Context
forall a b. (a -> b) -> a -> b
$
                        (XmlTrees -> XmlTrees
body1, [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
                      )

-- ------------------------------------------------------------

hElement        :: Context -> SimpleXParser Context
hElement :: Context -> SimpleXParser Context
hElement Context
context
    = ( do
        XmlTree
t <- ParsecT String (XPState ()) Identity XmlTree
hSimpleData
        Context -> SimpleXParser Context
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTree -> Context -> Context
addHtmlElem XmlTree
t Context
context)
      )
      SimpleXParser Context
-> SimpleXParser Context -> SimpleXParser Context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      Context -> SimpleXParser Context
hCloseTag Context
context
      SimpleXParser Context
-> SimpleXParser Context -> SimpleXParser Context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      Context -> SimpleXParser Context
hOpenTag Context
context
      SimpleXParser Context
-> SimpleXParser Context -> SimpleXParser Context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do                      -- wrong tag, take it as text
        SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
        Unicode
c   <- XParser () Unicode
forall s. XParser s Unicode
xmlChar
        Context -> SimpleXParser Context
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> Context -> Context
addHtmlWarn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" markup char " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Unicode -> String
forall a. Show a => a -> String
show Unicode
c String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" not allowed in this context")
                 (Context -> Context) -> (Context -> Context) -> Context -> Context
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                 XmlTree -> Context -> Context
addHtmlElem (String -> XmlTree
mkText' [Unicode
c])
                 (Context -> Context) -> Context -> Context
forall a b. (a -> b) -> a -> b
$
                 Context
context
               )
      )
      SimpleXParser Context
-> SimpleXParser Context -> SimpleXParser Context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do
        SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
        Unicode
c <- XParser () Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
ParsecT s u m Unicode
anyChar
        Context -> SimpleXParser Context
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> Context -> Context
addHtmlWarn ( SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos
                               String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" illegal data in input or illegal XML char "
                               String -> String -> String
forall a. [a] -> [a] -> [a]
++ Unicode -> String
forall a. Show a => a -> String
show Unicode
c
                               String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" found and ignored, possibly wrong encoding scheme used")
                 (Context -> Context) -> Context -> Context
forall a b. (a -> b) -> a -> b
$
                 Context
context
               )
      )


hSimpleData     :: SimpleXParser XmlTree
hSimpleData :: ParsecT String (XPState ()) Identity XmlTree
hSimpleData
    = ParsecT String (XPState ()) Identity XmlTree
forall u. ParsecT String u Identity XmlTree
charData''
      ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ParsecT String (XPState ()) Identity XmlTree
hReference'
      ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ParsecT String (XPState ()) Identity XmlTree
hComment
      ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ParsecT String (XPState ()) Identity XmlTree
hpI
      ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ParsecT String (XPState ()) Identity XmlTree
hcDSect
    where
    charData'' :: ParsecT String u Identity XmlTree
charData''
        = do
          String
t <- ParsecT String u Identity Unicode
-> ParsecT String u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ((Unicode -> Bool) -> ParsecT String u Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy (\ Unicode
x -> Unicode -> Bool
isXmlChar Unicode
x Bool -> Bool -> Bool
&& Bool -> Bool
not (Unicode
x Unicode -> Unicode -> Bool
forall a. Eq a => a -> a -> Bool
== Unicode
'<' Bool -> Bool -> Bool
|| Unicode
x Unicode -> Unicode -> Bool
forall a. Eq a => a -> a -> Bool
== Unicode
'&')))
          XmlTree -> ParsecT String u Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkText' String
t)

hCloseTag       :: Context -> SimpleXParser Context
hCloseTag :: Context -> SimpleXParser Context
hCloseTag Context
context
    = do
      String -> ParsecT String (XPState ()) Identity ()
forall s. String -> XParser s ()
checkString String
"</"
      String
n <- XParser () String
lowerCaseName
      ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS0
      SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
      ParsecT String (XPState ()) Identity ()
-> String -> Context -> SimpleXParser Context
checkSymbol ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
gt (String
"closing > in tag \"</" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\" expected") (SourcePos -> String -> Context -> Context
closeTag SourcePos
pos String
n Context
context)

hOpenTag        :: Context -> SimpleXParser Context
hOpenTag :: Context -> SimpleXParser Context
hOpenTag Context
context
    = ( do
        ((SourcePos, String), XmlTrees)
e   <- SimpleXParser ((SourcePos, String), XmlTrees)
hOpenTagStart
        ((SourcePos, String), XmlTrees) -> Context -> SimpleXParser Context
hOpenTagRest ((SourcePos, String), XmlTrees)
e Context
context
      )

hOpenTagStart   :: SimpleXParser ((SourcePos, String), XmlTrees)
hOpenTagStart :: SimpleXParser ((SourcePos, String), XmlTrees)
hOpenTagStart
    = do
      (SourcePos, String)
np <- GenParser Unicode (XPState ()) (SourcePos, String)
-> GenParser Unicode (XPState ()) (SourcePos, String)
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
                  ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
lt
                  SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
                  String
n <- XParser () String
lowerCaseName
                  (SourcePos, String)
-> GenParser Unicode (XPState ()) (SourcePos, String)
forall (m :: * -> *) a. Monad m => a -> m a
return (SourcePos
pos, String
n)
                )
      ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS0
      XmlTrees
as <- SimpleXParser XmlTrees
hAttrList
      ((SourcePos, String), XmlTrees)
-> SimpleXParser ((SourcePos, String), XmlTrees)
forall (m :: * -> *) a. Monad m => a -> m a
return ((SourcePos, String)
np, XmlTrees
as)

hOpenTagRest    :: ((SourcePos, String), XmlTrees) -> Context -> SimpleXParser Context
hOpenTagRest :: ((SourcePos, String), XmlTrees) -> Context -> SimpleXParser Context
hOpenTagRest ((SourcePos
pos, String
tn), XmlTrees
al) Context
context
    = ( do
        String -> ParsecT String (XPState ()) Identity ()
forall s. String -> XParser s ()
checkString String
"/>"
        Context -> SimpleXParser Context
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al XmlTrees -> XmlTrees
forall a. a -> a
id Context
context)
      )
      SimpleXParser Context
-> SimpleXParser Context -> SimpleXParser Context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do
        Context
context1 <- ParsecT String (XPState ()) Identity ()
-> String -> Context -> SimpleXParser Context
checkSymbol ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
gt (String
"closing > in tag \"<" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
tn String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"...\" expected") Context
context
        Context -> SimpleXParser Context
forall (m :: * -> *) a. Monad m => a -> m a
return ( let context2 :: Context
context2 = SourcePos -> String -> Context -> Context
closePrevTag SourcePos
pos String
tn Context
context1
                 in
                 ( if String -> Bool
isEmptyHtmlTag String
tn
                   then String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al XmlTrees -> XmlTrees
forall a. a -> a
id
                   else String -> XmlTrees -> Context -> Context
openTag String
tn XmlTrees
al
                 ) Context
context2
               )
      )

hAttrList       :: SimpleXParser XmlTrees
hAttrList :: SimpleXParser XmlTrees
hAttrList
    = ParsecT String (XPState ()) Identity XmlTree
-> SimpleXParser XmlTrees
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall tok st a. GenParser tok st a -> GenParser tok st a
try ParsecT String (XPState ()) Identity XmlTree
hAttribute)
      where
      hAttribute :: ParsecT String (XPState ()) Identity XmlTree
hAttribute
          = do
            String
n <- XParser () String
lowerCaseName
            XmlTrees
v <- SimpleXParser XmlTrees
hAttrValue
            ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
skipS0
            XmlTree -> ParsecT String (XPState ()) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTree -> ParsecT String (XPState ()) Identity XmlTree)
-> XmlTree -> ParsecT String (XPState ()) Identity XmlTree
forall a b. (a -> b) -> a -> b
$ QName -> XmlTrees -> XmlTree
mkAttr' (String -> QName
mkName String
n) XmlTrees
v

hAttrValue      :: SimpleXParser XmlTrees
hAttrValue :: SimpleXParser XmlTrees
hAttrValue
    = XmlTrees -> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
      ( ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
eq ParsecT String (XPState ()) Identity ()
-> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> SimpleXParser XmlTrees
hAttrValue' )

hAttrValue'     :: SimpleXParser XmlTrees
hAttrValue' :: SimpleXParser XmlTrees
hAttrValue'
    = SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( ParsecT String (XPState ()) Identity ()
-> ParsecT String (XPState ()) Identity ()
-> SimpleXParser XmlTrees
-> SimpleXParser XmlTrees
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
dq ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
dq (String -> SimpleXParser XmlTrees
hAttrValue'' String
"&\"") )
      SimpleXParser XmlTrees
-> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( ParsecT String (XPState ()) Identity ()
-> ParsecT String (XPState ()) Identity ()
-> SimpleXParser XmlTrees
-> SimpleXParser XmlTrees
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
sq ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
sq (String -> SimpleXParser XmlTrees
hAttrValue'' String
"&\'") )
      SimpleXParser XmlTrees
-> SimpleXParser XmlTrees -> SimpleXParser XmlTrees
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do                      -- HTML allows unquoted attribute values
        String
cs <- XParser () Unicode -> XParser () String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (String -> XParser () Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
String -> ParsecT s u m Unicode
noneOf String
" \r\t\n>\"\'")
        XmlTrees -> SimpleXParser XmlTrees
forall (m :: * -> *) a. Monad m => a -> m a
return [String -> XmlTree
mkText' String
cs]
      )

hAttrValue''    :: String -> SimpleXParser XmlTrees
hAttrValue'' :: String -> SimpleXParser XmlTrees
hAttrValue'' String
notAllowed
    = ParsecT String (XPState ()) Identity XmlTree
-> SimpleXParser XmlTrees
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ( ParsecT String (XPState ()) Identity XmlTree
hReference' ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> String -> ParsecT String (XPState ()) Identity XmlTree
forall s. String -> XParser s XmlTree
singleCharsT String
notAllowed)

hReference'     :: SimpleXParser XmlTree
hReference' :: ParsecT String (XPState ()) Identity XmlTree
hReference'
    = ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall tok st a. GenParser tok st a -> GenParser tok st a
try ParsecT String (XPState ()) Identity XmlTree
hReferenceT
      ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do
        ParsecT String (XPState ()) Identity ()
forall s. XParser s ()
amp
        XmlTree -> ParsecT String (XPState ()) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkText' String
"&")
      )

hReferenceT     :: SimpleXParser XmlTree
hReferenceT :: ParsecT String (XPState ()) Identity XmlTree
hReferenceT
    = do
      XmlTree
r <- ParsecT String (XPState ()) Identity XmlTree
forall s. XParser s XmlTree
referenceT
      XmlTree -> ParsecT String (XPState ()) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return ( if XmlTree -> Bool
forall a. XmlNode a => a -> Bool
isEntityRef XmlTree
r
               then XmlTree -> XmlTree
substRef  XmlTree
r
               else XmlTree
r
             )
    where
    -- optimization: HTML entity refs are substituted by char refs, so a later entity ref substituion isn't required
    substRef :: XmlTree -> XmlTree
substRef XmlTree
r
        = case (String -> [(String, Int)] -> Maybe Int
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
en [(String, Int)]
xhtmlEntities) of
          Just Int
i        -> Int -> XmlTree
mkCharRef' Int
i
          Maybe Int
Nothing       -> XmlTree
r                            -- not found: the entity ref remains as it is
                                                        -- this is also done in the XML parser
{- alternative def
          Nothing       -> mkText' ("&" ++ en ++ ";")   -- not found: the entity ref is taken as text
-}
        where
        en :: String
en = Maybe String -> String
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe String -> String)
-> (XmlTree -> Maybe String) -> XmlTree -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTree -> Maybe String
forall a. XmlNode a => a -> Maybe String
getEntityRef (XmlTree -> String) -> XmlTree -> String
forall a b. (a -> b) -> a -> b
$ XmlTree
r

hContent        :: Context -> SimpleXParser Context
hContent :: Context -> SimpleXParser Context
hContent Context
context
    = Context -> SimpleXParser Context -> SimpleXParser Context
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option Context
context
      ( Context -> SimpleXParser Context
hElement Context
context
        SimpleXParser Context
-> (Context -> SimpleXParser Context) -> SimpleXParser Context
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
        Context -> SimpleXParser Context
hContent
      )

-- ------------------------------------------------------------

-- hComment allows "--" in comments
-- comment from XML spec does not

hComment                :: SimpleXParser XmlTree
hComment :: ParsecT String (XPState ()) Identity XmlTree
hComment
    = do
      String -> ParsecT String (XPState ()) Identity ()
forall s. String -> XParser s ()
checkString String
"<!--"
      SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
      String
c <- (XParser () Unicode -> XParser () String)
-> String -> XParser () String
forall s.
(XParser s Unicode -> XParser s String)
-> String -> XParser s String
allBut XParser () Unicode -> XParser () String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many String
"-->"
      SourcePos -> String -> ParsecT String (XPState ()) Identity XmlTree
forall a s.
Show a =>
a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCmt SourcePos
pos String
c
    where
    closeCmt :: a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCmt a
pos String
c
        = ( do
            String -> XParser s ()
forall s. String -> XParser s ()
checkString String
"-->"
            XmlTree -> ParsecT String (XPState s) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkCmt' String
c)
          )
          ParsecT String (XPState s) Identity XmlTree
-> ParsecT String (XPState s) Identity XmlTree
-> ParsecT String (XPState s) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
          ( XmlTree -> ParsecT String (XPState s) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTree -> ParsecT String (XPState s) Identity XmlTree)
-> XmlTree -> ParsecT String (XPState s) Identity XmlTree
forall a b. (a -> b) -> a -> b
$
            Int -> String -> XmlTree
mkError' Int
c_warn (a -> String
forall a. Show a => a -> String
show a
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" no closing comment sequence \"-->\" found")
          )

-- ------------------------------------------------------------

hpI             :: SimpleXParser XmlTree
hpI :: ParsecT String (XPState ()) Identity XmlTree
hpI = String -> ParsecT String (XPState ()) Identity ()
forall s. String -> XParser s ()
checkString String
"<?"
      ParsecT String (XPState ()) Identity ()
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
      ( ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
              String
n <- XParser () String
forall s. XParser s String
name
              String
p <- XParser () String
forall s. XParser s String
sPace XParser () String -> XParser () String -> XParser () String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (XParser () Unicode -> XParser () String)
-> String -> XParser () String
forall s.
(XParser s Unicode -> XParser s String)
-> String -> XParser s String
allBut XParser () Unicode -> XParser () String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many String
"?>"
              String -> XParser () String
forall s (m :: * -> *) u.
Stream s m Unicode =>
String -> ParsecT s u m String
string String
"?>" XParser () String
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                     XmlTree -> ParsecT String (XPState ()) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> XmlTree
mkPi' (String -> QName
mkName String
n) [QName -> XmlTrees -> XmlTree
mkAttr' (String -> QName
mkName String
a_value) [String -> XmlTree
mkText' String
p]])
            )
        ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
-> ParsecT String (XPState ()) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
        ( do
          SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
          XmlTree -> ParsecT String (XPState ()) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTree -> ParsecT String (XPState ()) Identity XmlTree)
-> XmlTree -> ParsecT String (XPState ()) Identity XmlTree
forall a b. (a -> b) -> a -> b
$
            Int -> String -> XmlTree
mkError' Int
c_warn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" illegal PI found")
        )
      )

-- ------------------------------------------------------------

hcDSect        :: SimpleXParser XmlTree
hcDSect :: ParsecT String (XPState ()) Identity XmlTree
hcDSect
    = do
      String -> ParsecT String (XPState ()) Identity ()
forall s. String -> XParser s ()
checkString String
"<![CDATA["
      SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
      String
t <- (XParser () Unicode -> XParser () String)
-> String -> XParser () String
forall s.
(XParser s Unicode -> XParser s String)
-> String -> XParser s String
allBut XParser () Unicode -> XParser () String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many String
"]]>"
      SourcePos -> String -> ParsecT String (XPState ()) Identity XmlTree
forall a s.
Show a =>
a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCD SourcePos
pos String
t
    where
    closeCD :: a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCD a
pos String
t
        = ( do
            String -> XParser s ()
forall s. String -> XParser s ()
checkString String
"]]>"
            XmlTree -> ParsecT String (XPState s) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkCdata' String
t)
          )
          ParsecT String (XPState s) Identity XmlTree
-> ParsecT String (XPState s) Identity XmlTree
-> ParsecT String (XPState s) Identity XmlTree
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
          ( XmlTree -> ParsecT String (XPState s) Identity XmlTree
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTree -> ParsecT String (XPState s) Identity XmlTree)
-> XmlTree -> ParsecT String (XPState s) Identity XmlTree
forall a b. (a -> b) -> a -> b
$
            Int -> String -> XmlTree
mkError' Int
c_warn (a -> String
forall a. Show a => a -> String
show a
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" no closing CDATA sequence \"]]>\" found")
          )

-- ------------------------------------------------------------

checkSymbol     :: SimpleXParser () -> String -> Context -> SimpleXParser Context
checkSymbol :: ParsecT String (XPState ()) Identity ()
-> String -> Context -> SimpleXParser Context
checkSymbol ParsecT String (XPState ()) Identity ()
p String
msg Context
context
    = ( ParsecT String (XPState ()) Identity ()
p
        ParsecT String (XPState ()) Identity ()
-> SimpleXParser Context -> SimpleXParser Context
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
        Context -> SimpleXParser Context
forall (m :: * -> *) a. Monad m => a -> m a
return Context
context
      )
      SimpleXParser Context
-> SimpleXParser Context -> SimpleXParser Context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do
        SourcePos
pos <- ParsecT String (XPState ()) Identity SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
        Context -> SimpleXParser Context
forall (m :: * -> *) a. Monad m => a -> m a
return (Context -> SimpleXParser Context)
-> Context -> SimpleXParser Context
forall a b. (a -> b) -> a -> b
$ String -> Context -> Context
addHtmlWarn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
msg) Context
context
      )

lowerCaseName   :: SimpleXParser String
lowerCaseName :: XParser () String
lowerCaseName
    = do
      String
n <- XParser () String
forall s. XParser s String
name
      String -> XParser () String
forall (m :: * -> *) a. Monad m => a -> m a
return ((Unicode -> Unicode) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Unicode -> Unicode
toLower String
n)

upperCaseString :: String -> SimpleXParser ()
upperCaseString :: String -> ParsecT String (XPState ()) Identity ()
upperCaseString String
s
    = XParser () String -> XParser () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try ([XParser () Unicode] -> XParser () String
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence ((Unicode -> XParser () Unicode) -> String -> [XParser () Unicode]
forall a b. (a -> b) -> [a] -> [b]
map (\ Unicode
c -> (Unicode -> Bool) -> XParser () Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy (( Unicode -> Unicode -> Bool
forall a. Eq a => a -> a -> Bool
== Unicode
c) (Unicode -> Bool) -> (Unicode -> Unicode) -> Unicode -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Unicode -> Unicode
toUpper)) String
s)) XParser () String
-> ParsecT String (XPState ()) Identity ()
-> ParsecT String (XPState ()) Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> () -> ParsecT String (XPState ()) Identity ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- ------------------------------------------------------------

addHtmlTag      :: String -> XmlTrees -> XmlTreeFl -> Context -> Context
addHtmlTag :: String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al XmlTrees -> XmlTrees
body Context
context
    = XmlTree
e XmlTree -> Context -> Context
`seq`
      XmlTree -> Context -> Context
addHtmlElem XmlTree
e Context
context
    where
    e :: XmlTree
e = QName -> XmlTrees -> XmlTrees -> XmlTree
mkElement' (String -> QName
mkName String
tn) XmlTrees
al (XmlTrees -> XmlTrees
body [])

addHtmlWarn     :: String -> Context -> Context
addHtmlWarn :: String -> Context -> Context
addHtmlWarn String
msg
    = XmlTree -> Context -> Context
addHtmlElem (Int -> String -> XmlTree
mkError' Int
c_warn String
msg)

addHtmlElem    :: XmlTree -> Context -> Context
addHtmlElem :: XmlTree -> Context -> Context
addHtmlElem XmlTree
elem' (XmlTrees -> XmlTrees
body, [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)
    = (XmlTrees -> XmlTrees
body (XmlTrees -> XmlTrees)
-> (XmlTrees -> XmlTrees) -> XmlTrees -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (XmlTree
elem' XmlTree -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
:), [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)

openTag         :: String -> XmlTrees -> Context -> Context
openTag :: String -> XmlTrees -> Context -> Context
openTag String
tn XmlTrees
al (XmlTrees -> XmlTrees
body, [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)
    = (XmlTrees -> XmlTrees
forall a. a -> a
id, (String
tn, XmlTrees
al, XmlTrees -> XmlTrees
body) (String, XmlTrees, XmlTrees -> XmlTrees)
-> [(String, XmlTrees, XmlTrees -> XmlTrees)]
-> [(String, XmlTrees, XmlTrees -> XmlTrees)]
forall a. a -> [a] -> [a]
: [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)

closeTag        :: SourcePos -> String -> Context -> Context
closeTag :: SourcePos -> String -> Context -> Context
closeTag SourcePos
pos String
n Context
context
    | String
n String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (((String, XmlTrees, XmlTrees -> XmlTrees) -> String)
-> [(String, XmlTrees, XmlTrees -> XmlTrees)] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map ( \ (String
n1, XmlTrees
_, XmlTrees -> XmlTrees
_) -> String
n1) ([(String, XmlTrees, XmlTrees -> XmlTrees)] -> [String])
-> [(String, XmlTrees, XmlTrees -> XmlTrees)] -> [String]
forall a b. (a -> b) -> a -> b
$ Context -> [(String, XmlTrees, XmlTrees -> XmlTrees)]
forall a b. (a, b) -> b
snd Context
context)
        = String -> Context -> Context
closeTag' String
n Context
context
    | Bool
otherwise
        = String -> Context -> Context
addHtmlWarn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" no opening tag found for </" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
">")
          (Context -> Context) -> (Context -> Context) -> Context -> Context
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
          String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
n [] XmlTrees -> XmlTrees
forall a. a -> a
id
          (Context -> Context) -> Context -> Context
forall a b. (a -> b) -> a -> b
$
          Context
context
    where
    closeTag' :: String -> Context -> Context
closeTag' String
n' (XmlTrees -> XmlTrees
body', (String
n1, XmlTrees
al1, XmlTrees -> XmlTrees
body1) : [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
        = Context -> Context
close Context
context1
          where
          context1 :: Context
context1
              = String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
n1 XmlTrees
al1 XmlTrees -> XmlTrees
body' (XmlTrees -> XmlTrees
body1, [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
          close :: Context -> Context
close
              | String
n' String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
n1
                = Context -> Context
forall a. a -> a
id
              | String
n1 String -> String -> Bool
`isInnerHtmlTagOf` String
n'
                  = SourcePos -> String -> Context -> Context
closeTag SourcePos
pos String
n'
              | Bool
otherwise
                = String -> Context -> Context
addHtmlWarn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" no closing tag found for \"<" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
n1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" ...>\"")
                  (Context -> Context) -> (Context -> Context) -> Context -> Context
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  String -> Context -> Context
closeTag' String
n'
    closeTag' String
_ Context
_
        = String -> Context
forall a. HasCallStack => String -> a
error String
"illegal argument for closeTag'"

closePrevTag    :: SourcePos -> String -> Context -> Context
closePrevTag :: SourcePos -> String -> Context -> Context
closePrevTag SourcePos
_pos String
_n context :: Context
context@(XmlTrees -> XmlTrees
_body, [])
    = Context
context
closePrevTag SourcePos
pos String
n context :: Context
context@(XmlTrees -> XmlTrees
body, (String
n1, XmlTrees
al1, XmlTrees -> XmlTrees
body1) : [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
    | String
n String -> String -> Bool
`closesHtmlTag` String
n1
        = SourcePos -> String -> Context -> Context
closePrevTag SourcePos
pos String
n
          ( String -> Context -> Context
addHtmlWarn (SourcePos -> String
forall a. Show a => a -> String
show SourcePos
pos String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" tag \"<" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
n1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" ...>\" implicitly closed by opening tag \"<" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" ...>\"")
            (Context -> Context) -> (Context -> Context) -> Context -> Context
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
            String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
n1 XmlTrees
al1 XmlTrees -> XmlTrees
body
            (Context -> Context) -> Context -> Context
forall a b. (a -> b) -> a -> b
$
            (XmlTrees -> XmlTrees
body1, [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
          )
    | Bool
otherwise
        = Context
context

-- ------------------------------------------------------------
--
-- taken from HaXml and extended

isEmptyHtmlTag  :: String -> Bool
isEmptyHtmlTag :: String -> Bool
isEmptyHtmlTag String
n
    = String
n String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`
      [String]
emptyHtmlTags

emptyHtmlTags   :: [String]
emptyHtmlTags :: [String]
emptyHtmlTags
    = [ String
"area"
      , String
"base"
      , String
"br"
      , String
"col"
      , String
"frame"
      , String
"hr"
      , String
"img"
      , String
"input"
      , String
"link"
      , String
"meta"
      , String
"param"
      ]
{-# INLINE emptyHtmlTags #-}

isInnerHtmlTagOf        :: String -> String -> Bool
String
n isInnerHtmlTagOf :: String -> String -> Bool
`isInnerHtmlTagOf` String
tn
    = String
n String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`
      ( [String] -> Maybe [String] -> [String]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [String] -> [String])
-> ([(String, [String])] -> Maybe [String])
-> [(String, [String])]
-> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [(String, [String])] -> Maybe [String]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
tn
      ([(String, [String])] -> [String])
-> [(String, [String])] -> [String]
forall a b. (a -> b) -> a -> b
$ [ (String
"body",    [String
"p"])
        , (String
"caption", [String
"p"])
        , (String
"dd",      [String
"p"])
        , (String
"div",     [String
"p"])
        , (String
"dl",      [String
"dt",String
"dd"])
        , (String
"dt",      [String
"p"])
        , (String
"li",      [String
"p"])
        , (String
"map",     [String
"p"])
        , (String
"object",  [String
"p"])
        , (String
"ol",      [String
"li"])
        , (String
"table",   [String
"th",String
"tr",String
"td",String
"thead",String
"tfoot",String
"tbody"])
        , (String
"tbody",   [String
"th",String
"tr",String
"td"])
        , (String
"td",      [String
"p"])
        , (String
"tfoot",   [String
"th",String
"tr",String
"td"])
        , (String
"th",      [String
"p"])
        , (String
"thead",   [String
"th",String
"tr",String
"td"])
        , (String
"tr",      [String
"th",String
"td"])
        , (String
"ul",      [String
"li"])
        ]
      )

-- a bit more efficient implementation of closes

closesHtmlTag   :: String -> String -> Bool
closesHtmlTag :: String -> String -> Bool
closesHtmlTag String
t String
t2
    = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
False (Maybe Bool -> Bool)
-> (Map String (String -> Bool) -> Maybe Bool)
-> Map String (String -> Bool)
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((String -> Bool) -> Bool) -> Maybe (String -> Bool) -> Maybe Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((String -> Bool) -> String -> Bool
forall a b. (a -> b) -> a -> b
$ String
t) (Maybe (String -> Bool) -> Maybe Bool)
-> (Map String (String -> Bool) -> Maybe (String -> Bool))
-> Map String (String -> Bool)
-> Maybe Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Map String (String -> Bool) -> Maybe (String -> Bool)
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup String
t2 (Map String (String -> Bool) -> Bool)
-> Map String (String -> Bool) -> Bool
forall a b. (a -> b) -> a -> b
$ Map String (String -> Bool)
closedByTable
{-# INLINE closesHtmlTag #-}

closedByTable   :: M.Map String (String -> Bool)
closedByTable :: Map String (String -> Bool)
closedByTable
    = [(String, String -> Bool)] -> Map String (String -> Bool)
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(String, String -> Bool)] -> Map String (String -> Bool))
-> [(String, String -> Bool)] -> Map String (String -> Bool)
forall a b. (a -> b) -> a -> b
$
      [ (String
"a",   (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"a"))
      , (String
"li",  (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"li" ))
      , (String
"th",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th", String
"td", String
"tr"] ))
      , (String
"td",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th", String
"td", String
"tr"] ))
      , (String
"tr",  (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"tr"))
      , (String
"dt",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt", String
"dd"] ))
      , (String
"dd",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt", String
"dd"] ))
      , (String
"p",   (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"hr"
                        , String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
      , (String
"colgroup",    (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"colgroup", String
"thead", String
"tfoot", String
"tbody"] ))
      , (String
"form",        (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"form"] ))
      , (String
"label",       (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"label"] ))
      , (String
"map",         (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"map"] ))
      , (String
"option",      Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
True)
      , (String
"script",      Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
True)
      , (String
"style",       Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
True)
      , (String
"textarea",    Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
True)
      , (String
"title",       Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
True)
      , (String
"select",      ( String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/= String
"option"))
      , (String
"thead",       (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"tfoot",String
"tbody"] ))
      , (String
"tbody",       (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"tbody" ))
      , (String
"tfoot",       (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"tbody" ))
      , (String
"h1",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
      , (String
"h2",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
      , (String
"h3",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
      , (String
"h4",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
      , (String
"h5",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
      , (String
"h6",  (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
      ]

{-
closesHtmlTag :: String -> String -> Bool
closesHtmlTag   = closes

closes :: String -> String -> Bool

"a"     `closes` "a"                                    = True
"li"    `closes` "li"                                   = True
"th"    `closes`  t    | t `elem` ["th","td"]           = True
"td"    `closes`  t    | t `elem` ["th","td"]           = True
"tr"    `closes`  t    | t `elem` ["th","td","tr"]      = True
"dt"    `closes`  t    | t `elem` ["dt","dd"]           = True
"dd"    `closes`  t    | t `elem` ["dt","dd"]           = True
"hr"    `closes`  "p"                                   = True
"colgroup"
        `closes` "colgroup"                             = True
"form"  `closes` "form"                                 = True
"label" `closes` "label"                                = True
"map"   `closes` "map"                                  = True
"object"
        `closes` "object"                               = True
_       `closes` t  | t `elem` ["option"
                               ,"script"
                               ,"style"
                               ,"textarea"
                               ,"title"
                               ]                        = True
t       `closes` "select" | t /= "option"               = True
"thead" `closes` t  | t `elem` ["colgroup"]             = True
"tfoot" `closes` t  | t `elem` ["thead"
                               ,"colgroup"]             = True
"tbody" `closes` t  | t `elem` ["tbody"
                               ,"tfoot"
                               ,"thead"
                               ,"colgroup"]             = True
t       `closes` t2 | t `elem` ["h1","h2","h3"
                               ,"h4","h5","h6"
                               ,"dl","ol","ul"
                               ,"table"
                               ,"div","p"
                               ]
                      &&
                      t2 `elem` ["h1","h2","h3"
                                ,"h4","h5","h6"
                                ,"p"                    -- not "div"
                                ]                       = True
_       `closes` _                                      = False
-}

-- ------------------------------------------------------------