{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}

-- | Much of the parsing logic is based on the XML productions found in https://www.w3.org/TR/xml/#NT-content
--
-- As an Xml parser, this is very incomplete and rudimentary, hence not calling it an xml parser.
--
-- My other reference was https://www.w3schools.com/xml/xml_syntax.asp (don't laugh).
module Chart.Markup.Parser
  ( markupP,
    contentP,
    XmlDocument (..),
    xmlDocument,
    xmlProlog,
    xmlXMLDecl,
    xmlDoctypedecl,
    XmlMiscType,
    XmlMisc (..),
    xmlMisc,
    xmlComment,
    lt,
    gt,
    gtc,
    oct,
    sq,
    dq,
    wrappedQ,
    wrappedQNoGuard,
    eq,
    xmlName,
    xmlAtt,
    openTag,
    closeTag,
    emptyElemTag,

    -- * testing
    exampleDocument,
  )
where

import Chart.FlatParse
import Chart.Markup
  ( Content (..),
    Markup (Markup),
    attribute,
  )
import Data.ByteString (ByteString)
import Data.String.Interpolate
import FlatParse.Basic hiding (cut)
import FlatParse.Basic.Text qualified as T
import GHC.Generics
import Prelude

-- $setup
--
-- >>> :set -XOverloadedLabels
-- >>> :set -XOverloadedStrings
-- >>> import Chart
-- >>> import Optics.Core
-- >>> import FlatParse.Basic
-- >>> import Chart.FlatParse

-- * special XML chars

-- | opening tag
--
-- >>> runParserMaybe lt "<"
-- Just ()
lt :: Parser e ()
lt :: forall e. Parser e ()
lt = $(char '<') -- `cut'` Lit "<"

-- | closing tag char
--
-- >>> runParserMaybe gt ">"
-- Just ()
gt :: Parser e ()
gt :: forall e. Parser e ()
gt = $(char '>')

-- | self-closing tag
--
-- >>> runParserMaybe gtc "/>"
-- Just ()
gtc :: Parser e ()
gtc :: forall e. Parser e ()
gtc = $(string "/>")

-- | open closer tag
--
-- >>> runParserMaybe oct "</"
-- Just ()
oct :: Parser e ()
oct :: forall e. Parser e ()
oct = $(string "</")

-- | single quote
--
-- >>> runParserMaybe sq "''"
-- Just ()
sq :: ParserT st e ()
sq :: forall (st :: ZeroBitType) e. ParserT st e ()
sq = $(char '\'')

-- | double quote
--
-- >>> runParserMaybe dq "\""
-- Just ()
dq :: ParserT st e ()
dq :: forall (st :: ZeroBitType) e. ParserT st e ()
dq = $(char '"')

wrappedDq :: Parser e ByteString
wrappedDq :: forall e. Parser e ByteString
wrappedDq = forall e a. Parser e () -> Parser e a -> Parser e a
wrapped forall (st :: ZeroBitType) e. ParserT st e ()
dq (forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
T.satisfy (forall a. Eq a => a -> a -> Bool
/= Char
'"')))

-- | guard check for closing quote
wrappedSq :: Parser e ByteString
wrappedSq :: forall e. Parser e ByteString
wrappedSq = forall e a. Parser e () -> Parser e a -> Parser e a
wrapped forall (st :: ZeroBitType) e. ParserT st e ()
sq (forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
T.satisfy (forall a. Eq a => a -> a -> Bool
/= Char
'\'')))

-- | quote or double quote wrapped
--
-- >>> runParserMaybe wrappedQ "\"quoted\""
-- Just "quoted"
--
-- >>> runParserMaybe wrappedQ "'quoted'"
-- Just "quoted"
wrappedQ :: Parser e ByteString
wrappedQ :: forall e. Parser e ByteString
wrappedQ =
  forall e. Parser e ByteString
wrappedDq
    forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> forall e. Parser e ByteString
wrappedSq

-- | quote or double quote wrapped
--
-- >>> runParserMaybe (wrappedQNoGuard xmlName) "\"name\""
-- Just "name"
--
-- but will consume quotes if the underlying parser does.
--
-- >>> runParserMaybe (wrappedQNoGuard (many anyChar)) "\"name\""
-- Nothing
wrappedQNoGuard :: Parser e a -> Parser e a
wrappedQNoGuard :: forall e a. Parser e a -> Parser e a
wrappedQNoGuard Parser e a
p = forall e a. Parser e () -> Parser e a -> Parser e a
wrapped forall (st :: ZeroBitType) e. ParserT st e ()
dq Parser e a
p forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> forall e a. Parser e () -> Parser e a -> Parser e a
wrapped forall (st :: ZeroBitType) e. ParserT st e ()
sq Parser e a
p

-- | xml production [25]
--
-- >>> runParserMaybe eq " = "
-- Just ()
--
-- >>> runParserMaybe eq "="
-- Just ()
eq :: Parser e ()
eq :: forall e. Parser e ()
eq = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> $(char '=') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss

-- [4]
nameStartChar :: Parser e Char
nameStartChar :: forall e. Parser e Char
nameStartChar = forall (st :: ZeroBitType) e.
(Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT st e Char
fusedSatisfy Char -> Bool
isLatinLetter Char -> Bool
isNameStartChar Char -> Bool
isNameStartChar Char -> Bool
isNameStartChar

isNameStartChar :: Char -> Bool
isNameStartChar :: Char -> Bool
isNameStartChar Char
x =
  (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

-- [4a]
nameChar :: Parser e Char
nameChar :: forall e. Parser e Char
nameChar = forall (st :: ZeroBitType) e.
(Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT st e Char
fusedSatisfy Char -> Bool
isNameCharAscii Char -> Bool
isNameCharExt Char -> Bool
isNameCharExt Char -> Bool
isNameCharExt

isNameCharAscii :: Char -> Bool
isNameCharAscii :: Char -> Bool
isNameCharAscii Char
x =
  (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'9')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'-')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'.')

isNameCharExt :: Char -> Bool
isNameCharExt :: Char -> Bool
isNameCharExt Char
x =
  (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'9')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'-')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'.')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'\xB7')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x300' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x36F')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x203F' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

-- | name string according to xml production rule [5]
--
-- >>> runParserMaybe xmlName "name"
-- Just "name"
xmlName :: Parser e ByteString
xmlName :: forall e. Parser e ByteString
xmlName = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (forall e. Parser e Char
nameStartChar forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall e. Parser e Char
nameChar)

-- | attribute pair
--
-- >>> runParserMaybe xmlAtt "style = 'fancy'"
-- Just ("style","fancy")
xmlAtt :: Parser e (ByteString, ByteString)
xmlAtt :: forall e. Parser e (ByteString, ByteString)
xmlAtt = (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall e. Parser e ByteString
xmlName forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
eq) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall e. Parser e ByteString
wrappedQ

-- | open xml tag as per xml production rule [40]
--
-- >>> runParserMaybe openTag "<g style='fancy'>"
-- Just ("g",[("style","fancy")])
openTag :: Parser Error (ByteString, [(ByteString, ByteString)])
openTag :: Parser Error (ByteString, [(ByteString, ByteString)])
openTag =
  forall e. Parser e ()
lt forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ((,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e. Parser e ByteString
xmlName forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall e. Parser e ByteString
wss forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e (ByteString, ByteString)
xmlAtt) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
gt forall a. Parser Error a -> Expected -> Parser Error a
`cut'` [Char] -> Expected
Msg [Char]
"open tag expected"

-- | closing tag as per [42]
--
-- >>> runParserMaybe closeTag "</g>"
-- Just "g"
closeTag :: Parser Error ByteString
closeTag :: Parser Error ByteString
closeTag = forall e. Parser e ()
oct forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ByteString
xmlName forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
gt forall a. Parser Error a -> Expected -> Parser Error a
`cut'` [Char] -> Expected
Msg [Char]
"close tag expected"

-- | empty element tag as per [44]
--
-- >>> runParserMaybe emptyElemTag "<br/>"
-- Just ("br",[])
emptyElemTag :: Parser Error (ByteString, [(ByteString, ByteString)])
emptyElemTag :: Parser Error (ByteString, [(ByteString, ByteString)])
emptyElemTag =
  forall e. Parser e ()
lt forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ((,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e. Parser e ByteString
xmlName forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall e. Parser e ByteString
wss forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e (ByteString, ByteString)
xmlAtt) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
gtc

-- * comments

xmlCommentOpen :: Parser e ()
xmlCommentOpen :: forall e. Parser e ()
xmlCommentOpen = $(string "<!--")

xmlCommentClose :: Parser e ()
xmlCommentClose :: forall e. Parser e ()
xmlCommentClose = $(string "-->")

xmlCharNotMinus :: Parser e ByteString
xmlCharNotMinus :: forall e. Parser e ByteString
xmlCharNotMinus = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$ forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (forall a. Eq a => a -> a -> Bool
/= Char
'-')

xmlMinusPlusChar :: Parser e ByteString
xmlMinusPlusChar :: forall e. Parser e ByteString
xmlMinusPlusChar = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$ $(char '-') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ByteString
xmlCharNotMinus

-- | xml comment
--
--
-- >>> runParserMaybe xmlComment "<!-- comment -->"
-- Just " comment "
xmlComment :: Parser e ByteString
xmlComment :: forall e. Parser e ByteString
xmlComment = forall e. Parser e ()
xmlCommentOpen forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall e. Parser e ByteString
xmlCharNotMinus forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> forall e. Parser e ByteString
xmlMinusPlusChar)) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
xmlCommentClose

-- * prolog

-- | xml production rule [22]
--
-- The library doesn't do any analysis of the prolog string nor produces it, hence it is just parsed as a ByteString
--
-- >>> runParser (ws_ *> xmlProlog) exampleDocument
-- OK "<?xml version=\"1.0\" standalone=\"yes\" ?>\n\n<!--open the DOCTYPE declaration -\n  the open square bracket indicates an internal DTD-->\n<!DOCTYPE foo [\n\n<!--define the internal DTD-->\n  <!ELEMENT foo (#PCDATA)>\n\n<!--close the DOCTYPE declaration-->\n]>\n" "<foo>Hello World.</foo>\n"
xmlProlog :: Parser e ByteString
xmlProlog :: forall e. Parser e ByteString
xmlProlog =
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$
    forall e. Parser e ByteString
xmlXMLDecl
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall e. Parser e XmlMisc
xmlMisc
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional (forall e. Parser e ByteString
xmlDoctypedecl forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e XmlMisc
xmlMisc)

-- | XML declaration as per production rule [23]
--
-- >>> runParserMaybe xmlXMLDecl "<?xml version=\"1.0\" standalone=\"yes\" ?>"
-- Just "<?xml version=\"1.0\" standalone=\"yes\" ?>"
xmlXMLDecl :: Parser e ByteString
xmlXMLDecl :: forall e. Parser e ByteString
xmlXMLDecl =
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$
    $(string "<?xml")
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ByteString
xmlVersionInfo
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
xmlEncodingDecl
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wssDDecl
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> $(string "?>")

-- xml production [24]
xmlVersionInfo :: Parser e ByteString
xmlVersionInfo :: forall e. Parser e ByteString
xmlVersionInfo = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$ forall e. Parser e ByteString
wss forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> $(string "version") forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ()
eq forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e a. Parser e a -> Parser e a
wrappedQNoGuard forall e. Parser e ByteString
xmlVersionNum

-- | xml production [26]
xmlVersionNum :: Parser e ByteString
xmlVersionNum :: forall e. Parser e ByteString
xmlVersionNum =
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf ($(string "1.") forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy Char -> Bool
isDigit))

-- | Whether an 'XmlMisc' is comment or whitespace
data XmlMiscType = XMiscComment | XMiscS deriving (forall x. Rep XmlMiscType x -> XmlMiscType
forall x. XmlMiscType -> Rep XmlMiscType x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep XmlMiscType x -> XmlMiscType
$cfrom :: forall x. XmlMiscType -> Rep XmlMiscType x
Generic, Int -> XmlMiscType -> ShowS
[XmlMiscType] -> ShowS
XmlMiscType -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [XmlMiscType] -> ShowS
$cshowList :: [XmlMiscType] -> ShowS
show :: XmlMiscType -> [Char]
$cshow :: XmlMiscType -> [Char]
showsPrec :: Int -> XmlMiscType -> ShowS
$cshowsPrec :: Int -> XmlMiscType -> ShowS
Show, XmlMiscType -> XmlMiscType -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: XmlMiscType -> XmlMiscType -> Bool
$c/= :: XmlMiscType -> XmlMiscType -> Bool
== :: XmlMiscType -> XmlMiscType -> Bool
$c== :: XmlMiscType -> XmlMiscType -> Bool
Eq)

-- | A comment or whitespace outside of the main document [27]
--
--   not as per [27] (missing PI)
data XmlMisc = XmlMisc {XmlMisc -> XmlMiscType
xmiscType :: XmlMiscType, XmlMisc -> ByteString
xmiscContent :: ByteString} deriving (forall x. Rep XmlMisc x -> XmlMisc
forall x. XmlMisc -> Rep XmlMisc x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep XmlMisc x -> XmlMisc
$cfrom :: forall x. XmlMisc -> Rep XmlMisc x
Generic, Int -> XmlMisc -> ShowS
[XmlMisc] -> ShowS
XmlMisc -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [XmlMisc] -> ShowS
$cshowList :: [XmlMisc] -> ShowS
show :: XmlMisc -> [Char]
$cshow :: XmlMisc -> [Char]
showsPrec :: Int -> XmlMisc -> ShowS
$cshowsPrec :: Int -> XmlMisc -> ShowS
Show, XmlMisc -> XmlMisc -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: XmlMisc -> XmlMisc -> Bool
$c/= :: XmlMisc -> XmlMisc -> Bool
== :: XmlMisc -> XmlMisc -> Bool
$c== :: XmlMisc -> XmlMisc -> Bool
Eq)

-- | Parser for miscellaneous guff
xmlMisc :: Parser e XmlMisc
xmlMisc :: forall e. Parser e XmlMisc
xmlMisc =
  (XmlMiscType -> ByteString -> XmlMisc
XmlMisc XmlMiscType
XMiscComment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e. Parser e ByteString
xmlComment)
    forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> (XmlMiscType -> ByteString -> XmlMisc
XmlMisc XmlMiscType
XMiscS forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e. Parser e ByteString
wss)

-- | Typical xml header text
exampleDocument :: ByteString
exampleDocument :: ByteString
exampleDocument =
  [i|
<?xml version="1.0" standalone="yes" ?>

<!--open the DOCTYPE declaration -
  the open square bracket indicates an internal DTD-->
<!DOCTYPE foo [

<!--define the internal DTD-->
  <!ELEMENT foo (\#PCDATA)>

<!--close the DOCTYPE declaration-->
]>
<foo>Hello World.</foo>
|]

-- | Doctype declaration as per production rule [28]
--
-- >>> runParserMaybe xmlDoctypedecl "<!DOCTYPE foo [ declarations ]>"
-- Just "<!DOCTYPE foo [ declarations ]>"
xmlDoctypedecl :: Parser e ByteString
xmlDoctypedecl :: forall e. Parser e ByteString
xmlDoctypedecl =
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$
    $(string "<!DOCTYPE")
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ByteString
wss
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ByteString
xmlName
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
      -- optional (wss >> xmlExternalID) >>
      forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e [Char]
bracketedSB
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e ByteString
wss
      forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> $(char '>')

bracketedSB :: Parser e [Char]
bracketedSB :: forall e. Parser e [Char]
bracketedSB = forall e b a. Parser e b -> Parser e b -> Parser e a -> Parser e a
bracketed $(char '[') $(char ']') (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (forall a. Eq a => a -> a -> Bool
/= Char
']')))

-- [32]
wssDDecl :: Parser e ByteString
wssDDecl :: forall e. Parser e ByteString
wssDDecl =
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$
    forall e. Parser e ByteString
wss forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> $(string "standalone") forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ()
eq forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ByteString
xmlYesNo

xmlYesNo :: Parser e ByteString
xmlYesNo :: forall e. Parser e ByteString
xmlYesNo = forall e a. Parser e a -> Parser e a
wrappedQNoGuard (forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf forall a b. (a -> b) -> a -> b
$ $(string "yes") forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> $(string "no"))

-- | xml production [80]
xmlEncodingDecl :: Parser e ByteString
xmlEncodingDecl :: forall e. Parser e ByteString
xmlEncodingDecl = forall e. Parser e ByteString
wss forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> $(string "encoding") forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ()
eq forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e a. Parser e a -> Parser e a
wrappedQNoGuard forall e. Parser e ByteString
xmlEncName

-- [81]
xmlEncName :: Parser e ByteString
xmlEncName :: forall e. Parser e ByteString
xmlEncName = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii Char -> Bool
isLatinLetter forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii (\Char
x -> Char -> Bool
isLatinLetter Char
x Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
x Bool -> Bool -> Bool
|| forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
x ([Char]
"._-" :: [Char]))))

-- main Parser

-- | An XML document as pre production rule [1]
data XmlDocument = XmlDocument ByteString Markup [XmlMisc] deriving (Int -> XmlDocument -> ShowS
[XmlDocument] -> ShowS
XmlDocument -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [XmlDocument] -> ShowS
$cshowList :: [XmlDocument] -> ShowS
show :: XmlDocument -> [Char]
$cshow :: XmlDocument -> [Char]
showsPrec :: Int -> XmlDocument -> ShowS
$cshowsPrec :: Int -> XmlDocument -> ShowS
Show, XmlDocument -> XmlDocument -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: XmlDocument -> XmlDocument -> Bool
$c/= :: XmlDocument -> XmlDocument -> Bool
== :: XmlDocument -> XmlDocument -> Bool
$c== :: XmlDocument -> XmlDocument -> Bool
Eq)

-- | Note that the library builds a Markup as per the SVG standards and not a Document.
--
-- >>> runParser (ws_ *> xmlDocument) exampleDocument
-- OK (XmlDocument "<?xml version=\"1.0\" standalone=\"yes\" ?>\n\n<!--open the DOCTYPE declaration -\n  the open square bracket indicates an internal DTD-->\n<!DOCTYPE foo [\n\n<!--define the internal DTD-->\n  <!ELEMENT foo (#PCDATA)>\n\n<!--close the DOCTYPE declaration-->\n]>\n" (Markup {tag = "foo", atts = Attributes {attMap = fromList []}, contents = [Content "Hello World."]}) [XmlMisc {xmiscType = XMiscS, xmiscContent = "\n"}]) ""
xmlDocument :: Parser Error XmlDocument
xmlDocument :: Parser Error XmlDocument
xmlDocument = ByteString -> Markup -> [XmlMisc] -> XmlDocument
XmlDocument forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall e. Parser e ()
ws_ forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ByteString
xmlProlog) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Error Markup
markupP forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall e. Parser e XmlMisc
xmlMisc

-- | Main parser for a single Markup (xml-like) element
--
-- >>> runParser markupP "<foo>Hello World.</foo>"
-- OK (Markup {tag = "foo", atts = Attributes {attMap = fromList []}, contents = [Content "Hello World."]}) ""
markupP :: Parser Error Markup
markupP :: Parser Error Markup
markupP =
  ((\(ByteString
n, [(ByteString, ByteString)]
as) -> ByteString -> Attributes -> [Content] -> Markup
Markup ByteString
n (forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ (ByteString, ByteString) -> Attributes
attribute forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(ByteString, ByteString)]
as) forall a. Monoid a => a
mempty) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Error (ByteString, [(ByteString, ByteString)])
emptyElemTag)
    forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|>
    -- no close tag = open tag test
    ((\(ByteString
n, [(ByteString, ByteString)]
as) [Content]
c ByteString
_ -> ByteString -> Attributes -> [Content] -> Markup
Markup ByteString
n (forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ (ByteString, ByteString) -> Attributes
attribute forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(ByteString, ByteString)]
as) [Content]
c) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Error (ByteString, [(ByteString, ByteString)])
openTag forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Error Content
contentP forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Error ByteString
closeTag forall a. Parser Error a -> [Expected] -> Parser Error a
`cut` [Expected
"open tag", Expected
"content", Expected
"close tag"])

-- | Inner contents of an element.
--
-- >>> runParser (some contentP) "<foo>Hello World.</foo>content<!-- comment -->"
-- OK [MarkupLeaf (Markup {tag = "foo", atts = Attributes {attMap = fromList []}, contents = [Content "Hello World."]}),Content "content",Comment " comment "] ""
contentP :: Parser Error Content
contentP :: Parser Error Content
contentP =
  (Markup -> Content
MarkupLeaf forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Error Markup
markupP)
    forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> (ByteString -> Content
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e. Parser e ByteString
xmlComment)
    forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> (ByteString -> Content
Content forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (forall a. Eq a => a -> a -> Bool
/= Char
'<'))))