{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

-- | A 'Markup' parser and printer of strict bytestrings. 'Markup' is a representation of data such as HTML, SVG or XML but the parsing is sub-standard.
module MarkupParse
  ( -- $usage

    -- * Markup
    Markup (..),
    Standard (..),
    markup,
    markup_,
    RenderStyle (..),
    markdown,
    normalize,
    wellFormed,
    isWellFormed,

    -- * Warnings
    MarkupWarning (..),
    Result,
    resultError,
    resultEither,
    resultMaybe,

    -- * Token components
    TagName,
    name,
    selfClosers,
    AttrName,
    AttrValue,
    Attr (..),
    attrs,

    -- * Tokens
    Token (..),
    tokenize,
    tokenize_,
    token,
    detokenize,
    gather,
    gather_,
    degather,
    degather_,

    -- * XML specific Parsers
    xmlVersionInfo,
    xmlEncodingDecl,
    xmlStandalone,
    xmlVersionNum,
    xmlEncName,
    xmlYesNo,
  )
where

import Control.Category ((>>>))
import Control.DeepSeq
import Control.Monad
import Data.Bifunctor
import Data.Bool
import Data.ByteString (ByteString)
import Data.ByteString.Char8 qualified as B
import Data.Char hiding (isDigit)
import Data.Foldable
import Data.Function
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Maybe
import Data.String.Interpolate
import Data.These
import Data.Tree
import Data.TreeDiff
import FlatParse.Basic hiding (Result, cut, take)
import GHC.Generics
import MarkupParse.FlatParse
import Prelude hiding (replicate)

-- $setup
-- >>> :set -XTemplateHaskell
-- >>> :set -XQuasiQuotes
-- >>> :set -XOverloadedStrings
-- >>> import MarkupParse
-- >>> import MarkupParse.Patch
-- >>> import MarkupParse.FlatParse
-- >>> import FlatParse.Basic
-- >>> import Data.String.Interpolate
-- >>> import Data.ByteString.Char8 qualified as B
-- >>> import Data.Tree

-- $usage
--
-- > import MarkupParse
-- > import Data.ByteString qualified as B
-- >
-- > bs <- B.readFile "other/line.svg"
-- > m = markup_ bs
--
-- @'markdown' . ''markup_'@ is an approximate round trip from 'ByteString' to 'Markup' back to ByteString'. The underscores represent versions of main functions that throw an exception on warnings encountered along the way.
--
-- At a lower level, a round trip pipeline might look something like:
--
-- > :t tokenize Html >=> gather Html >>> fmap (Markup Html >>> normalize) >=> degather >>> fmap (fmap (detokenize Html) >>> mconcat)
-- > ByteString -> These [MarkupWarning] ByteString
--
-- From left to right:
--
-- - 'tokenize' converts a 'ByteString' to a 'Token' list,
--
-- - 'gather' takes the tokens and gathers them into 'Tree's of tokens
--
-- - this is then wrapped into the 'Markup' data type.
--
-- - 'normalize' concatenates content, and normalizes attributes,
--
-- - 'degather' turns the markup tree back into a token list. Finally,
--
-- - 'detokenize' turns a token back into a bytestring.
--
-- Along the way, the kleisi fishies and compose forward usage accumulates any warnings via the 'These' monad instance.

-- | From a parsing pov, Html & Xml (& Svg) are close enough that they share a lot of parsing logic, so that parsing and printing just need some tweaking.
--
-- The xml parsing logic is based on the XML productions found in https://www.w3.org/TR/xml/
--
-- The html parsing was based on a reading of <https://hackage.haskell.org/package/html-parse html-parse>, but ignores the various '\x00' to '\xfffd' & eof directives that form part of the html standards.
data Standard = Html | Xml deriving (Standard -> Standard -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Standard -> Standard -> Bool
$c/= :: Standard -> Standard -> Bool
== :: Standard -> Standard -> Bool
$c== :: Standard -> Standard -> Bool
Eq, Int -> Standard -> ShowS
[Standard] -> ShowS
Standard -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Standard] -> ShowS
$cshowList :: [Standard] -> ShowS
show :: Standard -> String
$cshow :: Standard -> String
showsPrec :: Int -> Standard -> ShowS
$cshowsPrec :: Int -> Standard -> ShowS
Show, Eq Standard
Standard -> Standard -> Bool
Standard -> Standard -> Ordering
Standard -> Standard -> Standard
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Standard -> Standard -> Standard
$cmin :: Standard -> Standard -> Standard
max :: Standard -> Standard -> Standard
$cmax :: Standard -> Standard -> Standard
>= :: Standard -> Standard -> Bool
$c>= :: Standard -> Standard -> Bool
> :: Standard -> Standard -> Bool
$c> :: Standard -> Standard -> Bool
<= :: Standard -> Standard -> Bool
$c<= :: Standard -> Standard -> Bool
< :: Standard -> Standard -> Bool
$c< :: Standard -> Standard -> Bool
compare :: Standard -> Standard -> Ordering
$ccompare :: Standard -> Standard -> Ordering
Ord, forall x. Rep Standard x -> Standard
forall x. Standard -> Rep Standard x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Standard x -> Standard
$cfrom :: forall x. Standard -> Rep Standard x
Generic, Standard -> ()
forall a. (a -> ()) -> NFData a
rnf :: Standard -> ()
$crnf :: Standard -> ()
NFData)

instance ToExpr Standard

-- | A 'Tree' list of markup 'Token's
--
-- >>> markup Html "<foo class=\"bar\">baz</foo>"
-- That (Markup {standard = Html, markupTree = [Node {rootLabel = StartTag "foo" [Attr "class" "bar"], subForest = [Node {rootLabel = Content "baz", subForest = []}]}]})
data Markup = Markup {Markup -> Standard
standard :: Standard, Markup -> [Tree Token]
markupTree :: [Tree Token]} deriving (Int -> Markup -> ShowS
[Markup] -> ShowS
Markup -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Markup] -> ShowS
$cshowList :: [Markup] -> ShowS
show :: Markup -> String
$cshow :: Markup -> String
showsPrec :: Int -> Markup -> ShowS
$cshowsPrec :: Int -> Markup -> ShowS
Show, Markup -> Markup -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Markup -> Markup -> Bool
$c/= :: Markup -> Markup -> Bool
== :: Markup -> Markup -> Bool
$c== :: Markup -> Markup -> Bool
Eq, Eq Markup
Markup -> Markup -> Bool
Markup -> Markup -> Ordering
Markup -> Markup -> Markup
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Markup -> Markup -> Markup
$cmin :: Markup -> Markup -> Markup
max :: Markup -> Markup -> Markup
$cmax :: Markup -> Markup -> Markup
>= :: Markup -> Markup -> Bool
$c>= :: Markup -> Markup -> Bool
> :: Markup -> Markup -> Bool
$c> :: Markup -> Markup -> Bool
<= :: Markup -> Markup -> Bool
$c<= :: Markup -> Markup -> Bool
< :: Markup -> Markup -> Bool
$c< :: Markup -> Markup -> Bool
compare :: Markup -> Markup -> Ordering
$ccompare :: Markup -> Markup -> Ordering
Ord, forall x. Rep Markup x -> Markup
forall x. Markup -> Rep Markup x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Markup x -> Markup
$cfrom :: forall x. Markup -> Rep Markup x
Generic, Markup -> ()
forall a. (a -> ()) -> NFData a
rnf :: Markup -> ()
$crnf :: Markup -> ()
NFData)

instance ToExpr Markup

-- | markup-parse generally tries to continue on parse errors, and return what has/can still be parsed, together with any warnings.
data MarkupWarning
  = -- | A tag ending with "/>" that is not an element of 'selfClosers' (Html only).
    BadEmptyElemTag
  | -- | A tag ending with "/>" that has children. Cannot happen in the parsing phase.
    SelfCloserWithChildren
  | -- | Only a 'StartTag' can have child tokens.
    LeafWithChildren
  | -- | A CloseTag with a different name to the currently open StartTag.
    TagMismatch TagName TagName
  | -- | An EndTag with no corresponding StartTag.
    UnmatchedEndTag
  | -- | An EndTag with corresponding StartTag.
    UnclosedTag
  | -- | An EndTag should never appear in 'Markup'
    EndTagInTree
  | -- | Empty Content, Comment, Decl or Doctype
    EmptyContent
  | MarkupParser ParserWarning
  deriving (MarkupWarning -> MarkupWarning -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: MarkupWarning -> MarkupWarning -> Bool
$c/= :: MarkupWarning -> MarkupWarning -> Bool
== :: MarkupWarning -> MarkupWarning -> Bool
$c== :: MarkupWarning -> MarkupWarning -> Bool
Eq, Int -> MarkupWarning -> ShowS
[MarkupWarning] -> ShowS
MarkupWarning -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MarkupWarning] -> ShowS
$cshowList :: [MarkupWarning] -> ShowS
show :: MarkupWarning -> String
$cshow :: MarkupWarning -> String
showsPrec :: Int -> MarkupWarning -> ShowS
$cshowsPrec :: Int -> MarkupWarning -> ShowS
Show, Eq MarkupWarning
MarkupWarning -> MarkupWarning -> Bool
MarkupWarning -> MarkupWarning -> Ordering
MarkupWarning -> MarkupWarning -> MarkupWarning
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: MarkupWarning -> MarkupWarning -> MarkupWarning
$cmin :: MarkupWarning -> MarkupWarning -> MarkupWarning
max :: MarkupWarning -> MarkupWarning -> MarkupWarning
$cmax :: MarkupWarning -> MarkupWarning -> MarkupWarning
>= :: MarkupWarning -> MarkupWarning -> Bool
$c>= :: MarkupWarning -> MarkupWarning -> Bool
> :: MarkupWarning -> MarkupWarning -> Bool
$c> :: MarkupWarning -> MarkupWarning -> Bool
<= :: MarkupWarning -> MarkupWarning -> Bool
$c<= :: MarkupWarning -> MarkupWarning -> Bool
< :: MarkupWarning -> MarkupWarning -> Bool
$c< :: MarkupWarning -> MarkupWarning -> Bool
compare :: MarkupWarning -> MarkupWarning -> Ordering
$ccompare :: MarkupWarning -> MarkupWarning -> Ordering
Ord, forall x. Rep MarkupWarning x -> MarkupWarning
forall x. MarkupWarning -> Rep MarkupWarning x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep MarkupWarning x -> MarkupWarning
$cfrom :: forall x. MarkupWarning -> Rep MarkupWarning x
Generic, MarkupWarning -> ()
forall a. (a -> ()) -> NFData a
rnf :: MarkupWarning -> ()
$crnf :: MarkupWarning -> ()
NFData)

showWarnings :: [MarkupWarning] -> String
showWarnings :: [MarkupWarning] -> String
showWarnings = forall a. Eq a => [a] -> [a]
List.nub forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Show a => a -> String
show forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [String] -> String
unlines

-- | The structure of many returning functions.
--
-- A common computation pipeline is to take advantage of the 'These' Monad instance eg
--
-- > markup s bs = bs & (tokenize s >=> gather s) & second (Markup s)
type Result a = These [MarkupWarning] a

-- | Convert any warnings to an 'error'
--
-- >>> resultError $ (tokenize Html) "<foo"
-- *** Exception: MarkupParser (ParserLeftover "<foo")
-- ...
resultError :: Result a -> a
resultError :: forall a. Result a -> a
resultError = forall a c b.
(a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these ([MarkupWarning] -> String
showWarnings forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall a. HasCallStack => String -> a
error) forall a. a -> a
id (\[MarkupWarning]
xs a
a -> forall a. a -> a -> Bool -> a
bool (forall a. HasCallStack => String -> a
error ([MarkupWarning] -> String
showWarnings [MarkupWarning]
xs)) a
a ([MarkupWarning]
xs forall a. Eq a => a -> a -> Bool
== []))

-- | Returns Left on any warnings
--
-- >>> resultEither $ (tokenize Html) "<foo><baz"
-- Left [MarkupParser (ParserLeftover "<baz")]
resultEither :: Result a -> Either [MarkupWarning] a
resultEither :: forall a. Result a -> Either [MarkupWarning] a
resultEither = forall a c b.
(a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these forall a b. a -> Either a b
Left forall a b. b -> Either a b
Right (\[MarkupWarning]
xs a
a -> forall a. a -> a -> Bool -> a
bool (forall a b. a -> Either a b
Left [MarkupWarning]
xs) (forall a b. b -> Either a b
Right a
a) ([MarkupWarning]
xs forall a. Eq a => a -> a -> Bool
== []))

-- | Returns results if any, ignoring warnings.
--
-- >>> resultMaybe $ (tokenize Html) "<foo><baz"
-- Just [StartTag "foo" []]
resultMaybe :: Result a -> Maybe a
resultMaybe :: forall a. Result a -> Maybe a
resultMaybe = forall a c b.
(a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these (forall a b. a -> b -> a
const forall a. Maybe a
Nothing) forall a. a -> Maybe a
Just (\[MarkupWarning]
_ a
a -> forall a. a -> Maybe a
Just a
a)

-- | Convert bytestrings to 'Markup'
--
-- >>> markup Html "<foo><br></foo><baz"
-- These [MarkupParser (ParserLeftover "<baz")] (Markup {standard = Html, markupTree = [Node {rootLabel = StartTag "foo" [], subForest = [Node {rootLabel = StartTag "br" [], subForest = []}]}]})
markup :: Standard -> ByteString -> These [MarkupWarning] Markup
markup :: Standard -> TagName -> These [MarkupWarning] Markup
markup Standard
s TagName
bs = TagName
bs forall a b. a -> (a -> b) -> b
& (Standard -> TagName -> These [MarkupWarning] [Token]
tokenize Standard
s forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> Standard -> [Token] -> These [MarkupWarning] [Tree Token]
gather Standard
s) forall a b. a -> (a -> b) -> b
& forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (Standard -> [Tree Token] -> Markup
Markup Standard
s)

-- | markup but errors on warnings.
markup_ :: Standard -> ByteString -> Markup
markup_ :: Standard -> TagName -> Markup
markup_ Standard
s TagName
bs = Standard -> TagName -> These [MarkupWarning] Markup
markup Standard
s TagName
bs forall a b. a -> (a -> b) -> b
& forall a. Result a -> a
resultError

-- | concatenate sequential content, and normalize attributes; unwording class values and removing duplicate attributes (taking last).
--
-- >>> B.putStr $ markdown Compact $ normalize (markup_ Xml [i|<foo class="a" class="b" bar="first" bar="last"/>|])
-- <foo bar="last" class="a b"/>
normalize :: Markup -> Markup
normalize :: Markup -> Markup
normalize (Markup Standard
s [Tree Token]
trees) = Standard -> [Tree Token] -> Markup
Markup Standard
s ([Tree Token] -> [Tree Token]
normContentTrees forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Token
normTokenAttrs) [Tree Token]
trees)

-- | Are the trees in the markup well-formed?
isWellFormed :: Markup -> Bool
isWellFormed :: Markup -> Bool
isWellFormed = (forall a. Eq a => a -> a -> Bool
== []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Markup -> [MarkupWarning]
wellFormed

-- | Check for well-formedness and rerturn warnings encountered.
--
-- >>> wellFormed $ Markup Html [Node (Comment "") [], Node (EndTag "foo") [], Node (EmptyElemTag "foo" []) [Node (Content "bar") []], Node (EmptyElemTag "foo" []) []]
-- [EmptyContent,EndTagInTree,LeafWithChildren,BadEmptyElemTag]
wellFormed :: Markup -> [MarkupWarning]
wellFormed :: Markup -> [MarkupWarning]
wellFormed (Markup Standard
s [Tree Token]
trees) = forall a. Eq a => [a] -> [a]
List.nub forall a b. (a -> b) -> a -> b
$ forall a. Monoid a => [a] -> a
mconcat (forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree Token -> [[MarkupWarning]] -> [MarkupWarning]
checkNode forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Tree Token]
trees)
  where
    checkNode :: Token -> [[MarkupWarning]] -> [MarkupWarning]
checkNode (StartTag TagName
_ [Attr]
_) [[MarkupWarning]]
xs = forall a. Monoid a => [a] -> a
mconcat [[MarkupWarning]]
xs
    checkNode (EmptyElemTag TagName
n [Attr]
_) [] =
      forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
BadEmptyElemTag] (Bool -> Bool
not (TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
selfClosers) Bool -> Bool -> Bool
&& Standard
s forall a. Eq a => a -> a -> Bool
== Standard
Html)
    checkNode (EndTag TagName
_) [] = [MarkupWarning
EndTagInTree]
    checkNode (Content TagName
bs) [] = forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
EmptyContent] (TagName
bs forall a. Eq a => a -> a -> Bool
== TagName
"")
    checkNode (Comment TagName
bs) [] = forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
EmptyContent] (TagName
bs forall a. Eq a => a -> a -> Bool
== TagName
"")
    checkNode (Decl TagName
bs) [] = forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
EmptyContent] (TagName
bs forall a. Eq a => a -> a -> Bool
== TagName
"")
    checkNode (Doctype TagName
bs) [] = forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
EmptyContent] (TagName
bs forall a. Eq a => a -> a -> Bool
== TagName
"")
    checkNode Token
_ [[MarkupWarning]]
_ = [MarkupWarning
LeafWithChildren]

-- | Name of token
type TagName = ByteString

-- | A Markup token
--
-- >>> runParser_ (many (token Html)) [i|<foo>content</foo>|]
-- [StartTag "foo" [],Content "content",EndTag "foo"]
--
-- >>> runParser_ (token Xml) [i|<foo/>|]
-- EmptyElemTag "foo" []
--
-- >>> runParser_ (token Html) "<!-- Comment -->"
-- Comment " Comment "
--
-- >>> runParser_ (token Xml) [i|<?xml version="1.0" encoding="UTF-8"?>|]
-- Decl "xml version=\"1.0\" encoding=\"UTF-8\""
--
-- >>> runParser_ (token Html) "<!DOCTYPE html>"
-- Doctype "DOCTYPE html"
--
-- >>> runParser_ (token Xml) "<!DOCTYPE foo [ declarations ]>"
-- Doctype "DOCTYPE foo [ declarations ]"
--
-- >>> runParser (token Html) [i|<foo a="a" b="b" c=c check>|]
-- OK (StartTag "foo" [Attr "a" "a",Attr "b" "b",Attr "c" "c",Attr "check" ""]) ""
--
-- >>> runParser (token Xml) [i|<foo a="a" b="b" c=c check>|]
-- Fail
data Token
  = -- | A start tag. https://developer.mozilla.org/en-US/docs/Glossary/Tag
    StartTag !TagName ![Attr]
  | -- | An empty element tag. Optional for XML and kind of not allowed in HTML.
    EmptyElemTag !TagName ![Attr]
  | -- | A closing tag.
    EndTag !TagName
  | -- | The content between tags.
    Content !ByteString
  | -- | Contents of a comment.
    Comment !ByteString
  | -- | Contents of a declaration
    Decl !ByteString
  | -- | Contents of a doctype declaration.
    Doctype !ByteString
  deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show, Eq Token
Token -> Token -> Bool
Token -> Token -> Ordering
Token -> Token -> Token
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Token -> Token -> Token
$cmin :: Token -> Token -> Token
max :: Token -> Token -> Token
$cmax :: Token -> Token -> Token
>= :: Token -> Token -> Bool
$c>= :: Token -> Token -> Bool
> :: Token -> Token -> Bool
$c> :: Token -> Token -> Bool
<= :: Token -> Token -> Bool
$c<= :: Token -> Token -> Bool
< :: Token -> Token -> Bool
$c< :: Token -> Token -> Bool
compare :: Token -> Token -> Ordering
$ccompare :: Token -> Token -> Ordering
Ord, Token -> Token -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, forall x. Rep Token x -> Token
forall x. Token -> Rep Token x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Token x -> Token
$cfrom :: forall x. Token -> Rep Token x
Generic)

instance NFData Token

instance ToExpr Token

-- | A flatparse 'Token' parser.
--
-- >>> runParser (token Html) "<foo>content</foo>"
-- OK (StartTag "foo" []) "content</foo>"
token :: Standard -> Parser String Token
token :: Standard -> Parser String Token
token Standard
Html = forall e. Parser e Token
tokenHtml
token Standard
Xml = forall e. Parser e Token
tokenXml

-- | Parse a bytestring into tokens
--
-- >>> tokenize Html [i|<foo>content</foo>|]
-- That [StartTag "foo" [],Content "content",EndTag "foo"]
tokenize :: Standard -> ByteString -> These [MarkupWarning] [Token]
tokenize :: Standard -> TagName -> These [MarkupWarning] [Token]
tokenize Standard
s TagName
bs = forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ((forall a. a -> [a] -> [a]
: []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParserWarning -> MarkupWarning
MarkupParser) forall a b. (a -> b) -> a -> b
$ forall a. Parser String a -> TagName -> These ParserWarning a
runParserWarn (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Standard -> Parser String Token
token Standard
s)) TagName
bs

-- | tokenize but errors on warnings.
tokenize_ :: Standard -> ByteString -> [Token]
tokenize_ :: Standard -> TagName -> [Token]
tokenize_ Standard
s TagName
bs = Standard -> TagName -> These [MarkupWarning] [Token]
tokenize Standard
s TagName
bs forall a b. a -> (a -> b) -> b
& forall a. Result a -> a
resultError

-- | Html tags that self-close
selfClosers :: [TagName]
selfClosers :: [TagName]
selfClosers =
  [ TagName
"area",
    TagName
"base",
    TagName
"br",
    TagName
"col",
    TagName
"embed",
    TagName
"hr",
    TagName
"img",
    TagName
"input",
    TagName
"link",
    TagName
"meta",
    TagName
"param",
    TagName
"source",
    TagName
"track",
    TagName
"wbr"
  ]

-- | Name of an attribute.
type AttrName = ByteString

-- | Value of an attribute. "" is equivalent to true with respect to boolean attributes.
type AttrValue = ByteString

-- | An attribute of a tag
--
-- In parsing, boolean attributes, which are not required to have a value in HTML,
-- will be set a value of "", which is ok. But this will then be rendered.
--
-- >>> detokenize Html <$> tokenize_ Html [i|<input checked>|]
-- ["<input checked=\"\">"]
data Attr = Attr !AttrName !AttrValue
  deriving (forall x. Rep Attr x -> Attr
forall x. Attr -> Rep Attr x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Attr x -> Attr
$cfrom :: forall x. Attr -> Rep Attr x
Generic, Int -> Attr -> ShowS
[Attr] -> ShowS
Attr -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Attr] -> ShowS
$cshowList :: [Attr] -> ShowS
show :: Attr -> String
$cshow :: Attr -> String
showsPrec :: Int -> Attr -> ShowS
$cshowsPrec :: Int -> Attr -> ShowS
Show, Attr -> Attr -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Attr -> Attr -> Bool
$c/= :: Attr -> Attr -> Bool
== :: Attr -> Attr -> Bool
$c== :: Attr -> Attr -> Bool
Eq, Eq Attr
Attr -> Attr -> Bool
Attr -> Attr -> Ordering
Attr -> Attr -> Attr
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Attr -> Attr -> Attr
$cmin :: Attr -> Attr -> Attr
max :: Attr -> Attr -> Attr
$cmax :: Attr -> Attr -> Attr
>= :: Attr -> Attr -> Bool
$c>= :: Attr -> Attr -> Bool
> :: Attr -> Attr -> Bool
$c> :: Attr -> Attr -> Bool
<= :: Attr -> Attr -> Bool
$c<= :: Attr -> Attr -> Bool
< :: Attr -> Attr -> Bool
$c< :: Attr -> Attr -> Bool
compare :: Attr -> Attr -> Ordering
$ccompare :: Attr -> Attr -> Ordering
Ord)

instance NFData Attr

instance ToExpr Attr

normTokenAttrs :: Token -> Token
normTokenAttrs :: Token -> Token
normTokenAttrs (StartTag TagName
n [Attr]
as) = TagName -> [Attr] -> Token
StartTag TagName
n ([Attr] -> [Attr]
normAttrs [Attr]
as)
normTokenAttrs (EmptyElemTag TagName
n [Attr]
as) = TagName -> [Attr] -> Token
EmptyElemTag TagName
n ([Attr] -> [Attr]
normAttrs [Attr]
as)
normTokenAttrs Token
x = Token
x

-- | normalize an attribution list, removing duplicate AttrNames, and space concatenating class values.
normAttrs :: [Attr] -> [Attr]
normAttrs :: [Attr] -> [Attr]
normAttrs [Attr]
as =
  forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry TagName -> TagName -> Attr
Attr
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( forall k a. Map k a -> [(k, a)]
Map.toList forall a b. (a -> b) -> a -> b
$
            forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl'
              ( \Map TagName TagName
s (Attr TagName
n TagName
v) ->
                  forall k a.
Ord k =>
(k -> a -> a -> a) -> k -> a -> Map k a -> Map k a
Map.insertWithKey
                    ( \TagName
k TagName
new TagName
old ->
                        case TagName
k of
                          TagName
"class" -> TagName
old forall a. Semigroup a => a -> a -> a
<> TagName
" " forall a. Semigroup a => a -> a -> a
<> TagName
new
                          TagName
_ -> TagName
new
                    )
                    TagName
n
                    TagName
v
                    Map TagName TagName
s
              )
              forall k a. Map k a
Map.empty
              [Attr]
as
        )

-- | render attributes
renderAttrs :: [Attr] -> ByteString
renderAttrs :: [Attr] -> TagName
renderAttrs = [TagName] -> TagName
B.unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Attr -> TagName
renderAttr

-- | render an attribute
--
-- Does not attempt to escape double quotes.
renderAttr :: Attr -> ByteString
renderAttr :: Attr -> TagName
renderAttr (Attr TagName
k TagName
v) = [i|#{k}="#{v}"|]

commentClose :: Parser e ()
commentClose :: forall e. Parser e ()
commentClose = $(string "-->")

charNotMinus :: Parser e ByteString
charNotMinus :: forall e. Parser e TagName
charNotMinus = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf forall a b. (a -> b) -> a -> b
$ forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (forall a. Eq a => a -> a -> Bool
/= Char
'-')

minusPlusChar :: Parser e ByteString
minusPlusChar :: forall e. Parser e TagName
minusPlusChar = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf ($(char '-') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e TagName
charNotMinus)

comment :: Parser e Token
comment :: forall e. Parser e Token
comment = TagName -> Token
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall e. Parser e TagName
charNotMinus forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> forall e. Parser e TagName
minusPlusChar)) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
commentClose

content :: Parser e Token
content :: forall e. Parser e Token
content = TagName -> Token
Content forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf (forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (forall a. Eq a => a -> a -> Bool
/= Char
'<')))

-- | bytestring representation of 'Token'.
--
-- >>> detokenize Html (StartTag "foo" [])
-- "<foo>"
detokenize :: Standard -> Token -> ByteString
detokenize :: Standard -> Token -> TagName
detokenize Standard
s = \case
  (StartTag TagName
n []) -> [i|<#{n}>|]
  (StartTag TagName
n [Attr]
as) -> [i|<#{n} #{renderAttrs as}>|]
  (EmptyElemTag TagName
n [Attr]
as) ->
    forall a. a -> a -> Bool -> a
bool
      [i|<#{n} #{renderAttrs as}/>|]
      [i|<#{n} #{renderAttrs as} />|]
      (Standard
s forall a. Eq a => a -> a -> Bool
== Standard
Html)
  (EndTag TagName
n) -> [i|</#{n}>|]
  (Content TagName
t) -> TagName
t
  (Comment TagName
t) -> [i|<!--#{t}-->|]
  (Doctype TagName
t) -> [i|<!#{t}>|]
  (Decl TagName
t) -> forall a. a -> a -> Bool -> a
bool [i|<?#{t}?>|] [i|<!#{t}!>|] (Standard
s forall a. Eq a => a -> a -> Bool
== Standard
Html)

-- | Indented 0 puts newlines in between the tags.
data RenderStyle = Compact | Indented Int deriving (RenderStyle -> RenderStyle -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RenderStyle -> RenderStyle -> Bool
$c/= :: RenderStyle -> RenderStyle -> Bool
== :: RenderStyle -> RenderStyle -> Bool
$c== :: RenderStyle -> RenderStyle -> Bool
Eq, Int -> RenderStyle -> ShowS
[RenderStyle] -> ShowS
RenderStyle -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RenderStyle] -> ShowS
$cshowList :: [RenderStyle] -> ShowS
show :: RenderStyle -> String
$cshow :: RenderStyle -> String
showsPrec :: Int -> RenderStyle -> ShowS
$cshowsPrec :: Int -> RenderStyle -> ShowS
Show, forall x. Rep RenderStyle x -> RenderStyle
forall x. RenderStyle -> Rep RenderStyle x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep RenderStyle x -> RenderStyle
$cfrom :: forall x. RenderStyle -> Rep RenderStyle x
Generic)

indentChildren :: RenderStyle -> [ByteString] -> [ByteString]
indentChildren :: RenderStyle -> [TagName] -> [TagName]
indentChildren RenderStyle
Compact = forall a. a -> a
id
indentChildren (Indented Int
x) =
  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Char -> TagName
B.replicate Int
x Char
' ' <>)

finalConcat :: RenderStyle -> [ByteString] -> ByteString
finalConcat :: RenderStyle -> [TagName] -> TagName
finalConcat RenderStyle
Compact = forall a. Monoid a => [a] -> a
mconcat
finalConcat (Indented Int
_) =
  TagName -> [TagName] -> TagName
B.intercalate (Char -> TagName
B.singleton Char
'\n')
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (forall a. Eq a => a -> a -> Bool
/= TagName
"")

-- | Convert 'Markup' to bytestrings
--
-- >>> B.putStr $ markdown (Indented 4) (markup_ Html [i|<foo><br></foo>|])
-- <foo>
--     <br>
-- </foo>
markdown :: RenderStyle -> Markup -> ByteString
markdown :: RenderStyle -> Markup -> TagName
markdown RenderStyle
r (Markup Standard
std [Tree Token]
tree) =
  RenderStyle -> [TagName] -> TagName
finalConcat RenderStyle
r forall a b. (a -> b) -> a -> b
$ forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree (RenderStyle -> Standard -> Token -> [[TagName]] -> [TagName]
renderBranch RenderStyle
r Standard
std) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Tree Token] -> [Tree Token]
normContentTrees [Tree Token]
tree

-- note that renderBranch adds in EndTags for StartTags when needed
renderBranch :: RenderStyle -> Standard -> Token -> [[ByteString]] -> [ByteString]
renderBranch :: RenderStyle -> Standard -> Token -> [[TagName]] -> [TagName]
renderBranch RenderStyle
r Standard
std s :: Token
s@(StartTag TagName
n [Attr]
_) [[TagName]]
children
  | TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
selfClosers Bool -> Bool -> Bool
&& Standard
std forall a. Eq a => a -> a -> Bool
== Standard
Html =
      [Standard -> Token -> TagName
detokenize Standard
std Token
s] forall a. Semigroup a => a -> a -> a
<> RenderStyle -> [TagName] -> [TagName]
indentChildren RenderStyle
r (forall a. Monoid a => [a] -> a
mconcat [[TagName]]
children)
  | Bool
otherwise =
      [Standard -> Token -> TagName
detokenize Standard
std Token
s] forall a. Semigroup a => a -> a -> a
<> RenderStyle -> [TagName] -> [TagName]
indentChildren RenderStyle
r (forall a. Monoid a => [a] -> a
mconcat [[TagName]]
children) forall a. Semigroup a => a -> a -> a
<> [Standard -> Token -> TagName
detokenize Standard
std (TagName -> Token
EndTag TagName
n)]
renderBranch RenderStyle
r Standard
std Token
x [[TagName]]
children =
  -- ignoring that this should be an error
  [Standard -> Token -> TagName
detokenize Standard
std Token
x] forall a. Semigroup a => a -> a -> a
<> RenderStyle -> [TagName] -> [TagName]
indentChildren RenderStyle
r (forall a. Monoid a => [a] -> a
mconcat [[TagName]]
children)

normContentTrees :: [Tree Token] -> [Tree Token]
normContentTrees :: [Tree Token] -> [Tree Token]
normContentTrees [Tree Token]
trees = forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree (\Token
x [Tree Token]
xs -> forall a. a -> [Tree a] -> Tree a
Node Token
x (forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Eq a => a -> a -> Bool
/= TagName -> Token
Content TagName
"") forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Tree a -> a
rootLabel) forall a b. (a -> b) -> a -> b
$ [Tree Token] -> [Tree Token]
concatContent [Tree Token]
xs)) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Tree Token] -> [Tree Token]
concatContent [Tree Token]
trees

concatContent :: [Tree Token] -> [Tree Token]
concatContent :: [Tree Token] -> [Tree Token]
concatContent = \case
  ((Node (Content TagName
t) [Tree Token]
_) : (Node (Content TagName
t') [Tree Token]
_) : [Tree Token]
ts) -> [Tree Token] -> [Tree Token]
concatContent forall a b. (a -> b) -> a -> b
$ forall a. a -> [Tree a] -> Tree a
Node (TagName -> Token
Content (TagName
t forall a. Semigroup a => a -> a -> a
<> TagName
t')) [] forall a. a -> [a] -> [a]
: [Tree Token]
ts
  (Tree Token
t : [Tree Token]
ts) -> Tree Token
t forall a. a -> [a] -> [a]
: [Tree Token] -> [Tree Token]
concatContent [Tree Token]
ts
  [] -> []

-- | Gather together token trees from a token list, placing child elements in nodes and removing EndTags.
--
-- >>> gather Html =<< tokenize Html "<foo class=\"bar\">baz</foo>"
-- That [Node {rootLabel = StartTag "foo" [Attr "class" "bar"], subForest = [Node {rootLabel = Content "baz", subForest = []}]}]
gather :: Standard -> [Token] -> These [MarkupWarning] [Tree Token]
gather :: Standard -> [Token] -> These [MarkupWarning] [Tree Token]
gather Standard
s [Token]
ts =
  case ([Tree Token]
finalSibs, [(Token, [Tree Token])]
finalParents, [MarkupWarning]
warnings) of
    ([Tree Token]
sibs, [], []) -> forall a b. b -> These a b
That (forall a. [a] -> [a]
reverse [Tree Token]
sibs)
    ([], [], [MarkupWarning]
xs) -> forall a b. a -> These a b
This [MarkupWarning]
xs
    ([Tree Token]
sibs, [(Token, [Tree Token])]
ps, [MarkupWarning]
xs) ->
      forall a b. a -> b -> These a b
These ([MarkupWarning]
xs forall a. Semigroup a => a -> a -> a
<> [MarkupWarning
UnclosedTag]) (forall a. [a] -> [a]
reverse forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\[Tree Token]
ss' (Token
p, [Tree Token]
ss) -> forall a. a -> [Tree a] -> Tree a
Node Token
p (forall a. [a] -> [a]
reverse [Tree Token]
ss') forall a. a -> [a] -> [a]
: [Tree Token]
ss) [Tree Token]
sibs [(Token, [Tree Token])]
ps)
  where
    (Cursor [Tree Token]
finalSibs [(Token, [Tree Token])]
finalParents, [MarkupWarning]
warnings) =
      forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\(Cursor
c, [MarkupWarning]
xs) Token
t -> Standard -> Token -> Cursor -> (Cursor, Maybe MarkupWarning)
incCursor Standard
s Token
t Cursor
c forall a b. a -> (a -> b) -> b
& forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (forall a. Maybe a -> [a]
maybeToList forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> (forall a. Semigroup a => a -> a -> a
<> [MarkupWarning]
xs))) ([Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor [] [], []) [Token]
ts

-- | gather but errors on warnings.
gather_ :: Standard -> [Token] -> [Tree Token]
gather_ :: Standard -> [Token] -> [Tree Token]
gather_ Standard
s [Token]
ts = Standard -> [Token] -> These [MarkupWarning] [Tree Token]
gather Standard
s [Token]
ts forall a b. a -> (a -> b) -> b
& forall a. Result a -> a
resultError

incCursor :: Standard -> Token -> Cursor -> (Cursor, Maybe MarkupWarning)
-- Only StartTags are ever pushed on to the parent list, here:
incCursor :: Standard -> Token -> Cursor -> (Cursor, Maybe MarkupWarning)
incCursor Standard
Xml t :: Token
t@(StartTag TagName
_ [Attr]
_) (Cursor [Tree Token]
ss [(Token, [Tree Token])]
ps) = ([Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor [] ((Token
t, [Tree Token]
ss) forall a. a -> [a] -> [a]
: [(Token, [Tree Token])]
ps), forall a. Maybe a
Nothing)
incCursor Standard
Html t :: Token
t@(StartTag TagName
n [Attr]
_) (Cursor [Tree Token]
ss [(Token, [Tree Token])]
ps) =
  (forall a. a -> a -> Bool -> a
bool ([Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor [] ((Token
t, [Tree Token]
ss) forall a. a -> [a] -> [a]
: [(Token, [Tree Token])]
ps)) ([Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor (forall a. a -> [Tree a] -> Tree a
Node Token
t [] forall a. a -> [a] -> [a]
: [Tree Token]
ss) [(Token, [Tree Token])]
ps) (TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
selfClosers), forall a. Maybe a
Nothing)
incCursor Standard
Xml t :: Token
t@(EmptyElemTag TagName
_ [Attr]
_) (Cursor [Tree Token]
ss [(Token, [Tree Token])]
ps) = ([Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor (forall a. a -> [Tree a] -> Tree a
Node Token
t [] forall a. a -> [a] -> [a]
: [Tree Token]
ss) [(Token, [Tree Token])]
ps, forall a. Maybe a
Nothing)
incCursor Standard
Html t :: Token
t@(EmptyElemTag TagName
n [Attr]
_) (Cursor [Tree Token]
ss [(Token, [Tree Token])]
ps) =
  ( [Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor (forall a. a -> [Tree a] -> Tree a
Node Token
t [] forall a. a -> [a] -> [a]
: [Tree Token]
ss) [(Token, [Tree Token])]
ps,
    forall a. a -> a -> Bool -> a
bool (forall a. a -> Maybe a
Just MarkupWarning
BadEmptyElemTag) forall a. Maybe a
Nothing (TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
selfClosers)
  )
incCursor Standard
_ (EndTag TagName
n) (Cursor [Tree Token]
ss ((p :: Token
p@(StartTag TagName
n' [Attr]
_), [Tree Token]
ss') : [(Token, [Tree Token])]
ps)) =
  ( [Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor (forall a. a -> [Tree a] -> Tree a
Node Token
p (forall a. [a] -> [a]
reverse [Tree Token]
ss) forall a. a -> [a] -> [a]
: [Tree Token]
ss') [(Token, [Tree Token])]
ps,
    forall a. a -> a -> Bool -> a
bool (forall a. a -> Maybe a
Just (TagName -> TagName -> MarkupWarning
TagMismatch TagName
n TagName
n')) forall a. Maybe a
Nothing (TagName
n forall a. Eq a => a -> a -> Bool
== TagName
n')
  )
-- Non-StartTag on parent list
incCursor Standard
_ (EndTag TagName
_) (Cursor [Tree Token]
ss ((Token
p, [Tree Token]
ss') : [(Token, [Tree Token])]
ps)) =
  ( [Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor (forall a. a -> [Tree a] -> Tree a
Node Token
p (forall a. [a] -> [a]
reverse [Tree Token]
ss) forall a. a -> [a] -> [a]
: [Tree Token]
ss') [(Token, [Tree Token])]
ps,
    forall a. a -> Maybe a
Just MarkupWarning
LeafWithChildren
  )
incCursor Standard
_ (EndTag TagName
_) (Cursor [Tree Token]
ss []) =
  ( [Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor [Tree Token]
ss [],
    forall a. a -> Maybe a
Just MarkupWarning
UnmatchedEndTag
  )
incCursor Standard
_ Token
t (Cursor [Tree Token]
ss [(Token, [Tree Token])]
ps) = ([Tree Token] -> [(Token, [Tree Token])] -> Cursor
Cursor (forall a. a -> [Tree a] -> Tree a
Node Token
t [] forall a. a -> [a] -> [a]
: [Tree Token]
ss) [(Token, [Tree Token])]
ps, forall a. Maybe a
Nothing)

data Cursor = Cursor
  { -- siblings, not (yet) part of another element
    Cursor -> [Tree Token]
_sibs :: [Tree Token],
    -- open elements and their siblings.
    Cursor -> [(Token, [Tree Token])]
_stack :: [(Token, [Tree Token])]
  }

-- | Convert a markup into a token list, adding end tags.
--
-- >>> degather =<< markup Html "<foo class=\"bar\">baz</foo>"
-- That [StartTag "foo" [Attr "class" "bar"],Content "baz",EndTag "foo"]
degather :: Markup -> These [MarkupWarning] [Token]
degather :: Markup -> These [MarkupWarning] [Token]
degather (Markup Standard
s [Tree Token]
tree) = forall a. [Result [a]] -> Result [a]
rconcats forall a b. (a -> b) -> a -> b
$ forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree (Standard
-> Token
-> [These [MarkupWarning] [Token]]
-> These [MarkupWarning] [Token]
addCloseTags Standard
s) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Tree Token]
tree

-- | degather but errors on warning
degather_ :: Markup -> [Token]
degather_ :: Markup -> [Token]
degather_ Markup
m = Markup -> These [MarkupWarning] [Token]
degather Markup
m forall a b. a -> (a -> b) -> b
& forall a. Result a -> a
resultError

rconcats :: [Result [a]] -> Result [a]
rconcats :: forall a. [Result [a]] -> Result [a]
rconcats [Result [a]]
rs = case forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap forall a. Monoid a => [a] -> a
mconcat forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ forall a b. [These a b] -> ([a], [b])
partitionHereThere [Result [a]]
rs of
  ([], [a]
xs) -> forall a b. b -> These a b
That [a]
xs
  ([MarkupWarning]
es, []) -> forall a b. a -> These a b
This [MarkupWarning]
es
  ([MarkupWarning]
es, [a]
xs) -> forall a b. a -> b -> These a b
These [MarkupWarning]
es [a]
xs

addCloseTags :: Standard -> Token -> [These [MarkupWarning] [Token]] -> These [MarkupWarning] [Token]
addCloseTags :: Standard
-> Token
-> [These [MarkupWarning] [Token]]
-> These [MarkupWarning] [Token]
addCloseTags Standard
std s :: Token
s@(StartTag TagName
n [Attr]
_) [These [MarkupWarning] [Token]]
children
  | [These [MarkupWarning] [Token]]
children forall a. Eq a => a -> a -> Bool
/= [] Bool -> Bool -> Bool
&& TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
selfClosers Bool -> Bool -> Bool
&& Standard
std forall a. Eq a => a -> a -> Bool
== Standard
Html =
      forall a b. a -> b -> These a b
These [MarkupWarning
SelfCloserWithChildren] [Token
s] forall a. Semigroup a => a -> a -> a
<> forall a. [Result [a]] -> Result [a]
rconcats [These [MarkupWarning] [Token]]
children
  | TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
selfClosers Bool -> Bool -> Bool
&& Standard
std forall a. Eq a => a -> a -> Bool
== Standard
Html =
      forall a b. b -> These a b
That [Token
s] forall a. Semigroup a => a -> a -> a
<> forall a. [Result [a]] -> Result [a]
rconcats [These [MarkupWarning] [Token]]
children
  | Bool
otherwise =
      forall a b. b -> These a b
That [Token
s] forall a. Semigroup a => a -> a -> a
<> forall a. [Result [a]] -> Result [a]
rconcats [These [MarkupWarning] [Token]]
children forall a. Semigroup a => a -> a -> a
<> forall a b. b -> These a b
That [TagName -> Token
EndTag TagName
n]
addCloseTags Standard
_ Token
x [These [MarkupWarning] [Token]]
xs = case [These [MarkupWarning] [Token]]
xs of
  [] -> forall a b. b -> These a b
That [Token
x]
  [These [MarkupWarning] [Token]]
cs -> forall a b. a -> b -> These a b
These [MarkupWarning
LeafWithChildren] [Token
x] forall a. Semigroup a => a -> a -> a
<> forall a. [Result [a]] -> Result [a]
rconcats [These [MarkupWarning] [Token]]
cs

tokenXml :: Parser e Token
tokenXml :: forall e. Parser e Token
tokenXml =
  $( switch
       [|
         case _ of
           "<!--" -> comment
           "<!" -> doctypeXml
           "</" -> endTagXml
           "<?" -> declXml
           "<" -> startTagsXml
           _ -> content
         |]
   )

-- [4]
nameStartChar :: Parser e Char
nameStartChar :: forall e. Parser e Char
nameStartChar = forall (st :: ZeroBitType) e.
(Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT st e Char
fusedSatisfy Char -> Bool
isLatinLetter Char -> Bool
isNameStartChar Char -> Bool
isNameStartChar Char -> Bool
isNameStartChar

isNameStartChar :: Char -> Bool
isNameStartChar :: Char -> Bool
isNameStartChar Char
x =
  (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

-- [4a]
nameChar :: Parser e Char
nameChar :: forall e. Parser e Char
nameChar = forall (st :: ZeroBitType) e.
(Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT st e Char
fusedSatisfy Char -> Bool
isNameCharAscii Char -> Bool
isNameCharExt Char -> Bool
isNameCharExt Char -> Bool
isNameCharExt

isNameCharAscii :: Char -> Bool
isNameCharAscii :: Char -> Bool
isNameCharAscii Char
x =
  (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'9')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'-')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'.')

isNameCharExt :: Char -> Bool
isNameCharExt :: Char -> Bool
isNameCharExt Char
x =
  (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'9')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'-')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'.')
    Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'\xB7')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x300' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x36F')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x203F' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
    Bool -> Bool -> Bool
|| (Char
x forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

-- | name string according to xml production rule [5]
nameXml :: Parser e ByteString
nameXml :: forall e. Parser e TagName
nameXml = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf (forall e. Parser e Char
nameStartChar forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall e. Parser e Char
nameChar)

-- | XML declaration as per production rule [23]
declXml :: Parser e Token
declXml :: forall e. Parser e Token
declXml =
  TagName -> Token
Decl
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf
      ( $(string "xml")
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e TagName
xmlVersionInfo
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e TagName
xmlEncodingDecl
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e TagName
xmlStandalone
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ()
ws_
      )
    forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(string "?>")

-- | xml production [24]
xmlVersionInfo :: Parser e ByteString
xmlVersionInfo :: forall e. Parser e TagName
xmlVersionInfo = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf forall a b. (a -> b) -> a -> b
$ forall e. Parser e ()
ws_ forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> $(string "version") forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ()
eq forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e a. Parser e a -> Parser e a
wrappedQNoGuard forall e. Parser e TagName
xmlVersionNum

-- | xml production [26]
xmlVersionNum :: Parser e ByteString
xmlVersionNum :: forall e. Parser e TagName
xmlVersionNum =
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf ($(string "1.") forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy Char -> Bool
isDigit))

-- | Doctype declaration as per production rule [28]
doctypeXml :: Parser e Token
doctypeXml :: forall e. Parser e Token
doctypeXml =
  TagName -> Token
Doctype
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf
      ( $(string "DOCTYPE")
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ()
ws_
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e TagName
nameXml
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
          -- optional (ws_ >> xmlExternalID) >>
          forall e. Parser e ()
ws_
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional forall e. Parser e String
bracketedSB
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ()
ws_
      )
    forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>')

-- | Xml production [32]
xmlStandalone :: Parser e ByteString
xmlStandalone :: forall e. Parser e TagName
xmlStandalone =
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf forall a b. (a -> b) -> a -> b
$
    forall e. Parser e ()
ws_ forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> $(string "standalone") forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ()
eq forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e TagName
xmlYesNo

-- | Xml yes/no
xmlYesNo :: Parser e ByteString
xmlYesNo :: forall e. Parser e TagName
xmlYesNo = forall e a. Parser e a -> Parser e a
wrappedQNoGuard (forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf forall a b. (a -> b) -> a -> b
$ $(string "yes") forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> $(string "no"))

-- | xml production [80]
xmlEncodingDecl :: Parser e ByteString
xmlEncodingDecl :: forall e. Parser e TagName
xmlEncodingDecl = forall e. Parser e ()
ws_ forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> $(string "encoding") forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e ()
eq forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e a. Parser e a -> Parser e a
wrappedQNoGuard forall e. Parser e TagName
xmlEncName

-- | xml production [81]
xmlEncName :: Parser e ByteString
xmlEncName :: forall e. Parser e TagName
xmlEncName = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii Char -> Bool
isLatinLetter forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii (\Char
x -> Char -> Bool
isLatinLetter Char
x Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
x Bool -> Bool -> Bool
|| forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
x (String
"._-" :: [Char]))))

-- | open xml tag as per xml production rule [40]
--  self-closing xml tag as per [44]
startTagsXml :: Parser e Token
startTagsXml :: forall e. Parser e Token
startTagsXml = do
  !TagName
n <- forall e. Parser e TagName
nameXml
  ![Attr]
as <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall e. Parser e ()
ws_ forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e. Parser e Attr
attrXml)
  ()
_ <- forall e. Parser e ()
ws_
  $( switch
       [|
         case _ of
           "/>" -> pure (EmptyElemTag n as)
           ">" -> pure (StartTag n as)
         |]
   )

attrXml :: Parser e Attr
attrXml :: forall e. Parser e Attr
attrXml = TagName -> TagName -> Attr
Attr forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall e. Parser e TagName
nameXml forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
eq) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall e. Parser e TagName
wrappedQ

-- | closing tag as per [42]
endTagXml :: Parser e Token
endTagXml :: forall e. Parser e Token
endTagXml = TagName -> Token
EndTag forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall e. Parser e TagName
nameXml forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
ws_ forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>'))

-- | Parse a single 'Token'.
tokenHtml :: Parser e Token
tokenHtml :: forall e. Parser e Token
tokenHtml =
  $( switch
       [|
         case _ of
           "<!--" -> comment
           "<!" -> doctypeHtml
           "</" -> endTagHtml
           "<?" -> bogusCommentHtml
           "<" -> startTagsHtml
           _ -> content
         |]
   )

bogusCommentHtml :: Parser e Token
bogusCommentHtml :: forall e. Parser e Token
bogusCommentHtml = TagName -> Token
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf (forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (forall a. Eq a => a -> a -> Bool
/= Char
'<')))

doctypeHtml :: Parser e Token
doctypeHtml :: forall e. Parser e Token
doctypeHtml =
  TagName -> Token
Doctype
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf
      ( $(string "DOCTYPE")
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ()
ws_
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e TagName
nameHtml
          forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall e. Parser e ()
ws_
      )
    forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>')

startTagsHtml :: Parser e Token
startTagsHtml :: forall e. Parser e Token
startTagsHtml = do
  TagName
n <- forall e. Parser e TagName
nameHtml
  [Attr]
as <- forall a. Standard -> Parser a [Attr]
attrs Standard
Html
  ()
_ <- forall e. Parser e ()
ws_
  $( switch
       [|
         case _ of
           "/>" -> pure (EmptyElemTag n as)
           ">" -> pure (StartTag n as)
         |]
   )

endTagHtml :: Parser e Token
endTagHtml :: forall e. Parser e Token
endTagHtml = TagName -> Token
EndTag forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e. Parser e TagName
nameHtml forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
ws_ forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>')

-- | Parse a tag name. Each standard is slightly different.
name :: Standard -> Parser e ByteString
name :: forall e. Standard -> Parser e TagName
name Standard
Html = forall e. Parser e TagName
nameHtml
name Standard
Xml = forall e. Parser e TagName
nameXml

nameHtml :: Parser e ByteString
nameHtml :: forall e. Parser e TagName
nameHtml = do
  forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf (forall e. Parser e Char
nameStartCharHtml forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy Char -> Bool
isNameChar))

nameStartCharHtml :: Parser e Char
nameStartCharHtml :: forall e. Parser e Char
nameStartCharHtml = forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii Char -> Bool
isLatinLetter

isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
x =
  Bool -> Bool
not
    ( Char -> Bool
isWhitespace Char
x
        Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'/')
        Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'<')
        Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'>')
    )

attrHtml :: Parser e Attr
attrHtml :: forall e. Parser e Attr
attrHtml =
  (TagName -> TagName -> Attr
Attr forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall e. Parser e TagName
attrName forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
eq) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall e. Parser e TagName
wrappedQ forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> forall e. Parser e TagName
attrBooleanName))
    forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> ((TagName -> TagName -> Attr
`Attr` forall a. Monoid a => a
mempty) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e. Parser e TagName
attrBooleanName)

attrBooleanName :: Parser e ByteString
attrBooleanName :: forall e. Parser e TagName
attrBooleanName = forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e TagName
byteStringOf forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy Char -> Bool
isBooleanAttrName)

-- | Parse an 'Attr'
attr :: Standard -> Parser a Attr
attr :: forall a. Standard -> Parser a Attr
attr Standard
Html = forall e. Parser e Attr
attrHtml
attr Standard
Xml = forall e. Parser e Attr
attrXml

-- | Parse attributions
attrs :: Standard -> Parser a [Attr]
attrs :: forall a. Standard -> Parser a [Attr]
attrs Standard
s = forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall e. Parser e ()
ws_ forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall a. Standard -> Parser a Attr
attr Standard
s) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e. Parser e ()
ws_

attrName :: Parser e ByteString
attrName :: forall e. Parser e TagName
attrName = forall e. (Char -> Bool) -> Parser e TagName
isa Char -> Bool
isAttrName

isAttrName :: Char -> Bool
isAttrName :: Char -> Bool
isAttrName Char
x =
  Bool -> Bool
not forall a b. (a -> b) -> a -> b
$
    Char -> Bool
isWhitespace Char
x
      Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'/')
      Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'>')
      Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'=')

isBooleanAttrName :: Char -> Bool
isBooleanAttrName :: Char -> Bool
isBooleanAttrName Char
x =
  Bool -> Bool
not forall a b. (a -> b) -> a -> b
$
    Char -> Bool
isWhitespace Char
x
      Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'/')
      Bool -> Bool -> Bool
|| (Char
x forall a. Eq a => a -> a -> Bool
== Char
'>')