{-# LANGUAGE OverloadedStrings #-}

{-|
Description:    A stateless reformulation of the components of the tree construction algorithm.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      provisional
Portability:    portable

This module provides the data structures used in the second half of this
implementation's split
__[HTML](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction)__
tree construction algorithm, and the common functions generating them.  While
it would certainly be possible to implement the specification more directly,
the instructions it provides are not only (very strongly) build around mutable
data structures, they're also tailored to a pass-by-reference core library.
Haskell idiomatically being neither, it's actually easier (and likely more
performant, though that's not been tested) to implement a fourth stage from
scratch.

In effect, the main parser body handles all stateful computation ---tracking
the stack of open elements, managing transitions through the finite state
machine, etc.--- but doesn't know the shape of the document tree beyond its
very narrow window.  Instead, it emits a self-contained sequence of
instructions which can be very simply consumed with neither (much) further
modification nor external state; this then is the source from which the final
document tree is built.  Some duplication /is/ admittedly involved in doing so,
as the emitted instruction is typically accompanied by a corresponding change
to the parser state, but the relative logic simplicity makes doing so worth it.
-}
module Web.Mangrove.Parse.Tree.Patch
    ( -- * Types
      Patch ( .. )
    , TreeOutput ( .. )
    , treeRemainder
    , TokenizerOutputState
    , InsertAt ( .. )
    , TargetNode
    , ReparentDepth
      -- * Utility functions
      -- ** Tree types
      -- $utility-state
    , packTree
    , packTree_
    , packTreeErrors
    , packTreeErrors_
    , consTreeError
    , consTreeError_
    , (++|)
    , (|++)
    , (|++|)
      -- ** Token types
    , mapTokenErrs
    , mapTokenOut
      -- * Instructions
    , setDocumentQuirks
    , restartParsing
    , stopParsing
      -- ** Opening nodes
    , insertCharacter
    , insertComment
    , insertComment'
    , insertDoctype
    , addAttribute
      -- *** Elements
    , createElement
    , insertElement
    , insertElement_
    , insertNullElement
    , insertNullElement_
    , insertHeadElement
    , insertHeadElement_
      -- **** Formatting
    , insertForeignElement
    , insertForeignNullElement
    , insertFormattingElement
    , reconstructFormattingElements
      -- ** Closing nodes
      -- *** Single
    , closeCurrentNode
    , closeCurrentNode_
    , dropCurrentNode
    , softCloseCurrentNode_
    , closeAncestorNode_
    , closeAncestorNodes_
      -- *** Multiple
      -- **** Exclusive clear
    , clearToContext
    , tableContext
    , tableBodyContext
    , tableRowContext
      -- **** Inclusive clear
    , clearCount
    , closeElement
    , closeElements
    , closePElement
    , generateEndTags
    , impliedEndTags
    , thoroughlyImpliedEndTags
    ) where


import qualified Control.Applicative as A
import qualified Control.Monad as N
import qualified Control.Monad.Trans.State as N.S

import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BS.L
import qualified Data.IntMap.Strict as M.I
import qualified Data.List as L
import qualified Data.Maybe as Y
import qualified Data.Text as T

import Data.Function ( (&) )
import Data.Functor ( ($>) )

import Web.Willow.DOM

import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize as Tokenize
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tree.Common
import Web.Willow.Common.Encoding


-- | The atomic, self-contained instruction set describing how to build a final
-- document tree.
data Patch
    = ErrorList [ParseError]
        -- ^ "Ignore the token", or add errors to, e.g., 'SetDocumentQuirks'.
    | SetDocumentQuirks QuirksMode
        -- ^ Specify which degree of backwards compatibility should be used in
        -- rendering the document.
    | CloseNodes (M.I.IntMap ReparentDepth)
        -- ^ "Pop the current node off the stack of open elements."
        -- 
        -- The first 'TargetNode' of every tuple is treated identically to
        -- 'RelativeLocation', while the second 'ReparentDepth' indicates how
        -- many ancestors should be closed (i.e., @'CloseNodes' [(1, 1)]@
        -- closes the parent node).
    | SoftCloseCurrentNode
        -- ^ Float any following non-'Element' nodes (until the next
        -- 'CloseNodes' or 'DropCurrentNode') to the parent rather than
        -- inserting them under the current.
    | DropCurrentNode
        -- ^ "Remove the current node from its parent node, if it has one.  Pop
        -- it off the stack of open elements."
    | InsertCharacter [ParseError] Char
        -- ^ "Insert the character."
    | InsertComment [ParseError] InsertAt T.Text
        -- ^ "Insert a comment."
    | InsertElement [ParseError] ElementParams
        -- ^ "Insert a foreign element for the token, in the given namespace."
    | InsertAndSetDocumentType [ParseError] DocumentTypeParams
        -- ^ "Append a 'DocumentType' node to the @Document@ node.  Associate
        -- the former with the latter so that it is returned as the value of
        -- the @doctype@ attribute."
        -- 
        -- While the specification requires the 'DocumentType' be explicitly
        -- associated as the @doctype@ attribute of the document, the parsing
        -- rules only allow for a single such node to be produced, so the extra
        -- processing isn't actually required.
    | AddAttribute InsertAt AttributeParams
        -- ^ "Add the attribute and its corresponding value to the top element
        -- of the stack of open elements."
    | RestartParsing
        -- ^ Discard the previous tree and begin a new, empty one.
  deriving ( Patch -> Patch -> Bool
(Patch -> Patch -> Bool) -> (Patch -> Patch -> Bool) -> Eq Patch
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Patch -> Patch -> Bool
$c/= :: Patch -> Patch -> Bool
== :: Patch -> Patch -> Bool
$c== :: Patch -> Patch -> Bool
Eq, Int -> Patch -> ShowS
[Patch] -> ShowS
Patch -> String
(Int -> Patch -> ShowS)
-> (Patch -> String) -> ([Patch] -> ShowS) -> Show Patch
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Patch] -> ShowS
$cshowList :: [Patch] -> ShowS
show :: Patch -> String
$cshow :: Patch -> String
showsPrec :: Int -> Patch -> ShowS
$cshowsPrec :: Int -> Patch -> ShowS
Show, ReadPrec [Patch]
ReadPrec Patch
Int -> ReadS Patch
ReadS [Patch]
(Int -> ReadS Patch)
-> ReadS [Patch]
-> ReadPrec Patch
-> ReadPrec [Patch]
-> Read Patch
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Patch]
$creadListPrec :: ReadPrec [Patch]
readPrec :: ReadPrec Patch
$creadPrec :: ReadPrec Patch
readList :: ReadS [Patch]
$creadList :: ReadS [Patch]
readsPrec :: Int -> ReadS Patch
$creadsPrec :: Int -> ReadS Patch
Read )


-- | Some 'Patch'es represent instruction categories which may not always be
-- intended for the current location in the tree, and the resulting 'Tree's may
-- likewise migrate from where they're generated.  These are the "addresses"
-- which direct those datatypes through the hierarchy.
data InsertAt
    = RelativeLocation ReparentDepth
        -- ^ Insert a specific number of nodes /up/ the tree from where the
        -- 'Patch' or 'Tree' was generated.
    | InDocument
        -- ^ Insert as a direct child of the 'Web.Willow.IDL.Document.Document'
        -- itself.
    | InHtmlElement
        -- ^ Insert as a direct child of the top-level @html@ element in the
        -- document.
  deriving ( InsertAt -> InsertAt -> Bool
(InsertAt -> InsertAt -> Bool)
-> (InsertAt -> InsertAt -> Bool) -> Eq InsertAt
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: InsertAt -> InsertAt -> Bool
$c/= :: InsertAt -> InsertAt -> Bool
== :: InsertAt -> InsertAt -> Bool
$c== :: InsertAt -> InsertAt -> Bool
Eq, Int -> InsertAt -> ShowS
[InsertAt] -> ShowS
InsertAt -> String
(Int -> InsertAt -> ShowS)
-> (InsertAt -> String) -> ([InsertAt] -> ShowS) -> Show InsertAt
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [InsertAt] -> ShowS
$cshowList :: [InsertAt] -> ShowS
show :: InsertAt -> String
$cshow :: InsertAt -> String
showsPrec :: Int -> InsertAt -> ShowS
$cshowsPrec :: Int -> InsertAt -> ShowS
Show, ReadPrec [InsertAt]
ReadPrec InsertAt
Int -> ReadS InsertAt
ReadS [InsertAt]
(Int -> ReadS InsertAt)
-> ReadS [InsertAt]
-> ReadPrec InsertAt
-> ReadPrec [InsertAt]
-> Read InsertAt
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [InsertAt]
$creadListPrec :: ReadPrec [InsertAt]
readPrec :: ReadPrec InsertAt
$creadPrec :: ReadPrec InsertAt
readList :: ReadS [InsertAt]
$creadList :: ReadS [InsertAt]
readsPrec :: Int -> ReadS InsertAt
$creadsPrec :: Int -> ReadS InsertAt
Read )


-- | Type-level clarification for a count of how many levels a 'Patch' should
-- rise in the hierarchy without affecting the intervening nodes; a value of
-- @0@ indicates the current node.
type TargetNode = Word

-- | Type-level clarification for how many levels up the hierarchy the
-- associated object should be moved.  Note that a value of @0@ won't result in
-- any noticable change.
type ReparentDepth = Word


-- | The standard output of parsers used in the first tree construction stage.
-- Specifically, it contains the final state of the tokenization stage in
-- addition to the generated instructions, to enable the recursion loop to
-- detect the end of multi-token outputs and properly update the resume state.
data TreeOutput = TreeOutput
    { TreeOutput -> [Patch]
treePatches :: [Patch]
        -- ^ The instructions generated by the parser.
    , TreeOutput -> TokenizerOutputState
treeState :: TokenizerOutputState
        -- ^ The data required to resume tokenization immediately following the
        -- value, if possible.
    }
  deriving ( TreeOutput -> TreeOutput -> Bool
(TreeOutput -> TreeOutput -> Bool)
-> (TreeOutput -> TreeOutput -> Bool) -> Eq TreeOutput
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TreeOutput -> TreeOutput -> Bool
$c/= :: TreeOutput -> TreeOutput -> Bool
== :: TreeOutput -> TreeOutput -> Bool
$c== :: TreeOutput -> TreeOutput -> Bool
Eq, Int -> TreeOutput -> ShowS
[TreeOutput] -> ShowS
TreeOutput -> String
(Int -> TreeOutput -> ShowS)
-> (TreeOutput -> String)
-> ([TreeOutput] -> ShowS)
-> Show TreeOutput
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TreeOutput] -> ShowS
$cshowList :: [TreeOutput] -> ShowS
show :: TreeOutput -> String
$cshow :: TreeOutput -> String
showsPrec :: Int -> TreeOutput -> ShowS
$cshowsPrec :: Int -> TreeOutput -> ShowS
Show, ReadPrec [TreeOutput]
ReadPrec TreeOutput
Int -> ReadS TreeOutput
ReadS [TreeOutput]
(Int -> ReadS TreeOutput)
-> ReadS [TreeOutput]
-> ReadPrec TreeOutput
-> ReadPrec [TreeOutput]
-> Read TreeOutput
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [TreeOutput]
$creadListPrec :: ReadPrec [TreeOutput]
readPrec :: ReadPrec TreeOutput
$creadPrec :: ReadPrec TreeOutput
readList :: ReadS [TreeOutput]
$creadList :: ReadS [TreeOutput]
readsPrec :: Int -> ReadS TreeOutput
$creadsPrec :: Int -> ReadS TreeOutput
Read )

-- | The unparsed portion of the binary stream, after building the
-- associated instruction set.
treeRemainder :: TreeOutput -> Maybe BS.ByteString
treeRemainder :: TreeOutput -> Maybe ByteString
treeRemainder = ((TokenizerState, ByteString) -> ByteString)
-> TokenizerOutputState -> Maybe ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (TokenizerState, ByteString) -> ByteString
forall a b. (a, b) -> b
snd (TokenizerOutputState -> Maybe ByteString)
-> (TreeOutput -> TokenizerOutputState)
-> TreeOutput
-> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TreeOutput -> TokenizerOutputState
treeState


-- $utility-state
-- Many functions producing 'TreeOutput' objects have two forms: one taking a
-- 'TreeInput' (typically passed via the enclosing
-- 'Web.Willow.Common.Parser.Switch.switch') alongside some data to pack, and
-- one with only the latter.  That 'TreeInput' is the means by which errors
-- from earlier stages are propagated forward, and by which the support
-- framework identifies breakpoints in the tokenizer.  Therefore, most case
-- logic must be written to:
-- 
-- * Emit the value returned by a function referencing the 'TreeInput',
--   unless that function is a 'Web.Willow.Common.Parser.push' to allow it to
--   be reconsumed.
-- * Only use the 'TreeInput' with a single function to avoid duplicating
--   errors.
-- 
-- In many cases, it may be possible to pass the 'TreeInput' to one of
-- several functions.  So long as the above rules are followed, it often
-- doesn't matter which function performs the wrapping; attaching it in a way
-- that makes logical or proximal sense could provide clearer error display
-- positioning.


-- | Emit a patchset from a tree construction parser, referencing the state
-- when the original token was produced.
-- 
-- This produces a stateful output, and should only be used where the token
-- would otherwise be discarded; use 'packTree_' elsewhere.
packTree :: TreeInput -> [Patch] -> TreeBuilder TreeOutput
packTree :: TreeInput -> [Patch] -> TreeBuilder TreeOutput
packTree TreeInput
t' [Patch]
ps = TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
    { treePatches :: [Patch]
treePatches = [Patch]
ps
    , treeState :: TokenizerOutputState
treeState = TreeInput -> TokenizerOutputState
tokenState TreeInput
t'
    }

-- | Emit a patchset from a tree construction parser without any reference to
-- the original token.
-- 
-- This produces a stateless output, and should only be used after the token
-- has been 'Web.Willow.Common.Parser.push'ed for reconsumption; use 'packTree'
-- elsewhere.
packTree_ :: [Patch] -> TreeBuilder TreeOutput
packTree_ :: [Patch] -> TreeBuilder TreeOutput
packTree_ [Patch]
ps = TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
    { treePatches :: [Patch]
treePatches = [Patch]
ps
    , treeState :: TokenizerOutputState
treeState = TokenizerOutputState
forall a. Maybe a
Nothing
    }


-- | Modify the collection of errors associated with a wrapped token.
mapTokenErrs :: ([ParseError] -> [ParseError]) -> TreeInput -> TreeInput
mapTokenErrs :: ([ParseError] -> [ParseError]) -> TreeInput -> TreeInput
mapTokenErrs [ParseError] -> [ParseError]
f TreeInput
t' = TreeInput
t'
    { tokenErrs :: [ParseError]
tokenErrs = [ParseError] -> [ParseError]
f ([ParseError] -> [ParseError]) -> [ParseError] -> [ParseError]
forall a b. (a -> b) -> a -> b
$ TreeInput -> [ParseError]
tokenErrs TreeInput
t'
    }

-- | Modify a wrapped token without affecting the associated data.
mapTokenOut :: (Token -> Token) -> TreeInput -> TreeInput
mapTokenOut :: (Token -> Token) -> TreeInput -> TreeInput
mapTokenOut Token -> Token
f TreeInput
t' = TreeInput
t'
    { tokenOut :: Token
tokenOut = Token -> Token
f (Token -> Token) -> Token -> Token
forall a b. (a -> b) -> a -> b
$ TreeInput -> Token
tokenOut TreeInput
t'
    }


-- | Emit a collection of errors from a tree construction parser, if and only
-- if that collection is non-empty.
-- 
-- This produces a stateful output, and should only be used where the token
-- would otherwise be discarded; use 'packTreeErrors_' elsewhere.
packTreeErrors :: [ParseError] -> TreeInput -> TreeBuilder TreeOutput
packTreeErrors :: [ParseError] -> TreeInput -> TreeBuilder TreeOutput
packTreeErrors [ParseError]
errs TreeInput
t' = do
    TreeOutput
ps <- [ParseError] -> TreeBuilder TreeOutput
packTreeErrors_ ([ParseError] -> TreeBuilder TreeOutput)
-> [ParseError] -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ [ParseError]
errs [ParseError] -> [ParseError] -> [ParseError]
forall a. [a] -> [a] -> [a]
++ TreeInput -> [ParseError]
tokenErrs TreeInput
t'
    TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput
ps
        { treeState :: TokenizerOutputState
treeState = TreeInput -> TokenizerOutputState
tokenState TreeInput
t'
        }

-- | Emit a collection of errors from a tree construction parser, if and only
-- if that collection is non-empty.
-- 
-- This produces a stateless output, and should only be used after the token
-- has been 'Web.Willow.Common.Parser.push'ed for reconsumption; use
-- 'packTreeErrors' elsewhere, or 'consTreeError' and 'consTreeError_' if a
-- relevant set of patches already exists.
packTreeErrors_ :: [ParseError] -> TreeBuilder TreeOutput
packTreeErrors_ :: [ParseError] -> TreeBuilder TreeOutput
packTreeErrors_ [] = [Patch] -> TreeBuilder TreeOutput
packTree_ []
packTreeErrors_ [ParseError]
errs = [Patch] -> TreeBuilder TreeOutput
packTree_ [[ParseError] -> Patch
ErrorList [ParseError]
errs]

-- | Prepend an error to the first patch in the list, or add an 'ErrorList'
-- entry if it doesn't support them.
-- 
-- If this is the only patchset which may be generated, use 'packTreeErrors_'
-- rather than passing a null @['Patch']@.
consTreeError_ :: ParseError -> [Patch] -> [Patch]
consTreeError_ :: ParseError -> [Patch] -> [Patch]
consTreeError_ ParseError
err (ErrorList [ParseError]
errs:[Patch]
ps) =
    [ParseError] -> Patch
ErrorList (ParseError
err ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
: [ParseError]
errs) Patch -> [Patch] -> [Patch]
forall a. a -> [a] -> [a]
: [Patch]
ps
consTreeError_ ParseError
err (InsertCharacter [ParseError]
errs Char
c:[Patch]
ps) =
    [ParseError] -> Char -> Patch
InsertCharacter (ParseError
err ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
: [ParseError]
errs) Char
c Patch -> [Patch] -> [Patch]
forall a. a -> [a] -> [a]
: [Patch]
ps
consTreeError_ ParseError
err (InsertComment [ParseError]
errs InsertAt
loc Text
d:[Patch]
ps) =
    [ParseError] -> InsertAt -> Text -> Patch
InsertComment (ParseError
err ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
: [ParseError]
errs) InsertAt
loc Text
d Patch -> [Patch] -> [Patch]
forall a. a -> [a] -> [a]
: [Patch]
ps
consTreeError_ ParseError
err (InsertElement [ParseError]
errs ElementParams
d:[Patch]
ps) =
    [ParseError] -> ElementParams -> Patch
InsertElement (ParseError
err ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
: [ParseError]
errs) ElementParams
d Patch -> [Patch] -> [Patch]
forall a. a -> [a] -> [a]
: [Patch]
ps
consTreeError_ ParseError
err (InsertAndSetDocumentType [ParseError]
errs DocumentTypeParams
d:[Patch]
ps) =
    [ParseError] -> DocumentTypeParams -> Patch
InsertAndSetDocumentType (ParseError
err ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
: [ParseError]
errs) DocumentTypeParams
d Patch -> [Patch] -> [Patch]
forall a. a -> [a] -> [a]
: [Patch]
ps
consTreeError_ ParseError
err [Patch]
ps = [ParseError] -> Patch
ErrorList [ParseError
err] Patch -> [Patch] -> [Patch]
forall a. a -> [a] -> [a]
: [Patch]
ps

-- | Prepend an error to the wrapped patchset, adding an 'ErrorList' entry if
-- the first patch doesn't support them.
-- 
-- If this is the only patchset which may be generated, use 'packTreeErrors'
-- rather than passing a null @['Patch']@.
consTreeError :: ParseError -> TreeOutput -> TreeOutput
consTreeError :: ParseError -> TreeOutput -> TreeOutput
consTreeError ParseError
err TreeOutput
out = TreeOutput
out { treePatches :: [Patch]
treePatches = ParseError -> [Patch] -> [Patch]
consTreeError_ ParseError
err ([Patch] -> [Patch]) -> [Patch] -> [Patch]
forall a b. (a -> b) -> a -> b
$ TreeOutput -> [Patch]
treePatches TreeOutput
out }


-- | Prepend a plain patchset to the contents of a wrapped one.
(++|) :: [Patch] -> TreeOutput -> TreeOutput
[Patch]
ps ++| :: [Patch] -> TreeOutput -> TreeOutput
++| TreeOutput
out = TreeOutput
out { treePatches :: [Patch]
treePatches = [Patch]
ps [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ TreeOutput -> [Patch]
treePatches TreeOutput
out }
infixr 4 ++|

-- | Append a plain patchset to the contents of a wrapped one.
(|++) :: TreeOutput -> [Patch] -> TreeOutput
TreeOutput
out |++ :: TreeOutput -> [Patch] -> TreeOutput
|++ [Patch]
ps = TreeOutput
out { treePatches :: [Patch]
treePatches = TreeOutput -> [Patch]
treePatches TreeOutput
out [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ [Patch]
ps }
infixr 4 |++

-- | Concatenate the payloads of two wrapped patchsets, retaining the
-- associated state of the right one.
(|++|) :: TreeOutput -> TreeOutput -> TreeOutput
TreeOutput
ps |++| :: TreeOutput -> TreeOutput -> TreeOutput
|++| TreeOutput
ps' = TreeOutput
ps' { treePatches :: [Patch]
treePatches = TreeOutput -> [Patch]
treePatches TreeOutput
ps [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ TreeOutput -> [Patch]
treePatches TreeOutput
ps' }
infixr 4 |++|


-- | Set the degree of backwards compatibility the document seems to be written
-- for.
setDocumentQuirks :: QuirksMode -> TreeBuilder [Patch]
setDocumentQuirks :: QuirksMode -> TreeBuilder [Patch]
setDocumentQuirks QuirksMode
quirks = do
    (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify ((TreeParserState -> TreeParserState)
 -> StateT TreeParserState (Parser [TreeInput]) ())
-> (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ \TreeParserState
state -> TreeParserState
state
        { quirksMode :: QuirksMode
quirksMode = QuirksMode
quirks
        }
    [Patch] -> TreeBuilder [Patch]
forall (m :: * -> *) a. Monad m => a -> m a
return [QuirksMode -> Patch
SetDocumentQuirks QuirksMode
quirks]


-- | Pass the stack of open elements into the given function in order to
-- determine how many elements should be closed, then close them.
clear :: ([ElementParams] -> Word) -> TreeBuilder [Patch]
clear :: ([ElementParams] -> Word) -> TreeBuilder [Patch]
clear [ElementParams] -> Word
f = do
    TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
    Word -> TreeBuilder [Patch]
clearCount (Word -> TreeBuilder [Patch])
-> ([(NodeIndex, ElementParams)] -> Word)
-> [(NodeIndex, ElementParams)]
-> TreeBuilder [Patch]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ElementParams] -> Word
f ([ElementParams] -> Word)
-> ([(NodeIndex, ElementParams)] -> [ElementParams])
-> [(NodeIndex, ElementParams)]
-> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((NodeIndex, ElementParams) -> ElementParams)
-> [(NodeIndex, ElementParams)] -> [ElementParams]
forall a b. (a -> b) -> [a] -> [b]
map (NodeIndex, ElementParams) -> ElementParams
forall a b. (a, b) -> b
snd ([(NodeIndex, ElementParams)] -> TreeBuilder [Patch])
-> [(NodeIndex, ElementParams)] -> TreeBuilder [Patch]
forall a b. (a -> b) -> a -> b
$ TreeParserState -> [(NodeIndex, ElementParams)]
openElements TreeParserState
state

-- | Pop a known number of nodes from the stack of open elements.
clearCount :: Word -> TreeBuilder [Patch]
clearCount :: Word -> TreeBuilder [Patch]
clearCount Word
0 = [Patch] -> TreeBuilder [Patch]
forall (m :: * -> *) a. Monad m => a -> m a
return []
clearCount Word
l = Word -> Word -> TreeBuilder [Patch]
closeAncestorNodes_ Word
0 Word
l


-- | Close all markup tags until the current node is one of the given elements
-- in the HTML namespace, close it.  For the inverse (closing all tags which
-- /are/ listed), see 'generateEndTags'.
-- 
-- 'tableContext', 'tableBodyContext', and 'tableRowContext' provide the
-- typical inputs.
clearToContext :: [ElementName] -> TreeBuilder [Patch]
clearToContext :: [Text] -> TreeBuilder [Patch]
clearToContext [Text]
ns = ([ElementParams] -> Word) -> TreeBuilder [Patch]
clear [ElementParams] -> Word
forall p. (Num p, Enum p) => [ElementParams] -> p
countToContext
  where countToContext :: [ElementParams] -> p
countToContext [] = p
0
        countToContext (ElementParams
d:[ElementParams]
ds) = if Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem (ElementParams -> Text
elementName ElementParams
d) [Text]
ns Bool -> Bool -> Bool
&& ElementParams -> Maybe Text
elementNamespace ElementParams
d Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Maybe Text
forall a. a -> Maybe a
Just Text
htmlNamespace
            then p
0
            else p -> p
forall a. Enum a => a -> a
succ (p -> p) -> p -> p
forall a b. (a -> b) -> a -> b
$ [ElementParams] -> p
countToContext [ElementParams]
ds

-- | __HTML:__
--      @[clear the stack back to a table context]
--      (https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context)@
-- 
-- The list of elements to pass 'clearToContext' when closing all tags until
-- the start of the most recent table.
tableContext :: [ElementName]
tableContext :: [Text]
tableContext =
    [ Text
"table"
    , Text
"template"
    , Text
"html"
    ]

-- | __HTML:__
--      @[clear the stack back to a table body context]
--      (https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-body-context)@
-- 
-- The list of elements to pass 'clearToContext' when closing all tags until
-- the start of the most recent section of the current table.
tableBodyContext :: [ElementName]
tableBodyContext :: [Text]
tableBodyContext =
    [ Text
"tbody"
    , Text
"tfoot"
    , Text
"thead"
    , Text
"template"
    , Text
"html"
    ]

-- | __HTML:__
--      @[clear the stack back to a table row context]
--      (https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context)@
-- 
-- The list of elements to pass 'clearToContext' when closing all tags until
-- the start of the most recent row of the current table.
tableRowContext :: [ElementName]
tableRowContext :: [Text]
tableRowContext =
    [ Text
"tr"
    , Text
"template"
    , Text
"html"
    ]


-- | "Pop elements from the stack of open elements until the given element in
-- the HTML namespace has been popped from the stack."
-- 
-- If multiple elements may indicate an endpoint, use 'closeElements' instead.
closeElement :: ElementName -> TreeBuilder [Patch]
closeElement :: Text -> TreeBuilder [Patch]
closeElement = [Text] -> TreeBuilder [Patch]
closeElements ([Text] -> TreeBuilder [Patch])
-> (Text -> [Text]) -> Text -> TreeBuilder [Patch]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [])

-- | "Pop elements from the stack of open elements until one of the given
-- elements in the HTML namespace has been popped from the stack."
-- 
-- If only a single element may indicate an endpoint, 'closeElement' could
-- provide a cleaner interface.
closeElements :: [ElementName] -> TreeBuilder [Patch]
closeElements :: [Text] -> TreeBuilder [Patch]
closeElements [Text]
names = ([ElementParams] -> Word) -> TreeBuilder [Patch]
clear [ElementParams] -> Word
forall p. (Num p, Enum p) => [ElementParams] -> p
countToElement
  where countToElement :: [ElementParams] -> p
countToElement [] = p
0
        countToElement (ElementParams
d:[ElementParams]
ds) = if Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem (ElementParams -> Text
elementName ElementParams
d) [Text]
names Bool -> Bool -> Bool
&& ElementParams -> Maybe Text
elementNamespace ElementParams
d Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Maybe Text
forall a. a -> Maybe a
Just Text
htmlNamespace
            then p
1
            else p -> p
forall a. Enum a => a -> a
succ (p -> p) -> p -> p
forall a b. (a -> b) -> a -> b
$ [ElementParams] -> p
countToElement [ElementParams]
ds

-- | "Pop the current element from the stack of open elements."
-- 
-- This produces a stateful output, and should only be used where the token
-- would otherwise be discarded; use 'closeCurrentNode_' elsewhere.
closeCurrentNode :: TreeInput -> TreeBuilder TreeOutput
closeCurrentNode :: TreeInput -> TreeBuilder TreeOutput
closeCurrentNode TreeInput
t' = do
    [Patch]
close <- TreeBuilder [Patch]
closeCurrentNode_
    TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
    TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
        { treePatches :: [Patch]
treePatches = (ParseError -> [Patch] -> [Patch])
-> [Patch] -> [ParseError] -> [Patch]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ParseError -> [Patch] -> [Patch]
consTreeError_ [Patch]
close ([ParseError] -> [Patch]) -> [ParseError] -> [Patch]
forall a b. (a -> b) -> a -> b
$ TreeInput -> [ParseError]
tokenErrs TreeInput
t'
        , treeState :: TokenizerOutputState
treeState = TreeInput -> TokenizerOutputState
tokenState (TreeInput -> TokenizerOutputState)
-> ((TokenParserState -> TokenParserState) -> TreeInput)
-> (TokenParserState -> TokenParserState)
-> TokenizerOutputState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TreeInput -> (TokenParserState -> TokenParserState) -> TreeInput
mapTokenState TreeInput
t' ((TokenParserState -> TokenParserState) -> TokenizerOutputState)
-> (TokenParserState -> TokenParserState) -> TokenizerOutputState
forall a b. (a -> b) -> a -> b
$ TreeParserState -> TokenParserState -> TokenParserState
resetNamespace TreeParserState
state
        }
  where resetNamespace :: TreeParserState -> TokenParserState -> TokenParserState
resetNamespace TreeParserState
state TokenParserState
tokState = TokenParserState
tokState
            { currentNodeNamespace :: Maybe Text
currentNodeNamespace = [(NodeIndex, ElementParams)] -> Maybe (NodeIndex, ElementParams)
forall a. [a] -> Maybe a
Y.listToMaybe (TreeParserState -> [(NodeIndex, ElementParams)]
openElements TreeParserState
state) Maybe (NodeIndex, ElementParams)
-> ((NodeIndex, ElementParams) -> Maybe Text) -> Maybe Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ElementParams -> Maybe Text
elementNamespace (ElementParams -> Maybe Text)
-> ((NodeIndex, ElementParams) -> ElementParams)
-> (NodeIndex, ElementParams)
-> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NodeIndex, ElementParams) -> ElementParams
forall a b. (a, b) -> b
snd
            }

-- | "Pop the current element from the stack of open elements."
-- 
-- This produces a stateless output, and should only be used where the token is
-- handled in another manner; use 'closeCurrentNode' elsewhere.  This has
-- identical behaviour to @'closeAncestorNode_' 0@.
closeCurrentNode_ :: TreeBuilder [Patch]
closeCurrentNode_ :: TreeBuilder [Patch]
closeCurrentNode_ = Word -> TreeBuilder [Patch]
closeAncestorNode_ Word
0

-- | As 'closeCurrentNode_', but the closed node and its descendants are not
-- retained in the document tree.
dropCurrentNode :: TreeBuilder [Patch]
dropCurrentNode :: TreeBuilder [Patch]
dropCurrentNode = TreeBuilder [Patch]
closeCurrentNode_ TreeBuilder [Patch] -> [Patch] -> TreeBuilder [Patch]
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> [Patch
DropCurrentNode]

-- | Partially close the current node, such that 'insertElement' (and related
-- element instructions) will still be inserted as children, but /every other/
-- type of node will instead be inserted as siblings.  This doesn't actually
-- change the internal state; a following call to 'closeCurrentNode' or similar
-- is still required.
softCloseCurrentNode_ :: [Patch]
softCloseCurrentNode_ :: [Patch]
softCloseCurrentNode_ = [Patch
SoftCloseCurrentNode]


-- | Remove the node the specified number of ancestors up the tree from the
-- stack of open elements.  See 'closeCurrentNode_' for the special case where
-- the argument is @0@.
closeAncestorNode_ :: TargetNode -> TreeBuilder [Patch]
closeAncestorNode_ :: Word -> TreeBuilder [Patch]
closeAncestorNode_ = (Word -> Word -> TreeBuilder [Patch])
-> Word -> Word -> TreeBuilder [Patch]
forall a b c. (a -> b -> c) -> b -> a -> c
flip Word -> Word -> TreeBuilder [Patch]
closeAncestorNodes_ Word
1

-- | Remove several nodes the specified number of ancestors up the tree from
-- the stack of open elements.  See 'closeAncestorNode_' if only a single node
-- needs to be closed.
closeAncestorNodes_ :: TargetNode -> ReparentDepth -> TreeBuilder [Patch]
closeAncestorNodes_ :: Word -> Word -> TreeBuilder [Patch]
closeAncestorNodes_ Word
l Word
d = do
    TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
    let ([(NodeIndex, ElementParams)]
es1, [(NodeIndex, ElementParams)]
es2) = Int
-> [(NodeIndex, ElementParams)]
-> ([(NodeIndex, ElementParams)], [(NodeIndex, ElementParams)])
forall a. Int -> [a] -> ([a], [a])
splitAt (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
l) ([(NodeIndex, ElementParams)]
 -> ([(NodeIndex, ElementParams)], [(NodeIndex, ElementParams)]))
-> [(NodeIndex, ElementParams)]
-> ([(NodeIndex, ElementParams)], [(NodeIndex, ElementParams)])
forall a b. (a -> b) -> a -> b
$ TreeParserState -> [(NodeIndex, ElementParams)]
openElements TreeParserState
state
        d' :: Int
d' = Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
d
    TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => s -> StateT s m ()
N.S.put (TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ())
-> TreeParserState
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ TreeParserState
state
        { openElements :: [(NodeIndex, ElementParams)]
openElements = [(NodeIndex, ElementParams)]
es1 [(NodeIndex, ElementParams)]
-> [(NodeIndex, ElementParams)] -> [(NodeIndex, ElementParams)]
forall a. [a] -> [a] -> [a]
++ Int -> [(NodeIndex, ElementParams)] -> [(NodeIndex, ElementParams)]
forall a. Int -> [a] -> [a]
drop Int
d' [(NodeIndex, ElementParams)]
es2
        }
    [Patch] -> TreeBuilder [Patch]
forall (m :: * -> *) a. Monad m => a -> m a
return [IntMap Word -> Patch
CloseNodes (IntMap Word -> Patch) -> (Int -> IntMap Word) -> Int -> Patch
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word -> IntMap Word
forall a. Int -> a -> IntMap a
M.I.singleton (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
l) (Word -> IntMap Word) -> (Int -> Word) -> Int -> IntMap Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word) -> (Int -> Int) -> Int -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
d' (Int -> Patch) -> Int -> Patch
forall a b. (a -> b) -> a -> b
$ [(NodeIndex, ElementParams)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(NodeIndex, ElementParams)]
es2]


-- | Close all markup tags up to and including the most recent @\<p\>@ element,
-- throwing an 'UnexpectedElementWithImpliedEndTag' if one of them does not
-- typically allow an implied end tag.  If the error is not required,
-- 'closeElement' provides similar behaviour.
closePElement :: TreeBuilder [Patch]
closePElement :: TreeBuilder [Patch]
closePElement = do
    [Patch]
generate <- [Text] -> TreeBuilder [Patch]
generateEndTags ([Text] -> TreeBuilder [Patch]) -> [Text] -> TreeBuilder [Patch]
forall a b. (a -> b) -> a -> b
$ Text -> [Text] -> [Text]
forall a. Eq a => a -> [a] -> [a]
L.delete Text
"p" [Text]
impliedEndTags
    Maybe ElementParams
current <- TreeBuilder (Maybe ElementParams)
currentNode
    let errF :: [Patch] -> [Patch]
errF = if Bool -> (ElementParams -> Bool) -> Maybe ElementParams -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True (Text -> ElementParams -> Bool
nodeIsElement Text
"p") Maybe ElementParams
current
            then [Patch] -> [Patch]
forall a. a -> a
id
            else ParseError -> [Patch] -> [Patch]
consTreeError_ ParseError
UnexpectedElementWithImpliedEndTag
    [Patch]
p <- Text -> TreeBuilder [Patch]
closeElement Text
"p"
    [Patch] -> TreeBuilder [Patch]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Patch] -> TreeBuilder [Patch]) -> [Patch] -> TreeBuilder [Patch]
forall a b. (a -> b) -> a -> b
$ [Patch]
generate [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ [Patch] -> [Patch]
errF [Patch]
p

-- | Close all markup tags in a given list, until reaching an element which is
-- not in that list.  For the inverse (closing all tags /except/ what's
-- listed), see 'clearToContext'.
-- 
-- 'impliedEndTags' and 'thoroughlyImpliedEndTags' provide the typical inputs.
generateEndTags :: [ElementName] -> TreeBuilder [Patch]
generateEndTags :: [Text] -> TreeBuilder [Patch]
generateEndTags [Text]
tags = ([ElementParams] -> Word) -> TreeBuilder [Patch]
clear [ElementParams] -> Word
forall p. (Num p, Enum p) => [ElementParams] -> p
countImpliable
  where countImpliable :: [ElementParams] -> p
countImpliable [] = p
0
        countImpliable (ElementParams
d:[ElementParams]
ds) = if Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem (ElementParams -> Text
elementName ElementParams
d) [Text]
tags
            then p -> p
forall a. Enum a => a -> a
succ (p -> p) -> p -> p
forall a b. (a -> b) -> a -> b
$ [ElementParams] -> p
countImpliable [ElementParams]
ds
            else p
0

-- | __HTML:__
--      @[generate implied end tags]
--      (https://html.spec.whatwg.org/multipage/parsing.html#generate-implied-end-tags)@
-- 
-- The list of elements to pass 'generateEndTags' when closing all markup tags
-- which typically allow their end tag to be omitted.
impliedEndTags :: [ElementName]
impliedEndTags :: [Text]
impliedEndTags =
    [ Text
"dd"
    , Text
"dt"
    , Text
"li"
    , Text
"optgroup"
    , Text
"option"
    , Text
"p"
    , Text
"rb"
    , Text
"rp"
    , Text
"rt"
    , Text
"rtc"
    ]

-- | __HTML:__
--      @[generate all implied end tags thoroughly]
--      (https://html.spec.whatwg.org/multipage/parsing.html#generate-all-implied-end-tags-thoroughly)@
-- 
-- The list of elements to pass 'generateEndTags' when closing all markup tags,
-- including ones which may not usually allow their end tag to be omitted.
thoroughlyImpliedEndTags :: [ElementName]
thoroughlyImpliedEndTags :: [Text]
thoroughlyImpliedEndTags =
    [ Text
"caption"
    , Text
"colgroup"
    , Text
"tbody"
    , Text
"td"
    , Text
"tfoot"
    , Text
"th"
    , Text
"thead"
    , Text
"tr"
    ] [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text]
impliedEndTags


-- | __HTML:__
--      @[create an element for the token]
--      (https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token)@
-- 
-- Reserve a place in the tree for a given node, but do not yet add it.  Note
-- that this does not otherwise affect the parser state, so any modification of
-- open elements or updating of the 'currentNodeNamespace', for example, needs
-- to be performed manually.  For that reason, 'insertElement_' is almost
-- always more desirable.
createElement :: ElementParams -> TreeBuilder (NodeIndex, ElementParams)
createElement :: ElementParams -> TreeBuilder (NodeIndex, ElementParams)
createElement ElementParams
d = do
    TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
    let index :: NodeIndex
index = TreeParserState -> NodeIndex
elementIndex TreeParserState
state
    TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => s -> StateT s m ()
N.S.put (TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ())
-> TreeParserState
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ TreeParserState
state
        { elementIndex :: NodeIndex
elementIndex = NodeIndex -> NodeIndex
forall a. Enum a => a -> a
succ NodeIndex
index
        }
    (NodeIndex, ElementParams)
-> TreeBuilder (NodeIndex, ElementParams)
forall (m :: * -> *) a. Monad m => a -> m a
return (NodeIndex
index, ElementParams
d)


-- | __HTML:__
--      @[insert a character]
--      (https://html.spec.whatwg.org/multipage/parsing.html#insert-a-character)@
--      steps 1-3
-- 
-- If the wrapped token is a 'Character', add it to the final tree; fails if
-- it's not.  Note that the concatenation of character into 'Text' nodes occurs
-- in the patch folding logic instead.
insertCharacter :: TreeInput -> TreeBuilder TreeOutput
insertCharacter :: TreeInput -> TreeBuilder TreeOutput
insertCharacter TreeInput
t' = case TreeInput -> Token
tokenOut TreeInput
t' of
    Character Char
c -> TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
        { treePatches :: [Patch]
treePatches = [[ParseError] -> Char -> Patch
InsertCharacter (TreeInput -> [ParseError]
tokenErrs TreeInput
t') Char
c]
        , treeState :: TokenizerOutputState
treeState = TreeInput -> TokenizerOutputState
tokenState TreeInput
t'
        }
    Token
_ -> TreeBuilder TreeOutput
forall (f :: * -> *) a. Alternative f => f a
A.empty

-- | __HTML:__
--      @[insert a comment]
--      (https://html.spec.whatwg.org/multipage/parsing.html#insert-a-comment)@
--      with no explicit insertion postion
-- 
-- If the wrapped token is a 'Tokenize.Comment', add it to the final tree at the current
-- position (@'RelativeLocation' 0@); fails if it's not.
insertComment :: TreeInput -> TreeBuilder TreeOutput
insertComment :: TreeInput -> TreeBuilder TreeOutput
insertComment = InsertAt -> TreeInput -> TreeBuilder TreeOutput
insertComment' (InsertAt -> TreeInput -> TreeBuilder TreeOutput)
-> InsertAt -> TreeInput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ Word -> InsertAt
RelativeLocation Word
0

-- | __HTML:__
--      @[insert a comment]
--      (https://html.spec.whatwg.org/multipage/parsing.html#insert-a-comment)@
-- 
-- If the wrapped token is a 'Tokenize.Comment', add it to the final tree at
-- the position specified; fails if it's not.
insertComment' :: InsertAt -> TreeInput -> TreeBuilder TreeOutput
insertComment' :: InsertAt -> TreeInput -> TreeBuilder TreeOutput
insertComment' InsertAt
at TreeInput
t' = case TreeInput -> Token
tokenOut TreeInput
t' of
    Tokenize.Comment Text
c -> TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
        { treePatches :: [Patch]
treePatches = [[ParseError] -> InsertAt -> Text -> Patch
InsertComment (TreeInput -> [ParseError]
tokenErrs TreeInput
t') InsertAt
at Text
c]
        , treeState :: TokenizerOutputState
treeState = TreeInput -> TokenizerOutputState
tokenState TreeInput
t'
        }
    Token
_ -> TreeBuilder TreeOutput
forall (f :: * -> *) a. Alternative f => f a
A.empty

-- | If the wrapped token is a 'Tokenize.Comment', add it to the final tree;
-- fails if it's not.
insertDoctype :: TreeInput -> TreeBuilder TreeOutput
insertDoctype :: TreeInput -> TreeBuilder TreeOutput
insertDoctype TreeInput
t' = case TreeInput -> Token
tokenOut TreeInput
t' of
    Doctype DoctypeParams
d ->
        let system :: Maybe Text
system = DoctypeParams -> Maybe Text
doctypeSystemId DoctypeParams
d
            legacy :: Bool
legacy
                | DoctypeParams -> Maybe Text
doctypeName DoctypeParams
d Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"html" = Bool
True
                | Maybe Text -> Bool
forall a. Maybe a -> Bool
Y.isJust (Maybe Text -> Bool) -> Maybe Text -> Bool
forall a b. (a -> b) -> a -> b
$ DoctypeParams -> Maybe Text
doctypePublicId DoctypeParams
d = Bool
True
                | Maybe Text -> Bool
forall a. Maybe a -> Bool
Y.isJust Maybe Text
system Bool -> Bool -> Bool
&& Maybe Text
system Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"about:legacy-compat" = Bool
True
                | Bool
otherwise = Bool
False
            errs' :: [ParseError]
errs'
                | Bool
legacy = ParseError
LegacyDoctype ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
: TreeInput -> [ParseError]
tokenErrs TreeInput
t'
                | Bool
otherwise = TreeInput -> [ParseError]
tokenErrs TreeInput
t'
        in  TreeInput -> [Patch] -> TreeBuilder TreeOutput
packTree TreeInput
t' [[ParseError] -> DocumentTypeParams -> Patch
InsertAndSetDocumentType [ParseError]
errs' (DocumentTypeParams -> Patch) -> DocumentTypeParams -> Patch
forall a b. (a -> b) -> a -> b
$ TreeInput -> DocumentTypeParams
tokenDocumentType TreeInput
t']
    Token
_ -> TreeBuilder TreeOutput
forall (f :: * -> *) a. Alternative f => f a
A.empty

-- | __HTML:__
--      @[insert an HTML element]
--      (https://html.spec.whatwg.org/multipage/parsing.html#insert-an-html-element)@
-- 
-- If the wrapped token is a 'StartTag', add it to the final tree as a markup
-- element in the HTML namespace; fails if it's not.
-- 
-- For tag data generated /a priori/, use 'insertElement_' instead.
insertElement :: TreeInput -> TreeBuilder TreeOutput
insertElement :: TreeInput -> TreeBuilder TreeOutput
insertElement = Text -> TreeInput -> TreeBuilder TreeOutput
insertForeignElement Text
htmlNamespace

-- | Add a markup element described by the input record to the tree in the HTML
-- namespace.
-- 
-- If the tag data was obtained from the tokenizer, use 'insertElement' instead.
insertElement_ :: TagParams -> TreeBuilder [Patch]
insertElement_ :: TagParams -> TreeBuilder [Patch]
insertElement_ TagParams
d = TreeOutput -> [Patch]
treePatches (TreeOutput -> [Patch])
-> TreeBuilder TreeOutput -> TreeBuilder [Patch]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TreeInput -> TreeBuilder TreeOutput
insertElement ([ParseError] -> Token -> TreeInput
dummyToken [] (Token -> TreeInput) -> Token -> TreeInput
forall a b. (a -> b) -> a -> b
$ TagParams -> Token
StartTag TagParams
d)

-- | __HTML:__
--      @[insert a foreign element]
--      (https://html.spec.whatwg.org/multipage/parsing.html#insert-a-foreign-element)@
-- 
-- If the wrapped token is a 'StartTag', add it to the final tree as a markup
-- element in the specified namespace; fails if it's not.
insertForeignElement :: Namespace -> TreeInput -> TreeBuilder TreeOutput
insertForeignElement :: Text -> TreeInput -> TreeBuilder TreeOutput
insertForeignElement Text
ns TreeInput
t' = case TreeInput -> Token
tokenOut TreeInput
t' of
    StartTag TagParams
d -> do
        let d' :: ElementParams
d' = Maybe Text -> TagParams -> ElementParams
packNodeData (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
ns) TagParams
d
            errs' :: [ParseError]
errs'
                | TagParams -> Bool
tagIsSelfClosing TagParams
d = ParseError
NonVoidHtmlElementStartTagWithTrailingSolidus ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
: TreeInput -> [ParseError]
tokenErrs TreeInput
t'
                | Bool
otherwise = TreeInput -> [ParseError]
tokenErrs TreeInput
t'
        (NodeIndex, ElementParams)
e <- ElementParams -> TreeBuilder (NodeIndex, ElementParams)
createElement ElementParams
d'
        (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify ((TreeParserState -> TreeParserState)
 -> StateT TreeParserState (Parser [TreeInput]) ())
-> (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ \TreeParserState
state -> TreeParserState
state
            { openElements :: [(NodeIndex, ElementParams)]
openElements = (NodeIndex, ElementParams)
e (NodeIndex, ElementParams)
-> [(NodeIndex, ElementParams)] -> [(NodeIndex, ElementParams)]
forall a. a -> [a] -> [a]
: TreeParserState -> [(NodeIndex, ElementParams)]
openElements TreeParserState
state
            }
        TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
            { treePatches :: [Patch]
treePatches = [[ParseError] -> ElementParams -> Patch
InsertElement [ParseError]
errs' (ElementParams -> Patch) -> ElementParams -> Patch
forall a b. (a -> b) -> a -> b
$ ElementParams -> ElementParams
adjustAttributes ElementParams
d']
            , treeState :: TokenizerOutputState
treeState = TreeInput -> TokenizerOutputState
tokenState (TreeInput -> TokenizerOutputState)
-> TreeInput -> TokenizerOutputState
forall a b. (a -> b) -> a -> b
$ TreeInput -> (TokenParserState -> TokenParserState) -> TreeInput
mapTokenState TreeInput
t' TokenParserState -> TokenParserState
setNamespace
            }
    Token
_ -> TreeBuilder TreeOutput
forall (f :: * -> *) a. Alternative f => f a
A.empty
  where adjustAttributes :: ElementParams -> ElementParams
adjustAttributes
            | Text
ns Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
htmlNamespace = ElementParams -> ElementParams
forall a. a -> a
id
            | Bool
otherwise = ElementParams -> ElementParams
adjustForeignAttributes
        setNamespace :: TokenParserState -> TokenParserState
setNamespace TokenParserState
state = TokenParserState
state
            { currentNodeNamespace :: Maybe Text
currentNodeNamespace = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
ns
            }

-- | If the wrapped token is a 'StartTag', add it to the final tree as a markup
-- element in the HTML namespace, and then immediately close it; fails if it's
-- not.
-- 
-- For tag data generated /a priori/, use 'insertNullElement_' instead.
insertNullElement :: TreeInput -> TreeBuilder TreeOutput
insertNullElement :: TreeInput -> TreeBuilder TreeOutput
insertNullElement = Text -> TreeInput -> TreeBuilder TreeOutput
insertForeignNullElement Text
htmlNamespace

-- | Add a markup element described by the input record to the tree in the HTML
-- namespace, and then immediately close it.
-- 
-- If the tag data was obtained from the tokenizer, use 'insertNullElement'
-- instead.
insertNullElement_ :: TagParams -> TreeBuilder [Patch]
insertNullElement_ :: TagParams -> TreeBuilder [Patch]
insertNullElement_ TagParams
d = TreeOutput -> [Patch]
treePatches (TreeOutput -> [Patch])
-> TreeBuilder TreeOutput -> TreeBuilder [Patch]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TreeInput -> TreeBuilder TreeOutput
insertNullElement ([ParseError] -> Token -> TreeInput
dummyToken [] (Token -> TreeInput) -> Token -> TreeInput
forall a b. (a -> b) -> a -> b
$ TagParams -> Token
StartTag TagParams
d)

-- | If the wrapped token is a 'StartTag', add it to the final tree as a markup
-- element in the specified namespace, and then immediately close it; fails if
-- it's not.
insertForeignNullElement :: Namespace -> TreeInput -> TreeBuilder TreeOutput
insertForeignNullElement :: Text -> TreeInput -> TreeBuilder TreeOutput
insertForeignNullElement Text
ns TreeInput
t' = case TreeInput -> Token
tokenOut TreeInput
t' of
    StartTag TagParams
d -> TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
        { treePatches :: [Patch]
treePatches =
            [ [ParseError] -> ElementParams -> Patch
InsertElement (TreeInput -> [ParseError]
tokenErrs TreeInput
t') (ElementParams -> Patch)
-> (ElementParams -> ElementParams) -> ElementParams -> Patch
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ElementParams -> ElementParams
adjustAttributes (ElementParams -> Patch) -> ElementParams -> Patch
forall a b. (a -> b) -> a -> b
$ Maybe Text -> TagParams -> ElementParams
packNodeData (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
ns) TagParams
d
            , IntMap Word -> Patch
CloseNodes (IntMap Word -> Patch) -> IntMap Word -> Patch
forall a b. (a -> b) -> a -> b
$ Int -> Word -> IntMap Word
forall a. Int -> a -> IntMap a
M.I.singleton Int
0 Word
1
            ]
        , treeState :: TokenizerOutputState
treeState = TreeInput -> TokenizerOutputState
tokenState TreeInput
t'
        }
    Token
_ -> TreeBuilder TreeOutput
forall (f :: * -> *) a. Alternative f => f a
A.empty
  where adjustAttributes :: ElementParams -> ElementParams
adjustAttributes
            | Text
ns Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
htmlNamespace = ElementParams -> ElementParams
forall a. a -> a
id
            | Bool
otherwise = ElementParams -> ElementParams
adjustForeignAttributes


-- | If the wrapped token is a 'StartTag', add it to the final tree as a markup
-- element in the HTML namespace and set it as the target of the head element
-- pointer; fails if it's not.
-- 
-- For tag data generated /a priori/, use 'insertHeadElement_' instead.
insertHeadElement :: TreeInput -> TreeBuilder TreeOutput
insertHeadElement :: TreeInput -> TreeBuilder TreeOutput
insertHeadElement TreeInput
t' = do
    (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify ((TreeParserState -> TreeParserState)
 -> StateT TreeParserState (Parser [TreeInput]) ())
-> (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ \TreeParserState
state -> TreeParserState
state
        { headElementPointer :: Maybe NodeIndex
headElementPointer = NodeIndex -> Maybe NodeIndex
forall a. a -> Maybe a
Just (NodeIndex -> Maybe NodeIndex) -> NodeIndex -> Maybe NodeIndex
forall a b. (a -> b) -> a -> b
$ TreeParserState -> NodeIndex
elementIndex TreeParserState
state
        }
    TreeInput -> TreeBuilder TreeOutput
insertElement TreeInput
t'

-- | Add a markup element described by the input record to the tree in the HTML
-- namespace and set it as the target of the head element pointer.
-- 
-- If the tag data was obtained from the tokenizer, use 'insertElement' instead.
insertHeadElement_ :: TagParams -> TreeBuilder [Patch]
insertHeadElement_ :: TagParams -> TreeBuilder [Patch]
insertHeadElement_ TagParams
d = do
    (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify ((TreeParserState -> TreeParserState)
 -> StateT TreeParserState (Parser [TreeInput]) ())
-> (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ \TreeParserState
state -> TreeParserState
state
        { headElementPointer :: Maybe NodeIndex
headElementPointer = NodeIndex -> Maybe NodeIndex
forall a. a -> Maybe a
Just (NodeIndex -> Maybe NodeIndex) -> NodeIndex -> Maybe NodeIndex
forall a b. (a -> b) -> a -> b
$ TreeParserState -> NodeIndex
elementIndex TreeParserState
state
        }
    TagParams -> TreeBuilder [Patch]
insertElement_ TagParams
d


-- | Add an extra point of metadata to the indicated markup element, if that
-- element doesn't already have an attribute with that name.
addAttribute :: InsertAt -> NodeIndex -> BasicAttribute -> TreeBuilder [Patch]
addAttribute :: InsertAt -> NodeIndex -> BasicAttribute -> TreeBuilder [Patch]
addAttribute InsertAt
at NodeIndex
i (Text
name, Text
value) = do
    TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
    let ([(NodeIndex, ElementParams)]
es1, [(NodeIndex, ElementParams)]
es2) = ((NodeIndex, ElementParams) -> Bool)
-> [(NodeIndex, ElementParams)]
-> ([(NodeIndex, ElementParams)], [(NodeIndex, ElementParams)])
forall a. (a -> Bool) -> [a] -> ([a], [a])
L.break (NodeIndex -> NodeIndex -> Bool
forall a. Eq a => a -> a -> Bool
(==) NodeIndex
i (NodeIndex -> Bool)
-> ((NodeIndex, ElementParams) -> NodeIndex)
-> (NodeIndex, ElementParams)
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NodeIndex, ElementParams) -> NodeIndex
forall a b. (a, b) -> a
fst) ([(NodeIndex, ElementParams)]
 -> ([(NodeIndex, ElementParams)], [(NodeIndex, ElementParams)]))
-> [(NodeIndex, ElementParams)]
-> ([(NodeIndex, ElementParams)], [(NodeIndex, ElementParams)])
forall a b. (a -> b) -> a -> b
$ TreeParserState -> [(NodeIndex, ElementParams)]
openElements TreeParserState
state
    case [(NodeIndex, ElementParams)]
es2 of
        ((NodeIndex
_, ElementParams
e):[(NodeIndex, ElementParams)]
es') | Bool -> Bool
not (Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Text
name ([Text] -> Bool)
-> (AttributeMap -> [Text]) -> AttributeMap -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (AttributeParams -> Text) -> [AttributeParams] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map AttributeParams -> Text
attrName ([AttributeParams] -> [Text])
-> (AttributeMap -> [AttributeParams]) -> AttributeMap -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AttributeMap -> [AttributeParams]
toAttrList (AttributeMap -> Bool) -> AttributeMap -> Bool
forall a b. (a -> b) -> a -> b
$ ElementParams -> AttributeMap
elementAttributes ElementParams
e) -> do
            TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => s -> StateT s m ()
N.S.put (TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ())
-> TreeParserState
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ TreeParserState
state
                { openElements :: [(NodeIndex, ElementParams)]
openElements = [(NodeIndex, ElementParams)]
es1 [(NodeIndex, ElementParams)]
-> [(NodeIndex, ElementParams)] -> [(NodeIndex, ElementParams)]
forall a. [a] -> [a] -> [a]
++ (NodeIndex
i, ElementParams -> ElementParams
addAttributeData ElementParams
e) (NodeIndex, ElementParams)
-> [(NodeIndex, ElementParams)] -> [(NodeIndex, ElementParams)]
forall a. a -> [a] -> [a]
: [(NodeIndex, ElementParams)]
es'
                }
            [Patch] -> TreeBuilder [Patch]
forall (m :: * -> *) a. Monad m => a -> m a
return [InsertAt -> AttributeParams -> Patch
AddAttribute InsertAt
at AttributeParams
attr]
        [(NodeIndex, ElementParams)]
_ -> [Patch] -> TreeBuilder [Patch]
forall (m :: * -> *) a. Monad m => a -> m a
return []
  where addAttributeData :: ElementParams -> ElementParams
addAttributeData ElementParams
d = ElementParams
d
            { elementAttributes :: AttributeMap
elementAttributes = AttributeParams -> AttributeMap -> AttributeMap
insertAttribute AttributeParams
attr (AttributeMap -> AttributeMap) -> AttributeMap -> AttributeMap
forall a b. (a -> b) -> a -> b
$ ElementParams -> AttributeMap
elementAttributes ElementParams
d
            }
        attr :: AttributeParams
attr = AttributeParams
emptyAttributeParams
            { attrName :: Text
attrName = Text
name
            , attrValue :: Text
attrValue = Text
value
            }


-- | __HTML:__
--      @[push onto the list of active formatting elements]
--      (https://html.spec.whatwg.org/multipage/parsing.html#push-onto-the-list-of-active-formatting-elements)@
-- 
-- Add a markup element described by the input record to the tree in the HTML
-- namespace.  The element is also added to the set of elements which are
-- recreated on overlapping markup spans via 'reconstructFormattingElements'.
insertFormattingElement :: TreeInput -> TreeBuilder TreeOutput
insertFormattingElement :: TreeInput -> TreeBuilder TreeOutput
insertFormattingElement TreeInput
t' = do
    TreeOutput
insert <- TreeInput -> TreeBuilder TreeOutput
insertElement TreeInput
t'
    TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
    case [(NodeIndex, ElementParams)] -> Maybe (NodeIndex, ElementParams)
forall a. [a] -> Maybe a
Y.listToMaybe ([(NodeIndex, ElementParams)] -> Maybe (NodeIndex, ElementParams))
-> [(NodeIndex, ElementParams)] -> Maybe (NodeIndex, ElementParams)
forall a b. (a -> b) -> a -> b
$ TreeParserState -> [(NodeIndex, ElementParams)]
openElements TreeParserState
state of
        Just (NodeIndex, ElementParams)
e -> case TreeInput -> Token
tokenOut TreeInput
t' of
            StartTag TagParams
d -> TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => s -> StateT s m ()
N.S.put (TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ())
-> TreeParserState
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ TreeParserState
state
                { activeFormattingElements :: [[(NodeIndex, TagParams)]]
activeFormattingElements =
                    (NodeIndex, TagParams)
-> [[(NodeIndex, TagParams)]] -> [[(NodeIndex, TagParams)]]
forall a.
(a, TagParams) -> [[(a, TagParams)]] -> [[(a, TagParams)]]
pushFormattingElement ((NodeIndex, ElementParams) -> NodeIndex
forall a b. (a, b) -> a
fst (NodeIndex, ElementParams)
e, TagParams
d) ([[(NodeIndex, TagParams)]] -> [[(NodeIndex, TagParams)]])
-> [[(NodeIndex, TagParams)]] -> [[(NodeIndex, TagParams)]]
forall a b. (a -> b) -> a -> b
$ TreeParserState -> [[(NodeIndex, TagParams)]]
activeFormattingElements TreeParserState
state
                }
            Token
_ -> () -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        Maybe (NodeIndex, ElementParams)
Nothing -> () -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return TreeOutput
insert
  where pushFormattingElement :: (a, TagParams) -> [[(a, TagParams)]] -> [[(a, TagParams)]]
pushFormattingElement (a, TagParams)
e [] = [[(a, TagParams)
e]]
        pushFormattingElement (a, TagParams)
e ([(a, TagParams)]
fs:[[(a, TagParams)]]
fss) = ((a, TagParams)
e (a, TagParams) -> [(a, TagParams)] -> [(a, TagParams)]
forall a. a -> [a] -> [a]
: [(a, TagParams)]
fs') [(a, TagParams)] -> [[(a, TagParams)]] -> [[(a, TagParams)]]
forall a. a -> [a] -> [a]
: [[(a, TagParams)]]
fss
          where fs' :: [(a, TagParams)]
fs' = case ((a, TagParams) -> Bool) -> [(a, TagParams)] -> [Int]
forall a. (a -> Bool) -> [a] -> [Int]
L.findIndices (TagParams -> Bool
equalElement (TagParams -> Bool)
-> ((a, TagParams) -> TagParams) -> (a, TagParams) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a, TagParams) -> TagParams
forall a b. (a, b) -> b
snd) [(a, TagParams)]
fs of
                    (Int
_:Int
_:Int
i:[Int]
is) -> case [Int] -> [(a, TagParams)] -> [[(a, TagParams)]]
forall a. [Int] -> [a] -> [[a]]
splitAts (Int
i Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int]
is) [(a, TagParams)]
fs of
                        [] -> []
                        ([(a, TagParams)]
ds:[[(a, TagParams)]]
dss) -> [(a, TagParams)]
ds [(a, TagParams)] -> [(a, TagParams)] -> [(a, TagParams)]
forall a. [a] -> [a] -> [a]
++ ([(a, TagParams)] -> [(a, TagParams)])
-> [[(a, TagParams)]] -> [(a, TagParams)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Int -> [(a, TagParams)] -> [(a, TagParams)]
forall a. Int -> [a] -> [a]
drop Int
1) [[(a, TagParams)]]
dss
                    [Int]
_ -> [(a, TagParams)]
fs
                splitAts :: [Int] -> [a] -> [[a]]
splitAts [] [a]
dss = [[a]
dss]
                splitAts (Int
i:[Int]
is) [a]
dss =
                    let ([a]
ds', [a]
dss') = Int -> [a] -> ([a], [a])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
i [a]
dss
                    in  [a]
ds' [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [Int] -> [a] -> [[a]]
splitAts ((Int -> Int) -> [Int] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int -> Int
forall a. Num a => a -> a -> a
`subtract` Int
i) [Int]
is) [a]
dss'
                equalElement :: TagParams -> Bool
equalElement TagParams
e' =
                    TagParams -> Text
tagName ((a, TagParams) -> TagParams
forall a b. (a, b) -> b
snd (a, TagParams)
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== TagParams -> Text
tagName TagParams
e'
                    -- This is only ever invoked on HTML elements, so the
                    -- namespaces shouldn't need to be checked for equality.
                    Bool -> Bool -> Bool
&& TagParams -> HashMap Text Text
tagAttributes ((a, TagParams) -> TagParams
forall a b. (a, b) -> b
snd (a, TagParams)
e) HashMap Text Text -> HashMap Text Text -> Bool
forall a. Eq a => a -> a -> Bool
== TagParams -> HashMap Text Text
tagAttributes TagParams
e'

-- | __HTML:__
--      @[reconstruct the active formatting elements]
--      (https://html.spec.whatwg.org/multipage/parsing.html#reconstruct-the-active-formatting-elements)@
-- 
-- Create new tokens for each of the yet-unclosed elements created by
-- 'insertFormattingElement', within the scope defined by
-- 'insertFormattingMarker'.
reconstructFormattingElements :: TreeBuilder [Patch]
reconstructFormattingElements :: TreeBuilder [Patch]
reconstructFormattingElements = do
    TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
    let open :: [NodeIndex]
open = ((NodeIndex, ElementParams) -> NodeIndex)
-> [(NodeIndex, ElementParams)] -> [NodeIndex]
forall a b. (a -> b) -> [a] -> [b]
map (NodeIndex, ElementParams) -> NodeIndex
forall a b. (a, b) -> a
fst ([(NodeIndex, ElementParams)] -> [NodeIndex])
-> [(NodeIndex, ElementParams)] -> [NodeIndex]
forall a b. (a -> b) -> a -> b
$ TreeParserState -> [(NodeIndex, ElementParams)]
openElements TreeParserState
state
        ([(NodeIndex, TagParams)]
es, [[(NodeIndex, TagParams)]]
ess) = case TreeParserState -> [[(NodeIndex, TagParams)]]
activeFormattingElements TreeParserState
state of
            [] -> ([], [])
            ([(NodeIndex, TagParams)]
es':[[(NodeIndex, TagParams)]]
ess') -> ([(NodeIndex, TagParams)]
es', [[(NodeIndex, TagParams)]]
ess')
        ([(NodeIndex, TagParams)]
toRebuild, [(NodeIndex, TagParams)]
alreadyOpened) = ((NodeIndex, TagParams) -> Bool)
-> [(NodeIndex, TagParams)]
-> ([(NodeIndex, TagParams)], [(NodeIndex, TagParams)])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (\(NodeIndex
i, TagParams
_) -> NodeIndex -> [NodeIndex] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
notElem NodeIndex
i [NodeIndex]
open) [(NodeIndex, TagParams)]
es
    TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => s -> StateT s m ()
N.S.put (TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ())
-> TreeParserState
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ TreeParserState
state
        { activeFormattingElements :: [[(NodeIndex, TagParams)]]
activeFormattingElements = [(NodeIndex, TagParams)]
alreadyOpened [(NodeIndex, TagParams)]
-> [[(NodeIndex, TagParams)]] -> [[(NodeIndex, TagParams)]]
forall a. a -> [a] -> [a]
: [[(NodeIndex, TagParams)]]
ess
        }
    ([Patch] -> (NodeIndex, TagParams) -> TreeBuilder [Patch])
-> [Patch] -> [(NodeIndex, TagParams)] -> TreeBuilder [Patch]
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
N.foldM [Patch] -> (NodeIndex, TagParams) -> TreeBuilder [Patch]
forall a. [Patch] -> (a, TagParams) -> TreeBuilder [Patch]
reconstruct [] ([(NodeIndex, TagParams)] -> TreeBuilder [Patch])
-> [(NodeIndex, TagParams)] -> TreeBuilder [Patch]
forall a b. (a -> b) -> a -> b
$ [(NodeIndex, TagParams)] -> [(NodeIndex, TagParams)]
forall a. [a] -> [a]
reverse [(NodeIndex, TagParams)]
toRebuild
  where reconstruct :: [Patch] -> (a, TagParams) -> TreeBuilder [Patch]
reconstruct [Patch]
pss (a
_, TagParams
d) = do
            [Patch]
ps <- TreeOutput -> [Patch]
treePatches (TreeOutput -> [Patch])
-> TreeBuilder TreeOutput -> TreeBuilder [Patch]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TreeInput -> TreeBuilder TreeOutput
insertFormattingElement ([ParseError] -> Token -> TreeInput
dummyToken [] (Token -> TreeInput) -> Token -> TreeInput
forall a b. (a -> b) -> a -> b
$ TagParams -> Token
StartTag TagParams
d)
            [Patch] -> TreeBuilder [Patch]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Patch] -> TreeBuilder [Patch]) -> [Patch] -> TreeBuilder [Patch]
forall a b. (a -> b) -> a -> b
$ [Patch]
pss [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ [Patch]
ps


-- | __HTML:__
--      @[change the encoding]
--      (https://html.spec.whatwg.org/multipage/parsing.html#stop-parsing)@,
--      step 6
-- 
-- Restore the tracked state to pristine condition, issue an instruction to
-- throw out any 'Tree' generated thus far, and set the given binary stream to
-- be used when the parser next resumes.
restartParsing :: BS.L.ByteString -> TreeBuilder TreeOutput
restartParsing :: ByteString -> TreeBuilder TreeOutput
restartParsing ByteString
initial = do
    TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => s -> StateT s m ()
N.S.put (TreeParserState -> StateT TreeParserState (Parser [TreeInput]) ())
-> TreeParserState
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ TreeState -> TreeParserState
treeParserState TreeState
defState
    TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ TreeOutput :: [Patch] -> TokenizerOutputState -> TreeOutput
TreeOutput
        { treePatches :: [Patch]
treePatches = [Patch
RestartParsing]
        , treeState :: TokenizerOutputState
treeState = (TokenizerState, ByteString) -> TokenizerOutputState
forall a. a -> Maybe a
Just (TokenizerState
tokState, ByteString -> ByteString
BS.L.toStrict ByteString
initial)
        }
  where updateDecoder :: TreeState -> TokenizerState
updateDecoder TreeState
state = TreeState -> TokenizerState
tokenizerState TreeState
state TokenizerState
-> (TokenizerState -> TokenizerState) -> TokenizerState
forall a b. a -> (a -> b) -> b
&
            case TokenizerState
-> Either (Either SnifferEnvironment Encoding) (Maybe DecoderState)
decoderState_ (TokenizerState
 -> Either
      (Either SnifferEnvironment Encoding) (Maybe DecoderState))
-> TokenizerState
-> Either (Either SnifferEnvironment Encoding) (Maybe DecoderState)
forall a b. (a -> b) -> a -> b
$ TreeState -> TokenizerState
tokenizerState TreeState
state of
                Left Either SnifferEnvironment Encoding
initialize -> Either SnifferEnvironment (Maybe Encoding)
-> TokenizerState -> TokenizerState
tokenizerEncoding (Either SnifferEnvironment (Maybe Encoding)
 -> TokenizerState -> TokenizerState)
-> Either SnifferEnvironment (Maybe Encoding)
-> TokenizerState
-> TokenizerState
forall a b. (a -> b) -> a -> b
$ (Encoding -> Maybe Encoding)
-> Either SnifferEnvironment Encoding
-> Either SnifferEnvironment (Maybe Encoding)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Encoding -> Maybe Encoding
forall a. a -> Maybe a
Just Either SnifferEnvironment Encoding
initialize
                Right Maybe DecoderState
decState -> Either SnifferEnvironment (Maybe Encoding)
-> TokenizerState -> TokenizerState
tokenizerEncoding (Either SnifferEnvironment (Maybe Encoding)
 -> TokenizerState -> TokenizerState)
-> (Maybe Encoding -> Either SnifferEnvironment (Maybe Encoding))
-> Maybe Encoding
-> TokenizerState
-> TokenizerState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe Encoding -> Either SnifferEnvironment (Maybe Encoding)
forall a b. b -> Either a b
Right (Maybe Encoding -> TokenizerState -> TokenizerState)
-> Maybe Encoding -> TokenizerState -> TokenizerState
forall a b. (a -> b) -> a -> b
$ (DecoderState -> Encoding) -> Maybe DecoderState -> Maybe Encoding
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap DecoderState -> Encoding
decoderEncoding Maybe DecoderState
decState
        defState :: TreeState
defState = TreeState
defaultTreeState
        tokState :: TokenizerState
tokState = TreeState -> TokenizerState
updateDecoder TreeState
defState

-- | __HTML:__
--      @[stop parsing]
--      (https://html.spec.whatwg.org/multipage/parsing.html#stop-parsing)@
-- 
-- Close all remaining open elements, and perform other cleanup functions to
-- finalize the document tree.
stopParsing :: TreeInput -> TreeBuilder TreeOutput
stopParsing :: TreeInput -> TreeBuilder TreeOutput
stopParsing TreeInput
t' = ([ElementParams] -> Word) -> TreeBuilder [Patch]
clear (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word)
-> ([ElementParams] -> Int) -> [ElementParams] -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ElementParams] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length) TreeBuilder [Patch]
-> ([Patch] -> TreeBuilder TreeOutput) -> TreeBuilder TreeOutput
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= TreeInput -> [Patch] -> TreeBuilder TreeOutput
packTree TreeInput
t'