{-# HLINT ignore "Redundant flip" #-}

{-|
Description:    Fold a linear, semantic stream into a tree structure.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      provisional
Portability:    portable

This module and the internal branch it heads implement the "Tree Construction"
section of the
__[HTML](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction)__
document parsing specification, operating over the output of the
"Web.Mangrove.Parse.Tokenize" stage to produce a DOM tree representation of a
web page.  As this library is still in the early stages of development, the
representation produced here is not actually a proper DOM implementation, but
instead only stores basic parameters in an equivalent (but less-featured)
structure.  Nonetheless, it is still enough for basic evaluation and unstyled
rendering.
-}
module Web.Mangrove.Parse.Tree
    ( -- * Types
      -- ** Final
      Tree ( .. )
    , Node ( .. )
    , QuirksMode ( .. )
      -- ** Intermediate
    , Patch
    , TreeState
    , Encoding ( .. )
    , NodeIndex
    , ElementParams ( .. )
    , emptyElementParams
      -- * Initialization
    , defaultTreeState
    , treeEncoding
    , treeFragment
    , treeInIFrame
      -- * Transformations
    , tree
    , treeStep
    , finalizeTree
    ) where


import qualified Control.Monad.Trans.State as N.S

import qualified Data.ByteString as BS
import qualified Data.ByteString.Short as BS.SH
import qualified Data.List as L
import qualified Data.Maybe as Y
import qualified Data.Text as T

import Web.Willow.DOM

import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tree.Common
import Web.Mangrove.Parse.Tree.Dispatcher
import Web.Mangrove.Parse.Tree.Patch
import Web.Mangrove.Parse.Tree.Patch.Fold
import Web.Willow.Common.Encoding hiding ( setRemainder )
import Web.Willow.Common.Encoding.Sniffer
import Web.Willow.Common.Parser


-- | __HTML:__
--      @[tree construction]
--      (https://html.spec.whatwg.org/multipage/parsing.html#tree-construction)@
-- 
-- Given a starting environment, transform a binary document stream into a
-- hierarchical markup tree.  If the parse fails, returns an empty tree (a
-- 'Document' node with no children).
tree :: TreeState -> BS.ByteString -> ([Patch], TreeState)
tree :: TreeState -> ByteString -> ([Patch], TreeState)
tree TreeState
state ByteString
stream = (([Patch], TreeState)
 -> ([Patch], TreeState, ByteString) -> ([Patch], TreeState))
-> ([Patch], TreeState)
-> [([Patch], TreeState, ByteString)]
-> ([Patch], TreeState)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' ([Patch], TreeState)
-> ([Patch], TreeState, ByteString) -> ([Patch], TreeState)
forall a b b c. ([a], b) -> ([a], b, c) -> ([a], b)
treeFold ([], TreeState
state) ([([Patch], TreeState, ByteString)] -> ([Patch], TreeState))
-> [([Patch], TreeState, ByteString)] -> ([Patch], TreeState)
forall a b. (a -> b) -> a -> b
$ ((TreeState, ByteString)
 -> Maybe
      (([Patch], TreeState, ByteString), (TreeState, ByteString)))
-> (TreeState, ByteString) -> [([Patch], TreeState, ByteString)]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
L.unfoldr (TreeState, ByteString)
-> Maybe
     (([Patch], TreeState, ByteString), (TreeState, ByteString))
treeUnfold (TreeState
state, ByteString
stream)
  where treeUnfold :: (TreeState, ByteString)
-> Maybe
     (([Patch], TreeState, ByteString), (TreeState, ByteString))
treeUnfold = (TreeState -> ByteString -> ([Patch], TreeState, ByteString))
-> (TreeState -> TokenizerState)
-> (TreeState, ByteString)
-> Maybe
     (([Patch], TreeState, ByteString), (TreeState, ByteString))
forall state out.
Eq state =>
(state -> ByteString -> ([out], state, ByteString))
-> (state -> TokenizerState)
-> (state, ByteString)
-> Maybe (([out], state, ByteString), (state, ByteString))
unfoldLoop TreeState -> ByteString -> ([Patch], TreeState, ByteString)
treeStep TreeState -> TokenizerState
tokenizerState
        treeFold :: ([a], b) -> ([a], b, c) -> ([a], b)
treeFold ([a]
ps, b
_) ([a]
ps', b
state', c
_) = ([a]
ps [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a]
ps', b
state')

-- | Wrap a parser in a signature appropriate to pass to 'L.unfoldr'.
unfoldLoop
    :: Eq state
    => (state -> BS.ByteString -> ([out], state, BS.ByteString))
        -- ^ The single-step parser function.
    -> (state -> TokenizerState)
        -- ^ Repack the state used by the step parser into a standardized form.
    -> (state, BS.ByteString)
        -- ^ The initial inputs to the parser.
    -> Maybe (([out], state, BS.ByteString), (state, BS.ByteString))
unfoldLoop :: (state -> ByteString -> ([out], state, ByteString))
-> (state -> TokenizerState)
-> (state, ByteString)
-> Maybe (([out], state, ByteString), (state, ByteString))
unfoldLoop state -> ByteString -> ([out], state, ByteString)
step state -> TokenizerState
toTokState (state
state, ByteString
stream)
    | ByteString -> Bool
BS.null ByteString
stream = Maybe (([out], state, ByteString), (state, ByteString))
forall a. Maybe a
Nothing
    | Bool
otherwise = case state -> ByteString -> ([out], state, ByteString)
step state
state ByteString
stream of
        out :: ([out], state, ByteString)
out@([out]
_, state
state', ByteString
_) | state
state state -> state -> Bool
forall a. Eq a => a -> a -> Bool
/= state
state' Bool -> Bool -> Bool
|| state -> Bool
hasRemainder state
state' -> ([out], state, ByteString)
-> Maybe (([out], state, ByteString), (state, ByteString))
forall a a b. (a, a, b) -> Maybe ((a, a, b), (a, b))
continueUnfold ([out], state, ByteString)
out
        ([], state
_, ByteString
stream') | ByteString -> Bool
BS.null ByteString
stream' -> Maybe (([out], state, ByteString), (state, ByteString))
forall a. Maybe a
Nothing
        ([out], state, ByteString)
out -> ([out], state, ByteString)
-> Maybe (([out], state, ByteString), (state, ByteString))
forall a a b. (a, a, b) -> Maybe ((a, a, b), (a, b))
continueUnfold ([out], state, ByteString)
out
  where continueUnfold :: (a, a, b) -> Maybe ((a, a, b), (a, b))
continueUnfold (a
ps, a
state', b
stream') = ((a, a, b), (a, b)) -> Maybe ((a, a, b), (a, b))
forall a. a -> Maybe a
Just ((a
ps, a
state', b
stream'), (a
state', b
stream'))
        hasRemainder :: state -> Bool
hasRemainder = Bool -> (DecoderState -> Bool) -> Maybe DecoderState -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (Bool -> Bool
not (Bool -> Bool) -> (DecoderState -> Bool) -> DecoderState -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Bool
BS.SH.null (ShortByteString -> Bool)
-> (DecoderState -> ShortByteString) -> DecoderState -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState -> ShortByteString
decoderRemainder) (Maybe DecoderState -> Bool)
-> (state -> Maybe DecoderState) -> state -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TokenizerState -> Maybe DecoderState
decoderState (TokenizerState -> Maybe DecoderState)
-> (state -> TokenizerState) -> state -> Maybe DecoderState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. state -> TokenizerState
toTokState

-- | Parse a minimal number of tokens from a binary document stream, into a
-- state-independent sequence of folding instructions.  Returns all data
-- required to seamlessly resume parsing.
treeStep :: TreeState -> BS.ByteString -> ([Patch], TreeState, BS.ByteString)
treeStep :: TreeState -> ByteString -> ([Patch], TreeState, ByteString)
treeStep TreeState
state ByteString
stream = [([([ParseError], Token)], TokenizerState, ByteString)]
-> (TreeState -> TreeState)
-> TreeState
-> ([Patch], TreeState, ByteString)
treeStep' [([([ParseError], Token)], TokenizerState, ByteString)]
stream' TreeState -> TreeState
stateRemainder TreeState
state
  where stateRemainder :: TreeState -> TreeState
stateRemainder TreeState
state' = TreeState
state'
            { tokenizerState :: TokenizerState
tokenizerState = ShortByteString -> TokenizerState -> TokenizerState
setRemainder (ByteString -> ShortByteString
BS.SH.toShort ByteString
stream) (TokenizerState -> TokenizerState)
-> TokenizerState -> TokenizerState
forall a b. (a -> b) -> a -> b
$ TreeState -> TokenizerState
tokenizerState TreeState
state'
            }
        stream' :: [([([ParseError], Token)], TokenizerState, ByteString)]
stream' = ((TokenizerState, ByteString)
 -> Maybe
      (([([ParseError], Token)], TokenizerState, ByteString),
       (TokenizerState, ByteString)))
-> (TokenizerState, ByteString)
-> [([([ParseError], Token)], TokenizerState, ByteString)]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
L.unfoldr (TokenizerState, ByteString)
-> Maybe
     (([([ParseError], Token)], TokenizerState, ByteString),
      (TokenizerState, ByteString))
tokenUnfold (TreeState -> TokenizerState
tokenizerState TreeState
state, ByteString
stream)
        tokenUnfold :: (TokenizerState, ByteString)
-> Maybe
     (([([ParseError], Token)], TokenizerState, ByteString),
      (TokenizerState, ByteString))
tokenUnfold = (TokenizerState
 -> ByteString
 -> ([([ParseError], Token)], TokenizerState, ByteString))
-> (TokenizerState -> TokenizerState)
-> (TokenizerState, ByteString)
-> Maybe
     (([([ParseError], Token)], TokenizerState, ByteString),
      (TokenizerState, ByteString))
forall state out.
Eq state =>
(state -> ByteString -> ([out], state, ByteString))
-> (state -> TokenizerState)
-> (state, ByteString)
-> Maybe (([out], state, ByteString), (state, ByteString))
unfoldLoop TokenizerState
-> ByteString
-> ([([ParseError], Token)], TokenizerState, ByteString)
tokenizeStep TokenizerState -> TokenizerState
forall a. a -> a
id

-- | Parse a minimal number of tokens from a binary document stream, performing
-- any backend processing required to correctly generate the document tree.
treeStep'
    :: [([([ParseError], Token)], TokenizerState, BS.ByteString)]
    -> (TreeState -> TreeState)
    -> TreeState
    -> ([Patch], TreeState, BS.ByteString)
treeStep' :: [([([ParseError], Token)], TokenizerState, ByteString)]
-> (TreeState -> TreeState)
-> TreeState
-> ([Patch], TreeState, ByteString)
treeStep' [([([ParseError], Token)], TokenizerState, ByteString)]
input TreeState -> TreeState
fallback TreeState
state =
    case ParserT
  [TreeInput]
  Maybe
  (([Patch], TokenizerState, ByteString), TreeParserState)
-> [TreeInput]
-> Maybe
     ((([Patch], TokenizerState, ByteString), TreeParserState),
      [TreeInput])
forall stream (gather :: * -> *) out.
ParserT stream gather out -> stream -> gather (out, stream)
runParserT (StateT
  TreeParserState
  (Parser [TreeInput])
  ([Patch], TokenizerState, ByteString)
-> TreeParserState
-> ParserT
     [TreeInput]
     Maybe
     (([Patch], TokenizerState, ByteString), TreeParserState)
forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
N.S.runStateT StateT
  TreeParserState
  (Parser [TreeInput])
  ([Patch], TokenizerState, ByteString)
recurse (TreeParserState
 -> ParserT
      [TreeInput]
      Maybe
      (([Patch], TokenizerState, ByteString), TreeParserState))
-> TreeParserState
-> ParserT
     [TreeInput]
     Maybe
     (([Patch], TokenizerState, ByteString), TreeParserState)
forall a b. (a -> b) -> a -> b
$ TreeState -> TreeParserState
treeParserState TreeState
state) [TreeInput]
stream of
        Just ((([Patch]
ps, TokenizerState
tokState, ByteString
stream'), TreeParserState
parserState), [TreeInput]
_) -> ([Patch]
ps', TreeState
state', ByteString
stream')
          where state' :: TreeState
state' = TreeState :: TreeParserState -> TokenizerState -> TreeState
TreeState
                    { tokenizerState :: TokenizerState
tokenizerState = TokenizerState
tokState
                    , treeParserState :: TreeParserState
treeParserState = TreeParserState
parserState
                    }
                ps' :: [Patch]
ps' = (Patch -> Patch) -> [Patch] -> [Patch]
forall a b. (a -> b) -> [a] -> [b]
map Patch -> Patch
redirectPatches [Patch]
ps
        Maybe
  ((([Patch], TokenizerState, ByteString), TreeParserState),
   [TreeInput])
Nothing -> ([], TreeState -> TreeState
fallback TreeState
state, ByteString
BS.empty)
  where stream :: [TreeInput]
stream = (([([ParseError], Token)], TokenizerState, ByteString)
 -> [TreeInput] -> [TreeInput])
-> [TreeInput]
-> [([([ParseError], Token)], TokenizerState, ByteString)]
-> [TreeInput]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
L.foldr ([([ParseError], Token)], TokenizerState, ByteString)
-> [TreeInput] -> [TreeInput]
repackStream [] [([([ParseError], Token)], TokenizerState, ByteString)]
input
        redirectPatches :: Patch -> Patch
redirectPatches Patch
p = case TreeParserState
-> Maybe (ElementParams, [(NodeIndex, ElementParams)])
fragmentContext (TreeParserState
 -> Maybe (ElementParams, [(NodeIndex, ElementParams)]))
-> TreeParserState
-> Maybe (ElementParams, [(NodeIndex, ElementParams)])
forall a b. (a -> b) -> a -> b
$ TreeState -> TreeParserState
treeParserState TreeState
state of
            -- A fragment with an @\<html\>@ context won't create that node in
            -- the folded tree, and so any patches sent there won't find an
            -- anchor.
            Just (ElementParams, [(NodeIndex, ElementParams)])
ctx | Text -> ElementParams -> Bool
nodeIsElement (String -> Text
T.pack String
"html") (ElementParams -> Bool) -> ElementParams -> Bool
forall a b. (a -> b) -> a -> b
$ (ElementParams, [(NodeIndex, ElementParams)]) -> ElementParams
forall a b. (a, b) -> a
fst (ElementParams, [(NodeIndex, ElementParams)])
ctx -> case Patch
p of
                InsertComment [ParseError]
errs InsertAt
InDocument Text
_ -> [ParseError] -> Patch
ErrorList [ParseError]
errs
                InsertComment [ParseError]
errs InsertAt
InHtmlElement Text
txt -> [ParseError] -> InsertAt -> Text -> Patch
InsertComment [ParseError]
errs InsertAt
InDocument Text
txt
                AddAttribute InsertAt
InDocument AttributeParams
_ -> [ParseError] -> Patch
ErrorList []
                AddAttribute InsertAt
InHtmlElement AttributeParams
attr -> InsertAt -> AttributeParams -> Patch
AddAttribute InsertAt
InDocument AttributeParams
attr
                Patch
_ -> Patch
p
            -- The fragment parsing algorithm returns the children of the
            -- context node, and so any patches destined for locations above
            -- that shouldn't show up in the resulting tree.
            Just (ElementParams, [(NodeIndex, ElementParams)])
_ -> case Patch
p of
                InsertComment [ParseError]
errs InsertAt
InDocument Text
_ -> [ParseError] -> Patch
ErrorList [ParseError]
errs
                InsertComment [ParseError]
errs InsertAt
InHtmlElement Text
_ -> [ParseError] -> Patch
ErrorList [ParseError]
errs
                AddAttribute InsertAt
InDocument AttributeParams
_ -> [ParseError] -> Patch
ErrorList []
                AddAttribute InsertAt
InHtmlElement AttributeParams
_ -> [ParseError] -> Patch
ErrorList []
                Patch
_ -> Patch
p
            Maybe (ElementParams, [(NodeIndex, ElementParams)])
_ -> Patch
p

-- | Explicitly indicate that the input stream will not contain any further
-- bytes, and perform any finalization processing based on that.
finalizeTree :: [Patch] -> TreeState -> Tree
finalizeTree :: [Patch] -> TreeState -> Tree
finalizeTree [Patch]
ps TreeState
state = [Patch] -> Tree
buildTree ([Patch] -> Tree) -> [Patch] -> Tree
forall a b. (a -> b) -> a -> b
$ [Patch]
ps [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ [Patch]
ps'
  where ([Patch]
ps', TreeState
_, ByteString
_) = [([([ParseError], Token)], TokenizerState, ByteString)]
-> (TreeState -> TreeState)
-> TreeState
-> ([Patch], TreeState, ByteString)
treeStep' [([([ParseError], Token)]
ts, ShortByteString -> TokenizerState -> TokenizerState
setRemainder ShortByteString
BS.SH.empty TokenizerState
tokState, ByteString
BS.empty)] TreeState -> TreeState
forall a. a -> a
id TreeState
state
        tokState :: TokenizerState
tokState = TreeState -> TokenizerState
tokenizerState TreeState
state
        ts :: [([ParseError], Token)]
ts = TokenizerState -> [([ParseError], Token)]
finalizeTokenizer TokenizerState
tokState [([ParseError], Token)]
-> [([ParseError], Token)] -> [([ParseError], Token)]
forall a. [a] -> [a] -> [a]
++ [([], Token
EndOfStream)]


-- | Specify the encoding scheme a given parse environment should use to read
-- from the binary document stream.  Note that this will always use the initial
-- state for the respective decoder; intermediate states as returned by
-- 'decodeStep' are not supported.
treeEncoding :: Either SnifferEnvironment (Maybe Encoding) -> TreeState -> TreeState
treeEncoding :: Either SnifferEnvironment (Maybe Encoding)
-> TreeState -> TreeState
treeEncoding Either SnifferEnvironment (Maybe Encoding)
enc TreeState
state = TreeState
state
    { tokenizerState :: TokenizerState
tokenizerState = Either SnifferEnvironment (Maybe Encoding)
-> TokenizerState -> TokenizerState
tokenizerEncoding Either SnifferEnvironment (Maybe Encoding)
enc (TokenizerState -> TokenizerState)
-> TokenizerState -> TokenizerState
forall a b. (a -> b) -> a -> b
$ TreeState -> TokenizerState
tokenizerState TreeState
state
    }

-- | __HTML:__
--      @[fragment parsing algorithm]
--      (https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm)@
-- 
-- Transform a given parse environment by adding context for an embedded but
-- separate document fragment.  Calling this with an intermediate state
-- returned by 'treeStep' (as opposed to an initial state from
-- 'defaultTreeState') may result in an unexpected tree structure.
treeFragment
    :: ElementParams
        -- ^ __HTML:__
        --      @[context element]
        --      (https://html.spec.whatwg.org/multipage/parsing.html#concept-frag-parse-context)@
        -- 
        -- The node wrapping -- in one way or another -- the embedded document fragment.
    -> [(NodeIndex, ElementParams)]
        -- ^ The ancestors of the context element, most immediate first.
    -> Maybe QuirksMode
        -- ^ The degree of backwards compatibility used in the node document of
        -- the context element, if it can be determined.
    -> Maybe Bool
        -- ^ Whether the node document of the context element has been parsed
        -- in a way which would require scripting to be enabled (@'Just'
        -- 'True'@) or disabled (@'Just' 'False'@).
    -> TreeState
    -> TreeState
treeFragment :: ElementParams
-> [(NodeIndex, ElementParams)]
-> Maybe QuirksMode
-> Maybe Bool
-> TreeState
-> TreeState
treeFragment ElementParams
ctxNode [(NodeIndex, ElementParams)]
ctxTree Maybe QuirksMode
ctxQuirks Maybe Bool
ctxScript TreeState
state = TreeState
state
    { tokenizerState :: TokenizerState
tokenizerState =
        let mode :: CurrentTokenizerState
mode = if ElementParams -> Maybe Text
elementNamespace ElementParams
ctxNode Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Maybe Text
forall a. a -> Maybe a
Just Text
htmlNamespace
                then case Text -> String
T.unpack (Text -> String) -> Text -> String
forall a b. (a -> b) -> a -> b
$ ElementParams -> Text
elementName ElementParams
ctxNode of
                    String
"title" -> CurrentTokenizerState
RCDataState
                    String
"textarea" -> CurrentTokenizerState
RCDataState
                    String
"style" -> CurrentTokenizerState
RawTextState
                    String
"xmp" -> CurrentTokenizerState
RawTextState
                    String
"iframe" -> CurrentTokenizerState
RawTextState
                    String
"noembed" -> CurrentTokenizerState
RawTextState
                    String
"noframes" -> CurrentTokenizerState
RawTextState
                    String
"script" -> CurrentTokenizerState
ScriptDataState
                    String
"noscript" -> if Maybe Bool
ctxScript Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True
                        then CurrentTokenizerState
RawTextState
                        else CurrentTokenizerState
DataState
                    String
"plaintext" -> CurrentTokenizerState
PlainTextState
                    String
_ -> CurrentTokenizerState
DataState
                else CurrentTokenizerState
DataState
        in  Maybe Text -> Text -> TokenizerState -> TokenizerState
tokenizerStartTag (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
htmlNamespace) (String -> Text
T.pack String
"html") (TokenizerState -> TokenizerState)
-> (TokenizerState -> TokenizerState)
-> TokenizerState
-> TokenizerState
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
            CurrentTokenizerState -> TokenizerState -> TokenizerState
tokenizerMode CurrentTokenizerState
mode (TokenizerState -> TokenizerState)
-> TokenizerState -> TokenizerState
forall a b. (a -> b) -> a -> b
$ TreeState -> TokenizerState
tokenizerState TreeState
state
    , treeParserState :: TreeParserState
treeParserState = TreeParserState -> TreeParserState
resetInsertionMode' (TreeParserState -> TreeParserState)
-> TreeParserState -> TreeParserState
forall a b. (a -> b) -> a -> b
$ (TreeState -> TreeParserState
treeParserState TreeState
state)
        { openElements :: [(NodeIndex, ElementParams)]
openElements = [(NodeIndex
0, ElementParams
htmlElement)]
        , elementIndex :: NodeIndex
elementIndex = NodeIndex
ctxIndex NodeIndex -> NodeIndex -> NodeIndex
forall a. Num a => a -> a -> a
+ NodeIndex
1
        , quirksMode :: QuirksMode
quirksMode = QuirksMode -> Maybe QuirksMode -> QuirksMode
forall a. a -> Maybe a -> a
Y.fromMaybe QuirksMode
NoQuirks Maybe QuirksMode
ctxQuirks
        , templateInsertionModes :: [InsertionMode]
templateInsertionModes = if Text -> ElementParams -> Bool
nodeIsElement (String -> Text
T.pack String
"template") ElementParams
ctxNode
            then [InsertionMode
InTemplate]
            else []
        , fragmentContext :: Maybe (ElementParams, [(NodeIndex, ElementParams)])
fragmentContext = (ElementParams, [(NodeIndex, ElementParams)])
-> Maybe (ElementParams, [(NodeIndex, ElementParams)])
forall a. a -> Maybe a
Just (ElementParams
ctxNode, [(NodeIndex, ElementParams)]
ctxTree)
        , scriptingEnabled :: Bool
scriptingEnabled = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
Y.fromMaybe Bool
False Maybe Bool
ctxScript
        , formElementPointer :: Maybe NodeIndex
formElementPointer = ((NodeIndex, ElementParams) -> NodeIndex)
-> Maybe (NodeIndex, ElementParams) -> Maybe NodeIndex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (NodeIndex, ElementParams) -> NodeIndex
forall a b. (a, b) -> a
fst (Maybe (NodeIndex, ElementParams) -> Maybe NodeIndex)
-> ([(NodeIndex, ElementParams)]
    -> Maybe (NodeIndex, ElementParams))
-> [(NodeIndex, ElementParams)]
-> Maybe NodeIndex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((NodeIndex, ElementParams) -> Bool)
-> [(NodeIndex, ElementParams)] -> Maybe (NodeIndex, ElementParams)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
L.find (Text -> ElementParams -> Bool
nodeIsElement (String -> Text
T.pack String
"form") (ElementParams -> Bool)
-> ((NodeIndex, ElementParams) -> ElementParams)
-> (NodeIndex, ElementParams)
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NodeIndex, ElementParams) -> ElementParams
forall a b. (a, b) -> b
snd) ([(NodeIndex, ElementParams)] -> Maybe NodeIndex)
-> [(NodeIndex, ElementParams)] -> Maybe NodeIndex
forall a b. (a -> b) -> a -> b
$
            (NodeIndex
ctxIndex, ElementParams
ctxNode) (NodeIndex, ElementParams)
-> [(NodeIndex, ElementParams)] -> [(NodeIndex, ElementParams)]
forall a. a -> [a] -> [a]
: [(NodeIndex, ElementParams)]
ctxTree
        }
    }
  where ctxIndex :: NodeIndex
ctxIndex = ((NodeIndex, ElementParams) -> NodeIndex -> NodeIndex)
-> NodeIndex -> [(NodeIndex, ElementParams)] -> NodeIndex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (NodeIndex -> NodeIndex -> NodeIndex
forall a. Ord a => a -> a -> a
max (NodeIndex -> NodeIndex -> NodeIndex)
-> ((NodeIndex, ElementParams) -> NodeIndex)
-> (NodeIndex, ElementParams)
-> NodeIndex
-> NodeIndex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NodeIndex, ElementParams) -> NodeIndex
forall a b. (a, b) -> a
fst) NodeIndex
0 [(NodeIndex, ElementParams)]
ctxTree NodeIndex -> NodeIndex -> NodeIndex
forall a. Num a => a -> a -> a
+ NodeIndex
1
        htmlElement :: ElementParams
htmlElement = ElementParams
emptyElementParams
            { elementName :: Text
elementName = String -> Text
T.pack String
"html"
            , elementNamespace :: Maybe Text
elementNamespace = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
htmlNamespace
            }

-- | Specify whether the given parse environment should be treated as if the
-- document were contained within the @srcdoc@ attribute of an @\<iframe\>@
-- element ('False' by default).
treeInIFrame :: Bool -> TreeState -> TreeState
treeInIFrame :: Bool -> TreeState -> TreeState
treeInIFrame Bool
b TreeState
state = TreeState
state
    { treeParserState :: TreeParserState
treeParserState = (TreeState -> TreeParserState
treeParserState TreeState
state)
        { isInIFrameSrcDoc :: Bool
isInIFrameSrcDoc = Bool
b
        }
    }


-- | Given the output of 'tokenizeStep', rewrap the token list and single state
-- into a single uniform stream.  The final returned tuple will have a 'Just'
-- value containing the single state, while every other element has 'Nothing'
-- (indicating a point where reentry is impossible).
repackStream
    :: ([([ParseError], Token)], TokenizerState, BS.ByteString)
    -> [TreeInput]
    -> [TreeInput]
repackStream :: ([([ParseError], Token)], TokenizerState, ByteString)
-> [TreeInput] -> [TreeInput]
repackStream ([], TokenizerState
_, ByteString
_) [TreeInput]
is = [TreeInput]
is
repackStream ([([ParseError], Token)]
ts, TokenizerState
state, ByteString
stream) [TreeInput]
is = case [([ParseError], Token)] -> [([ParseError], Token)]
forall a. [a] -> [a]
reverse [([ParseError], Token)]
ts of
    -- Be sure any 'EndOfStream' tokens encountered mid-stream don't break parsing.
    (([ParseError]
errs, Token
EndOfStream):[([ParseError], Token)]
ts') -> case [TreeInput]
is of
        [] -> [([ParseError], Token)] -> [TreeInput]
repackStream' [([ParseError], Token)]
ts
        (TreeInput
i:[TreeInput]
is') -> [([ParseError], Token)] -> [TreeInput]
repackStream' ([([ParseError], Token)] -> [([ParseError], Token)]
forall a. [a] -> [a]
reverse [([ParseError], Token)]
ts') [TreeInput] -> [TreeInput] -> [TreeInput]
forall a. [a] -> [a] -> [a]
++ [ParseError] -> TreeInput -> TreeInput
consErrors [ParseError]
errs TreeInput
i TreeInput -> [TreeInput] -> [TreeInput]
forall a. a -> [a] -> [a]
: [TreeInput]
is'
    -- Any other token ending the segment, however, can simply be repacked.
    [([ParseError], Token)]
_ -> [([ParseError], Token)] -> [TreeInput]
repackStream' [([ParseError], Token)]
ts [TreeInput] -> [TreeInput] -> [TreeInput]
forall a. [a] -> [a] -> [a]
++ [TreeInput]
is
  where repackStream' :: [([ParseError], Token)] -> [TreeInput]
repackStream' = ((([ParseError], Token) -> [TreeInput] -> [TreeInput])
 -> [TreeInput] -> [([ParseError], Token)] -> [TreeInput])
-> [TreeInput]
-> (([ParseError], Token) -> [TreeInput] -> [TreeInput])
-> [([ParseError], Token)]
-> [TreeInput]
forall a b c. (a -> b -> c) -> b -> a -> c
flip (([ParseError], Token) -> [TreeInput] -> [TreeInput])
-> [TreeInput] -> [([ParseError], Token)] -> [TreeInput]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr [] ((([ParseError], Token) -> [TreeInput] -> [TreeInput])
 -> [([ParseError], Token)] -> [TreeInput])
-> (([ParseError], Token) -> [TreeInput] -> [TreeInput])
-> [([ParseError], Token)]
-> [TreeInput]
forall a b. (a -> b) -> a -> b
$ \([ParseError]
errs, Token
t) [TreeInput]
ts' -> TreeInput :: [ParseError] -> Token -> TokenizerOutputState -> TreeInput
TreeInput
            { tokenErrs :: [ParseError]
tokenErrs = [ParseError]
errs
            , tokenOut :: Token
tokenOut = Token
t
            , tokenState :: TokenizerOutputState
tokenState = if [TreeInput] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [TreeInput]
ts'
                then (TokenizerState, ByteString) -> TokenizerOutputState
forall a. a -> Maybe a
Just (TokenizerState
state, ByteString
stream)
                else TokenizerOutputState
forall a. Maybe a
Nothing
            } TreeInput -> [TreeInput] -> [TreeInput]
forall a. a -> [a] -> [a]
: [TreeInput]
ts'
        consErrors :: [ParseError] -> TreeInput -> TreeInput
consErrors [ParseError]
errs TreeInput
i = TreeInput
i
            { tokenErrs :: [ParseError]
tokenErrs = [ParseError]
errs [ParseError] -> [ParseError] -> [ParseError]
forall a. [a] -> [a] -> [a]
++ TreeInput -> [ParseError]
tokenErrs TreeInput
i
            }


-- | Loop the tree dispatcher until it returns a patchset which happens to
-- coincide with a tokenizer breakpoint.  Relies on lazy evaluation in the
-- stream generation to avoid forcing the entire thing at once, while still
-- retaining the capability to consume as much input as necessary to get the
-- parsers to line up.
recurse :: TreeBuilder ([Patch], TokenizerState, BS.ByteString)
recurse :: StateT
  TreeParserState
  (Parser [TreeInput])
  ([Patch], TokenizerState, ByteString)
recurse = do
    TreeOutput
out <- TreeBuilder TreeOutput
dispatcher
    case TreeOutput -> TokenizerOutputState
treeState TreeOutput
out of
        -- The tokenizer can't provide a breakpoint at the current token.
        TokenizerOutputState
Nothing -> do
            ([Patch]
out', TokenizerState
tokState', ByteString
stream') <- StateT
  TreeParserState
  (Parser [TreeInput])
  ([Patch], TokenizerState, ByteString)
recurse
            ([Patch], TokenizerState, ByteString)
-> StateT
     TreeParserState
     (Parser [TreeInput])
     ([Patch], TokenizerState, ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> [Patch]
treePatches TreeOutput
out [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ [Patch]
out', TokenizerState
tokState', ByteString
stream')
        -- We have a re-entrant state to seamlessly resume the tokenizer.
        Just (TokenizerState
tokState, ByteString
stream) ->
            ([Patch], TokenizerState, ByteString)
-> StateT
     TreeParserState
     (Parser [TreeInput])
     ([Patch], TokenizerState, ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> [Patch]
treePatches TreeOutput
out, TokenizerState
tokState, ByteString
stream)