{-| Description: Functions to collapse a state-free instruction list into a document tree. Copyright: (c) 2020 Sam May License: MPL-2.0 Maintainer: ag.eitilt@gmail.com Stability: provisional Portability: portable This module provides the logic powering the second half of this implementation's split __[HTML](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction)__ tree construction algorithm. Namely, the functions in this module operate over the single-dimensional stream of static tree-building instructions generated by the rest of the "Web.Mangrove.Parse.Tree" hierarchy, folding them into a simplified DOM tree. For a more detailed discussion of the design behind this, see the documentation of "Web.Mangrove.Parse.Tree.Patch". -} module Web.Mangrove.Parse.Tree.Patch.Fold ( buildTree ) where import qualified Data.Bifunctor as F.B import qualified Data.Either as E import qualified Data.HashMap.Strict as M import qualified Data.IntMap.Strict as M.I import qualified Data.List as L import qualified Data.Maybe as Y import qualified Data.Text as T import Web.Willow.DOM import Web.Mangrove.Parse.Common.Error import Web.Mangrove.Parse.Tree.Common hiding ( Token ( .. ) ) import Web.Mangrove.Parse.Tree.Patch -- | Fold a series of instructions describing how to build a document tree -- (without reference to any persistent state) into the tree they describe. buildTree :: [Patch] -> Tree buildTree = buildTree' . foldPatchset where buildTree' (ts, []) = emptyTree { node = Document mode' , children = ts' } where (ms, ts') = L.partition (\t -> nodeType (node t) == Just DocumentNode) ts mode' = foldr max NoQuirks $ Y.mapMaybe (getQuirksMode . node) ms getQuirksMode (Document mode) = Just mode getQuirksMode _ = Nothing buildTree' (_, RestartParsing : ps) = buildTree' $ foldPatchset ps buildTree' (ts, ps) = buildTree' . F.B.first (ts ++) $ foldPatchset ps -- | Consume the next minimal sequence of folding instructions which would -- result in at least one complete tree. The input patchset is read according -- to a sensible behaviour for trees providing a root for the document -- (typically the /only/ root). foldPatchset :: [Patch] -> ([Tree], [Patch]) foldPatchset ps = F.B.first (joinTexts . Y.mapMaybe filterNull) $ foldPatchset' False ps where -- Preprocess the placeholder tuples, where the 'InsertAt' carries the -- semantic data rather than the 'Tree'. filterNull (Right (_, t)) = Just $ Right t filterNull (Left c) = Just $ Left c -- | Consume the next sequence of folding instructions which would result in a -- complete tree. Any intervening instructions which are destined for other -- parts of the final tree are extracted for re-direction upward. Any textual -- data to be inserted directly into the tree is returned as a 'Left' value for -- later packing into a single 'Text' node, to avoid the exponential complexity -- of, e.g., @'Data.Text'.'T.cons'@. foldPatchset' :: Bool -- ^ Whether this function call originates from somewhere higher up the -- document tree; any explicit calls will almost always use 'False'. -> [Patch] -> ([Either ([ParseError], Char) (InsertAt, Tree)], [Patch]) foldPatchset' _ [] = ([], []) foldPatchset' isInner (ErrorList _:ps) = foldPatchset' isInner ps -- Break infinite loop on unmatched node-closing patches. foldPatchset' False (CloseNodes _:ps) = foldPatchset' False ps foldPatchset' False (SoftCloseCurrentNode:ps) = foldPatchset' False ps foldPatchset' False (DropCurrentNode:ps) = foldPatchset' False ps foldPatchset' False (RestartParsing:ps) = foldPatchset' False ps -- Stop processing on node-closing patches to enable recursive consumption of -- all patches until the end of the node. foldPatchset' True ps@(DropCurrentNode:_) = ([], ps) foldPatchset' True ps@(RestartParsing:_) = ([], ps) -- Push a "close ancestors" along until it reaches one of the node-closing -- patches; at that point, drop the payload destined for the current location. foldPatchset' True (p@(CloseNodes ls):ps) = case decrementReparenting ls of _ | M.I.null ls -> foldPatchset' True ps (0, toFloat) -> case ps of [] -> ([], [p]) (CloseNodes ls':ps') -> foldPatchset' True $ CloseNodes (M.I.unionWith (+) ls ls') : ps' (DropCurrentNode:ps') -> ([], DropCurrentNode : CloseNodes toFloat : ps') (RestartParsing:_) -> ([], ps) (p'@InsertElement{}:ps') -> foldPatchset' True $ p' : CloseNodes (incrementReparenting ls) : ps' (p':ps') -> foldPatchset' True $ p' : p : ps' _ -> ([], p : ps) -- Simple patch -> node translation. foldPatchset' isInner (SetDocumentQuirks mode:ps) = F.B.first (Right (InDocument, packQuirks mode) :) $ foldPatchset' isInner ps foldPatchset' isInner (InsertAndSetDocumentType _ d:ps) = F.B.first (Right (InDocument, packDoctype d) :) $ foldPatchset' isInner ps foldPatchset' isInner (InsertComment _ loc txt:ps) = F.B.first (Right (loc, packComment txt) :) $ foldPatchset' isInner ps foldPatchset' isInner (AddAttribute loc attr:ps) = F.B.first (Right (loc, packAttribute attr) :) $ foldPatchset' isInner ps -- Prepends to the following text rather than the spec's appending to the -- previous. That should still have an identical result. foldPatchset' isInner (InsertCharacter errs h:ps) = F.B.first (Left (errs, h) :) $ foldPatchset' isInner ps -- Float non-element nodes up one level. foldPatchset' True (SoftCloseCurrentNode:ps) = F.B.first (map floatTrees) $ foldPatchset' True ps where floatTrees t'@(Right (RelativeLocation l, t)) | nodeType (node t) == Just ElementNode = t' | otherwise = Right (RelativeLocation $ succ l, t) -- Text nodes here don't take advantage of ahead-of-time gathering -- (thus need more complex concatenation), but shouldn't be a large -- portion of the document. floatTrees (Left (_, c)) = Right (RelativeLocation 1, emptyTree { node = Text $ T.singleton c }) floatTrees t' = t' -- Consume patches until and including the (matching) node-closing patch. foldPatchset' isInner (InsertElement _ tag:ps) = let (ts, ps') = foldPatchset' True ps (ts', bubble) = filterFloaters ts $ floatsElement isInner tag (attrs, ts'') = L.partition isAttribute ts' tag' = tag { elementAttributes = M.union (elementAttributes tag) . fromAttrList $ Y.mapMaybe toAttribute attrs } this = Right (RelativeLocation 0, packElement tag' $ joinTexts ts'') in case ps' of [] -> (this : bubble, []) (DropCurrentNode:ps'') -> F.B.first (++ bubble) $ foldPatchset' True ps'' (CloseNodes ls:ps'') -> let (here, toFloat) = decrementReparenting ls ls' = case here of l | l >= 2 -> M.I.insertWith (+) 0 (pred l) toFloat _ -> toFloat (out, trail) = foldPatchset' True $ CloseNodes ls' : ps'' in (this : bubble ++ out, trail) (RestartParsing:_) -> ([], ps') _ -> let (out, trail) = foldPatchset' True ps' cleaned = map (fmap $ \c -> (RelativeLocation 0, c)) ts'' in (cleaned ++ out ++ bubble, trail) where isAttribute (Right tok) = nodeType (node tok) == Just AttributeNode isAttribute (Left _) = False toAttribute (Right tok) = case node tok of Attribute attr -> Just attr _ -> Nothing toAttribute (Left _) = Nothing -- | Given a heterogeneous set of patches potentially destined for multiple -- places within the document hierarchy, and a known set of addresses for the -- current location, partition out the relevant patches from those continuing -- onward. filterFloaters :: [Either ([ParseError], Char) (InsertAt, Tree)] -> [InsertAt] -> ([Either ([ParseError], Char) Tree], [Either ([ParseError], Char) (InsertAt, Tree)]) filterFloaters ts here = F.B.bimap (map $ F.B.second snd) (map . fmap $ F.B.first decrementTarget) $ L.partition isHere ts where decrementTarget loc = case loc of RelativeLocation 0 -> loc RelativeLocation i -> RelativeLocation $ pred i _ -> loc isHere (Right (l, _)) = elem l here isHere (Left _) = True -- | Calculate the accepted addresses for the element at the current location; -- usually just @'RelativeLocation' 0@, but a root @\@ node also accepts -- 'InHtmlElement'. floatsElement :: Bool -- ^ Whether this node is a descendant node of the root. -> ElementParams -> [InsertAt] floatsElement isInner d | elementName d == T.pack "html" && not isInner = [RelativeLocation 0, InHtmlElement] | otherwise = [RelativeLocation 0] -- | Given the list of reparenting directives used by a 'CloseNodes' -- instruction, increase the distance to the node-to-reparent to account for -- the patch being pushed into a child node. incrementReparenting :: M.I.IntMap ReparentDepth -> M.I.IntMap ReparentDepth incrementReparenting = M.I.mapKeys succ -- | Given the list of reparenting directives used by a 'CloseNodes' -- instruction, partition out the accumulated directives intended for the -- current node, and decrease the distance to the node-to-reparent to account -- for the child node being closed. decrementReparenting :: M.I.IntMap ReparentDepth -> (ReparentDepth, M.I.IntMap ReparentDepth) decrementReparenting ls = (Y.fromMaybe 0 $ M.I.lookup 0 ls, M.I.mapKeys pred $ M.I.delete 0 ls) -- | Collapse all sequential 'Char' sequences in the list and pack them into a -- single 'Text' node, preserving the interspersed complete subtrees. joinTexts :: [Either ([ParseError], Char) Tree] -> [Tree] joinTexts [] = [] joinTexts (Right t : xs) = t : joinTexts xs joinTexts (Left c : xs) = txt' : joinTexts xs' where (cs', xs') = span E.isLeft xs cs = map (either id $ error "unexpected 'Right' after @span isLeft@") cs' (_, txt) = unzip $ c : cs txt' = emptyTree { node = Text $ T.pack txt } -- | Wrap the desired level of backwards compatibility into a 'Document' node -- as a placeholder. packQuirks :: QuirksMode -> Tree packQuirks mode = emptyTree { node = Document mode } -- | Wrap a string of characters in the data types expected for a 'Comment' -- node in the output of 'buildTree'. packComment :: T.Text -> Tree packComment txt = emptyTree { node = Comment txt } -- | Wrap the metadata contained in a document type declaration in the data -- types expected for a 'DocumentType' node in the output of 'buildTree'. packDoctype :: DocumentTypeParams -> Tree packDoctype dtd = emptyTree { node = DocumentType dtd } -- | Wrap the metadata contained in a markup tag in the data types expected for -- an 'Element' node in the output of 'buildTree'. If it represents an HTML -- @\@ element, additionally wrap the children in a -- 'DocumentFragment' node to indicate the @[template -- contents](https://html.spec.whatwg.org/scripting.html#template-contents)@ -- parameter. packElement :: ElementParams -> [Tree] -> Tree packElement tag childTrees | nodeIsElement (T.pack "template") tag = emptyTree { node = e , children = [emptyTree { node = DocumentFragment , children = childTrees }] } | otherwise = emptyTree { node = e , children = childTrees } where e = Element tag -- | Wrap the metadata contained in a metadata tag's attribute in the data -- types expected for an 'Attribute' node in the output of 'buildTree'; this -- will then be subsumed into the parent 'Element' node, and does not remain in -- the final document tree. packAttribute :: AttributeParams -> Tree packAttribute attr = emptyTree { node = Attribute attr }