{-# LANGUAGE OverloadedStrings #-} {-| Description: Token processing rules within the @\@ section. Copyright: (c) 2020-2021 Sam May License: MPL-2.0 Maintainer: ag.eitilt@gmail.com Stability: provisional Portability: portable -} module Web.Mangrove.Parse.Tree.InBody ( treeInBody ) where import qualified Control.Monad as N import qualified Control.Monad.Trans.State as N.S import qualified Data.HashMap.Strict as M import qualified Data.List as L import qualified Data.Maybe as Y import qualified Data.Text as T import Web.Willow.DOM import Web.Mangrove.Parse.Common.Error import Web.Mangrove.Parse.Tokenize.Common import Web.Mangrove.Parse.Tree.Common import Web.Mangrove.Parse.Tree.InHead import Web.Mangrove.Parse.Tree.InTemplate import Web.Mangrove.Parse.Tree.InText import Web.Mangrove.Parse.Tree.Patch import Web.Willow.Common.Parser import Web.Willow.Common.Parser.Switch import {-# SOURCE #-} Web.Mangrove.Parse.Tree.Dispatcher import Control.Applicative ( (<|>) ) import Data.Functor ( ($>) ) -- | __HTML:__ -- @[the "in body" insertion mode] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody)@ -- -- The parsing instructions corresponding to the 'InBody' section of the state -- machine. treeInBody :: TreeBuilder TreeOutput treeInBody = next >>= switch [ If isNull $ packTreeErrors [UnexpectedNullCharacter] , If isWhitespace $ \t' -> do format <- reconstructFormattingElements insert <- insertCharacter t' return $ format ++| insert , If isCharacter $ \t' -> do setFramesetNotOk format <- reconstructFormattingElements insert <- insertCharacter t' return $ format ++| insert , If isComment insertComment , If isDoctype $ \t' -> packTreeErrors [UnexpectedDoctype $ tokenDocumentType t'] t' , If (isStartTag ["html"]) $ \t' -> do errs <- packTreeErrors [NestedSingletonElement] t' elements <- openElements <$> N.S.get if Y.isJust $ L.find (nodeIsElement "template" . snd) elements then return errs else case reverse elements of [] -> return errs ((i, _):_) -> do add <- mapM (addAttribute InHtmlElement i) . M.toList . tagAttributes $ tokenTag t' return $ errs |++ concat add , If (isStartTag [ "base" , "basefont" , "bgsound" , "link" , "meta" , "noframes" , "script" , "style" , "template" , "title" ]) $ \t' -> do push t' treeInHead , If (isEndTag ["template"]) $ \t' -> do push t' treeInHead , If (isStartTag ["body"]) $ \t' -> do errs <- packTreeErrors [NestedSingletonElement] t' elements <- openElements <$> N.S.get case reverse elements of [] -> return errs [_] -> return errs es | Y.isJust $ L.find (nodeIsElement "template" . snd) es -> return errs (_:(i, e):es) | not (nodeIsElement "body" e) -> return errs | otherwise -> do setFramesetNotOk add <- mapM (addAttribute (RelativeLocation . fromIntegral $ length es) i) . M.toList . tagAttributes $ tokenTag t' return $ errs |++ concat add , If (isStartTag ["frameset"]) $ \t' -> do state <- N.S.get let elements = map snd $ openElements state err = FramesetInBody $ tokenElement t' errs <- packTreeErrors [err] t' case reverse elements of [] -> return errs [_] -> return errs (_:e:_) | not (nodeIsElement "body" e) -> return errs _ | not (framesetOk state) -> return errs _ -> do clear <- N.replicateM (length elements - 1) dropCurrentNode insert <- insertElement t' switchMode InFrameset return . consTreeError err $ concat clear ++| insert , If isEOF $ \t' -> do modes <- templateInsertionModes <$> N.S.get if null modes then do hasUnexpected <- hasUnexpectedOpenElement if hasUnexpected then consTreeError UnexpectedElementWithImpliedEndTag <$> stopParsing t' else stopParsing t' else do push t' treeInTemplate , If (isEndTag ["body"]) $ \t' -> do switchMode AfterBody hasBody <- hasInScope ["body"] hasUnexpected <- hasUnexpectedOpenElement if hasBody then if hasUnexpected then packTreeErrors [UnexpectedElementWithImpliedEndTag] t' else packTreeErrors [] t' else packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , If (isEndTag ["html"]) $ \t' -> do push t' switchMode AfterBody hasBody <- hasInScope ["body"] hasUnexpected <- hasUnexpectedOpenElement if hasBody then if hasUnexpected then consTreeError UnexpectedElementWithImpliedEndTag <$> dispatchHtml else packTreeErrors_ [] else consTreeError (UnmatchedEndTag $ tokenElement t') <$> dispatchHtml , If (isStartTag [ "address" , "article" , "aside" , "blockquote" , "center" , "details" , "dialog" , "dir" , "div" , "dl" , "fieldset" , "figcaption" , "figure" , "footer" , "header" , "hgroup" , "main" , "menu" , "nav" , "ol" , "p" , "section" , "summary" , "ul" ]) $ \t' -> do hasP <- hasInButtonScope ["p"] if hasP then do close <- closePElement insert <- insertElement t' return $ close ++| insert else insertElement t' , If (isStartTag $ map T.unpack headerNames) $ \t' -> do current <- currentNode hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] nest <- case fmap elementName current of Just h | elem h headerNames -> consTreeError_ OverlappingHeaderElements <$> closeCurrentNode_ _ -> return [] insert <- insertElement t' return $ close ++ nest ++| insert , If (isStartTag ["pre", "listing"]) $ \t' -> do lineFeed <- next lineFeedState <- if tokenOut lineFeed == Character '\n' then return $ tokenState lineFeed else push lineFeed $> Nothing setFramesetNotOk hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] insert <- insertElement $ mapTokenState' t' (lineFeedState <|>) return $ close ++| insert , If (isStartTag ["form"]) $ \t' -> do state <- N.S.get let hasTemplate = any (nodeIsElement "template" . snd) $ openElements state if hasTemplate || Y.isJust (formElementPointer state) then packTreeErrors [] t' else do hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] N.S.modify $ \state' -> state' { formElementPointer = Just $ elementIndex state' } insert <- insertElement t' return $ close ++| insert , If (isStartTag ["li"]) $ \t' -> do setFramesetNotOk state <- N.S.get clear <- liLoop $ openElements state hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] insert <- insertElement t' return $ clear ++ close ++| insert , If (isStartTag ["dd", "dt"]) $ \t' -> do setFramesetNotOk state <- N.S.get clear <- ddLoop $ openElements state hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] insert <- insertElement t' return $ clear ++ close ++| insert , If (isStartTag ["plaintext"]) $ \t' -> do hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] insert <- insertElement . mapTokenState t' $ \state -> state { currentState = PlainTextState } return $ close ++| insert , If (isStartTag ["button"]) $ \t' -> do hasButton <- hasInScope ["button"] nested <- if hasButton then do generate <- generateEndTags impliedEndTags clear <- closeElement "button" return . consTreeError_ NestedNonRecursiveElement $ generate ++ clear else return [] format <- reconstructFormattingElements insert <- insertElement t' setFramesetNotOk return $ nested ++ format ++| insert , If (isEndTag [ "address" , "article" , "aside" , "blockquote" , "button" , "center" , "details" , "dialog" , "dir" , "div" , "dl" , "fieldset" , "figcaption" , "figure" , "footer" , "header" , "hgroup" , "listing" , "main" , "menu" , "nav" , "ol" , "pre" , "section" , "summary" , "ul" ]) $ \t' -> do let d = tokenTag t' hasMatch <- hasInScope [tagName d] if hasMatch then do generate <- generateEndTags impliedEndTags current <- currentNode let errF = if maybe True (nodeIsElement $ tagName d) current then id else consTreeError_ UnexpectedElementWithImpliedEndTag clear <- closeElement $ tagName d packTree t' $ generate ++ errF clear else packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , If (isEndTag ["form"]) $ \t' -> do state <- N.S.get if Y.isJust . L.find (nodeIsElement "template" . snd) $ openElements state then do hasTemplate <- hasInScope ["template"] if hasTemplate then do generate <- generateEndTags impliedEndTags current <- currentNode let errF = if maybe True (nodeIsElement "form") current then id else consTreeError_ UnexpectedElementWithImpliedEndTag clear <- closeElement "form" packTree t' $ generate ++ errF clear else packTreeErrors [] t' else do let formElement = formElementPointer state N.S.put $ state { formElementPointer = Nothing } hasFormElement <- maybe (return False) hasIndexInScope formElement if hasFormElement then do generate <- generateEndTags impliedEndTags let current = maybe 0 fst . Y.listToMaybe $ openElements state es = takeWhile ((/=) (Y.fromMaybe current formElement) . fst) $ openElements state errF = if null es then id else consTreeError_ UnexpectedElementWithImpliedEndTag close <- closeAncestorNode_ . fromIntegral $ length es packTree t' $ generate ++ errF close else packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , If (isEndTag ["p"]) $ \t' -> do hasP <- hasInButtonScope ["p"] ps <- if hasP then closePElement else fmap (consTreeError_ . UnmatchedEndTag $ tokenElement t') . insertNullElement_ $ emptyTagParams { tagName = "p" } packTree t' ps , If (isEndTag ["li"]) $ \t' -> do hasLi <- hasInListItemScope ["li"] if hasLi then do generate <- generateEndTags $ L.delete "li" impliedEndTags current <- currentNode let errF = if maybe True (nodeIsElement "li") current then id else consTreeError_ UnexpectedElementWithImpliedEndTag clear <- closeElement "li" packTree t' $ generate ++ errF clear else packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , If (isEndTag ["dd", "dt"]) $ \t' -> do let d = tokenTag t' hasMatch <- hasInScope [tagName d] if hasMatch then do generate <- generateEndTags $ L.delete (tagName d) impliedEndTags current <- currentNode let errF = if maybe True (nodeIsElement $ tagName d) current then id else consTreeError_ UnexpectedElementWithImpliedEndTag clear <- closeElement $ tagName d packTree t' $ generate ++ errF clear else packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , If (isEndTag $ map T.unpack headerNames) $ \t' -> do let d = tokenTag t' hasMatch <- hasInScope headerNames if hasMatch then do generate <- generateEndTags $ L.delete (tagName d) impliedEndTags current <- currentNode let errF = if maybe True (nodeIsElement $ tagName d) current then id else consTreeError_ UnexpectedElementWithImpliedEndTag clear <- closeElements headerNames packTree t' $ generate ++ errF clear else packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , If (isStartTag ["a"]) $ \t' -> do state <- N.S.get let formatting = activeFormattingElements state active = Y.fromMaybe [] $ Y.listToMaybe formatting nested <- case L.find (\f -> tagName (snd f) == "a") active of Just (i, _) -> do adopt <- consTreeError NestedElementForAdoptionAgency <$> runAdoptionAgency t' open' <- openElements <$> N.S.get close <- case break ((==) i . fst) open' of (_, []) -> return [] (es1, _) -> do N.S.modify $ \state' -> state' { activeFormattingElements = case activeFormattingElements state' of [] -> [] (es:ess) -> filter ((/=) i . fst) es : ess } closeAncestorNode_ . fromIntegral $ length es1 return $ adopt |++ close Nothing -> packTreeErrors [] t' format <- reconstructFormattingElements insert <- insertFormattingElement t' return $ nested |++| format ++| insert , If (isStartTag [ "b" , "big" , "code" , "em" , "font" , "i" , "s" , "small" , "strike" , "strong" , "tt" , "u" ]) $ \t' -> do format <- reconstructFormattingElements insert <- insertFormattingElement t' return $ format ++| insert , If (isStartTag ["nobr"]) $ \t' -> do format <- reconstructFormattingElements hasNobr <- hasInScope ["nobr"] nested <- if hasNobr then do --BUG: Adoption agency + insertion may duplicate the errors? adopt <- consTreeError NestedElementForAdoptionAgency <$> runAdoptionAgency t' format' <- reconstructFormattingElements return $ adopt |++ format' else packTreeErrors [] t' insert <- insertFormattingElement t' return $ format ++| nested |++| insert , If (isEndTag [ "a" , "b" , "big" , "code" , "em" , "font" , "i" , "nobr" , "s" , "small" , "strike" , "strong" , "tt" , "u" ]) runAdoptionAgency , If (isStartTag ["applet", "marquee", "object"]) $ \t' -> do setFramesetNotOk format <- reconstructFormattingElements insertFormattingMarker insert <- insertElement t' return $ format ++| insert , If (isEndTag ["applet", "marquee", "object"]) $ \t' -> do let d = tokenTag t' hasMatch <- hasInScope [tagName d] if hasMatch then do generate <- generateEndTags impliedEndTags current <- currentNode let errF = if maybe True (nodeIsElement $ tagName d) current then id else consTreeError_ UnexpectedElementWithImpliedEndTag clear <- closeElement $ tagName d clearFormattingElements packTree t' $ generate ++ errF clear else packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , If (isStartTag ["table"]) $ \t' -> do hasP <- hasInButtonScope ["p"] quirks <- (== FullQuirks) . quirksMode <$> N.S.get clear <- if hasP && not quirks then closePElement else return [] insert <- insertElement t' setFramesetNotOk switchMode InTable return $ clear ++| insert , If (isEndTag ["br"]) $ \t' -> do push . mapTokenErrs (BREndTag :) . flip mapTokenOut t' $ const (StartTag $ (tokenTag t') { tagAttributes = M.empty }) treeInBody , If (isStartTag ["area", "br", "embed", "img", "keygen", "wbr"]) $ \t' -> do setFramesetNotOk format <- reconstructFormattingElements insert <- insertNullElement t' return $ format ++| insert , If (isStartTag ["input"]) $ \t' -> do case fmap snd . L.find (\a -> fst a == "type") . M.toList . tagAttributes $ tokenTag t' of Nothing -> setFramesetNotOk Just "hidden" -> return () Just _ -> setFramesetNotOk format <- reconstructFormattingElements insert <- insertNullElement t' return $ format ++| insert , If (isStartTag ["param", "source", "track"]) insertNullElement , If (isStartTag ["hr"]) $ \t' -> do setFramesetNotOk hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] insert <- insertNullElement t' return $ close ++| insert , If (isStartTag ["image"]) $ \t' -> do push . mapTokenErrs (ObsoleteTagName "image" :) . flip mapTokenOut t' $ const (StartTag $ (tokenTag t') { tagName = "img" }) treeInBody , If (isStartTag ["textarea"]) $ \t' -> do lineFeed <- next lineFeedState <- if tokenOut lineFeed == Character '\n' then return $ tokenState lineFeed else push lineFeed $> Nothing setFramesetNotOk genericRCDataElement $ mapTokenState' t' (lineFeedState <|>) , If (isStartTag ["xmp"]) $ \t' -> do setFramesetNotOk hasP <- hasInButtonScope ["p"] close <- if hasP then closePElement else return [] format <- reconstructFormattingElements text <- genericRawTextElement t' return $ close ++ format ++| text , If (isStartTag ["iframe"]) $ \t' -> do setFramesetNotOk genericRawTextElement t' , If (isStartTag ["noembed"]) genericRawTextElement , If (isStartTag ["noscript"]) $ \t' -> do state <- N.S.get if scriptingEnabled state then genericRawTextElement t' else do -- "any other start tag" format <- reconstructFormattingElements insert <- insertElement t' return $ format ++| insert , If (isStartTag ["select"]) $ \t' -> do setFramesetNotOk state <- N.S.get switchMode $ if elem (insertionMode state) [ InTable , InCaption , InTableBody , InRow , InCell ] then InSelectInTable else InSelect format <- reconstructFormattingElements insert <- insertElement t' return $ format ++| insert , If (isStartTag ["optgroup", "option"]) $ \t' -> do current <- currentNode format <- case fmap elementName current of Just "option" -> do close <- closeCurrentNode_ format' <- reconstructFormattingElements return $ close ++ format' _ -> reconstructFormattingElements insert <- insertElement t' return $ format ++| insert , If (isStartTag ["rb", "rtc"]) $ \t' -> do hasRuby <- hasInScope ["ruby"] generate <- if hasRuby then do generate' <- generateEndTags impliedEndTags current <- currentNode let errF = if maybe True (nodeIsElement "ruby") current then id else consTreeError_ UnexpectedElementWithImpliedEndTag return $ errF generate' else return [] insert <- insertElement t' return $ generate ++| insert , If (isStartTag ["rp", "rt"]) $ \t' -> do hasRuby <- hasInScope ["ruby"] generate <- if hasRuby then do generate' <- generateEndTags $ L.delete "rtc" impliedEndTags current <- currentNode let errF = if maybe True (\e -> nodeIsElement "ruby" e || nodeIsElement "rtc" e) current then id else consTreeError_ UnexpectedElementWithImpliedEndTag return $ errF generate' else return [] insert <- insertElement t' return $ generate ++| insert , If (isStartTag ["math"]) $ \t' -> do let d = tokenTag t' insertF | tagIsSelfClosing d = insertForeignNullElement | otherwise = insertForeignElement format <- reconstructFormattingElements insert <- insertF mathMLNamespace $ mapTokenOut (const . StartTag $ adjustMathMLAttributes d) t' return $ format ++| insert , If (isStartTag ["svg"]) $ \t' -> do let d = tokenTag t' insertF | tagIsSelfClosing d = insertForeignNullElement | otherwise = insertForeignElement format <- reconstructFormattingElements insert <- insertF svgNamespace $ mapTokenOut (const . StartTag $ adjustSvgAttributes d) t' return $ format ++| insert , If (isStartTag [ "caption" , "col" , "colgroup" , "frame" , "head" , "tbody" , "td" , "tfoot" , "th" , "thead" , "tr" ]) $ \t' -> packTreeErrors [UnexpectedDescendantElement $ tokenElement t'] t' , If isAnyStartTag $ \t' -> do format <- reconstructFormattingElements insert <- insertElement t' return $ format ++| insert , If isAnyEndTag $ \t' -> do state <- N.S.get anyOtherEndTag (openElements state) t' ] where headerNames = [ T.snoc "h" i | i <- ['1'..'6'] ] hasUnexpectedOpenElement = hasOpenElementExcept [ "dd" , "dt" , "li" , "optgroup" , "option" , "p" , "rb" , "rp" , "rt" , "rtc" , "tbody" , "td" , "tfoot" , "th" , "thead" , "tr" , "body" , "html" ] -- | __HTML:__ -- the "any other end tag" entry in @[the "in body" insertion mode] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody)@ -- -- Delegate a token to to the 'InBody' section of the state machine, but skip -- the token-dependent behaviour and instead simply treat it according to the -- fallback case for 'EndTag' tokens. anyOtherEndTag :: [(NodeIndex, ElementParams)] -- ^ The stack of open elements. -> TreeInput -- ^ The token to process. -> TreeBuilder TreeOutput anyOtherEndTag [] t' = packTreeErrors [] t' anyOtherEndTag ((i, e):es) t' = case tokenOut t' of EndTag d | nodeIsElement (tagName d) e -> do generate <- generateEndTags $ L.delete (tagName d) impliedEndTags elementIndices <- map fst . openElements <$> N.S.get let errF = if Y.listToMaybe elementIndices == Just i then id else consTreeError_ UnexpectedElementWithImpliedEndTag count = length (takeWhile (/= i) elementIndices) + 1 clear <- clearCount $ fromIntegral count packTree t' $ generate ++ errF clear EndTag _ -> if nodeIsSpecial e then packTreeErrors [UnexpectedElementWithImpliedEndTag] t' else anyOtherEndTag es t' _ -> packTreeErrors [] t' -- | Check whether a node is in 'scopeElements' -- the tags which break -- open-tag searches. isScopeElement :: ElementParams -> Bool isScopeElement d' = elem (Y.fromMaybe T.empty $ elementNamespace d', elementName d') scopeElements -- | Check whether a node is in 'specialElements' -- the tags which are subject -- to custom handling. isSpecialElement :: ElementParams -> Bool isSpecialElement d' = elem (Y.fromMaybe T.empty $ elementNamespace d', elementName d') specialElements -- | __HTML:__ -- the substeps spanned by the /"Loop"/ for processing @\@ and -- @\@ tags in @[the "in body" insertion mode] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody)@ -- -- @\@ and @\@ elements should close most opened elements if the -- relevant end tag is left implied. ddLoop :: [(NodeIndex, ElementParams)] -- ^ The stack of open elements. -> TreeBuilder [Patch] ddLoop [] = return [] ddLoop ((_, e):es) | nodeIsElement "dd" e = do generate <- generateEndTags $ L.delete "dd" impliedEndTags current <- currentNode let err = case current of Just e' | not $ nodeIsElement "dd" e' -> consTreeError_ UnexpectedElementWithImpliedEndTag _ -> id clear <- closeElement "dd" return . err $ generate ++ clear | nodeIsElement "dt" e = do generate <- generateEndTags $ L.delete "dt" impliedEndTags current <- currentNode let err = case current of Just e' | not $ nodeIsElement "dt" e' -> consTreeError_ UnexpectedElementWithImpliedEndTag _ -> id clear <- closeElement "dt" return . err $ generate ++ clear | nodeIsElement "address" e = ddLoop es | nodeIsElement "div" e = ddLoop es | nodeIsElement "p" e = ddLoop es | isSpecialElement e = return [] | otherwise = ddLoop es -- | __HTML:__ -- the substeps spanned by the /"Loop"/ for processing @\@ tags in -- @[the "in body" insertion mode] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody)@ -- -- @\@ elements should close most opened elements if the relevant end tag -- is left implied. liLoop :: [(NodeIndex, ElementParams)] -- ^ The stack of open elements. -> TreeBuilder [Patch] liLoop [] = return [] liLoop ((_, e):es) | nodeIsElement "li" e = do generate <- generateEndTags $ L.delete "li" impliedEndTags current <- currentNode let err = case current of Just e' | not $ nodeIsElement "li" e' -> consTreeError_ UnexpectedElementWithImpliedEndTag _ -> id clear <- closeElement "li" return . err $ generate ++ clear | nodeIsElement "address" e = liLoop es | nodeIsElement "div" e = liLoop es | nodeIsElement "p" e = liLoop es | isSpecialElement e = return [] | otherwise = liLoop es -- | __HTML:__ -- @[adoption agency algorithm] -- (https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm)@ -- -- Handle a misnested formatting element by closing its ancestors up to a -- stable branch point, and then reconstructing the tree in a more logical -- order. runAdoptionAgency :: TreeInput -> TreeBuilder TreeOutput runAdoptionAgency t' = case tokenOut t' of StartTag d -> runAdoptionAgency' d EndTag d -> runAdoptionAgency' d _ -> packTreeErrors [] t' where runAdoptionAgency' d = do current <- currentNode index <- currentNodeIndex active <- concat . activeFormattingElements <$> N.S.get let isCurrent = maybe False (nodeIsElement $ tagName d) current inActive = Y.isJust $ L.find (\(i, _) -> Just i == index) active if isCurrent && not inActive then closeCurrentNode_ >>= packTree t' else runAdoptionAgencyOuterLoop 8 t' d -- | __HTML:__ -- the substeps spanned by the /"Outer loop"/ within the -- @[adoption agency algorithm] -- (https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm)@ -- -- Determine whether the current formatting element close tag is mis-nested, -- and if so, close the element and reconstruct an equivalent formatting tree. runAdoptionAgencyOuterLoop :: Word -- ^ The maximum number of times the loop should be run, to avoid -- overly-costly input. Note that this implementation counts down -- rather than up, and so this function should generally be passed @8@ -- when it is originally called to implement the official behaviour. -> TreeInput -- ^ The token to process. -> TagParams -- ^ The inner element data extracted from the second parameter. -> TreeBuilder TreeOutput -- 4. runAdoptionAgencyOuterLoop 0 t' _ = packTreeErrors [] t' runAdoptionAgencyOuterLoop i t' d = do _ <- error "Adoption agency not yet implemented" -- 6. state <- N.S.get let formatting = activeFormattingElements state open = openElements state case L.find (\f -> tagName (snd $ snd f) == tagName d) . zip [0..] $ concat formatting of Nothing -> anyOtherEndTag open t' Just (indexFormat, formattingElement) -> do let removeFormattingElement = N.S.put $ state { activeFormattingElements = map (L.delete formattingElement) formatting } -- 7. case L.elemIndex (fst formattingElement) (map fst open) of Nothing -> do removeFormattingElement packTreeErrors [IncompletelyClosedFormattingElement] t' Just indexOpen -> do -- 8. Make use of the fact we can get an index rather than -- just a 'Bool' when checking if the formatting element is -- in the stack of open elements, to check if it's in -- scope ('hasInScope' only checks the tag name, not UID). let descendants = take indexOpen open formattingNode = head $ drop indexOpen open if any (isScopeElement . snd) descendants then packTreeErrors [UnexpectedFormattingElementOutOfScope] t' else do -- 9. let overlap = if maybe False (fst formattingElement ==) . Y.listToMaybe $ map fst open then consTreeError_ OverlappingFormattingElements else id -- 10. case L.find (isSpecialElement . snd) $ reverse descendants of Nothing -> do -- 11. clear <- clearCount $ fromIntegral indexOpen close <- closeCurrentNode_ removeFormattingElement packTree t' $ overlap clear ++ close Just furthestBlock -> do -- 14. (bookmark, lastTag) <- runAdoptionAgencyInnerLoop 3 indexFormat formattingElement furthestBlock furthestBlock . drop 1 $ dropWhile (/= furthestBlock) open -- 15. reparent <- case L.elemIndex (fst lastTag) $ map fst open of Nothing -> return [] Just indexInner -> if indexInner < indexOpen then closeAncestorNodes_ (fromIntegral indexInner) (fromIntegral $ indexOpen - indexInner) else return [] -- 16. newElement <- createElement $ snd formattingNode let newTag = unpackNodeData <$> newElement N.S.modify $ \state' -> state' -- 19. { activeFormattingElements = let findBookmark _ [] = [] findBookmark bookmark' (es:ess) | bookmark' > l = es : findBookmark (bookmark' - l) ess | otherwise = (ds ++ newTag : drop 1 as) : ess where l = length es (ds, as) = splitAt bookmark' es in findBookmark bookmark formatting -- 20. , openElements = let es = L.delete formattingNode open (ds, as) = break (== furthestBlock) es in ds ++ newElement : as } -- 21. recurse <- runAdoptionAgencyOuterLoop (i - 1) t' d packTree t' $ overlap reparent ++ treePatches recurse -- | __HTML:__ -- the substeps spanned by the /"Inner loop"/ within the -- @[adoption agency algorithm] -- (https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm)@ -- -- Close ancestor elements until reaching a stable node for the new tree, and -- then reconstruct an equivalent tree. runAdoptionAgencyInnerLoop :: Word -- ^ __HTML:__ -- @inner loop counter@ -- -- The maximum number of times the loop should be run, to avoid -- overly-costly input. Note that this implementation counts down -- rather than up, and so this function should generally be passed @3@. -> Int -- ^ __HTML:__ -- the bookmark noting the position of @formatting element@ -> (NodeIndex, TagParams) -- ^ __HTML:__ -- @formatting element@ -- -- The node selected as the base of the reparenting process. -> (NodeIndex, ElementParams) -- ^ __HTML:__ -- @last node@ -- -- The child node actively being reparented. -> (NodeIndex, ElementParams) -- ^ __HTML:__ -- @furthest block@ -- -- The most senior descendant of @formatting element@ which will serve -- as a secondary fixed point. -> [(NodeIndex, ElementParams)] -- ^ The stack of open elements; @inner node@ is derived from this. -> TreeBuilder (Int, (NodeIndex, TagParams)) runAdoptionAgencyInnerLoop _ bookmark formattingElement _ _ [] = return (bookmark, formattingElement) runAdoptionAgencyInnerLoop i bookmark formattingElement lastNode furthestBlock (innerNode:ns) -- 4. | fst formattingElement == fst innerNode = return (bookmark, formattingElement) | otherwise = do state <- N.S.get let formatting = activeFormattingElements state case L.elemIndex innerTag $ concat formatting of Just innerIndex -> if i == 0 -- 5. / 6. then do N.S.modify $ \state' -> state' --BUG: Not reflected in the patch list. { openElements = L.delete innerNode $ openElements state' , activeFormattingElements = map (L.delete innerTag) formatting } let bookmark' | innerIndex <= bookmark = pred bookmark | otherwise = bookmark runAdoptionAgencyInnerLoop i' bookmark' formattingElement lastNode furthestBlock ns else do -- 7. newElement <- createElement $ snd innerNode let newTag = unpackNodeData <$> newElement N.S.modify $ \state' -> state' { openElements = replaceNode newElement innerNode $ openElements state' , activeFormattingElements = map (replaceNode newTag innerTag) formatting } -- 8. let bookmark' | lastNode == furthestBlock = maybe bookmark succ . L.elemIndex newTag $ concat formatting | otherwise = bookmark -- 10. / 11. runAdoptionAgencyInnerLoop i' bookmark' formattingElement innerNode furthestBlock ns -- 6. Nothing -> do N.S.modify $ \state' -> state' --BUG: Not reflected in the patch list. { openElements = L.delete innerNode $ openElements state' } runAdoptionAgencyInnerLoop i' bookmark formattingElement lastNode furthestBlock ns where i' = if i == 0 then 0 else i - 1 replaceNode newElement innerElement es = ds ++ newElement : drop 1 as where (ds, as) = break (== innerElement) es innerTag = unpackNodeData <$> innerNode -- | Retrieve the data from the finalized form of the collection, and repack it -- in the form expected by some of the parser combinators used by the -- @runAdoptionAgency@ algorithm. unpackNodeData :: ElementParams -> TagParams unpackNodeData d = emptyTagParams { tagName = case elementPrefix d of Just prefix -> prefix <> ":" <> elementName d Nothing -> elementName d , tagAttributes = M.fromList . map unpackAttribute . toAttrList $ elementAttributes d -- Don't have to worry about self-closing tags, as if they're taking part -- in the adoption agency algorithm, they must have been able to have -- children. } where unpackAttribute d' = case attrPrefix d' of Just prefix -> (prefix <> ":" <> attrName d', attrValue d') Nothing -> (attrName d', attrValue d')