{-# LANGUAGE OverloadedStrings #-} {-| Description: Token processing rules within non-HTML content. Copyright: (c) 2020 Sam May License: MPL-2.0 Maintainer: ag.eitilt@gmail.com Stability: stable Portability: portable -} module Web.Mangrove.Parse.Tree.Foreign ( treeForeign ) where import qualified Control.Monad.Trans.State as N.S import qualified Data.HashMap.Strict as M import qualified Data.Maybe as Y import qualified Data.Text as T import Web.Willow.DOM import Web.Mangrove.Parse.Common.Error import Web.Mangrove.Parse.Tokenize.Common import Web.Mangrove.Parse.Tree.Common import Web.Mangrove.Parse.Tree.InBody import Web.Mangrove.Parse.Tree.Patch import Web.Willow.Common.Encoding.Character import Web.Willow.Common.Parser import Web.Willow.Common.Parser.Switch import {-# SOURCE #-} Web.Mangrove.Parse.Tree.Dispatcher -- | __HTML:__ -- @[the rules for parsing tokens in foreign content] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign)@ -- -- The parsing instructions for non-HTML content, which follows a simpler -- recovery model more in line with HTML. treeForeign :: TreeBuilder TreeOutput treeForeign = next >>= switch [ If isNull $ \t' -> consTreeError UnexpectedNullCharacter <$> insertCharacter (mapTokenOut (const $ Character replacementChar) t') , If isWhitespace insertCharacter , If isCharacter $ \t' -> setFramesetNotOk *> insertCharacter t' , If isComment insertComment , If isDoctype $ \t' -> packTreeErrors [UnexpectedDoctype $ tokenDocumentType t'] t' , If isEOF $ \t' -> push t' *> treeInBody , If (isStartTag [ "b" , "big" , "blockquote" , "body" , "br" , "center" , "code" , "dd" , "div" , "dl" , "dt" , "em" , "embed" , "h1" , "h2" , "h3" , "h4" , "h5" , "h6" , "head" , "hr" , "i" , "img" , "li" , "listing" , "menu" , "meta" , "nobr" , "ol" , "p" , "pre" , "ruby" , "s" , "small" , "span" , "strong" , "strike" , "sub" , "sup" , "table" , "tt" , "u" , "ul" , "var" ]) htmlStartTag , If (isStartTag ["font"]) $ \t' -> if any (`elem` ["color", "face", "size"]) (map fst . M.toList . tokenAttributes $ tokenOut t') then htmlStartTag t' else anyOtherStartTag t' , If isAnyStartTag anyOtherStartTag , If (isEndTag ["script"]) $ \t' -> do current <- currentNode state <- N.S.get if (current >>= elementNamespace) == Just mathMLNamespace then closeCurrentNode t' <* closeSvgScript else anyOtherEndTag (openElements state) t' , If isAnyEndTag $ \t' -> do state <- N.S.get anyOtherEndTag (openElements state) t' ] where anyOtherStartTag t' = case tokenOut t' of StartTag d -> do current <- adjustedCurrentNode let ns = Y.fromMaybe htmlNamespace $ current >>= elementNamespace d' = case ns of ns' | mathMLNamespace == ns' -> adjustMathMLAttributes d ns' | svgNamespace == ns' -> adjustSvgAttributes $ case tagName d of "altglyph" -> d { tagName = "altGlyph" } "altglyphdef" -> d { tagName = "altGlyphDef" } "altglyphitem" -> d { tagName = "altGlyphItem" } "animatecolor" -> d { tagName = "animateColor" } "animatemotion" -> d { tagName = "animateMotion" } "animatetransform" -> d { tagName = "animateTransform" } "clippath" -> d { tagName = "clipPath" } "feblend" -> d { tagName = "feBlend" } "fecolormatrix" -> d { tagName = "feColorMatrix" } "fecomponenttransfer" -> d { tagName = "feComponentTransfer" } "fecomposite" -> d { tagName = "feComposite" } "feconvolvematrix" -> d { tagName = "feConvolveMatrix" } "fediffuselighting" -> d { tagName = "feDiffuseLighting" } "fedisplacementmap" -> d { tagName = "feDisplacementMap" } "fedistantlight" -> d { tagName = "feDistantLight" } "fedropshadow" -> d { tagName = "feDropShadow" } "feflood" -> d { tagName = "feFlood" } "fefunca" -> d { tagName = "feFuncA" } "fefuncb" -> d { tagName = "feFuncB" } "fefuncg" -> d { tagName = "feFuncG" } "fefuncr" -> d { tagName = "feFuncR" } "fegaussianblur" -> d { tagName = "feGaussianBlur" } "feimage" -> d { tagName = "feImage" } "femerge" -> d { tagName = "feMerge" } "femergenode" -> d { tagName = "feMergeNode" } "femorphology" -> d { tagName = "feMorphology" } "feoffset" -> d { tagName = "feOffset" } "fepointlight" -> d { tagName = "fePointLight" } "fespecularlighting" -> d { tagName = "feSpecularLighting" } "fespotlight" -> d { tagName = "feSpotLight" } "fetile" -> d { tagName = "feTile" } "feturbulence" -> d { tagName = "feTurbulence" } "foreignobject" -> d { tagName = "foreignObject" } "glyphref" -> d { tagName = "glyphRef" } "lineargradient" -> d { tagName = "linearGradient" } "radialgradient" -> d { tagName = "radialGradient" } "textpath" -> d { tagName = "textPath" } _ -> d _ -> d t'' = mapTokenOut (const $ StartTag d') t' if tagIsSelfClosing d' then if tagName d' == "script" && ns == svgNamespace then insertForeignNullElement ns t'' <* closeSvgScript else insertForeignNullElement ns t'' else insertForeignElement ns t'' _ -> packTreeErrors [] t' anyOtherEndTag [] t' = packTreeErrors [] t' anyOtherEndTag es@(e:_) t' = case tokenOut t' of EndTag d -> if T.map toAsciiLower (elementName $ snd e) == tagName d then loopEndTag d es t' else consTreeError UnexpectedElementWithImpliedEndTag <$> loopEndTag d es t' _ -> packTreeErrors [] t' loopEndTag d es t' = either id id <$> loopEndTag' d es t' loopEndTag' _ [] t' = Left <$> packTreeErrors [] t' loopEndTag' _ [_] t' = Left <$> packTreeErrors [] t' loopEndTag' d (e:es) t' | T.map toAsciiLower (elementName $ snd e) == tagName d = Right <$> closeCurrentNode t' | otherwise = case es of (e':_) | elementNamespace (snd e') == Just htmlNamespace -> push t' *> fmap Left dispatchHtml _ -> do recurse <- loopEndTag' d es t' case recurse of l@(Left _) -> return l Right clear -> do close <- closeCurrentNode_ return . Right $ close ++| clear tokenAttributes (StartTag d) = tagAttributes d tokenAttributes (EndTag d) = tagAttributes d tokenAttributes _ = M.empty htmlStartTag t' = do isFragment <- inFragment consTreeError UnexpectedHtmlElementInForeignContent <$> if isFragment then anyOtherStartTag t' else do push t' close <- closeCurrentNode_ clear <- clearToIntegration packTree_ $ close ++ clear clearToIntegration = do current <- currentNode let mathML = maybe False atMathMLIntegration current html = maybe False atHtmlIntegration current ns = Y.fromMaybe htmlNamespace $ current >>= elementNamespace if mathML || html || ns == htmlNamespace then return [] else do close <- closeCurrentNode_ clear <- clearToIntegration return $ close ++ clear closeSvgScript = undefined