{-|
Description:    Token processing rules for spans of raw character strings.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tree.InText
    ( treeInText
    , genericRawTextElement
    , genericRCDataElement
    ) where


import qualified Control.Monad.Trans.State as N.S

import qualified Data.Maybe as Y

import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tree.Common
import Web.Mangrove.Parse.Tree.Patch
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Switch


-- | __HTML:__
--      @[the "text" insertion mode]
--      (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata)@
--
-- Tree construction rules for the 'InText' insertion mode.  The next token is
-- read and handed to the first matching case:
--
-- * character tokens go to 'insertCharacter';
-- * an end-of-file token is passed to 'push', the insertion mode is set back
--   to the stored original mode (staying in 'InText' if none was stored), and
--   the current node is closed with an 'EOFInText' error attached;
-- * a @\<\/script\>@ end tag runs 'resetMode', closes the current node, and
--   packs the result together with the tag;
-- * any other end tag runs 'resetMode' and closes the current node.
treeInText :: TreeBuilder TreeOutput
treeInText = do
    tok <- next
    switch
        [ If isCharacter insertCharacter
        , If isEOF onEOF
        , If (isEndTag ["script"]) onScriptEnd
        , If isAnyEndTag onOtherEnd
        ] tok
  where
    -- The token is handed to 'push' before anything else (presumably so that
    -- it is seen again under the restored insertion mode -- confirm against
    -- 'push').
    onEOF eofTok = do
        push eofTok
        N.S.modify revertMode
        closeCurrentNode_ >>= packTree_ . consTreeError_ EOFInText
    -- Swap the stored original mode back in and clear the stored slot.
    revertMode st = st
        { insertionMode = Y.fromMaybe InText (originalInsertionMode st)
        , originalInsertionMode = Nothing
        }
    -- Script end tags are packed explicitly alongside the closed node.
    onScriptEnd endTok = do
        resetMode
        packTree endTok =<< closeCurrentNode_
    onOtherEnd endTok = resetMode >> closeCurrentNode endTok


-- | __HTML:__
--      @[generic raw text element parsing algorithm]
--      (https://html.spec.whatwg.org/multipage/parsing.html#generic-raw-text-element-parsing-algorithm)@
--
-- Insert an element whose content is unescaped plain text; this is
-- 'genericParsingAlgorithm' fixed to 'RawTextState'.
genericRawTextElement :: TreeInput -> TreeBuilder TreeOutput
genericRawTextElement = genericParsingAlgorithm RawTextState


-- | __HTML:__
--      @[generic RCDATA element parsing algorithm]
--      (https://html.spec.whatwg.org/multipage/parsing.html#generic-rcdata-element-parsing-algorithm)@
--
-- Insert an element containing plain text, potentially with character
-- references.
genericRCDataElement :: TreeInput -> TreeBuilder TreeOutput
genericRCDataElement = genericParsingAlgorithm RCDataState


-- | __HTML:__
--      @[parsing elements that contain only text]
--      (https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text)@
--
-- Shared implementation behind 'genericRawTextElement' and
-- 'genericRCDataElement', parameterized over the tokenizer state to select.
--
-- The current insertion mode is saved as the original insertion mode and
-- replaced by 'InText'; afterwards the token is passed to 'insertElement' with
-- its tokenizer state's 'currentState' field set to the requested value.
genericParsingAlgorithm :: CurrentTokenizerState -> TreeInput -> TreeBuilder TreeOutput
genericParsingAlgorithm mode tok = do
    N.S.modify enterText
    insertElement $ mapTokenState tok useMode
  where
    -- Remember where we came from, then switch to the text insertion mode.
    enterText st = st
        { insertionMode = InText
        , originalInsertionMode = Just (insertionMode st)
        }
    -- Applied through 'mapTokenState' (presumably to the tokenizer state
    -- carried by the token -- confirm against its definition).
    useMode tokSt = tokSt
        { currentState = mode
        }