{-| Description: Token processing rules for content misnested within a @\@. Copyright: (c) 2020-2021 Sam May License: MPL-2.0 Maintainer: ag.eitilt@gmail.com Stability: stable Portability: portable -} module Web.Mangrove.Parse.Tree.InTableText ( treeInTableText ) where import qualified Data.Foldable as D import qualified Data.List as L import qualified Data.Tuple.HT as U.HT import Web.Mangrove.Parse.Common.Error import Web.Mangrove.Parse.Tokenize.Common import Web.Mangrove.Parse.Tree.Common import Web.Mangrove.Parse.Tree.InTable import Web.Mangrove.Parse.Tree.Patch import Web.Willow.Common.Encoding.Character import Web.Willow.Common.Parser import Web.Willow.Common.Parser.Switch import {-# SOURCE #-} Web.Mangrove.Parse.Tree.Dispatcher -- | __HTML:__ -- @[the "in table text" insertion mode] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intabletext)@ -- -- The parsing instructions corresponding to the 'InTableText' section of the -- state machine. treeInTableText :: TreeBuilder TreeOutput treeInTableText = do resetMode pending <- treeInTableText' let processF = if not $ all (maybe True isAsciiWhitespace . U.HT.snd3) pending then anythingElse else insertCharacter ps <- D.foldrM (repack processF) [] pending start <- packTree_ [] return $ L.foldr foldOut start ps where repack _ ([], Nothing, _) [] = return [] repack _ (errs, Nothing, _) [] = pure . consTreeErrors errs <$> dispatchHtml repack _ (errs, Nothing, _) (t:ts) = return $ consTreeErrors errs t : ts repack f (errs, Just c, state) ts = (: ts) <$> f (dummyStateToken errs (Character c) state) foldOut ps ps' = ps' { treePatches = treePatches ps ++ treePatches ps' } consTreeErrors = flip $ foldr consTreeError -- | __HTML:__ -- @[the "in table" insertion mode] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable)@ -- -- The parsing instructions corresponding to the 'InTableText' section of the -- state machine. Specifically, this consumes all following 'Character' tokens -- to construct the list of [pending table character tokens] -- (https://html.spec.whatwg.org/multipage/parsing.html#concept-pending-table-char-tokens), -- while the surrounding 'treeInTableText' is what actually sends them to the -- 'InTable' state for insertion. treeInTableText' :: TreeBuilder [([ParseError], Maybe Char, TokenizerOutputState)] treeInTableText' = next >>= switch [ If isNull $ \t' -> do cs <- treeInTableText' return $ (UnexpectedNullCharacter : tokenErrs t', Nothing, tokenState t') : cs , If isCharacter $ \t' -> do cs <- treeInTableText' return $ (tokenErrs t', tokenCharacter t', tokenState t') : cs , Else $ \t' -> do push t' return [] ]