{-|
Description:    Tokenization rules for characters within @\<[CDATA[@ ... @]]\>@ sections.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.CData
    ( tokenCDataSection
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common


-- | __HTML:__
--      @[CDATA section state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state)@
--
-- Entry point for the 'CDataState' section of the state machine.  A @']'@
-- hands control to 'tokenCDataSectionBracket'; every other character is
-- emitted unchanged as a 'Character' token with no attached errors.
tokenCDataSection :: Tokenizer [TokenizerOutput Token]
tokenCDataSection = tokenizers (Just atEnd)
    [ ifs_ (== ']') tokenCDataSectionBracket
    , elsesChar literal
    ]
  where
    -- Fallback output: an 'EndOfStream' token flagged with 'EOFInCData'.
    -- NOTE(review): presumably used when the input runs out -- confirm
    -- against the definition of 'tokenizers'.
    atEnd = [([EOFInCData], EndOfStream)]
    -- Pass a character through as-is.
    literal c = emit' ([], Character c)

-- | __HTML:__
--      @[CDATA section bracket state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state)@
--
-- Continuation of the 'CDataState' section of the state machine once a single
-- @"]"@ has been read.  A second @']'@ moves on to 'tokenCDataSectionEnd';
-- anything else results in the pending @']'@ being emitted as a 'Character'
-- token.
tokenCDataSectionBracket :: Tokenizer [TokenizerOutput Token]
tokenCDataSectionBracket = tokenizers (Just [pendingBracket, ([EOFInCData], EndOfStream)])
    [ ifs_ (== ']') tokenCDataSectionEnd
      -- NOTE(review): 'elsePush_' presumably returns the unmatched character
      -- to the input so it is reprocessed -- confirm against its definition.
    , elsePush_ (emit' pendingBracket)
    ]
  where
    -- The @']'@ already read, which turned out not to start a terminator.
    pendingBracket = ([], Character ']')

-- | __HTML:__
--      @[CDATA section end state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state)@
--
-- Continuation of the 'CDataState' section of the state machine once @"]]"@
-- has been read.
tokenCDataSectionEnd :: Tokenizer [TokenizerOutput Token]
tokenCDataSectionEnd = do
    pending <- tokenCDataSectionEnd'
    case tokenizedOut pending of
        -- No characters to re-emit, so no tokens are produced.
        [] -> return []
        (firstChar:laterChars) ->
            let -- Any errors collected by the loop ride on the first token only.
                firstToken = (tokenizedErrs pending, Character firstChar)
                laterTokens = map (\c -> ([], Character c)) laterChars
            in  finalStateList (tokenizedState pending)
                    <$> mapM emit (firstToken : laterTokens)

-- | Helper loop for the __HTML__ @[CDATA section end state]
-- (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state)@,
-- collecting the characters which need to be re-emitted after @"]]"@ has been
-- read.  Every further @']'@ adds one @']'@ to the front of the result; a
-- @'>'@ switches to 'DataState' and ends the string with nothing more, while
-- any other character ends it with the literal @"]]"@.
tokenCDataSectionEnd' :: Tokenizer (TokenizerOutput String)
tokenCDataSectionEnd' = tokenizer (Just ([EOFInCData], unclosed))
    [ if_ (== ']') (consOut ']' <$> tokenCDataSectionEnd')
    , if_ (== '>') (changeState DataState *> packToken ([], ""))
    , elsePush_ (packToken ([], unclosed))
    ]
  where
    -- The two brackets already read, which did not close the section.
    unclosed = "]]"