{-# HLINT ignore "Use ." #-}
{-|
Description:    Tokenization rules for characters within @\<script\>@ ... @\<\/script\>@ sections.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.ScriptData
    ( tokenScriptData
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tokenize.ScriptDataEscaped
import Web.Mangrove.Parse.Tokenize.Tag
import Web.Willow.Common.Encoding.Character


-- | __HTML:__
--      @[script data state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-state)@
--
-- The parsing instructions rooted in the 'ScriptDataState' section of the
-- state machine.  Three cases are distinguished on the next character: a
-- less-than sign, a null character, and anything else.
tokenScriptData :: Tokenizer [TokenizerOutput Token]
tokenScriptData = tokenizers Nothing [onLessThan, onNull, onAnythingElse]
  where
    -- A @\<@ delegates to the less-than sign state.
    onLessThan = ifs_ (== '<') tokenScriptDataLessThanSign

    -- A null character is reported as 'UnexpectedNullCharacter' and the
    -- emitted token carries 'replacementChar' in its place.
    onNull = ifs_ (== '\NUL') (emit' ([UnexpectedNullCharacter], Character replacementChar))

    -- Every other character is emitted as-is, with no errors attached.
    onAnythingElse = elsesChar (emit' . withoutErrors)

    -- Pair a character token with an empty error list.
    withoutErrors c = ([], Character c)


-- | __HTML:__
--      @[script data less-than sign state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state)@
--
-- The parsing instructions for after reading @"\<"@ in the 'ScriptDataState'
-- section of the state machine.
tokenScriptDataLessThanSign :: Tokenizer [TokenizerOutput Token]
tokenScriptDataLessThanSign = tokenizers (Just [lessThan]) branches
  where
    -- The character tokens this state may need to emit, both error-free.
    -- NOTE(review): the @Just [lessThan]@ passed to 'tokenizers' is presumably
    -- the output used when no further input is available -- confirm against
    -- the definition of 'tokenizers'.
    lessThan = ([], Character '<')
    exclamation = ([], Character '!')

    branches =
        [ -- @\/@: continue with the end tag open handling.
          ifs_ (== '/') tokenAppropriateEndTagOpen
          -- @!@: prepend @\<@ and then @!@ to whatever the escape start
          -- state produces.
        , ifs_ (== '!') (consEmit lessThan (consEmit exclamation tokenScriptDataEscapeStart))
          -- Anything else: emit only the pending @\<@.
        , elsePush_ (emit' lessThan)
        ]