{-|
Description:    Tokenization rules for characters which should not be processed further.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.RawText
    ( tokenRawText
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tokenize.Tag

import Web.Willow.Common.Encoding.Character


-- | __HTML:__
--      @[RAWTEXT state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state)@
--
-- Entry point for the 'RawTextState' section of the tokenizer state machine.
-- The next input character selects one of three cases:
--
-- * @\'<\'@ defers to 'tokenAppropriateEndTagLessThanSign'.
-- * @U+0000 NULL@ emits a 'Character' token carrying 'replacementChar',
--   tagged with an 'UnexpectedNullCharacter' parse error.
-- * Any other character is emitted unchanged as a 'Character' token with no
--   parse errors attached.
tokenRawText :: Tokenizer [TokenizerOutput Token]
tokenRawText = tokenizers Nothing
    -- NOTE(review): 'Nothing' is passed as the first argument of 'tokenizers';
    -- presumably that slot describes the output at end of input.  The spec's
    -- RAWTEXT state emits an end-of-file token there -- confirm that case is
    -- covered elsewhere.
    [ ifs_ isLessThan tokenAppropriateEndTagLessThanSign
    , ifs_ isNull (emit' nullReplacement)
    , elsesChar emitLiteral
    ]
  where
    -- Predicates picking out the two specially-handled characters.
    isLessThan = (== '<')
    isNull = (== '\NUL')
    -- A NULL is reported as a parse error and replaced in the output.
    nullReplacement = ([UnexpectedNullCharacter], Character replacementChar)
    -- Everything else passes through verbatim, without any parse error.
    emitLiteral c = emit' ([], Character c)