{-|
Description:    Tokenization rules for characters which should be minimally processed.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.RCData
    ( tokenRCData
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tokenize.Character
import Web.Mangrove.Parse.Tokenize.Tag

import Web.Willow.Common.Encoding.Character


-- | __HTML:__
--      @[RCDATA state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state)@
--
-- The entry point for the 'RCDataState' portion of the tokenizer state
-- machine.  Dispatch is on the next input character, with the cases tried in
-- the order they are listed:
--
-- * @&@ runs 'tokenCharacterReference' (called with 'False') and passes its
--   result to 'flushCharRef'.
-- * @<@ hands control to 'tokenAppropriateEndTagLessThanSign'.
-- * @NUL@ emits 'replacementChar' as a 'Character' token, tagged with an
--   'UnexpectedNullCharacter' error.
-- * Any other character is emitted unchanged as a 'Character' token with no
--   errors attached.
tokenRCData :: Tokenizer [TokenizerOutput Token]
tokenRCData = tokenizers Nothing
    -- NOTE(review): the 'Nothing' above presumably selects the default
    -- behaviour of 'tokenizers' when no input remains -- confirm against its
    -- definition in "Web.Mangrove.Parse.Tokenize.Common".
    [ ifs_ (== '&') characterReference
    , ifs_ (== '<') tokenAppropriateEndTagLessThanSign
    , ifs_ (== '\NUL') nullReplacement
    , elsesChar literal
    ]
  where
    -- Resolve the reference, then let 'flushCharRef' post-process the result.
    characterReference = tokenCharacterReference False >>= flushCharRef
    -- A NUL is never passed through: it is swapped for 'replacementChar' and
    -- reported as a parse error.
    nullReplacement = emit' ([UnexpectedNullCharacter], Character replacementChar)
    -- Fallback: wrap the character in a token as-is, with an empty error list.
    literal char = emit' ([], Character char)