{-|
Description:    Tokenization rules for characters which should not be processed further.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.RawText
    ( tokenRawText
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tokenize.Tag
import Web.Willow.Common.Encoding.Character


-- | __HTML:__
--      @[RAWTEXT state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state)@
-- 
-- Tokenization step for the 'RawTextState' portion of the state machine.
-- The next input character selects one of three behaviours:
-- 
-- * a less-than sign (U+003C) is handed off to
--   'tokenAppropriateEndTagLessThanSign';
-- 
-- * a null character (U+0000) is emitted as 'replacementChar', accompanied
--   by an 'UnexpectedNullCharacter' error;
-- 
-- * any other character is emitted unchanged, with no errors attached.
-- 
-- The meaning of the 'Nothing' passed to 'tokenizers' is defined by that
-- combinator (presumably the end-of-stream behaviour -- confirm against
-- "Web.Mangrove.Parse.Tokenize.Common").
tokenRawText :: Tokenizer [TokenizerOutput Token]
tokenRawText = tokenizers Nothing
    [ ifs_ ('<' ==) tokenAppropriateEndTagLessThanSign
    , ifs_ ('\NUL' ==) replaceNull
    , elsesChar passThrough
    ]
  where
    -- A literal null is never forwarded as-is: it is swapped for the
    -- replacement character and flagged as a parse error.
    replaceNull = emit' ([UnexpectedNullCharacter], Character replacementChar)
    -- Everything else becomes a character token carrying the input verbatim.
    passThrough c = emit' ([], Character c)