{-|
Description:    Tokenization rules for characters which should not be processed further.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.RawText
    ( tokenRawText
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tokenize.Tag
import Web.Willow.Common.Encoding.Character


-- | __HTML:__
--      @[RAWTEXT state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state)@
-- 
-- The parsing instructions rooted in the 'RawTextState' section of the state
-- machine.
tokenRawText :: Tokenizer [TokenizerOutput Token]
tokenRawText :: Tokenizer [TokenizerOutput Token]
tokenRawText = Maybe [([ParseError], Token)]
-> [SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)]
-> Tokenizer [TokenizerOutput Token]
forall out.
Maybe [([ParseError], out)]
-> [SwitchCase TokenizerInput Tokenizer (WrappedOutputs out)]
-> Tokenizer [TokenizerOutput out]
tokenizers Maybe [([ParseError], Token)]
forall a. Maybe a
Nothing
    [ (Char -> Bool)
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifs_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'<') Tokenizer [TokenizerOutput Token]
tokenAppropriateEndTagLessThanSign
    , (Char -> Bool)
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifs_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\NUL') (Tokenizer [TokenizerOutput Token]
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ ([ParseError], Token) -> Tokenizer [TokenizerOutput Token]
emit' ([ParseError
UnexpectedNullCharacter], Char -> Token
Character Char
replacementChar)
    , (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
elsesChar ((Char -> Tokenizer [TokenizerOutput Token])
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ \Char
c -> ([ParseError], Token) -> Tokenizer [TokenizerOutput Token]
emit' ([], Char -> Token
Character Char
c)
    ]