{-|
Description:    Tokenization rules for characters which should not be processed further, without an exit condition.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.PlainText
    ( tokenPlainText
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Willow.Common.Encoding.Character


-- | __HTML:__
--      @[PLAINTEXT state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state)@
--
-- The parsing instructions rooted in the 'PlainTextState' section of the state
-- machine.
--
-- Every input character results in a single 'Character' token:
--
-- * a @U+0000 NULL@ is reported as an 'UnexpectedNullCharacter' parse error
--   and emitted as 'replacementChar' instead;
--
-- * any other character is emitted unchanged, with no parse errors.
--
-- None of the cases below switch to a different tokenizer state.
tokenPlainText :: Tokenizer (TokenizerOutput Token)
-- NOTE(review): the leading 'Nothing' presumably leaves end-of-input handling
-- to the default behaviour of 'tokenizer' -- confirm against its definition.
tokenPlainText = tokenizer Nothing
    [ if_ (== '\NUL') $ emit ([UnexpectedNullCharacter], Character replacementChar)
    , elseChar $ \c -> emit ([], Character c)
    ]