{-|
Description:    Basic tokenization rules for the character stream.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.Data
    ( tokenData
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tokenize.Character
import Web.Mangrove.Parse.Tokenize.Tag


-- | __HTML:__
--      @[data state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#data-state)@
-- 
-- The parsing instructions rooted in the 'DataState' section of the state
-- machine.
tokenData :: Tokenizer [TokenizerOutput Token]
tokenData :: Tokenizer [TokenizerOutput Token]
tokenData = Maybe [([ParseError], Token)]
-> [SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)]
-> Tokenizer [TokenizerOutput Token]
forall out.
Maybe [([ParseError], out)]
-> [SwitchCase TokenizerInput Tokenizer (WrappedOutputs out)]
-> Tokenizer [TokenizerOutput out]
tokenizers ([([ParseError], Token)] -> Maybe [([ParseError], Token)]
forall a. a -> Maybe a
Just [([], Token
EndOfStream)])
    [ (Char -> Bool)
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifs_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'&') (Tokenizer [TokenizerOutput Token]
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ Bool -> Tokenizer (TokenizerOutput String)
tokenCharacterReference Bool
False Tokenizer (TokenizerOutput String)
-> (TokenizerOutput String -> Tokenizer [TokenizerOutput Token])
-> Tokenizer [TokenizerOutput Token]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= TokenizerOutput String -> Tokenizer [TokenizerOutput Token]
flushCharRef
    , (Char -> Bool)
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifs_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'<') Tokenizer [TokenizerOutput Token]
tokenTagOpen
    , (Char -> Bool)
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifs_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\NUL') (Tokenizer [TokenizerOutput Token]
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ ([ParseError], Token) -> Tokenizer [TokenizerOutput Token]
emit' ([ParseError
UnexpectedNullCharacter], Char -> Token
Character Char
'\NUL')
    , (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
elsesChar ((Char -> Tokenizer [TokenizerOutput Token])
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ \Char
c -> ([ParseError], Token) -> Tokenizer [TokenizerOutput Token]
emit' ([], Char -> Token
Character Char
c)
    ]