{-# HLINT ignore "Use ." #-}

{-|
Description:    Tokenization rules for characters within @\<script\>@ ... @\</script\>@
                sections.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.ScriptData
    ( tokenScriptData
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tokenize.ScriptDataEscaped
import Web.Mangrove.Parse.Tokenize.Tag
import Web.Willow.Common.Encoding.Character


-- | __HTML:__
--      @[script data state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-state)@
--
-- Entry point for the 'ScriptDataState' section of the state machine.  A
-- @"\<"@ is handled by @tokenScriptDataLessThanSign@, a NUL character is
-- reported as @UnexpectedNullCharacter@ and emitted as @replacementChar@, and
-- any other character is emitted unchanged with no accompanying error.
tokenScriptData :: Tokenizer [TokenizerOutput Token]
tokenScriptData = tokenizers Nothing
    [ ifs_ (== '<') tokenScriptDataLessThanSign
    , ifs_ (== '\NUL') nullCharacter
    , elsesChar verbatim
    ]
  where
    -- Output for a NUL in the input: the replacement character, tagged with
    -- the corresponding parse error.
    nullCharacter = emit' ([UnexpectedNullCharacter], Character replacementChar)
    -- Output for every character without a dedicated rule above.
    verbatim char = emit' ([], Character char)

-- | __HTML:__
--      @[script data less-than sign state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state)@
--
-- Continuation of the 'ScriptDataState' section of the state machine once a
-- @"\<"@ has been read.  A following @"/"@ is handled by
-- @tokenAppropriateEndTagOpen@; a following @"!"@ emits both the @"\<"@ and
-- the @"!"@ ahead of whatever @tokenScriptDataEscapeStart@ produces; any other
-- character is routed through @elsePush_@ and only the pending @"\<"@ is
-- emitted.
tokenScriptDataLessThanSign :: Tokenizer [TokenizerOutput Token]
tokenScriptDataLessThanSign = tokenizers (Just [lessThan])
    -- NOTE(review): the @Just@ list above is presumably the output used when
    -- no further input is available -- confirm against @tokenizers@.
    [ ifs_ (== '/') tokenAppropriateEndTagOpen
    , ifs_ (== '!') (consEmit lessThan (consEmit exclamation tokenScriptDataEscapeStart))
    , elsePush_ (emit' lessThan)
    ]
  where
    -- The already-consumed @"\<"@, emitted as an ordinary character with no
    -- parse errors attached.
    lessThan = ([], Character '<')
    -- The @"!"@ which started the escape sequence, likewise error-free.
    exclamation = ([], Character '!')