{-|
Description:    Tokenization rules for characters within @\<script\>@ ... @\</script\>@
                sections in HTML comments in script data.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.ScriptDataDoubleEscaped
    ( tokenScriptDataDoubleEscaped
    , tokenScriptDataDoubleEscapeStart
    ) where


import qualified Control.Applicative as A

import qualified Data.Maybe as Y

import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Willow.Common.Encoding.Character
import Web.Willow.Common.Parser

import {-# SOURCE #-} Web.Mangrove.Parse.Tokenize.Dispatcher


-- | __HTML:__
--      @[script data double escape start state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state)@
--
-- The parsing instructions for after reading @"\<"@ when the next character is
-- a letter in the 'ScriptDataEscapedState' section of the state machine.
--
-- The run of ASCII letters is first collected by
-- @tokenScriptDataDoubleEscapeStart'@.  If, compared ASCII-case-insensitively,
-- it spells @"script"@ and is directly followed by ASCII whitespace, @"/"@, or
-- @">"@, the tokenizer switches to 'ScriptDataDoubleEscapedState'; in every
-- other case the collected letters are simply emitted as 'Character' tokens
-- and the current state is left unchanged.
tokenScriptDataDoubleEscapeStart :: Tokenizer [TokenizerOutput Token]
tokenScriptDataDoubleEscapeStart = tokenScriptDataDoubleEscapeStart' >>= \t' ->
    case map toAsciiLower $ tokenizedOut t' of
        "script" -> do
            -- If the input ends right after the tag name, fall back to the
            -- buffered letters followed by the end-of-stream error token.
            recovery <- map unpackToken <$> output t'
            tokenizers (Just $ recovery ++ [([EOFInScriptHtmlCommentLikeText], EndOfStream)])
                [ ifsChar isAsciiWhitespace $ escape t'
                , ifsChar (== '/') $ escape t'
                , ifsChar (== '>') $ escape t'
                -- Anything else is pushed back and only the letters are emitted.
                , elsePush_ $ output t'
                ]
        _ -> output t'
  where
    -- The tag name was "script" and @c@ terminated it: switch to the double
    -- escaped state, emit the buffered letters followed by @c@, then keep
    -- tokenizing in the new state so the results form a single list.
    escape t' c = do
        changeState ScriptDataDoubleEscapedState
        cs <- finalStateList Nothing <$> output t'
        c' <- emit' ([], Character c)
        cs' <- tokenScriptDataDoubleEscaped
        return $ cs ++ c' ++ cs'

    -- Turn the buffered letters into 'Character' tokens, with any errors
    -- gathered while reading them attached to the first one.  When there are
    -- errors but no letters, the errors are handed to 'consTokenErrorsList'
    -- together with the next output of 'dispatcher' (presumably prepending
    -- them to its first token -- confirm against that helper).
    output t' = case tokenizedOut t' of
        [] -> case tokenizedErrs t' of
            [] -> return []
            errs -> consTokenErrorsList errs <$> dispatcher
        (c:cs) -> emits (tokenizedState t') $
            (tokenizedErrs t', Character c) : [([], Character c') | c' <- cs]

    -- Emit every (errors, token) pair in order, then apply 'finalStateList'
    -- with the given decoder state to the resulting list.
    emits state ts = finalStateList state <$> mapM emit ts

    -- Split a wrapped output back into its (errors, value) pair.
    unpackToken t' = (tokenizedErrs t', tokenizedOut t')

-- | Loop within the __HTML__ @[script data double escape start state]
-- (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state)@
-- to read the name of the tag.
--
-- Consumes ASCII letters for as long as they continue, returning them (in
-- their original case) as a single 'String'.  The first non-letter is pushed
-- back onto the input, and both that case and the end of the input terminate
-- the name with an empty string and no errors.
tokenScriptDataDoubleEscapeStart' :: Tokenizer (TokenizerOutput String)
tokenScriptDataDoubleEscapeStart' = tokenizer (Just ([], ""))
    [ ifChar isAsciiAlpha $ \c -> consOut c <$> tokenScriptDataDoubleEscapeStart'
    , elsePush_ $ packToken ([], "")
    ]

-- | __HTML:__
--      @[script data double escaped state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state)@
--
-- The parsing instructions rooted in the 'ScriptDataDoubleEscapedState'
-- section of the state machine.
--
-- Every character read is emitted as a 'Character' token; @"-"@ and @"\<"@
-- additionally continue into the dash and less-than sign handlers, and a null
-- character is replaced by 'replacementChar' with an
-- 'UnexpectedNullCharacter' error.  Reaching the end of the input yields
-- 'EndOfStream' with an 'EOFInScriptHtmlCommentLikeText' error.
tokenScriptDataDoubleEscaped :: Tokenizer [TokenizerOutput Token]
tokenScriptDataDoubleEscaped = tokenizers (Just [([EOFInScriptHtmlCommentLikeText], EndOfStream)])
    [ ifs_ (== '-') $ consEmit ([], Character '-') tokenScriptDataDoubleEscapedDash
    , ifs_ (== '<') $ consEmit ([], Character '<') tokenScriptDataDoubleEscapedLessThanSign
    , ifs_ (== '\NUL') $ emit' ([UnexpectedNullCharacter], Character replacementChar)
    , elsesChar $ \c -> do
        -- Check whether the input is exhausted right after this character
        -- (assuming 'end' only succeeds at the end of the input -- confirm);
        -- if so, the end-of-stream error token is appended immediately.
        e <- A.optional end
        consEmit ([], Character c) $ if Y.isJust e
            then emit' ([EOFInScriptHtmlCommentLikeText], EndOfStream)
            else return []
    ]

-- | __HTML:__
--      @[script data double escaped dash state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state)@
--
-- The parsing instructions for after reading @"-"@ in the
-- 'ScriptDataDoubleEscapedState' section of the state machine.
--
-- A second @"-"@ continues into the dash-dash handler and @"\<"@ into the
-- less-than sign handler; a null character is replaced by 'replacementChar'
-- with an 'UnexpectedNullCharacter' error, and any other character is emitted
-- unchanged.  Reaching the end of the input yields 'EndOfStream' with an
-- 'EOFInScriptHtmlCommentLikeText' error.
tokenScriptDataDoubleEscapedDash :: Tokenizer [TokenizerOutput Token]
tokenScriptDataDoubleEscapedDash = tokenizers (Just [([EOFInScriptHtmlCommentLikeText], EndOfStream)])
    [ ifs_ (== '-') $ consEmit ([], Character '-') tokenScriptDataDoubleEscapedDashDash
    , ifs_ (== '<') $ consEmit ([], Character '<') tokenScriptDataDoubleEscapedLessThanSign
    , ifs_ (== '\NUL') $ emit' ([UnexpectedNullCharacter], Character replacementChar)
    , elsesChar $ \c -> emit' ([], Character c)
    ]

-- | __HTML:__
--      @[script data double escaped dash dash state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state)@
--
-- The parsing instructions for after reading @"--"@ in the
-- 'ScriptDataDoubleEscapedState' section of the state machine.
--
-- Further @"-"@ characters loop back into this handler and @"\<"@ continues
-- into the less-than sign handler, while @">"@ closes the comment-like text
-- by switching to 'ScriptDataState'.  A null character is replaced by
-- 'replacementChar' with an 'UnexpectedNullCharacter' error, and any other
-- character is emitted unchanged.  Reaching the end of the input yields
-- 'EndOfStream' with an 'EOFInScriptHtmlCommentLikeText' error.
tokenScriptDataDoubleEscapedDashDash :: Tokenizer [TokenizerOutput Token]
tokenScriptDataDoubleEscapedDashDash = tokenizers (Just [([EOFInScriptHtmlCommentLikeText], EndOfStream)])
    [ ifs_ (== '-') $ consEmit ([], Character '-') tokenScriptDataDoubleEscapedDashDash
    , ifs_ (== '<') $ consEmit ([], Character '<') tokenScriptDataDoubleEscapedLessThanSign
    , ifs_ (== '>') $ changeState ScriptDataState *> emit' ([], Character '>')
    , ifs_ (== '\NUL') $ emit' ([UnexpectedNullCharacter], Character replacementChar)
    , elsesChar $ \c -> emit' ([], Character c)
    ]

-- | __HTML:__
--      @[script data double escaped less-than sign state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state)@
--
-- The parsing instructions for after reading @"\<"@ in the
-- 'ScriptDataDoubleEscapedState' section of the state machine.
--
-- A following @"/"@ is emitted and continues into the double escape end
-- handler; any other character is pushed back onto the input and nothing
-- further is emitted here.  Reaching the end of the input yields
-- 'EndOfStream' with an 'EOFInScriptHtmlCommentLikeText' error.
tokenScriptDataDoubleEscapedLessThanSign :: Tokenizer [TokenizerOutput Token]
tokenScriptDataDoubleEscapedLessThanSign = tokenizers (Just [([EOFInScriptHtmlCommentLikeText], EndOfStream)])
    [ ifs_ (== '/') $ consEmit ([], Character '/') tokenScriptDataDoubleEscapeEnd
    , elsePush_ $ return []
    ]

-- | __HTML:__
--      @[script data double escape end state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state)@
-- 
-- The parsing instructions for after reading @"\</"@ in the
-- 'ScriptDataDoubleEscapedState' section of the state machine.
tokenScriptDataDoubleEscapeEnd :: Tokenizer [TokenizerOutput Token]
tokenScriptDataDoubleEscapeEnd :: Tokenizer [TokenizerOutput Token]
tokenScriptDataDoubleEscapeEnd = Tokenizer (TokenizerOutput String)
tokenScriptDataDoubleEscapeEnd' Tokenizer (TokenizerOutput String)
-> (TokenizerOutput String -> Tokenizer [TokenizerOutput Token])
-> Tokenizer [TokenizerOutput Token]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \TokenizerOutput String
t' -> do
    case (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toAsciiLower (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ TokenizerOutput String -> String
forall out. TokenizerOutput out -> out
tokenizedOut TokenizerOutput String
t' of
        String
"script" -> do
            [([ParseError], Token)]
recovery <- (TokenizerOutput Token -> ([ParseError], Token))
-> [TokenizerOutput Token] -> [([ParseError], Token)]
forall a b. (a -> b) -> [a] -> [b]
map TokenizerOutput Token -> ([ParseError], Token)
forall b. TokenizerOutput b -> ([ParseError], b)
unpackToken ([TokenizerOutput Token] -> [([ParseError], Token)])
-> Tokenizer [TokenizerOutput Token]
-> StateT
     TokenParserState (Parser [TokenizerInput]) [([ParseError], Token)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TokenizerOutput String -> Tokenizer [TokenizerOutput Token]
output TokenizerOutput String
t'
            Maybe [([ParseError], Token)]
-> [SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)]
-> Tokenizer [TokenizerOutput Token]
forall out.
Maybe [([ParseError], out)]
-> [SwitchCase TokenizerInput Tokenizer (WrappedOutputs out)]
-> Tokenizer [TokenizerOutput out]
tokenizers ([([ParseError], Token)] -> Maybe [([ParseError], Token)]
forall a. a -> Maybe a
Just ([([ParseError], Token)] -> Maybe [([ParseError], Token)])
-> [([ParseError], Token)] -> Maybe [([ParseError], Token)]
forall a b. (a -> b) -> a -> b
$ [([ParseError], Token)]
recovery [([ParseError], Token)]
-> [([ParseError], Token)] -> [([ParseError], Token)]
forall a. [a] -> [a] -> [a]
++ [([ParseError
EOFInScriptHtmlCommentLikeText], Token
EndOfStream)])
                [ (Char -> Bool)
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifsChar Char -> Bool
isAsciiWhitespace ((Char -> Tokenizer [TokenizerOutput Token])
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ TokenizerOutput String -> Char -> Tokenizer [TokenizerOutput Token]
escape TokenizerOutput String
t'
                , (Char -> Bool)
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifsChar (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/') ((Char -> Tokenizer [TokenizerOutput Token])
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ TokenizerOutput String -> Char -> Tokenizer [TokenizerOutput Token]
escape TokenizerOutput String
t'
                , (Char -> Bool)
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifsChar (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>') ((Char -> Tokenizer [TokenizerOutput Token])
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> (Char -> Tokenizer [TokenizerOutput Token])
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ TokenizerOutput String -> Char -> Tokenizer [TokenizerOutput Token]
escape TokenizerOutput String
t'
                , Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall out.
Tokenizer out -> SwitchCase TokenizerInput Tokenizer (Wrapped out)
elsePush_ (Tokenizer [TokenizerOutput Token]
 -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token))
-> Tokenizer [TokenizerOutput Token]
-> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
forall a b. (a -> b) -> a -> b
$ TokenizerOutput String -> Tokenizer [TokenizerOutput Token]
output TokenizerOutput String
t'
                ]
        String
_ -> TokenizerOutput String -> Tokenizer [TokenizerOutput Token]
output TokenizerOutput String
t'
  where -- Switch the tokenizer to 'ScriptDataEscapedState', re-emit the
        -- buffered tag-name characters held in @t'@, and then emit @c@ as
        -- one more 'Character' token after them.
        --
        -- NOTE(review): @c@ is presumably the character which terminated the
        -- tag name (whitespace, @\/@, or @>@); the call site is outside
        -- this helper, so confirm against the caller.
        escape :: TokenizerOutput String -> Char -> Tokenizer [TokenizerOutput Token]
        escape t' c = do
            changeState ScriptDataEscapedState
            -- The buffered characters are passed through 'finalStateList'
            -- with 'Nothing' rather than with the state recorded in @t'@.
            cs <- finalStateList Nothing <$> output t'
            c' <- emit' ([], Character c)
            return $ cs ++ c'
        -- Turn every character buffered in @t'@ into its own 'Character'
        -- token (via 'emits', using the decoder state recorded in @t'@), and
        -- attach any parse errors carried by @t'@ to the resulting list with
        -- 'consTokenErrorsList'.
        output :: TokenizerOutput String -> Tokenizer [TokenizerOutput Token]
        output t' = do
            buffer <- emits (tokenizedState t') [([], Character c) | c <- tokenizedOut t']
            case (tokenizedErrs t', buffer) of
                -- No errors to report: the emitted characters are the result.
                ([], _) -> return buffer
                -- Errors but no characters were buffered: run 'dispatcher'
                -- and attach the errors to whatever it produces instead.
                (errs, []) -> consTokenErrorsList errs <$> dispatcher
                -- Errors and characters: attach the errors to the characters.
                (errs, _) -> return $ consTokenErrorsList errs buffer
        -- Run 'emit' over each @(errors, token)@ pair in order, then pass the
        -- collected outputs through 'finalStateList' together with @state@.
        emits :: DecoderOutputState -> [([ParseError], Token)] -> Tokenizer [TokenizerOutput Token]
        emits state ts = finalStateList state <$> mapM emit ts
        -- Project a wrapped tokenizer output down to the pair of its parse
        -- errors ('tokenizedErrs') and its payload ('tokenizedOut').
        unpackToken :: TokenizerOutput b -> ([ParseError], b)
        unpackToken t' = (tokenizedErrs t', tokenizedOut t')


-- | Loop within the __HTML__ @[script data double escape end state]
-- (https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state)@
-- to read the name of the tag.
-- 
-- Each character satisfying 'isAsciiAlpha' is consumed and prepended (via
-- 'consOut') to the result of recursing on the rest of the input, without any
-- case folding at this level.  Any other input is handled through
-- 'elsePush_' and ends the loop with an empty name and no parse errors.
-- 
-- NOTE(review): the @'Just' ([], "")@ passed to 'tokenizer' is presumably the
-- result used when the input is exhausted -- confirm against the definition
-- of 'tokenizer'.
tokenScriptDataDoubleEscapeEnd' :: Tokenizer (TokenizerOutput String)
tokenScriptDataDoubleEscapeEnd' = tokenizer (Just ([], ""))
    [ ifChar isAsciiAlpha $ \c -> consOut c <$> tokenScriptDataDoubleEscapeEnd'
    , elsePush_ $ packToken ([], "")
    ]