{-|
Description:    Tokenization rules for characters within @\<![CDATA[@ ... @]]\>@ sections.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tokenize.CData
    ( tokenCDataSection
    ) where


import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common


-- | __HTML:__
--      @[CDATA section state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state)@
--
-- The parsing instructions rooted in the 'CDataState' section of the state
-- machine.
--
-- If the input runs out, a single 'EndOfStream' token carrying an
-- 'EOFInCData' error is produced.
tokenCDataSection :: Tokenizer [TokenizerOutput Token]
tokenCDataSection = tokenizers (Just [([EOFInCData], EndOfStream)])
    -- A bracket may open the closing sequence, so nothing is emitted here;
    -- the decision is handed to the bracket state.
    [ ifs_ (== ']') tokenCDataSectionBracket
    -- Every other character is emitted as a 'Character' token with no errors.
    , elsesChar $ \c -> emit' ([], Character c)
    ]

-- | __HTML:__
--      @[CDATA section bracket state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state)@
--
-- The parsing instructions for after reading @"]"@ in the 'CDataState' section
-- of the state machine.
--
-- If the input runs out, the pending @']'@ is emitted as a 'Character' token,
-- followed by an 'EndOfStream' token carrying an 'EOFInCData' error.
tokenCDataSectionBracket :: Tokenizer [TokenizerOutput Token]
tokenCDataSectionBracket = tokenizers (Just [([], Character ']'), ([EOFInCData], EndOfStream)])
    -- A second bracket: continue to the end state, which looks for @'>'@.
    [ ifs_ (== ']') tokenCDataSectionEnd
    -- Anything else means the first bracket was ordinary content, so emit it.
    -- NOTE(review): 'elsePush_' presumably returns the character just read to
    -- the input so it is tokenized again (the spec's "reconsume") -- confirm
    -- against its definition.
    , elsePush_ $ emit' ([], Character ']')
    ]

-- | __HTML:__
--      @[CDATA section end state]
--      (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state)@
--
-- The parsing instructions for after reading @"]]"@ in the 'CDataState'
-- section of the state machine.
--
-- Runs 'tokenCDataSectionEnd'' to collect the characters which need to be
-- re-emitted, then turns each of them into its own 'Character' token.
tokenCDataSectionEnd :: Tokenizer [TokenizerOutput Token]
tokenCDataSectionEnd = tokenCDataSectionEnd' >>= \ts -> case tokenizedOut ts of
    -- The loop collected nothing to re-emit, so no tokens are produced.
    [] -> return []
    -- Errors reported by the loop are attached to the first character only;
    -- the remaining characters are emitted without errors.
    (t':ts') -> emits (tokenizedState ts) $
        (tokenizedErrs ts, Character t') : [([], Character t) | t <- ts']
  where -- Emit every pair in order, then pass the results through
        -- 'finalStateList' together with the state reported by the loop.
        emits state outs = finalStateList state <$> mapM emit outs

-- | Loop within the __HTML__ @[CDATA section end state]
-- (https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state)@
-- to read and re-emit any number of @']'@ 'Char's before the closing @"]]\>"@.
--
-- The output is the text which the caller still has to emit as character
-- tokens.  If the input runs out, that text is @"]]"@ and it carries an
-- 'EOFInCData' error.
tokenCDataSectionEnd' :: Tokenizer (TokenizerOutput String)
tokenCDataSectionEnd' = tokenizer (Just ([EOFInCData], "]]"))
    -- A further bracket: one @']'@ becomes content, and the loop continues
    -- with the two most recent brackets still pending.
    [ if_ (== ']') $ consOut ']' <$> tokenCDataSectionEnd'
    -- The section is closed: switch to 'DataState' and leave nothing pending.
    , if_ (== '>') $ changeState DataState *> packToken ([], "")
    -- Not a closing sequence after all: both pending brackets are content.
    -- NOTE(review): 'elsePush_' presumably returns the character just read to
    -- the input so it is tokenized again -- confirm against its definition.
    , elsePush_ $ packToken ([], "]]")
    ]