{-# LANGUAGE Trustworthy #-}
module Web.Mangrove.Parse.Tokenize.Character
( flushCharRef
, tokenCharacterReference
) where
import qualified Control.Applicative as A
import qualified Control.Monad as N
import qualified Data.Bifunctor as F.B
import qualified Data.Char as C
import qualified Data.HashMap.Strict as M
import qualified Data.Maybe as Y
import qualified Data.Vector as V
import qualified Data.Word as W
import qualified Numeric.Natural as Z
import Data.Functor ( ($>) )
import Data.Vector ( (!?) )
import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Common.Character
import Web.Mangrove.Parse.Tokenize.Common
import Web.Willow.Common.Encoding.Character
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Util
import {-# SOURCE #-} Web.Mangrove.Parse.Tokenize.Dispatcher
-- | Split the text produced by a character reference into one 'Character'
-- token per code point.
--
-- * With no pending text and no errors, nothing is emitted.
-- * With no pending text but pending errors, 'dispatcher' is run (an empty
--   list is substituted if it fails) and the errors are passed, together
--   with that list, to 'consTokenErrorsList'.
-- * A single remaining character becomes one token that keeps every other
--   field of the input wrapper unchanged.
-- * For longer text, the leading character is emitted with its
--   'tokenizedState' reset to 'Nothing' and still carrying the wrapper's
--   errors; the rest of the text is flushed recursively with the errors
--   cleared.
flushCharRef :: TokenizerOutput String -> Tokenizer [TokenizerOutput Token]
flushCharRef out = go (tokenizedOut out)
  where
    go :: String -> Tokenizer [TokenizerOutput Token]
    go [] = case tokenizedErrs out of
        [] -> pure []
        errs -> do
            following <- A.optional dispatcher
            pure . consTokenErrorsList errs $ Y.fromMaybe [] following
    go [final] = pure [out { tokenizedOut = Character final }]
    go (leading : remaining) = do
        let headToken = out
                { tokenizedOut = Character leading
                , tokenizedState = Nothing
                }
        tailTokens <- flushCharRef $ out
            { tokenizedErrs = []
            , tokenizedOut = remaining
            }
        pure (headToken : tailTokens)
-- | Tokenize whatever follows an ampersand.
--
-- The 'Bool' is forwarded unchanged to 'tokenNamedCharacterReference'.
--
-- * An ASCII alphanumeric is pushed back and a named reference lookup is
--   attempted against 'characterReferences'.
-- * A @#@ hands over to 'tokenNumericCharacterReference'.
-- * Anything else is pushed back and a bare @"&"@ is produced.
--
-- NOTE(review): the @Just ([], "&")@ given to 'tokenizer' is presumably the
-- result used when the input is exhausted -- confirm against 'tokenizer'.
tokenCharacterReference
    :: Bool
    -> Tokenizer (TokenizerOutput String)
tokenCharacterReference inAttribute = tokenizer (Just ([], "&"))
    [ ifPush_ isAsciiAlphaNum namedReference
    , if_ (== '#') tokenNumericCharacterReference
    , elsePush_ (packToken ([], "&"))
    ]
  where
    namedReference :: Tokenizer (TokenizerOutput String)
    namedReference =
        tokenNamedCharacterReference inAttribute characterReferences >>= resolve

    -- Turn the lookup verdict into the text to emit.
    resolve :: TokenizerOutput CharacterReference -> Tokenizer (TokenizerOutput String)
    resolve lookedUp = case tokenizedOut lookedUp of
        Found expansion -> pure lookedUp { tokenizedOut = expansion }
        Flush name -> pure lookedUp { tokenizedOut = '&' : name }
        NotFound -> do
            -- No name matched: keep the ampersand and let the ambiguous
            -- ampersand tokenizer consume the alphanumeric run.
            ambiguous <- tokenAmbiguousAmpersand
            pure . consTokenErrors (tokenizedErrs lookedUp) $ consOut '&' ambiguous
-- | Verdict of a named character reference lookup.
data CharacterReference
    = NotFound
        -- ^ No reference name matched the input.
    | Flush String
        -- ^ The consumed name characters are to be emitted literally;
        -- 'tokenCharacterReference' prefixes them with @&@.
    | Found String
        -- ^ A reference matched; the payload is its replacement text.
-- | Match the longest named character reference by walking the reference
-- tree one input character at a time.
--
-- The 'Bool' is passed on to 'foundNamedCharacterReference', which uses it to
-- decide whether a reference lacking its semicolon must be flushed as text
-- when followed by @=@ or an ASCII alphanumeric.
--
-- Every character consumed on a path that ends in 'NotFound' is pushed back
-- onto the input, so a failed lookup leaves the stream where it started.
--
-- Two corrections relative to the previous revision:
--
-- * Once a terminating @;@ has been consumed the reference is complete, so
--   the subtree is no longer searched for a longer name (previously
--   @&not;in;@ could be matched as @notin;@).
-- * When a node's value requires a semicolon that is absent and a deeper
--   match reports 'Flush', the current character is now prepended to the
--   flushed name, as the other 'Flush' branches already did; previously it
--   was silently dropped.
tokenNamedCharacterReference
    :: Bool
    -> CharacterReferenceTree
    -> Tokenizer (TokenizerOutput CharacterReference)
tokenNamedCharacterReference inAttribute (CharacterReferenceTree refs) = do
    cm' <- A.optional next
    case cm' of
        -- Input exhausted: nothing can match.
        Nothing -> packState ([], NotFound) endState
        Just c -> case M.lookup (decodedOut c) refs of
            Nothing -> unmatched c
            -- Interior node: only a longer name can succeed.
            Just (Nothing, refs') -> do
                ref' <- tokenNamedCharacterReference inAttribute refs'
                let errs' = decodedErrs c ++ tokenizedErrs ref'
                case tokenizedOut ref' of
                    NotFound -> unmatched c
                    Flush name -> return ref'
                        { tokenizedErrs = errs'
                        , tokenizedOut = Flush $ decodedOut c : name
                        }
                    Found ref -> return ref'
                        { tokenizedErrs = errs'
                        , tokenizedOut = Found ref
                        }
            -- A complete name ends here; a longer one may still exist below.
            Just (Just found, refs') -> do
                semicolon <- A.optional $
                    next >>= satisfying (\d -> decodedOut d == ';')
                case semicolon of
                    -- The semicolon terminates the reference outright.
                    Just d ->
                        packState ([], Found $ referenceValue found) (decodedState d)
                    Nothing
                        -- Usable without the semicolon, but prefer a longer
                        -- match if the subtree offers one.
                        | isSemicolonOptional found ->
                            tokenNamedCharacterReference inAttribute refs' >>=
                                foundNamedCharacterReference
                                    inAttribute
                                    False
                                    (decodedOut c)
                                    (decodedState c)
                                    (referenceValue found)
                        -- The semicolon was mandatory and is missing, so this
                        -- node only counts as a prefix of a longer name.
                        | otherwise -> do
                            ref <- tokenNamedCharacterReference inAttribute refs'
                            case tokenizedOut ref of
                                NotFound -> push c $> ref
                                Flush name -> return ref
                                    { tokenizedOut = Flush $ decodedOut c : name
                                    }
                                Found _ -> return ref
  where
    -- Restore the character and report that nothing matched at this depth.
    unmatched :: TokenizerInput -> Tokenizer (TokenizerOutput CharacterReference)
    unmatched c = push c *> packState ([], NotFound) (decodedState c)
-- | Reconcile a reference that matched at the current tree node with the
-- result of searching for a longer name below it.
--
-- Arguments, in order: whether the reference occurs in an attribute; whether
-- a semicolon was consumed after the name; the character that led to the
-- current node; the decoder state to report; the replacement text of the
-- reference matched at this node; and the result of the deeper search.
--
-- * A deeper 'Found' is returned untouched.
-- * A deeper 'Flush' has the current character prepended.
-- * On a deeper 'NotFound' the reference matched here is used. The exception
--   is an attribute reference without semicolon that is followed by @=@ or an
--   ASCII alphanumeric: that character is pushed back and the current
--   character is flushed as text instead. A missing semicolon is otherwise
--   reported as 'MissingSemicolonAfterCharacterReference'.
foundNamedCharacterReference
    :: Bool
    -> Bool
    -> Char
    -> DecoderOutputState
    -> String
    -> TokenizerOutput CharacterReference
    -> Tokenizer (TokenizerOutput CharacterReference)
foundNamedCharacterReference attribute semicolon char state found ref =
    case tokenizedOut ref of
        Found _ -> pure ref
        Flush name -> pure ref { tokenizedOut = Flush (char : name) }
        NotFound
            | semicolon || not attribute ->
                packState (semicolonErrs, Found found) state
            | otherwise -> do
                follower <- A.optional $ next >>= satisfying continuesName
                case follower of
                    Just unread ->
                        push unread *> packState ([], Flush [char]) state
                    Nothing ->
                        packState ([MissingSemicolonAfterCharacterReference], Found found) state
  where
    -- Characters which, directly after an unterminated reference, cause the
    -- reference to be flushed as text.
    continuesName :: TokenizerInput -> Bool
    continuesName input = following == '=' || isAsciiAlphaNum following
      where following = decodedOut input

    semicolonErrs :: [ParseError]
    semicolonErrs
        | semicolon = []
        | otherwise = [MissingSemicolonAfterCharacterReference]
-- | Collect the run of ASCII alphanumerics following an ampersand that did
-- not start any known reference.
--
-- Each alphanumeric is added to the output and the tokenizer recurses. A
-- @;@ ends the run with an 'UnknownNamedCharacterReference' error; any other
-- character ends it without error. In both cases the terminating character
-- is pushed back rather than consumed.
tokenAmbiguousAmpersand :: Tokenizer (TokenizerOutput String)
tokenAmbiguousAmpersand = tokenizer (Just nothingCollected)
    [ ifChar isAsciiAlphaNum keepAlphaNum
    , ifPush_ (== ';') (packToken ([UnknownNamedCharacterReference], ""))
    , elsePush_ (packToken nothingCollected)
    ]
  where
    nothingCollected :: ([ParseError], String)
    nothingCollected = ([], "")

    keepAlphaNum :: Char -> Tokenizer (TokenizerOutput String)
    keepAlphaNum ch = fmap (consOut ch) tokenAmbiguousAmpersand
-- | Tokenize a numeric character reference, starting just after the @&#@.
--
-- An @x@ or @X@ selects 'tokenHexadecimalCharacterReferenceStart'; any other
-- character is pushed back and 'tokenDecimalCharacterReferenceStart' is
-- used. A 'Right' code point from either is converted by
-- 'tokenNumericCharacterReferenceEnd' into a single character plus any
-- errors. A 'Left' string is returned with @&#@ restored in front of it (and
-- the @x@/@X@ marker, when one was consumed).
--
-- NOTE(review): the 'Left' payload is presumably the literal text consumed
-- when no digits were found -- confirm against the @...Start@ tokenizers.
tokenNumericCharacterReference :: Tokenizer (TokenizerOutput String)
tokenNumericCharacterReference = fmap restorePrefix <$> tokenizer
    (Just ([AbsenceOfDigitsInNumericCharacterReference], Left ""))
    [ if_ (== 'x') (hexadecimal 'x')
    , if_ (== 'X') (hexadecimal 'X')
    , elsePush_ (resolve <$> tokenDecimalCharacterReferenceStart)
    ]
  where
    restorePrefix :: Either String String -> String
    restorePrefix = either ("&#" ++) id

    -- Hexadecimal branch; the marker is put back in front of unparsed text.
    hexadecimal :: Char -> Tokenizer (TokenizerOutput (Either String String))
    hexadecimal marker =
        resolve . fmap (F.B.first (marker :)) <$> tokenHexadecimalCharacterReferenceStart

    -- Convert a parsed code point into its character, moving the errors from
    -- that conversion onto the token wrapper.
    resolve
        :: TokenizerOutput (Either String Z.Natural)
        -> TokenizerOutput (Either String String)
    resolve tok = case tokenizedOut tok of
        Left unparsed -> tok { tokenizedOut = Left unparsed }
        Right codePoint -> case tokenNumericCharacterReferenceEnd codePoint of
            (errs, c) -> consTokenErrors errs $ tok { tokenizedOut = Right [c] }
-- | Entry point for the digits of a hexadecimal numeric character reference.
--
-- If the next character satisfies 'C.isHexDigit', the digits are consumed by
-- 'tokenHexadecimalCharacterReference' and the accumulated code point is
-- returned as a @Right@ value (via 'packReference').  Otherwise an
-- 'AbsenceOfDigitsInNumericCharacterReference' error is reported together
-- with @Left ""@.
--
-- NOTE(review): the first argument to 'tokenizer' is presumably the result
-- used when the input stream is exhausted, and the @*Push_@ combinators
-- presumably leave the tested character in the stream for the nested parser;
-- confirm against "Web.Mangrove.Parse.Tokenize.Common".
tokenHexadecimalCharacterReferenceStart
    :: Tokenizer (TokenizerOutput (Either String Z.Natural))
tokenHexadecimalCharacterReferenceStart = tokenizer
    (Just ([AbsenceOfDigitsInNumericCharacterReference], Left ""))
    [ ifPush_ C.isHexDigit $ fmap packReference tokenHexadecimalCharacterReference
    , elsePush_ $ packToken ([AbsenceOfDigitsInNumericCharacterReference], Left "")
    ]
-- | Entry point for the digits of a decimal numeric character reference.
--
-- If the next character satisfies 'C.isDigit', the digits are consumed by
-- 'tokenDecimalCharacterReference' and the accumulated code point is returned
-- as a @Right@ value (via 'packReference').  Otherwise an
-- 'AbsenceOfDigitsInNumericCharacterReference' error is reported together
-- with @Left ""@.
--
-- NOTE(review): the first argument to 'tokenizer' is presumably the result
-- used when the input stream is exhausted, and the @*Push_@ combinators
-- presumably leave the tested character in the stream for the nested parser;
-- confirm against "Web.Mangrove.Parse.Tokenize.Common".
tokenDecimalCharacterReferenceStart
    :: Tokenizer (TokenizerOutput (Either String Z.Natural))
tokenDecimalCharacterReferenceStart = tokenizer
    (Just ([AbsenceOfDigitsInNumericCharacterReference], Left ""))
    [ ifPush_ C.isDigit $ fmap packReference tokenDecimalCharacterReference
    , elsePush_ $ packToken ([AbsenceOfDigitsInNumericCharacterReference], Left "")
    ]
-- | Discard the digit counter carried alongside the accumulated value and
-- wrap the remaining number in @Right@, leaving the rest of the
-- 'TokenizerOutput' wrapper untouched.
--
-- The input pair is @(digits seen so far, accumulated code point)@, as built
-- by the @increment@ helpers of the digit parsers.
packReference :: TokenizerOutput (Z.Natural, Z.Natural) -> TokenizerOutput (Either String Z.Natural)
packReference = fmap $ Right . snd
-- | Consume the hexadecimal digits of a numeric character reference.
--
-- The result is a pair @(count, value)@: the number of digits consumed and
-- the number they spell.  Recursion descends to the end of the digit run
-- first, so each digit is folded in from the right: a digit with @pos@ digits
-- after it contributes @digit * 16 ^ pos@.
--
-- A terminating @;@ is consumed and ends the run without error; any other
-- character ends the run with 'MissingSemicolonAfterCharacterReference'.
--
-- NOTE(review): the first argument to 'tokenizer' is presumably the result
-- used at end of input, and 'elsePush_' presumably leaves the offending
-- character in the stream; confirm against the combinator definitions.
tokenHexadecimalCharacterReference
    :: Tokenizer (TokenizerOutput (Z.Natural, Z.Natural))
tokenHexadecimalCharacterReference = tokenizer
    (Just ([MissingSemicolonAfterCharacterReference], (0, 0)))
    [ ifChar C.isDigit $ \c -> increment 0x30 c <$> tokenHexadecimalCharacterReference
    , ifChar (range 'A' 'F') $ \c -> increment 0x37 c <$> tokenHexadecimalCharacterReference
    , ifChar (range 'a' 'f') $ \c -> increment 0x57 c <$> tokenHexadecimalCharacterReference
    , if_ (== ';') $ packToken ([], (0, 0))
    , elsePush_ $ packToken ([MissingSemicolonAfterCharacterReference], (0, 0))
    ]
  where
    -- Fold one digit into the result of the digits that follow it.  @offset@
    -- is subtracted from the character's code point to obtain its numeric
    -- value: 0x30 for '0'..'9', 0x37 for 'A'..'F', 0x57 for 'a'..'f'.
    increment
        :: Int
        -> Char
        -> TokenizerOutput (Z.Natural, Z.Natural)
        -> TokenizerOutput (Z.Natural, Z.Natural)
    increment offset c = fmap $ \(pos, accum) ->
        (pos + 1, fromIntegral (fromEnum c - offset) * (16 ^ pos) + accum)
-- | Consume the decimal digits of a numeric character reference.
--
-- The result is a pair @(count, value)@: the number of digits consumed and
-- the number they spell.  Recursion descends to the end of the digit run
-- first, so each digit is folded in from the right: a digit with @pos@ digits
-- after it contributes @digit * 10 ^ pos@.
--
-- A terminating @;@ is consumed and ends the run without error; any other
-- character ends the run with 'MissingSemicolonAfterCharacterReference'.
--
-- NOTE(review): the first argument to 'tokenizer' is presumably the result
-- used at end of input, and 'elsePush_' presumably leaves the offending
-- character in the stream; confirm against the combinator definitions.
tokenDecimalCharacterReference
    :: Tokenizer (TokenizerOutput (Z.Natural, Z.Natural))
tokenDecimalCharacterReference = tokenizer
    (Just ([MissingSemicolonAfterCharacterReference], (0, 0)))
    [ ifChar C.isDigit $ \c -> increment c <$> tokenDecimalCharacterReference
    , if_ (== ';') $ packToken ([], (0, 0))
    , elsePush_ $ packToken ([MissingSemicolonAfterCharacterReference], (0, 0))
    ]
  where
    -- Fold one digit into the result of the digits that follow it; 0x30 is
    -- the code point of '0'.
    increment
        :: Char
        -> TokenizerOutput (Z.Natural, Z.Natural)
        -> TokenizerOutput (Z.Natural, Z.Natural)
    increment c = fmap $ \(pos, accum) ->
        (pos + 1, fromIntegral (fromEnum c - 0x30) * (10 ^ pos) + accum)
-- | Map the number spelled by a numeric character reference to the character
-- it stands for, together with any parse errors the number triggers.
--
-- The cases are tried in order:
--
-- * zero, values above @0x10FFFF@, and surrogates (@0xD800@-@0xDFFF@) are
--   reported and replaced by 'replacementChar';
-- * noncharacters (@0xFDD0@-@0xFDEF@, or low 16 bits equal to @0xFFFE@ /
--   @0xFFFF@) are reported but passed through unchanged;
-- * @0x00@-@0x1F@ other than @0x09@, @0x0A@ and @0x0C@ are reported but
--   passed through unchanged;
-- * @0x7F@-@0x9F@ are reported and remapped through 'controlReplacement',
--   falling back to the character itself where the table has no entry;
-- * everything else is returned without error.
--
-- NOTE(review): 'range' is a project helper, assumed here to be an inclusive
-- @lo <= x <= hi@ test; confirm.
tokenNumericCharacterReferenceEnd :: Z.Natural -> ([ParseError], Char)
tokenNumericCharacterReferenceEnd 0x00 = ([NullCharacterReference], replacementChar)
tokenNumericCharacterReferenceEnd code
    | code > 0x10FFFF =
        ([CharacterReferenceOutsideUnicodeRange code], replacementChar)
    | range 0xD800 0xDFFF code =
        ([SurrogateCharacterReference char], replacementChar)
    | range 0xFDD0 0xFDEF code =
        ([NoncharacterCharacterReference char], char)
    | cMod == 0xFFFE || cMod == 0xFFFF =
        ([NoncharacterCharacterReference char], char)
    | range 0x00 0x1F code && notElem code [0x09, 0x0A, 0x0C] =
        ([ControlCharacterReference], char)
    | range 0x7F 0x9F code =
        ( [ControlCharacterReference]
        , Y.fromMaybe char . N.join $
            controlReplacement !? (fromIntegral code - 0x7F)
        )
    | otherwise = ([], char)
  where
    -- The character with code point @code@.  Only demanded in branches
    -- reached after the @> 0x10FFFF@ guard, so the conversion stays in range.
    char :: Char
    char = toEnum $ fromIntegral code
    -- Low 16 bits of the code point; the conversion to 'W.Word16' wraps.
    cMod :: W.Word16
    cMod = fromIntegral code
-- | Replacement characters for numeric character references in the range
-- @0x7F@-@0x9F@.
--
-- The table is indexed by @code - 0x7F@ (see
-- 'tokenNumericCharacterReferenceEnd'); the trailing comment on each entry is
-- the code point it is looked up from.  'Nothing' marks a code point with no
-- replacement, for which the caller keeps the original character.
--
-- NOTE(review): the values appear to follow the replacement table in the HTML
-- Standard's "numeric character reference end state"; verify against the
-- spec before editing.
controlReplacement :: V.Vector (Maybe Char)
controlReplacement = V.fromList
    [ Nothing          -- 0x7F
    , Just '\x20AC'    -- 0x80
    , Nothing          -- 0x81
    , Just '\x201A'    -- 0x82
    , Just '\x0192'    -- 0x83
    , Just '\x201E'    -- 0x84
    , Just '\x2026'    -- 0x85
    , Just '\x2020'    -- 0x86
    , Just '\x2021'    -- 0x87
    , Just '\x02C6'    -- 0x88
    , Just '\x2030'    -- 0x89
    , Just '\x0160'    -- 0x8A
    , Just '\x2039'    -- 0x8B
    , Just '\x0152'    -- 0x8C
    , Nothing          -- 0x8D
    , Just '\x017D'    -- 0x8E
    , Nothing          -- 0x8F
    , Nothing          -- 0x90
    , Just '\x2018'    -- 0x91
    , Just '\x2019'    -- 0x92
    , Just '\x201C'    -- 0x93
    , Just '\x201D'    -- 0x94
    , Just '\x2022'    -- 0x95
    , Just '\x2013'    -- 0x96
    , Just '\x2014'    -- 0x97
    , Just '\x02DC'    -- 0x98
    , Just '\x2122'    -- 0x99
    , Just '\x0161'    -- 0x9A
    , Just '\x203A'    -- 0x9B
    , Just '\x0153'    -- 0x9C
    , Nothing          -- 0x9D
    , Just '\x017E'    -- 0x9E
    , Just '\x0178'    -- 0x9F
    ]