{-# LANGUAGE OverloadedStrings, TupleSections, ViewPatterns #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
module Text.HTML.TagStream
( Token (..)
, tokenStream
) where
import Control.Applicative
import Control.Monad (unless)
import Control.Monad.Trans.Resource (MonadThrow)
import Data.Char
import qualified Data.Conduit.List as CL
import Data.Attoparsec.Text
import Data.Conduit
import qualified Data.Conduit.Attoparsec as CA
import Data.Maybe (fromMaybe, isJust)
import Data.Monoid ((<>))
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as B
import qualified Text.XML.Stream.Parse as XML
import Data.Map (Map)
import qualified Data.Map.Strict as Map
import Control.Arrow (first)
data Token
= TagOpen Text (Map Text Text) Bool
| TagClose Text
| Text Text
| Text
| Special Text Text
| Incomplete Text
deriving (Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show)
data TagType
= TagTypeClose
| TagTypeSpecial
| TagTypeNormal
quoted :: Char -> Parser Text
quoted :: Char -> Parser Text
quoted Char
q = Text -> Text -> Text
T.append (Text -> Text -> Text) -> Parser Text -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'\\',Char
q))
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Char -> Parser Char
char Char
q Parser Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
""
Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char Char
'\\' Parser Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Parser Text -> Parser Text
atLeast Int
1 (Char -> Parser Text
quoted Char
q) )
quotedOr :: Parser Text -> Parser Text
quotedOr :: Parser Text -> Parser Text
quotedOr Parser Text
p = Parser Char -> Parser (Maybe Char)
forall a. Parser a -> Parser (Maybe a)
maybeP ((Char -> Bool) -> Parser Char
satisfy ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'"',Char
'\''))) Parser (Maybe Char) -> (Maybe Char -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser Text -> (Char -> Parser Text) -> Maybe Char -> Parser Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser Text
p Char -> Parser Text
quoted
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = Parser Text -> Parser Text
quotedOr (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser Text
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
attrName :: Parser Text
attrName :: Parser Text
attrName = Parser Text -> Parser Text
quotedOr (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$
Char -> Text -> Text
T.cons (Char -> Text -> Text) -> Parser Char -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'>')
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'/',Char
'>',Char
'=') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
tagEnd :: Parser Bool
tagEnd :: Parser Bool
tagEnd = Char -> Parser Char
char Char
'>' Parser Char -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
Parser Bool -> Parser Bool -> Parser Bool
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"/>" Parser Text -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
attr :: Parser (Text, Text)
attr :: Parser (Text, Text)
attr = (,) (Text -> Text -> (Text, Text))
-> Parser Text -> Parser Text (Text -> (Text, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
attrName Parser Text (Text -> (Text, Text))
-> Parser Text () -> Parser Text (Text -> (Text, Text))
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Text ()
skipSpace
Parser Text (Text -> (Text, Text))
-> Parser Text -> Parser (Text, Text)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Parser Char -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (Char -> Parser Char
char Char
'=') Parser Bool -> (Bool -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser Text -> Parser Text -> Bool -> Parser Text
forall a. a -> a -> Bool -> a
cond (Parser Text ()
skipSpace Parser Text () -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
attrValue)
(Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"")
)
attrs :: Parser (Map Text Text, Bool)
attrs :: Parser (Map Text Text, Bool)
attrs = Map Text Text -> Parser (Map Text Text, Bool)
loop Map Text Text
forall k a. Map k a
Map.empty
where
loop :: Map Text Text -> Parser (Map Text Text, Bool)
loop Map Text Text
acc = Parser Text ()
skipSpace Parser Text ()
-> Parser Text (Either Bool (Text, Text))
-> Parser Text (Either Bool (Text, Text))
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Bool -> Either Bool (Text, Text)
forall a b. a -> Either a b
Left (Bool -> Either Bool (Text, Text))
-> Parser Bool -> Parser Text (Either Bool (Text, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
tagEnd Parser Text (Either Bool (Text, Text))
-> Parser Text (Either Bool (Text, Text))
-> Parser Text (Either Bool (Text, Text))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Text, Text) -> Either Bool (Text, Text)
forall a b. b -> Either a b
Right ((Text, Text) -> Either Bool (Text, Text))
-> Parser (Text, Text) -> Parser Text (Either Bool (Text, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Text, Text)
attr) Parser Text (Either Bool (Text, Text))
-> (Either Bool (Text, Text) -> Parser (Map Text Text, Bool))
-> Parser (Map Text Text, Bool)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
(Bool -> Parser (Map Text Text, Bool))
-> ((Text, Text) -> Parser (Map Text Text, Bool))
-> Either Bool (Text, Text)
-> Parser (Map Text Text, Bool)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
((Map Text Text, Bool) -> Parser (Map Text Text, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Map Text Text, Bool) -> Parser (Map Text Text, Bool))
-> (Bool -> (Map Text Text, Bool))
-> Bool
-> Parser (Map Text Text, Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Map Text Text
acc,))
(\(Text
key, Text
value) -> Map Text Text -> Parser (Map Text Text, Bool)
loop (Map Text Text -> Parser (Map Text Text, Bool))
-> Map Text Text -> Parser (Map Text Text, Bool)
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Map Text Text -> Map Text Text
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Text
key Text
value Map Text Text
acc)
comment :: Parser Token
= Text -> Token
Comment (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
comment'
where comment' :: Parser Text
comment' = Text -> Text -> Text
T.append (Text -> Text -> Text) -> Parser Text -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-')
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Text -> Parser Text
string Text
"-->" Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser Text -> Parser Text
atLeast Int
1 Parser Text
comment' )
special :: Parser Token
special :: Parser Token
special = Text -> Text -> Token
Special
(Text -> Text -> Token)
-> Parser Text -> Parser Text (Text -> Token)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Char -> Text -> Text
T.cons (Char -> Text -> Text) -> Parser Char -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace))
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
Parser Text -> Parser Text () -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Text ()
skipSpace )
Parser Text (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') Parser Token -> Parser Char -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
tag :: Parser Token
tag :: Parser Token
tag = do
TagType
t <- Text -> Parser Text
string Text
"/" Parser Text -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser Text TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeClose
Parser Text TagType -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"!" Parser Text -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser Text TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeSpecial
Parser Text TagType -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TagType -> Parser Text TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeNormal
case TagType
t of
TagType
TagTypeClose ->
Text -> Token
TagClose (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>')
Parser Token -> Parser Char -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
TagType
TagTypeSpecial -> Parser Text -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (Text -> Parser Text
string Text
"--") Parser Bool -> (Bool -> Parser Token) -> Parser Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser Token -> Parser Token -> Bool -> Parser Token
forall a. a -> a -> Bool -> a
cond Parser Token
comment Parser Token
special
TagType
TagTypeNormal -> do
Text
name <- (Char -> Bool) -> Parser Text
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'<',Char
'>',Char
'/') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
(Map Text Text
as, Bool
close) <- Parser (Map Text Text, Bool)
attrs
Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Text -> Map Text Text -> Bool -> Token
TagOpen Text
name ((Text -> Text) -> Map Text Text -> Map Text Text
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map Text -> Text
decodeString Map Text Text
as) Bool
close
incomplete :: Parser Token
incomplete :: Parser Token
incomplete = Text -> Token
Incomplete (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text -> Text
T.cons Char
'<' (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
takeText
text :: Parser Token
text :: Parser Token
text = Text -> Token
Text (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser Text -> Parser Text
atLeast Int
1 ((Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'<'))
decodeEntity :: MonadThrow m => Text -> m Text
decodeEntity :: Text -> m Text
decodeEntity Text
entity =
ConduitT () Void m Text -> m Text
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit
(ConduitT () Void m Text -> m Text)
-> ConduitT () Void m Text -> m Text
forall a b. (a -> b) -> a -> b
$ [Text] -> ConduitT () Text m ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList [Text
"&",Text
entity,Text
";"]
#if MIN_VERSION_xml_conduit(1,9,0)
ConduitT () Text m ()
-> ConduitM Text Void m Text -> ConduitT () Void m Text
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ParseSettings -> ConduitT Text Event m ()
forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text Event m ()
XML.parseText ParseSettings
forall a. Default a => a
XML.def { psDecodeEntities :: DecodeEntities
XML.psDecodeEntities = DecodeEntities
XML.decodeHtmlEntities }
#else
.| XML.parseText' XML.def { XML.psDecodeEntities = XML.decodeHtmlEntities }
#endif
ConduitT Text Event m ()
-> ConduitM Event Void m Text -> ConduitM Text Void m Text
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM Event Void m Text
forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
XML.content
token :: Parser Token
token :: Parser Token
token = Char -> Parser Char
char Char
'<' Parser Char -> Parser Token -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Token
tag Parser Token -> Parser Token -> Parser Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
incomplete)
Parser Token -> Parser Token -> Parser Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
text
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd Token
open =
Builder -> Parser [Token]
loop Builder
forall a. Monoid a => a
mempty
where
loop :: Builder -> Parser [Token]
loop Builder
acc = do
Text
chunk <- (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'<')
let acc' :: Builder
acc' = Builder
acc Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Text -> Builder
B.fromText Text
chunk
finish :: Parser [Token]
finish = [Token] -> Parser [Token]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token
open, Text -> Token
Text (Text -> Token) -> Text -> Token
forall a b. (a -> b) -> a -> b
$ Text -> Text
L.toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Builder -> Text
B.toLazyText Builder
acc', Text -> Token
TagClose Text
"script"]
hasContent :: Parser [Token]
hasContent = (Text -> Parser Text
string Text
"/script>" Parser Text -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser [Token]
finish) Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser [Token]
loop (Builder
acc' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"<")
(Char -> Parser Char
char Char
'<' Parser Char -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser [Token]
hasContent) Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser [Token]
finish
tokens :: Parser [Token]
tokens :: Parser [Token]
tokens = do
Token
t <- Parser Token
token
case Token
t of
TagOpen Text
"script" Map Text Text
_ Bool
False -> Token -> Parser [Token]
tillScriptEnd Token
t
Text Text
text0 -> do
let parseText :: Parser Text
parseText = do
Text Text
text <- Parser Token
token
Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
text
[Text]
texts <- Parser Text -> Parser Text [Text]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Text
parseText
[Token] -> Parser [Token]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Text -> Token
Text (Text -> Token) -> Text -> Token
forall a b. (a -> b) -> a -> b
$ Text -> Text
decodeString (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
T.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ Text
text0 Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
texts]
Token
_ -> [Token] -> Parser [Token]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token
t]
atLeast :: Int -> Parser Text -> Parser Text
atLeast :: Int -> Parser Text -> Parser Text
atLeast Int
0 Parser Text
p = Parser Text
p
atLeast Int
n Parser Text
p = Char -> Text -> Text
T.cons (Char -> Text -> Text) -> Parser Char -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> Parser Text -> Parser Text
atLeast (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Parser Text
p
cond :: a -> a -> Bool -> a
cond :: a -> a -> Bool -> a
cond a
a1 a
a2 Bool
b = if Bool
b then a
a1 else a
a2
(||.) :: Applicative f => f Bool -> f Bool -> f Bool
||. :: f Bool -> f Bool -> f Bool
(||.) = (Bool -> Bool -> Bool) -> f Bool -> f Bool -> f Bool
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||)
in2 :: Eq a => (a,a) -> a -> Bool
in2 :: (a, a) -> a -> Bool
in2 (a
a1,a
a2) a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2
in3 :: Eq a => (a,a,a) -> a -> Bool
in3 :: (a, a, a) -> a -> Bool
in3 (a
a1,a
a2,a
a3) a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a3
boolP :: Parser a -> Parser Bool
boolP :: Parser a -> Parser Bool
boolP Parser a
p = Parser a
p Parser a -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True Parser Bool -> Parser Bool -> Parser Bool
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
maybeP :: Parser a -> Parser (Maybe a)
maybeP :: Parser a -> Parser (Maybe a)
maybeP Parser a
p = a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> Parser a -> Parser (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p Parser (Maybe a) -> Parser (Maybe a) -> Parser (Maybe a)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe a -> Parser (Maybe a)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing
tokenStream :: Monad m
=> ConduitT Text Token m ()
tokenStream :: ConduitT Text Token m ()
tokenStream =
(Text -> Bool) -> ConduitT Text Text m ()
forall (m :: * -> *) a. Monad m => (a -> Bool) -> ConduitT a a m ()
CL.filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) ConduitT Text Text m ()
-> ConduitT Text Token m () -> ConduitT Text Token m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| Parser [Token]
-> ConduitT Text (Either ParseError (PositionRange, [Token])) m ()
forall (m :: * -> *) a b.
(Monad m, AttoparsecInput a) =>
Parser a b
-> ConduitT a (Either ParseError (PositionRange, b)) m ()
CA.conduitParserEither Parser [Token]
tokens ConduitT Text (Either ParseError (PositionRange, [Token])) m ()
-> ConduitM (Either ParseError (PositionRange, [Token])) Token m ()
-> ConduitT Text Token m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| (Either ParseError (PositionRange, [Token]) -> [Token])
-> ConduitM (Either ParseError (PositionRange, [Token])) Token m ()
forall (m :: * -> *) a b.
Monad m =>
(a -> [b]) -> ConduitT a b m ()
CL.concatMap Either ParseError (PositionRange, [Token]) -> [Token]
forall a a p. Show a => Either a (a, p) -> p
go
where
go :: Either a (a, p) -> p
go (Left a
e) = String -> p
forall a. HasCallStack => String -> a
error (String -> p) -> String -> p
forall a b. (a -> b) -> a -> b
$ String
"html-conduit: parse error that should never happen occurred! " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
e
go (Right (a
_, p
tokens')) = p
tokens'
splitAccum :: [Token] -> (Text, [Token])
splitAccum :: [Token] -> (Text, [Token])
splitAccum [] = (Text
forall a. Monoid a => a
mempty, [])
splitAccum ([Token] -> [Token]
forall a. [a] -> [a]
reverse -> (Incomplete Text
s : [Token]
xs)) = (Text
s, [Token] -> [Token]
forall a. [a] -> [a]
reverse [Token]
xs)
splitAccum [Token]
tokens = (Text
forall a. Monoid a => a
mempty, [Token]
tokens)
decodeEntities :: Monad m => ConduitT Token Token m ()
decodeEntities :: ConduitT Token Token m ()
decodeEntities =
ConduitT Token Token m ()
start
where
start :: ConduitT Token Token m ()
start = ConduitT Token Token m (Maybe Token)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Token Token m (Maybe Token)
-> (Maybe Token -> ConduitT Token Token m ())
-> ConduitT Token Token m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Token Token m ()
-> (Token -> ConduitT Token Token m ())
-> Maybe Token
-> ConduitT Token Token m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT Token Token m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()) (\Token
token' -> Token -> ConduitT Token Token m ()
forall (m :: * -> *). Monad m => Token -> ConduitM Token Token m ()
start' Token
token' ConduitT Token Token m ()
-> ConduitT Token Token m () -> ConduitT Token Token m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Token Token m ()
start)
start' :: Token -> ConduitM Token Token m ()
start' (Text Text
t) = (Text -> ConduitT Token Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t ConduitT Token Text m ()
-> ConduitT Token Text m () -> ConduitT Token Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Token Text m ()
forall (m :: * -> *). Monad m => ConduitT Token Text m ()
yieldWhileText) ConduitT Token Text m ()
-> ConduitM Text Token m () -> ConduitM Token Token m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT Text Text m ()
forall (m :: * -> *). Monad m => ConduitT Text Text m ()
decodeEntities' ConduitT Text Text m ()
-> ConduitM Text Token m () -> ConduitM Text Token m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| (Text -> Maybe Token) -> ConduitM Text Token m ()
forall (m :: * -> *) a b.
Monad m =>
(a -> Maybe b) -> ConduitT a b m ()
CL.mapMaybe Text -> Maybe Token
go
start' (TagOpen Text
name Map Text Text
attrs' Bool
bool) = Token -> ConduitM Token Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Text -> Map Text Text -> Bool -> Token
TagOpen Text
name ((Text -> Text) -> Map Text Text -> Map Text Text
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map Text -> Text
decodeString Map Text Text
attrs') Bool
bool)
start' Token
token' = Token -> ConduitM Token Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Token
token'
go :: Text -> Maybe Token
go Text
t
| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"" = Maybe Token
forall a. Maybe a
Nothing
| Bool
otherwise = Token -> Maybe Token
forall a. a -> Maybe a
Just (Text -> Token
Text Text
t)
decodeString :: Text -> Text
decodeString :: Text -> Text
decodeString Text
input =
case Text -> (Text, Text)
makeEntityDecoder Text
input of
(Text
value', Text
remainder)
| Text
value' Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
forall a. Monoid a => a
mempty -> Text
value' Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text
decodeString Text
remainder
| Bool
otherwise -> Text
input
decodeEntities' :: Monad m => ConduitT Text Text m ()
decodeEntities' :: ConduitT Text Text m ()
decodeEntities' =
(Text -> Text) -> ConduitT Text Text m ()
forall (m :: * -> *).
Monad m =>
(Text -> Text) -> ConduitT Text Text m ()
loop Text -> Text
forall a. a -> a
id
where
loop :: (Text -> Text) -> ConduitT Text Text m ()
loop Text -> Text
accum = do
Maybe Text
mchunk <- ConduitT Text Text m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await
let chunk :: Text
chunk = Text -> Text
accum (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
forall a. Monoid a => a
mempty Maybe Text
mchunk
(Text
newStr, Text
remainder) = Text -> (Text, Text)
makeEntityDecoder Text
chunk
Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
newStr
if Maybe Text -> Bool
forall a. Maybe a -> Bool
isJust Maybe Text
mchunk
then (Text -> Text) -> ConduitT Text Text m ()
loop (Text -> Text -> Text
forall a. Monoid a => a -> a -> a
mappend Text
remainder)
else Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
remainder
yieldWhileText :: Monad m => ConduitT Token Text m ()
yieldWhileText :: ConduitT Token Text m ()
yieldWhileText =
ConduitT Token Text m ()
loop
where
loop :: ConduitT Token Text m ()
loop = ConduitT Token Text m (Maybe Token)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Token Text m (Maybe Token)
-> (Maybe Token -> ConduitT Token Text m ())
-> ConduitT Token Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Token Text m ()
-> (Token -> ConduitT Token Text m ())
-> Maybe Token
-> ConduitT Token Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT Token Text m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()) Token -> ConduitT Token Text m ()
go
go :: Token -> ConduitT Token Text m ()
go (Text Text
t) = Text -> ConduitT Token Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t ConduitT Token Text m ()
-> ConduitT Token Text m () -> ConduitT Token Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Token Text m ()
loop
go Token
token' = Token -> ConduitT Token Text m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Token
token'
makeEntityDecoder :: Text -> (Text, Text)
makeEntityDecoder :: Text -> (Text, Text)
makeEntityDecoder = (Builder -> Text) -> (Builder, Text) -> (Text, Text)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Text -> Text
L.toStrict (Text -> Text) -> (Builder -> Text) -> Builder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
B.toLazyText) ((Builder, Text) -> (Text, Text))
-> (Text -> (Builder, Text)) -> Text -> (Text, Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> (Builder, Text)
go
where
go :: Text -> (Builder, Text)
go Text
s =
case (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'&') Text
s of
(Text
_,Text
"") -> (Text -> Builder
B.fromText Text
s, Text
"")
(Text
before,restPlusAmp :: Text
restPlusAmp@(Int -> Text -> Text
T.drop Int
1 -> Text
rest)) ->
case (Char -> Bool) -> Text -> (Text, Text)
T.break (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\Char
c -> Char -> Bool
isNameChar Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#')) Text
rest of
(Text
_,Text
"") -> (Text -> Builder
B.fromText Text
before, Text
restPlusAmp)
(Text
entity,Text
after) -> (Builder
before1 Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
before2, Text
after')
where
before1 :: Builder
before1 = Text -> Builder
B.fromText Text
before
(Builder
before2, Text
after') =
case Maybe Text
mdecoded of
Maybe Text
Nothing -> (Builder -> Builder) -> (Builder, Text) -> (Builder, Text)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((Builder
"&" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Text -> Builder
B.fromText Text
entity) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
after)
Just (Text -> Builder
B.fromText -> Builder
decoded) ->
case Text -> Maybe (Char, Text)
T.uncons Text
after of
Just (Char
';',Text
validAfter) -> (Builder -> Builder) -> (Builder, Text) -> (Builder, Text)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Builder
decoded Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
validAfter)
Just (Char
_invalid,Text
_rest) -> (Builder -> Builder) -> (Builder, Text) -> (Builder, Text)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Builder
decoded Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
after)
Maybe (Char, Text)
Nothing -> (Builder
forall a. Monoid a => a
mempty, Text
s)
mdecoded :: Maybe Text
mdecoded =
if Text
entity Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
forall a. Monoid a => a
mempty
then Maybe Text
forall a. Maybe a
Nothing
else Text -> Maybe Text
forall (m :: * -> *). MonadThrow m => Text -> m Text
decodeEntity Text
entity
isNameStart :: Char -> Bool
isNameStart :: Char -> Bool
isNameStart Char
c =
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':' Bool -> Bool -> Bool
||
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
||
Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
||
Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD6') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xF6') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x37D') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200D') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218F') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')
isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
c =
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
||
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
||
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\xB7' Bool -> Bool -> Bool
||
Char -> Bool
isDigit Char
c Bool -> Bool -> Bool
||
Char -> Bool
isNameStart Char
c Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x0300' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x036F') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x203F' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')