{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Text.Seonbi.Html.Scanner
( Result (..)
, scanHtml
) where
import Data.Char
import Prelude hiding (takeWhile)
import Data.Attoparsec.Text.Lazy
import Data.Map.Strict
import qualified Data.Text
import qualified Data.Text.Lazy
import Text.Seonbi.Html.Entity
import Text.Seonbi.Html.Tag
import Text.Seonbi.Html.TagStack
htmlFragments :: Parser [HtmlEntity]
htmlFragments :: Parser [HtmlEntity]
htmlFragments = do
[HtmlEntity]
result <- [HtmlEntity] -> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] (Parser [HtmlEntity] -> Parser [HtmlEntity])
-> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
Text.Seonbi.Html.TagStack.empty
HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
Text.Seonbi.Html.TagStack.empty
Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput
[HtmlEntity] -> Parser [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return ([HtmlEntity] -> Parser [HtmlEntity])
-> [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
result
HtmlEntity
_ -> [HtmlEntity]
result [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [HtmlEntity
txt]
fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
tagStack' = do
HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack'
([HtmlEntity]
entities, HtmlTagStack
nextStack) <- HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack'
[HtmlEntity]
nextChunk <- [HtmlEntity] -> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] (Parser [HtmlEntity] -> Parser [HtmlEntity])
-> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
nextStack
let chunks :: [HtmlEntity]
chunks = [HtmlEntity]
entities [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [HtmlEntity]
nextChunk
[HtmlEntity] -> Parser [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return ([HtmlEntity] -> Parser [HtmlEntity])
-> [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
chunks
HtmlEntity
txt' -> HtmlEntity
txt' HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: [HtmlEntity]
chunks
htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack' = do
[Text]
texts <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' Parser Text Text
textFragment
HtmlEntity -> Parser HtmlEntity
forall (m :: * -> *) a. Monad m => a -> m a
return (HtmlEntity -> Parser HtmlEntity)
-> HtmlEntity -> Parser HtmlEntity
forall a b. (a -> b) -> a -> b
$ Text -> HtmlEntity
mkText (Text -> HtmlEntity) -> Text -> HtmlEntity
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
texts
where
mkText :: Data.Text.Text -> HtmlEntity
mkText :: Text -> HtmlEntity
mkText Text
txt = HtmlText :: HtmlTagStack -> Text -> HtmlEntity
HtmlText { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', rawText :: Text
rawText = Text
txt }
textFragment :: Parser Data.Text.Text
textFragment :: Parser Text Text
textFragment = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'<')
, do
Char
a <- Char -> Parser Char
char Char
'<'
Char
b <- (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char) -> (Char -> Bool) -> Parser Char
forall a b. (a -> b) -> a -> b
$ \ Char
c ->
Bool -> Bool
not (Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'!' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c)
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Data.Text.pack [Char
a, Char
b]
]
htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack' = [Parser ([HtmlEntity], HtmlTagStack)]
-> Parser ([HtmlEntity], HtmlTagStack)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment HtmlTagStack
tagStack'
, HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack'
, HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack'
, HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack'
, (, HtmlTagStack
tagStack') ([HtmlEntity] -> ([HtmlEntity], HtmlTagStack))
-> (Char -> [HtmlEntity]) -> Char -> ([HtmlEntity], HtmlTagStack)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: []) (HtmlEntity -> [HtmlEntity])
-> (Char -> HtmlEntity) -> Char -> [HtmlEntity]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' (Text -> HtmlEntity) -> (Char -> Text) -> Char -> HtmlEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
Data.Text.singleton
(Char -> ([HtmlEntity], HtmlTagStack))
-> Parser Char -> Parser ([HtmlEntity], HtmlTagStack)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
]
htmlComment :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
HtmlTagStack
tagStack' = do
Text
_ <- Text -> Parser Text Text
string Text
"<!--"
[Text]
contents <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'-')
, do
Char
a <- Char -> Parser Char
char Char
'-'
Char
b <- Char -> Parser Char
notChar Char
'-'
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Data.Text.pack [Char
a, Char
b]
, do
Text
a <- Text -> Parser Text Text
string Text
"--"
Char
b <- Char -> Parser Char
notChar Char
'>'
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
]
Text
_ <- Text -> Parser Text Text
string Text
"-->"
([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return
( [ HtmlComment :: HtmlTagStack -> Text -> HtmlEntity
HtmlComment
{ tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
, comment :: Text
comment = [Text] -> Text
Data.Text.concat [Text]
contents
}
]
, HtmlTagStack
tagStack'
)
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack' = do
Text
_ <- Text -> Parser Text Text
string Text
"<![CDATA["
[Text]
contents <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
']')
, do
Char
a <- Char -> Parser Char
char Char
']'
Char
b <- Char -> Parser Char
notChar Char
']'
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Data.Text.pack [Char
a, Char
b]
, do
Text
a <- Text -> Parser Text Text
string Text
"]]"
Char
b <- Char -> Parser Char
notChar Char
'>'
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
]
Text
_ <- Text -> Parser Text Text
string Text
"]]>"
([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return
( [HtmlCdata :: HtmlTagStack -> Text -> HtmlEntity
HtmlCdata { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', text :: Text
text = [Text] -> Text
Data.Text.concat [Text]
contents }]
, HtmlTagStack
tagStack'
)
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack' = do
Char
_ <- Char -> Parser Char
char Char
'<'
HtmlTag
tag' <- Parser HtmlTag
htmlTag
[Text]
attributes <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ do
Char
s <- Char -> Parser Char
char Char
'"'
Text
c <- (Char -> Bool) -> Parser Text Text
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"')
Char
e <- Char -> Parser Char
char Char
'"'
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
, do
Char
s <- Char -> Parser Char
char Char
'\''
Text
c <- (Char -> Bool) -> Parser Text Text
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'')
Char
e <- Char -> Parser Char
char Char
'\''
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
, (Char -> Bool) -> Parser Text Text
takeWhile1 ((Char -> Bool) -> Parser Text Text)
-> (Char -> Bool) -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'>'
]
Char
selfClosing <- Char -> Parser Char -> Parser Char
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Char
' ' (Parser Char -> Parser Char) -> Parser Char -> Parser Char
forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
'/'
Char
_ <- Char -> Parser Char
char Char
'>'
let ([HtmlEntity]
trailingEntities, HtmlTagStack
nextTagStack) =
if Char
selfClosing Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' HtmlTagKind -> HtmlTagKind -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlTagKind
Void
then ([HtmlEndTag :: HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', tag :: HtmlTag
tag = HtmlTag
tag' }], HtmlTagStack
tagStack')
else ([], HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
tag' HtmlTagStack
tagStack')
([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return
( HtmlStartTag :: HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag
{ tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
, tag :: HtmlTag
tag = HtmlTag
tag'
, rawAttributes :: Text
rawAttributes = [Text] -> Text
Data.Text.concat [Text]
attributes
} HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: [HtmlEntity]
trailingEntities
, HtmlTagStack
nextTagStack
)
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack' = do
Text
_ <- Text -> Parser Text Text
string Text
"</"
HtmlTag
tag' <- Parser HtmlTag
htmlTag
Char
_ <- Char -> Parser Char
char Char
'>'
([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return (([HtmlEntity], HtmlTagStack)
-> Parser ([HtmlEntity], HtmlTagStack))
-> ([HtmlEntity], HtmlTagStack)
-> Parser ([HtmlEntity], HtmlTagStack)
forall a b. (a -> b) -> a -> b
$ case HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' of
HtmlTagKind
Void -> ([], HtmlTagStack
tagStack')
HtmlTagKind
_ ->
let
nextTagStack :: HtmlTagStack
nextTagStack = HtmlTag -> HtmlTagStack -> HtmlTagStack
pop HtmlTag
tag' HtmlTagStack
tagStack'
in
( [HtmlEndTag :: HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
nextTagStack, tag :: HtmlTag
tag = HtmlTag
tag' }]
, HtmlTagStack
nextTagStack
)
htmlTag :: Parser HtmlTag
htmlTag :: Parser HtmlTag
htmlTag = do
Text
name <- Parser Text Text
tagName
case Text -> Map Text HtmlTag -> Maybe HtmlTag
forall k a. Ord k => k -> Map k a -> Maybe a
Data.Map.Strict.lookup (Text -> Text
Data.Text.toLower Text
name) Map Text HtmlTag
htmlTagNames of
Just HtmlTag
t -> HtmlTag -> Parser HtmlTag
forall (m :: * -> *) a. Monad m => a -> m a
return HtmlTag
t
Maybe HtmlTag
_ -> String -> Parser HtmlTag
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"failed to parse; invalid tag: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
Data.Text.unpack Text
name)
tagName :: Parser Data.Text.Text
tagName :: Parser Text Text
tagName = do
Char
first <- (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char) -> (Char -> Bool) -> Parser Char
forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c
Text
rest <- (Char -> Bool) -> Parser Text Text
takeWhile ((Char -> Bool) -> Parser Text Text)
-> (Char -> Bool) -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c
Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
Data.Text.cons Char
first Text
rest
scanHtml :: Data.Text.Lazy.Text -> Result [HtmlEntity]
scanHtml :: Text -> Result [HtmlEntity]
scanHtml = Parser [HtmlEntity] -> Text -> Result [HtmlEntity]
forall a. Parser a -> Text -> Result a
parse Parser [HtmlEntity]
htmlFragments