{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Text.Seonbi.Html.Scanner
    ( Result (..)
    , scanHtml
    ) where

import Data.Char
import Prelude hiding (takeWhile)

import Data.Attoparsec.Text.Lazy
import Data.Map.Strict
import qualified Data.Text
import qualified Data.Text.Lazy

import Text.Seonbi.Html.Entity
import Text.Seonbi.Html.Tag
import Text.Seonbi.Html.TagStack

htmlFragments :: Parser [HtmlEntity]
htmlFragments :: Parser [HtmlEntity]
htmlFragments = do
    [HtmlEntity]
result <- [HtmlEntity] -> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] (Parser [HtmlEntity] -> Parser [HtmlEntity])
-> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
Text.Seonbi.Html.TagStack.empty
    HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
Text.Seonbi.Html.TagStack.empty
    Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput
    [HtmlEntity] -> Parser [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return ([HtmlEntity] -> Parser [HtmlEntity])
-> [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
        HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
result
        HtmlEntity
_ -> [HtmlEntity]
result [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [HtmlEntity
txt]

fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
tagStack' = do
    HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack'
    ([HtmlEntity]
entities, HtmlTagStack
nextStack) <- HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack'
    [HtmlEntity]
nextChunk <- [HtmlEntity] -> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] (Parser [HtmlEntity] -> Parser [HtmlEntity])
-> Parser [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
nextStack
    let chunks :: [HtmlEntity]
chunks = [HtmlEntity]
entities [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [HtmlEntity]
nextChunk
    [HtmlEntity] -> Parser [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return ([HtmlEntity] -> Parser [HtmlEntity])
-> [HtmlEntity] -> Parser [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
        HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
chunks
        HtmlEntity
txt' -> HtmlEntity
txt' HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: [HtmlEntity]
chunks

htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack' = do
    [Text]
texts <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' Parser Text Text
textFragment
    HtmlEntity -> Parser HtmlEntity
forall (m :: * -> *) a. Monad m => a -> m a
return (HtmlEntity -> Parser HtmlEntity)
-> HtmlEntity -> Parser HtmlEntity
forall a b. (a -> b) -> a -> b
$ Text -> HtmlEntity
mkText (Text -> HtmlEntity) -> Text -> HtmlEntity
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
texts
  where
      mkText :: Data.Text.Text -> HtmlEntity
      mkText :: Text -> HtmlEntity
mkText Text
txt = HtmlText :: HtmlTagStack -> Text -> HtmlEntity
HtmlText { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', rawText :: Text
rawText = Text
txt }

textFragment :: Parser Data.Text.Text
textFragment :: Parser Text Text
textFragment = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'<')
    , do
        Char
a <- Char -> Parser Char
char Char
'<'
        Char
b <- (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char) -> (Char -> Bool) -> Parser Char
forall a b. (a -> b) -> a -> b
$ \ Char
c ->
            Bool -> Bool
not (Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'!' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c)
        Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Data.Text.pack [Char
a, Char
b]
    ]

htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack' = [Parser ([HtmlEntity], HtmlTagStack)]
-> Parser ([HtmlEntity], HtmlTagStack)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment HtmlTagStack
tagStack'
    , HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack'
    , HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack'
    , HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack'
    -- fallback:
    , (, HtmlTagStack
tagStack') ([HtmlEntity] -> ([HtmlEntity], HtmlTagStack))
-> (Char -> [HtmlEntity]) -> Char -> ([HtmlEntity], HtmlTagStack)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: []) (HtmlEntity -> [HtmlEntity])
-> (Char -> HtmlEntity) -> Char -> [HtmlEntity]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' (Text -> HtmlEntity) -> (Char -> Text) -> Char -> HtmlEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
Data.Text.singleton
        (Char -> ([HtmlEntity], HtmlTagStack))
-> Parser Char -> Parser ([HtmlEntity], HtmlTagStack)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
    ]

-- https://www.w3.org/TR/html5/syntax.html#comments
htmlComment :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment HtmlTagStack
tagStack' = do
    Text
_ <- Text -> Parser Text Text
string Text
"<!--"
    [Text]
contents <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'-')
        , do
              Char
a <- Char -> Parser Char
char Char
'-'
              Char
b <- Char -> Parser Char
notChar Char
'-'
              Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Data.Text.pack [Char
a, Char
b]
        , do
            Text
a <- Text -> Parser Text Text
string Text
"--"
            Char
b <- Char -> Parser Char
notChar Char
'>'
            Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
        ]
    Text
_ <- Text -> Parser Text Text
string Text
"-->"
    ([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return
        ( [ HtmlComment :: HtmlTagStack -> Text -> HtmlEntity
HtmlComment
                { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
                , comment :: Text
comment = [Text] -> Text
Data.Text.concat [Text]
contents
                }
          ]
        , HtmlTagStack
tagStack'
        )

-- https://www.w3.org/TR/html5/syntax.html#cdata-sections
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack' = do
    Text
_ <- Text -> Parser Text Text
string Text
"<![CDATA["
    [Text]
contents <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
']')
        , do
            Char
a <- Char -> Parser Char
char Char
']'
            Char
b <- Char -> Parser Char
notChar Char
']'
            Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Data.Text.pack [Char
a, Char
b]
        , do
            Text
a <- Text -> Parser Text Text
string Text
"]]"
            Char
b <- Char -> Parser Char
notChar Char
'>'
            Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
        ]
    Text
_ <- Text -> Parser Text Text
string Text
"]]>"
    ([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return
        ( [HtmlCdata :: HtmlTagStack -> Text -> HtmlEntity
HtmlCdata { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', text :: Text
text = [Text] -> Text
Data.Text.concat [Text]
contents }]
        , HtmlTagStack
tagStack'
        )

-- https://www.w3.org/TR/html5/syntax.html#start-tags
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack' = do
    Char
_ <- Char -> Parser Char
char Char
'<'
    HtmlTag
tag' <- Parser HtmlTag
htmlTag
    [Text]
attributes <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ do
            Char
s <- Char -> Parser Char
char Char
'"'
            Text
c <- (Char -> Bool) -> Parser Text Text
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"')
            Char
e <- Char -> Parser Char
char Char
'"'
            Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
        , do
            Char
s <- Char -> Parser Char
char Char
'\''
            Text
c <- (Char -> Bool) -> Parser Text Text
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'')
            Char
e <- Char -> Parser Char
char Char
'\''
            Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
        , (Char -> Bool) -> Parser Text Text
takeWhile1 ((Char -> Bool) -> Parser Text Text)
-> (Char -> Bool) -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'>'
        ]
    Char
selfClosing <- Char -> Parser Char -> Parser Char
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Char
' ' (Parser Char -> Parser Char) -> Parser Char -> Parser Char
forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
'/'
    Char
_ <- Char -> Parser Char
char Char
'>'
    let ([HtmlEntity]
trailingEntities, HtmlTagStack
nextTagStack) =
            if Char
selfClosing Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' HtmlTagKind -> HtmlTagKind -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlTagKind
Void
            then ([HtmlEndTag :: HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', tag :: HtmlTag
tag = HtmlTag
tag' }], HtmlTagStack
tagStack')
            else ([], HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
tag' HtmlTagStack
tagStack')
    ([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return
        ( HtmlStartTag :: HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag
            { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
            , tag :: HtmlTag
tag = HtmlTag
tag'
            , rawAttributes :: Text
rawAttributes = [Text] -> Text
Data.Text.concat [Text]
attributes
            } HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: [HtmlEntity]
trailingEntities
        , HtmlTagStack
nextTagStack
        )

-- https://www.w3.org/TR/html5/syntax.html#end-tags
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack' = do
    Text
_ <- Text -> Parser Text Text
string Text
"</"
    HtmlTag
tag' <- Parser HtmlTag
htmlTag
    Char
_ <- Char -> Parser Char
char Char
'>'
    ([HtmlEntity], HtmlTagStack) -> Parser ([HtmlEntity], HtmlTagStack)
forall (m :: * -> *) a. Monad m => a -> m a
return (([HtmlEntity], HtmlTagStack)
 -> Parser ([HtmlEntity], HtmlTagStack))
-> ([HtmlEntity], HtmlTagStack)
-> Parser ([HtmlEntity], HtmlTagStack)
forall a b. (a -> b) -> a -> b
$ case HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' of
        HtmlTagKind
Void -> ([], HtmlTagStack
tagStack')
        HtmlTagKind
_ ->
            let
                nextTagStack :: HtmlTagStack
nextTagStack = HtmlTag -> HtmlTagStack -> HtmlTagStack
pop HtmlTag
tag' HtmlTagStack
tagStack'
            in
                ( [HtmlEndTag :: HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
nextTagStack, tag :: HtmlTag
tag = HtmlTag
tag' }]
                , HtmlTagStack
nextTagStack
                )

htmlTag :: Parser HtmlTag
htmlTag :: Parser HtmlTag
htmlTag = do
    Text
name <- Parser Text Text
tagName
    case Text -> Map Text HtmlTag -> Maybe HtmlTag
forall k a. Ord k => k -> Map k a -> Maybe a
Data.Map.Strict.lookup (Text -> Text
Data.Text.toLower Text
name) Map Text HtmlTag
htmlTagNames of
        Just HtmlTag
t -> HtmlTag -> Parser HtmlTag
forall (m :: * -> *) a. Monad m => a -> m a
return HtmlTag
t
        Maybe HtmlTag
_ -> String -> Parser HtmlTag
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"failed to parse; invalid tag: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
Data.Text.unpack Text
name)

tagName :: Parser Data.Text.Text
tagName :: Parser Text Text
tagName = do
    Char
first <- (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char) -> (Char -> Bool) -> Parser Char
forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c
    Text
rest <- (Char -> Bool) -> Parser Text Text
takeWhile ((Char -> Bool) -> Parser Text Text)
-> (Char -> Bool) -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c
    Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
Data.Text.cons Char
first Text
rest

scanHtml :: Data.Text.Lazy.Text -> Result [HtmlEntity]
scanHtml :: Text -> Result [HtmlEntity]
scanHtml = Parser [HtmlEntity] -> Text -> Result [HtmlEntity]
forall a. Parser a -> Text -> Result a
parse Parser [HtmlEntity]
htmlFragments