{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} module Text.Seonbi.Html.ScannerSpec (spec) where import Test.Hspec import Text.Seonbi.Html.Entity import Text.Seonbi.Html.Scanner import Text.Seonbi.Html.Tag import Text.Seonbi.Html.TextNormalizer isDone :: Result a -> Bool isDone Done {} = True isDone Fail {} = False shouldBeDone :: (Eq a, Show a) => Result a -> a -> Expectation shouldBeDone result expected = do result `shouldSatisfy` isDone let Done "" r = result r `shouldBe` expected spec :: Spec spec = describe "scanHtml" $ do it "returns an empty list if the input is empty" $ scanHtml "" `shouldBeDone` [] it "parses text nodes" $ scanHtml "foobar" `shouldBeDone` [HtmlText { tagStack = [], rawText = "foobar" }] it "parses HTML comments" $ do scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo " }] scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo- " }] scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo-> " }] scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo-- " }] scanHtml "foo " `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlComment { tagStack = [], comment = " bar " } ] scanHtml " bar" `shouldBeDone` [ HtmlComment { tagStack = [], comment = " foo " } , HtmlText { tagStack = [], rawText = " bar" } ] scanHtml "foo baz" `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlComment { tagStack = [], comment = " bar " } , HtmlText { tagStack = [], rawText = " baz" } ] scanHtml "

foo qux

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlComment { tagStack = [P], comment = " bar baz " } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] scanHtml "

foo qux

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlComment { tagStack = [P], comment = " bar baz " } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] it "parses CDATA sections" $ do scanHtml "" `shouldBeDone` [HtmlCdata { tagStack = [], text = "foo" }] scanHtml "foo " `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlCdata { tagStack = [], text = "bar" } ] scanHtml " bar" `shouldBeDone` [ HtmlCdata { tagStack = [], text = "foo" } , HtmlText { tagStack = [], rawText = " bar" } ] scanHtml "foo baz" `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlCdata { tagStack = [], text = "bar" } , HtmlText { tagStack = [], rawText = " baz" } ] scanHtml "" `shouldBeDone` [HtmlCdata { tagStack = [], text = "foo] " }] scanHtml "

foo qux

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlCdata { tagStack = [P], text = "bar baz" } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] scanHtml "

foo bar baz]]> qux

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlCdata { tagStack = [P], text = "bar baz" } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] it "treats malformed CDATA sections as text nodes" $ (normalizeText <$> scanHtml "" `shouldBeDone` [HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" }] scanHtml "

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P], tag = Em, rawAttributes = "" } ] it "parses HTML start tags having attributes" $ do scanHtml "

" `shouldBeDone` [ HtmlStartTag { tagStack = [] , tag = P , rawAttributes = " class=foo" } ] scanHtml "" `shouldBeDone` [ HtmlStartTag { tagStack = [] , tag = A , rawAttributes = " href=\"https://example.com/\"" } ] it "parses html end tags" $ do scanHtml "

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlEndTag { tagStack = [], tag = P } ] scanHtml "

test

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P], tag = Em, rawAttributes = "" } , HtmlText { tagStack = [P, Em], rawText = "test" } , HtmlEndTag { tagStack = [P], tag = Em } , HtmlEndTag { tagStack = [], tag = P } ] it "can parse even if tags end in wrong order" $ do scanHtml "

test

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P], tag = Em, rawAttributes = "" } , HtmlText { tagStack = [P, Em], rawText = "test" } , HtmlEndTag { tagStack = [Em], tag = P } , HtmlEndTag { tagStack = [], tag = Em } ] scanHtml "

Hello World

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P] , tag = B , rawAttributes = " class=\"baz\"" } , HtmlText { tagStack = [P, B], rawText = "Hel" } , HtmlStartTag { tagStack = [P, B] , tag = I , rawAttributes = "" } , HtmlText { tagStack = [P, B, I], rawText = "lo Wo" } , HtmlEndTag { tagStack = [P, I], tag = B } , HtmlText { tagStack = [P, I], rawText = "rld" } , HtmlEndTag { tagStack = [P], tag = I } , HtmlEndTag { tagStack = [], tag = P } ] it "can parse XHTML-style self-closing tags" $ scanHtml "

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P], tag = Em, rawAttributes = "" } , HtmlEndTag { tagStack = [P], tag = Em } ] it "emits both start and end tags for void tags (e.g.,


)" $ do scanHtml "


" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P], tag = HR, rawAttributes = "" } , HtmlEndTag { tagStack = [P], tag = HR } ] scanHtml "

foo


bar


" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = Div, rawAttributes = "" } , HtmlStartTag { tagStack = [Div], tag = P, rawAttributes = "" } , HtmlText { tagStack = [Div, P], rawText = "foo" } , HtmlEndTag { tagStack = [Div], tag = P } , HtmlStartTag { tagStack = [Div] , tag = HR , rawAttributes = "" } , HtmlEndTag { tagStack = [Div], tag = HR } , HtmlStartTag { tagStack = [Div], tag = P, rawAttributes = "" } , HtmlText { tagStack = [Div, P], rawText = "bar " } , HtmlStartTag { tagStack = [Div, P] , tag = Img , rawAttributes = " src=\"a.jpg\"" } , HtmlEndTag { tagStack = [Div, P], tag = Img } , HtmlEndTag { tagStack = [Div], tag = P } , HtmlStartTag { tagStack = [Div] , tag = HR , rawAttributes = "" } , HtmlEndTag { tagStack = [Div], tag = HR } , HtmlEndTag { tagStack = [], tag = Div } ] scanHtml "" `shouldBeDone` [ HtmlStartTag { tagStack = [] , tag = Embed , rawAttributes = "" } , HtmlEndTag { tagStack = [], tag = Embed } ] it "can parses a flat element" $ scanHtml "
Hello
" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = Div, rawAttributes = "" } , HtmlText { tagStack = [Div], rawText = "Hello" } , HtmlEndTag { tagStack = [], tag = Div } ] it "can parses nested elements" $ do scanHtml "

Hello

World

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = Div, rawAttributes = "" } , HtmlStartTag { tagStack = [Div] , tag = P , rawAttributes = " class=foo" } , HtmlText { tagStack = [Div, P], rawText = "Hello" } , HtmlEndTag { tagStack = [Div], tag = P } , HtmlStartTag { tagStack = [Div] , tag = P , rawAttributes = " class='bar'" } , HtmlText { tagStack = [Div, P], rawText = "World" } , HtmlEndTag { tagStack = [Div], tag = P } , HtmlEndTag { tagStack = [], tag = Div } ] scanHtml "

Hello World

" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P] , tag = B , rawAttributes = " class=\"baz\"" } , HtmlText { tagStack = [P, B], rawText = "Hel" } , HtmlStartTag { tagStack = [P, B] , tag = I , rawAttributes = "" } , HtmlText { tagStack = [P, B, I], rawText = "lo Wo" } , HtmlEndTag { tagStack = [P, B], tag = I } , HtmlText { tagStack = [P, B], rawText = "rld" } , HtmlEndTag { tagStack = [P], tag = B } , HtmlEndTag { tagStack = [], tag = P } ] it "can parses an HTML fragment having multiple root elements" $ scanHtml "Hello world!

\n

Second paragraph.

" `shouldBeDone` [ HtmlStartTag { tagStack = [] , tag = P , rawAttributes = "\nid=\"a\"" } , HtmlText { tagStack = [P], rawText = "Hello " } , HtmlStartTag { tagStack = [P], tag = B, rawAttributes = "" } , HtmlText { tagStack = [P, B], rawText = "world" } , HtmlEndTag { tagStack = [P], tag = B } , HtmlText { tagStack = [P], rawText = "!" } , HtmlEndTag { tagStack = [], tag = P } , HtmlText { tagStack = [], rawText = "\n" } , HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "Second paragraph." } , HtmlEndTag { tagStack = [], tag = P } ] it "treats an invalid tag as a text node" $ (normalizeText <$> scanHtml "") `shouldBeDone` [HtmlText { tagStack = [], rawText = "" }]