{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} module Text.Seonbi.Html.ScannerSpec (spec) where import Test.Hspec import Text.Seonbi.Html.Entity import Text.Seonbi.Html.Scanner import Text.Seonbi.Html.Tag import Text.Seonbi.Html.TextNormalizer isDone :: Result a -> Bool isDone Done {} = True isDone Fail {} = False shouldBeDone :: (Eq a, Show a) => Result a -> a -> Expectation shouldBeDone result expected = do result `shouldSatisfy` isDone let Done "" r = result r `shouldBe` expected spec :: Spec spec = describe "scanHtml" $ do it "returns an empty list if the input is empty" $ scanHtml "" `shouldBeDone` [] it "parses text nodes" $ scanHtml "foobar" `shouldBeDone` [HtmlText { tagStack = [], rawText = "foobar" }] it "parses HTML comments" $ do scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo " }] scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo- " }] scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo-> " }] scanHtml "" `shouldBeDone` [HtmlComment { tagStack = [], comment = " foo-- " }] scanHtml "foo " `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlComment { tagStack = [], comment = " bar " } ] scanHtml " bar" `shouldBeDone` [ HtmlComment { tagStack = [], comment = " foo " } , HtmlText { tagStack = [], rawText = " bar" } ] scanHtml "foo baz" `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlComment { tagStack = [], comment = " bar " } , HtmlText { tagStack = [], rawText = " baz" } ] scanHtml "
foo qux
" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlComment { tagStack = [P], comment = " bar baz " } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] scanHtml "foo qux
" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlComment { tagStack = [P], comment = " bar baz " } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] it "parses CDATA sections" $ do scanHtml "" `shouldBeDone` [HtmlCdata { tagStack = [], text = "foo" }] scanHtml "foo " `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlCdata { tagStack = [], text = "bar" } ] scanHtml " bar" `shouldBeDone` [ HtmlCdata { tagStack = [], text = "foo" } , HtmlText { tagStack = [], rawText = " bar" } ] scanHtml "foo baz" `shouldBeDone` [ HtmlText { tagStack = [], rawText = "foo " } , HtmlCdata { tagStack = [], text = "bar" } , HtmlText { tagStack = [], rawText = " baz" } ] scanHtml "" `shouldBeDone` [HtmlCdata { tagStack = [], text = "foo] " }] scanHtml "foo qux
" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlCdata { tagStack = [P], text = "bar baz" } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] scanHtml "foo bar baz]]> qux
" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "foo " } , HtmlCdata { tagStack = [P], text = "bar baz" } , HtmlText { tagStack = [P], rawText = " qux" } , HtmlEndTag { tagStack = [], tag = P } ] it "treats malformed CDATA sections as text nodes" $ (normalizeText <$> scanHtml "" `shouldBeDone` [HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" }] scanHtml "" `shouldBeDone`
[ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" }
, HtmlStartTag { tagStack = [P], tag = Em, rawAttributes = "" }
]
it "parses HTML start tags having attributes" $ do
scanHtml " " `shouldBeDone`
[ HtmlStartTag
{ tagStack = []
, tag = P
, rawAttributes = " class=foo"
}
]
scanHtml "" `shouldBeDone`
[ HtmlStartTag
{ tagStack = []
, tag = A
, rawAttributes = " href=\"https://example.com/\""
}
]
it "parses html end tags" $ do
scanHtml "" `shouldBeDone`
[ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" }
, HtmlEndTag { tagStack = [], tag = P }
]
scanHtml " test test
Hello World
" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P] , tag = B , rawAttributes = " class=\"baz\"" } , HtmlText { tagStack = [P, B], rawText = "Hel" } , HtmlStartTag { tagStack = [P, B] , tag = I , rawAttributes = "" } , HtmlText { tagStack = [P, B, I], rawText = "lo Wo" } , HtmlEndTag { tagStack = [P, I], tag = B } , HtmlText { tagStack = [P, I], rawText = "rld" } , HtmlEndTag { tagStack = [P], tag = I } , HtmlEndTag { tagStack = [], tag = P } ] it "can parse XHTML-style self-closing tags" $ scanHtml "" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P], tag = Em, rawAttributes = "" } , HtmlEndTag { tagStack = [P], tag = Em } ] it "emits both start and end tags for void tags (e.g.,
foo
bar
Hello
Hello World
" `shouldBeDone` [ HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlStartTag { tagStack = [P] , tag = B , rawAttributes = " class=\"baz\"" } , HtmlText { tagStack = [P, B], rawText = "Hel" } , HtmlStartTag { tagStack = [P, B] , tag = I , rawAttributes = "" } , HtmlText { tagStack = [P, B, I], rawText = "lo Wo" } , HtmlEndTag { tagStack = [P, B], tag = I } , HtmlText { tagStack = [P, B], rawText = "rld" } , HtmlEndTag { tagStack = [P], tag = B } , HtmlEndTag { tagStack = [], tag = P } ] it "can parses an HTML fragment having multiple root elements" $ scanHtml "Hello world!
\nSecond paragraph.
" `shouldBeDone` [ HtmlStartTag { tagStack = [] , tag = P , rawAttributes = "\nid=\"a\"" } , HtmlText { tagStack = [P], rawText = "Hello " } , HtmlStartTag { tagStack = [P], tag = B, rawAttributes = "" } , HtmlText { tagStack = [P, B], rawText = "world" } , HtmlEndTag { tagStack = [P], tag = B } , HtmlText { tagStack = [P], rawText = "!" } , HtmlEndTag { tagStack = [], tag = P } , HtmlText { tagStack = [], rawText = "\n" } , HtmlStartTag { tagStack = [], tag = P, rawAttributes = "" } , HtmlText { tagStack = [P], rawText = "Second paragraph." } , HtmlEndTag { tagStack = [], tag = P } ] it "treats an invalid tag as a text node" $ (normalizeText <$> scanHtml "