{-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE OverloadedStrings #-} module Text.XmlHtml.Tests (tests) where import Blaze.ByteString.Builder import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as B import Data.Monoid import Data.String import Data.Text () -- for string instance import qualified Data.Text.Encoding as T import Test.Framework import Test.Framework.Providers.HUnit import Test.HUnit hiding (Test, Node) import Text.Blaze import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5.Attributes as A import Text.Blaze.Renderer.XmlHtml import Text.XmlHtml import Text.XmlHtml.CursorTests import Text.XmlHtml.DocumentTests import Text.XmlHtml.TestCommon import Text.XmlHtml.OASISTest ------------------------------------------------------------------------------ -- The master list of tests to run. ------------------------------------------ ------------------------------------------------------------------------------ tests :: [Test] tests = xmlParsingTests ++ htmlXMLParsingTests ++ htmlParsingQuirkTests ++ xmlRenderingTests ++ htmlXMLRenderingTests ++ htmlRenderingQuirkTests ++ documentTests ++ cursorTests ++ blazeRenderTests ++ testsOASIS ------------------------------------------------------------------------------ -- XML Parsing Tests --------------------------------------------------------- ------------------------------------------------------------------------------ xmlParsingTests :: [Test] xmlParsingTests = [ testCase "byteOrderMark " byteOrderMark, testIt "emptyDocument " emptyDocument, testIt "publicDocType " publicDocType, testIt "systemDocType " systemDocType, testIt "emptyDocType " emptyDocType, testCase "dtdInternalScan " dtdInternalScan, testIt "textOnly " textOnly, testIt "textWithRefs " textWithRefs, testIt "untermRef " untermRef, testIt "textWithCDATA " textWithCDATA, testIt "cdataOnly " cdataOnly, testIt "commentOnly " commentOnly, testIt "emptyElement " emptyElement, testIt "emptyElement2 " emptyElement2, testIt "elemWithText " elemWithText, testIt "xmlDecl " xmlDecl, testIt "procInst " procInst, testIt "badDoctype1 " badDoctype1, testIt "badDoctype2 " badDoctype2, testIt "badDoctype3 " badDoctype3, testIt "badDoctype4 " badDoctype4, testIt "badDoctype5 " badDoctype5, testCase "tagNames " tagNames ] byteOrderMark :: Assertion byteOrderMark = do assertEqual "BOM UTF16BE" (Right $ XmlDocument UTF16BE Nothing []) (parseXML "" $ T.encodeUtf16BE "\xFEFF") assertEqual "BOM UTF16LE" (Right $ XmlDocument UTF16LE Nothing []) (parseXML "" $ T.encodeUtf16LE "\xFEFF") assertEqual "BOM UTF8" (Right $ XmlDocument UTF8 Nothing []) (parseXML "" $ T.encodeUtf8 "\xFEFF") assertEqual "BOM None" (Right $ XmlDocument UTF8 Nothing []) (parseXML "" $ T.encodeUtf8 "") emptyDocument :: Bool emptyDocument = parseXML "" "" == Right (XmlDocument UTF8 Nothing []) publicDocType :: Bool publicDocType = parseXML "" "" == Right (XmlDocument UTF8 (Just (DocType "tag" (Public "foo" "bar") NoInternalSubset)) []) systemDocType :: Bool systemDocType = parseXML "" "" == Right (XmlDocument UTF8 (Just (DocType "tag" (System "foo") NoInternalSubset)) []) emptyDocType :: Bool emptyDocType = parseXML "" "" == Right (XmlDocument UTF8 (Just (DocType "tag" NoExternalID NoInternalSubset)) []) dtdInternalScan :: Assertion dtdInternalScan = do assertEqual "empty" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[]" ))) [])) assertBool "bad brackets" (isLeft $ parseXML "" "") assertEqual "quoted" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[\"]\"]" ))) [])) assertBool "bad quote" (isLeft $ parseXML "" "") assertEqual "nested brackets" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[[[]]]" ))) [])) assertEqual "part comment 1" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[<]" ))) [])) assertEqual "part comment 2" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[[<]]" ))) [])) assertEqual "part comment 3" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[<[]]" ))) [])) assertEqual "part comment 4" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[[") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[]>") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[]" ))) [])) assertEqual "docint 1" (parseXML "" "]>") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[<''<\"\"]" ))) [])) assertEqual "docint2" (parseXML "" "") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[") (Right (XmlDocument UTF8 (Just (DocType "a" NoExternalID (InternalText "[]>") textOnly :: Bool textOnly = parseXML "" "sldhfsklj''a's s" == Right (XmlDocument UTF8 Nothing [TextNode "sldhfsklj''a's s"]) textWithRefs :: Bool textWithRefs = parseXML "" "This is Bob's sled" == Right (XmlDocument UTF8 Nothing [TextNode "This is Bob's sled"]) untermRef :: Bool untermRef = isLeft (parseXML "" "j") textWithCDATA :: Bool textWithCDATA = parseXML "" "Testing c]data]]>" == Right (XmlDocument UTF8 Nothing [TextNode "Testing with c]data"]) cdataOnly :: Bool cdataOnly = parseXML "" "" == Right (XmlDocument UTF8 Nothing [TextNode " Testing a \"comment -->" == Right (XmlDocument UTF8 Nothing [Comment " this a \"comment "]) emptyElement :: Bool emptyElement = parseXML "" "" == Right (XmlDocument UTF8 Nothing [Element "myElement" [] []]) emptyElement2 :: Bool emptyElement2 = parseXML "" "" == Right (XmlDocument UTF8 Nothing [Element "myElement" [] []]) elemWithText :: Bool elemWithText = parseXML "" "text" == Right (XmlDocument UTF8 Nothing [Element "myElement" [] [TextNode "text"]]) xmlDecl :: Bool xmlDecl = parseXML "" "" == Right (XmlDocument UTF8 Nothing []) procInst :: Bool procInst = parseXML "" " parsed!?>" == Right (XmlDocument UTF8 Nothing []) badDoctype1 :: Bool badDoctype1 = isLeft $ parseXML "" "" badDoctype2 :: Bool badDoctype2 = isLeft $ parseXML "" "" badDoctype3 :: Bool badDoctype3 = isLeft $ parseXML "" "" badDoctype4 :: Bool badDoctype4 = isLeft $ parseXML "" "" badDoctype5 :: Bool badDoctype5 = isLeft $ parseXML "" ("") tagNames :: Assertion tagNames = do assertBool "tag name 0" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "") assertBool "tag name 1" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\xc0\&foo />") assertBool "tag name 2" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\xd8\&foo />") assertBool "tag name 3" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\xf8\&foo />") assertBool "tag name 4" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\x370\&foo />") assertBool "tag name 5" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\x37f\&foo />") assertBool "tag name 6" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\x200c\&foo />") assertBool "tag name 7" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\x2070\&foo />") assertBool "tag name 8" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\x2c00\&foo />") assertBool "tag name 9" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\x3001\&foo />") assertBool "tag name 10" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\xf900\&foo />") assertBool "tag name 11" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\xfdf0\&foo />") assertBool "tag name 12" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "<\x10000\&foo />") assertBool "tag name 13" $ id $ isLeft $ parseXML "" (T.encodeUtf8 "<\xd7\&foo />") assertBool "tag name 14" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "") assertBool "tag name 15" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "") assertBool "tag name 16" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "") assertBool "tag name 17" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "") assertBool "tag name 18" $ not $ isLeft $ parseXML "" (T.encodeUtf8 "") assertBool "tag name 19" $ id $ isLeft $ parseXML "" (T.encodeUtf8 "") ------------------------------------------------------------------------------ -- HTML Repetitions of XML Parsing Tests ------------------------------------- ------------------------------------------------------------------------------ htmlXMLParsingTests :: [Test] htmlXMLParsingTests = [ testIt "emptyDocumentHTML " emptyDocumentHTML, testIt "publicDocTypeHTML " publicDocTypeHTML, testIt "systemDocTypeHTML " systemDocTypeHTML, testIt "emptyDocTypeHTML " emptyDocTypeHTML, testIt "textOnlyHTML " textOnlyHTML, testIt "textWithRefsHTML " textWithRefsHTML, testIt "textWithCDataHTML " textWithCDataHTML, testIt "cdataOnlyHTML " cdataOnlyHTML, testIt "commentOnlyHTML " commentOnlyHTML, testIt "emptyElementHTML " emptyElementHTML, testIt "emptyElement2HTML " emptyElement2HTML, testIt "elemWithTextHTML " elemWithTextHTML, testIt "xmlDeclHTML " xmlDeclHTML, testIt "procInstHTML " procInstHTML, testIt "badDoctype1HTML " badDoctype1HTML, testIt "badDoctype2HTML " badDoctype2HTML, testIt "badDoctype3HTML " badDoctype3HTML, testIt "badDoctype4HTML " badDoctype4HTML, testIt "badDoctype5HTML " badDoctype5HTML ] emptyDocumentHTML :: Bool emptyDocumentHTML = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing []) publicDocTypeHTML :: Bool publicDocTypeHTML = parseHTML "" "" == Right (HtmlDocument UTF8 (Just (DocType "tag" (Public "foo" "bar") NoInternalSubset)) []) systemDocTypeHTML :: Bool systemDocTypeHTML = parseHTML "" "" == Right (HtmlDocument UTF8 (Just (DocType "tag" (System "foo") NoInternalSubset)) []) emptyDocTypeHTML :: Bool emptyDocTypeHTML = parseHTML "" "" == Right (HtmlDocument UTF8 (Just (DocType "tag" NoExternalID NoInternalSubset)) []) textOnlyHTML :: Bool textOnlyHTML = parseHTML "" "sldhfsklj''a's s" == Right (HtmlDocument UTF8 Nothing [TextNode "sldhfsklj''a's s"]) textWithRefsHTML :: Bool textWithRefsHTML = parseHTML "" "This is Bob's sled" == Right (HtmlDocument UTF8 Nothing [TextNode "This is Bob's sled"]) textWithCDataHTML :: Bool textWithCDataHTML = parseHTML "" "Testing c]data]]>" == Right (HtmlDocument UTF8 Nothing [TextNode "Testing with c]data"]) cdataOnlyHTML :: Bool cdataOnlyHTML = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [TextNode " Testing a \"comment -->" == Right (HtmlDocument UTF8 Nothing [Comment " this a \"comment "]) emptyElementHTML :: Bool emptyElementHTML = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "myElement" [] []]) emptyElement2HTML :: Bool emptyElement2HTML = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "myElement" [] []]) elemWithTextHTML :: Bool elemWithTextHTML = parseHTML "" "text" == Right (HtmlDocument UTF8 Nothing [Element "myElement" [] [TextNode "text"]]) xmlDeclHTML :: Bool xmlDeclHTML = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing []) procInstHTML :: Bool procInstHTML = parseHTML "" " parsed!?>" == Right (HtmlDocument UTF8 Nothing []) badDoctype1HTML :: Bool badDoctype1HTML = isLeft $ parseHTML "" "" badDoctype2HTML :: Bool badDoctype2HTML = isLeft $ parseHTML "" "" badDoctype3HTML :: Bool badDoctype3HTML = isLeft $ parseHTML "" "" badDoctype4HTML :: Bool badDoctype4HTML = isLeft $ parseHTML "" "" badDoctype5HTML :: Bool badDoctype5HTML = isLeft $ parseHTML "" ("") ------------------------------------------------------------------------------ -- HTML Quirks Parsing Tests ------------------------------------------------- ------------------------------------------------------------------------------ htmlParsingQuirkTests :: [Test] htmlParsingQuirkTests = [ testIt "voidElem " voidElem, testIt "caseInsDoctype1 " caseInsDoctype1, testIt "caseInsDoctype2 " caseInsDoctype2, testIt "voidEmptyElem " voidEmptyElem, testIt "rawTextElem " rawTextElem, testIt "endTagCase " endTagCase, testIt "hexEntityCap " hexEntityCap, testIt "laxAttrName " laxAttrName, testCase "badAttrName " badAttrName, testIt "emptyAttr " emptyAttr, testIt "unquotedAttr " unquotedAttr, testIt "laxAttrVal " laxAttrVal, testIt "ampersandInText " ampersandInText, testIt "omitOptionalEnds " omitOptionalEnds, testIt "omitEndHEAD " omitEndHEAD, testIt "omitEndLI " omitEndLI, testIt "omitEndDT " omitEndDT, testIt "omitEndDD " omitEndDD, testIt "omitEndP " omitEndP, testIt "omitEndRT " omitEndRT, testIt "omitEndRP " omitEndRP, testIt "omitEndOPTGRP " omitEndOPTGRP, testIt "omitEndOPTION " omitEndOPTION, testIt "omitEndCOLGRP " omitEndCOLGRP, testIt "omitEndTHEAD " omitEndTHEAD, testIt "omitEndTBODY " omitEndTBODY, testIt "omitEndTFOOT " omitEndTFOOT, testIt "omitEndTR " omitEndTR, testIt "omitEndTD " omitEndTD, testIt "omitEndTH " omitEndTH, testIt "testNewRefs " testNewRefs ] caseInsDoctype1 :: Bool caseInsDoctype1 = parseHTML "" "" == Right (HtmlDocument UTF8 (Just (DocType "html" (System "foo") NoInternalSubset)) []) caseInsDoctype2 :: Bool caseInsDoctype2 = parseHTML "" "" == Right (HtmlDocument UTF8 (Just (DocType "html" (Public "foo" "bar") NoInternalSubset)) []) voidElem :: Bool voidElem = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "img" [] []]) voidEmptyElem :: Bool voidEmptyElem = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "img" [] []]) rawTextElem :: Bool rawTextElem = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "script" [] [ TextNode "Thistest&"] ]) endTagCase :: Bool endTagCase = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "testing" [] []]) hexEntityCap :: Bool hexEntityCap = parseHTML "" "j" == Right (HtmlDocument UTF8 Nothing [TextNode "\x6a"]) laxAttrName :: Bool laxAttrName = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "test" [("val") assertBool "attr name 1" $ isLeft $ parseHTML "" (T.encodeUtf8 "") assertBool "attr name 2" $ isLeft $ parseHTML "" (T.encodeUtf8 "") assertBool "attr name 3" $ isLeft $ parseHTML "" (T.encodeUtf8 "") assertBool "attr name 4" $ isLeft $ parseHTML "" (T.encodeUtf8 "") emptyAttr :: Bool emptyAttr = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "test" [("attr", "")] []]) unquotedAttr :: Bool unquotedAttr = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "test" [("attr", "you&me")] []]) laxAttrVal :: Bool laxAttrVal = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "test" [("attr", "a & d < b & c")] []]) ampersandInText :: Bool ampersandInText = parseHTML "" "j" == Right (HtmlDocument UTF8 Nothing [TextNode "j"]) omitOptionalEnds :: Bool omitOptionalEnds = parseHTML "" "

" == Right (HtmlDocument UTF8 Nothing [Element "html" [] [ Element "body" [] [ Element "p" [] []]]]) omitEndHEAD :: Bool omitEndHEAD = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "head" [] [], Element "body" [] []]) omitEndLI :: Bool omitEndLI = parseHTML "" "

  • " == Right (HtmlDocument UTF8 Nothing [Element "li" [] [], Element "li" [] []]) omitEndDT :: Bool omitEndDT = parseHTML "" "
    " == Right (HtmlDocument UTF8 Nothing [Element "dt" [] [], Element "dd" [] []]) omitEndDD :: Bool omitEndDD = parseHTML "" "
    " == Right (HtmlDocument UTF8 Nothing [Element "dd" [] [], Element "dt" [] []]) omitEndP :: Bool omitEndP = parseHTML "" "

    " == Right (HtmlDocument UTF8 Nothing [Element "p" [] [], Element "h1" [] []]) omitEndRT :: Bool omitEndRT = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "rt" [] [], Element "rp" [] []]) omitEndRP :: Bool omitEndRP = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "rp" [] [], Element "rt" [] []]) omitEndOPTGRP :: Bool omitEndOPTGRP = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "optgroup" [] [], Element "optgroup" [] []]) omitEndOPTION :: Bool omitEndOPTION = parseHTML "" "