{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Simple test suite.
module Main where

import Data.Either (isRight)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Test.Hspec
import Xeno.SAX (validate, skipDoctype)
import Xeno.DOM (Node, Content(..), parse, name, contents, attributes, children)
import qualified Xeno.DOM.Robust as RDOM
import Xeno.Types
import qualified Debug.Trace as Debug(trace)

-- | Entry point: run the whole Hspec suite.
main :: IO ()
main = hspec spec

-- | All test groups: the plain DOM parser, the shared hexml examples,
-- the robust DOM parser and 'skipDoctype'.
--
-- NOTE(review): several XML literals below look like they have lost their
-- markup (for example, @parse "\\n"@ is expected to yield five child
-- elements). Confirm the literals against the original fixtures before
-- relying on these expectations.
spec :: SpecWith ()
spec = do
  describe "Xeno.DOM tests" $ do
    it "test 1" $ do
      xml <- BS.readFile "data/books-4kb.xml"
      -- 'fromRightE' reports the parser's own error on failure instead of
      -- an opaque irrefutable-pattern failure.
      let dom = fromRightE (parse xml)
          descendants = allChildrens dom
      name dom `shouldBe` "catalog"
      length (contents dom) `shouldBe` 25
      length (children dom) `shouldBe` 12
      length descendants `shouldBe` 84
      length (concatMap attributes descendants) `shouldBe` 12
      concatMap attributes descendants `shouldBe`
        [ ("id","bk101"), ("id","bk102"), ("id","bk103"), ("id","bk104")
        , ("id","bk105"), ("id","bk106"), ("id","bk107"), ("id","bk108")
        , ("id","bk109"), ("id","bk110"), ("id","bk111"), ("id","bk112")
        ]
      map name descendants `shouldBe`
        ( replicate 12 "book"
            ++ concat
                 (replicate 12
                    ["author","title","genre","price","publish_date","description"])
        )

  describe "Xeno.DOM tests" $ do
    it "DOM from bytestring substring" $ do
      let substr = BS.drop 5 "5<8& xml"
          parsedRoot = fromRightE $ parse substr
      name parsedRoot `shouldBe` "valid"

    it "Leading whitespace characters are accepted by parse" $
      isRight (parse "\n") `shouldBe` True

    let doc = parse "\n"

    it "children test" $
      map name (children $ fromRightE doc)
        `shouldBe` ["test", "test", "b", "test", "test"]

    -- 'take 1' keeps the check total: an empty child list fails the
    -- comparison instead of crashing inside 'head'.
    it "attributes" $
      map attributes (take 1 (children $ fromRightE doc))
        `shouldBe` [[("id", "1"), ("extra", "2")]]

    it "xml prologue test" $ do
      let docWithPrologue = "\nHello, world!"
          parsedRoot = fromRightE $ Xeno.DOM.parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"

  hexmlTests

  describe "robust XML tests" $ do
    it "DOM from bytestring substring" $ do
      let substr = BS.drop 5 "5<8& xml"
          parsedRoot = fromRightE $ RDOM.parse substr
      name parsedRoot `shouldBe` "valid"

    it "Leading whitespace characters are accepted by parse" $
      isRight (RDOM.parse "\n") `shouldBe` True

    let doc = RDOM.parse "\n"

    it "children test" $
      map name (children $ fromRightE doc)
        `shouldBe` ["test", "test", "b", "test", "test"]

    -- Same total formulation as in the plain DOM group above.
    it "attributes" $
      map attributes (take 1 (children $ fromRightE doc))
        `shouldBe` [[("id", "1"), ("extra", "2")]]

    it "xml prologue test" $ do
      let docWithPrologue = "\nHello, world!"
          parsedRoot = fromRightE $ RDOM.parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"

    it "html doctype test" $ do
      let docWithPrologue = "\nHello, world!"
          parsedRoot = fromRightE $ RDOM.parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"

    hexmlTests

    it "recovers unclosed tag" $ do
      let parsed = RDOM.parse ""
      Debug.trace (show parsed) $ do
        name (fromRightE parsed) `shouldBe` "a"
        RDOM.attributes (fromRightE parsed) `shouldBe` [("attr", "a")]
        map name (RDOM.children $ fromRightE parsed) `shouldBe` ["img"]

    it "ignores too many closing tags" $ do
      let parsed = RDOM.parse ""
      isRight parsed `shouldBe` True

  describe "skipDoctype" $ do
    it "strips initial doctype declaration" $
      skipDoctype "Hello" `shouldBe` "Hello"

    it "strips doctype after spaces" $
      skipDoctype " \nHello" `shouldBe` "Hello"

    it "does not strip anything after or inside element" $ do
      let insideElt = "Hello"
      skipDoctype insideElt `shouldBe` insideElt

-- | The "hexml tests" group, shared by the plain and the robust suites.
--
-- Every SAX example is checked with 'validate'; every CDATA example is
-- parsed with the plain 'parse' and its 'contents' compared with the
-- expectation (a parse error is rendered with 'show' so that both sides
-- of the comparison have the same type).
hexmlTests :: Spec
hexmlTests =
  describe "hexml tests" $ do
    mapM_
      (\(v, i) -> it (show i) (validate i `shouldBe` v))
      (hexml_examples_sax ++ extra_examples_sax)
    mapM_
      (\(v, i) ->
         it (show i) $
           either (Left . show) Right (contents <$> parse i) `shouldBe` v)
      cdata_tests
    -- If this works without crashing we're happy.
    let nsdoc = ("Content." :: ByteString)
    it "namespaces" $ validate nsdoc `shouldBe` True

-- | SAX validation examples: expected 'validate' result paired with input.
hexml_examples_sax :: [(Bool, ByteString)]
hexml_examples_sax =
  [ (True, "herethere")
  , (True, "")
  , (True, "")
  , (True, "here more text at the end")
  , (True, "") -- SAX doesn't care about tag balancing
  , (False, "\nHello, world!")
  ]

-- | Additional SAX validation examples, in the same format as
-- 'hexml_examples_sax'.
extra_examples_sax :: [(Bool, ByteString)]
extra_examples_sax =
  [ (True, "")
  , (True, "")
  , (True, "")
  ]

-- | We want to make sure that the parser doesn't jump out of the CDATA
-- area prematurely because it encounters a single ].
cdata_tests :: [(Either a [Content], ByteString)]
cdata_tests =
  [ ( Right [CData "Oneliner CDATA."]
    , "")
  , ( Right [CData "This is strong but not XML tags."]
    , "This is strong but not XML tags.]]>")
  , ( Right [CData "A lonely ], sad isn't it?"]
    , "")
  ]

-- | Horrible hack. Don't try this at home.
--
-- Unwraps a 'Right'; on a 'Left' it calls 'error' with the shown
-- exception.
fromRightE :: Either XenoException a -> a
fromRightE = either (error . show) id

-- | Collapse an 'Either' into an applicative: a 'Left' is handled by the
-- given function, a 'Right' is lifted with 'pure'.
mapLeft :: Applicative f => (a -> f b) -> Either a b -> f b
mapLeft f = either f pure

-- | Collapse an 'Either' into an applicative: a 'Left' is lifted with
-- 'pure', a 'Right' is handled by the given function.
mapRight :: Applicative f => (b -> f a) -> Either a b -> f a
mapRight = either pure

-- | All descendants of a node (the node itself excluded), listed level by
-- level: first its children, then their children, and so on.
allChildrens :: Node -> [Node]
allChildrens n = go [n]
  where
    go :: [Node] -> [Node]
    go [] = []
    go level =
      let next = concatMap children level
      in next ++ go next