-- |
-- HUnit - Haskell XML Toolbox examples and tests for arrows
--
-- Author: Uwe Schmidt uwe@fh-wedel.de
--
-- Version : $Id: HUnitExample.hs,v 1.4 2006/09/06 06:35:28 hxml Exp $
module Main where
import System
import Test.HUnit
import Text.XML.HXT.Arrow
import Data.Char
{-
aMinimalDoc :: XmlTree
aMinimalDoc = mkRootTree [] (xtext "")
aSimpleXHTMLDoc :: XmlTree
aSimpleXHTMLDoc
= mkRootTree
( xattr a_source "simple.xml" )
( xtext
( concatMap (++ "\n")
[ ""
, ""
, "
A Simple XHTML Document"
, " "
, " "
, ""
]
)
)
mkMinimalDoc :: String -> XmlTree
mkMinimalDoc cont
= mkRootTree [] (xtext cont)
mkInputDoc :: String -> XmlTree
mkInputDoc src
= mkRootTree ( xattr a_source src) []
mkDoc :: String -> XmlTree
mkDoc cont
= mkRootTree
( xattr a_source "a test")
( xtext ("\n" ++ cont) )
-- |
-- compare trees
testEqualTrees :: XmlTrees -> XmlTrees -> Test
testEqualTrees e1 e2
= TestCase $
assertEqual "" e1 e2
-- |
-- testDocFilter creates a test for the application of a pure
-- functional filter to a XML tree. The result is converted into
-- the external string representation and compared with the expected
-- result.
--
-- see also: 'testStateFilter' and 'testEditFilter'
testDocFilter :: String -> XmlFilter -> XmlTree -> Test
testDocFilter expected xfilter doc
= TestCase $
assertEqual "" expected (xshow $ xfilter doc)
-- |
-- testStateFilter creates a test for monadic filters, doing IO and using
-- the internal state, consisting of program options and status information
-- The filter is applied to a XML tree, the result is converted to a string
-- and this result is compared with the expected result.
--
-- see also: 'testEditFilter' for pure functional filters
testStateFilter :: String -> XmlStateFilter () -> XmlTree -> Test
testStateFilter expected xfilter input
= TestCase $
do
res <- run' $ xfilter input
assertEqual "" expected (xshow res)
-- |
-- The test bed for edit filter for external documents.
-- The input tree must specify the external document to be read.
-- This document is checked for wellformedness, canonicalized
-- (char refs substituted, ...) and processed with the edit filter.
-- The result is transformed into the string representation and compared
-- with the expected string.
testEditFilter :: String -> XmlFilter -> XmlTree -> Test
testEditFilter expected xfilter input
= TestCase $
do
res <- run'
$ ( getWellformedDoc -- read the document
.>>
liftMf canonicalizeAllNodes -- normalize the document and remove decl
.>>
liftMf ( processChildren xfilter ) -- apply the editing filter only to the 'real' document
.>> -- nodes, not to nodes surrounding the document tag
putXmlTree -- just trace output
.>>
liftMf getChildren -- just for comparing the document contents
)
input
assertEqual "" expected (xshow res)
-- |
-- tree versus filter construction tests
-- documents may be constructed by tree operations
-- or with the more flexible filter operations applied to an arbitray tree
treeConstructionTests :: Test
treeConstructionTests
= TestLabel "tree and filter construction tests" $
TestList $
map (\ (t, f) -> testEqualTrees t (f undefined) )
[ (xtext "xyz", txt "xyz" )
, (xcmt "cmt", cmt "cmt" )
, (xtag "img" [] [], tag "img" [] [] )
, (xtag "img" [] [], etag "img" )
, (xtag "img" (xattr "b" "13") [], tag "img" [sattr "b" "13"] [] )
, (xtag "img" (xattr "b" "13") [], atag "img" [sattr "b" "13"] )
, (xtag "t" (xattr "a" "42" ++ xattr "b" "id")
(xtext "abc"),
tag "t" [sattr "a" "42", sattr "b" "id"]
[txt "abc"] )
]
-- |
-- construction tests are all applied to the empty document,
-- no information is used from that document,
-- but construction is lifted to the filter level.
-- this is more general than explicit construction.
-- in more complex examples some information may be extracted from
-- the source, some can be constant.
--
-- see also: 'testDocFilter'
constructionTests :: Test
constructionTests
= TestLabel "document construction tests" $
TestList $
map (\ (res, f) -> testDocFilter res f undefined) -- undefined or something else, e.g. emptyRoot
[ ("" , mkXTag "x" none none )
, ("" , tag "x" [] [] ) -- syntactically more comfortable shortcut for mkXTag
, ("" , stag "x" [] ) -- shortcut for tags without attributes
, ("" , atag "x" [] ) -- shortcut for empty tags with attributes
, ("" , etag "x" ) -- shortcut for empty tags without attributes
, (""
, mkXTag "x" (mkXAttr "a" (mkXText "1")) none )
, ("42"
, mkXTag "x" (mkXAttr "a" (mkXText "1")) (mkXText "42") )
, ("42"
, tag "x" [ attr "a" $ txt "1" ] [ txt "42" ] )
, ("42"
, tag "x" [ sattr "a" "1" ] [ txt "42" ] )
, ("" , atag "x" [ sattr "a" "1" ] )
, ("4243"
, tag "x" [ sattr "a" "1"
, sattr "b" "2"
] [ txt "42"
, etag "y"
, txt "43"
] )
, (""
, mkXCmt $ txt " a comment " )
, (""
, cmt "a comment" ) -- short cut for uncomputed comment
, (""
, cmt " an illegal XML comment containing 2 -- in comment text " )
, ("*" , mkXCharRef 42 )
, ("<" , mkXEntityRef "lt" )
, ("]]>"
, mkXCdata $ txt "")
, ("]]>" -- short cut for uncomputed cdata content
, cdata "")
, ("" , mkXPi "abc" $ txt "xyz" )
, ("" , spi "abc" "xyz" ) -- short cut for an uncomputed pi content
]
-- |
-- simple selection tests
simpleSelectionTests :: Test
simpleSelectionTests
= TestLabel "simple tree selection tests" $
TestList $
map (\ (res, f) -> testDocFilter (singleToDoubleQuote res) f simpleDoc)
[ (src , this )
, ("1" , getValue "a" )
, ("" , getValue "b" )
, (body1 , getChildren )
-- text selection
, ("xyz" , getChildren .> isXText )
, ("xyz" , getChildren .> isText "xyz" )
, ("" , getChildren .> isText "abc" )
, ("xyz" , getChildren .> isOfText (>="x") )
, ("" , getChildren .> isOfText (<"x") )
, ("" , getChildren .> isWhiteSpace )
-- tag selection
, ("abc"
, getChildren .> isXTag )
-- comment selection
, ("" , getChildren .> isXCmt )
-- text OR tag selection
, ("abcxyz"
, getChildren .> (isXTag +++ isXText) )
-- generalised OR
, ("abcxyz"
, getChildren .> cat [isXTag, isXText] )
, ("abc" , getChildren .> getChildren )
-- atribute value selection
, ("1" , getValue "a" )
, ("abc"
, getChildren .> isTag "t" )
-- attribute selection
, ("abc"
, getChildren .> hasAttr "a" )
, ("42" , getChildren .> getValue "a" )
, ("42" , getChildren .> hasValue "a" (all isDigit) )
, ("13" , getChildren .> hasValue "b" (all isDigit) )
, ("" , getChildren .> isTag "t" .> hasValue "b" (all isDigit) )
, ("42" , getChildren .> hasValue "a" (== "42") )
, ("" , getChildren .> hasValue "a" (== "43") )
, ("42" , getChildren .> hasValue "a" (\ str -> ((read str)::Int) > 40) )
, ("42" , getChildren .> hasValue "a" (not . null) )
, ("id13" , getChildren .> hasValue "b" (const True) )
, ("13" , getChildren .> isTag "img" .> hasValue "b" (const True) )
, ("" , getChildren .> hasValue "c" (const True) )
, (" a='42' b='id' b='13' x='0'"
, getChildren .> getAttrl )
, (" a='42' b='id'"
, getChildren .> isTag "t" .> getAttrl )
, ("42id" , getChildren .> isTag "t" .> getAttrl .> getChildren )
, (" b='id' b='13'"
, getChildren .> getAttrl .> isAttr "b" )
, (" b='13'" , getChildren .> isTag "img" .> getAttrl .> isAttr "b" )
, ("13" , getChildren .> isTag "img" .> getAttrl .> isAttr "b" .> getChildren )
, ("13" , getChildren .> isTag "img" .> getValue "b" )
, ("timgyz" , getChildren .> getName )
, ("t" , getChildren .> isTag "t" .> getName )
, ("abbx" , getChildren .> getAttrl .> getName )
, ("bb" , getChildren .> getAttrl .> isAttr "b" .> getName )
, ("ab" , getChildren .> isTag "t" .> getAttrl .> getName )
, ("b" , getChildren .> isTag "t" .> getAttrl .> isAttr "b" .> getName )
-- deep search for text or tags
, ("abcxyz" , deep isXText )
, ("abc" , deep (isTag "t") .> deep isXText )
]
where
src = "" ++ body1 ++ ""
body1 = ""
++ "abc"
++ ""
++ "xyz"
++ ""
++ ""
sDocFilter
= tag "r" [ sattr "a" "1" ]
[ cmt "cmt"
, tag "t" [ sattr "a" "42", sattr "b" "id" ]
[ txt "abc"
]
, atag "img" [ sattr "b" "13" ]
, txt "xyz"
, atag "y" [ sattr "x" "0" ]
, etag "z"
]
simpleDoc = head $ sDocFilter undefined
-- |
-- simple tests with namespaces
namespaceTests :: Test
namespaceTests
= TestLabel "namespace tests" $
TestList $
map (\ (res, f) -> testDocFilter (singleToDoubleQuote res) f simpleDoc)
[ (src , this )
-- select all "t" tags, even nested ones (multi) and extract the text from the body
, ("sssaaa" , multi (isTag "t" .> getChildren .> isXText) )
-- select all text within all tags of a specific namespace
, ("sssaaazzzeee" , multi (isXTag .> hasNamespace "dns" .> getChildren .> isXText) )
, ("bbb" , multi (isXTag .> hasNamespace "n1" .> getChildren .> isXText) )
, ("cccdddfffggg" , multi (isXTag .> hasNamespace "n2" .> getChildren .> isXText) )
-- select all text within all tags with a specific local name
, ("sssaaabbbcccdddzzz"
, multi (isXTag .> hasLocalPart "t" .> getChildren .> isXText) )
, ("eee" , multi (isXTag .> hasLocalPart "e" .> getChildren .> isXText) )
-- select all text within all tags with a specific prefix
, ("bbbddd" , multi (isXTag .> hasPrefix "x" .> getChildren .> isXText) )
, ("ccc" , multi (isXTag .> hasPrefix "y" .> getChildren .> isXText) )
, ("zzz" , multi (isXTag .> hasPrefix "z" .> getChildren .> isXText) )
, ("sssaaaeeefffggg"
, multi (isXTag .> hasPrefix "" .> getChildren .> isXText) )
-- select all attribute values of a specific namespace
-- !!! the default namespace is not propagated to attribute names
, ("z5" , multi (isXTag .> getAttrl .> hasNamespace "dns" .> getChildren) )
, ("2z4" , multi (isXTag .> getAttrl .> hasNamespace "n1" .> getChildren) )
, ("x3y3u4" , multi (isXTag .> getAttrl .> hasNamespace "n2" .> getChildren) )
, ("dnsn2" , multi (isXTag .> getAttrl .> hasLocalPart "xmlns" .> getChildren) )
-- select namespaces from all namespace declarations
, ("dnsn1n2dnsn2n2"
, multi (isXTag .> getAttrl .> isNamespaceDecl .> getChildren) )
-- all default namespace declarations
, ("dnsn2" , multi (isXTag .> getAttrl .> hasLocalPart "xmlns" .> getChildren) )
-- all none default namespace declarations
, ("n1n2dnsn2" , multi (isXTag .> getAttrl .> hasPrefix "xmlns" .> getChildren) )
, ("n1n2" , multi (isXTag .> getAttrl .> hasPrefix "xmlns" .> hasLocalPart "x" .> getChildren) )
, ("n2" , multi (isXTag .> getAttrl .> hasPrefix "xmlns" .> hasLocalPart "y" .> getChildren) )
, ("dns" , multi (isXTag .> getAttrl .> hasPrefix "xmlns" .> hasLocalPart "z" .> getChildren) )
]
where
-- the text representation of the tree
-- with ' instead of " for readablility of the string constants
src = ""
++ "sss"
++ "aaa"
++ "bbb"
++ "ccc"
++ "ddd"
++ "zzz"
++ "eee"
++ "fffggg" ++
""
-- the filter for generating the test document
sDocFilter
= tag "t" [ sattr "xmlns" "dns" -- 4 namespace declarations
, sattr "xmlns:x" "n1"
, sattr "xmlns:y" "n2"
, sattr "xmlns:z" "dns"
]
[ txt "sss"
, tag "t" [ sattr "a" "1"
, sattr "x:a" "2" ]
[ txt "aaa"
]
, tag "x:t" [ sattr "a" "x1"
, sattr "y:a" "x3"
]
[ txt "bbb"
]
, tag "y:t" [ sattr "a" "y1"
, sattr "y:a" "y3"
]
[ txt "ccc"
]
, tag "x:t" [ sattr "xmlns:x" "n2" -- redefinition of prefix x:
, sattr "x:a" "u4"
]
[ txt "ddd"
]
, tag "z:t" [ sattr "x:a" "z4"
, sattr "z:a" "z5"
]
[ txt "zzz"
]
, tag "e" [ sattr "b" "42" ]
[ txt "eee"
]
, tag "f" [ sattr "xmlns" "n2" -- redefinition of default namespace
]
[ txt "fff"
, tag "g" [ ]
[ txt "ggg" ]
]
]
-- create document and propagate namespaces
simpleDoc = head $ (sDocFilter .> propagateNamespaces) undefined
-- |
-- minimal tests for parsing data.
--
-- the tests are document trees with a root
-- node containing a single text node with the document source.
-- That tree is passed to the parser, which substitutes the text node with the parsed xml tree.
-- In general this may be a list of trees containing also the
-- surrounding parts like the xml declaraion, the document type definition
-- possibly comments and processing instructions.
--
-- see also: 'testStateFilter'
parseTests :: Test
parseTests
= TestLabel "minimal parser tests" $
TestList
[ testStateFilter ">/>" parseXmlDoc mini -- the tree inclusive root node
, testStateFilter "" (parseXmlDoc .>> liftMf getChildren) mini -- only the content
, testStateFilter "" (parseXmlDoc .>> liftMf getChildren) min2 -- same content
, testStateFilter "" (parseXmlDoc .>> liftMf getChildren) minErr -- syntax error: content empty
, testStateFilter errr (parseXmlDoc .>> liftMf (getValue a_status)) minErr -- syntax error reported in a_status
]
where
errr = show c_err
mini = mkMinimalDoc ""
min2 = mkMinimalDoc ""
minErr = mkMinimalDoc ""
-- |
-- simple external file containing one tag
mini1 :: XmlTree
mini1 = newDocument "mini1.xml" -- test documents
-- |
-- simple external file containing one tag
mini2 :: XmlTree
mini2 = newDocument "mini2.xml"
-- |
-- simple not existing external file
notThere :: XmlTree
notThere = newDocument "notThere.xml" -- does not exist
-- |
-- These test check the access to external files
-- and error reporting in case of input or parse errors
-- Some test issue tree like trace output of the documents read
-- to show the internal structure and nesting
inputTests :: Test
inputTests
= TestLabel "input tests" $
TestList
[ testStateFilter "mini1.xml"
( putXmlTree .>>
liftMf (getValue a_source)
) mini1
, testStateFilter ok ( liftMf (getValue a_status)
) mini1
, testStateFilter "" ( liftMf getChildren
) mini1
, testStateFilter "" ( getXmlContents .>>
putXmlTree .>>
liftMf getChildren
) mini1
, testStateFilter ok ( getXmlContents .>>
liftMf (getValue a_status)
) mini1
, testStateFilter "4" ( getXmlContents .>>
liftMf getContentLength .>>
putXmlTree .>>
liftMf (getValue a_contentLength)
) mini1
, testStateFilter ok ( getXmlContents .>>
liftMf (getValue a_status)
) mini2
, testStateFilter "" ( getXmlContents .>>
putXmlTree .>>
liftMf getChildren
) notThere
, testStateFilter fat ( getXmlContents .>>
liftMf (getValue a_status)
) notThere
]
where
ok = show c_ok
fat = show c_fatal
-- |
-- A few tests for removing parts of a document tree.
-- As input tree the document in file mini2.xml is used.
removeTests :: Test
removeTests
= TestLabel "document transformation tests" $
TestList
[ testEditFilter allText getAllText mini2
, testEditFilter allData (removeAllWhiteSpace .> getAllText) mini2
, testEditFilter allImp (removeIgnorableData .> getAllText) mini2
, testEditFilter allDat (removeMeta .> getAllText) mini2
]
where
getAllText = deep isXText
containsUnknownOrIgnore = getChildren .> ( isText "unknown"
+++
isText "ignore"
)
removeIgnorableData = processTopDown ( none
`when`
( isTag "data" .> containsUnknownOrIgnore )
)
removeMeta = processTopDown ( none
`when`
( isTag "data" .> hasValue "class" ( == "meta" ) )
)
allText = " ignore unknown important important "
allData = "ignoreunknownimportantimportant"
allImp = " important important "
allDat = " ignore important "
ys :: XmlTree
ys = head $
tag "s:Envelope"
[ sattr "xmlns:s" "http://schemas.xmlsoap.org/soap/envelope/",
sattr "xmlns:xenc" "http://www.w3.org/2001/04/xmlenc#",
sattr "xmlns:eg" "http://example.org/paymentv2",
sattr "xmlns:wsse" "http://schemas.xmlsoap.org/ws/2002/12/secext"
]
[ stag "s:Header"
[ stag "wsse:Security"
[ tag "wsse:BinarySecurityToken"
[ sattr "ValueType" "wsse:Kerberosv5ST",
sattr "EncodingType" "wsse:Base64Binary"
]
[ txt "QMwcAG ..."
]
]
],
stag "s:Body"
[ stag "eg:OrderCurrency"
[ stag "eg:Name"
[ txt "John Smith" ],
tag "eg:Amount"
[ sattr "Currency" "'USD'" ]
[ txt "1000" ],
tag "eg:CreditCard"
[ sattr "Limit" "'5000'",
sattr "Currency" "'GBP'"
]
[ stag "xenc:EncryptedData"
[ atag "xenc:EncryptionMethod"
[ sattr "Algorithm" "http://www.w3.org/2001/04/xmlenc#tripledes-cbc" ]
],
stag "xenc:CipherData"
[ stag "xenc:CipherValue"
[ txt "r5KipsDV ..." ]
]
]
]
]
]
.> propagateNamespaces
.> indentDoc
$ undefined
-}
-- |
-- ausiliary function to make haskell string constants with quotes more readable
singleToDoubleQuote :: String -> String
singleToDoubleQuote
= map (\ c -> if c == '\'' then '"' else c)
testLA :: String -> String -> LA XmlTree XmlTree -> Test
testLA doc expected f
= TestCase $ assertEqual "LA XmlTree XmlTree:" [expected] res
where
res = runLA (xread >>> xshow f) doc
testLAString :: String -> String -> LA XmlTree String -> Test
testLAString doc expected f
= TestCase $ assertEqual "LA XmlTree String:" [expected] res
where
res = runLA (xread >>> f) doc
mkTestSeqLA :: String -> [(String, LA XmlTree XmlTree)] -> [Test]
mkTestSeqLA doc
= map (\ (res, f) -> testLA doc (singleToDoubleQuote res) f)
simpleTests :: Test
simpleTests
= TestLabel "simple LA tests" $
TestList $
mkTestSeqLA doc tests
where
doc = "012"
tests = [ (doc , this )
, ("012" , getChildren )
, ("1" , getChildren >>> isElem )
, ("02" , getChildren >>> isText )
, ("" , getChildren >>> hasName "z" )
, ("" , getChildren >>> (hasName "x" <+> hasName "z") )
, ("1" , getChildren >>> (hasName "y" <+> hasName "z") )
, ("1" , getChildren >>> getChildren )
, ("1" , getChildren >>> getChildren >>> isText )
, ("" , getChildren >>> getChildren >>> isElem )
, ("b" , getAttrValue "a" >>> mkText )
, ("" , getAttrValue "z" >>> mkText )
, ("d" , getChildren >>> getAttrValue "c" >>> mkText )
, ("1" , hasName "x" >>> getChildren >>> hasName "y" >>> getChildren )
, ("012" , deep isText )
, ("12" , deep (hasText (all (/= '0'))) )
, (doc , deep isElem )
, ("1" , getChildren >>> deep isElem )
, ("1" , deep (hasName "y") )
, ("1" , deep (hasAttr "c") )
, ("1" , deep (hasAttr "c") >>> getChildren )
, ("1" , deepest isElem )
, ("0121" , multi isElem )
, ("" , multi isElem >>> replaceChildren none )
, ("xyz" , multi (isElem >>> getName) >>> mkText )
, ("b" , getAttrValue "a" >>> mkText )
, ("" , getAttrValue "aa" >>> mkText )
, ("b" , getAttrValue0 "a" >>> mkText )
, ("z" , withDefault (getAttrValue0 "aa") "z" >>> mkText )
]
nodeSetTests :: Test
nodeSetTests
= TestList $
[ TestLabel "node set and simple XPath tests with getXPathTrees" $
TestList $
mkTestSeqLA doc (testGetXPathTrees tests)
, TestLabel "node set and simple XPath tests with getXPathNodeSet" $
TestList $
mkTestSeqLA doc (testGetXPathNodes tests)
, TestLabel "node set and simple XPath tests with processFromNodeSet" $
TestList $
mkTestSeqLA doc (testProcessXPath processTests)
, TestLabel "node set and simple XPath tests with processXPathTrees" $
TestList $
mkTestSeqLA doc (testProcessXPath' processTests)
]
where
doc = ".0.1.0.2.3.0.3.1.0.4"
testGetXPathTrees = map (\ (r, xp) -> (r, getXPathTrees xp)) -- these arrows are equivalent
testGetXPathNodes = map (\ (r, xp) -> (r, getFromNodeSet $< getXPathNodeSet xp)) -- except for the ordering of the result set
-- which does not matter for these tests
testProcessXPath = map (\ (r, xp, a) -> (r, processFromNodeSet a $< getXPathNodeSet xp))
testProcessXPath' = map (\ (r, xp, a) -> (r, processXPathTrees a xp))
tests = [ (doc , "/x" )
, (".3.0.3.1.0" , "/x/y" )
, (".3.1.0" , "/x/y/x" )
, (".0.2.4" , "/x/text()" )
, (".3.0" , "/x/y/text()" )
, (".1.0.3.1.0" , "/x//x" )
]
processTests
= [ ("xxxxxx", "//text()", changeText (const "x") )
, (".0.1.0.2.3.0x.4", "/x/y/x/text()", changeText (const "x") )
, (".0.1.0.2.3.0.4", "/x/y/x", none )
, (".0.1.0.2.3.0zzz.4", "/x/y/x", txt "zzz" )
, (".0.1.0.2.3.0.3.1.0.4",
"/x/y/x", addAttr "q" "3.2" )
]
-- |
-- the complete set of test cases
allTests :: Test
allTests
= TestList
[ simpleTests
, nodeSetTests
]
main :: IO ()
main
= do
c <- runTestTT allTests
putStrLn $ show c
let errs = errors c
fails = failures c
System.exitWith (codeGet errs fails)
codeGet :: Int -> Int -> ExitCode
codeGet errs fails
| fails > 0 = ExitFailure 2
| errs > 0 = ExitFailure 1
| otherwise = ExitSuccess
-- ----------------------------------------------------------