{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE FlexibleContexts, FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ImplicitParams #-}
{-# LANGUAGE OverloadedStrings, DisambiguateRecordFields #-}
import qualified Text.XML.Hexml as Hexml
import Control.Monad
import Data.Char
import Data.Foldable
import Data.List (sort)
import Data.Monoid
import qualified Data.ByteString.Char8 as BS
import qualified Data.Vector.Storable as V
import System.Process (callCommand)
import System.FilePath
import Text.Printf
import Config
import Text.Xml.Tiny
import Text.Xml.Tiny.Internal(Node(..), Attribute(..), ParseDetails(ParseDetails), AttributeParseDetails(..), Slice)
import qualified Text.Xml.Tiny.Internal as Slice
examples :: [(Bool, BS.ByteString)]
examples =
[(True,"world")
,(True,"")
,(True, "herethere")
,(True, "")
,(True, "")
,(True, "here more text at the end")
,(False, "")
,(False, "\nHello, world!")
]
xmlFiles = [ "mail", "benchmark" ]
main = do
forM_ examples $ \(parses,src) -> do
case parse src of
Left err ->
when parses $ fail ("Unexpected failure on " ++ BS.unpack src ++ ": " ++ show err)
Right doc -> do
unless parses $ fail ( "Unexpected success on " ++ BS.unpack src)
print src
print doc
let Right doc = parse "\n"
map name (children doc) === ["test","test","b","test","test"]
location (children doc !! 2) === (2,16)
length (childrenBy doc "test") === 4
length (childrenBy doc "b") === 1
length (childrenBy doc "extra") === 0
attributes (head $ children doc) === [Attribute "id" "1", Attribute "extra" "2"]
map (`attributeBy` "id") (childrenBy doc "test") === map (fmap (Attribute "id")) [Just "1", Just "2", Just "4", Nothing]
Right _ <- return $ parse $ " BS.unwords [BS.pack $ "x" ++ show i ++ "='value'" | i <- [1..10000]] <> " />"
Right _ <- return $ parse $ BS.unlines $ replicate 10000 ""
let attrs = ["usd:jpy","test","extra","more","stuff","jpy:usd","xxx","xxxx"]
Right doc <- return $ parse $ " BS.unwords [x <> "='" <> x <> "'" | x <- attrs] <> ">middle"
[c] <- return $ childrenBy doc "test"
forM_ attrs $ \a -> attributeBy c a === Just (Attribute a a)
forM_ ["missing","gone","nothing"] $ \a -> attributeBy c a === Nothing
forM_ xmlFiles $ \name -> do
putStrLn ""
let path = "xml" > name <.> "xml"
let pathGz = path <.> ".bz2"
callCommand $ "bunzip2 -f -k " ++ pathGz
xml <- BS.readFile path
let us = either (error $ "failed to parse: " ++ path) id $ parse xml
checkStructure us
let hexml = either (error $ "Hexml failed to parse: " ++ path ) id $ Hexml.parse xml
testEq us hexml
putStrLn "\nSuccess"
checkFind :: Node -> IO ()
checkFind n = do
forM_ (attributes n) $ \a -> attributeBy n (attributeName a) === Just a
attributeBy n "xxx" === (Nothing :: Maybe Attribute)
let cs = children n
forM_ ("xxx":map name cs) $ \c ->
map outer (filter ((==) c . name) cs) === map outer (childrenBy n c)
mapM_ checkFind $ children n
pairs f (a:b:rest) = f a b && pairs f (b:rest)
pairs f _ = True
checkStructure :: Config => Node -> IO ()
checkStructure n = checkNode [] n where
checkNode path n@Node{attributesV, slices=ParseDetails{attributes}} = do
let nn = children n
unless (sorted nn) $ fail "not sorted"
unless (pairs (nonOverlapping path) nn) $ fail "overlapping children nodes"
unless (pairs nonOverlappingA (Slice.vector attributes attributesV)) $ fail "overlapping attributes"
putChar '.'
forM_ nn $ \n' -> checkNode (name n : path) n'
nonOverlapping :: Config => [BS.ByteString] -> Node -> Node -> Bool
nonOverlapping path n1@Node{slices=ParseDetails{outer=o1}} n2@Node{slices=ParseDetails{outer=o2}} =
nonOverlappingS o1 o2
|| error (printf "%s Overlapping nodes: %s(%s) %s(%s)" (show path) (show$ outer n1) (show $ location n1) (show$ outer n2) (show$ location n2))
nonOverlappingA :: Config => AttributeParseDetails -> AttributeParseDetails -> Bool
nonOverlappingA a1@(AttributeParseDetails n v) a2@(AttributeParseDetails n' v') =
let slices = [n,v,n',v']
in and [ s >= s' || nonOverlappingS s s'
| s <- slices, s' <- slices]
|| error (printf "overlapping attributes" (show a1) (show a2))
nonOverlappingS :: Config => Slice -> Slice -> Bool
nonOverlappingS s1 s2 = Slice.end s1 <= Slice.start s2
|| Slice.end s2 <= Slice.start s1
-- || error (printf "Overlapping slices: %s, %s" (show s1) (show s2))
sorted nn =
let outers = map (Slice.start.Slice.outer.slices) nn
in sort outers == outers
|| error ("Internal error - nodes not sorted: " ++
show [ (name n, Slice.start(Slice.outer(slices n))) | n <- nn])
class (Show a, Show b) => TestEq a b where testEq :: a -> b -> IO ()
(===) :: Config => TestEq a a => a -> a -> IO ()
(===) = testEq
instance (Show a, Eq a) => TestEq a a where
a `testEq` b = if a == b then putChar '.' else error $ "mismatch, " ++ show a ++ " /= " ++ show b
instance TestEq Node Hexml.Node where
testEq n n' = do
name n `testEq` Hexml.name n'
test "attributes" (attributes n) (Hexml.attributes n')
test "contents" (contents n) (Hexml.contents n')
where
test (msg :: String) aa bb
| length aa == length bb = zipWithM_ testEq aa bb
| otherwise = error$ printf "Length of %s does not match (%d /= %d):\n%s\n---------------\n%s" msg (length aa) (length bb) (show aa) (show bb)
instance TestEq Attribute Hexml.Attribute where
Attribute n v `testEq` Hexml.Attribute n' v' = do
n `testEq` n'
v `testEq` v'
instance (Show a, Show b, TestEq a a', TestEq b b') => TestEq (Either a b) (Either a' b') where
Left e `testEq` Left e' = e `testEq` e'
Right x `testEq` Right x' = x `testEq` x'
testEq a b = error $ printf "mismatch in children: %s /= %s" (show a) (show b)
debugShow :: Node -> String
debugShow n =
unlines $
"Nodes buffer: "
: [ " " ++ show n | n <- V.toList $ nodesV n]
++ showNodeContents (Right n)
where
showNodeContents :: Either BS.ByteString Node -> [String]
showNodeContents (Right n) =
[ "Node contents:"
, " name: " ++ show (name n)
, " slices: " ++ show (slices n)
, " attributes: " ++ (show $ attributes n)
, " contents: "
] ++
[ " " ++ l | n' <- contents n, l <- showNodeContents n']
showNodeContents (Left txt) =
[ "Text content: " ++ BS.unpack txt ]