module Text.XML.HXQ.XTree where
import System.IO
import Char(isSpace)
import XMLParse(XMLEvent(..))
import HXML(Name,AttList)
import Text.XML.HXQ.Parser(Ast(..))
import System.CPUTime
type Prefix = String
type URI = String
type LocalName = String
data QName = QName { prefix :: Prefix, uri :: URI, localName :: LocalName }
instance Eq QName where
(QName _ u1 ln1) == (QName _ u2 ln2)
= u1 == u2 && ln1 == ln2
instance Ord QName where
(QName _ u1 ln1) <= (QName _ u2 ln2)
= u1 < u2 || (u1 == u2 && ln1 <= ln2)
instance Show QName where
show (QName "" _ ln) = ln
show (QName ns _ ln) = ns++":"++ln
type Attributes = [(QName,String)]
matchQName x@(QName _ u1 ln1) y@(QName _ u2 ln2)
= (u1 == u2 || u1 == "" || u2 == "")
&& (ln1 == ln2 || ln2 == "*" || ln1 == "*")
documentRootTag = QName "" "" "_document"
data XTree = XElem !QName !Attributes !Int XTree [XTree]
| XAttr !QName !String
| XText !String
| XInt !Int
| XFloat !Double
| XBool !Bool
| XPI String String
| XGERef String
| XComment String
| XError String
| XNull
| XType Type
| XNoPad
deriving Eq
type XSeq = [XTree]
emptyElem :: XTree -> Bool
emptyElem e
= case e of
XElem _ al _ _ xs
-> emptyAL al && all emptyElem xs && not (null xs)
XText text -> all isSpace text
XNull -> True
XNoPad -> True
_ -> False
where emptyAL = all (\(a,v) -> case (a,v) of
(_,"") -> True
(QName _ _ ('_':_),_) -> True
_ -> False)
showAL :: Attributes -> String
showAL = foldr (\(a,v) r -> case (a,v) of
(QName _ _ ('_':_),_) -> r
_ -> " "++show a++"=\""++v++"\""++r) []
showXT :: XTree -> Bool -> String
showXT e pad
= case e of
XElem tag _ _ _ xs | tag == documentRootTag -> showXS xs
XElem tag al _ _ [] -> "<"++show tag++showAL al++"/>"
XElem _ _ _ _ _ | emptyElem e -> ""
XElem tag al _ _ xs | all emptyElem xs -> "<"++show tag++showAL al++"/>"
XElem tag al _ _ xs -> "<"++show tag++showAL al++">"++showXS xs++"</"++show tag++">"
XAttr tag val -> p++show tag++"=\""++val++"\""
XText text -> p++text
XInt n -> p++show n
XFloat n -> p++show n
XBool v -> p++if v then "true" else "false"
XComment s -> "<!--"++s++"-->"
XPI n s -> "<?"++n++" "++s++">"
XError s -> error s
XNull -> "?"
XType tp -> show tp
_ -> ""
where p = if pad then " " else ""
showXS :: XSeq -> String
showXS [] = ""
showXS (x:xs) = showXT x False ++ sXS xs
where sXS (XNoPad:x:xs) = (showXT x False) ++ sXS xs
sXS (x:xs) = (showXT x True) ++ sXS xs
sXS _ = ""
instance Show XTree where
show t = showXT t False
putXSeq :: XSeq -> IO ()
putXSeq xs = hSetBuffering stdout NoBuffering >> putStrLn (showXS xs)
type TVar = Int
type TQualifier = Char
data Type
= TVariable TVar
| TBase QName
| TItem String
| TNamed QName
| TElement String Type
| TAttribute String Type
| TAny
| TEmpty
| TSequence Type Type
| TInterleaving Type Type
| TChoice Type Type
| TQualified Type TQualifier
deriving Eq
showType :: Type -> Int -> String
showType t prec
= case t of
TVariable s -> '#':show s
TBase s -> show s
TItem s -> s++"()"
TNamed s -> show s
TEmpty -> "()"
TAny -> "xs:any"
TElement n TAny -> "element "++n
TAttribute a TAny -> "attribute "++a
TElement n t
-> "element "++n++" { "++showType t 3++" }"
TAttribute a t
-> "attribute "++a++" { "++showType t 3++" }"
TSequence t1 t2 -> paren 3 prec (showType t1 3++", "++showType t2 3)
TInterleaving t1 t2 -> paren 3 prec (showType t1 2++" & "++showType t2 2)
TChoice t1 t2 -> paren 2 prec (showType t1 2++" | "++showType t2 2)
TQualified t c -> paren 4 prec (showType t 4++[c])
where paren p1 p2 s | p1<p2 = "("++s++")"
| otherwise = s
instance Show Type where
show t = showType t 4
type XMLSchema = [(LocalName,Type)]
type Schemas = [(URI,XMLSchema)]
type NamespacePrefixes = [(Prefix,URI)]
data NS = NS { defaultElementNS :: URI, defaultFunctionNS :: URI,
prefixes :: NamespacePrefixes, schemas :: Schemas }
deriving Show
tag :: String -> NS -> QName
tag s ns
= case span (/= ':') s of
(_,"") -> QName "" (defaultElementNS ns) s
(s1,_:s2) -> case lookup s1 (prefixes ns) of
Just u -> QName s1 u s2
Nothing -> error ("Undeclared element namespace: "++s1)
attributeTag s ns
= case span (/= ':') s of
(_,"") -> QName "" "" s
(s1,_:s2) -> case lookup s1 (prefixes ns) of
Just u -> QName s1 u s2
Nothing -> error ("Undeclared attribute namespace: "++s1)
functionTag :: String -> NS -> QName
functionTag s ns
= case span (/= ':') s of
(_,"") -> QName "" (defaultFunctionNS ns) s
(s1,_:s2) -> case lookup s1 (prefixes ns) of
Just u -> QName s1 u s2
Nothing -> error ("Undeclared function namespace: "++s1)
attributes :: AttList -> NS -> Attributes
attributes al ns
= map (\(s,v) -> (attributeTag s ns,v)) al
elementNamespaces :: AttList -> NS -> NS
elementNamespaces atts ns
= ns { defaultElementNS = foldr (\(a,v) r -> case a of "xmlns" -> v; _ -> r) (defaultElementNS ns) atts,
prefixes = foldr (\(a,v) r -> case splitAt 6 a of
("xmlns:",p)
-> case lookup p predefinedNamespaces of
Just uri
-> if uri == v
then (p,v):r
else error ("You cannot redefine the system namespace "++p)
Nothing -> (p,v):r
_ -> r) (prefixes ns) atts }
predefinedNamespaces :: NamespacePrefixes
predefinedNamespaces
= [ ("xml", "http://www.w3.org/XML/1998/namespace"),
("xmlns", "http://www.w3.org/2000/xmlns/"),
("xsl", "http://www.w3.org/1999/XSL/Transform"),
("xs", "http://www.w3.org/2001/XMLSchema"),
("xsi", "http://www.w3.org/2001/XMLSchema-instance"),
("fn", "http://www.w3.org/2005/xpath-functions"),
("xdt", "http://www.w3.org/2005/xpath-datatypes"),
("local", "http://www.w3.org/2005/xquery-local-functions") ]
xsNamespace = let Just uri = lookup "xs" predefinedNamespaces in uri
fnNamespace = let Just uri = lookup "fn" predefinedNamespaces in uri
initialNS = NS { defaultElementNS="", defaultFunctionNS=fnNamespace,
prefixes=predefinedNamespaces, schemas=[] }
type Stream = [XMLEvent]
noParentError = XError "Undefined parent reference"
statistics = id
materializeWithoutParent :: Int -> Stream -> NS -> XTree
materializeWithoutParent level stream ns
= XElem documentRootTag [] 1 noParentError
((\(x,_,_)->x) (mdl stream 2 level [] ns))
where md s@(x@(StartEvent n atts):xs) i level ls ns
| level == 0
= let f [] i ns = m s i ns
f ((StartEvent n atts):ls) i ns
= let ns' = elementNamespaces atts ns
(e,xs',i') = (f ls $! i+1) $! ns'
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i noParentError [e],xs',i')
(e,xs',i') = (f $! (reverse ls)) i ns
in (e,xs',i',ls)
| otherwise
= (((md xs $! i+1) $! level1) $! x:ls) ns
md ((EndEvent n):xs) i level (x:ls) ns
= (md xs i $! level+1) ls ns
md (_:xs) i level ls ns
= md xs i level ls ns
md [] _ _ _ _ = (XText "",[],0,[])
mdl xs@(_:_) i level ls ns
= let (e,xs',i',ls') = md xs i level ls ns
(el,xs'',i'') = mdl xs' i' 0 ls' ns
in (e:el,xs'',i'')
mdl [] _ _ _ _ = ([],[],0)
m ((TextEvent t):xs) i _ = (XText t,xs,i)
m ((EmptyEvent n atts):xs) i ns
= let ns' = elementNamespaces atts ns
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i noParentError [],xs,i+1)
m ((StartEvent n atts):xs) i ns
= let ns' = elementNamespaces atts ns
(el,xs',i') = (ml n xs $! i+1) $! ns'
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i noParentError el,xs',i')
m ((PIEvent n s):xs) i _ = (XPI n s,xs,i)
m ((CommentEvent s):xs) i _ = (XComment s,xs,i)
m ((GERefEvent n):xs) i _ = (XGERef n,xs,i)
m ((ErrorEvent s):xs) i _ = (XError s,xs,i)
m (_:xs) i _ = (XError "Unrecognized XML event",xs,i)
m [] i _ = (XError "Unterminated element",[],i)
ml _ [] i _ = ([],[],i)
ml tag ((EndEvent n):xs) i ns
| n==tag = ([],xs,i)
| otherwise = error $ "Unmatched tags: <"++show tag++"></"++n++">"
ml tag xs i ns
= let (e,xs',i') = m xs i ns
(el,xs'',i'') = ml tag xs' i' ns
in (e:el,xs'',i'')
materializeWithParent :: Int -> Stream -> NS -> XTree
materializeWithParent level stream ns = root
where root = XElem documentRootTag [] 1 (XError "Trying to access the root parent")
((\(x,_,_)->x) (mdl stream 2 level root [] ns))
md s@(x@(StartEvent n atts):xs) i level p ls ns
| level == 0
= let f [] i p ns = m s i p ns
f ((StartEvent n atts):ls) i p ns
= let ns' = elementNamespaces atts ns
(e,xs',i') = (f ls $! i+1) x $! ns'
x = ((XElem $! (tag n ns')) $! (attributes atts ns')) i p [e]
in (x,xs',i')
(e,xs',i') = (f $! (reverse ls)) i p ns
in (e,xs',i',ls)
| otherwise
= (((md xs $! i+1) $! level1) p $! x:ls) ns
md ((EndEvent n):xs) i level p (x:ls) ns
= (md xs i $! level+1) p ls ns
md (_:xs) i level p ls ns
= md xs i level p ls ns
md [] _ _ _ _ _ = (XText "",[],0,[])
mdl xs@(_:_) i level p ls ns
= let (e,xs',i',ls') = md xs i level p ls ns
(el,xs'',i'') = mdl xs' i' 0 p ls' ns
in (e:el,xs'',i'')
mdl [] _ _ _ _ _ = ([],[],0)
m ((TextEvent t):xs) i _ _ = (XText t,xs,i)
m ((EmptyEvent n atts):xs) i p ns
= let ns' = elementNamespaces atts ns
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i p [],xs,i+1)
m ((StartEvent n atts):xs) i p ns
= let ns' = elementNamespaces atts ns
(el,xs',i') = (ml n xs $! (i+1)) node $! ns'
node = ((XElem $! (tag n ns')) $! (attributes atts ns')) i p el
in (node,xs',i')
m ((PIEvent n s):xs) i _ _ = (XPI n s,xs,i)
m ((CommentEvent s):xs) i _ _ = (XComment s,xs,i)
m ((GERefEvent n):xs) i _ _ = (XGERef n,xs,i)
m ((ErrorEvent s):xs) i _ _ = (XError s,xs,i)
m (_:xs) i _ _ = (XError "unrecognized XML event",xs,i)
m [] i _ _ = (XError "unbalanced tags",[],i)
ml _ [] i _ _ = ([],[],i)
ml tag ((EndEvent n):xs) i _ _
| n==tag = ([],xs,i)
| otherwise = error $ "Unmatched tags: <"++show tag++"></"++n++">"
ml tag xs i p ns
= let (e,xs',i') = m xs i p ns
(el,xs'',i'') = ml tag xs' i' p ns
in (e:el,xs'',i'')
docLevel :: Stream -> Int -> Int -> Int -> Int
docLevel stream i level minLevel
= if i < 0
then minLevel
else case stream of
((StartEvent n _):xs)
-> ((docLevel xs $! i1) $! level+1) minLevel
((EndEvent n):xs)
-> ((docLevel xs $! i1) $! level1) $! min minLevel (level1)
(_:xs) -> (docLevel xs $! i1) level minLevel
_ -> 0
maxStreamingUnitSize :: Int
maxStreamingUnitSize = 10000
materialize :: Bool -> Stream -> XTree
materialize withParent stream
= let level = docLevel stream maxStreamingUnitSize 0 1000000
in if withParent
then materializeWithParent level stream initialNS
else materializeWithoutParent level stream initialNS