module Text.XML.HXQ.XTree where
import System.IO
import Char(isSpace)
import XMLParse(XMLEvent(..))
import HXML(Name,AttList)
import Text.XML.HXQ.Parser(Ast(..))
import System.CPUTime
type Prefix = String
type URI = String
type LocalName = String
data QName = QName { prefix :: Prefix, uri :: URI, localName :: LocalName }
instance Eq QName where
(QName _ u1 ln1) == (QName _ u2 ln2)
= u1 == u2 && ln1 == ln2
instance Ord QName where
(QName _ u1 ln1) <= (QName _ u2 ln2)
= u1 < u2 || (u1 == u2 && ln1 <= ln2)
instance Show QName where
show (QName "" _ ln) = ln
show (QName ns _ ln) = ns++(':':ln)
type Attributes = [(QName,String)]
matchQName x@(QName _ u1 ln1) y@(QName _ u2 ln2)
= (u1 == u2 || u1 == "" || u2 == "")
&& (ln1 == ln2 || ln2 == "*" || ln1 == "*")
documentRootTag = QName "" "" "_document"
data XTree = XElem !QName !Attributes !Int XTree [XTree]
| XAttr !QName !String
| XText !String
| XInt !Int
| XFloat !Double
| XBool !Bool
| XPI String String
| XGERef String
| XComment String
| XError String
| XNull
| XType Type
| XNoPad
deriving Eq
type XSeq = [XTree]
emptyElem :: XTree -> Bool
emptyElem e
= case e of
XElem _ al _ _ xs
-> emptyAL al && all emptyElem xs && not (null xs)
XText text -> all isSpace text
XNull -> True
XNoPad -> True
_ -> False
where emptyAL = all (\(a,v) -> case (a,v) of
(_,"") -> True
(QName _ _ ('_':_),_) -> True
_ -> False)
showsAL :: Attributes -> String -> String
showsAL al acc
= foldr (\(a,v) r -> case (a,v) of
(QName _ _ ('_':_),_) -> r
_ -> ' ':shows a ('=':shows v r)) acc al
showsXT :: XTree -> String -> Bool -> String
showsXT e acc pad
= case e of
XElem tag _ _ _ xs
| tag == documentRootTag
-> showsXS xs acc
XElem tag al _ _ []
-> '<':shows tag (showsAL al ('/':'>':acc))
XElem _ _ _ _ _
| emptyElem e
-> acc
XElem tag al _ _ xs
| all emptyElem xs
-> '<':shows tag (showsAL al ('/':'>':acc))
XElem tag al _ _ xs
-> '<':shows tag (showsAL al ('>':showsXS xs ('<':'/':shows tag ('>':acc))))
XAttr tag val
-> p (shows tag ('=':shows val acc))
XText text
-> p (text++acc)
XInt n
-> p (shows n acc)
XFloat n
-> p (shows n acc)
XBool v
-> p (if v then "true"++acc else "false"++acc)
XComment s
-> "<!--"++s++"-->"++acc
XPI n s
-> "<?"++n++(' ':(s++('>':acc)))
XError s -> error s
XNull -> '?':acc
XType tp -> shows tp acc
_ -> acc
where p acc = if pad then ' ':acc else acc
showsXS :: XSeq -> String -> String
showsXS [] acc = acc
showsXS (x:xs) acc = showsXT x (sXS xs acc) False
where sXS (XNoPad:x:xs) acc = showsXT x (sXS xs acc) False
sXS (x:xs) acc = showsXT x (sXS xs acc) True
sXS _ acc = acc
instance Show XTree where
show t = showsXT t "" False
putXSeq :: XSeq -> IO ()
putXSeq xs = hSetBuffering stdout NoBuffering >> putStrLn (showsXS xs "")
type TVar = Int
type TQualifier = Char
data Type
= TVariable TVar
| TBase QName
| TItem String
| TNamed QName
| TElement String Type
| TAttribute String Type
| TAny
| TEmpty
| TSequence Type Type
| TInterleaving Type Type
| TChoice Type Type
| TQualified Type TQualifier
deriving Eq
showsType :: Type -> Int -> String -> String
showsType t prec acc
= case t of
TVariable s -> '#':shows s acc
TBase s -> shows s acc
TItem s -> s++('(':')':acc)
TNamed s -> shows s acc
TEmpty -> '(':')':acc
TAny -> "xs:any"++acc
TElement n TAny -> "element "++n++acc
TAttribute a TAny -> "attribute "++a++acc
TElement n t
-> "element "++n++" { "++showsType t 3 (" }"++acc)
TAttribute a t
-> "attribute "++a++" { "++showsType t 3 (" }"++acc)
TSequence t1 t2
-> paren 3 prec (showsType t1 3 (", "++showsType t2 3 acc))
TInterleaving t1 t2
-> paren 3 prec (showsType t1 2 (" & "++showsType t2 2 acc))
TChoice t1 t2
-> paren 2 prec (showsType t1 2 (" | "++showsType t2 2 acc))
TQualified t c
-> paren 4 prec (showsType t 4 (c:acc))
where paren p1 p2 s | p1<p2 = "("++s++")"
| otherwise = s
instance Show Type where
show t = showsType t 4 ""
type XMLSchema = [(LocalName,Type)]
type Schemas = [(URI,XMLSchema)]
type NamespacePrefixes = [(Prefix,URI)]
data NS = NS { defaultElementNS :: URI, defaultFunctionNS :: URI,
prefixes :: NamespacePrefixes, schemas :: Schemas }
deriving Show
tag :: String -> NS -> QName
tag s ns
= if elem ':' s
then case span (/= ':') s of
(s1,_:s2) -> case lookup s1 (prefixes ns) of
Just u -> QName s1 u s2
Nothing -> error ("Undeclared element namespace: "++s1)
else QName "" (defaultElementNS ns) s
attributeTag s ns
= if elem ':' s
then case span (/= ':') s of
(s1,_:s2) -> case lookup s1 (prefixes ns) of
Just u -> QName s1 u s2
Nothing -> error ("Undeclared attribute namespace: "++s1)
else QName "" "" s
functionTag :: String -> NS -> QName
functionTag s ns
= if elem ':' s
then case span (/= ':') s of
(s1,_:s2) -> case lookup s1 (prefixes ns) of
Just u -> QName s1 u s2
Nothing -> error ("Undeclared function namespace: "++s1)
else QName "" (defaultFunctionNS ns) s
attributes :: AttList -> NS -> Attributes
attributes al ns
= map (\(s,v) -> (attributeTag s ns,v)) al
elementNamespaces :: AttList -> NS -> NS
elementNamespaces atts ns
= ns { defaultElementNS = foldr (\(a,v) r -> case a of "xmlns" -> v; _ -> r) (defaultElementNS ns) atts,
prefixes = foldr (\(a,v) r -> case splitAt 6 a of
("xmlns:",p)
-> case lookup p predefinedNamespaces of
Just uri
-> if uri == v
then (p,v):r
else error ("You cannot redefine the system namespace "++p)
Nothing -> (p,v):r
_ -> r) (prefixes ns) atts }
predefinedNamespaces :: NamespacePrefixes
predefinedNamespaces
= [ ("xml", "http://www.w3.org/XML/1998/namespace"),
("xmlns", "http://www.w3.org/2000/xmlns/"),
("xsl", "http://www.w3.org/1999/XSL/Transform"),
("xs", "http://www.w3.org/2001/XMLSchema"),
("xsi", "http://www.w3.org/2001/XMLSchema-instance"),
("fn", "http://www.w3.org/2005/xpath-functions"),
("xdt", "http://www.w3.org/2005/xpath-datatypes"),
("local", "http://www.w3.org/2005/xquery-local-functions") ]
xsNamespace = let Just uri = lookup "xs" predefinedNamespaces in uri
fnNamespace = let Just uri = lookup "fn" predefinedNamespaces in uri
initialNS = NS { defaultElementNS="", defaultFunctionNS=fnNamespace,
prefixes=predefinedNamespaces, schemas=[] }
type Stream = [XMLEvent]
noParentError = XError "Undefined parent reference"
statistics = id
materializeWithoutParent :: Int -> Stream -> NS -> XTree
materializeWithoutParent level stream ns
= XElem documentRootTag [] 1 noParentError
(if level<=1
then [head (filter (\x -> case x of XElem _ _ _ _ _ -> True; _ -> False)
((\(x,_,_)->x) (ml "" stream 2 ns)))]
else ((\(x,_,_)->x) (mdl stream 2 level [] ns)))
where md s@(x@(StartEvent n atts):xs) i level ls ns
| level == 0
= let f [] i ns = m s i ns
f ((StartEvent n atts):ls) i ns
= let ns' = elementNamespaces atts ns
(e,xs',i') = (f ls $! i+1) $! ns'
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i noParentError [e],xs',i')
(e,xs',i') = (f $! (reverse ls)) i ns
in (e,xs',i',ls)
| otherwise
= (((md xs $! i+1) $! level1) $! x:ls) ns
md ((EndEvent n):xs) i level (x:ls) ns
= (md xs i $! level+1) ls ns
md (_:xs) i level ls ns
= md xs i level ls ns
md [] _ _ _ _ = (XText "",[],0,[])
mdl xs@(_:_) i level ls ns
= let (e,xs',i',ls') = md xs i level ls ns
(el,xs'',i'') = mdl xs' i' 0 ls' ns
in (e:el,xs'',i'')
mdl [] _ _ _ _ = ([],[],0)
m ((TextEvent t):xs) i _ = (XText t,xs,i)
m ((EmptyEvent n atts):xs) i ns
= let ns' = elementNamespaces atts ns
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i noParentError [],xs,i+1)
m ((StartEvent n atts):xs) i ns
= let ns' = elementNamespaces atts ns
(el,xs',i') = (ml n xs $! i+1) $! ns'
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i noParentError el,xs',i')
m ((PIEvent n s):xs) i _ = (XPI n s,xs,i)
m ((CommentEvent s):xs) i _ = (XComment s,xs,i)
m ((GERefEvent n):xs) i _ = (XGERef n,xs,i)
m ((ErrorEvent s):xs) i _ = (XError s,xs,i)
m (_:xs) i _ = (XError "Unrecognized XML event",xs,i)
m [] i _ = (XError "Unterminated element",[],i)
ml _ [] i _ = ([],[],i)
ml tag ((EndEvent n):xs) i ns
| n==tag = ([],xs,i)
| otherwise = error $ "Unmatched tags: <"++show tag++"></"++n++">"
ml tag xs i ns
= let (e,xs',i') = m xs i ns
(el,xs'',i'') = ml tag xs' i' ns
in (e:el,xs'',i'')
materializeWithParent :: Int -> Stream -> NS -> XTree
materializeWithParent level stream ns = root
where root = XElem documentRootTag [] 1 (XError "Trying to access the root parent")
(if level<=1
then [head (filter (\x -> case x of XElem _ _ _ _ _ -> True; _ -> False)
((\(x,_,_)->x) (ml "" stream 2 root ns)))]
else ((\(x,_,_)->x) (mdl stream 2 level root [] ns)))
md s@(x@(StartEvent n atts):xs) i level p ls ns
| level == 0
= let f [] i p ns = m s i p ns
f ((StartEvent n atts):ls) i p ns
= let ns' = elementNamespaces atts ns
(e,xs',i') = (f ls $! i+1) x $! ns'
x = ((XElem $! (tag n ns')) $! (attributes atts ns')) i p [e]
in (x,xs',i')
(e,xs',i') = (f $! (reverse ls)) i p ns
in (e,xs',i',ls)
| otherwise
= (((md xs $! i+1) $! level1) p $! x:ls) ns
md ((EndEvent n):xs) i level p (x:ls) ns
= (md xs i $! level+1) p ls ns
md (_:xs) i level p ls ns
= md xs i level p ls ns
md [] _ _ _ _ _ = (XText "",[],0,[])
mdl xs@(_:_) i level p ls ns
= let (e,xs',i',ls') = md xs i level p ls ns
(el,xs'',i'') = mdl xs' i' 0 p ls' ns
in (e:el,xs'',i'')
mdl [] _ _ _ _ _ = ([],[],0)
m ((TextEvent t):xs) i _ _ = (XText t,xs,i)
m ((EmptyEvent n atts):xs) i p ns
= let ns' = elementNamespaces atts ns
in (((XElem $! (tag n ns')) $! (attributes atts ns')) i p [],xs,i+1)
m ((StartEvent n atts):xs) i p ns
= let ns' = elementNamespaces atts ns
(el,xs',i') = (ml n xs $! (i+1)) node $! ns'
node = ((XElem $! (tag n ns')) $! (attributes atts ns')) i p el
in (node,xs',i')
m ((PIEvent n s):xs) i _ _ = (XPI n s,xs,i)
m ((CommentEvent s):xs) i _ _ = (XComment s,xs,i)
m ((GERefEvent n):xs) i _ _ = (XGERef n,xs,i)
m ((ErrorEvent s):xs) i _ _ = (XError s,xs,i)
m (_:xs) i _ _ = (XError "unrecognized XML event",xs,i)
m [] i _ _ = (XError "unbalanced tags",[],i)
ml _ [] i _ _ = ([],[],i)
ml tag ((EndEvent n):xs) i _ _
| n==tag = ([],xs,i)
| otherwise = error $ "Unmatched tags: <"++show tag++"></"++n++">"
ml tag xs i p ns
= let (e,xs',i') = m xs i p ns
(el,xs'',i'') = ml tag xs' i' p ns
in (e:el,xs'',i'')
docLevel :: Stream -> Int -> Int -> Int -> Int
docLevel stream i level minLevel
= if i < 0
then minLevel
else case stream of
((StartEvent n _):xs)
-> ((docLevel xs $! i1) $! level+1) minLevel
((EndEvent n):xs)
-> ((docLevel xs $! i1) $! level1) $! min minLevel $! (level1)
(_:xs) -> (docLevel xs $! i1) level minLevel
_ -> 0
maxStreamingUnitSize :: Int
maxStreamingUnitSize = 10000
materialize :: Bool -> Stream -> XTree
materialize withParent stream
= let level = docLevel stream maxStreamingUnitSize 0 1000000
in if withParent
then materializeWithParent level stream initialNS
else materializeWithoutParent level stream initialNS