{-------------------------------------------------------------------------------------
-
- XML Trees (represented as rose trees)
- Programmer: Leonidas Fegaras
- Email: fegaras@cse.uta.edu
- Web: http://lambda.uta.edu/
- Creation: 05/01/08, last update: 03/29/09
- 
- Copyright (c) 2008 by Leonidas Fegaras, the University of Texas at Arlington. All rights reserved.
- This material is provided as is, with absolutely no warranty expressed or implied.
- Any use is at your own risk. Permission is hereby granted to use or copy this program
- for any purpose, provided the above notices are retained on all copies.
-
--------------------------------------------------------------------------------------}


{-# OPTIONS_GHC -funbox-strict-fields #-}


module Text.XML.HXQ.XTree
    ( XTree(..), XSeq, materialize, putXSeq, showXS ) where

import System.IO
import Char(isSpace)
import XMLParse(XMLEvent(..))
import HXML(Name,AttList)
import Text.XML.HXQ.Parser(Ast(..))
import System.CPUTime
-- import Debug.Trace
-- import System.IO.Unsafe


-- | A rose tree representation of XML data.
-- An XML element is:  @XElem tagname atributes preorder parent children@.
-- The preorder numbering is the document order of elements.
-- The parent is a cyclic reference to the parent element.
data XTree =  XElem    !Name !AttList !Int XTree [XTree]   -- ^ an XML tree node (element)
           |  XAttr    !Name !String    -- ^ attribute construction
           |  XText    !String          -- ^ an XML tree leaf (PCDATA)
           |  XInt     !Int             -- ^ an XML tree leaf (int)
           |  XFloat   !Double          -- ^ an XML tree leaf (double)
           |  XBool    !Bool            -- ^ an XML tree leaf (boolean)
           |  XPI      Name String	-- ^ processing instruction
           |  XGERef   String		-- ^ general entity reference
           |  XComment String		-- ^ comment
           |  XError   String		-- ^ error report
           |  XNull                     -- ^ null value
           |  XType    Ast              -- ^ type information
           |  XNoPad                    -- ^ marker for no padding in XSeq
           deriving Eq


-- | A sequence of XML fragments
type XSeq = [XTree]


emptyElem :: XTree -> Bool
emptyElem e
    = case e of
        XElem _ al _ _ xs
              -> emptyAL al && all emptyElem xs && not (null xs)
        XText text -> all isSpace text
        XNull -> True
        XNoPad -> True
        _ -> False
      where emptyAL = all (\(a,v) -> case (a,v) of (_,"") -> True; ('_':_,_) -> True; _ -> False)


showAL :: AttList -> String
showAL = foldr (\(a,v) r -> case (a,v) of ('_':_,_) -> r; _ -> " "++a++"=\""++v++"\""++r) []

showXT :: XTree -> Bool -> String
showXT e pad
    = case e of
        XElem "_document" _ _ _ xs -> showXS xs
        XElem tag al _ _ [] -> "<"++tag++showAL al++"/>"
        XElem _ _ _ _ _ | emptyElem e -> ""
        XElem tag al _ _ xs | all emptyElem xs -> "<"++tag++showAL al++"/>"
        XElem tag al _ _ xs -> "<"++tag++showAL al++">"++showXS xs++"</"++tag++">"
        XAttr tag val -> p++tag++"=\""++val++"\""
        XText text -> p++text
        XInt n -> p++show n
        XFloat n -> p++show n
        XBool v -> p++if v then "true" else "false"
        XComment s -> "<!--"++s++"-->"
        XPI n s -> "<?"++n++" "++s++">"
        XError s -> error s
        XNull -> "?"
        XType tp -> show tp
        _ -> ""
      where p = if pad then " " else ""

showXS :: XSeq -> String
showXS [] = ""
showXS (x:xs) = showXT x False ++ sXS xs
    where sXS (XNoPad:x:xs) = (showXT x False) ++ sXS xs
          sXS (x:xs) = (showXT x True) ++ sXS xs
          sXS _ = ""

instance Show XTree where
    show t = showXT t False


-- | Print the XQuery result (which is a sequence of XML fragments) without buffering.
putXSeq :: XSeq -> IO ()
putXSeq xs = hSetBuffering stdout NoBuffering >> putStrLn (showXS xs)


{--------------- Build the rose tree from the XML stream ----------------------------}


type Stream = [XMLEvent]

noParentError = XError "Undefined parent reference"


statistics = id
{- Collect statistics about the timing of I/O events
statistics n
    = if mod n 1000 == 0
      then unsafePerformIO (do t <- getCPUTime
                               putStr $ show (div n 1000)
                               putStrLn $ " " ++ show (div t (10^6))
                               return n)
      else n
-}


-- Lazily materialize the SAX stream into a DOM tree without setting parent references.
materializeWithoutParent :: Int -> Stream -> XTree
materializeWithoutParent level stream
    = XElem "_document" [] 1 noParentError
            ((\(x,_,_)->x) (mdl stream 2 level []))
      where md s@(x@(StartEvent n atts):xs) i level ns
                | level == 0
                    = let f [] i = m s i
                          f ((StartEvent n atts):ns) i
                              = let (e,xs',i') = f ns $! i+1
                                in (XElem n atts i noParentError [e],xs',i')
                          (e,xs',i') = (f $! (reverse ns)) i
                      in (e,xs',i',ns)
                | otherwise
                    = ((md xs $! i+1) $! level-1) $! x:ns
            md ((EndEvent n):xs) i level (x:ns)
                = (md xs i $! level+1) ns
            md (_:xs) i level ns
                    = md xs i level ns
            md [] _ _ _ = (XText "",[],0,[])
            mdl xs@(_:_) i level ns
                = let (e,xs',i',ns') = md xs i level ns
                      (el,xs'',i'') = mdl xs' i' 0 ns'
                  in (e:el,xs'',i'')
            mdl [] _ _ _ = ([],[],0)
            m ((TextEvent t):xs) i = (XText t,xs,i)
            m ((EmptyEvent n atts):xs) i
                = (XElem n atts i noParentError [],xs,i+1)
            m ((StartEvent n atts):xs) i
                = let (el,xs',i') = ml n xs $! i+1
                  in (XElem n atts i noParentError el,xs',i')
            m ((PIEvent n s):xs) i = (XPI n s,xs,i)
            m ((CommentEvent s):xs) i = (XComment s,xs,i)
            m ((GERefEvent n):xs) i = (XGERef n,xs,i)
            m ((ErrorEvent s):xs) i = (XError s,xs,i)
            m (_:xs) i = (XError "Unrecognized XML event",xs,i)
            m [] i = (XError "Unterminated element",[],i)
            ml _ [] i = ([],[],i)
            ml tag ((EndEvent n):xs) i
                | n==tag = ([],xs,i)
                | otherwise = error $ "Unmatched tags: <"++tag++"></"++n++">"
            ml tag xs i = let (e,xs',i') = m xs i
                              (el,xs'',i'') = ml tag xs' i'
                          in (e:el,xs'',i'')


-- Lazily materialize the SAX stream into a DOM tree setting parent references.
-- It has space leaks for large documents.
-- Used only if the query has backward steps that cannot be eliminated.
materializeWithParent :: Int -> Stream -> XTree
materializeWithParent level stream = root
    where root = XElem "_document" [] 1 (XError "Trying to access the root parent")
                       ((\(x,_,_)->x) (mdl stream 2 level root []))
          md s@(x@(StartEvent n atts):xs) i level p ns
              | level == 0
                  = let f [] i p = m s i p
                        f ((StartEvent n atts):ns) i p
                            = let (e,xs',i') = (f ns $! i+1) x
                                  x = XElem n atts i p [e]
                              in (x,xs',i')
                        (e,xs',i') = (f $! (reverse ns)) i p
                    in (e,xs',i',ns)
              | otherwise
                  = ((md xs $! i+1) $! level-1) p $! x:ns
          md ((EndEvent n):xs) i level p (x:ns)
              = (md xs i $! level+1) p ns
          md (_:xs) i level p ns
              = md xs i level p ns
          md [] _ _ _ _ = (XText "",[],0,[])
          mdl xs@(_:_) i level p ns
              = let (e,xs',i',ns') = md xs i level p ns
                    (el,xs'',i'') = mdl xs' i' 0 p ns'
                in (e:el,xs'',i'')
          mdl [] _ _ _ _ = ([],[],0)
          m ((TextEvent t):xs) i _ = (XText t,xs,i)
          m ((EmptyEvent n atts):xs) i p = (XElem n atts i p [],xs,i+1)
          m ((StartEvent n atts):xs) i p
              = let (el,xs',i') = (ml n xs $! (i+1)) node
                    node = XElem n atts i p el
                in (node,xs',i')
          m ((PIEvent n s):xs) i _ = (XPI n s,xs,i)
          m ((CommentEvent s):xs) i _ = (XComment s,xs,i)
          m ((GERefEvent n):xs) i _ = (XGERef n,xs,i)
          m ((ErrorEvent s):xs) i _ = (XError s,xs,i)
          m (_:xs) i _ = (XError "unrecognized XML event",xs,i)
          m [] i _ = (XError "unbalanced tags",[],i)
          ml _ [] i _ = ([],[],i)
          ml tag ((EndEvent n):xs) i _
                | n==tag = ([],xs,i)
                | otherwise = error $ "Unmatched tags: <"++tag++"></"++n++">"
          ml tag xs i p = let (e,xs',i') = m xs i p
                              (el,xs'',i'') = ml tag xs' i' p
                          in (e:el,xs'',i'')


-- It looks at the first maxStreamingUnitSize events in the stream to determine
-- the minimum nesting depth. If this minimum depth is greater than 0, then
-- the stream will be chopped into a list of XML elements at this depth level,
-- where each XML element corresponds to at most maxStreamingUnitSize events.
-- Otherwise, the cache requirenments would be the size of the largest root child
docLevel :: Stream -> Int -> Int -> Int -> Int
docLevel stream i level minLevel
    = if i < 0
      then minLevel
      else case stream of
             ((StartEvent n _):xs)
                 -> ((docLevel xs $! i-1) $! level+1) minLevel
             ((EndEvent n):xs)
                 -> ((docLevel xs $! i-1) $! level-1) $! min minLevel (level-1)
             (_:xs) -> (docLevel xs $! i-1) level minLevel
             _ -> 0


-- | Maximum number of events that each streaming unit corresponds to
maxStreamingUnitSize :: Int
maxStreamingUnitSize = 10000


materialize :: Bool -> Stream -> XTree
materialize withParent stream
    = let level = docLevel stream maxStreamingUnitSize 0 1000000
      in if withParent
         then materializeWithParent level stream
         else materializeWithoutParent level stream