{-------------------------------------------------------------------------------------
-
- XQuery functions
- Programmer: Leonidas Fegaras
- Email: fegaras@cse.uta.edu
- Web: http://lambda.uta.edu/
- Creation: 08/15/08, last update: 08/18/08
- 
- Copyright (c) 2008 by Leonidas Fegaras, the University of Texas at Arlington. All rights reserved.
- This material is provided as is, with absolutely no warranty expressed or implied.
- Any use is at your own risk. Permission is hereby granted to use or copy this program
- for any purpose, provided the above notices are retained on all copies.
-
--------------------------------------------------------------------------------------}


{-# OPTIONS_GHC -fth -fbang-patterns #-}

module Text.XML.HXQ.Functions where

import Data.List(foldl')
import Char(isDigit)
import Language.Haskell.TH
import HXML(AttList)
import Text.XML.HXQ.XTree


{--------------- XPath Steps ---------------------------------------------------------}


-- XPath step self /.
self_step :: Tag -> XTree -> XSeq
self_step tag x
    = case x of
        XElem t _ _ _ _ -> if t==tag || tag=="*" then [x] else []
        _ -> [x]


-- XPath step /tag or /*
child_step :: Tag -> XTree -> XSeq
child_step tag x
    = case x of
        XElem _ _ _ _ bs
            -> foldl' (\s b -> case b of
                                 XElem t _ _ _ _ | (t==tag || tag=="*") -> b:s
                                 _ -> s) [] bs
        _ -> []


-- XPath step //tag or //*
descendant_or_self_step :: Tag -> XTree -> XSeq
descendant_or_self_step tag (x@(XElem t _ _ _ cs))
    | tag==t || tag=="*"
    = x:(concatMap (descendant_or_self_step tag) cs)
descendant_or_self_step tag (XElem t _ _ _ cs)
    = concatMap (descendant_or_self_step tag) cs
descendant_or_self_step _ _ = []


-- XPath step descendant
descendant_step :: Tag -> XTree -> XSeq
descendant_step tag (XElem t _ _ _ cs)
    = concatMap (descendant_or_self_step tag) cs
descendant_step _ _ = []


-- It's like //* but has tagged children, which are derived statically
-- After examining 100 children it gives up: this avoids space leaks
descendant_any_with_tagged_children :: [Tag] -> XTree -> XSeq
descendant_any_with_tagged_children tags (x@(XElem t _ _ _ cs))
    | all (\tag -> foldl' (\s b -> case b of
                                     (XElem k _ _ _ _) -> s || k == tag
                                     _ -> s) False cs100) tags
    = x:(concatMap (descendant_any_with_tagged_children tags) cs)
    where cs100 = take 100 cs
descendant_any_with_tagged_children tags (XElem t _ _ _ cs)
    = concatMap (descendant_any_with_tagged_children tags) cs
descendant_any_with_tagged_children tags _ = []


-- XPath step /@attr or /@*
attribute_step :: Tag -> XTree -> XSeq
attribute_step attr x
    = case x of
        XElem _ al _ _ _ -> foldl' (\s (a,v) -> if a==attr || attr=="*"
                                                then (XText v):s
                                                else s) [] al
        _ -> []


-- XPath step //@attr or //@*
attribute_descendant_step :: Tag -> XTree -> XSeq
attribute_descendant_step attr (x@(XElem _ al _ _ cs))
    = foldl' (\s (a,v) -> if a==attr || attr=="*"
                          then (XText v):s
                          else s)
            (concatMap (attribute_descendant_step attr) cs) al
attribute_descendant_step _ _ = []


-- XPath step parent /..
parent_step :: Tag -> XTree -> XSeq
parent_step tag (XElem _ _ _ p _)
 = case p of
     XElem t _ _ _ _ | (t==tag || tag=="*") -> [p]
     _ -> []
parent_step _ _ = []


-- XPath step ancestor
ancestor_step :: Tag -> XTree -> XSeq
ancestor_step tag (XElem _ _ _ p _)
 = case p of
     XElem t _ _ _ _
         -> if t==tag || tag=="*"
            then p:(ancestor_step tag p)
            else ancestor_step tag p
     _ -> []
ancestor_step _ _ = []


-- XPath step ancestor-or-self
ancestor_or_self_step :: Tag -> XTree -> XSeq
ancestor_or_self_step tag e
 = case e of
     XElem t _ _ _ _
         -> if t==tag || tag=="*"
            then e:(ancestor_step tag e)
            else ancestor_step tag e
     _ -> []


-- XPath step following-sibling
following_sibling_step :: Tag -> XTree -> XSeq
following_sibling_step tag (XElem _ _ order (XElem _ _ _ _ cs) _)
 = concatMap (self_step tag)
             (tail (dropWhile filter cs))
   where filter (XElem _ _ o _ _) = o /= order
         filter _ = True
following_sibling_step _ _ = []


-- XPath step following
following_step :: Tag -> XTree -> XSeq
following_step tag (XElem _ _ order p _)
 = case p of
     XElem _ _ _ _ cs
         -> (concatMap (descendant_or_self_step tag)
                       (tail (dropWhile filter cs)))
            ++(following_step tag p)
            where filter (XElem _ _ o _ _) = o /= order
                  filter _ = True
     _ -> []
following_step _ _ = []


-- XPath step preceding-sibling
preceding_sibling_step :: Tag -> XTree -> XSeq
preceding_sibling_step tag (XElem _ _ order (XElem _ _ _ _ cs) _)
 = concatMap (self_step tag)
             (takeWhile filter cs)
   where filter (XElem _ _ o _ _) = o /= order
         filter _ = True
preceding_sibling_step _ _ = []


-- XPath step preceding
preceding_step :: Tag -> XTree -> XSeq
preceding_step tag (XElem _ _ order p _)
 = case p of
     XElem t _ _ _ cs
         -> (concatMap (descendant_or_self_step tag)
                       (takeWhile filter cs))
            ++(preceding_step tag p)
            where filter (XElem _ _ o _ _) = o /= order
                  filter _ = True
     _ -> []
preceding_step _ _ = []


-- XPath steps
paths :: [(Tag,Q Exp)]
paths = [ ( "child", [| child_step |] ),
          ( "descendant", [| descendant_step |] ),
          ( "attribute", [| attribute_step |] ),
          ( "self", [| self_step |] ),
          ( "descendant-or-self", [| descendant_or_self_step |] ),
          ( "attribute-descendant", [| attribute_descendant_step |] ),
          ( "following-sibling", [| following_sibling_step |] ),
          ( "following", [| following_step |] ),
          ( "parent", [| parent_step |] ),
          ( "ancestor", [| ancestor_step |] ),
          ( "preceding-sibling", [| preceding_sibling_step |] ),
          ( "preceding", [| preceding_step |] ),
          ( "ancestor-or-self", [| ancestor_or_self_step |] ) ]


-- XPath steps to be used by the interpreter
-- when evaluated, it gives [(String,Tag->XTree->XSeq)]
pFunctions = foldr (\(pname,p) r -> let pn = litE (StringL pname) in [| ($pn,$p) : $r |]) [| [] |] paths


{------------ Functions --------------------------------------------------------------}


-- find the value of a variable in an association list
findV var env
  = case filter (\(n,_) -> n==var) env of
      (_,b):_ -> b
      _ -> error ("Undefined variable: "++var)


-- is the variable defined in the association list?
memV var env
  = case filter (\(n,_) -> n==var) env of
      (_,b):_ -> True
      _ -> False


-- like foldr but with an index
foldir :: (a -> Int -> b -> b) -> b -> [a] -> Int -> b
foldir c n [] i = n
foldir c n (x:xs) i = c x i (foldir c n xs (i+1))


trueXT = XBool True
falseXT = XBool False


readNum :: String -> Maybe XTree
readNum cs = case span isDigit cs of
               (n,[]) -> Just (XInt (read n))
               (n,'.':rest) -> case span isDigit rest of
                                 (k,[]) -> Just (XFloat (read (n++('.':k))))
                                 _ -> Nothing
               _ -> Nothing


text :: XSeq -> XSeq
text xs = foldl' (\r x -> case x of
                            XElem _ _ _ _ zs
                                -> (filter (\a -> case a of XText _ -> True; XInt _ -> True;
                                                            XFloat _ -> True; XBool _ -> True; _ -> False) zs)++r
                            XText _ -> x:r
                            XInt _ -> x:r
                            XFloat _ -> x:r
                            XBool _ -> x:r
                            _ -> r) [] xs


toString :: XSeq -> [String]
toString xs = map (\x -> case x of 
                           XText t -> t
                           XInt n -> show n
                           XFloat n -> show n
                           XBool n -> show n)
                  (text xs)


-- concatenate text with no padding (for element content)
appendText :: [XSeq] -> XSeq
appendText [] = []
appendText [x] = x
appendText (x:xs) = x++(XNoPad:(appendText xs))


toNum :: XSeq -> XSeq
toNum xs = foldl' (\r x -> case x of
                             XInt n -> x:r
                             XFloat n -> x:r
                             XText s -> case readNum s of
                                          Just t -> t:r
                                          _ -> r
                             _ -> r) [] (text xs)


toFloat :: XTree -> Float
toFloat (XText s) = case readNum s of
                      Just (XInt n) -> fromIntegral n
                      Just (XFloat n) -> n
                      _ -> error("Cannot convert to a float: "++s)
toFloat (XInt n) = fromIntegral n
toFloat (XFloat n) = n
toFloat x = error("Cannot convert to a float: "++(show x))


mean :: (Fractional t) => [t] -> t
mean = uncurry (/) . foldl' (\(!s, !n) x -> (s+x, n+1)) (0,0.0)


contains :: String -> String -> Bool
contains text word
    = let len = length word
          c xs | ((take len xs) == word) = True
          c (_:xs) = c xs
          c _ = False
      in c text


distinct :: Eq a => [a] -> [a]
distinct = foldl (\r a -> if elem a r then r else r++[a]) []


arithmetic :: (Float -> Float -> Float) -> XTree -> XTree -> XTree
arithmetic op (XInt n) (XInt m) = XInt (round (op (fromIntegral n) (fromIntegral m)))
arithmetic op (XFloat n) (XFloat m) = XFloat (op n m)
arithmetic op (XFloat n) (XInt m) = XFloat (op n (fromIntegral m))
arithmetic op (XInt n) (XFloat m) = XFloat (op (fromIntegral n) m)


compareXTrees :: XTree -> XTree -> Ordering
compareXTrees (XElem _ _ _ _ _) _ = EQ
compareXTrees _ (XElem _ _ _ _ _) = EQ
compareXTrees (XInt n) (XInt m) = compare n m
compareXTrees (XFloat n) (XInt m) = compare n (fromIntegral m)
compareXTrees (XInt n) (XFloat m) = compare (fromIntegral n) m
compareXTrees (XFloat n) (XFloat m) = compare n m
compareXTrees (XText n) (XText m) = compare n m
compareXTrees x y = compare (toFloat x) (toFloat y)


strictCompareOne [XInt n] [XInt m] = compare n m
strictCompareOne [XFloat n] [XFloat m] = compare n m
strictCompareOne [XFloat n] [XInt m] = compare n (fromIntegral m)
strictCompareOne [XInt n] [XFloat m] = compare (fromIntegral n) m
strictCompareOne [XText n] [XText m] = compare n m
strictCompareOne x y = error ("Illegal operands in strict comparison: "++(show x)++" "++(show y))

strictCompare :: XSeq -> XSeq -> Ordering
strictCompare [XElem _ _ _ _ x] [XElem _ _ _ _ y] = strictCompareOne x y
strictCompare x [XElem _ _ _ _ y] = strictCompareOne x y
strictCompare [XElem _ _ _ _ x] y = strictCompareOne x y
strictCompare x y = strictCompareOne x y

compareXSeqs :: Bool -> XSeq -> XSeq -> Ordering
compareXSeqs ord xs ys
    = let comps = [ compareXTrees x y | x <- xs, y <- ys ]
      in if ord
            then if all (\x -> x == LT) comps
                    then LT
                 else if all (\x -> x == GT) comps
                    then GT
                 else EQ
         else if all (\x -> x == LT) comps
                 then GT
              else if all (\x -> x == GT) comps
                 then LT
              else EQ


conditionTest :: XSeq -> Bool
conditionTest [] = False
conditionTest [XText ""] = False
conditionTest [XInt 0] = False
conditionTest [XBool False] = False
conditionTest _ = True


type Function = [Q Exp] -> Q Exp

-- System functions: they can also be defined as Haskell functions of type (XSeq,...,XSeq) -> XSeq
-- but here we make sure they are unfolded and fused with the rest of the query
functions :: [(Tag,Int,Function)]
functions = [ ( "=", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == EQ ] |] ),
              ( "!=", 2, \[xs,ys] -> [| if null [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == EQ ]
                                        then [trueXT]
                                        else [falseXT] |] ),
              ( ">", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == GT ] |] ),
              ( "<", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == LT ] |] ),
              ( ">=", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y `elem` [GT,EQ] ] |] ),
              ( "<=", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y `elem` [LT,EQ] ] |] ),
              ( "eq", 2, \[xs,ys] -> [| if strictCompare $xs $ys == EQ then [trueXT] else [falseXT] |] ),
              ( "neq", 2, \[xs,ys] -> [| if strictCompare $xs $ys /= EQ then [trueXT] else [falseXT] |] ),
              ( "lt", 2, \[xs,ys] -> [| if strictCompare $xs $ys == LT then [trueXT] else [falseXT] |] ),
              ( "gt", 2, \[xs,ys] -> [| if strictCompare $xs $ys == GT then [trueXT] else [falseXT] |] ),
              ( "le", 2, \[xs,ys] -> [| if strictCompare $xs $ys `elem` [LT,EQ] then [trueXT] else [falseXT] |] ),
              ( "ge", 2, \[xs,ys] -> [| if strictCompare $xs $ys `elem` [GT,EQ] then [trueXT] else [falseXT] |] ),
              ( "<<", 2, \[xs,ys] -> [| [ trueXT | XElem _ _ ox _ _ <- $xs, XElem _ _ oy _ _ <- $ys, ox < oy ] |] ),
              ( ">>", 2, \[xs,ys] -> [| [ trueXT | XElem _ _ ox _ _ <- $xs, XElem _ _ oy _ _ <- $ys, ox > oy ] |] ),
              ( "is", 2, \[xs,ys] -> [| [ trueXT | XElem _ _ ox _ _ <- $xs, XElem _ _ oy _ _ <- $ys, ox == oy ] |] ),
              ( "+", 2, \[xs,ys] -> [| [ arithmetic (+) x y | x <- toNum $xs, y <- toNum $ys ] |] ),
              ( "-", 2, \[xs,ys] -> [| [ arithmetic (-) x y | x <- toNum $xs, y <- toNum $ys ] |] ),
              ( "*", 2, \[xs,ys] -> [| [ arithmetic (*) x y | x <- toNum $xs, y <- toNum $ys ] |] ),
              ( "div", 2, \[xs,ys] -> [| [ arithmetic (/) x y | x <- toNum $xs, y <- toNum $ys ] |] ),
              ( "idiv", 2, \[xs,ys] -> [| [ XInt (div x y) | (XInt x) <- toNum $xs, (XInt y) <- toNum $ys ] |] ),
              ( "mod", 2, \[xs,ys] -> [| [ XInt (mod x y) | (XInt x) <- toNum $xs, (XInt y) <- toNum $ys ] |] ),
              ( "uplus", 1, \[xs] -> [| [ x | x <- toNum $xs ] |] ),
              ( "uminus", 1, \[xs] -> [| [ case x of XInt n -> XInt (-n); XFloat n -> XFloat (-n) | x <- toNum $xs ] |] ),
              ( "and", 2, \[xs,ys] -> [| if (conditionTest $xs) && (conditionTest $ys) then [trueXT] else [falseXT] |] ),
              ( "or", 2, \[xs,ys] -> [| if (conditionTest $xs) || (conditionTest $ys) then [trueXT] else [falseXT] |] ),
              ( "not", 1, \[xs] -> [| if (conditionTest $xs) then [falseXT] else [trueXT] |] ),
              ( "some", 1, \[xs] -> [| if (conditionTest $xs) then [trueXT] else [falseXT] |] ),
              ( "count", 1, \[xs] -> [| [ XInt (length $xs) ] |] ),
              ( "sum", 1, \[xs] -> [| [ XFloat (sum [ toFloat x | x <- toNum $xs ]) ] |] ),
              ( "avg", 1, \[xs] -> [| [ XFloat (mean [ toFloat x | x <- toNum $xs ]) ] |] ),
              ( "min", 1, \[xs] -> [| [ XFloat (minimum [ toFloat x | x <- toNum $xs ]) ] |] ),
              ( "max", 1, \[xs] -> [| [ XFloat (maximum [ toFloat x | x <- toNum $xs ]) ] |] ),
              ( "to", 2, \[xs,ys] -> [| [ XInt i | XInt n <- toNum $xs, XInt m <- toNum $ys, i <- [n..m] ] |] ),
              ( "text", 1, \[xs] -> [| text $xs |] ),
              ( "string", 1, \[xs] -> [| text $xs |] ),
              ( "data", 1, \[xs] -> [| text $xs |] ),
              ( "node", 1, \[xs] -> [| [ w | w@(XElem _ _ _ _ _) <- $xs ] |] ),
              ( "exists", 1, \[xs] -> [| [ XBool (not (null $xs)) ] |] ),
              ( "empty", 0, \[] -> [| [] |] ),
              ( "true", 0, \[] -> [| [trueXT] |] ),
              ( "false", 0, \[] -> [| [] |] ),
              ( "if", 3, \[cs,ts,es] -> [| if conditionTest $cs then $ts else $es |] ),
              ( "element", 2, \[tags,xs] -> [| [ x | tag <- toString $tags, x@(XElem t _ _ _ _) <- $xs, (t==tag || tag=="*") ] |] ),
              ( "attribute", 2, \[tags,xs] -> [| [ z | tag <- toString $tags, x <- $xs, z <- attribute_step tag x ] |] ),
              ( "name", 1, \[xs] -> [| [ XText tag | XElem tag _ _ _ _ <- $xs ] |] ),
              ( "contains", 2, \[xs,text] -> [| [ trueXT | x <- toString $xs, t <- toString $text, contains x t ] |] ),
              ( "substring", 3, \[xs,n1,n2] -> [| [ XText (take m2 (drop (m1-1) x)) | x <- toString $xs,
                                                    XInt m1 <- toNum $n1, XInt m2 <- toNum $n2 ] |] ),
              ( "concatenate", 2, \[xs,ys] -> [| $xs ++ $ys |] ),
              ( "distinct-values", 1, \[xs] -> [| distinct $xs |] ),
              ( "union", 2, \[xs,ys] -> [| distinct ($xs ++ $ys) |] ),
              ( "intersect", 2, \[xs,ys] -> [| filter (\x -> elem x $ys) $xs |] ),
              ( "except", 2, \[xs,ys] -> [| filter (\x -> not (elem x $ys)) $xs  |] ),
              ( "reverse", 1, \[xs] -> [| reverse $xs |] )
            ]


-- functions to be used by the interpreter
-- when evaluated, it gives [(String,Int,[XSeq]->XSeq)]
iFunctions :: Q Exp
iFunctions = foldr (\(fname,len,f) r
                        -> let vars = map (\i -> mkName ("v_"++(show i))) [1..len]
                               entry = tupE [litE (StringL fname),litE (IntegerL (toInteger len)),
                                             lamE [listP (map varP vars)] (f (map varE vars))]
                           in [| $entry : $r |]) [| [] |] functions


-- make a function call
callF :: Tag -> Function
callF fname args = case filter (\(n,_,_) -> n == fname || ("fn:"++n)==fname) functions of
                     (_,len,f):_ -> if (length args) == len
                                       then f args
                                    else error ("wrong number of arguments in function call: " ++ fname)
                     _ ->     -- otherwise, it must be a Haskell function of type (XSeq,...,XSeq) -> XSeq
                          let itp = case args of
                                      [] -> [t| () |]
                                      [_] -> [t| XSeq |]
                                      _ -> foldr (\_ r -> appT r [t| XSeq |]) (appT (tupleT (length args)) [t| XSeq |])
                                                 (tail args)
                              fn = sigE (varE (mkName fname))
                                        (appT (appT arrowT itp) [t| XSeq |])
                          in appE fn (tupE args)