{------------------------------------------------------------------------------------- - - XQuery functions - Programmer: Leonidas Fegaras - Email: fegaras@cse.uta.edu - Web: http://lambda.uta.edu/ - Creation: 08/15/08, last update: 11/18/08 - - Copyright (c) 2008 by Leonidas Fegaras, the University of Texas at Arlington. All rights reserved. - This material is provided as is, with absolutely no warranty expressed or implied. - Any use is at your own risk. Permission is hereby granted to use or copy this program - for any purpose, provided the above notices are retained on all copies. - --------------------------------------------------------------------------------------} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE BangPatterns #-} module Text.XML.HXQ.Functions where import Data.List(foldl') import Char(isDigit) import HXML(AttList) import Text.XML.HXQ.XTree import Language.Haskell.TH {--------------- XPath Steps ---------------------------------------------------------} -- XPath step self /. self_step :: Tag -> XTree -> XSeq self_step tag x = case x of XElem t _ _ _ _ -> if t==tag || tag=="*" then [x] else [] _ -> [x] -- XPath step /tag or /* child_step :: Tag -> XTree -> XSeq child_step tag x = case x of XElem _ _ _ _ bs -> foldr (\b s -> case b of XElem t _ _ _ _ | (t==tag || tag=="*") -> b:s _ -> s) [] bs _ -> [] -- XPath step //tag or //* descendant_or_self_step :: Tag -> XTree -> XSeq descendant_or_self_step tag (x@(XElem t _ _ _ cs)) | tag==t || tag=="*" = x:(concatMap (descendant_or_self_step tag) cs) descendant_or_self_step tag (XElem t _ _ _ cs) = concatMap (descendant_or_self_step tag) cs descendant_or_self_step _ _ = [] -- XPath step descendant descendant_step :: Tag -> XTree -> XSeq descendant_step tag (XElem t _ _ _ cs) = concatMap (descendant_or_self_step tag) cs descendant_step _ _ = [] -- It's like //* but has tagged children, which are derived statically -- After examining 100 children it gives up: this avoids space leaks descendant_any_with_tagged_children :: [Tag] -> XTree -> XSeq descendant_any_with_tagged_children tags (x@(XElem t _ _ _ cs)) | all (\tag -> foldr (\b s -> case b of (XElem k _ _ _ _) -> s || k == tag _ -> s) False cs100) tags = x:(concatMap (descendant_any_with_tagged_children tags) cs) where cs100 = take 100 cs descendant_any_with_tagged_children tags (XElem t _ _ _ cs) = concatMap (descendant_any_with_tagged_children tags) cs descendant_any_with_tagged_children tags _ = [] -- XPath step /@attr or /@* attribute_step :: Tag -> XTree -> XSeq attribute_step attr x = case x of XElem _ al _ _ _ -> foldr (\(a,v) s -> if a==attr || attr=="*" then (XText v):s else s) [] al _ -> [] -- XPath step //@attr or //@* attribute_descendant_step :: Tag -> XTree -> XSeq attribute_descendant_step attr (x@(XElem _ al _ _ cs)) = foldr (\(a,v) s -> if a==attr || attr=="*" then (XText v):s else s) (concatMap (attribute_descendant_step attr) cs) al attribute_descendant_step _ _ = [] -- XPath step parent /.. parent_step :: Tag -> XTree -> XSeq parent_step tag (XElem _ _ _ p _) = case p of XElem t _ _ _ _ | (t==tag || tag=="*") -> [p] _ -> [] parent_step _ _ = [] -- XPath step ancestor ancestor_step :: Tag -> XTree -> XSeq ancestor_step tag (XElem _ _ _ p _) = case p of XElem t _ _ _ _ -> if t==tag || tag=="*" then p:(ancestor_step tag p) else ancestor_step tag p _ -> [] ancestor_step _ _ = [] -- XPath step ancestor-or-self ancestor_or_self_step :: Tag -> XTree -> XSeq ancestor_or_self_step tag e = case e of XElem t _ _ _ _ -> if t==tag || tag=="*" then e:(ancestor_step tag e) else ancestor_step tag e _ -> [] -- XPath step following-sibling following_sibling_step :: Tag -> XTree -> XSeq following_sibling_step tag (XElem _ _ order (XElem _ _ _ _ cs) _) = concatMap (self_step tag) (tail (dropWhile filter cs)) where filter (XElem _ _ o _ _) = o /= order filter _ = True following_sibling_step _ _ = [] -- XPath step following following_step :: Tag -> XTree -> XSeq following_step tag (XElem _ _ order p _) = case p of XElem _ _ _ _ cs -> (concatMap (descendant_or_self_step tag) (tail (dropWhile filter cs))) ++(following_step tag p) where filter (XElem _ _ o _ _) = o /= order filter _ = True _ -> [] following_step _ _ = [] -- XPath step preceding-sibling preceding_sibling_step :: Tag -> XTree -> XSeq preceding_sibling_step tag (XElem _ _ order (XElem _ _ _ _ cs) _) = concatMap (self_step tag) (takeWhile filter cs) where filter (XElem _ _ o _ _) = o /= order filter _ = True preceding_sibling_step _ _ = [] -- XPath step preceding preceding_step :: Tag -> XTree -> XSeq preceding_step tag (XElem _ _ order p _) = case p of XElem t _ _ _ cs -> (concatMap (descendant_or_self_step tag) (takeWhile filter cs)) ++(preceding_step tag p) where filter (XElem _ _ o _ _) = o /= order filter _ = True _ -> [] preceding_step _ _ = [] -- XPath steps paths :: [(Tag,Q Exp)] paths = [ ( "child", [| child_step |] ), ( "descendant", [| descendant_step |] ), ( "attribute", [| attribute_step |] ), ( "self", [| self_step |] ), ( "descendant-or-self", [| descendant_or_self_step |] ), ( "attribute-descendant", [| attribute_descendant_step |] ), ( "following-sibling", [| following_sibling_step |] ), ( "following", [| following_step |] ), ( "parent", [| parent_step |] ), ( "ancestor", [| ancestor_step |] ), ( "preceding-sibling", [| preceding_sibling_step |] ), ( "preceding", [| preceding_step |] ), ( "ancestor-or-self", [| ancestor_or_self_step |] ) ] -- XPath steps to be used by the interpreter -- when evaluated, it gives [(String,Tag->XTree->XSeq)] pFunctions = foldr (\(pname,p) r -> let pn = litE (StringL pname) in [| ($pn,$p) : $r |]) [| [] |] paths {------------ Functions --------------------------------------------------------------} -- find the value of a variable in an association list findV var env = case filter (\(n,_) -> n==var) env of (_,b):_ -> b _ -> error ("Undefined variable: "++var) -- is the variable defined in the association list? memV var env = case filter (\(n,_) -> n==var) env of (_,b):_ -> True _ -> False -- like foldr but with an index foldir :: (a -> Int -> b -> b) -> b -> [a] -> Int -> b foldir c n [] i = n foldir c n (x:xs) i = c x i (foldir c n xs (i+1)) trueXT = XBool True falseXT = XBool False readNum :: String -> Maybe XTree readNum cs = case span isDigit cs of (n,[]) -> Just (XInt (read n)) (n,'.':rest) -> case span isDigit rest of (k,[]) -> Just (XFloat (read (n++('.':k)))) _ -> Nothing _ -> Nothing text :: XSeq -> XSeq text xs = foldr (\x r -> case x of XElem _ _ _ _ zs -> (filter (\a -> case a of XText _ -> True; XInt _ -> True; XNull -> True; XFloat _ -> True; XBool _ -> True; _ -> False) zs)++r XText _ -> x:r XInt _ -> x:r XFloat _ -> x:r XBool _ -> x:r _ -> r) [] xs string :: XSeq -> XSeq string xs = foldr (\x r -> case x of XElem _ _ _ _ zs -> (string zs)++r XText _ -> x:r XInt _ -> x:r XFloat _ -> x:r XBool _ -> x:r _ -> r) [] xs toString :: XSeq -> [String] toString xs = map (\x -> case x of XText t -> t XInt n -> show n XFloat n -> show n XBool n -> show n) (text xs) -- concatenate text with no padding (for element content) appendText :: [XSeq] -> XSeq appendText [] = [] appendText [x] = x appendText (x:xs) = x++(XNoPad:(appendText xs)) toNum :: XSeq -> XSeq toNum xs = foldr (\x r -> case x of XInt n -> x:r XFloat n -> x:r XText s -> case readNum s of Just t -> t:r _ -> r _ -> r) [] (text xs) toFloat :: XTree -> Float toFloat (XText s) = case readNum s of Just (XInt n) -> fromIntegral n Just (XFloat n) -> n _ -> error("Cannot convert to a float: "++s) toFloat (XInt n) = fromIntegral n toFloat (XFloat n) = n toFloat x = error("Cannot convert to a float: "++(show x)) mean :: (Fractional t) => [t] -> t mean = uncurry (/) . foldl' (\(!s, !n) x -> (s+x, n+1)) (0,0.0) contains :: String -> String -> Bool contains text word = let len = length word c xs | ((take len xs) == word) = True c (_:xs) = c xs c _ = False in c text distinct :: Eq a => [a] -> [a] distinct = foldl (\r a -> if elem a r then r else r++[a]) [] arithmetic :: (Float -> Float -> Float) -> XTree -> XTree -> XTree arithmetic op (XInt n) (XInt m) = XInt (round (op (fromIntegral n) (fromIntegral m))) arithmetic op (XFloat n) (XFloat m) = XFloat (op n m) arithmetic op (XFloat n) (XInt m) = XFloat (op n (fromIntegral m)) arithmetic op (XInt n) (XFloat m) = XFloat (op (fromIntegral n) m) compareXTrees :: XTree -> XTree -> Ordering compareXTrees (XElem _ _ _ _ _) _ = EQ compareXTrees _ (XElem _ _ _ _ _) = EQ compareXTrees (XInt n) (XInt m) = compare n m compareXTrees (XFloat n) (XInt m) = compare n (fromIntegral m) compareXTrees (XInt n) (XFloat m) = compare (fromIntegral n) m compareXTrees (XFloat n) (XFloat m) = compare n m compareXTrees (XText n) (XText m) = compare n m compareXTrees x y = compare (toFloat x) (toFloat y) strictCompareOne [XInt n] [XInt m] = compare n m strictCompareOne [XFloat n] [XFloat m] = compare n m strictCompareOne [XFloat n] [XInt m] = compare n (fromIntegral m) strictCompareOne [XInt n] [XFloat m] = compare (fromIntegral n) m strictCompareOne [XText n] [XText m] = compare n m strictCompareOne x y = error ("Illegal operands in strict comparison: "++(show x)++" "++(show y)) strictCompare :: XSeq -> XSeq -> Ordering strictCompare [XElem _ _ _ _ x] [XElem _ _ _ _ y] = strictCompareOne x y strictCompare x [XElem _ _ _ _ y] = strictCompareOne x y strictCompare [XElem _ _ _ _ x] y = strictCompareOne x y strictCompare x y = strictCompareOne x y compareXSeqs :: Bool -> XSeq -> XSeq -> Ordering compareXSeqs ord xs ys = let comps = [ compareXTrees x y | x <- xs, y <- ys ] in if ord then if all (\x -> x == LT) comps then LT else if all (\x -> x == GT) comps then GT else EQ else if all (\x -> x == LT) comps then GT else if all (\x -> x == GT) comps then LT else EQ conditionTest :: XSeq -> Bool conditionTest [] = False conditionTest [XText ""] = False conditionTest [XInt 0] = False conditionTest [XBool False] = False conditionTest _ = True type Function = [Q Exp] -> Q Exp -- System functions: they can also be defined as Haskell functions of type (XSeq,...,XSeq) -> XSeq -- but here we make sure they are unfolded and fused with the rest of the query functions :: [(Tag,Int,Function)] functions = [ ( "=", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == EQ ] |] ), ( "!=", 2, \[xs,ys] -> [| if null [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == EQ ] then [trueXT] else [falseXT] |] ), ( ">", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == GT ] |] ), ( "<", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y == LT ] |] ), ( ">=", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y `elem` [GT,EQ] ] |] ), ( "<=", 2, \[xs,ys] -> [| [ trueXT | x <- text $xs, y <- text $ys, compareXTrees x y `elem` [LT,EQ] ] |] ), ( "eq", 2, \[xs,ys] -> [| if strictCompare $xs $ys == EQ then [trueXT] else [falseXT] |] ), ( "neq", 2, \[xs,ys] -> [| if strictCompare $xs $ys /= EQ then [trueXT] else [falseXT] |] ), ( "lt", 2, \[xs,ys] -> [| if strictCompare $xs $ys == LT then [trueXT] else [falseXT] |] ), ( "gt", 2, \[xs,ys] -> [| if strictCompare $xs $ys == GT then [trueXT] else [falseXT] |] ), ( "le", 2, \[xs,ys] -> [| if strictCompare $xs $ys `elem` [LT,EQ] then [trueXT] else [falseXT] |] ), ( "ge", 2, \[xs,ys] -> [| if strictCompare $xs $ys `elem` [GT,EQ] then [trueXT] else [falseXT] |] ), ( "<<", 2, \[xs,ys] -> [| [ trueXT | XElem _ _ ox _ _ <- $xs, XElem _ _ oy _ _ <- $ys, ox < oy ] |] ), ( ">>", 2, \[xs,ys] -> [| [ trueXT | XElem _ _ ox _ _ <- $xs, XElem _ _ oy _ _ <- $ys, ox > oy ] |] ), ( "is", 2, \[xs,ys] -> [| [ trueXT | XElem _ _ ox _ _ <- $xs, XElem _ _ oy _ _ <- $ys, ox == oy ] |] ), ( "+", 2, \[xs,ys] -> [| [ arithmetic (+) x y | x <- toNum $xs, y <- toNum $ys ] |] ), ( "-", 2, \[xs,ys] -> [| [ arithmetic (-) x y | x <- toNum $xs, y <- toNum $ys ] |] ), ( "*", 2, \[xs,ys] -> [| [ arithmetic (*) x y | x <- toNum $xs, y <- toNum $ys ] |] ), ( "div", 2, \[xs,ys] -> [| [ arithmetic (/) x y | x <- toNum $xs, y <- toNum $ys ] |] ), ( "idiv", 2, \[xs,ys] -> [| [ XInt (div x y) | (XInt x) <- toNum $xs, (XInt y) <- toNum $ys ] |] ), ( "mod", 2, \[xs,ys] -> [| [ XInt (mod x y) | (XInt x) <- toNum $xs, (XInt y) <- toNum $ys ] |] ), ( "uplus", 1, \[xs] -> [| [ x | x <- toNum $xs ] |] ), ( "uminus", 1, \[xs] -> [| [ case x of XInt n -> XInt (-n); XFloat n -> XFloat (-n) | x <- toNum $xs ] |] ), ( "and", 2, \[xs,ys] -> [| if (conditionTest $xs) && (conditionTest $ys) then [trueXT] else [falseXT] |] ), ( "or", 2, \[xs,ys] -> [| if (conditionTest $xs) || (conditionTest $ys) then [trueXT] else [falseXT] |] ), ( "not", 1, \[xs] -> [| if conditionTest $xs then [falseXT] else [trueXT] |] ), ( "some", 1, \[xs] -> [| if conditionTest $xs then [trueXT] else [falseXT] |] ), ( "count", 1, \[xs] -> [| [ XInt (length $xs) ] |] ), ( "sum", 1, \[xs] -> [| [ XFloat (sum [ toFloat x | x <- toNum $xs ]) ] |] ), ( "avg", 1, \[xs] -> [| [ XFloat (mean [ toFloat x | x <- toNum $xs ]) ] |] ), ( "min", 1, \[xs] -> [| [ XFloat (minimum [ toFloat x | x <- toNum $xs ]) ] |] ), ( "max", 1, \[xs] -> [| [ XFloat (maximum [ toFloat x | x <- toNum $xs ]) ] |] ), ( "to", 2, \[xs,ys] -> [| [ XInt i | XInt n <- toNum $xs, XInt m <- toNum $ys, i <- [n..m] ] |] ), ( "text", 1, \[xs] -> [| text $xs |] ), ( "string", 1, \[xs] -> [| string $xs |] ), ( "data", 1, \[xs] -> [| text $xs |] ), ( "node", 1, \[xs] -> [| [ w | w@(XElem _ _ _ _ _) <- $xs ] |] ), ( "exists", 1, \[xs] -> [| [ XBool (not (null $xs)) ] |] ), ( "empty", 0, \[] -> [| [] |] ), ( "true", 0, \[] -> [| [trueXT] |] ), ( "false", 0, \[] -> [| [] |] ), ( "if", 3, \[cs,ts,es] -> [| if conditionTest $cs then $ts else $es |] ), ( "element", 2, \[tags,xs] -> [| [ x | tag <- toString $tags, x@(XElem t _ _ _ _) <- $xs, (t==tag || tag=="*") ] |] ), ( "attribute", 2, \[tags,xs] -> [| [ z | tag <- toString $tags, x <- $xs, z <- attribute_step tag x ] |] ), ( "name", 1, \[xs] -> [| [ XText tag | XElem tag _ _ _ _ <- $xs ] |] ), ( "contains", 2, \[xs,text] -> [| [ trueXT | x <- toString $xs, t <- toString $text, contains x t ] |] ), ( "substring", 3, \[xs,n1,n2] -> [| [ XText (take m2 (drop (m1-1) x)) | x <- toString $xs, XInt m1 <- toNum $n1, XInt m2 <- toNum $n2 ] |] ), ( "concatenate", 2, \[xs,ys] -> [| $xs ++ $ys |] ), ( "distinct-values", 1, \[xs] -> [| distinct $xs |] ), ( "union", 2, \[xs,ys] -> [| distinct ($xs ++ $ys) |] ), ( "intersect", 2, \[xs,ys] -> [| filter (\x -> elem x $ys) $xs |] ), ( "except", 2, \[xs,ys] -> [| filter (\x -> not (elem x $ys)) $xs |] ), ( "reverse", 1, \[xs] -> [| reverse $xs |] ) ] system_functions :: [String] system_functions = map (\(nm,_,_) -> nm) functions ++ map (\(nm,_,_) -> "fn:"++nm) functions -- functions to be used by the interpreter -- when evaluated, it gives [(String,Int,[XSeq]->XSeq)] iFunctions :: Q Exp iFunctions = foldr (\(fname,len,f) r -> let vars = map (\i -> mkName ("v_"++(show i))) [1..len] entry = tupE [litE (StringL fname),litE (IntegerL (toInteger len)), lamE [listP (map varP vars)] (f (map varE vars))] in [| $entry : $r |]) [| [] |] functions -- make a function call callF :: Tag -> Function callF fname args = case filter (\(n,_,_) -> n == fname || ("fn:"++n)==fname) functions of (_,len,f):_ -> if (length args) == len then f args else error ("wrong number of arguments in function call: " ++ fname) _ -> -- otherwise, it must be a Haskell function of type (XSeq,...,XSeq) -> XSeq let itp = case args of [] -> [t| () |] [_] -> [t| XSeq |] _ -> foldr (\_ r -> appT r [t| XSeq |]) (appT (tupleT (length args)) [t| XSeq |]) (tail args) fn = sigE (varE (mkName fname)) (appT (appT arrowT itp) [t| IO XSeq |]) in appE fn (tupE args)