-- ------------------------------------------------------------ {- | Module : Yuuko.Text.XML.HXT.XPath.XPathParser Copyright : Copyright (C) 2006-2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable the XPath Parser -} -- ------------------------------------------------------------ module Yuuko.Text.XML.HXT.XPath.XPathParser ( parseNumber , parseXPath ) where import Data.Maybe import Text.ParserCombinators.Parsec import Yuuko.Text.XML.HXT.DOM.TypeDefs import Yuuko.Text.XML.HXT.XPath.XPathKeywords import Yuuko.Text.XML.HXT.XPath.XPathDataTypes import Yuuko.Text.XML.HXT.Parser.XmlTokenParser ( separator , systemLiteral , skipS0 , ncName , qName ) lookupNs :: NsEnv -> XName -> XName lookupNs uris prefix -- downwards compatibility: if namespace env is not supported -- no error is raised, but the uri remains empty -- not conformant to XPath spec: -- If namespaces are used, a complete env must be supported, -- but we don't care about this = fromMaybe nullXName $ lookup prefix uris enhanceAttrQName :: NsEnv -> QName -> QName enhanceAttrQName uris qn | isNullXName (namePrefix' qn) = qn | otherwise = enhanceQName uris qn enhanceQName :: NsEnv -> QName -> QName enhanceQName uris qn = setNamespaceUri' ( lookupNs uris (namePrefix' qn) ) qn enhanceQN :: AxisSpec -> NsEnv -> QName -> QName enhanceQN Attribute = enhanceAttrQName enhanceQN _ = enhanceQName type XParser a = GenParser Char NsEnv a -- ------------------------------------------------------------ -- parse functions which are used in the XPathFct module -- | -- parsing a number, parseNumber is used in "XPathFct" -- by the number function -- -- - returns : the parsed number as 'XPNumber' float -- or 'XPVNumber' 'NaN' in case of error parseNumber :: String -> XPathValue parseNumber s = case (runParser parseNumber' [] {- Map.empty -} "" s) of Left _ -> XPVNumber NaN Right x -> if (read x :: Float) == 0 then (XPVNumber Pos0) else XPVNumber (Float (read x)) parseNumber' :: XParser String parseNumber' = do skipS0 m <- option "" (string "-") n <- number skipS0 eof return (m ++ n) -- ------------------------------------------------------------ -- | -- the main entry point: -- parsing a XPath expression parseXPath :: XParser Expr parseXPath = do skipS0 xPathExpr <- expr skipS0 eof return xPathExpr -- some useful token and symbol parser lpar, rpar, lbra, rbra, slash, dslash :: XParser () lpar = tokenParser (symbol "(") rpar = tokenParser (symbol ")") lbra = tokenParser (symbol "[") rbra = tokenParser (symbol "]") slash = tokenParser (symbol "/") dslash = tokenParser (symbol "//") tokenParser :: XParser String -> XParser () tokenParser p = try ( do skipS0 _ <- p skipS0 ) symbolParser :: (String, a) -> XParser a symbolParser (s,a) = do tokenParser (symbol s) return a symbol :: String -> XParser String symbol s = try (string s) -- operation parser orOp, andOp, eqOp, relOp, addOp, multiOp, unionOp :: XParser Op orOp = symbolParser ("or", Or) andOp = symbolParser ("and", And) eqOp = symbolParser ("=", Eq) <|> symbolParser ("!=", NEq) relOp = choice [ symbolParser ("<=", LessEq) , symbolParser (">=", GreaterEq) , symbolParser ("<", Less) , symbolParser (">", Greater) ] addOp = symbolParser ("+", Plus) <|> symbolParser ("-", Minus) multiOp = choice [ symbolParser ("*", Mult) , symbolParser ("mod", Mod) , symbolParser ("div", Div) ] unionOp = symbolParser ("|", Union) -- ------------------------------------------------------------ mkExprNode :: Expr -> [(Op, Expr)] -> Expr mkExprNode e1 [] = e1 mkExprNode e1 l@((op, _): _) = if null rest then GenExpr op (e1:(map snd l)) else GenExpr op $ (e1:(map snd $ init same)) ++ [mkExprNode (snd $ last same) rest] where (same, rest) = span ((==op) . fst) l -- Tim Walkenhorst, original expr. below: -- It seems mkExprNode is called only with operators of the same precedence, that should make it fixable -- FIXED, see above! --mkExprNode e1 l@((op, _): _) = GenExpr op (e1:(map snd l)) -- Less than ideal: 1+1-1 = 3 ??? --GenExpr op (e1:(map snd l)) exprRest :: XParser Op -> XParser Expr -> XParser (Op, Expr) exprRest parserOp parserExpr = do op <- parserOp e2 <- parserExpr return (op, e2) -- ------------------------------------------------------------ -- abbreviation of "//" descOrSelfStep :: XStep descOrSelfStep = (Step DescendantOrSelf (TypeTest XPNode) []) -- ------------------------------------------------------------ -- Location Paths (2) -- [1] LocationPath locPath :: XParser LocationPath locPath = absLocPath <|> relLocPath' -- [2] AbsoluteLocationPath absLocPath :: XParser LocationPath absLocPath = do -- [10] dslash s <- relLocPath return (LocPath Abs ([descOrSelfStep] ++ s)) <|> do slash s <- option [] relLocPath return (LocPath Abs s) "absLocPath" -- [3] RelativeLocationPath relLocPath' :: XParser LocationPath relLocPath' = do rel <- relLocPath return (LocPath Rel rel) relLocPath :: XParser [XStep] relLocPath = do s1 <- step s2 <- many (step') return ([s1] ++ (concat s2)) "relLocPath" -- Location Steps (2.1) -- -- [4] Step step' :: XParser [XStep] step' = do -- [11] dslash s <- step return [descOrSelfStep,s] <|> do slash s <- step return [s] "step'" step :: XParser XStep step = abbrStep <|> do as <- axisSpecifier' nt <- nodeTest as pr <- many predicate return (Step as nt pr) "step" -- [5] AxisSpecifier axisSpecifier' :: XParser AxisSpec axisSpecifier' = do -- [13] tokenParser (symbol "@") return Attribute <|> do as <- option Child ( try ( do -- child-axis is default-axis a <- axisSpecifier tokenParser (symbol "::") return a ) ) return as "axisSpecifier'" -- Axes (2.2) -- -- [6] AxisName axisSpecifier :: XParser AxisSpec axisSpecifier = choice [ symbolParser (a_ancestor_or_self, AncestorOrSelf) , symbolParser (a_ancestor, Ancestor) , symbolParser (a_attribute, Attribute) , symbolParser (a_child, Child) , symbolParser (a_descendant_or_self, DescendantOrSelf) , symbolParser (a_descendant, Descendant) , symbolParser (a_following_sibling, FollowingSibling) , symbolParser (a_following, Following) , symbolParser (a_namespace, Namespace) , symbolParser (a_parent, Parent) , symbolParser (a_preceding_sibling, PrecedingSibling) , symbolParser (a_preceding, Preceding) , symbolParser (a_self, Self) ] "axisSpecifier" -- Node Tests (2.3) -- -- [7] NodeTest nodeTest :: AxisSpec -> XParser NodeTest nodeTest as = do nt <- try nodeType' return (TypeTest nt) <|> do processInst <- pI return (PI processInst) <|> do nt <- nameTest as return (NameTest nt) "nodeTest" pI :: XParser String pI = do tokenParser (symbol n_processing_instruction) li <- between lpar rpar literal return li "Processing-Instruction" -- Predicates (2.4) -- -- [8] Predicate -- [9] PredicateExpr predicate :: XParser Expr predicate = do ex <- between lbra rbra expr return ex -- Abbreviated Syntax (2.5) -- -- [10] AbbreviatedAbsoluteLocationPath: q.v. [2] -- [11] AbbreviatedRelativeLocationPath: q.v. [4] -- [12] AbbreviatedStep abbrStep :: XParser XStep abbrStep = do tokenParser (symbol "..") return (Step Parent (TypeTest XPNode) []) <|> do tokenParser (symbol ".") return (Step Self (TypeTest XPNode) []) "abbrStep" -- [13] AbbreviatedAxisSpecifier: q.v. [5] -- ------------------------------------------------------------ -- Expressions (3) -- Basics (3.1) -- -- [14] Expr expr :: XParser Expr expr = orExpr -- [15] PrimaryExpr primaryExpr :: XParser Expr primaryExpr = do vr <- variableReference return (VarExpr vr) <|> do ex <- between lpar rpar expr return ex <|> do li <- literal return (LiteralExpr li) <|> do num <- number return (NumberExpr (Float $ read num)) <|> do fc <- functionCall return (fc) "primaryExpr" -- Function Calls (3.2) -- -- [16] FunctionCall -- [17] Argument functionCall :: XParser Expr functionCall = do fn <- functionName arg <- between lpar rpar ( sepBy expr (separator ',') ) return (FctExpr fn arg) "functionCall" -- Node-sets (3.3) -- -- [18] UnionExpr unionExpr :: XParser Expr unionExpr = do e1 <- pathExpr eRest <- many (exprRest unionOp pathExpr) return (mkExprNode e1 eRest) -- [19] PathExpr pathExpr :: XParser Expr pathExpr = do fe <- try filterExpr path <- do dslash LocPath t1 t2 <- relLocPath' return (PathExpr (Just fe) (Just (LocPath t1 ([descOrSelfStep] ++ t2)))) <|> do slash relPath <- relLocPath' return (PathExpr (Just fe) (Just relPath)) <|> return fe return path <|> do lp <- locPath return (PathExpr Nothing (Just lp)) "pathExpr" -- [20] FilterExpr filterExpr :: XParser Expr filterExpr = do prim <- primaryExpr predicates <- many predicate if length predicates > 0 then return (FilterExpr (prim : predicates)) else return prim "filterExpr" -- Booleans (3.4) -- -- [21] OrExpr orExpr :: XParser Expr orExpr = do e1 <- andExpr eRest <- many (exprRest orOp andExpr) return (mkExprNode e1 eRest) "orExpr" -- [22] AndExpr andExpr :: XParser Expr andExpr = do e1 <- equalityExpr eRest <- many (exprRest andOp equalityExpr) return (mkExprNode e1 eRest) "andExpr" -- [23] EqualityExpr equalityExpr :: XParser Expr equalityExpr = do e1 <- relationalExpr eRest <- many (exprRest eqOp relationalExpr) return (mkExprNode e1 eRest) "equalityExpr" -- [24] RelationalExpr relationalExpr :: XParser Expr relationalExpr = do e1 <- additiveExpr eRest <- many (exprRest relOp additiveExpr) return (mkExprNode e1 eRest) "relationalExpr" -- Numbers (3.5) -- -- [25] AdditiveExpr additiveExpr :: XParser Expr additiveExpr = do e1 <- multiplicativeExpr eRest <- many (exprRest addOp multiplicativeExpr) return (mkExprNode e1 eRest) "additiveExpr" -- [26] MultiplicativeExpr multiplicativeExpr :: XParser Expr multiplicativeExpr = do e1 <- unaryExpr eRest <- many (exprRest multiOp unaryExpr) return (mkExprNode e1 eRest) "multiplicativeExpr" -- [27] UnaryExpr unaryExpr :: XParser Expr unaryExpr = do tokenParser (symbol "-") u <- unaryExpr return (GenExpr Unary [u]) <|> do u <- unionExpr return u "unaryExpr" -- Lexical Structure (3.7) -- -- [29] Literal -- systemLiteral from XmlParser is used literal :: XParser String literal = systemLiteral -- [30] Number number :: XParser String number = do tokenParser (symbol ".") d <- many1 digit return ("0." ++ d) <|> do d <- many1 digit d1 <- option "" ( do tokenParser (symbol ".") d2 <- option "0" (many1 digit) return ("." ++ d2) ) return (d ++ d1) "number" -- [35] FunctionName -- no nodetype name is allowed as a function name -- Tim Walkenhorst: -- Change in String encoding for function name -- -- previoulsy: new: -- -- name name -- pref:name {http://uri-for-pref}name functionName :: XParser String functionName = try ( do (p, n) <- qName uris <- getState u <- return $ if null p then "" else '{' : show (lookupNs uris (newXName p)) ++ "}" let fn = (u ++ n) in if fn `elem` ["processing-instruction", "comment", "text", "node"] then fail ("function name: " ++ fn ++ "not allowed") else return fn ) "functionName" -- [36] VariableReference variableReference :: XParser (String, String) variableReference = do tokenParser (symbol "$") (p,n) <- qName uris <- getState return (show (lookupNs uris (newXName p)), n) "variableReference" -- [37] NameTest nameTest :: AxisSpec -> XParser QName nameTest as = do tokenParser (symbol "*") uris <- getState return (enhanceQN as uris $ mkPrefixLocalPart "" "*") <|> try ( do pre <- ncName _ <- symbol ":*" uris <- getState return (enhanceQN as uris $ mkPrefixLocalPart pre "*") ) <|> do (pre,local) <- qName uris <- getState return (enhanceQN as uris $ mkPrefixLocalPart pre local) "nameTest" -- [38] NodeType nodeType' :: XParser XPathNode nodeType' = do nt <- nodeType lpar rpar return nt "nodeType'" nodeType :: XParser XPathNode nodeType = choice [ symbolParser (n_comment, XPCommentNode) , symbolParser (n_text, XPTextNode) , symbolParser (n_processing_instruction, XPPINode) , symbolParser (n_node, XPNode) ] "nodeType" -- ------------------------------------------------------------