-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.XPath.XPathParser Copyright : Copyright (C) 2006-2010 Uwe Schmidt, Torben Kuseler License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable The XPath Parser -} -- ------------------------------------------------------------ module Text.XML.HXT.XPath.XPathParser ( parseNumber , parseXPath ) where import Text.ParserCombinators.Parsec import Text.XML.HXT.DOM.TypeDefs import Text.XML.HXT.XPath.XPathKeywords import Text.XML.HXT.XPath.XPathDataTypes import Text.XML.HXT.Parser.XmlCharParser ( XParser , XPState(..) , withNormNewline ) import Text.XML.HXT.Parser.XmlTokenParser ( separator , systemLiteral , skipS0 , ncName , qName ) -- ------------------------------------------------------------ lookupNs :: NsEnv -> XName -> Maybe XName lookupNs uris prefix | null uris = Just nullXName -- not namespace aware XPath | isNullXName prefix = maybe (Just nullXName) Just $ -- no default namespace given lookup prefix uris | otherwise = lookup prefix uris -- namespace aware enhanceAttrQName :: NsEnv -> QName -> Maybe QName enhanceAttrQName uris qn | isNullXName (namePrefix' qn) = Just qn | otherwise = enhanceQName uris qn enhanceQName :: NsEnv -> QName -> Maybe QName enhanceQName uris qn = do nsu <- lookupNs uris (namePrefix' qn) return $ setNamespaceUri' nsu qn enhanceQN :: AxisSpec -> NsEnv -> QName -> Maybe QName enhanceQN Attribute = enhanceAttrQName enhanceQN _ = enhanceQName type XPathParser a = XParser NsEnv a -- ------------------------------------------------------------ -- parse functions which are used in the XPathFct module -- | -- parsing a number, parseNumber is used in "XPathFct" -- by the number function -- -- - returns : the parsed number as 'XPNumber' float -- or 'XPVNumber' 'NaN' in case of error parseNumber :: String -> XPathValue parseNumber s = case (runParser parseNumber' (withNormNewline []) {- Map.empty -} "" s) of Left _ -> XPVNumber NaN Right x -> if (read x :: Float) == 0 then (XPVNumber Pos0) else XPVNumber (Float (read x)) parseNumber' :: XPathParser String parseNumber' = do skipS0 m <- option "" (string "-") n <- number skipS0 eof return (m ++ n) -- ------------------------------------------------------------ -- | -- the main entry point: -- parsing a XPath expression parseXPath :: XPathParser Expr parseXPath = do skipS0 xPathExpr <- expr skipS0 eof return xPathExpr -- some useful token and symbol parser lpar, rpar, lbra, rbra, slash, dslash :: XPathParser () lpar = tokenParser (symbol "(") rpar = tokenParser (symbol ")") lbra = tokenParser (symbol "[") rbra = tokenParser (symbol "]") slash = tokenParser (symbol "/") dslash = tokenParser (symbol "//") tokenParser :: XPathParser String -> XPathParser () tokenParser p = try ( do skipS0 _ <- p skipS0 ) symbolParser :: (String, a) -> XPathParser a symbolParser (s,a) = do tokenParser (symbol s) return a symbol :: String -> XPathParser String symbol s = try (string s) -- operation parser orOp, andOp, eqOp, relOp, addOp, multiOp, unionOp :: XPathParser Op orOp = symbolParser ("or", Or) andOp = symbolParser ("and", And) eqOp = symbolParser ("=", Eq) <|> symbolParser ("!=", NEq) relOp = choice [ symbolParser ("<=", LessEq) , symbolParser (">=", GreaterEq) , symbolParser ("<", Less) , symbolParser (">", Greater) ] addOp = symbolParser ("+", Plus) <|> symbolParser ("-", Minus) multiOp = choice [ symbolParser ("*", Mult) , symbolParser ("mod", Mod) , symbolParser ("div", Div) ] unionOp = symbolParser ("|", Union) -- ------------------------------------------------------------ mkExprNode :: Expr -> [(Op, Expr)] -> Expr mkExprNode e1 [] = e1 mkExprNode e1 l@((op, _): _) = if null rest then GenExpr op (e1:(map snd l)) else GenExpr op $ (e1:(map snd $ init same)) ++ [mkExprNode (snd $ last same) rest] where (same, rest) = span ((==op) . fst) l -- Tim Walkenhorst, original expr. below: -- It seems mkExprNode is called only with operators of the same precedence, that should make it fixable -- FIXED, see above! --mkExprNode e1 l@((op, _): _) = GenExpr op (e1:(map snd l)) -- Less than ideal: 1+1-1 = 3 ??? --GenExpr op (e1:(map snd l)) exprRest :: XPathParser Op -> XPathParser Expr -> XPathParser (Op, Expr) exprRest parserOp parserExpr = do op <- parserOp e2 <- parserExpr return (op, e2) -- ------------------------------------------------------------ -- abbreviation of "//" descOrSelfStep :: XStep descOrSelfStep = (Step DescendantOrSelf (TypeTest XPNode) []) -- ------------------------------------------------------------ -- Location Paths (2) -- [1] LocationPath locPath :: XPathParser LocationPath locPath = absLocPath <|> relLocPath' -- [2] AbsoluteLocationPath absLocPath :: XPathParser LocationPath absLocPath = do -- [10] dslash s <- relLocPath return (LocPath Abs ([descOrSelfStep] ++ s)) <|> do slash s <- option [] relLocPath return (LocPath Abs s) "absLocPath" -- [3] RelativeLocationPath relLocPath' :: XPathParser LocationPath relLocPath' = do rel <- relLocPath return (LocPath Rel rel) relLocPath :: XPathParser [XStep] relLocPath = do s1 <- step s2 <- many (step') return ([s1] ++ (concat s2)) "relLocPath" -- Location Steps (2.1) -- -- [4] Step step' :: XPathParser [XStep] step' = do -- [11] dslash s <- step return [descOrSelfStep,s] <|> do slash s <- step return [s] "step'" step :: XPathParser XStep step = abbrStep <|> do as <- axisSpecifier' nt <- nodeTest as pr <- many predicate return (Step as nt pr) "step" -- [5] AxisSpecifier axisSpecifier' :: XPathParser AxisSpec axisSpecifier' = do -- [13] tokenParser (symbol "@") return Attribute <|> do as <- option Child ( try ( do -- child-axis is default-axis a <- axisSpecifier tokenParser (symbol "::") return a ) ) return as "axisSpecifier'" -- Axes (2.2) -- -- [6] AxisName axisSpecifier :: XPathParser AxisSpec axisSpecifier = choice [ symbolParser (a_ancestor_or_self, AncestorOrSelf) , symbolParser (a_ancestor, Ancestor) , symbolParser (a_attribute, Attribute) , symbolParser (a_child, Child) , symbolParser (a_descendant_or_self, DescendantOrSelf) , symbolParser (a_descendant, Descendant) , symbolParser (a_following_sibling, FollowingSibling) , symbolParser (a_following, Following) , symbolParser (a_namespace, Namespace) , symbolParser (a_parent, Parent) , symbolParser (a_preceding_sibling, PrecedingSibling) , symbolParser (a_preceding, Preceding) , symbolParser (a_self, Self) ] "axisSpecifier" -- Node Tests (2.3) -- -- [7] NodeTest nodeTest :: AxisSpec -> XPathParser NodeTest nodeTest as = do nt <- try nodeType' return (TypeTest nt) <|> do processInst <- pI return (PI processInst) <|> do nt <- nameTest as return (NameTest nt) "nodeTest" pI :: XPathParser String pI = do tokenParser (symbol n_processing_instruction) li <- between lpar rpar literal return li "Processing-Instruction" -- Predicates (2.4) -- -- [8] Predicate -- [9] PredicateExpr predicate :: XPathParser Expr predicate = do ex <- between lbra rbra expr return ex -- Abbreviated Syntax (2.5) -- -- [10] AbbreviatedAbsoluteLocationPath: q.v. [2] -- [11] AbbreviatedRelativeLocationPath: q.v. [4] -- [12] AbbreviatedStep abbrStep :: XPathParser XStep abbrStep = do tokenParser (symbol "..") return (Step Parent (TypeTest XPNode) []) <|> do tokenParser (symbol ".") return (Step Self (TypeTest XPNode) []) "abbrStep" -- [13] AbbreviatedAxisSpecifier: q.v. [5] -- ------------------------------------------------------------ -- Expressions (3) -- Basics (3.1) -- -- [14] Expr expr :: XPathParser Expr expr = orExpr -- [15] PrimaryExpr primaryExpr :: XPathParser Expr primaryExpr = do vr <- variableReference return (VarExpr vr) <|> do ex <- between lpar rpar expr return ex <|> do li <- literal return (LiteralExpr li) <|> do num <- number return (NumberExpr (Float $ read num)) <|> do fc <- functionCall return (fc) "primaryExpr" -- Function Calls (3.2) -- -- [16] FunctionCall -- [17] Argument functionCall :: XPathParser Expr functionCall = do fn <- functionName arg <- between lpar rpar ( sepBy expr (separator ',') ) return (FctExpr fn arg) "functionCall" -- Node-sets (3.3) -- -- [18] UnionExpr unionExpr :: XPathParser Expr unionExpr = do e1 <- pathExpr eRest <- many (exprRest unionOp pathExpr) return (mkExprNode e1 eRest) -- [19] PathExpr pathExpr :: XPathParser Expr pathExpr = do fe <- try filterExpr path <- do dslash LocPath t1 t2 <- relLocPath' return (PathExpr (Just fe) (Just (LocPath t1 ([descOrSelfStep] ++ t2)))) <|> do slash relPath <- relLocPath' return (PathExpr (Just fe) (Just relPath)) <|> return fe return path <|> do lp <- locPath return (PathExpr Nothing (Just lp)) "pathExpr" -- [20] FilterExpr filterExpr :: XPathParser Expr filterExpr = do prim <- primaryExpr predicates <- many predicate if length predicates > 0 then return (FilterExpr (prim : predicates)) else return prim "filterExpr" -- Booleans (3.4) -- -- [21] OrExpr orExpr :: XPathParser Expr orExpr = do e1 <- andExpr eRest <- many (exprRest orOp andExpr) return (mkExprNode e1 eRest) "orExpr" -- [22] AndExpr andExpr :: XPathParser Expr andExpr = do e1 <- equalityExpr eRest <- many (exprRest andOp equalityExpr) return (mkExprNode e1 eRest) "andExpr" -- [23] EqualityExpr equalityExpr :: XPathParser Expr equalityExpr = do e1 <- relationalExpr eRest <- many (exprRest eqOp relationalExpr) return (mkExprNode e1 eRest) "equalityExpr" -- [24] RelationalExpr relationalExpr :: XPathParser Expr relationalExpr = do e1 <- additiveExpr eRest <- many (exprRest relOp additiveExpr) return (mkExprNode e1 eRest) "relationalExpr" -- Numbers (3.5) -- -- [25] AdditiveExpr additiveExpr :: XPathParser Expr additiveExpr = do e1 <- multiplicativeExpr eRest <- many (exprRest addOp multiplicativeExpr) return (mkExprNode e1 eRest) "additiveExpr" -- [26] MultiplicativeExpr multiplicativeExpr :: XPathParser Expr multiplicativeExpr = do e1 <- unaryExpr eRest <- many (exprRest multiOp unaryExpr) return (mkExprNode e1 eRest) "multiplicativeExpr" -- [27] UnaryExpr unaryExpr :: XPathParser Expr unaryExpr = do tokenParser (symbol "-") u <- unaryExpr return (GenExpr Unary [u]) <|> do u <- unionExpr return u "unaryExpr" -- Lexical Structure (3.7) -- -- [29] Literal -- systemLiteral from XmlParser is used literal :: XPathParser String literal = systemLiteral -- [30] Number number :: XPathParser String number = do tokenParser (symbol ".") d <- many1 digit return ("0." ++ d) <|> do d <- many1 digit d1 <- option "" ( do tokenParser (symbol ".") d2 <- option "0" (many1 digit) return ("." ++ d2) ) return (d ++ d1) "number" -- [35] FunctionName -- no nodetype name is allowed as a function name -- Tim Walkenhorst: -- Change in String encoding for function name -- -- previoulsy: new: -- -- name name -- pref:name {http://uri-for-pref}name functionName :: XPathParser String functionName = do (p, n) <- try qName fn <- enhanceName Attribute $ mkPrefixLocalPart p n if null p then if n `elem` ["processing-instruction", "comment", "text", "node"] then fail $ "function name: " ++ n ++ "not allowed" else return n else return $ "{" ++ namespaceUri fn ++ "}" ++ n "functionName" -- [36] VariableReference variableReference :: XPathParser (String, String) variableReference = do tokenParser (symbol "$") (p, n) <- qName vn <- enhanceName Attribute $ mkPrefixLocalPart p n return (namespaceUri vn, n) "variableReference" -- [37] NameTest nameTest :: AxisSpec -> XPathParser QName nameTest axs = do tokenParser (symbol "*") enhanceName axs $ mkPrefixLocalPart "" "*" <|> do pre <- try ( do pre' <- ncName _ <- symbol ":*" return pre' ) enhanceName axs $ mkPrefixLocalPart pre "*" <|> do (pre,local) <- qName enhanceName axs $ mkPrefixLocalPart pre local "nameTest" enhanceName :: AxisSpec -> QName -> XPathParser QName enhanceName axs qn = do uris <- getState >>= return . xps_userState case enhanceQN axs uris qn of Nothing -> fail $ "no namespace uri given for prefix " ++ show (namePrefix qn) Just qn' -> return qn' "qualified name with defined namespace uri" -- [38] NodeType nodeType' :: XPathParser XPathNode nodeType' = do nt <- nodeType lpar rpar return nt "nodeType'" nodeType :: XPathParser XPathNode nodeType = choice [ symbolParser (n_comment, XPCommentNode) , symbolParser (n_text, XPTextNode) , symbolParser (n_processing_instruction, XPPINode) , symbolParser (n_node, XPNode) ] "nodeType" -- ------------------------------------------------------------