----------------------------------------------------------------------------- -- | -- Module : Database.TxtSushi.SQLParser -- Copyright : (c) Keith Sheppard 2009 -- License : GPL3 or greater -- Maintainer : keithshep@gmail.com -- Stability : experimental -- Portability : portable -- -- Module for parsing SQL -- ----------------------------------------------------------------------------- module Database.TxtSushi.SQLParser ( parseSelectStatement, allMaybeTableNames, SelectStatement(..), TableExpression(..), ColumnIdentifier(..), ColumnSelection(..), Expression(..), SQLFunction(..), OrderByItem(..)) where import Data.Char import Data.List import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Expr import Database.TxtSushi.ParseUtil import Database.TxtSushi.SQLExpression import Database.TxtSushi.SQLFunctionDefinitions -- | Parses a SQL select statement parseSelectStatement :: GenParser Char st SelectStatement parseSelectStatement = (try $ spaces >> parseToken "SELECT") >> parseSelectBody -- | Parses all of the stuff that comes after "SELECT " parseSelectBody :: GenParser Char st SelectStatement parseSelectBody = do columnVals <- parseColumnSelections -- TODO need a better error message for missing "ON" etc. in -- the from part, can do this by grabing "FROM" first maybeFrom <- maybeParseFromPart maybeWhere <- maybeParseWherePart groupByExprs <- maybeParseGroupByPart orderBy <- parseOrderByPart return SelectStatement { columnSelections = columnVals, maybeFromTable = maybeFrom, maybeWhereFilter = maybeWhere, orderByItems = orderBy, maybeGroupByHaving = groupByExprs} where maybeParseFromPart = ifParseThen (parseToken "FROM") parseTableExpression maybeParseWherePart = ifParseThen (parseToken "WHERE") parseExpression -- | Parses the "ORDER BY ..." part of a select statement returning the list -- of OrderByItem's that were parsed (this list will be empty if there is no -- "ORDER BY" parsed parseOrderByPart :: GenParser Char st [OrderByItem] parseOrderByPart = ifParseThenElse -- if we see an "ORDER BY" (parseToken "ORDER") -- then parse the order expressions (parseToken "BY" >> sepByAtLeast 1 parseOrderByItem commaSeparator) -- else there is nothing to sort by (return []) where parseOrderByItem :: GenParser Char st OrderByItem parseOrderByItem = do orderExpr <- parseExpression isAscending <- ifParseThenElse -- if we parse "DESC" (try parseDescending) -- then return false, it isn't ascending (return False) -- else try to consume "ASC" but even if we don't it's still -- ascending so return true unconditionally ((parseAscending <|> return []) >> return True) return $ OrderByItem orderExpr isAscending parseAscending = parseToken "ASCENDING" <|> parseToken "ASC" parseDescending = parseToken "DESCENDING" <|> parseToken "DESC" maybeParseGroupByPart :: GenParser Char st (Maybe ([Expression], Maybe Expression)) maybeParseGroupByPart = ifParseThen -- if we see a "GROUP BY" (parseToken "GROUP") -- then parse the expressions (parseToken "BY" >> parseGroupBy) where parseGroupBy = do groupExprs <- atLeastOneExpr maybeHavingExpr <- ifParseThen (parseToken "HAVING") parseExpression return (groupExprs, maybeHavingExpr) atLeastOneExpr :: GenParser Char st [Expression] atLeastOneExpr = sepByAtLeast 1 parseExpression commaSeparator -------------------------------------------------------------------------------- -- Functions for parsing the column names specified after "SELECT" -------------------------------------------------------------------------------- parseColumnSelections :: GenParser Char st [ColumnSelection] parseColumnSelections = sepBy1 parseAnyColType (try commaSeparator) where parseAnyColType = parseRangeColumns <|> parseAllCols <|> (try parseAllColsFromTbl) <|> (try parseColExpression) parseRangeColumns :: GenParser Char st ColumnSelection parseRangeColumns = parseRangeInner where parseRangeInner = do parseToken "FOR" bindingId <- parseColumnId parseToken "IN" colRange <- parseColRange parseToken "YIELD" expr <- parseExpression return $ ExpressionColumnRange bindingId colRange expr where parseColRange = brace $ do maybeStartCol <- maybeParse parseColumnId parseToken ".." maybeEndCol <- maybeParse parseColumnId return $ ColumnRange maybeStartCol maybeEndCol parseAllCols :: GenParser Char st ColumnSelection parseAllCols = parseToken "*" >> return AllColumns parseAllColsFromTbl :: GenParser Char st ColumnSelection parseAllColsFromTbl = do tableVal <- parseIdentifier string "." >> spaces >> parseToken "*" return $ AllColumnsFrom tableVal parseColExpression :: GenParser Char st ColumnSelection parseColExpression = do expr <- parseExpression maybeAlias <- maybeParseAlias return $ ExpressionColumn expr maybeAlias parseColumnId :: GenParser Char st ColumnIdentifier parseColumnId = do firstId <- parseIdentifier maybeFullyQual <- maybeParse $ parseToken "." case maybeFullyQual of -- No '.' means it's a partially qualified column Nothing -> return $ ColumnIdentifier Nothing firstId Just _ -> do secondId <- parseIdentifier return $ ColumnIdentifier (Just firstId) secondId -------------------------------------------------------------------------------- -- Functions for parsing the table part (after "FROM") -------------------------------------------------------------------------------- parseTableExpression :: GenParser Char a TableExpression parseTableExpression = parenthesize parseTableExpression <|> parseSelectExpression <|> parseTableIdentifierOrJoin "Table Expression" parseSelectExpression :: GenParser Char a TableExpression parseSelectExpression = do selectStmt <- parseSelectStatement maybeAlias <- maybeParseAlias return $ SelectExpression selectStmt maybeAlias parseTableIdentifierOrJoin :: GenParser Char a TableExpression parseTableIdentifierOrJoin = do nextTblId <- parseTableIdentifier let ifCrossOrInnerJoinParse = ifParseThenElse -- if crossJoinSep -- TODO commit to join -- then (parseCrossJoinRemainder nextTblId) -- else ifInnerJoinParse ifInnerJoinParse = ifParseThenElse -- if innerJoinSep -- TODO commit to join -- then (parseInnerJoinRemainder nextTblId) -- else (return nextTblId) ifCrossOrInnerJoinParse where crossJoinSep = (commaSeparator >> return "") <|> (parseToken "CROSS" >> parseToken "JOIN") innerJoinSep = ((maybeParse $ parseToken "INNER") >> parseToken "JOIN") parseInnerJoinRemainder :: TableExpression -> GenParser Char a TableExpression parseInnerJoinRemainder leftTblExpr = do rightTblExpr <- parseTableExpression parseToken "ON" onPart <- parseExpression maybeAlias <- maybeParseAlias return InnerJoin { leftJoinTable=leftTblExpr, rightJoinTable=rightTblExpr, onCondition=onPart, maybeTableAlias=maybeAlias} parseCrossJoinRemainder :: TableExpression -> GenParser Char a TableExpression parseCrossJoinRemainder leftTblExpr = do rightTblExpr <- parseTableExpression maybeAlias <- maybeParseAlias return CrossJoin { leftJoinTable=leftTblExpr, rightJoinTable=rightTblExpr, maybeTableAlias=maybeAlias} parseTableIdentifier :: GenParser Char st TableExpression parseTableIdentifier = do theId <- parseIdentifier maybeAlias <- maybeParseAlias return $ TableIdentifier theId maybeAlias maybeParseAlias :: GenParser Char st (Maybe [Char]) maybeParseAlias = ifParseThen (parseToken "AS") parseIdentifier -------------------------------------------------------------------------------- -- Expression parsing: These can be after "SELECT", "WHERE" or "HAVING" -------------------------------------------------------------------------------- parseExpression :: GenParser Char st Expression parseExpression = let opTable = map (map parseInfixOp) infixFunctions in buildExpressionParser opTable parseAnyNonInfixExpression "expression" parseAnyNonInfixExpression :: GenParser Char st Expression parseAnyNonInfixExpression = parseParenthesizedExpression <|> parseBoolConstant <|> parseStringConstant <|> try parseRealConstant <|> try parseIntConstant <|> parseAnyNormalFunction <|> parseNegateFunction <|> parseSubstringFunction <|> parseNotFunction <|> parseCountStar <|> (parseColumnId >>= \colId -> return $ ColumnExpression colId (columnToString colId)) parseParenthesizedExpression :: GenParser Char st Expression parseParenthesizedExpression = parenthesize parseExpression >>= \e -> return e {stringRepresentation = "(" ++ stringRepresentation e ++ ")"} parseBoolConstant :: GenParser Char st Expression parseBoolConstant = (parseToken "TRUE" >>= return . BoolConstantExpression True) <|> (parseToken "FALSE" >>= return . BoolConstantExpression False) parseStringConstant :: GenParser Char st Expression parseStringConstant = (quotedText True '"' <|> quotedText True '\'') >>= \str -> return $ StringConstantExpression str ("'" ++ str ++ "'") -- TODO this quoting is not robust! parseIntConstant :: GenParser Char st Expression parseIntConstant = parseInt >>= \int -> return $ IntConstantExpression int (show int) parseRealConstant :: GenParser Char st Expression parseRealConstant = parseReal >>= \real -> return $ RealConstantExpression real (show real) parseAnyNormalFunction :: GenParser Char st Expression parseAnyNormalFunction = let allParsers = map parseNormalFunction normalSyntaxFunctions in choice allParsers parseNormalFunction :: SQLFunction -> GenParser Char st Expression parseNormalFunction sqlFunc = try (parseToken $ functionName sqlFunc) >>= parseNormalFunctionArgs sqlFunc parseNormalFunctionArgs :: SQLFunction -> String -> GenParser Char st Expression parseNormalFunctionArgs sqlFunc sqlFuncStr = do args <- parenthesize $ sepBy parseExpression commaSeparator return $ FunctionExpression sqlFunc args (sqlFuncStr ++ toArgListString args) where toArgListString argExprs = '(' : intercalate ", " (map expressionToString argExprs) ++ ")" -- | This function parses the operator part of the infix function and returns -- a function that excepts a left expression and right expression to form -- an Expression from the FunctionExpression constructor parseInfixOp :: SQLFunction -> Operator Char st Expression parseInfixOp infixFunc = -- use the magic infix type, always assuming left associativity Infix opParser AssocLeft where opParser = parseToken (functionName infixFunc) >> return buildExpr buildExpr leftSubExpr rightSubExpr = FunctionExpression { sqlFunction = infixFunc, functionArguments = [leftSubExpr, rightSubExpr], stringRepresentation = expressionToString leftSubExpr ++ " " ++ functionName infixFunc ++ " " ++ expressionToString rightSubExpr} parseSubstringFunction :: GenParser Char st Expression parseSubstringFunction = do funcStr <- parseToken $ functionName substringFromFunction eatSpacesAfter $ char '(' strExpr <- parseExpression fromStr <- parseToken "FROM" startExpr <- parseExpression maybeForStrAndLength <- preservingIfParseThen (parseToken "FOR") parseExpression eatSpacesAfter $ char ')' let funcStrStart = funcStr ++ "(" ++ expressionToString strExpr ++ " " ++ fromStr ++ expressionToString startExpr return $ case maybeForStrAndLength of Nothing -> FunctionExpression substringFromFunction [strExpr, startExpr] (funcStrStart ++ ")") Just (forStr, lenExpr) -> FunctionExpression substringFromToFunction [strExpr, startExpr, lenExpr] (funcStrStart ++ " " ++ forStr ++ " " ++ expressionToString lenExpr ++ ")") parseNegateFunction :: GenParser Char st Expression parseNegateFunction = do funcStr <- parseToken $ functionName negateFunction expr <- parseAnyNonInfixExpression let funcWithExprsStr = funcStr ++ expressionToString expr return $ FunctionExpression negateFunction [expr] funcWithExprsStr parseNotFunction :: GenParser Char st Expression parseNotFunction = do funcStr <-parseToken $ functionName notFunction expr <- parseAnyNonInfixExpression let funcWithExprsStr = funcStr ++ expressionToString expr return $ FunctionExpression notFunction [expr] funcWithExprsStr parseCountStar :: GenParser Char st Expression parseCountStar = do funcStr <- try (parseToken $ functionName countFunction) parenthesize (parseToken "*") return $ FunctionExpression countFunction [IntConstantExpression 0 "*"] (funcStr ++ "(*)") -------------------------------------------------------------------------------- -- Parse utility functions -------------------------------------------------------------------------------- parseOpChar :: CharParser st Char parseOpChar = oneOf opChars opChars :: [Char] opChars = "~!@#$%^&*-+=|\\<>/?." -- | find out if the given string ends with an op char endsWithOp :: String -> Bool endsWithOp strToTest = last strToTest `elem` opChars -- | A token parser that allows either upper or lower case. all trailing -- whitespace is consumed parseToken :: String -> GenParser Char st String parseToken tokStr = eatSpacesAfter (try $ if endsWithOp tokStr then parseOpTok else parseAlphaNumTok) where parseOpTok = withoutTrailing parseOpChar (string tokStr) parseAlphaNumTok = withoutTrailing (alphaNum <|> char '_') (upperOrLower tokStr) -- | parses an identifier. you can use a tick '`' as a quote for -- an identifier with white-space parseIdentifier :: GenParser Char st String parseIdentifier = do let parseId = do let idChar = alphaNum <|> char '_' notFollowedBy digit quotedText False '`' <|> many1 idChar ((eatSpacesAfter parseId) `genExcept` parseReservedWord) "identifier" commaSeparator :: GenParser Char st Char commaSeparator = eatSpacesAfter $ char ',' -- | Wraps braces parsers around the given inner parser brace :: GenParser Char st a -> GenParser Char st a brace innerParser = do eatSpacesAfter $ char '[' innerParseResults <- innerParser eatSpacesAfter $ char ']' return innerParseResults -- | Wraps parentheses parsers around the given inner parser parenthesize :: GenParser Char st a -> GenParser Char st a parenthesize innerParser = do eatSpacesAfter $ char '(' innerParseResults <- innerParser eatSpacesAfter $ char ')' return innerParseResults parseReservedWord :: GenParser Char st String parseReservedWord = let reservedWordParsers = map parseToken reservedWords in choice reservedWordParsers -- TODO are function names reserved... i don't think so reservedWords :: [String] reservedWords = map functionName normalSyntaxFunctions ++ map functionName (concat infixFunctions) ++ map functionName specialFunctions ++ ["BY","CROSS", "FROM", "FOR", "GROUP", "HAVING", "IN", "INNER", "JOIN", "ON", "ORDER", "SELECT", "WHERE", "TRUE", "FALSE", "YIELD"] -- | tries parsing both the upper and lower case versions of the given string upperOrLower :: String -> GenParser Char st String upperOrLower stringToParse = string (map toUpper stringToParse) <|> string (map toLower stringToParse) stringToParse