module Database.TxtSushi.SQLParser (
allMaybeTableNames,
parseSelectStatement,
SelectStatement(..),
TableExpression(..),
ColumnIdentifier(..),
prettyFormatColumn,
ColumnSelection(..),
expressionIdentifier,
Expression(..),
OrderByItem(..),
prettyFormatWithArgs,
SQLFunction(..),
withTrailing,
withoutTrailing,
isAggregate,
selectStatementContainsAggregates,
avgFunction,
countFunction,
firstFunction,
lastFunction,
maxFunction,
minFunction,
sumFunction,
concatenateFunction,
upperFunction,
lowerFunction,
trimFunction,
substringFromFunction,
substringFromToFunction,
multiplyFunction,
divideFunction,
plusFunction,
minusFunction,
negateFunction,
isFunction,
isNotFunction,
lessThanFunction,
lessThanOrEqualToFunction,
greaterThanFunction,
greaterThanOrEqualToFunction,
andFunction,
orFunction,
notFunction,
regexMatchFunction,
maybeReadInt,
maybeReadReal) where
import Data.Char
import Data.List
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Database.TxtSushi.Util.ListUtil
data SelectStatement = SelectStatement {
columnSelections :: [ColumnSelection],
maybeFromTable :: Maybe TableExpression,
maybeWhereFilter :: Maybe Expression,
maybeGroupByHaving :: Maybe ([Expression], Maybe Expression),
orderByItems :: [OrderByItem]}
deriving (Show, Ord, Eq)
data TableExpression =
TableIdentifier {
tableName :: String,
maybeTableAlias :: Maybe String} |
InnerJoin {
leftJoinTable :: TableExpression,
rightJoinTable :: TableExpression,
onCondition :: Expression,
maybeTableAlias :: Maybe String} |
CrossJoin {
leftJoinTable :: TableExpression,
rightJoinTable :: TableExpression,
maybeTableAlias :: Maybe String}
deriving (Show, Ord, Eq)
allMaybeTableNames :: (Maybe TableExpression) -> [String]
allMaybeTableNames Nothing = []
allMaybeTableNames (Just tblExp) = allTableNames tblExp
allTableNames (TableIdentifier tblName _) = [tblName]
allTableNames (InnerJoin lftTbl rtTbl _ _) =
(allTableNames lftTbl) ++ (allTableNames rtTbl)
allTableNames (CrossJoin lftTbl rtTbl _) =
(allTableNames lftTbl) ++ (allTableNames rtTbl)
data ColumnSelection =
AllColumns |
AllColumnsFrom {sourceTableName :: String} |
ExpressionColumn {expression :: Expression}
deriving (Show, Ord, Eq)
data ColumnIdentifier =
ColumnIdentifier {
maybeTableName :: Maybe String,
columnId :: String}
deriving (Show, Ord, Eq)
prettyFormatColumn :: ColumnIdentifier -> String
prettyFormatColumn (ColumnIdentifier (Just tblName) colId) = tblName ++ "." ++ colId
prettyFormatColumn (ColumnIdentifier (Nothing) colId) = colId
data Expression =
FunctionExpression {
sqlFunction :: SQLFunction,
functionArguments :: [Expression]} |
ColumnExpression {
column :: ColumnIdentifier} |
StringConstantExpression {
stringConstant :: String} |
IntegerConstantExpression {
intConstant :: Int} |
RealConstantExpression {
realConstant :: Double}
deriving (Show, Ord, Eq)
isAggregate :: SQLFunction -> Bool
isAggregate sqlFun = minArgCount sqlFun == 1 && not (argCountIsFixed sqlFun)
containsAggregates :: Expression -> Bool
containsAggregates (FunctionExpression sqlFun args) =
isAggregate sqlFun || any containsAggregates args
containsAggregates _ = False
selectionContainsAggregates :: ColumnSelection -> Bool
selectionContainsAggregates (ExpressionColumn expr) =
containsAggregates expr
selectionContainsAggregates _ = False
orderByItemContainsAggregates :: OrderByItem -> Bool
orderByItemContainsAggregates (OrderByItem expr _) =
containsAggregates expr
selectStatementContainsAggregates :: SelectStatement -> Bool
selectStatementContainsAggregates select =
any selectionContainsAggregates (columnSelections select) ||
any orderByItemContainsAggregates (orderByItems select)
expressionIdentifier :: Expression -> ColumnIdentifier
expressionIdentifier (FunctionExpression func args) =
ColumnIdentifier Nothing ((prettyFormatWithArgs func) args)
expressionIdentifier (ColumnExpression col) = col
expressionIdentifier (StringConstantExpression str) =
ColumnIdentifier Nothing ("\"" ++ str ++ "\"")
expressionIdentifier (IntegerConstantExpression int) =
ColumnIdentifier Nothing (show int)
expressionIdentifier (RealConstantExpression real) =
ColumnIdentifier Nothing (show real)
needsParens :: Expression -> Bool
needsParens (FunctionExpression _ _) = True
needsParens _ = False
toArgString :: Expression -> String
toArgString expr =
let exprFmt = prettyFormatColumn $ expressionIdentifier expr
in if needsParens expr then "(" ++ exprFmt ++ ")" else exprFmt
prettyFormatWithArgs :: SQLFunction -> [Expression] -> String
prettyFormatWithArgs sqlFunc funcArgs
| sqlFunc `elem` normalSyntaxFunctions = prettyFormatNormalFunctionExpression sqlFunc funcArgs
| or (map (sqlFunc `elem`) infixFunctions) = prettyFormatInfixFunctionExpression sqlFunc funcArgs
| sqlFunc == negateFunction = "-" ++ toArgString (head funcArgs)
| sqlFunc == countFunction = functionName countFunction ++ "(*)"
| sqlFunc == substringFromToFunction ||
sqlFunc == substringFromFunction ||
sqlFunc == notFunction =
prettyFormatNormalFunctionExpression sqlFunc funcArgs
prettyFormatInfixFunctionExpression :: SQLFunction -> [Expression] -> String
prettyFormatInfixFunctionExpression sqlFunc funcArgs =
let
arg1 = head funcArgs
arg2 = funcArgs !! 1
in
toArgString arg1 ++ functionName sqlFunc ++ toArgString arg2
prettyFormatNormalFunctionExpression :: SQLFunction -> [Expression] -> String
prettyFormatNormalFunctionExpression sqlFunc funcArgs =
let argString = intercalate ", " (map toArgString funcArgs)
in functionName sqlFunc ++ "(" ++ argString ++ ")"
data SQLFunction = SQLFunction {
functionName :: String,
minArgCount :: Int,
argCountIsFixed :: Bool}
deriving (Show, Ord, Eq)
data OrderByItem = OrderByItem {
orderExpression :: Expression,
orderAscending :: Bool}
deriving (Show, Ord, Eq)
parseSelectStatement :: GenParser Char st SelectStatement
parseSelectStatement = (try $ spaces >> parseToken "SELECT") >> parseSelectBody
parseSelectBody :: GenParser Char st SelectStatement
parseSelectBody = do
columnVals <- parseColumnSelections
maybeFrom <- maybeParseFromPart
maybeWhere <- maybeParseWherePart
groupByExprs <- maybeParseGroupByPart
orderBy <- parseOrderByPart
return SelectStatement {
columnSelections = columnVals,
maybeFromTable = maybeFrom,
maybeWhereFilter = maybeWhere,
orderByItems = orderBy,
maybeGroupByHaving = groupByExprs}
where
maybeParseFromPart =
ifParseThen (parseToken "FROM") parseTableExpression
maybeParseWherePart =
ifParseThen (parseToken "WHERE") parseExpression
parseOrderByPart :: GenParser Char st [OrderByItem]
parseOrderByPart =
ifParseThenElse
(parseToken "ORDER")
(parseToken "BY" >> sepByAtLeast 1 parseOrderByItem commaSeparator)
(return [])
where
parseOrderByItem :: GenParser Char st OrderByItem
parseOrderByItem = do
orderExpr <- parseExpression
isAscending <- ifParseThenElse
(try parseDescending)
(return False)
((parseAscending <|> return []) >> return True)
return $ OrderByItem orderExpr isAscending
parseAscending = parseToken "ASCENDING" <|> parseToken "ASC"
parseDescending = parseToken "DESCENDING" <|> parseToken "DESC"
maybeParseGroupByPart =
ifParseThen
(parseToken "GROUP")
(parseToken "BY" >> parseGroupBy)
where
parseGroupBy = do
groupExprs <- atLeastOneExpr
maybeHavingExpr <- ifParseThen (parseToken "HAVING") parseExpression
return (groupExprs, maybeHavingExpr)
atLeastOneExpr = sepByAtLeast 1 parseExpression commaSeparator
parseColumnSelections =
sepBy1 parseAnyColType (try commaSeparator)
where parseAnyColType = parseAllCols <|>
(try parseAllColsFromTbl) <|>
(try parseColExpression)
parseAllCols = parseToken "*" >> return AllColumns
parseAllColsFromTbl = do
tableVal <- parseIdentifier
string "." >> spaces >> parseToken "*"
return $ AllColumnsFrom tableVal
parseColExpression = parseExpression >>= \expr -> return $ ExpressionColumn expr
parseColumnId = do
firstId <- parseIdentifier
maybeFullyQual <- maybeParse $ (char '.' >> spaces)
case maybeFullyQual of
Nothing -> return $ ColumnIdentifier Nothing firstId
Just _ -> do
secondId <- parseIdentifier
return $ ColumnIdentifier (Just firstId) secondId
parseTableExpression = do
nextTblChunk <- parseNextTblExpChunk
let ifInnerJoinParse = ifParseThenElse
((maybeParse $ parseToken "INNER") >> parseToken "JOIN")
(parseInnerJoinRemainder nextTblChunk)
(return nextTblChunk)
ifCrossOrInnerJoinParse = ifParseThenElse
(parseToken "CROSS" >> parseToken "JOIN")
(parseCrossJoinRemainder nextTblChunk)
ifInnerJoinParse
ifCrossOrInnerJoinParse
parseNextTblExpChunk =
parenthesize parseTableExpression <|> parseTableIdentifier
parseInnerJoinRemainder leftTblExpr = do
rightTblExpr <- parseTableExpression
parseToken "ON"
onPart <- parseExpression
maybeAlias <- maybeParse parseTableAlias
return InnerJoin {
leftJoinTable=leftTblExpr,
rightJoinTable=rightTblExpr,
onCondition=onPart,
maybeTableAlias=maybeAlias}
parseCrossJoinRemainder leftTblExpr = do
rightTblExpr <- parseTableExpression
maybeAlias <- maybeParse parseTableAlias
return CrossJoin {
leftJoinTable=leftTblExpr,
rightJoinTable=rightTblExpr,
maybeTableAlias=maybeAlias}
parseTableIdentifier = do
theId <- parseIdentifier
maybeAlias <- maybeParse parseTableAlias
return $ TableIdentifier theId maybeAlias
parseTableAlias = parseToken "AS" >> parseIdentifier
parseExpression :: GenParser Char st Expression
parseExpression =
let opTable = map (map parseInfixOp) infixFunctions
in buildExpressionParser opTable parseAnyNonInfixExpression <?> "expression"
parseAnyNonInfixExpression :: GenParser Char st Expression
parseAnyNonInfixExpression =
parenthesize parseExpression <|>
parseStringConstant <|>
try parseRealConstant <|>
try parseIntConstant <|>
parseAnyNormalFunction <|>
parseNegateFunction <|>
parseSubstringFunction <|>
parseNotFunction <|>
parseCountStar <|>
(parseColumnId >>= (\colId -> return $ ColumnExpression colId))
parseStringConstant :: GenParser Char st Expression
parseStringConstant =
(quotedText True '"' <|> quotedText True '\'') >>=
(\txt -> return $ StringConstantExpression txt)
parseIntConstant :: GenParser Char st Expression
parseIntConstant =
parseInt >>= (\int -> return $ IntegerConstantExpression int)
parseInt :: GenParser Char st Int
parseInt = eatSpacesAfter . try . (withoutTrailing alphaNum) $ do
digitTxt <- anyParseTxt
return $ read digitTxt
where
anyParseTxt = signedParseTxt <|> unsignedParseTxt <?> "integer"
unsignedParseTxt = many1 digit
signedParseTxt = do
char '-'
unsignedDigitTxt <- unsignedParseTxt
return ('-':unsignedDigitTxt)
maybeReadInt :: String -> Maybe Int
maybeReadInt intStr =
case parse (withTrailing (spaces >> eof) (spaces >> parseInt)) "" intStr of
Left _ -> Nothing
Right int -> Just int
maybeReadReal :: String -> Maybe Double
maybeReadReal realStr =
case parse (withTrailing (spaces >> eof) (spaces >> parseReal)) "" realStr of
Left _ -> maybeReadInt realStr >>= (\int -> Just $ fromIntegral int)
Right real -> Just real
parseRealConstant :: GenParser Char st Expression
parseRealConstant =
parseReal >>= (\real -> return $ RealConstantExpression real)
parseReal :: GenParser Char st Double
parseReal = eatSpacesAfter . try . (withoutTrailing alphaNum) $ do
realTxt <- anyParseTxt
return $ read realTxt
where
anyParseTxt = signedParseTxt <|> unsignedParseTxt <?> "real"
unsignedParseTxt = do
intTxt <- many1 digit
char '.'
fracTxt <- many1 digit
return $ intTxt ++ "." ++ fracTxt
signedParseTxt = do
char '-'
unsignedDigitTxt <- unsignedParseTxt
return ('-':unsignedDigitTxt)
parseAnyNormalFunction :: GenParser Char st Expression
parseAnyNormalFunction =
let allParsers = map parseNormalFunction normalSyntaxFunctions
in choice allParsers
parseNormalFunction sqlFunc =
try (parseToken $ functionName sqlFunc) >> parseNormalFunctionArgs sqlFunc
parseNormalFunctionArgs sqlFunc = do
args <- parenthesize $ argSepBy (minArgCount sqlFunc) parseExpression commaSeparator
return $ FunctionExpression sqlFunc args
where argSepBy = if argCountIsFixed sqlFunc then sepByExactly else sepByAtLeast
normalSyntaxFunctions =
[upperFunction, lowerFunction, trimFunction,
avgFunction, firstFunction, lastFunction, maxFunction,
minFunction, sumFunction]
upperFunction = SQLFunction {
functionName = "UPPER",
minArgCount = 1,
argCountIsFixed = True}
lowerFunction = SQLFunction {
functionName = "LOWER",
minArgCount = 1,
argCountIsFixed = True}
trimFunction = SQLFunction {
functionName = "TRIM",
minArgCount = 1,
argCountIsFixed = True}
avgFunction = SQLFunction {
functionName = "AVG",
minArgCount = 1,
argCountIsFixed = False}
countFunction = SQLFunction {
functionName = "COUNT",
minArgCount = 1,
argCountIsFixed = False}
firstFunction = SQLFunction {
functionName = "FIRST",
minArgCount = 1,
argCountIsFixed = False}
lastFunction = SQLFunction {
functionName = "LAST",
minArgCount = 1,
argCountIsFixed = False}
maxFunction = SQLFunction {
functionName = "MAX",
minArgCount = 1,
argCountIsFixed = False}
minFunction = SQLFunction {
functionName = "MIN",
minArgCount = 1,
argCountIsFixed = False}
sumFunction = SQLFunction {
functionName = "SUM",
minArgCount = 1,
argCountIsFixed = False}
infixFunctions =
[[multiplyFunction, divideFunction],
[plusFunction, minusFunction],
[concatenateFunction],
[isFunction, isNotFunction, lessThanFunction, lessThanOrEqualToFunction,
greaterThanFunction, greaterThanOrEqualToFunction, regexMatchFunction],
[andFunction],
[orFunction]]
parseInfixOp infixFunc =
Infix opParser AssocLeft
where
opParser = parseToken (functionName infixFunc) >> return buildExpr
buildExpr leftSubExpr rightSubExpr = FunctionExpression {
sqlFunction = infixFunc,
functionArguments = [leftSubExpr, rightSubExpr]}
multiplyFunction = SQLFunction {
functionName = "*",
minArgCount = 2,
argCountIsFixed = True}
divideFunction = SQLFunction {
functionName = "/",
minArgCount = 2,
argCountIsFixed = True}
plusFunction = SQLFunction {
functionName = "+",
minArgCount = 2,
argCountIsFixed = True}
minusFunction = SQLFunction {
functionName = "-",
minArgCount = 2,
argCountIsFixed = True}
isFunction = SQLFunction {
functionName = "=",
minArgCount = 2,
argCountIsFixed = True}
isNotFunction = SQLFunction {
functionName = "<>",
minArgCount = 2,
argCountIsFixed = True}
lessThanFunction = SQLFunction {
functionName = "<",
minArgCount = 2,
argCountIsFixed = True}
lessThanOrEqualToFunction = SQLFunction {
functionName = "<=",
minArgCount = 2,
argCountIsFixed = True}
greaterThanFunction = SQLFunction {
functionName = ">",
minArgCount = 2,
argCountIsFixed = True}
greaterThanOrEqualToFunction = SQLFunction {
functionName = ">=",
minArgCount = 2,
argCountIsFixed = True}
andFunction = SQLFunction {
functionName = "AND",
minArgCount = 2,
argCountIsFixed = True}
orFunction = SQLFunction {
functionName = "OR",
minArgCount = 2,
argCountIsFixed = True}
concatenateFunction = SQLFunction {
functionName = "||",
minArgCount = 2,
argCountIsFixed = True}
regexMatchFunction = SQLFunction {
functionName = "=~",
minArgCount = 2,
argCountIsFixed = True}
specialFunctions = [substringFromFunction,
substringFromToFunction,
negateFunction,
notFunction]
substringFromFunction = SQLFunction {
functionName = "SUBSTRING",
minArgCount = 2,
argCountIsFixed = True}
substringFromToFunction = SQLFunction {
functionName = "SUBSTRING",
minArgCount = 3,
argCountIsFixed = True}
parseSubstringFunction :: GenParser Char st Expression
parseSubstringFunction = do
parseToken $ functionName substringFromFunction
eatSpacesAfter $ char '('
strExpr <- parseExpression
parseToken "FROM"
startExpr <- parseExpression
maybeLength <- ifParseThen (parseToken "FOR") parseExpression
eatSpacesAfter $ char ')'
return $ case maybeLength of
Nothing -> FunctionExpression substringFromFunction [strExpr, startExpr]
Just len -> FunctionExpression substringFromToFunction [strExpr, startExpr, len]
negateFunction = SQLFunction {
functionName = "-",
minArgCount = 1,
argCountIsFixed = True}
parseNegateFunction :: GenParser Char st Expression
parseNegateFunction = do
parseToken "-"
expr <- parseAnyNonInfixExpression
return $ FunctionExpression negateFunction [expr]
notFunction = SQLFunction {
functionName = "NOT",
minArgCount = 1,
argCountIsFixed = True}
parseNotFunction = do
parseToken $ functionName notFunction
expr <- parseAnyNonInfixExpression
return $ FunctionExpression notFunction [expr]
parseCountStar = do
try (parseToken $ functionName countFunction)
try parseStar <|> parseNormalFunctionArgs countFunction
where
parseStar = do
parenthesize $ parseToken "*"
return $ FunctionExpression countFunction [IntegerConstantExpression 0]
parseOpChar = oneOf opChars
opChars = "~!@#$%^&*-+=|\\<>/?"
withoutTrailing end p = p >>= (\x -> genNotFollowedBy end >> return x)
withTrailing end p = p >>= (\x -> end >> return x)
eatSpacesAfter p = p >>= (\x -> spaces >> return x)
endsWithOp strToTest = last strToTest `elem` opChars
parseToken :: String -> GenParser Char st String
parseToken tokStr =
eatSpacesAfter (try $ if endsWithOp tokStr then parseOpTok else parseAlphaNumTok)
where
parseOpTok = withoutTrailing parseOpChar (string tokStr)
parseAlphaNumTok = withoutTrailing alphaNum (upperOrLower tokStr)
parseIdentifier = do
let parseId = do
let idChar = alphaNum <|> char '_'
notFollowedBy digit
quotedText False '`' <|> many1 idChar
((eatSpacesAfter parseId) `genExcept` parseReservedWord) <?> "identifier"
quotedText allowEmpty quoteChar = do
let quote = char quoteChar
manyFunc = if allowEmpty then many else many1
quote
textValue <- manyFunc $ (anyChar `genExcept` quote) <|>
try (escapedQuote quoteChar)
quote
spaces
return textValue
exceptChar parser theException = notFollowedBy theException >> parser
escapedQuote quoteChar = string [quoteChar, quoteChar] >> return quoteChar
commaSeparator = eatSpacesAfter $ char ','
parenthesize :: GenParser Char st a -> GenParser Char st a
parenthesize innerParser = do
eatSpacesAfter $ char '('
innerParseResults <- innerParser
eatSpacesAfter $ char ')'
return innerParseResults
eitherParse :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Either a b)
eitherParse leftParser rightParser =
do {parseResult <- try leftParser; return $ Left parseResult} <|>
do {parseResult <- rightParser; return $ Right parseResult}
spaces1 = skipMany1 space <?> "whitespace"
ifParseThen :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Maybe b)
ifParseThen ifParse thenPart = do
ifResult <- maybeParse ifParse
case ifResult of
Just _ -> thenPart >>= (\x -> return $ Just x)
Nothing -> return Nothing
ifParseThenElse :: GenParser tok st a -> GenParser tok st b -> GenParser tok st b -> GenParser tok st b
ifParseThenElse ifParse thenPart elsePart = do
ifResult <- maybeParse ifParse
case ifResult of
Just _ -> thenPart
Nothing -> elsePart
parseReservedWord = do
let reservedWordParsers = map parseToken reservedWords
choice reservedWordParsers
reservedWords =
map functionName normalSyntaxFunctions ++
map functionName (concat infixFunctions) ++
map functionName specialFunctions ++
["BY","CROSS", "FROM", "FOR", "GROUP", "HAVING", "INNER", "JOIN", "ON", "ORDER", "SELECT", "WHERE"]
upperOrLower :: String -> GenParser Char st String
upperOrLower stringToParse =
string (map toUpper stringToParse) <|>
string (map toLower stringToParse) <?> stringToParse
genExcept :: (Show b) => GenParser tok st a -> GenParser tok st b -> GenParser tok st a
genExcept parser theException = do
genNotFollowedBy theException
parser
genNotFollowedBy :: (Show a) => GenParser tok st a -> GenParser tok st ()
genNotFollowedBy theParser = try $ do
mayParseResult <- maybeParse theParser
case mayParseResult of
Nothing -> return ()
Just x -> unexpected $ show x
maybeParse :: GenParser tok st a -> GenParser tok st (Maybe a)
maybeParse parser =
(try parser >>= (\x -> return $ Just x)) <|>
return Nothing
sepByExactly :: Int -> GenParser tok st a -> GenParser tok st sep -> GenParser tok st [a]
sepByExactly count itemParser sepParser =
let itemParsers = replicate count itemParser
in parseEach itemParsers
where
parseEach [] = return []
parseEach [lastParser] = lastParser >>= (\x -> return [x])
parseEach (headParser:parserTail) = do
resultHead <- headParser
sepParser
resultTail <- parseEach parserTail
return $ resultHead:resultTail
sepByAtLeast :: Int -> GenParser tok st a -> GenParser tok st sep -> GenParser tok st [a]
sepByAtLeast minCount itemParser sepParser = do
minResults <- sepByExactly minCount itemParser sepParser
ifParseThenElse
sepParser
(sepBy1 itemParser sepParser >>= (\tailResults -> return $ minResults ++ tailResults))
(return minResults)