/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - raw_parser
 * - base_yylex
 *--------------------------------------------------------------------
 */

/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * Note that the grammar is not allowed to perform any table access
 * (since we need to be able to do basic parsing even while inside an
 * aborted transaction).  Therefore, the data structures returned by
 * the grammar are "raw" parsetrees that still need to be analyzed by
 * analyze.c and related files.
 *
 *
 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/parser/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "parser/gramparse.h"
#include "parser/parser.h"


/*
 * raw_parser
 *		Given a query in string form, do lexical and grammatical analysis.
 *
 * Returns a list of raw (un-analyzed) parse trees.
 */
List *
raw_parser(const char *str)
{
	core_yyscan_t yyscanner;
	base_yy_extra_type yyextra;
	int			yyresult;

	/* initialize the flex scanner */
	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
							 ScanKeywords, NumScanKeywords);

	/* base_yylex() only needs this much initialization */
	yyextra.have_lookahead = false;

	/* initialize the bison parser */
	parser_init(&yyextra);

	/* Parse! */
	yyresult = base_yyparse(yyscanner);

	/* Clean up (release memory) */
	scanner_finish(yyscanner);

	if (yyresult)				/* error */
		return NIL;

	return yyextra.parsetree;
}


/*
 * Intermediate filter between parser and core lexer (core_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to
 * one-token lookahead by replacing tokens here, in order to keep the
 * grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
 *
 * The filter also provides a convenient place to translate between
 * the core_YYSTYPE and YYSTYPE representations (which are really the
 * same thing anyway, but notationally they're different).
 */
int
base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
{
	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
	int			cur_token;
	int			next_token;
	int			cur_token_length;
	YYLTYPE		cur_yylloc;

	/* Get next token --- we might already have it */
	if (yyextra->have_lookahead)
	{
		cur_token = yyextra->lookahead_token;
		lvalp->core_yystype = yyextra->lookahead_yylval;
		*llocp = yyextra->lookahead_yylloc;
		*(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
		yyextra->have_lookahead = false;
	}
	else
		cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);

	/*
	 * If this token isn't one that requires lookahead, just return it.  If
	 * it does, determine the token length.  (We could get that via strlen(),
	 * but since we have such a small set of possibilities, hardwiring seems
	 * feasible and more efficient.)
	 */
	switch (cur_token)
	{
		case NOT:
			cur_token_length = 3;
			break;
		case NULLS_P:
			cur_token_length = 5;
			break;
		case WITH:
			cur_token_length = 4;
			break;
		default:
			return cur_token;
	}
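
	/*
	 * Example of the lookahead handling below: for the input "ORDER BY x
	 * NULLS FIRST", cur_token is NULLS_P at this point.  The code that
	 * follows peeks at the next token, finds FIRST_P, stashes it in the
	 * lookahead fields, and returns NULLS_LA to the grammar instead of
	 * NULLS_P.  If the peeked token had been anything else, cur_token would
	 * be returned unchanged and the stashed token would be handed back on
	 * the next call, via the have_lookahead branch at the top.
	 */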

	/*
	 * Identify end+1 of current token.  core_yylex() has temporarily stored
	 * a '\0' here, and will undo that when we call it again.  We need to
	 * redo it to fully revert the lookahead call for error reporting
	 * purposes.
	 */
	yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
		*llocp + cur_token_length;
	Assert(*(yyextra->lookahead_end) == '\0');

	/*
	 * Save and restore *llocp around the call.  It might look like we could
	 * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
	 * does not work because flex actually holds onto the last-passed pointer
	 * internally, and will use that for error reporting.  We need any error
	 * reports to point to the current token, not the next one.
	 */
	cur_yylloc = *llocp;

	/* Get next token, saving outputs into lookahead variables */
	next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
	yyextra->lookahead_token = next_token;
	yyextra->lookahead_yylloc = *llocp;

	*llocp = cur_yylloc;

	/* Now revert the un-truncation of the current token */
	yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
	*(yyextra->lookahead_end) = '\0';

	yyextra->have_lookahead = true;

	/* Replace cur_token if needed, based on lookahead */
	switch (cur_token)
	{
		case NOT:
			/* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
			switch (next_token)
			{
				case BETWEEN:
				case IN_P:
				case LIKE:
				case ILIKE:
				case SIMILAR:
					cur_token = NOT_LA;
					break;
			}
			break;

		case NULLS_P:
			/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
			switch (next_token)
			{
				case FIRST_P:
				case LAST_P:
					cur_token = NULLS_LA;
					break;
			}
			break;

		case WITH:
			/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
			switch (next_token)
			{
				case TIME:
				case ORDINALITY:
					cur_token = WITH_LA;
					break;
			}
			break;
	}

	return cur_token;
}
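

/*
 * Illustrative sketch: a minimal caller of raw_parser(), in the spirit of
 * pg_parse_query() in tcop/postgres.c.  The function name parse_sql_sketch
 * and the RAW_PARSER_USAGE_SKETCH guard are hypothetical and exist only to
 * show how the entry point above is typically driven.
 */
#ifdef RAW_PARSER_USAGE_SKETCH
static List *
parse_sql_sketch(const char *query_string)
{
	List	   *raw_parsetree_list;

	/*
	 * Lexical and grammatical analysis only; no catalog access happens
	 * here, so this is safe even inside an aborted transaction.
	 */
	raw_parsetree_list = raw_parser(query_string);

	/*
	 * Each list element is a raw (un-analyzed) parse tree; parse analysis
	 * (analyze.c and related files) still has to be applied to it.
	 */
	return raw_parsetree_list;
}
#endif							/* RAW_PARSER_USAGE_SKETCH */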