From 10bcfa189bedaeaa6bfe8d7841ed3b17f23c0df4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 9 Nov 2009 18:38:48 +0000 Subject: [PATCH] Re-refactor the core scanner's API, in order to get out from under the problem of different parsers having different YYSTYPE unions that they want to use with it. I defined a new union core_YYSTYPE that is just the (very short) list of semantic values returned by the core scanner. I had originally worried that this would require an extra interface layer, but actually we can have parser.c's base_yylex (formerly filtered_base_yylex) take care of that at no extra cost. Names associated with the core scanner are now "core_yy_foo", with "base_yy_foo" being used in the core Bison parser and the parser.c interface layer. This solves the last serious stumbling block to eliminating plpgsql's separate lexer. One restriction that will still be present is that plpgsql and the core will have to agree on the token numbers assigned to tokens that can be returned by the core lexer. Since Bison doesn't seem willing to accept external assignments of those numbers, we'll have to live with decreeing that core and plpgsql grammars declare these tokens first and in the same order. 
--- src/backend/parser/gram.y | 55 +++++++-------- src/backend/parser/parser.c | 48 +++++++------ src/backend/parser/scan.l | 105 +++++++++++++++-------------- src/include/parser/gramparse.h | 80 +++------------------- src/include/parser/scanner.h | 120 +++++++++++++++++++++++++++++++++ 5 files changed, 236 insertions(+), 172 deletions(-) create mode 100644 src/include/parser/scanner.h diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index d3c7c356d9..4325e4d0ed 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.689 2009/11/09 02:36:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.690 2009/11/09 18:38:48 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -75,12 +75,6 @@ (Current) = (Rhs)[0]; \ } while (0) -/* - * The %name-prefix option below will make bison call base_yylex, but we - * really want it to call filtered_base_yylex (see parser.c). - */ -#define base_yylex filtered_base_yylex - /* * Bison doesn't allocate anything that needs to live across parser calls, * so we can easily have it use palloc instead of malloc. 
This prevents @@ -104,10 +98,10 @@ typedef struct PrivTarget #define parser_yyerror(msg) scanner_yyerror(msg, yyscanner) #define parser_errposition(pos) scanner_errposition(pos, yyscanner) -static void base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner, +static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner, const char *msg); static Node *makeColumnRef(char *colname, List *indirection, - int location, base_yyscan_t yyscanner); + int location, core_yyscan_t yyscanner); static Node *makeTypeCast(Node *arg, TypeName *typename, int location); static Node *makeStringConst(char *str, int location); static Node *makeStringConstCast(char *str, int location, TypeName *typename); @@ -118,17 +112,17 @@ static Node *makeNullAConst(int location); static Node *makeAConst(Value *v, int location); static Node *makeBoolAConst(bool state, int location); static FuncCall *makeOverlaps(List *largs, List *rargs, - int location, base_yyscan_t yyscanner); -static void check_qualified_name(List *names, base_yyscan_t yyscanner); -static List *check_func_name(List *names, base_yyscan_t yyscanner); -static List *check_indirection(List *indirection, base_yyscan_t yyscanner); + int location, core_yyscan_t yyscanner); +static void check_qualified_name(List *names, core_yyscan_t yyscanner); +static List *check_func_name(List *names, core_yyscan_t yyscanner); +static List *check_indirection(List *indirection, core_yyscan_t yyscanner); static List *extractArgTypes(List *parameters); static SelectStmt *findLeftmostSelect(SelectStmt *node); static void insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, WithClause *withClause, - base_yyscan_t yyscanner); + core_yyscan_t yyscanner); static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg); static Node *doNegate(Node *n, int location); static void doNegateFloat(Value *v); @@ -145,15 +139,18 @@ static TypeName *TableFuncTypeName(List *columns); 
%name-prefix="base_yy" %locations -%parse-param {base_yyscan_t yyscanner} -%lex-param {base_yyscan_t yyscanner} +%parse-param {core_yyscan_t yyscanner} +%lex-param {core_yyscan_t yyscanner} %union { + core_YYSTYPE core_yystype; + /* these fields must match core_YYSTYPE: */ int ival; - char chr; char *str; const char *keyword; + + char chr; bool boolean; JoinType jtype; DropBehavior dbehavior; @@ -162,7 +159,6 @@ static TypeName *TableFuncTypeName(List *columns); Node *node; Value *value; ObjectType objtype; - TypeName *typnam; FunctionParameter *fun_param; FunctionParameterMode fun_param_mode; @@ -180,7 +176,6 @@ static TypeName *TableFuncTypeName(List *columns); ResTarget *target; struct PrivTarget *privtarget; AccessPriv *accesspriv; - InsertStmt *istmt; VariableSetStmt *vsetstmt; } @@ -602,6 +597,7 @@ static TypeName *TableFuncTypeName(List *columns); %left JOIN CROSS LEFT FULL RIGHT INNER_P NATURAL /* kluge to keep xml_whitespace_option from causing shift/reduce conflicts */ %right PRESERVE STRIP_P + %% /* @@ -10932,14 +10928,14 @@ reserved_keyword: * available from the scanner. */ static void -base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner, const char *msg) +base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner, const char *msg) { parser_yyerror(msg); } static Node * makeColumnRef(char *colname, List *indirection, - int location, base_yyscan_t yyscanner) + int location, core_yyscan_t yyscanner) { /* * Generate a ColumnRef node, with an A_Indirection node added if there @@ -11109,7 +11105,7 @@ makeBoolAConst(bool state, int location) * Create and populate a FuncCall node to support the OVERLAPS operator. 
*/ static FuncCall * -makeOverlaps(List *largs, List *rargs, int location, base_yyscan_t yyscanner) +makeOverlaps(List *largs, List *rargs, int location, core_yyscan_t yyscanner) { FuncCall *n = makeNode(FuncCall); @@ -11143,7 +11139,7 @@ makeOverlaps(List *largs, List *rargs, int location, base_yyscan_t yyscanner) * subscripts and '*', which we then must reject here. */ static void -check_qualified_name(List *names, base_yyscan_t yyscanner) +check_qualified_name(List *names, core_yyscan_t yyscanner) { ListCell *i; @@ -11160,7 +11156,7 @@ check_qualified_name(List *names, base_yyscan_t yyscanner) * and '*', which we then must reject here. */ static List * -check_func_name(List *names, base_yyscan_t yyscanner) +check_func_name(List *names, core_yyscan_t yyscanner) { ListCell *i; @@ -11178,7 +11174,7 @@ check_func_name(List *names, base_yyscan_t yyscanner) * in the grammar, so do it here. */ static List * -check_indirection(List *indirection, base_yyscan_t yyscanner) +check_indirection(List *indirection, core_yyscan_t yyscanner) { ListCell *l; @@ -11237,7 +11233,7 @@ insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, WithClause *withClause, - base_yyscan_t yyscanner) + core_yyscan_t yyscanner) { Assert(IsA(stmt, SelectStmt)); @@ -11463,12 +11459,9 @@ TableFuncTypeName(List *columns) } /* - * Must undefine base_yylex before including scan.c, since we want it - * to create the function base_yylex not filtered_base_yylex. + * Must undefine this stuff before including scan.c, since it has different + * definitions for these macros. 
*/ -#undef base_yylex - -/* Undefine some other stuff that would conflict in scan.c, too */ #undef yyerror #undef yylval #undef yylloc diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index 93632c8811..354e335ce9 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -14,7 +14,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.81 2009/07/14 20:24:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.82 2009/11/09 18:38:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,14 +34,15 @@ List * raw_parser(const char *str) { - base_yyscan_t yyscanner; + core_yyscan_t yyscanner; base_yy_extra_type yyextra; int yyresult; /* initialize the flex scanner */ - yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords); + yyscanner = scanner_init(str, &yyextra.core_yy_extra, + ScanKeywords, NumScanKeywords); - /* filtered_base_yylex() only needs this much initialization */ + /* base_yylex() only needs this much initialization */ yyextra.have_lookahead = false; /* initialize the bison parser */ @@ -73,15 +74,16 @@ raw_parser(const char *str) char * pg_parse_string_token(const char *token) { - base_yyscan_t yyscanner; + core_yyscan_t yyscanner; base_yy_extra_type yyextra; int ctoken; - YYSTYPE yylval; + core_YYSTYPE yylval; YYLTYPE yylloc; - yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords); + yyscanner = scanner_init(token, &yyextra.core_yy_extra, + ScanKeywords, NumScanKeywords); - ctoken = base_yylex(&yylval, &yylloc, yyscanner); + ctoken = core_yylex(&yylval, &yylloc, yyscanner); if (ctoken != SCONST) /* caller error */ elog(ERROR, "expected string constant, got token code %d", ctoken); @@ -93,7 +95,7 @@ pg_parse_string_token(const char *token) /* - * Intermediate filter between parser and base lexer (base_yylex in scan.l). 
+ * Intermediate filter between parser and core lexer (core_yylex in scan.l). * * The filter is needed because in some cases the standard SQL grammar * requires more than one token lookahead. We reduce these cases to one-token @@ -104,26 +106,30 @@ pg_parse_string_token(const char *token) * words. Furthermore it's not clear how to do it without re-introducing * scanner backtrack, which would cost more performance than this filter * layer does. + * + * The filter also provides a convenient place to translate between + * the core_YYSTYPE and YYSTYPE representations (which are really the + * same thing anyway, but notationally they're different). */ int -filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) +base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) { base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner); int cur_token; int next_token; - YYSTYPE cur_yylval; + core_YYSTYPE cur_yylval; YYLTYPE cur_yylloc; /* Get next token --- we might already have it */ if (yyextra->have_lookahead) { cur_token = yyextra->lookahead_token; - *lvalp = yyextra->lookahead_yylval; + lvalp->core_yystype = yyextra->lookahead_yylval; *llocp = yyextra->lookahead_yylloc; yyextra->have_lookahead = false; } else - cur_token = base_yylex(lvalp, llocp, yyscanner); + cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); /* Do we need to look ahead for a possible multiword token? 
*/ switch (cur_token) @@ -133,9 +139,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) /* * NULLS FIRST and NULLS LAST must be reduced to one token */ - cur_yylval = *lvalp; + cur_yylval = lvalp->core_yystype; cur_yylloc = *llocp; - next_token = base_yylex(lvalp, llocp, yyscanner); + next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); switch (next_token) { case FIRST_P: @@ -147,11 +153,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) default: /* save the lookahead token for next time */ yyextra->lookahead_token = next_token; - yyextra->lookahead_yylval = *lvalp; + yyextra->lookahead_yylval = lvalp->core_yystype; yyextra->lookahead_yylloc = *llocp; yyextra->have_lookahead = true; /* and back up the output info to cur_token */ - *lvalp = cur_yylval; + lvalp->core_yystype = cur_yylval; *llocp = cur_yylloc; break; } @@ -162,9 +168,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) /* * WITH TIME must be reduced to one token */ - cur_yylval = *lvalp; + cur_yylval = lvalp->core_yystype; cur_yylloc = *llocp; - next_token = base_yylex(lvalp, llocp, yyscanner); + next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); switch (next_token) { case TIME: @@ -173,11 +179,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) default: /* save the lookahead token for next time */ yyextra->lookahead_token = next_token; - yyextra->lookahead_yylval = *lvalp; + yyextra->lookahead_yylval = lvalp->core_yystype; yyextra->lookahead_yylloc = *llocp; yyextra->have_lookahead = true; /* and back up the output info to cur_token */ - *lvalp = cur_yylval; + lvalp->core_yystype = cur_yylval; *llocp = cur_yylloc; break; } diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 150202e77c..8a53221930 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the 
University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.163 2009/11/09 18:38:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,8 +33,8 @@ #include <ctype.h> #include <unistd.h> -#include "parser/gramparse.h" -#include "parser/keywords.h" +#include "parser/parser.h" /* only needed for GUC variables */ +#include "parser/scanner.h" #include "parser/scansup.h" #include "mb/pg_wchar.h" @@ -54,11 +54,16 @@ int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING; bool escape_string_warning = true; bool standard_conforming_strings = false; +/* + * Set the type of YYSTYPE. + */ +#define YYSTYPE core_YYSTYPE + /* * Set the type of yyextra. All state variables used by the scanner should * be in yyextra, *not* statically allocated. */ -#define YY_EXTRA_TYPE base_yy_extra_type * +#define YY_EXTRA_TYPE core_yy_extra_type * /* * Each call to yylex must set yylloc to the location of the found token @@ -75,21 +80,22 @@ bool standard_conforming_strings = false; #define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) ) #define startlit() ( yyextra->literallen = 0 ) -static void addlit(char *ytext, int yleng, base_yyscan_t yyscanner); -static void addlitchar(unsigned char ychar, base_yyscan_t yyscanner); -static char *litbufdup(base_yyscan_t yyscanner); -static char *litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner); -static unsigned char unescape_single_char(unsigned char c, base_yyscan_t yyscanner); +static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner); +static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); +static char *litbufdup(core_yyscan_t yyscanner); +static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner); +static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner); static bool is_utf16_surrogate_first(pg_wchar c); static bool 
is_utf16_surrogate_second(pg_wchar c); static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second); +static void addunicode(pg_wchar c, yyscan_t yyscanner); #define yyerror(msg) scanner_yyerror(msg, yyscanner) #define lexer_errposition() scanner_errposition(*(yylloc), yyscanner) -static void check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner); -static void check_escape_warning(base_yyscan_t yyscanner); +static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner); +static void check_escape_warning(core_yyscan_t yyscanner); /* * Work around a bug in flex 2.5.35: it emits a couple of functions that @@ -97,10 +103,8 @@ static void check_escape_warning(base_yyscan_t yyscanner); * this would cause warnings. Providing our own declarations should be * harmless even when the bug gets fixed. */ -extern int base_yyget_column(yyscan_t yyscanner); -extern void base_yyset_column(int column_no, yyscan_t yyscanner); - -static void addunicode(pg_wchar c, yyscan_t yyscanner); +extern int core_yyget_column(yyscan_t yyscanner); +extern void core_yyset_column(int column_no, yyscan_t yyscanner); %} @@ -117,7 +121,7 @@ static void addunicode(pg_wchar c, yyscan_t yyscanner); %option noyyrealloc %option noyyfree %option warn -%option prefix="base_yy" +%option prefix="core_yy" /* * OK, here is a short description of lex/flex rules behavior. @@ -958,7 +962,7 @@ other . * to still be available. */ int -scanner_errposition(int location, base_yyscan_t yyscanner) +scanner_errposition(int location, core_yyscan_t yyscanner) { int pos; @@ -984,7 +988,7 @@ scanner_errposition(int location, base_yyscan_t yyscanner) * be misleading! 
*/ void -scanner_yyerror(const char *message, base_yyscan_t yyscanner) +scanner_yyerror(const char *message, core_yyscan_t yyscanner) { const char *loc = yyextra->scanbuf + *yylloc; @@ -1010,9 +1014,9 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner) /* * Called before any actual parsing is done */ -base_yyscan_t +core_yyscan_t scanner_init(const char *str, - base_yy_extra_type *yyext, + core_yy_extra_type *yyext, const ScanKeyword *keywords, int num_keywords) { @@ -1022,7 +1026,7 @@ scanner_init(const char *str, if (yylex_init(&scanner) != 0) elog(ERROR, "yylex_init() failed: %m"); - base_yyset_extra(yyext, scanner); + core_yyset_extra(yyext, scanner); yyext->keywords = keywords; yyext->num_keywords = num_keywords; @@ -1049,7 +1053,7 @@ scanner_init(const char *str, * Called after parsing is done to clean up after scanner_init() */ void -scanner_finish(base_yyscan_t yyscanner) +scanner_finish(core_yyscan_t yyscanner) { /* * We don't bother to call yylex_destroy(), because all it would do @@ -1069,7 +1073,7 @@ scanner_finish(base_yyscan_t yyscanner) static void -addlit(char *ytext, int yleng, base_yyscan_t yyscanner) +addlit(char *ytext, int yleng, core_yyscan_t yyscanner) { /* enlarge buffer if needed */ if ((yyextra->literallen + yleng) >= yyextra->literalalloc) @@ -1087,7 +1091,7 @@ addlit(char *ytext, int yleng, base_yyscan_t yyscanner) static void -addlitchar(unsigned char ychar, base_yyscan_t yyscanner) +addlitchar(unsigned char ychar, core_yyscan_t yyscanner) { /* enlarge buffer if needed */ if ((yyextra->literallen + 1) >= yyextra->literalalloc) @@ -1106,7 +1110,7 @@ addlitchar(unsigned char ychar, base_yyscan_t yyscanner) * Create a palloc'd copy of literalbuf, adding a trailing null. 
*/ static char * -litbufdup(base_yyscan_t yyscanner) +litbufdup(core_yyscan_t yyscanner) { int llen = yyextra->literallen; char *new; @@ -1131,7 +1135,7 @@ hexval(unsigned char c) } static void -check_unicode_value(pg_wchar c, char *loc, base_yyscan_t yyscanner) +check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner) { if (GetDatabaseEncoding() == PG_UTF8) return; @@ -1161,8 +1165,25 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF); } +static void +addunicode(pg_wchar c, core_yyscan_t yyscanner) +{ + char buf[8]; + + if (c == 0 || c > 0x10FFFF) + yyerror("invalid Unicode escape value"); + if (c > 0x7F) + { + if (GetDatabaseEncoding() != PG_UTF8) + yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); + yyextra->saw_non_ascii = true; + } + unicode_to_utf8(c, (unsigned char *)buf); + addlit(buf, pg_mblen(buf), yyscanner); +} + static char * -litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner) +litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) { char *new; char *litbuf, *in, *out; @@ -1294,7 +1315,7 @@ litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner) } static unsigned char -unescape_single_char(unsigned char c, base_yyscan_t yyscanner) +unescape_single_char(unsigned char c, core_yyscan_t yyscanner) { switch (c) { @@ -1318,7 +1339,7 @@ unescape_single_char(unsigned char c, base_yyscan_t yyscanner) } static void -check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner) +check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner) { if (ychar == '\'') { @@ -1345,7 +1366,7 @@ check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner) } static void -check_escape_warning(base_yyscan_t yyscanner) +check_escape_warning(core_yyscan_t yyscanner) { if (yyextra->warn_on_first_escape && escape_string_warning) ereport(WARNING, @@ -1362,13 +1383,13 
@@ check_escape_warning(base_yyscan_t yyscanner) */ void * -base_yyalloc(yy_size_t bytes, base_yyscan_t yyscanner) +core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner) { return palloc(bytes); } void * -base_yyrealloc(void *ptr, yy_size_t bytes, base_yyscan_t yyscanner) +core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner) { if (ptr) return repalloc(ptr, bytes); @@ -1377,26 +1398,8 @@ base_yyrealloc(void *ptr, yy_size_t bytes, base_yyscan_t yyscanner) } void -base_yyfree(void *ptr, base_yyscan_t yyscanner) +core_yyfree(void *ptr, core_yyscan_t yyscanner) { if (ptr) pfree(ptr); } - -static void -addunicode(pg_wchar c, base_yyscan_t yyscanner) -{ - char buf[8]; - - if (c == 0 || c > 0x10FFFF) - yyerror("invalid Unicode escape value"); - if (c > 0x7F) - { - if (GetDatabaseEncoding() != PG_UTF8) - yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); - yyextra->saw_non_ascii = true; - } - unicode_to_utf8(c, (unsigned char *)buf); - addlit(buf, pg_mblen(buf), yyscanner); -} - diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h index 09c9909136..41774028b5 100644 --- a/src/include/parser/gramparse.h +++ b/src/include/parser/gramparse.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.49 2009/11/05 23:24:26 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.50 2009/11/09 18:38:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,20 +20,11 @@ #define GRAMPARSE_H #include "nodes/parsenodes.h" -#include "parser/keywords.h" +#include "parser/scanner.h" /* - * We track token locations in terms of byte offsets from the start of the - * source string, not the column number/line number representation that - * bison uses by default. 
Also, to minimize overhead we track only one - * location (usually the first token location) for each construct, not - * the beginning and ending locations as bison does by default. It's - * therefore sufficient to make YYLTYPE an int. - */ -#define YYLTYPE int - -/* - * After defining YYLTYPE, it's safe to include gram.h. + * NB: include gram.h only AFTER including scanner.h, because scanner.h + * is what #defines YYLTYPE. */ #include "parser/gram.h" @@ -44,62 +35,24 @@ typedef struct base_yy_extra_type { /* - * The string the lexer is physically scanning. We keep this mainly so - * that we can cheaply compute the offset of the current token (yytext). + * Fields used by the core scanner. */ - char *scanbuf; - Size scanbuflen; + core_yy_extra_type core_yy_extra; /* - * The keyword list to use. - */ - const ScanKeyword *keywords; - int num_keywords; - - /* - * literalbuf is used to accumulate literal values when multiple rules - * are needed to parse a single literal. Call startlit() to reset buffer - * to empty, addlit() to add text. NOTE: the string in literalbuf is - * NOT necessarily null-terminated, but there always IS room to add a - * trailing null at offset literallen. We store a null only when we - * need it. - */ - char *literalbuf; /* palloc'd expandable buffer */ - int literallen; /* actual current string length */ - int literalalloc; /* current allocated buffer size */ - - int xcdepth; /* depth of nesting in slash-star comments */ - char *dolqstart; /* current $foo$ quote start string */ - - /* first part of UTF16 surrogate pair for Unicode escapes */ - int32 utf16_first_part; - - /* state variables for literal-lexing warnings */ - bool warn_on_first_escape; - bool saw_non_ascii; - - /* - * State variables for filtered_base_yylex(). + * State variables for base_yylex(). */ bool have_lookahead; /* is lookahead info valid? 
*/ int lookahead_token; /* one-token lookahead */ - YYSTYPE lookahead_yylval; /* yylval for lookahead token */ + core_YYSTYPE lookahead_yylval; /* yylval for lookahead token */ YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ /* - * State variables that belong to the grammar, not the lexer. It's - * simpler to keep these here than to invent a separate structure. - * These fields are unused/undefined if the lexer is invoked on its own. + * State variables that belong to the grammar. */ - List *parsetree; /* final parse result is delivered here */ } base_yy_extra_type; -/* - * The type of yyscanner is opaque outside scan.l. - */ -typedef void *base_yyscan_t; - /* * In principle we should use yyget_extra() to fetch the yyextra field * from a yyscanner struct. However, flex always puts that field first, @@ -110,22 +63,11 @@ typedef void *base_yyscan_t; /* from parser.c */ -extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, - base_yyscan_t yyscanner); - -/* from scan.l */ -extern base_yyscan_t scanner_init(const char *str, - base_yy_extra_type *yyext, - const ScanKeyword *keywords, - int num_keywords); -extern void scanner_finish(base_yyscan_t yyscanner); extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, - base_yyscan_t yyscanner); -extern int scanner_errposition(int location, base_yyscan_t yyscanner); -extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner); + core_yyscan_t yyscanner); /* from gram.y */ extern void parser_init(base_yy_extra_type *yyext); -extern int base_yyparse(base_yyscan_t yyscanner); +extern int base_yyparse(core_yyscan_t yyscanner); #endif /* GRAMPARSE_H */ diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h new file mode 100644 index 0000000000..ccab1db862 --- /dev/null +++ b/src/include/parser/scanner.h @@ -0,0 +1,120 @@ +/*------------------------------------------------------------------------- + * + * scanner.h + * API for the core scanner (flex machine) + * + * The core scanner 
is also used by PL/pgsql, so we provide a public API + * for it. However, the rest of the backend is only expected to use the + * higher-level API provided by parser.h. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/parser/scanner.h,v 1.1 2009/11/09 18:38:48 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#ifndef SCANNER_H +#define SCANNER_H + +#include "parser/keywords.h" + +/* + * The scanner returns extra data about scanned tokens in this union type. + * Note that this is a subset of the fields used in YYSTYPE of the bison + * parsers built atop the scanner. + */ +typedef union core_YYSTYPE +{ + int ival; /* for integer literals */ + char *str; /* for identifiers and non-integer literals */ + const char *keyword; /* canonical spelling of keywords */ +} core_YYSTYPE; + +/* + * We track token locations in terms of byte offsets from the start of the + * source string, not the column number/line number representation that + * bison uses by default. Also, to minimize overhead we track only one + * location (usually the first token location) for each construct, not + * the beginning and ending locations as bison does by default. It's + * therefore sufficient to make YYLTYPE an int. + */ +#define YYLTYPE int + +/* + * Another important component of the scanner's API is the token code numbers. + * However, those are not defined in this file, because bison insists on + * defining them for itself. 
The token codes used by the core scanner are + * the ASCII characters plus these: + * %token <str> IDENT FCONST SCONST BCONST XCONST Op + * %token <ival> ICONST PARAM + * %token TYPECAST DOT_DOT COLON_EQUALS + * The above token definitions *must* be the first ones declared in any + * bison parser built atop this scanner, so that they will have consistent + * numbers assigned to them (specifically, IDENT = 258 and so on). + */ + +/* + * The YY_EXTRA data that a flex scanner allows us to pass around. + * Private state needed by the core scanner goes here. Note that the actual + * yy_extra struct may be larger and have this as its first component, thus + * allowing the calling parser to keep some fields of its own in YY_EXTRA. + */ +typedef struct core_yy_extra_type +{ + /* + * The string the scanner is physically scanning. We keep this mainly so + * that we can cheaply compute the offset of the current token (yytext). + */ + char *scanbuf; + Size scanbuflen; + + /* + * The keyword list to use. + */ + const ScanKeyword *keywords; + int num_keywords; + + /* + * literalbuf is used to accumulate literal values when multiple rules + * are needed to parse a single literal. Call startlit() to reset buffer + * to empty, addlit() to add text. NOTE: the string in literalbuf is + * NOT necessarily null-terminated, but there always IS room to add a + * trailing null at offset literallen. We store a null only when we + * need it. + */ + char *literalbuf; /* palloc'd expandable buffer */ + int literallen; /* actual current string length */ + int literalalloc; /* current allocated buffer size */ + + int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ + + /* first part of UTF16 surrogate pair for Unicode escapes */ + int32 utf16_first_part; + + /* state variables for literal-lexing warnings */ + bool warn_on_first_escape; + bool saw_non_ascii; +} core_yy_extra_type; + +/* + * The type of yyscanner is opaque outside scan.l. 
+ */ +typedef void *core_yyscan_t; + + +/* Entry points in parser/scan.l */ +extern core_yyscan_t scanner_init(const char *str, + core_yy_extra_type *yyext, + const ScanKeyword *keywords, + int num_keywords); +extern void scanner_finish(core_yyscan_t yyscanner); +extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, + core_yyscan_t yyscanner); +extern int scanner_errposition(int location, core_yyscan_t yyscanner); +extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner); + +#endif /* SCANNER_H */ -- 2.40.0