From: Tom Lane Date: Tue, 14 Jul 2009 20:24:10 +0000 (+0000) Subject: Tweak the core scanner so that it can be used by plpgsql too. X-Git-Tag: REL8_5_ALPHA1~144 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1aa58d3a8389fcf8899745049f128f6b8fec7bc9;p=postgresql Tweak the core scanner so that it can be used by plpgsql too. Changes: Pass in the keyword lookup array instead of having it be hardwired. (This incidentally allows elimination of some duplicate coding in ecpg.) Re-order the token declarations in gram.y so that non-keyword tokens have numbers that won't change when keywords are added or removed. Add ".." and ":=" to the set of tokens recognized by scan.l. (Since these combinations are nowhere legal in core SQL, this does not change anything except the precise wording of the error you get when you write this.) --- diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index f4b795db45..dfe9b19cf1 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.668 2009/07/13 02:02:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.669 2009/07/14 20:24:10 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -421,10 +421,23 @@ static TypeName *TableFuncTypeName(List *columns); /* - * If you make any token changes, update the keyword table in - * src/include/parser/kwlist.h and add new keywords to the appropriate one of - * the reserved-or-not-so-reserved keyword lists, below; search - * this file for "Name classification hierarchy". + * Non-keyword token types. These are hard-wired into the "flex" lexer. + * They must be listed first so that their numeric codes do not depend on + * the set of keywords. PL/pgsql depends on this so that it can share the + * same lexer. If you add/change tokens here, fix PL/pgsql to match! + * + * DOT_DOT and COLON_EQUALS are unused in the core SQL grammar, and so will + * always provoke parse errors. They are needed by PL/pgsql. + */ +%token IDENT FCONST SCONST BCONST XCONST Op +%token ICONST PARAM +%token TYPECAST DOT_DOT COLON_EQUALS + +/* + * If you want to make any keyword changes, update the keyword table in + * src/include/parser/kwlist.h and add new keywords to the appropriate one + * of the reserved-or-not-so-reserved keyword lists, below; search + * this file for "Keyword category lists". */ /* ordinary key words in alphabetical order */ @@ -515,17 +528,15 @@ static TypeName *TableFuncTypeName(List *columns); ZONE -/* The grammar thinks these are keywords, but they are not in the kwlist.h +/* + * The grammar thinks these are keywords, but they are not in the kwlist.h * list and so can never be entered directly. The filter in parser.c * creates these tokens when required. */ %token NULLS_FIRST NULLS_LAST WITH_TIME -/* Special token types, not actually keywords - see the "lex" file */ -%token IDENT FCONST SCONST BCONST XCONST Op -%token ICONST PARAM -/* precedence: lowest to highest */ +/* Precedence: lowest to highest */ %nonassoc SET /* see relation_expr_opt_alias */ %left UNION EXCEPT %left INTERSECT diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index 05e7fb9ee5..4fce452846 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -9,14 +9,13 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.213 2009/07/12 17:12:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.214 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "parser/gramparse.h" -#include "parser/keywords.h" #define PG_KEYWORD(a,b,c) {a,b,c}, @@ -25,5 +24,4 @@ const ScanKeyword ScanKeywords[] = { #include "parser/kwlist.h" }; -/* End of ScanKeywords, for use in kwlookup.c and elsewhere */ -const ScanKeyword *LastScanKeyword = endof(ScanKeywords); +const int NumScanKeywords = lengthof(ScanKeywords); diff --git a/src/backend/parser/kwlookup.c b/src/backend/parser/kwlookup.c index 7321a57c15..58c8cdd78f 100644 --- a/src/backend/parser/kwlookup.c +++ b/src/backend/parser/kwlookup.c @@ -6,15 +6,12 @@ * NB - this file is also used by ECPG and several frontend programs in * src/bin/ including pg_dump and psql * - * Note that this file expects that the ScanKeywords array is defined - * and that LastScanKeyword points to its element one past the last. - * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/kwlookup.c,v 2.2 2009/03/08 16:53:30 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/parser/kwlookup.c,v 2.3 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -39,7 +36,9 @@ * receive a different case-normalization mapping. */ const ScanKeyword * -ScanKeywordLookup(const char *text) +ScanKeywordLookup(const char *text, + const ScanKeyword *keywords, + int num_keywords) { int len, i; @@ -69,8 +68,8 @@ ScanKeywordLookup(const char *text) /* * Now do a binary search using plain strcmp() comparison. */ - low = &ScanKeywords[0]; - high = LastScanKeyword - 1; + low = keywords; + high = keywords + (num_keywords - 1); while (low <= high) { const ScanKeyword *middle; diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index cb8ff8a339..93632c8811 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -14,7 +14,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.80 2009/07/13 02:02:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.81 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -39,7 +39,7 @@ raw_parser(const char *str) int yyresult; /* initialize the flex scanner */ - yyscanner = scanner_init(str, &yyextra); + yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords); /* filtered_base_yylex() only needs this much initialization */ yyextra.have_lookahead = false; @@ -79,7 +79,7 @@ pg_parse_string_token(const char *token) YYSTYPE yylval; YYLTYPE yylloc; - yyscanner = scanner_init(token, &yyextra); + yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords); ctoken = base_yylex(&yylval, &yylloc, yyscanner); diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index a73934913d..a5ed54792b 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.156 2009/07/13 03:11:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.157 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -304,6 +304,10 @@ identifier {ident_start}{ident_cont}* typecast "::" +/* these two token types are used by PL/pgsql, though not in core SQL */ +dot_dot \.\. +colon_equals ":=" + /* * "self" is the set of chars that should be returned as single-character * tokens. "op_chars" is the set of chars that can make up "Op" tokens, @@ -450,11 +454,21 @@ other . SET_YYLLOC(); yyless(1); /* eat only 'n' this time */ - /* nchar had better be a keyword! */ - keyword = ScanKeywordLookup("nchar"); - Assert(keyword != NULL); - yylval->keyword = keyword->name; - return keyword->value; + + keyword = ScanKeywordLookup("nchar", + yyextra->keywords, + yyextra->num_keywords); + if (keyword != NULL) + { + yylval->keyword = keyword->name; + return keyword->value; + } + else + { + /* If NCHAR isn't a keyword, just return "n" */ + yylval->str = pstrdup("n"); + return IDENT; + } } {xqstart} { @@ -680,6 +694,16 @@ other . return TYPECAST; } +{dot_dot} { + SET_YYLLOC(); + return DOT_DOT; + } + +{colon_equals} { + SET_YYLLOC(); + return COLON_EQUALS; + } + {self} { SET_YYLLOC(); return yytext[0]; @@ -830,7 +854,9 @@ other . SET_YYLLOC(); /* Is it a keyword? */ - keyword = ScanKeywordLookup(yytext); + keyword = ScanKeywordLookup(yytext, + yyextra->keywords, + yyextra->num_keywords); if (keyword != NULL) { yylval->keyword = keyword->name; @@ -939,7 +965,10 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner) * Called before any actual parsing is done */ base_yyscan_t -scanner_init(const char *str, base_yy_extra_type *yyext) +scanner_init(const char *str, + base_yy_extra_type *yyext, + const ScanKeyword *keywords, + int num_keywords) { Size slen = strlen(str); yyscan_t scanner; @@ -949,6 +978,9 @@ scanner_init(const char *str, base_yy_extra_type *yyext) base_yyset_extra(yyext, scanner); + yyext->keywords = keywords; + yyext->num_keywords = num_keywords; + /* * Make a scan buffer with special termination needed by flex. */ diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index f78fc7363d..e2da654c53 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.71 2009/06/11 14:49:03 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.72 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -334,7 +334,7 @@ pg_get_keywords(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); - if (&ScanKeywords[funcctx->call_cntr] < LastScanKeyword) + if (funcctx->call_cntr < NumScanKeywords) { char *values[3]; HeapTuple tuple; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 6fdef41cc0..d30db3a2ba 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.301 2009/07/12 17:12:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.302 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -6219,7 +6219,9 @@ quote_identifier(const char *ident) * Note: ScanKeywordLookup() does case-insensitive comparison, but * that's fine, since we already know we have all-lower-case. */ - const ScanKeyword *keyword = ScanKeywordLookup(ident); + const ScanKeyword *keyword = ScanKeywordLookup(ident, + ScanKeywords, + NumScanKeywords); if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD) safe = false; diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c index 6b82823bc7..404e1d3ed3 100644 --- a/src/bin/pg_dump/dumputils.c +++ b/src/bin/pg_dump/dumputils.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/bin/pg_dump/dumputils.c,v 1.46 2009/06/11 14:49:07 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/dumputils.c,v 1.47 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -130,7 +130,9 @@ fmtId(const char *rawid) * Note: ScanKeywordLookup() does case-insensitive comparison, but * that's fine, since we already know we have all-lower-case. */ - const ScanKeyword *keyword = ScanKeywordLookup(rawid); + const ScanKeyword *keyword = ScanKeywordLookup(rawid, + ScanKeywords, + NumScanKeywords); if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD) need_quotes = true; diff --git a/src/bin/pg_dump/keywords.c b/src/bin/pg_dump/keywords.c index 9e5ff25d50..6aad5d32aa 100644 --- a/src/bin/pg_dump/keywords.c +++ b/src/bin/pg_dump/keywords.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/keywords.c,v 1.3 2009/06/11 14:49:07 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/keywords.c,v 1.4 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,5 +27,4 @@ const ScanKeyword ScanKeywords[] = { #include "parser/kwlist.h" }; -/* End of ScanKeywords, for use in kwlookup.c */ -const ScanKeyword *LastScanKeyword = endof(ScanKeywords); +const int NumScanKeywords = lengthof(ScanKeywords); diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h index a8c2f407e4..4b061e0504 100644 --- a/src/include/parser/gramparse.h +++ b/src/include/parser/gramparse.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.46 2009/07/13 02:02:20 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.47 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,6 +20,7 @@ #define GRAMPARSE_H #include "nodes/parsenodes.h" +#include "parser/keywords.h" /* * We track token locations in terms of byte offsets from the start of the @@ -49,6 +50,12 @@ typedef struct base_yy_extra_type char *scanbuf; Size scanbuflen; + /* + * The keyword list to use. + */ + const ScanKeyword *keywords; + int num_keywords; + /* * literalbuf is used to accumulate literal values when multiple rules * are needed to parse a single literal. Call startlit() to reset buffer @@ -106,7 +113,10 @@ extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner); /* from scan.l */ -extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext); +extern base_yyscan_t scanner_init(const char *str, + base_yy_extra_type *yyext, + const ScanKeyword *keywords, + int num_keywords); extern void scanner_finish(base_yyscan_t yyscanner); extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner); diff --git a/src/include/parser/keywords.h b/src/include/parser/keywords.h index 203700122e..2099e55664 100644 --- a/src/include/parser/keywords.h +++ b/src/include/parser/keywords.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/keywords.h,v 1.26 2009/01/01 17:24:00 momjian Exp $ + * $PostgreSQL: pgsql/src/include/parser/keywords.h,v 1.27 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,8 +29,10 @@ typedef struct ScanKeyword } ScanKeyword; extern const ScanKeyword ScanKeywords[]; -extern const ScanKeyword *LastScanKeyword; +extern const int NumScanKeywords; -extern const ScanKeyword *ScanKeywordLookup(const char *text); +extern const ScanKeyword *ScanKeywordLookup(const char *text, + const ScanKeyword *keywords, + int num_keywords); #endif /* KEYWORDS_H */ diff --git a/src/interfaces/ecpg/preproc/c_keywords.c b/src/interfaces/ecpg/preproc/c_keywords.c index 2eae2c769b..36f72b537e 100644 --- a/src/interfaces/ecpg/preproc/c_keywords.c +++ b/src/interfaces/ecpg/preproc/c_keywords.c @@ -1,10 +1,10 @@ /*------------------------------------------------------------------------- * - * keywords.c + * c_keywords.c * lexical token lookup for reserved words in postgres embedded SQL * - * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/c_keywords.c,v 1.23 2009/06/11 14:49:13 momjian Exp $ - * § + * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/c_keywords.c,v 1.24 2009/07/14 20:24:10 tgl Exp $ + * *------------------------------------------------------------------------- */ #include "postgres_fe.h" @@ -55,8 +55,31 @@ static const ScanKeyword ScanCKeywords[] = { {"year", YEAR_P, 0}, }; + +/* + * Do a binary search using plain strcmp() comparison. This is much like + * ScanKeywordLookup(), except we want case-sensitive matching. + */ const ScanKeyword * ScanCKeywordLookup(const char *text) { - return DoLookup(text, &ScanCKeywords[0], endof(ScanCKeywords) - 1); + const ScanKeyword *low = &ScanCKeywords[0]; + const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1]; + + while (low <= high) + { + const ScanKeyword *middle; + int difference; + + middle = low + (high - low) / 2; + difference = strcmp(middle->name, text); + if (difference == 0) + return middle; + else if (difference < 0) + low = middle + 1; + else + high = middle - 1; + } + + return NULL; } diff --git a/src/interfaces/ecpg/preproc/ecpg_keywords.c b/src/interfaces/ecpg/preproc/ecpg_keywords.c index 0aef816169..c475bf9671 100644 --- a/src/interfaces/ecpg/preproc/ecpg_keywords.c +++ b/src/interfaces/ecpg/preproc/ecpg_keywords.c @@ -4,7 +4,7 @@ * lexical token lookup for reserved words in postgres embedded SQL * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.40 2009/06/11 14:49:13 momjian Exp $ + * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.41 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -75,79 +75,26 @@ static const ScanKeyword ScanECPGKeywords[] = { {"whenever", SQL_WHENEVER, 0}, }; -/* This is all taken from src/backend/parser/keyword.c and adjusted for our needs. */ -/* - * Do a binary search using plain strcmp() comparison. - */ -const ScanKeyword * -DoLookup(const char *word, const ScanKeyword *low, const ScanKeyword *high) -{ - while (low <= high) - { - const ScanKeyword *middle; - int difference; - - middle = low + (high - low) / 2; - difference = strcmp(middle->name, word); - if (difference == 0) - return middle; - else if (difference < 0) - low = middle + 1; - else - high = middle - 1; - } - - return NULL; -} - /* * ScanECPGKeywordLookup - see if a given word is a keyword * * Returns a pointer to the ScanKeyword table entry, or NULL if no match. - * - * The match is done case-insensitively. Note that we deliberately use a - * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', - * even if we are in a locale where tolower() would produce more or different - * translations. This is to conform to the SQL99 spec, which says that - * keywords are to be matched in this way even though non-keyword identifiers - * receive a different case-normalization mapping. + * Keywords are matched using the same case-folding rules as in the backend. */ const ScanKeyword * ScanECPGKeywordLookup(const char *text) { - int len, - i; - char word[NAMEDATALEN]; const ScanKeyword *res; /* First check SQL symbols defined by the backend. */ - - res = ScanKeywordLookup(text); + res = ScanKeywordLookup(text, ScanKeywords, NumScanKeywords); if (res) return res; - len = strlen(text); - /* We assume all keywords are shorter than NAMEDATALEN. */ - if (len >= NAMEDATALEN) - return NULL; - - /* - * Apply an ASCII-only downcasing. We must not use tolower() since it may - * produce the wrong translation in some locales (eg, Turkish). - */ - for (i = 0; i < len; i++) - { - char ch = text[i]; - - if (ch >= 'A' && ch <= 'Z') - ch += 'a' - 'A'; - word[i] = ch; - } - word[len] = '\0'; - - /* - * Now do a binary search using plain strcmp() comparison. - */ + /* Try ECPG-specific keywords. */ + res = ScanKeywordLookup(text, ScanECPGKeywords, lengthof(ScanECPGKeywords)); + if (res) + return res; - return DoLookup(word, &ScanECPGKeywords[0], endof(ScanECPGKeywords) - 1); + return NULL; } diff --git a/src/interfaces/ecpg/preproc/extern.h b/src/interfaces/ecpg/preproc/extern.h index a7a125c13b..ff80a5f243 100644 --- a/src/interfaces/ecpg/preproc/extern.h +++ b/src/interfaces/ecpg/preproc/extern.h @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/extern.h,v 1.73 2009/06/11 14:49:13 momjian Exp $ */ +/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/extern.h,v 1.74 2009/07/14 20:24:10 tgl Exp $ */ #ifndef _ECPG_PREPROC_EXTERN_H #define _ECPG_PREPROC_EXTERN_H @@ -101,7 +101,6 @@ extern void remove_variables(int); extern struct variable *new_variable(const char *, struct ECPGtype *, int); extern const ScanKeyword *ScanCKeywordLookup(const char *); extern const ScanKeyword *ScanECPGKeywordLookup(const char *text); -extern const ScanKeyword *DoLookup(const char *, const ScanKeyword *, const ScanKeyword *); extern void scanner_init(const char *); extern void parser_init(void); extern void scanner_finish(void); diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c index 3f47ea1452..0dcac907fa 100644 --- a/src/interfaces/ecpg/preproc/keywords.c +++ b/src/interfaces/ecpg/preproc/keywords.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.88 2009/03/08 16:53:30 alvherre Exp $ + * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.89 2009/07/14 20:24:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,5 +26,4 @@ const ScanKeyword ScanKeywords[] = { #include "parser/kwlist.h" }; -/* End of ScanKeywords, for use in kwlookup.c */ -const ScanKeyword *LastScanKeyword = endof(ScanKeywords); +const int NumScanKeywords = lengthof(ScanKeywords);