From afb0d0712f1a62efe2addd95262cf38e8481e84a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 6 Jan 2019 17:02:57 -0500 Subject: [PATCH] Replace the data structure used for keyword lookup. Previously, ScanKeywordLookup was passed an array of string pointers. This had some performance deficiencies: the strings themselves might be scattered all over the place depending on the compiler (and some quick checking shows that at least with gcc-on-Linux, they indeed weren't reliably close together). That led to very cache-unfriendly behavior as the binary search touched strings in many different pages. Also, depending on the platform, the string pointers might need to be adjusted at program start, so that they couldn't be simple constant data. And the ScanKeyword struct had been designed with an eye to 32-bit machines originally; on 64-bit it requires 16 bytes per keyword, making it even more cache-unfriendly. Redesign so that the keyword strings themselves are allocated consecutively (as part of one big char-string constant), thereby eliminating the touch-lots-of-unrelated-pages syndrome. And get rid of the ScanKeyword array in favor of three separate arrays: uint16 offsets into the keyword array, uint16 token codes, and uint8 keyword categories. That reduces the overhead per keyword to 5 bytes instead of 16 (even less in programs that only need one of the token codes and categories); moreover, the binary search only touches the offsets array, further reducing its cache footprint. This also lets us put the token codes somewhere else than the keyword strings are, which avoids some unpleasant build dependencies. While we're at it, wrap the data used by ScanKeywordLookup into a struct that can be treated as an opaque type by most callers. That doesn't change things much right now, but it will make it less painful to switch to a hash-based lookup method, as is being discussed in the mailing list thread. 
Most of the change here is associated with adding a generator script that can build the new data structure from the same list-of-PG_KEYWORD header representation we used before. The PG_KEYWORD lists that plpgsql and ecpg used to embed in their scanner .c files have to be moved into headers, and the Makefiles have to be taught to invoke the generator script. This work is also necessary if we're to consider hash-based lookup, since the generator script is what would be responsible for constructing a hash table. Aside from saving a few kilobytes in each program that includes the keyword table, this seems to speed up raw parsing (flex+bison) by a few percent. So it's worth doing even as it stands, though we think we can gain even more with a follow-on patch to switch to hash-based lookup. John Naylor, with further hacking by me Discussion: https://postgr.es/m/CAJVSVGXdFVU2sgym89XPL=Lv1zOS5=EHHQ8XWNzFL=mTXkKMLw@mail.gmail.com --- .../pg_stat_statements/pg_stat_statements.c | 4 +- src/backend/parser/parser.c | 2 +- src/backend/parser/scan.l | 51 ++++-- src/backend/utils/adt/misc.c | 8 +- src/backend/utils/adt/ruleutils.c | 6 +- src/common/.gitignore | 1 + src/common/Makefile | 26 +-- src/common/keywords.c | 99 +---------- src/common/kwlookup.c | 94 ++++++++++ src/fe_utils/string_utils.c | 6 +- src/include/common/keywords.h | 25 +-- src/include/common/kwlookup.h | 40 +++++ src/include/parser/kwlist.h | 2 +- src/include/parser/scanner.h | 13 +- src/interfaces/ecpg/preproc/.gitignore | 2 + src/interfaces/ecpg/preproc/Makefile | 22 ++- src/interfaces/ecpg/preproc/c_keywords.c | 75 ++++---- src/interfaces/ecpg/preproc/c_kwlist.h | 53 ++++++ src/interfaces/ecpg/preproc/ecpg_keywords.c | 84 +++------ src/interfaces/ecpg/preproc/ecpg_kwlist.h | 68 ++++++++ src/interfaces/ecpg/preproc/keywords.c | 16 +- src/interfaces/ecpg/preproc/pgc.l | 29 +-- src/interfaces/ecpg/preproc/preproc_extern.h | 7 +- src/pl/plpgsql/src/.gitignore | 2 + src/pl/plpgsql/src/Makefile | 19 +- 
src/pl/plpgsql/src/pl_reserved_kwlist.h | 53 ++++++ src/pl/plpgsql/src/pl_scanner.c | 165 +++--------------- src/pl/plpgsql/src/pl_unreserved_kwlist.h | 111 ++++++++++++ src/tools/gen_keywordlist.pl | 156 +++++++++++++++++ src/tools/msvc/Mkvcbuild.pm | 2 +- src/tools/msvc/Solution.pm | 36 ++++ src/tools/msvc/clean.bat | 5 + 32 files changed, 843 insertions(+), 439 deletions(-) create mode 100644 src/common/.gitignore create mode 100644 src/common/kwlookup.c create mode 100644 src/include/common/kwlookup.h create mode 100644 src/interfaces/ecpg/preproc/c_kwlist.h create mode 100644 src/interfaces/ecpg/preproc/ecpg_kwlist.h create mode 100644 src/pl/plpgsql/src/pl_reserved_kwlist.h create mode 100644 src/pl/plpgsql/src/pl_unreserved_kwlist.h create mode 100644 src/tools/gen_keywordlist.pl diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index e8ef966bb5..9131991b83 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -3075,8 +3075,8 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query, /* initialize the flex scanner --- should match raw_parser() */ yyscanner = scanner_init(query, &yyextra, - ScanKeywords, - NumScanKeywords); + &ScanKeywords, + ScanKeywordTokens); /* we don't want to re-emit any escape string warnings */ yyextra.escape_string_warning = false; diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index 7e9b1222fd..4c0c258cd7 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -41,7 +41,7 @@ raw_parser(const char *str) /* initialize the flex scanner */ yyscanner = scanner_init(str, &yyextra.core_yy_extra, - ScanKeywords, NumScanKeywords); + &ScanKeywords, ScanKeywordTokens); /* base_yylex() only needs this much initialization */ yyextra.have_lookahead = false; diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index fbeb86f890..e1cae859e8 100644 --- 
a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -66,6 +66,21 @@ int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING; bool escape_string_warning = true; bool standard_conforming_strings = true; +/* + * Constant data exported from this file. This array maps from the + * zero-based keyword numbers returned by ScanKeywordLookup to the + * Bison token numbers needed by gram.y. This is exported because + * callers need to pass it to scanner_init, if they are using the + * standard keyword list ScanKeywords. + */ +#define PG_KEYWORD(kwname, value, category) value, + +const uint16 ScanKeywordTokens[] = { +#include "parser/kwlist.h" +}; + +#undef PG_KEYWORD + /* * Set the type of YYSTYPE. */ @@ -504,18 +519,18 @@ other . * We will pass this along as a normal character string, * but preceded with an internally-generated "NCHAR". */ - const ScanKeyword *keyword; + int kwnum; SET_YYLLOC(); yyless(1); /* eat only 'n' this time */ - keyword = ScanKeywordLookup("nchar", - yyextra->keywords, - yyextra->num_keywords); - if (keyword != NULL) + kwnum = ScanKeywordLookup("nchar", + yyextra->keywordlist); + if (kwnum >= 0) { - yylval->keyword = keyword->name; - return keyword->value; + yylval->keyword = GetScanKeyword(kwnum, + yyextra->keywordlist); + return yyextra->keyword_tokens[kwnum]; } else { @@ -1021,19 +1036,19 @@ other . {identifier} { - const ScanKeyword *keyword; + int kwnum; char *ident; SET_YYLLOC(); /* Is it a keyword? 
*/ - keyword = ScanKeywordLookup(yytext, - yyextra->keywords, - yyextra->num_keywords); - if (keyword != NULL) + kwnum = ScanKeywordLookup(yytext, + yyextra->keywordlist); + if (kwnum >= 0) { - yylval->keyword = keyword->name; - return keyword->value; + yylval->keyword = GetScanKeyword(kwnum, + yyextra->keywordlist); + return yyextra->keyword_tokens[kwnum]; } /* @@ -1142,8 +1157,8 @@ scanner_yyerror(const char *message, core_yyscan_t yyscanner) core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, - const ScanKeyword *keywords, - int num_keywords) + const ScanKeywordList *keywordlist, + const uint16 *keyword_tokens) { Size slen = strlen(str); yyscan_t scanner; @@ -1153,8 +1168,8 @@ scanner_init(const char *str, core_yyset_extra(yyext, scanner); - yyext->keywords = keywords; - yyext->num_keywords = num_keywords; + yyext->keywordlist = keywordlist; + yyext->keyword_tokens = keyword_tokens; yyext->backslash_quote = backslash_quote; yyext->escape_string_warning = escape_string_warning; diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index 7b69b824e1..746b7d2fba 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -417,15 +417,17 @@ pg_get_keywords(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); - if (funcctx->call_cntr < NumScanKeywords) + if (funcctx->call_cntr < ScanKeywords.num_keywords) { char *values[3]; HeapTuple tuple; /* cast-away-const is ugly but alternatives aren't much better */ - values[0] = unconstify(char *, ScanKeywords[funcctx->call_cntr].name); + values[0] = unconstify(char *, + GetScanKeyword(funcctx->call_cntr, + &ScanKeywords)); - switch (ScanKeywords[funcctx->call_cntr].category) + switch (ScanKeywordCategories[funcctx->call_cntr]) { case UNRESERVED_KEYWORD: values[1] = "U"; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 368eacf68e..77811f6818 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ 
-10601,11 +10601,9 @@ quote_identifier(const char *ident) * Note: ScanKeywordLookup() does case-insensitive comparison, but * that's fine, since we already know we have all-lower-case. */ - const ScanKeyword *keyword = ScanKeywordLookup(ident, - ScanKeywords, - NumScanKeywords); + int kwnum = ScanKeywordLookup(ident, &ScanKeywords); - if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD) + if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD) safe = false; } diff --git a/src/common/.gitignore b/src/common/.gitignore new file mode 100644 index 0000000000..ffa3284fbf --- /dev/null +++ b/src/common/.gitignore @@ -0,0 +1 @@ +/kwlist_d.h diff --git a/src/common/Makefile b/src/common/Makefile index ec8139f014..317b071e02 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -41,11 +41,11 @@ override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\"" override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\"" override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\"" -override CPPFLAGS := -DFRONTEND $(CPPFLAGS) +override CPPFLAGS := -DFRONTEND -I. 
-I$(top_srcdir)/src/common $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) OBJS_COMMON = base64.o config_info.o controldata_utils.o exec.o file_perm.o \ - ip.o keywords.o link-canary.o md5.o pg_lzcompress.o \ + ip.o keywords.o kwlookup.o link-canary.o md5.o pg_lzcompress.o \ pgfnames.o psprintf.o relpath.o \ rmtree.o saslprep.o scram-common.o string.o unicode_norm.o \ username.o wait_error.o @@ -65,6 +65,8 @@ OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o) all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a +distprep: kwlist_d.h + # libpgcommon is needed by some contrib install: all installdirs $(INSTALL_STLIB) libpgcommon.a '$(DESTDIR)$(libdir)/libpgcommon.a' @@ -115,16 +117,18 @@ libpgcommon_srv.a: $(OBJS_SRV) %_srv.o: %.c %.o $(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@ -# Dependencies of keywords.o need to be managed explicitly to make sure -# that you don't get broken parsing code, even in a non-enable-depend build. -# Note that gram.h isn't required for the frontend versions of keywords.o. -$(top_builddir)/src/include/parser/gram.h: $(top_srcdir)/src/backend/parser/gram.y - $(MAKE) -C $(top_builddir)/src/backend $(top_builddir)/src/include/parser/gram.h +# generate SQL keyword lookup table to be included into keywords*.o. +kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(top_srcdir)/src/tools/gen_keywordlist.pl + $(PERL) $(top_srcdir)/src/tools/gen_keywordlist.pl --extern $< -keywords.o: $(top_srcdir)/src/include/parser/kwlist.h -keywords_shlib.o: $(top_srcdir)/src/include/parser/kwlist.h -keywords_srv.o: $(top_builddir)/src/include/parser/gram.h $(top_srcdir)/src/include/parser/kwlist.h +# Dependencies of keywords*.o need to be managed explicitly to make sure +# that you don't get broken parsing code, even in a non-enable-depend build. +keywords.o keywords_shlib.o keywords_srv.o: kwlist_d.h -clean distclean maintainer-clean: +# kwlist_d.h is in the distribution tarball, so it is not cleaned here. 
+clean distclean: rm -f libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a rm -f $(OBJS_FRONTEND) $(OBJS_SHLIB) $(OBJS_SRV) + +maintainer-clean: distclean + rm -f kwlist_d.h diff --git a/src/common/keywords.c b/src/common/keywords.c index 6f99090a29..84f779feb9 100644 --- a/src/common/keywords.c +++ b/src/common/keywords.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * keywords.c - * lexical token lookup for key words in PostgreSQL + * PostgreSQL's list of SQL keywords * * * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group @@ -13,102 +13,21 @@ * *------------------------------------------------------------------------- */ -#ifndef FRONTEND -#include "postgres.h" -#else -#include "postgres_fe.h" -#endif +#include "c.h" -#ifndef FRONTEND - -#include "parser/gramparse.h" +#include "common/keywords.h" -#define PG_KEYWORD(a,b,c) {a,b,c}, -#else +/* ScanKeywordList lookup data for SQL keywords */ -#include "common/keywords.h" - -/* - * We don't need the token number for frontend uses, so leave it out to avoid - * requiring backend headers that won't compile cleanly here. - */ -#define PG_KEYWORD(a,b,c) {a,0,c}, +#include "kwlist_d.h" -#endif /* FRONTEND */ +/* Keyword categories for SQL keywords */ +#define PG_KEYWORD(kwname, value, category) category, -const ScanKeyword ScanKeywords[] = { +const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = { #include "parser/kwlist.h" }; -const int NumScanKeywords = lengthof(ScanKeywords); - - -/* - * ScanKeywordLookup - see if a given word is a keyword - * - * The table to be searched is passed explicitly, so that this can be used - * to search keyword lists other than the standard list appearing above. - * - * Returns a pointer to the ScanKeyword table entry, or NULL if no match. - * - * The match is done case-insensitively. 
Note that we deliberately use a - * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', - * even if we are in a locale where tolower() would produce more or different - * translations. This is to conform to the SQL99 spec, which says that - * keywords are to be matched in this way even though non-keyword identifiers - * receive a different case-normalization mapping. - */ -const ScanKeyword * -ScanKeywordLookup(const char *text, - const ScanKeyword *keywords, - int num_keywords) -{ - int len, - i; - char word[NAMEDATALEN]; - const ScanKeyword *low; - const ScanKeyword *high; - - len = strlen(text); - /* We assume all keywords are shorter than NAMEDATALEN. */ - if (len >= NAMEDATALEN) - return NULL; - - /* - * Apply an ASCII-only downcasing. We must not use tolower() since it may - * produce the wrong translation in some locales (eg, Turkish). - */ - for (i = 0; i < len; i++) - { - char ch = text[i]; - - if (ch >= 'A' && ch <= 'Z') - ch += 'a' - 'A'; - word[i] = ch; - } - word[len] = '\0'; - - /* - * Now do a binary search using plain strcmp() comparison. 
- */ - low = keywords; - high = keywords + (num_keywords - 1); - while (low <= high) - { - const ScanKeyword *middle; - int difference; - - middle = low + (high - low) / 2; - difference = strcmp(middle->name, word); - if (difference == 0) - return middle; - else if (difference < 0) - low = middle + 1; - else - high = middle - 1; - } - - return NULL; -} +#undef PG_KEYWORD diff --git a/src/common/kwlookup.c b/src/common/kwlookup.c new file mode 100644 index 0000000000..d72842e759 --- /dev/null +++ b/src/common/kwlookup.c @@ -0,0 +1,94 @@ +/*------------------------------------------------------------------------- + * + * kwlookup.c + * Key word lookup for PostgreSQL + * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/kwlookup.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/kwlookup.h" + + +/* + * ScanKeywordLookup - see if a given word is a keyword + * + * The list of keywords to be matched against is passed as a ScanKeywordList. + * + * Returns the keyword number (0..N-1) of the keyword, or -1 if no match. + * Callers typically use the keyword number to index into information + * arrays, but that is no concern of this code. + * + * The match is done case-insensitively. Note that we deliberately use a + * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', + * even if we are in a locale where tolower() would produce more or different + * translations. This is to conform to the SQL99 spec, which says that + * keywords are to be matched in this way even though non-keyword identifiers + * receive a different case-normalization mapping. 
+ */ +int +ScanKeywordLookup(const char *text, + const ScanKeywordList *keywords) +{ + int len, + i; + char word[NAMEDATALEN]; + const char *kw_string; + const uint16 *kw_offsets; + const uint16 *low; + const uint16 *high; + + len = strlen(text); + + if (len > keywords->max_kw_len) + return -1; /* too long to be any keyword */ + + /* We assume all keywords are shorter than NAMEDATALEN. */ + Assert(len < NAMEDATALEN); + + /* + * Apply an ASCII-only downcasing. We must not use tolower() since it may + * produce the wrong translation in some locales (eg, Turkish). + */ + for (i = 0; i < len; i++) + { + char ch = text[i]; + + if (ch >= 'A' && ch <= 'Z') + ch += 'a' - 'A'; + word[i] = ch; + } + word[len] = '\0'; + + /* + * Now do a binary search using plain strcmp() comparison. + */ + kw_string = keywords->kw_string; + kw_offsets = keywords->kw_offsets; + low = kw_offsets; + high = kw_offsets + (keywords->num_keywords - 1); + while (low <= high) + { + const uint16 *middle; + int difference; + + middle = low + (high - low) / 2; + difference = strcmp(kw_string + *middle, word); + if (difference == 0) + return middle - kw_offsets; + else if (difference < 0) + low = middle + 1; + else + high = middle - 1; + } + + return -1; +} diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c index 9b47b62f41..5c1732aabe 100644 --- a/src/fe_utils/string_utils.c +++ b/src/fe_utils/string_utils.c @@ -104,11 +104,9 @@ fmtId(const char *rawid) * Note: ScanKeywordLookup() does case-insensitive comparison, but * that's fine, since we already know we have all-lower-case. 
*/ - const ScanKeyword *keyword = ScanKeywordLookup(rawid, - ScanKeywords, - NumScanKeywords); + int kwnum = ScanKeywordLookup(rawid, &ScanKeywords); - if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD) + if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD) need_quotes = true; } diff --git a/src/include/common/keywords.h b/src/include/common/keywords.h index 8f22f32548..fb18858a53 100644 --- a/src/include/common/keywords.h +++ b/src/include/common/keywords.h @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * keywords.h - * lexical token lookup for key words in PostgreSQL + * PostgreSQL's list of SQL keywords * * * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group @@ -14,31 +14,20 @@ #ifndef KEYWORDS_H #define KEYWORDS_H +#include "common/kwlookup.h" + /* Keyword categories --- should match lists in gram.y */ #define UNRESERVED_KEYWORD 0 #define COL_NAME_KEYWORD 1 #define TYPE_FUNC_NAME_KEYWORD 2 #define RESERVED_KEYWORD 3 - -typedef struct ScanKeyword -{ - const char *name; /* in lower case */ - int16 value; /* grammar's token code */ - int16 category; /* see codes above */ -} ScanKeyword; - #ifndef FRONTEND -extern PGDLLIMPORT const ScanKeyword ScanKeywords[]; -extern PGDLLIMPORT const int NumScanKeywords; +extern PGDLLIMPORT const ScanKeywordList ScanKeywords; +extern PGDLLIMPORT const uint8 ScanKeywordCategories[]; #else -extern const ScanKeyword ScanKeywords[]; -extern const int NumScanKeywords; +extern const ScanKeywordList ScanKeywords; +extern const uint8 ScanKeywordCategories[]; #endif - -extern const ScanKeyword *ScanKeywordLookup(const char *text, - const ScanKeyword *keywords, - int num_keywords); - #endif /* KEYWORDS_H */ diff --git a/src/include/common/kwlookup.h b/src/include/common/kwlookup.h new file mode 100644 index 0000000000..39efb3503f --- /dev/null +++ b/src/include/common/kwlookup.h @@ -0,0 +1,40 @@ 
+/*------------------------------------------------------------------------- + * + * kwlookup.h + * Key word lookup for PostgreSQL + * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/common/kwlookup.h + * + *------------------------------------------------------------------------- + */ +#ifndef KWLOOKUP_H +#define KWLOOKUP_H + +/* + * This struct contains the data needed by ScanKeywordLookup to perform a + * search within a set of keywords. The contents are typically generated by + * src/tools/gen_keywordlist.pl from a header containing PG_KEYWORD macros. + */ +typedef struct ScanKeywordList +{ + const char *kw_string; /* all keywords in order, separated by \0 */ + const uint16 *kw_offsets; /* offsets to the start of each keyword */ + int num_keywords; /* number of keywords */ + int max_kw_len; /* length of longest keyword */ +} ScanKeywordList; + + +extern int ScanKeywordLookup(const char *text, const ScanKeywordList *keywords); + +/* Code that wants to retrieve the text of the N'th keyword should use this. */ +static inline const char * +GetScanKeyword(int n, const ScanKeywordList *keywords) +{ + return keywords->kw_string + keywords->kw_offsets[n]; +} + +#endif /* KWLOOKUP_H */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 0256d53998..b8902d3403 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -2,7 +2,7 @@ * * kwlist.h * - * The keyword list is kept in its own source file for possible use by + * The keyword lists are kept in their own source files for use by * automatic tools. The exact representation of a keyword is determined * by the PG_KEYWORD macro, which is not defined in this file; it can * be defined by the caller for special purposes. 
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h index 009550f424..91e1c836d2 100644 --- a/src/include/parser/scanner.h +++ b/src/include/parser/scanner.h @@ -73,10 +73,10 @@ typedef struct core_yy_extra_type Size scanbuflen; /* - * The keyword list to use. + * The keyword list to use, and the associated grammar token codes. */ - const ScanKeyword *keywords; - int num_keywords; + const ScanKeywordList *keywordlist; + const uint16 *keyword_tokens; /* * Scanner settings to use. These are initialized from the corresponding @@ -116,11 +116,14 @@ typedef struct core_yy_extra_type typedef void *core_yyscan_t; +/* Constant data exported from parser/scan.l */ +extern PGDLLIMPORT const uint16 ScanKeywordTokens[]; + /* Entry points in parser/scan.l */ extern core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, - const ScanKeyword *keywords, - int num_keywords); + const ScanKeywordList *keywordlist, + const uint16 *keyword_tokens); extern void scanner_finish(core_yyscan_t yyscanner); extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner); diff --git a/src/interfaces/ecpg/preproc/.gitignore b/src/interfaces/ecpg/preproc/.gitignore index 38ae2fe4d9..958a826f9e 100644 --- a/src/interfaces/ecpg/preproc/.gitignore +++ b/src/interfaces/ecpg/preproc/.gitignore @@ -2,5 +2,7 @@ /preproc.c /preproc.h /pgc.c +/c_kwlist_d.h +/ecpg_kwlist_d.h /typename.c /ecpg diff --git a/src/interfaces/ecpg/preproc/Makefile b/src/interfaces/ecpg/preproc/Makefile index 69ddd8e9f7..b5b74a3b81 100644 --- a/src/interfaces/ecpg/preproc/Makefile +++ b/src/interfaces/ecpg/preproc/Makefile @@ -28,6 +28,8 @@ OBJS= preproc.o pgc.o type.o ecpg.o output.o parser.o \ keywords.o c_keywords.o ecpg_keywords.o typename.o descriptor.o variable.o \ $(WIN32RES) +GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl + # Suppress parallel build to avoid a bug in GNU make 3.82 # (see comments in ../Makefile) ifeq ($(MAKE_VERSION),3.82) @@ -53,9 
+55,20 @@ preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg. $(PERL) $(srcdir)/parse.pl $(srcdir) < $< > $@ $(PERL) $(srcdir)/check_rules.pl $(srcdir) $< +# generate keyword headers +c_kwlist_d.h: c_kwlist.h $(GEN_KEYWORDLIST) + $(PERL) $(GEN_KEYWORDLIST) --varname ScanCKeywords $< + +ecpg_kwlist_d.h: ecpg_kwlist.h $(GEN_KEYWORDLIST) + $(PERL) $(GEN_KEYWORDLIST) --varname ScanECPGKeywords $< + +# Force these dependencies to be known even without dependency info built: ecpg_keywords.o c_keywords.o keywords.o preproc.o pgc.o parser.o: preproc.h +ecpg_keywords.o: ecpg_kwlist_d.h +c_keywords.o: c_kwlist_d.h +keywords.o: $(top_srcdir)/src/include/parser/kwlist.h -distprep: preproc.y preproc.c preproc.h pgc.c +distprep: preproc.y preproc.c preproc.h pgc.c c_kwlist_d.h ecpg_kwlist_d.h install: all installdirs $(INSTALL_PROGRAM) ecpg$(X) '$(DESTDIR)$(bindir)' @@ -66,12 +79,11 @@ installdirs: uninstall: rm -f '$(DESTDIR)$(bindir)/ecpg$(X)' +# preproc.y, preproc.c, preproc.h, pgc.c, c_kwlist_d.h, and ecpg_kwlist_d.h +# are in the distribution tarball, so they are not cleaned here. clean distclean: rm -f *.o ecpg$(X) rm -f typename.c -# `make distclean' must not remove preproc.y, preproc.c, preproc.h, or pgc.c -# since we want to ship those files in the distribution for people with -# inadequate tools. Instead, `make maintainer-clean' will remove them. maintainer-clean: distclean - rm -f preproc.y preproc.c preproc.h pgc.c + rm -f preproc.y preproc.c preproc.h pgc.c c_kwlist_d.h ecpg_kwlist_d.h diff --git a/src/interfaces/ecpg/preproc/c_keywords.c b/src/interfaces/ecpg/preproc/c_keywords.c index c367dbfc20..38ddf6f135 100644 --- a/src/interfaces/ecpg/preproc/c_keywords.c +++ b/src/interfaces/ecpg/preproc/c_keywords.c @@ -14,72 +14,57 @@ #include "preproc_extern.h" #include "preproc.h" -/* - * List of (keyword-name, keyword-token-value) pairs. - * - * !!WARNING!!: This list must be sorted, because binary - * search is used to locate entries. 
- */ -static const ScanKeyword ScanCKeywords[] = { - /* name, value, category */ +/* ScanKeywordList lookup data for C keywords */ +#include "c_kwlist_d.h" - /* - * category is not needed in ecpg, it is only here so we can share the - * data structure with the backend - */ - {"VARCHAR", VARCHAR, 0}, - {"auto", S_AUTO, 0}, - {"bool", SQL_BOOL, 0}, - {"char", CHAR_P, 0}, - {"const", S_CONST, 0}, - {"enum", ENUM_P, 0}, - {"extern", S_EXTERN, 0}, - {"float", FLOAT_P, 0}, - {"hour", HOUR_P, 0}, - {"int", INT_P, 0}, - {"long", SQL_LONG, 0}, - {"minute", MINUTE_P, 0}, - {"month", MONTH_P, 0}, - {"register", S_REGISTER, 0}, - {"second", SECOND_P, 0}, - {"short", SQL_SHORT, 0}, - {"signed", SQL_SIGNED, 0}, - {"static", S_STATIC, 0}, - {"struct", SQL_STRUCT, 0}, - {"to", TO, 0}, - {"typedef", S_TYPEDEF, 0}, - {"union", UNION, 0}, - {"unsigned", SQL_UNSIGNED, 0}, - {"varchar", VARCHAR, 0}, - {"volatile", S_VOLATILE, 0}, - {"year", YEAR_P, 0}, +/* Token codes for C keywords */ +#define PG_KEYWORD(kwname, value) value, + +static const uint16 ScanCKeywordTokens[] = { +#include "c_kwlist.h" }; +#undef PG_KEYWORD + /* + * ScanCKeywordLookup - see if a given word is a keyword + * + * Returns the token value of the keyword, or -1 if no match. + * * Do a binary search using plain strcmp() comparison. This is much like * ScanKeywordLookup(), except we want case-sensitive matching. 
*/ -const ScanKeyword * +int ScanCKeywordLookup(const char *text) { - const ScanKeyword *low = &ScanCKeywords[0]; - const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1]; + const char *kw_string; + const uint16 *kw_offsets; + const uint16 *low; + const uint16 *high; + + if (strlen(text) > ScanCKeywords.max_kw_len) + return -1; /* too long to be any keyword */ + + kw_string = ScanCKeywords.kw_string; + kw_offsets = ScanCKeywords.kw_offsets; + low = kw_offsets; + high = kw_offsets + (ScanCKeywords.num_keywords - 1); while (low <= high) { - const ScanKeyword *middle; + const uint16 *middle; int difference; middle = low + (high - low) / 2; - difference = strcmp(middle->name, text); + difference = strcmp(kw_string + *middle, text); if (difference == 0) - return middle; + return ScanCKeywordTokens[middle - kw_offsets]; else if (difference < 0) low = middle + 1; else high = middle - 1; } - return NULL; + return -1; } diff --git a/src/interfaces/ecpg/preproc/c_kwlist.h b/src/interfaces/ecpg/preproc/c_kwlist.h new file mode 100644 index 0000000000..4545505298 --- /dev/null +++ b/src/interfaces/ecpg/preproc/c_kwlist.h @@ -0,0 +1,53 @@ +/*------------------------------------------------------------------------- + * + * c_kwlist.h + * + * The keyword lists are kept in their own source files for use by + * automatic tools. The exact representation of a keyword is determined + * by the PG_KEYWORD macro, which is not defined in this file; it can + * be defined by the caller for special purposes. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/interfaces/ecpg/preproc/c_kwlist.h + * + *------------------------------------------------------------------------- + */ + +/* There is deliberately not an #ifndef C_KWLIST_H here. */ + +/* + * List of (keyword-name, keyword-token-value) pairs. 
+ * + * !!WARNING!!: This list must be sorted by ASCII name, because binary + * search is used to locate entries. + */ + +/* name, value */ +PG_KEYWORD("VARCHAR", VARCHAR) +PG_KEYWORD("auto", S_AUTO) +PG_KEYWORD("bool", SQL_BOOL) +PG_KEYWORD("char", CHAR_P) +PG_KEYWORD("const", S_CONST) +PG_KEYWORD("enum", ENUM_P) +PG_KEYWORD("extern", S_EXTERN) +PG_KEYWORD("float", FLOAT_P) +PG_KEYWORD("hour", HOUR_P) +PG_KEYWORD("int", INT_P) +PG_KEYWORD("long", SQL_LONG) +PG_KEYWORD("minute", MINUTE_P) +PG_KEYWORD("month", MONTH_P) +PG_KEYWORD("register", S_REGISTER) +PG_KEYWORD("second", SECOND_P) +PG_KEYWORD("short", SQL_SHORT) +PG_KEYWORD("signed", SQL_SIGNED) +PG_KEYWORD("static", S_STATIC) +PG_KEYWORD("struct", SQL_STRUCT) +PG_KEYWORD("to", TO) +PG_KEYWORD("typedef", S_TYPEDEF) +PG_KEYWORD("union", UNION) +PG_KEYWORD("unsigned", SQL_UNSIGNED) +PG_KEYWORD("varchar", VARCHAR) +PG_KEYWORD("volatile", S_VOLATILE) +PG_KEYWORD("year", YEAR_P) diff --git a/src/interfaces/ecpg/preproc/ecpg_keywords.c b/src/interfaces/ecpg/preproc/ecpg_keywords.c index 37c97e162d..4839c37bbc 100644 --- a/src/interfaces/ecpg/preproc/ecpg_keywords.c +++ b/src/interfaces/ecpg/preproc/ecpg_keywords.c @@ -16,82 +16,40 @@ #include "preproc_extern.h" #include "preproc.h" -/* - * List of (keyword-name, keyword-token-value) pairs. - * - * !!WARNING!!: This list must be sorted, because binary - * search is used to locate entries. 
- */ -static const ScanKeyword ECPGScanKeywords[] = { - /* name, value, category */ +/* ScanKeywordList lookup data for ECPG keywords */ +#include "ecpg_kwlist_d.h" + +/* Token codes for ECPG keywords */ +#define PG_KEYWORD(kwname, value) value, - /* - * category is not needed in ecpg, it is only here so we can share the - * data structure with the backend - */ - {"allocate", SQL_ALLOCATE, 0}, - {"autocommit", SQL_AUTOCOMMIT, 0}, - {"bool", SQL_BOOL, 0}, - {"break", SQL_BREAK, 0}, - {"cardinality", SQL_CARDINALITY, 0}, - {"connect", SQL_CONNECT, 0}, - {"count", SQL_COUNT, 0}, - {"datetime_interval_code", SQL_DATETIME_INTERVAL_CODE, 0}, - {"datetime_interval_precision", SQL_DATETIME_INTERVAL_PRECISION, 0}, - {"describe", SQL_DESCRIBE, 0}, - {"descriptor", SQL_DESCRIPTOR, 0}, - {"disconnect", SQL_DISCONNECT, 0}, - {"found", SQL_FOUND, 0}, - {"free", SQL_FREE, 0}, - {"get", SQL_GET, 0}, - {"go", SQL_GO, 0}, - {"goto", SQL_GOTO, 0}, - {"identified", SQL_IDENTIFIED, 0}, - {"indicator", SQL_INDICATOR, 0}, - {"key_member", SQL_KEY_MEMBER, 0}, - {"length", SQL_LENGTH, 0}, - {"long", SQL_LONG, 0}, - {"nullable", SQL_NULLABLE, 0}, - {"octet_length", SQL_OCTET_LENGTH, 0}, - {"open", SQL_OPEN, 0}, - {"output", SQL_OUTPUT, 0}, - {"reference", SQL_REFERENCE, 0}, - {"returned_length", SQL_RETURNED_LENGTH, 0}, - {"returned_octet_length", SQL_RETURNED_OCTET_LENGTH, 0}, - {"scale", SQL_SCALE, 0}, - {"section", SQL_SECTION, 0}, - {"short", SQL_SHORT, 0}, - {"signed", SQL_SIGNED, 0}, - {"sqlerror", SQL_SQLERROR, 0}, - {"sqlprint", SQL_SQLPRINT, 0}, - {"sqlwarning", SQL_SQLWARNING, 0}, - {"stop", SQL_STOP, 0}, - {"struct", SQL_STRUCT, 0}, - {"unsigned", SQL_UNSIGNED, 0}, - {"var", SQL_VAR, 0}, - {"whenever", SQL_WHENEVER, 0}, +static const uint16 ECPGScanKeywordTokens[] = { +#include "ecpg_kwlist.h" }; +#undef PG_KEYWORD + + /* * ScanECPGKeywordLookup - see if a given word is a keyword * - * Returns a pointer to the ScanKeyword table entry, or NULL if no match. 
+ * Returns the token value of the keyword, or -1 if no match. + * * Keywords are matched using the same case-folding rules as in the backend. */ -const ScanKeyword * +int ScanECPGKeywordLookup(const char *text) { - const ScanKeyword *res; + int kwnum; /* First check SQL symbols defined by the backend. */ - res = ScanKeywordLookup(text, SQLScanKeywords, NumSQLScanKeywords); - if (res) - return res; + kwnum = ScanKeywordLookup(text, &ScanKeywords); + if (kwnum >= 0) + return SQLScanKeywordTokens[kwnum]; /* Try ECPG-specific keywords. */ - res = ScanKeywordLookup(text, ECPGScanKeywords, lengthof(ECPGScanKeywords)); - if (res) - return res; + kwnum = ScanKeywordLookup(text, &ScanECPGKeywords); + if (kwnum >= 0) + return ECPGScanKeywordTokens[kwnum]; - return NULL; + return -1; } diff --git a/src/interfaces/ecpg/preproc/ecpg_kwlist.h b/src/interfaces/ecpg/preproc/ecpg_kwlist.h new file mode 100644 index 0000000000..97ef254166 --- /dev/null +++ b/src/interfaces/ecpg/preproc/ecpg_kwlist.h @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * ecpg_kwlist.h + * + * The keyword lists are kept in their own source files for use by + * automatic tools. The exact representation of a keyword is determined + * by the PG_KEYWORD macro, which is not defined in this file; it can + * be defined by the caller for special purposes. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/interfaces/ecpg/preproc/ecpg_kwlist.h + * + *------------------------------------------------------------------------- + */ + +/* There is deliberately not an #ifndef ECPG_KWLIST_H here. */ + +/* + * List of (keyword-name, keyword-token-value) pairs. + * + * !!WARNING!!: This list must be sorted by ASCII name, because binary + * search is used to locate entries. 
+ */ + +/* name, value */ +PG_KEYWORD("allocate", SQL_ALLOCATE) +PG_KEYWORD("autocommit", SQL_AUTOCOMMIT) +PG_KEYWORD("bool", SQL_BOOL) +PG_KEYWORD("break", SQL_BREAK) +PG_KEYWORD("cardinality", SQL_CARDINALITY) +PG_KEYWORD("connect", SQL_CONNECT) +PG_KEYWORD("count", SQL_COUNT) +PG_KEYWORD("datetime_interval_code", SQL_DATETIME_INTERVAL_CODE) +PG_KEYWORD("datetime_interval_precision", SQL_DATETIME_INTERVAL_PRECISION) +PG_KEYWORD("describe", SQL_DESCRIBE) +PG_KEYWORD("descriptor", SQL_DESCRIPTOR) +PG_KEYWORD("disconnect", SQL_DISCONNECT) +PG_KEYWORD("found", SQL_FOUND) +PG_KEYWORD("free", SQL_FREE) +PG_KEYWORD("get", SQL_GET) +PG_KEYWORD("go", SQL_GO) +PG_KEYWORD("goto", SQL_GOTO) +PG_KEYWORD("identified", SQL_IDENTIFIED) +PG_KEYWORD("indicator", SQL_INDICATOR) +PG_KEYWORD("key_member", SQL_KEY_MEMBER) +PG_KEYWORD("length", SQL_LENGTH) +PG_KEYWORD("long", SQL_LONG) +PG_KEYWORD("nullable", SQL_NULLABLE) +PG_KEYWORD("octet_length", SQL_OCTET_LENGTH) +PG_KEYWORD("open", SQL_OPEN) +PG_KEYWORD("output", SQL_OUTPUT) +PG_KEYWORD("reference", SQL_REFERENCE) +PG_KEYWORD("returned_length", SQL_RETURNED_LENGTH) +PG_KEYWORD("returned_octet_length", SQL_RETURNED_OCTET_LENGTH) +PG_KEYWORD("scale", SQL_SCALE) +PG_KEYWORD("section", SQL_SECTION) +PG_KEYWORD("short", SQL_SHORT) +PG_KEYWORD("signed", SQL_SIGNED) +PG_KEYWORD("sqlerror", SQL_SQLERROR) +PG_KEYWORD("sqlprint", SQL_SQLPRINT) +PG_KEYWORD("sqlwarning", SQL_SQLWARNING) +PG_KEYWORD("stop", SQL_STOP) +PG_KEYWORD("struct", SQL_STRUCT) +PG_KEYWORD("unsigned", SQL_UNSIGNED) +PG_KEYWORD("var", SQL_VAR) +PG_KEYWORD("whenever", SQL_WHENEVER) diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c index 12409e9805..03804099d1 100644 --- a/src/interfaces/ecpg/preproc/keywords.c +++ b/src/interfaces/ecpg/preproc/keywords.c @@ -17,24 +17,22 @@ /* * This is much trickier than it looks. 
We are #include'ing kwlist.h - * but the "value" numbers that go into the table are from preproc.h - * not the backend's gram.h. Therefore this table will recognize all - * keywords known to the backend, but will supply the token numbers used + * but the token numbers that go into the table are from preproc.h + * not the backend's gram.h. Therefore this token table will match + * the ScanKeywords table supplied from common/keywords.c, including all + * keywords known to the backend, but it will supply the token numbers used * by ecpg's grammar, which is what we need. The ecpg grammar must * define all the same token names the backend does, else we'll get * undefined-symbol failures in this compile. */ -#include "common/keywords.h" - #include "preproc_extern.h" #include "preproc.h" +#define PG_KEYWORD(kwname, value, category) value, -#define PG_KEYWORD(a,b,c) {a,b,c}, - -const ScanKeyword SQLScanKeywords[] = { +const uint16 SQLScanKeywordTokens[] = { #include "parser/kwlist.h" }; -const int NumSQLScanKeywords = lengthof(SQLScanKeywords); +#undef PG_KEYWORD diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index a60564c690..3131f5f147 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -920,19 +920,19 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } {identifier} { - const ScanKeyword *keyword; - if (!isdefine()) { + int kwvalue; + /* Is it an SQL/ECPG keyword? */ - keyword = ScanECPGKeywordLookup(yytext); - if (keyword != NULL) - return keyword->value; + kwvalue = ScanECPGKeywordLookup(yytext); + if (kwvalue >= 0) + return kwvalue; /* Is it a C keyword? */ - keyword = ScanCKeywordLookup(yytext); - if (keyword != NULL) - return keyword->value; + kwvalue = ScanCKeywordLookup(yytext); + if (kwvalue >= 0) + return kwvalue; /* * None of the above. Return it as an identifier. 
@@ -1010,12 +1010,11 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ return CPP_LINE; } {identifier} { - const ScanKeyword *keyword; - /* * Try to detect a function name: * look for identifiers at the global scope - * keep the last identifier before the first '(' and '{' */ + * keep the last identifier before the first '(' and '{' + */ if (braces_open == 0 && parenths_open == 0) { if (current_function) @@ -1026,9 +1025,11 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ /* however, some defines have to be taken care of for compatibility */ if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine()) { - keyword = ScanCKeywordLookup(yytext); - if (keyword != NULL) - return keyword->value; + int kwvalue; + + kwvalue = ScanCKeywordLookup(yytext); + if (kwvalue >= 0) + return kwvalue; else { base_yylval.str = mm_strdup(yytext); diff --git a/src/interfaces/ecpg/preproc/preproc_extern.h b/src/interfaces/ecpg/preproc/preproc_extern.h index 13eda670ff..97467800dc 100644 --- a/src/interfaces/ecpg/preproc/preproc_extern.h +++ b/src/interfaces/ecpg/preproc/preproc_extern.h @@ -59,8 +59,7 @@ extern struct when when_error, extern struct ECPGstruct_member *struct_member_list[STRUCT_DEPTH]; /* Globals from keywords.c */ -extern const ScanKeyword SQLScanKeywords[]; -extern const int NumSQLScanKeywords; +extern const uint16 SQLScanKeywordTokens[]; /* functions */ @@ -102,8 +101,8 @@ extern void check_indicator(struct ECPGtype *); extern void remove_typedefs(int); extern void remove_variables(int); extern struct variable *new_variable(const char *, struct ECPGtype *, int); -extern const ScanKeyword *ScanCKeywordLookup(const char *); -extern const ScanKeyword *ScanECPGKeywordLookup(const char *text); +extern int ScanCKeywordLookup(const char *text); +extern int ScanECPGKeywordLookup(const char *text); extern void parser_init(void); extern int filtered_base_yylex(void); diff --git a/src/pl/plpgsql/src/.gitignore 
b/src/pl/plpgsql/src/.gitignore index ff6ac965fd..3ab9a2243c 100644 --- a/src/pl/plpgsql/src/.gitignore +++ b/src/pl/plpgsql/src/.gitignore @@ -1,5 +1,7 @@ /pl_gram.c /pl_gram.h +/pl_reserved_kwlist_d.h +/pl_unreserved_kwlist_d.h /plerrcodes.h /log/ /results/ diff --git a/src/pl/plpgsql/src/Makefile b/src/pl/plpgsql/src/Makefile index 25a5a9d448..9dd4a74c34 100644 --- a/src/pl/plpgsql/src/Makefile +++ b/src/pl/plpgsql/src/Makefile @@ -29,6 +29,8 @@ REGRESS_OPTS = --dbname=$(PL_TESTDB) REGRESS = plpgsql_call plpgsql_control plpgsql_domain plpgsql_record \ plpgsql_cache plpgsql_transaction plpgsql_varprops +GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl + all: all-lib # Shared library stuff @@ -61,6 +63,7 @@ uninstall-headers: # Force these dependencies to be known even without dependency info built: pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o: plpgsql.h pl_gram.h plerrcodes.h +pl_scanner.o: pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h # See notes in src/backend/parser/Makefile about the following two rules pl_gram.h: pl_gram.c @@ -72,6 +75,13 @@ pl_gram.c: BISONFLAGS += -d plerrcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-plerrcodes.pl $(PERL) $(srcdir)/generate-plerrcodes.pl $< > $@ +# generate keyword headers for the scanner +pl_reserved_kwlist_d.h: pl_reserved_kwlist.h $(GEN_KEYWORDLIST) + $(PERL) $(GEN_KEYWORDLIST) --varname ReservedPLKeywords $< + +pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h $(GEN_KEYWORDLIST) + $(PERL) $(GEN_KEYWORDLIST) --varname UnreservedPLKeywords $< + check: submake $(pg_regress_check) $(REGRESS_OPTS) $(REGRESS) @@ -84,13 +94,14 @@ submake: $(MAKE) -C $(top_builddir)/src/test/regress pg_regress$(X) -distprep: pl_gram.h pl_gram.c plerrcodes.h +distprep: pl_gram.h pl_gram.c plerrcodes.h pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h -# pl_gram.c, pl_gram.h and plerrcodes.h are in the distribution tarball, -# so they are not cleaned here. 
+# pl_gram.c, pl_gram.h, plerrcodes.h, pl_reserved_kwlist_d.h, and +# pl_unreserved_kwlist_d.h are in the distribution tarball, so they +# are not cleaned here. clean distclean: clean-lib rm -f $(OBJS) rm -rf $(pg_regress_clean_files) maintainer-clean: distclean - rm -f pl_gram.c pl_gram.h plerrcodes.h + rm -f pl_gram.c pl_gram.h plerrcodes.h pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h diff --git a/src/pl/plpgsql/src/pl_reserved_kwlist.h b/src/pl/plpgsql/src/pl_reserved_kwlist.h new file mode 100644 index 0000000000..5c2e0c1c4b --- /dev/null +++ b/src/pl/plpgsql/src/pl_reserved_kwlist.h @@ -0,0 +1,53 @@ +/*------------------------------------------------------------------------- + * + * pl_reserved_kwlist.h + * + * The keyword lists are kept in their own source files for use by + * automatic tools. The exact representation of a keyword is determined + * by the PG_KEYWORD macro, which is not defined in this file; it can + * be defined by the caller for special purposes. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/pl/plpgsql/src/pl_reserved_kwlist.h + * + *------------------------------------------------------------------------- + */ + +/* There is deliberately not an #ifndef PL_RESERVED_KWLIST_H here. */ + +/* + * List of (keyword-name, keyword-token-value) pairs. + * + * Be careful not to put the same word in both lists. + * + * !!WARNING!!: This list must be sorted by ASCII name, because binary + * search is used to locate entries. 
+ */ + +/* name, value */ +PG_KEYWORD("all", K_ALL) +PG_KEYWORD("begin", K_BEGIN) +PG_KEYWORD("by", K_BY) +PG_KEYWORD("case", K_CASE) +PG_KEYWORD("declare", K_DECLARE) +PG_KEYWORD("else", K_ELSE) +PG_KEYWORD("end", K_END) +PG_KEYWORD("execute", K_EXECUTE) +PG_KEYWORD("for", K_FOR) +PG_KEYWORD("foreach", K_FOREACH) +PG_KEYWORD("from", K_FROM) +PG_KEYWORD("if", K_IF) +PG_KEYWORD("in", K_IN) +PG_KEYWORD("into", K_INTO) +PG_KEYWORD("loop", K_LOOP) +PG_KEYWORD("not", K_NOT) +PG_KEYWORD("null", K_NULL) +PG_KEYWORD("or", K_OR) +PG_KEYWORD("strict", K_STRICT) +PG_KEYWORD("then", K_THEN) +PG_KEYWORD("to", K_TO) +PG_KEYWORD("using", K_USING) +PG_KEYWORD("when", K_WHEN) +PG_KEYWORD("while", K_WHILE) diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c index 8340628de3..c260438d7d 100644 --- a/src/pl/plpgsql/src/pl_scanner.c +++ b/src/pl/plpgsql/src/pl_scanner.c @@ -22,16 +22,15 @@ #include "pl_gram.h" /* must be after parser/scanner.h */ -#define PG_KEYWORD(a,b,c) {a,b,c}, - - /* Klugy flag to tell scanner how to look up identifiers */ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL; /* * A word about keywords: * - * We keep reserved and unreserved keywords in separate arrays. The + * We keep reserved and unreserved keywords in separate headers. Be careful + * not to put the same word in both headers. Also be sure that pl_gram.y's + * unreserved_keyword production agrees with the unreserved header. The * reserved keywords are passed to the core scanner, so they will be * recognized before (and instead of) any variable name. Unreserved words * are checked for separately, usually after determining that the identifier @@ -57,130 +56,22 @@ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL; * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE */ -/* - * Lists of keyword (name, token-value, category) entries. 
- * - * !!WARNING!!: These lists must be sorted by ASCII name, because binary - * search is used to locate entries. - * - * Be careful not to put the same word in both lists. Also be sure that - * pl_gram.y's unreserved_keyword production agrees with the second list. - */ +/* ScanKeywordList lookup data for PL/pgSQL keywords */ +#include "pl_reserved_kwlist_d.h" +#include "pl_unreserved_kwlist_d.h" + +/* Token codes for PL/pgSQL keywords */ +#define PG_KEYWORD(kwname, value) value, -static const ScanKeyword reserved_keywords[] = { - PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD) - PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD) - PG_KEYWORD("by", K_BY, RESERVED_KEYWORD) - PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD) - PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD) - PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD) - PG_KEYWORD("end", K_END, RESERVED_KEYWORD) - PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD) - PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD) - PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD) - PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD) - PG_KEYWORD("if", K_IF, RESERVED_KEYWORD) - PG_KEYWORD("in", K_IN, RESERVED_KEYWORD) - PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD) - PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD) - PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD) - PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD) - PG_KEYWORD("or", K_OR, RESERVED_KEYWORD) - PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD) - PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD) - PG_KEYWORD("to", K_TO, RESERVED_KEYWORD) - PG_KEYWORD("using", K_USING, RESERVED_KEYWORD) - PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD) - PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD) +static const uint16 ReservedPLKeywordTokens[] = { +#include "pl_reserved_kwlist.h" }; -static const int num_reserved_keywords = lengthof(reserved_keywords); - -static const ScanKeyword unreserved_keywords[] = { - PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD) - PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD) - 
PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD) - PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD) - PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD) - PG_KEYWORD("call", K_CALL, UNRESERVED_KEYWORD) - PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD) - PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD) - PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD) - PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD) - PG_KEYWORD("commit", K_COMMIT, UNRESERVED_KEYWORD) - PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD) - PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD) - PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD) - PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD) - PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD) - PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD) - PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD) - PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD) - PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD) - PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD) - PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD) - PG_KEYWORD("do", K_DO, UNRESERVED_KEYWORD) - PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD) - PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD) - PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD) - PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD) - PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD) - PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD) - PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD) - PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD) - PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD) - PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD) - PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD) - PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD) - PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD) - PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD) - PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD) - PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD) - 
PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD) - PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD) - PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD) - PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD) - PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD) - PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD) - PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD) - PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD) - PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD) - PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD) - PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD) - PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD) - PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD) - PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD) - PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD) - PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD) - PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD) - PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD) - PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD) - PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD) - PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD) - PG_KEYWORD("reset", K_RESET, UNRESERVED_KEYWORD) - PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD) - PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD) - PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD) - PG_KEYWORD("rollback", K_ROLLBACK, UNRESERVED_KEYWORD) - PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD) - PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD) - PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD) - PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD) - PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD) - PG_KEYWORD("set", K_SET, UNRESERVED_KEYWORD) - PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD) - PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD) - PG_KEYWORD("stacked", K_STACKED, 
UNRESERVED_KEYWORD) - PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD) - PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD) - PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD) - PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD) - PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD) - PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD) - PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD) +static const uint16 UnreservedPLKeywordTokens[] = { +#include "pl_unreserved_kwlist.h" }; -static const int num_unreserved_keywords = lengthof(unreserved_keywords); +#undef PG_KEYWORD /* * This macro must recognize all tokens that can immediately precede a @@ -256,7 +147,7 @@ plpgsql_yylex(void) { int tok1; TokenAuxData aux1; - const ScanKeyword *kw; + int kwnum; tok1 = internal_yylex(&aux1); if (tok1 == IDENT || tok1 == PARAM) @@ -333,12 +224,12 @@ plpgsql_yylex(void) &aux1.lval.word)) tok1 = T_DATUM; else if (!aux1.lval.word.quoted && - (kw = ScanKeywordLookup(aux1.lval.word.ident, - unreserved_keywords, - num_unreserved_keywords))) + (kwnum = ScanKeywordLookup(aux1.lval.word.ident, + &UnreservedPLKeywords)) >= 0) { - aux1.lval.keyword = kw->name; - tok1 = kw->value; + aux1.lval.keyword = GetScanKeyword(kwnum, + &UnreservedPLKeywords); + tok1 = UnreservedPLKeywordTokens[kwnum]; } else tok1 = T_WORD; @@ -375,12 +266,12 @@ plpgsql_yylex(void) &aux1.lval.word)) tok1 = T_DATUM; else if (!aux1.lval.word.quoted && - (kw = ScanKeywordLookup(aux1.lval.word.ident, - unreserved_keywords, - num_unreserved_keywords))) + (kwnum = ScanKeywordLookup(aux1.lval.word.ident, + &UnreservedPLKeywords)) >= 0) { - aux1.lval.keyword = kw->name; - tok1 = kw->value; + aux1.lval.keyword = GetScanKeyword(kwnum, + &UnreservedPLKeywords); + tok1 = UnreservedPLKeywordTokens[kwnum]; } else tok1 = T_WORD; @@ -497,9 +388,9 @@ plpgsql_token_is_unreserved_keyword(int token) { int i; - for (i = 0; i < num_unreserved_keywords; i++) + for (i = 0; i < 
lengthof(UnreservedPLKeywordTokens); i++) { - if (unreserved_keywords[i].value == token) + if (UnreservedPLKeywordTokens[i] == token) return true; } return false; @@ -696,7 +587,7 @@ plpgsql_scanner_init(const char *str) { /* Start up the core scanner */ yyscanner = scanner_init(str, &core_yy, - reserved_keywords, num_reserved_keywords); + &ReservedPLKeywords, ReservedPLKeywordTokens); /* * scanorig points to the original string, which unlike the scanner's diff --git a/src/pl/plpgsql/src/pl_unreserved_kwlist.h b/src/pl/plpgsql/src/pl_unreserved_kwlist.h new file mode 100644 index 0000000000..ef2aea05b8 --- /dev/null +++ b/src/pl/plpgsql/src/pl_unreserved_kwlist.h @@ -0,0 +1,111 @@ +/*------------------------------------------------------------------------- + * + * pl_unreserved_kwlist.h + * + * The keyword lists are kept in their own source files for use by + * automatic tools. The exact representation of a keyword is determined + * by the PG_KEYWORD macro, which is not defined in this file; it can + * be defined by the caller for special purposes. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/pl/plpgsql/src/pl_unreserved_kwlist.h + * + *------------------------------------------------------------------------- + */ + +/* There is deliberately not an #ifndef PL_UNRESERVED_KWLIST_H here. */ + +/* + * List of (keyword-name, keyword-token-value) pairs. + * + * Be careful not to put the same word in both lists. Also be sure that + * pl_gram.y's unreserved_keyword production agrees with this list. + * + * !!WARNING!!: This list must be sorted by ASCII name, because binary + * search is used to locate entries. 
+ */ + +/* name, value */ +PG_KEYWORD("absolute", K_ABSOLUTE) +PG_KEYWORD("alias", K_ALIAS) +PG_KEYWORD("array", K_ARRAY) +PG_KEYWORD("assert", K_ASSERT) +PG_KEYWORD("backward", K_BACKWARD) +PG_KEYWORD("call", K_CALL) +PG_KEYWORD("close", K_CLOSE) +PG_KEYWORD("collate", K_COLLATE) +PG_KEYWORD("column", K_COLUMN) +PG_KEYWORD("column_name", K_COLUMN_NAME) +PG_KEYWORD("commit", K_COMMIT) +PG_KEYWORD("constant", K_CONSTANT) +PG_KEYWORD("constraint", K_CONSTRAINT) +PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME) +PG_KEYWORD("continue", K_CONTINUE) +PG_KEYWORD("current", K_CURRENT) +PG_KEYWORD("cursor", K_CURSOR) +PG_KEYWORD("datatype", K_DATATYPE) +PG_KEYWORD("debug", K_DEBUG) +PG_KEYWORD("default", K_DEFAULT) +PG_KEYWORD("detail", K_DETAIL) +PG_KEYWORD("diagnostics", K_DIAGNOSTICS) +PG_KEYWORD("do", K_DO) +PG_KEYWORD("dump", K_DUMP) +PG_KEYWORD("elseif", K_ELSIF) +PG_KEYWORD("elsif", K_ELSIF) +PG_KEYWORD("errcode", K_ERRCODE) +PG_KEYWORD("error", K_ERROR) +PG_KEYWORD("exception", K_EXCEPTION) +PG_KEYWORD("exit", K_EXIT) +PG_KEYWORD("fetch", K_FETCH) +PG_KEYWORD("first", K_FIRST) +PG_KEYWORD("forward", K_FORWARD) +PG_KEYWORD("get", K_GET) +PG_KEYWORD("hint", K_HINT) +PG_KEYWORD("import", K_IMPORT) +PG_KEYWORD("info", K_INFO) +PG_KEYWORD("insert", K_INSERT) +PG_KEYWORD("is", K_IS) +PG_KEYWORD("last", K_LAST) +PG_KEYWORD("log", K_LOG) +PG_KEYWORD("message", K_MESSAGE) +PG_KEYWORD("message_text", K_MESSAGE_TEXT) +PG_KEYWORD("move", K_MOVE) +PG_KEYWORD("next", K_NEXT) +PG_KEYWORD("no", K_NO) +PG_KEYWORD("notice", K_NOTICE) +PG_KEYWORD("open", K_OPEN) +PG_KEYWORD("option", K_OPTION) +PG_KEYWORD("perform", K_PERFORM) +PG_KEYWORD("pg_context", K_PG_CONTEXT) +PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME) +PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT) +PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL) +PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT) +PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS) +PG_KEYWORD("prior", K_PRIOR) 
+PG_KEYWORD("query", K_QUERY) +PG_KEYWORD("raise", K_RAISE) +PG_KEYWORD("relative", K_RELATIVE) +PG_KEYWORD("reset", K_RESET) +PG_KEYWORD("return", K_RETURN) +PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE) +PG_KEYWORD("reverse", K_REVERSE) +PG_KEYWORD("rollback", K_ROLLBACK) +PG_KEYWORD("row_count", K_ROW_COUNT) +PG_KEYWORD("rowtype", K_ROWTYPE) +PG_KEYWORD("schema", K_SCHEMA) +PG_KEYWORD("schema_name", K_SCHEMA_NAME) +PG_KEYWORD("scroll", K_SCROLL) +PG_KEYWORD("set", K_SET) +PG_KEYWORD("slice", K_SLICE) +PG_KEYWORD("sqlstate", K_SQLSTATE) +PG_KEYWORD("stacked", K_STACKED) +PG_KEYWORD("table", K_TABLE) +PG_KEYWORD("table_name", K_TABLE_NAME) +PG_KEYWORD("type", K_TYPE) +PG_KEYWORD("use_column", K_USE_COLUMN) +PG_KEYWORD("use_variable", K_USE_VARIABLE) +PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT) +PG_KEYWORD("warning", K_WARNING) diff --git a/src/tools/gen_keywordlist.pl b/src/tools/gen_keywordlist.pl new file mode 100644 index 0000000000..d764affaa6 --- /dev/null +++ b/src/tools/gen_keywordlist.pl @@ -0,0 +1,156 @@ +#---------------------------------------------------------------------- +# +# gen_keywordlist.pl +# Perl script that transforms a list of keywords into a ScanKeywordList +# data structure that can be passed to ScanKeywordLookup(). +# +# The input is a C header file containing a series of macro calls +# PG_KEYWORD("keyword", ...) +# Lines not starting with PG_KEYWORD are ignored. The keywords are +# implicitly numbered 0..N-1 in order of appearance in the header file. +# Currently, the keywords are required to appear in ASCII order. +# +# The output is a C header file that defines a "const ScanKeywordList" +# variable named according to the -v switch ("ScanKeywords" by default). +# The variable is marked "static" unless the -e switch is given. 
+# +# +# Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/tools/gen_keywordlist.pl +# +#---------------------------------------------------------------------- + +use strict; +use warnings; +use Getopt::Long; + +my $output_path = ''; +my $extern = 0; +my $varname = 'ScanKeywords'; + +GetOptions( + 'output:s' => \$output_path, + 'extern' => \$extern, + 'varname:s' => \$varname) || usage(); + +my $kw_input_file = shift @ARGV || die "No input file.\n"; + +# Make sure output_path ends in a slash if needed. +if ($output_path ne '' && substr($output_path, -1) ne '/') +{ + $output_path .= '/'; +} + +$kw_input_file =~ /(\w+)\.h$/ || die "Input file must be named something.h.\n"; +my $base_filename = $1 . '_d'; +my $kw_def_file = $output_path . $base_filename . '.h'; + +open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!\n"; +open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!\n"; + +# Opening boilerplate for keyword definition header. +printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename; +/*------------------------------------------------------------------------- + * + * %s.h + * List of keywords represented as a ScanKeywordList. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES + * ****************************** + * *** DO NOT EDIT THIS FILE! *** + * ****************************** + * + * It has been GENERATED by src/tools/gen_keywordlist.pl + * + *------------------------------------------------------------------------- + */ + +#ifndef %s_H +#define %s_H + +#include "common/kwlookup.h" + +EOM + +# Parse keyword header for names. +my @keywords; +while (<$kif>) +{ + if (/^PG_KEYWORD\("(\w+)"/) + { + push @keywords, $1; + } +} + +# Error out if the keyword names are not in ASCII order. +for my $i (0..$#keywords - 1) +{ + die qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n| + if ($keywords[$i] cmp $keywords[$i + 1]) >= 0; +} + +# Emit the string containing all the keywords. + +printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname; +print $kwdef join qq|\\0"\n\t"|, @keywords; +print $kwdef qq|";\n\n|; + +# Emit an array of numerical offsets which will be used to index into the +# keyword string. Also determine max keyword length. 
+ +printf $kwdef "static const uint16 %s_kw_offsets[] = {\n", $varname; + +my $offset = 0; +my $max_len = 0; +foreach my $name (@keywords) +{ + my $this_length = length($name); + + print $kwdef "\t$offset,\n"; + + # Calculate the cumulative offset of the next keyword, + # taking into account the null terminator. + $offset += $this_length + 1; + + # Update max keyword length. + $max_len = $this_length if $max_len < $this_length; +} + +print $kwdef "};\n\n"; + +# Emit a macro defining the number of keywords. +# (In some places it's useful to have access to that as a constant.) + +printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords; + +# Emit the struct that wraps all this lookup info into one variable. + +print $kwdef "static " if !$extern; +printf $kwdef "const ScanKeywordList %s = {\n", $varname; +printf $kwdef qq|\t%s_kw_string,\n|, $varname; +printf $kwdef qq|\t%s_kw_offsets,\n|, $varname; +printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname; +printf $kwdef qq|\t%d\n|, $max_len; +print $kwdef "};\n\n"; + +printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename; + + +sub usage +{ + die <<EOM; +Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] input_file + --output Output directory (default '.') + --varname Name for ScanKeywordList variable (default 'ScanKeywords') + --extern Allow the ScanKeywordList variable to be globally visible + +gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList. +The output filename is derived from the input file by inserting _d, +for example kwlist_d.h is produced from kwlist.h. 
+EOM +} diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 2921d193a1..56192f1b20 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -118,7 +118,7 @@ sub mkvcbuild our @pgcommonallfiles = qw( base64.c config_info.c controldata_utils.c exec.c file_perm.c ip.c - keywords.c link-canary.c md5.c + keywords.c kwlookup.c link-canary.c md5.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c saslprep.c scram-common.c string.c unicode_norm.c username.c wait_error.c); diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index eb2346b8d3..937bf184e2 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -409,6 +409,42 @@ sub GenerateFiles chdir('../../..'); } + if (IsNewer( + 'src/common/kwlist_d.h', + 'src/include/parser/kwlist.h')) + { + print "Generating kwlist_d.h...\n"; + system('perl src/tools/gen_keywordlist.pl --extern -o src/common src/include/parser/kwlist.h'); + } + + if (IsNewer( + 'src/pl/plpgsql/src/pl_reserved_kwlist_d.h', + 'src/pl/plpgsql/src/pl_reserved_kwlist.h') + || IsNewer( + 'src/pl/plpgsql/src/pl_unreserved_kwlist_d.h', + 'src/pl/plpgsql/src/pl_unreserved_kwlist.h')) + { + print "Generating pl_reserved_kwlist_d.h and pl_unreserved_kwlist_d.h...\n"; + chdir('src/pl/plpgsql/src'); + system('perl ../../../tools/gen_keywordlist.pl --varname ReservedPLKeywords pl_reserved_kwlist.h'); + system('perl ../../../tools/gen_keywordlist.pl --varname UnreservedPLKeywords pl_unreserved_kwlist.h'); + chdir('../../../..'); + } + + if (IsNewer( + 'src/interfaces/ecpg/preproc/c_kwlist_d.h', + 'src/interfaces/ecpg/preproc/c_kwlist.h') + || IsNewer( + 'src/interfaces/ecpg/preproc/ecpg_kwlist_d.h', + 'src/interfaces/ecpg/preproc/ecpg_kwlist.h')) + { + print "Generating c_kwlist_d.h and ecpg_kwlist_d.h...\n"; + chdir('src/interfaces/ecpg/preproc'); + system('perl ../../../tools/gen_keywordlist.pl --varname ScanCKeywords c_kwlist.h'); + system('perl 
../../../tools/gen_keywordlist.pl --varname ScanECPGKeywords ecpg_kwlist.h'); + chdir('../../../..'); + } + if (IsNewer( 'src/interfaces/ecpg/preproc/preproc.y', 'src/backend/parser/gram.y')) diff --git a/src/tools/msvc/clean.bat b/src/tools/msvc/clean.bat index 7a23a2b55f..069d6eb569 100755 --- a/src/tools/msvc/clean.bat +++ b/src/tools/msvc/clean.bat @@ -64,6 +64,11 @@ if %DIST%==1 if exist src\pl\tcl\pltclerrcodes.h del /q src\pl\tcl\pltclerrcodes if %DIST%==1 if exist src\backend\utils\sort\qsort_tuple.c del /q src\backend\utils\sort\qsort_tuple.c if %DIST%==1 if exist src\bin\psql\sql_help.c del /q src\bin\psql\sql_help.c if %DIST%==1 if exist src\bin\psql\sql_help.h del /q src\bin\psql\sql_help.h +if %DIST%==1 if exist src\common\kwlist_d.h del /q src\common\kwlist_d.h +if %DIST%==1 if exist src\pl\plpgsql\src\pl_reserved_kwlist_d.h del /q src\pl\plpgsql\src\pl_reserved_kwlist_d.h +if %DIST%==1 if exist src\pl\plpgsql\src\pl_unreserved_kwlist_d.h del /q src\pl\plpgsql\src\pl_unreserved_kwlist_d.h +if %DIST%==1 if exist src\interfaces\ecpg\preproc\c_kwlist_d.h del /q src\interfaces\ecpg\preproc\c_kwlist_d.h +if %DIST%==1 if exist src\interfaces\ecpg\preproc\ecpg_kwlist_d.h del /q src\interfaces\ecpg\preproc\ecpg_kwlist_d.h if %DIST%==1 if exist src\interfaces\ecpg\preproc\preproc.y del /q src\interfaces\ecpg\preproc\preproc.y if %DIST%==1 if exist src\backend\catalog\postgres.bki del /q src\backend\catalog\postgres.bki if %DIST%==1 if exist src\backend\catalog\postgres.description del /q src\backend\catalog\postgres.description -- 2.40.0