granicus.if.org Git - postgresql/commitdiff
Replace the data structure used for keyword lookup.
author: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 6 Jan 2019 22:02:57 +0000 (17:02 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 6 Jan 2019 22:02:57 +0000 (17:02 -0500)
Previously, ScanKeywordLookup was passed an array of string pointers.
This had some performance deficiencies: the strings themselves might
be scattered all over the place depending on the compiler (and some
quick checking shows that at least with gcc-on-Linux, they indeed
weren't reliably close together).  That led to very cache-unfriendly
behavior as the binary search touched strings in many different pages.
Also, depending on the platform, the string pointers might need to
be adjusted at program start, so that they couldn't be simple constant
data.  And the ScanKeyword struct had been designed with an eye to
32-bit machines originally; on 64-bit it requires 16 bytes per
keyword, making it even more cache-unfriendly.

Redesign so that the keyword strings themselves are allocated
consecutively (as part of one big char-string constant), thereby
eliminating the touch-lots-of-unrelated-pages syndrome.  And get
rid of the ScanKeyword array in favor of three separate arrays:
uint16 offsets into the keyword array, uint16 token codes, and
uint8 keyword categories.  That reduces the overhead per keyword
to 5 bytes instead of 16 (even less in programs that only need
one of the token codes and categories); moreover, the binary search
only touches the offsets array, further reducing its cache footprint.
This also lets us put the token codes somewhere other than where the
keyword strings are, which avoids some unpleasant build dependencies.

While we're at it, wrap the data used by ScanKeywordLookup into
a struct that can be treated as an opaque type by most callers.
That doesn't change things much right now, but it will make it
less painful to switch to a hash-based lookup method, as is being
discussed in the mailing list thread.

Most of the change here is associated with adding a generator
script that can build the new data structure from the same
list-of-PG_KEYWORD header representation we used before.
The PG_KEYWORD lists that plpgsql and ecpg used to embed in
their scanner .c files have to be moved into headers, and the
Makefiles have to be taught to invoke the generator script.
This work is also necessary if we're to consider hash-based lookup,
since the generator script is what would be responsible for
constructing a hash table.

Aside from saving a few kilobytes in each program that includes
the keyword table, this seems to speed up raw parsing (flex+bison)
by a few percent.  So it's worth doing even as it stands, though
we think we can gain even more with a follow-on patch to switch
to hash-based lookup.

John Naylor, with further hacking by me

Discussion: https://postgr.es/m/CAJVSVGXdFVU2sgym89XPL=Lv1zOS5=EHHQ8XWNzFL=mTXkKMLw@mail.gmail.com

32 files changed:
contrib/pg_stat_statements/pg_stat_statements.c
src/backend/parser/parser.c
src/backend/parser/scan.l
src/backend/utils/adt/misc.c
src/backend/utils/adt/ruleutils.c
src/common/.gitignore [new file with mode: 0644]
src/common/Makefile
src/common/keywords.c
src/common/kwlookup.c [new file with mode: 0644]
src/fe_utils/string_utils.c
src/include/common/keywords.h
src/include/common/kwlookup.h [new file with mode: 0644]
src/include/parser/kwlist.h
src/include/parser/scanner.h
src/interfaces/ecpg/preproc/.gitignore
src/interfaces/ecpg/preproc/Makefile
src/interfaces/ecpg/preproc/c_keywords.c
src/interfaces/ecpg/preproc/c_kwlist.h [new file with mode: 0644]
src/interfaces/ecpg/preproc/ecpg_keywords.c
src/interfaces/ecpg/preproc/ecpg_kwlist.h [new file with mode: 0644]
src/interfaces/ecpg/preproc/keywords.c
src/interfaces/ecpg/preproc/pgc.l
src/interfaces/ecpg/preproc/preproc_extern.h
src/pl/plpgsql/src/.gitignore
src/pl/plpgsql/src/Makefile
src/pl/plpgsql/src/pl_reserved_kwlist.h [new file with mode: 0644]
src/pl/plpgsql/src/pl_scanner.c
src/pl/plpgsql/src/pl_unreserved_kwlist.h [new file with mode: 0644]
src/tools/gen_keywordlist.pl [new file with mode: 0644]
src/tools/msvc/Mkvcbuild.pm
src/tools/msvc/Solution.pm
src/tools/msvc/clean.bat

index e8ef966bb51a7a174b8d1362132839fae0ecbeb6..9131991b837e83f73e23a8bcae1feb89caa05356 100644 (file)
@@ -3075,8 +3075,8 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
        /* initialize the flex scanner --- should match raw_parser() */
        yyscanner = scanner_init(query,
                                                         &yyextra,
-                                                        ScanKeywords,
-                                                        NumScanKeywords);
+                                                        &ScanKeywords,
+                                                        ScanKeywordTokens);
 
        /* we don't want to re-emit any escape string warnings */
        yyextra.escape_string_warning = false;
index 7e9b1222fd8a718b9c41f09ab1007f5d158adda7..4c0c258cd7f346ec33ff03590709ef8c8a1e4170 100644 (file)
@@ -41,7 +41,7 @@ raw_parser(const char *str)
 
        /* initialize the flex scanner */
        yyscanner = scanner_init(str, &yyextra.core_yy_extra,
-                                                        ScanKeywords, NumScanKeywords);
+                                                        &ScanKeywords, ScanKeywordTokens);
 
        /* base_yylex() only needs this much initialization */
        yyextra.have_lookahead = false;
index fbeb86f890e6de6013b1794f922967da4c0d2aff..e1cae859e8feb7d5c99ca42783638f42604bc33f 100644 (file)
@@ -66,6 +66,21 @@ int                  backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
 bool           escape_string_warning = true;
 bool           standard_conforming_strings = true;
 
+/*
+ * Constant data exported from this file.  This array maps from the
+ * zero-based keyword numbers returned by ScanKeywordLookup to the
+ * Bison token numbers needed by gram.y.  This is exported because
+ * callers need to pass it to scanner_init, if they are using the
+ * standard keyword list ScanKeywords.
+ */
+#define PG_KEYWORD(kwname, value, category) value,
+
+const uint16 ScanKeywordTokens[] = {
+#include "parser/kwlist.h"
+};
+
+#undef PG_KEYWORD
+
 /*
  * Set the type of YYSTYPE.
  */
@@ -504,18 +519,18 @@ other                     .
                                         * We will pass this along as a normal character string,
                                         * but preceded with an internally-generated "NCHAR".
                                         */
-                                       const ScanKeyword *keyword;
+                                       int             kwnum;
 
                                        SET_YYLLOC();
                                        yyless(1);      /* eat only 'n' this time */
 
-                                       keyword = ScanKeywordLookup("nchar",
-                                                                                               yyextra->keywords,
-                                                                                               yyextra->num_keywords);
-                                       if (keyword != NULL)
+                                       kwnum = ScanKeywordLookup("nchar",
+                                                                                         yyextra->keywordlist);
+                                       if (kwnum >= 0)
                                        {
-                                               yylval->keyword = keyword->name;
-                                               return keyword->value;
+                                               yylval->keyword = GetScanKeyword(kwnum,
+                                                                                                                yyextra->keywordlist);
+                                               return yyextra->keyword_tokens[kwnum];
                                        }
                                        else
                                        {
@@ -1021,19 +1036,19 @@ other                   .
 
 
 {identifier}   {
-                                       const ScanKeyword *keyword;
+                                       int                     kwnum;
                                        char       *ident;
 
                                        SET_YYLLOC();
 
                                        /* Is it a keyword? */
-                                       keyword = ScanKeywordLookup(yytext,
-                                                                                               yyextra->keywords,
-                                                                                               yyextra->num_keywords);
-                                       if (keyword != NULL)
+                                       kwnum = ScanKeywordLookup(yytext,
+                                                                                         yyextra->keywordlist);
+                                       if (kwnum >= 0)
                                        {
-                                               yylval->keyword = keyword->name;
-                                               return keyword->value;
+                                               yylval->keyword = GetScanKeyword(kwnum,
+                                                                                                                yyextra->keywordlist);
+                                               return yyextra->keyword_tokens[kwnum];
                                        }
 
                                        /*
@@ -1142,8 +1157,8 @@ scanner_yyerror(const char *message, core_yyscan_t yyscanner)
 core_yyscan_t
 scanner_init(const char *str,
                         core_yy_extra_type *yyext,
-                        const ScanKeyword *keywords,
-                        int num_keywords)
+                        const ScanKeywordList *keywordlist,
+                        const uint16 *keyword_tokens)
 {
        Size            slen = strlen(str);
        yyscan_t        scanner;
@@ -1153,8 +1168,8 @@ scanner_init(const char *str,
 
        core_yyset_extra(yyext, scanner);
 
-       yyext->keywords = keywords;
-       yyext->num_keywords = num_keywords;
+       yyext->keywordlist = keywordlist;
+       yyext->keyword_tokens = keyword_tokens;
 
        yyext->backslash_quote = backslash_quote;
        yyext->escape_string_warning = escape_string_warning;
index 7b69b824e1fa82e94747559d9a55180093bd0299..746b7d2fbacbdede2dea8acb2458698bb7b28d58 100644 (file)
@@ -417,15 +417,17 @@ pg_get_keywords(PG_FUNCTION_ARGS)
 
        funcctx = SRF_PERCALL_SETUP();
 
-       if (funcctx->call_cntr < NumScanKeywords)
+       if (funcctx->call_cntr < ScanKeywords.num_keywords)
        {
                char       *values[3];
                HeapTuple       tuple;
 
                /* cast-away-const is ugly but alternatives aren't much better */
-               values[0] = unconstify(char *, ScanKeywords[funcctx->call_cntr].name);
+               values[0] = unconstify(char *,
+                                                          GetScanKeyword(funcctx->call_cntr,
+                                                                                         &ScanKeywords));
 
-               switch (ScanKeywords[funcctx->call_cntr].category)
+               switch (ScanKeywordCategories[funcctx->call_cntr])
                {
                        case UNRESERVED_KEYWORD:
                                values[1] = "U";
index 368eacf68e8a63cb571b09dd425537a5c978e134..77811f6818aae6559dbb8bce393fe3e9e77ae887 100644 (file)
@@ -10601,11 +10601,9 @@ quote_identifier(const char *ident)
                 * Note: ScanKeywordLookup() does case-insensitive comparison, but
                 * that's fine, since we already know we have all-lower-case.
                 */
-               const ScanKeyword *keyword = ScanKeywordLookup(ident,
-                                                                                                          ScanKeywords,
-                                                                                                          NumScanKeywords);
+               int                     kwnum = ScanKeywordLookup(ident, &ScanKeywords);
 
-               if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
+               if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
                        safe = false;
        }
 
diff --git a/src/common/.gitignore b/src/common/.gitignore
new file mode 100644 (file)
index 0000000..ffa3284
--- /dev/null
@@ -0,0 +1 @@
+/kwlist_d.h
index ec8139f014e21bd6a25ab16e64fc35c551dd6c60..317b071e026e0b6b0faf8b4afb43859bb2b36da6 100644 (file)
@@ -41,11 +41,11 @@ override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\""
 override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\""
 override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\""
 
-override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
+override CPPFLAGS := -DFRONTEND -I. -I$(top_srcdir)/src/common $(CPPFLAGS)
 LIBS += $(PTHREAD_LIBS)
 
 OBJS_COMMON = base64.o config_info.o controldata_utils.o exec.o file_perm.o \
-       ip.o keywords.o link-canary.o md5.o pg_lzcompress.o \
+       ip.o keywords.o kwlookup.o link-canary.o md5.o pg_lzcompress.o \
        pgfnames.o psprintf.o relpath.o \
        rmtree.o saslprep.o scram-common.o string.o unicode_norm.o \
        username.o wait_error.o
@@ -65,6 +65,8 @@ OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o)
 
 all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
 
+distprep: kwlist_d.h
+
 # libpgcommon is needed by some contrib
 install: all installdirs
        $(INSTALL_STLIB) libpgcommon.a '$(DESTDIR)$(libdir)/libpgcommon.a'
@@ -115,16 +117,18 @@ libpgcommon_srv.a: $(OBJS_SRV)
 %_srv.o: %.c %.o
        $(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
 
-# Dependencies of keywords.o need to be managed explicitly to make sure
-# that you don't get broken parsing code, even in a non-enable-depend build.
-# Note that gram.h isn't required for the frontend versions of keywords.o.
-$(top_builddir)/src/include/parser/gram.h: $(top_srcdir)/src/backend/parser/gram.y
-       $(MAKE) -C $(top_builddir)/src/backend $(top_builddir)/src/include/parser/gram.h
+# generate SQL keyword lookup table to be included into keywords*.o.
+kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(top_srcdir)/src/tools/gen_keywordlist.pl
+       $(PERL) $(top_srcdir)/src/tools/gen_keywordlist.pl --extern $<
 
-keywords.o: $(top_srcdir)/src/include/parser/kwlist.h
-keywords_shlib.o: $(top_srcdir)/src/include/parser/kwlist.h
-keywords_srv.o: $(top_builddir)/src/include/parser/gram.h $(top_srcdir)/src/include/parser/kwlist.h
+# Dependencies of keywords*.o need to be managed explicitly to make sure
+# that you don't get broken parsing code, even in a non-enable-depend build.
+keywords.o keywords_shlib.o keywords_srv.o: kwlist_d.h
 
-clean distclean maintainer-clean:
+# kwlist_d.h is in the distribution tarball, so it is not cleaned here.
+clean distclean:
        rm -f libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
        rm -f $(OBJS_FRONTEND) $(OBJS_SHLIB) $(OBJS_SRV)
+
+maintainer-clean: distclean
+       rm -f kwlist_d.h
index 6f99090a2959581fcad8f8d2bd605336ca3a7b76..84f779feb91082e848c2907e82f02d79a75a5e7d 100644 (file)
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * keywords.c
- *       lexical token lookup for key words in PostgreSQL
+ *       PostgreSQL's list of SQL keywords
  *
  *
  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  *
  *-------------------------------------------------------------------------
  */
-#ifndef FRONTEND
-#include "postgres.h"
-#else
-#include "postgres_fe.h"
-#endif
+#include "c.h"
 
-#ifndef FRONTEND
-
-#include "parser/gramparse.h"
+#include "common/keywords.h"
 
-#define PG_KEYWORD(a,b,c) {a,b,c},
 
-#else
+/* ScanKeywordList lookup data for SQL keywords */
 
-#include "common/keywords.h"
-
-/*
- * We don't need the token number for frontend uses, so leave it out to avoid
- * requiring backend headers that won't compile cleanly here.
- */
-#define PG_KEYWORD(a,b,c) {a,0,c},
+#include "kwlist_d.h"
 
-#endif                                                 /* FRONTEND */
+/* Keyword categories for SQL keywords */
 
+#define PG_KEYWORD(kwname, value, category) category,
 
-const ScanKeyword ScanKeywords[] = {
+const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = {
 #include "parser/kwlist.h"
 };
 
-const int      NumScanKeywords = lengthof(ScanKeywords);
-
-
-/*
- * ScanKeywordLookup - see if a given word is a keyword
- *
- * The table to be searched is passed explicitly, so that this can be used
- * to search keyword lists other than the standard list appearing above.
- *
- * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
- *
- * The match is done case-insensitively.  Note that we deliberately use a
- * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
- * even if we are in a locale where tolower() would produce more or different
- * translations.  This is to conform to the SQL99 spec, which says that
- * keywords are to be matched in this way even though non-keyword identifiers
- * receive a different case-normalization mapping.
- */
-const ScanKeyword *
-ScanKeywordLookup(const char *text,
-                                 const ScanKeyword *keywords,
-                                 int num_keywords)
-{
-       int                     len,
-                               i;
-       char            word[NAMEDATALEN];
-       const ScanKeyword *low;
-       const ScanKeyword *high;
-
-       len = strlen(text);
-       /* We assume all keywords are shorter than NAMEDATALEN. */
-       if (len >= NAMEDATALEN)
-               return NULL;
-
-       /*
-        * Apply an ASCII-only downcasing.  We must not use tolower() since it may
-        * produce the wrong translation in some locales (eg, Turkish).
-        */
-       for (i = 0; i < len; i++)
-       {
-               char            ch = text[i];
-
-               if (ch >= 'A' && ch <= 'Z')
-                       ch += 'a' - 'A';
-               word[i] = ch;
-       }
-       word[len] = '\0';
-
-       /*
-        * Now do a binary search using plain strcmp() comparison.
-        */
-       low = keywords;
-       high = keywords + (num_keywords - 1);
-       while (low <= high)
-       {
-               const ScanKeyword *middle;
-               int                     difference;
-
-               middle = low + (high - low) / 2;
-               difference = strcmp(middle->name, word);
-               if (difference == 0)
-                       return middle;
-               else if (difference < 0)
-                       low = middle + 1;
-               else
-                       high = middle - 1;
-       }
-
-       return NULL;
-}
+#undef PG_KEYWORD
diff --git a/src/common/kwlookup.c b/src/common/kwlookup.c
new file mode 100644 (file)
index 0000000..d72842e
--- /dev/null
@@ -0,0 +1,94 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlookup.c
+ *       Key word lookup for PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       src/common/kwlookup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/kwlookup.h"
+
+
+/*
+ * ScanKeywordLookup - see if a given word is a keyword
+ *
+ * The list of keywords to be matched against is passed as a ScanKeywordList.
+ *
+ * Returns the keyword number (0..N-1) of the keyword, or -1 if no match.
+ * Callers typically use the keyword number to index into information
+ * arrays, but that is no concern of this code.
+ *
+ * The match is done case-insensitively.  Note that we deliberately use a
+ * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
+ * even if we are in a locale where tolower() would produce more or different
+ * translations.  This is to conform to the SQL99 spec, which says that
+ * keywords are to be matched in this way even though non-keyword identifiers
+ * receive a different case-normalization mapping.
+ */
+int
+ScanKeywordLookup(const char *text,
+                                 const ScanKeywordList *keywords)
+{
+       int                     len,
+                               i;
+       char            word[NAMEDATALEN];
+       const char *kw_string;
+       const uint16 *kw_offsets;
+       const uint16 *low;
+       const uint16 *high;
+
+       len = strlen(text);
+
+       if (len > keywords->max_kw_len)
+               return -1;                              /* too long to be any keyword */
+
+       /* We assume all keywords are shorter than NAMEDATALEN. */
+       Assert(len < NAMEDATALEN);
+
+       /*
+        * Apply an ASCII-only downcasing.  We must not use tolower() since it may
+        * produce the wrong translation in some locales (eg, Turkish).
+        */
+       for (i = 0; i < len; i++)
+       {
+               char            ch = text[i];
+
+               if (ch >= 'A' && ch <= 'Z')
+                       ch += 'a' - 'A';
+               word[i] = ch;
+       }
+       word[len] = '\0';
+
+       /*
+        * Now do a binary search using plain strcmp() comparison.
+        */
+       kw_string = keywords->kw_string;
+       kw_offsets = keywords->kw_offsets;
+       low = kw_offsets;
+       high = kw_offsets + (keywords->num_keywords - 1);
+       while (low <= high)
+       {
+               const uint16 *middle;
+               int                     difference;
+
+               middle = low + (high - low) / 2;
+               difference = strcmp(kw_string + *middle, word);
+               if (difference == 0)
+                       return middle - kw_offsets;
+               else if (difference < 0)
+                       low = middle + 1;
+               else
+                       high = middle - 1;
+       }
+
+       return -1;
+}
index 9b47b62f41915ea9e7a94cb7aa38210e40ba42a2..5c1732aabea80430e68f2fc70fcca055ccf43c97 100644 (file)
@@ -104,11 +104,9 @@ fmtId(const char *rawid)
                 * Note: ScanKeywordLookup() does case-insensitive comparison, but
                 * that's fine, since we already know we have all-lower-case.
                 */
-               const ScanKeyword *keyword = ScanKeywordLookup(rawid,
-                                                                                                          ScanKeywords,
-                                                                                                          NumScanKeywords);
+               int                     kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
 
-               if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
+               if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
                        need_quotes = true;
        }
 
index 8f22f32548c8fc2f3cb9f1ead85f68377e4d5a43..fb18858a53ee80e069ab3ab1728f915ebb189b08 100644 (file)
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * keywords.h
- *       lexical token lookup for key words in PostgreSQL
+ *       PostgreSQL's list of SQL keywords
  *
  *
  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 #ifndef KEYWORDS_H
 #define KEYWORDS_H
 
+#include "common/kwlookup.h"
+
 /* Keyword categories --- should match lists in gram.y */
 #define UNRESERVED_KEYWORD             0
 #define COL_NAME_KEYWORD               1
 #define TYPE_FUNC_NAME_KEYWORD 2
 #define RESERVED_KEYWORD               3
 
-
-typedef struct ScanKeyword
-{
-       const char *name;                       /* in lower case */
-       int16           value;                  /* grammar's token code */
-       int16           category;               /* see codes above */
-} ScanKeyword;
-
 #ifndef FRONTEND
-extern PGDLLIMPORT const ScanKeyword ScanKeywords[];
-extern PGDLLIMPORT const int NumScanKeywords;
+extern PGDLLIMPORT const ScanKeywordList ScanKeywords;
+extern PGDLLIMPORT const uint8 ScanKeywordCategories[];
 #else
-extern const ScanKeyword ScanKeywords[];
-extern const int NumScanKeywords;
+extern const ScanKeywordList ScanKeywords;
+extern const uint8 ScanKeywordCategories[];
 #endif
 
-
-extern const ScanKeyword *ScanKeywordLookup(const char *text,
-                                 const ScanKeyword *keywords,
-                                 int num_keywords);
-
 #endif                                                 /* KEYWORDS_H */
diff --git a/src/include/common/kwlookup.h b/src/include/common/kwlookup.h
new file mode 100644 (file)
index 0000000..39efb35
--- /dev/null
@@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlookup.h
+ *       Key word lookup for PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/common/kwlookup.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef KWLOOKUP_H
+#define KWLOOKUP_H
+
+/*
+ * This struct contains the data needed by ScanKeywordLookup to perform a
+ * search within a set of keywords.  The contents are typically generated by
+ * src/tools/gen_keywordlist.pl from a header containing PG_KEYWORD macros.
+ */
+typedef struct ScanKeywordList
+{
+       const char *kw_string;          /* all keywords in order, separated by \0 */
+       const uint16 *kw_offsets;       /* offsets to the start of each keyword */
+       int                     num_keywords;   /* number of keywords */
+       int                     max_kw_len;             /* length of longest keyword */
+} ScanKeywordList;
+
+
+extern int     ScanKeywordLookup(const char *text, const ScanKeywordList *keywords);
+
+/* Code that wants to retrieve the text of the N'th keyword should use this. */
+static inline const char *
+GetScanKeyword(int n, const ScanKeywordList *keywords)
+{
+       return keywords->kw_string + keywords->kw_offsets[n];
+}
+
+#endif                                                 /* KWLOOKUP_H */
index 0256d539981fbb499d8b2ea555c73c3da35eea8b..b8902d34030e53b0426b6277548d313162c1586e 100644 (file)
@@ -2,7 +2,7 @@
  *
  * kwlist.h
  *
- * The keyword list is kept in its own source file for possible use by
+ * The keyword lists are kept in their own source files for use by
  * automatic tools.  The exact representation of a keyword is determined
  * by the PG_KEYWORD macro, which is not defined in this file; it can
  * be defined by the caller for special purposes.
index 009550f424678e20699639201193fef29a20c632..91e1c836d22be58f1d4f9967b4334f8d5887be60 100644 (file)
@@ -73,10 +73,10 @@ typedef struct core_yy_extra_type
        Size            scanbuflen;
 
        /*
-        * The keyword list to use.
+        * The keyword list to use, and the associated grammar token codes.
         */
-       const ScanKeyword *keywords;
-       int                     num_keywords;
+       const ScanKeywordList *keywordlist;
+       const uint16 *keyword_tokens;
 
        /*
         * Scanner settings to use.  These are initialized from the corresponding
@@ -116,11 +116,14 @@ typedef struct core_yy_extra_type
 typedef void *core_yyscan_t;
 
 
+/* Constant data exported from parser/scan.l */
+extern PGDLLIMPORT const uint16 ScanKeywordTokens[];
+
 /* Entry points in parser/scan.l */
 extern core_yyscan_t scanner_init(const char *str,
                         core_yy_extra_type *yyext,
-                        const ScanKeyword *keywords,
-                        int num_keywords);
+                        const ScanKeywordList *keywordlist,
+                        const uint16 *keyword_tokens);
 extern void scanner_finish(core_yyscan_t yyscanner);
 extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
                   core_yyscan_t yyscanner);
index 38ae2fe4d928e650a8a163b3477f6a8844d31d69..958a826f9e83a5059470ccd1837f429de718dab8 100644 (file)
@@ -2,5 +2,7 @@
 /preproc.c
 /preproc.h
 /pgc.c
+/c_kwlist_d.h
+/ecpg_kwlist_d.h
 /typename.c
 /ecpg
index 69ddd8e9f71e5779b7ff8a91310012d3e8780c14..b5b74a3b81ed4cae2afceffaba03f9f94b5fcdad 100644 (file)
@@ -28,6 +28,8 @@ OBJS= preproc.o pgc.o type.o ecpg.o output.o parser.o \
        keywords.o c_keywords.o ecpg_keywords.o typename.o descriptor.o variable.o \
        $(WIN32RES)
 
+GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl
+
 # Suppress parallel build to avoid a bug in GNU make 3.82
 # (see comments in ../Makefile)
 ifeq ($(MAKE_VERSION),3.82)
@@ -53,9 +55,20 @@ preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg.
        $(PERL) $(srcdir)/parse.pl $(srcdir) < $< > $@
        $(PERL) $(srcdir)/check_rules.pl $(srcdir) $<
 
+# generate keyword headers
+c_kwlist_d.h: c_kwlist.h $(GEN_KEYWORDLIST)
+       $(PERL) $(GEN_KEYWORDLIST) --varname ScanCKeywords $<
+
+ecpg_kwlist_d.h: ecpg_kwlist.h $(GEN_KEYWORDLIST)
+       $(PERL) $(GEN_KEYWORDLIST) --varname ScanECPGKeywords $<
+
+# Force these dependencies to be known even without dependency info built:
 ecpg_keywords.o c_keywords.o keywords.o preproc.o pgc.o parser.o: preproc.h
+ecpg_keywords.o: ecpg_kwlist_d.h
+c_keywords.o: c_kwlist_d.h
+keywords.o: $(top_srcdir)/src/include/parser/kwlist.h
 
-distprep: preproc.y preproc.c preproc.h pgc.c
+distprep: preproc.y preproc.c preproc.h pgc.c c_kwlist_d.h ecpg_kwlist_d.h
 
 install: all installdirs
        $(INSTALL_PROGRAM) ecpg$(X) '$(DESTDIR)$(bindir)'
@@ -66,12 +79,11 @@ installdirs:
 uninstall:
        rm -f '$(DESTDIR)$(bindir)/ecpg$(X)'
 
+# preproc.y, preproc.c, preproc.h, pgc.c, c_kwlist_d.h, and ecpg_kwlist_d.h
+# are in the distribution tarball, so they are not cleaned here.
 clean distclean:
        rm -f *.o ecpg$(X)
        rm -f typename.c
 
-# `make distclean' must not remove preproc.y, preproc.c, preproc.h, or pgc.c
-# since we want to ship those files in the distribution for people with
-# inadequate tools.  Instead, `make maintainer-clean' will remove them.
 maintainer-clean: distclean
-       rm -f preproc.y preproc.c preproc.h pgc.c
+       rm -f preproc.y preproc.c preproc.h pgc.c c_kwlist_d.h ecpg_kwlist_d.h
index c367dbfc207d7d02a9737f85ddc489025c691fb3..38ddf6f135974911e7ae6bcb0af493e1b3fc3821 100644 (file)
 #include "preproc_extern.h"
 #include "preproc.h"
 
-/*
- * List of (keyword-name, keyword-token-value) pairs.
- *
- * !!WARNING!!: This list must be sorted, because binary
- *              search is used to locate entries.
- */
-static const ScanKeyword ScanCKeywords[] = {
-       /* name, value, category */
+/* ScanKeywordList lookup data for C keywords */
+#include "c_kwlist_d.h"
 
-       /*
-        * category is not needed in ecpg, it is only here so we can share the
-        * data structure with the backend
-        */
-       {"VARCHAR", VARCHAR, 0},
-       {"auto", S_AUTO, 0},
-       {"bool", SQL_BOOL, 0},
-       {"char", CHAR_P, 0},
-       {"const", S_CONST, 0},
-       {"enum", ENUM_P, 0},
-       {"extern", S_EXTERN, 0},
-       {"float", FLOAT_P, 0},
-       {"hour", HOUR_P, 0},
-       {"int", INT_P, 0},
-       {"long", SQL_LONG, 0},
-       {"minute", MINUTE_P, 0},
-       {"month", MONTH_P, 0},
-       {"register", S_REGISTER, 0},
-       {"second", SECOND_P, 0},
-       {"short", SQL_SHORT, 0},
-       {"signed", SQL_SIGNED, 0},
-       {"static", S_STATIC, 0},
-       {"struct", SQL_STRUCT, 0},
-       {"to", TO, 0},
-       {"typedef", S_TYPEDEF, 0},
-       {"union", UNION, 0},
-       {"unsigned", SQL_UNSIGNED, 0},
-       {"varchar", VARCHAR, 0},
-       {"volatile", S_VOLATILE, 0},
-       {"year", YEAR_P, 0},
+/*
+ * Token codes for C keywords.
+ *
+ * Including c_kwlist.h with PG_KEYWORD defined as below keeps only each
+ * entry's token value, so element N is the token for the Nth keyword listed
+ * there.  ScanCKeywordLookup() relies on that when it indexes this array by
+ * a keyword's position in ScanCKeywords.kw_offsets.
+ */
+#define PG_KEYWORD(kwname, value) value,
+
+static const uint16 ScanCKeywordTokens[] = {
+#include "c_kwlist.h"
 };
 
+#undef PG_KEYWORD
+
 
 /*
+ * ScanCKeywordLookup - see if a given word is a keyword
+ *
+ * Returns the token value of the keyword, or -1 if no match.
+ *
+ * text must be a NUL-terminated string.  ScanCKeywords.kw_offsets[] gives
+ * each keyword's offset within ScanCKeywords.kw_string; the search steps
+ * through that offsets array and compares against the string found at each
+ * offset.  A match's position in the offsets array is also its index into
+ * ScanCKeywordTokens[].
+ *
  * Do a binary search using plain strcmp() comparison.  This is much like
  * ScanKeywordLookup(), except we want case-sensitive matching.
  */
-const ScanKeyword *
+int
 ScanCKeywordLookup(const char *text)
 {
-       const ScanKeyword *low = &ScanCKeywords[0];
-       const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1];
+       const char *kw_string;
+       const uint16 *kw_offsets;
+       const uint16 *low;
+       const uint16 *high;
+
+       if (strlen(text) > ScanCKeywords.max_kw_len)
+               return -1;                              /* too long to be any keyword */
+
+       kw_string = ScanCKeywords.kw_string;
+       kw_offsets = ScanCKeywords.kw_offsets;
+       low = kw_offsets;
+       high = kw_offsets + (ScanCKeywords.num_keywords - 1);
 
        while (low <= high)
        {
-               const ScanKeyword *middle;
+               const uint16 *middle;
                int                     difference;
 
                middle = low + (high - low) / 2;
-               difference = strcmp(middle->name, text);
+               difference = strcmp(kw_string + *middle, text);
                if (difference == 0)
-                       return middle;
+                       return ScanCKeywordTokens[middle - kw_offsets];
                else if (difference < 0)
                        low = middle + 1;
                else
                        high = middle - 1;
        }
 
-       return NULL;
+       return -1;
 }
diff --git a/src/interfaces/ecpg/preproc/c_kwlist.h b/src/interfaces/ecpg/preproc/c_kwlist.h
new file mode 100644 (file)
index 0000000..4545505
--- /dev/null
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * c_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/interfaces/ecpg/preproc/c_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef C_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *              search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("VARCHAR", VARCHAR)
+PG_KEYWORD("auto", S_AUTO)
+PG_KEYWORD("bool", SQL_BOOL)
+PG_KEYWORD("char", CHAR_P)
+PG_KEYWORD("const", S_CONST)
+PG_KEYWORD("enum", ENUM_P)
+PG_KEYWORD("extern", S_EXTERN)
+PG_KEYWORD("float", FLOAT_P)
+PG_KEYWORD("hour", HOUR_P)
+PG_KEYWORD("int", INT_P)
+PG_KEYWORD("long", SQL_LONG)
+PG_KEYWORD("minute", MINUTE_P)
+PG_KEYWORD("month", MONTH_P)
+PG_KEYWORD("register", S_REGISTER)
+PG_KEYWORD("second", SECOND_P)
+PG_KEYWORD("short", SQL_SHORT)
+PG_KEYWORD("signed", SQL_SIGNED)
+PG_KEYWORD("static", S_STATIC)
+PG_KEYWORD("struct", SQL_STRUCT)
+PG_KEYWORD("to", TO)
+PG_KEYWORD("typedef", S_TYPEDEF)
+PG_KEYWORD("union", UNION)
+PG_KEYWORD("unsigned", SQL_UNSIGNED)
+PG_KEYWORD("varchar", VARCHAR)
+PG_KEYWORD("volatile", S_VOLATILE)
+PG_KEYWORD("year", YEAR_P)
index 37c97e162d7dbf9486513fa1e303ff21f66e56f6..4839c37bbca92f0066437b1b13493ecec985752a 100644 (file)
 #include "preproc_extern.h"
 #include "preproc.h"
 
-/*
- * List of (keyword-name, keyword-token-value) pairs.
- *
- * !!WARNING!!: This list must be sorted, because binary
- *              search is used to locate entries.
- */
-static const ScanKeyword ECPGScanKeywords[] = {
-       /* name, value, category */
+/* ScanKeywordList lookup data for ECPG keywords */
+#include "ecpg_kwlist_d.h"
+
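+/*
+ * Token codes for ECPG keywords.  Element N is the token for the Nth entry
+ * of ecpg_kwlist.h, which is how ScanECPGKeywordLookup() maps a keyword
+ * number to its token.
+ */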
+#define PG_KEYWORD(kwname, value) value,
 
-       /*
-        * category is not needed in ecpg, it is only here so we can share the
-        * data structure with the backend
-        */
-       {"allocate", SQL_ALLOCATE, 0},
-       {"autocommit", SQL_AUTOCOMMIT, 0},
-       {"bool", SQL_BOOL, 0},
-       {"break", SQL_BREAK, 0},
-       {"cardinality", SQL_CARDINALITY, 0},
-       {"connect", SQL_CONNECT, 0},
-       {"count", SQL_COUNT, 0},
-       {"datetime_interval_code", SQL_DATETIME_INTERVAL_CODE, 0},
-       {"datetime_interval_precision", SQL_DATETIME_INTERVAL_PRECISION, 0},
-       {"describe", SQL_DESCRIBE, 0},
-       {"descriptor", SQL_DESCRIPTOR, 0},
-       {"disconnect", SQL_DISCONNECT, 0},
-       {"found", SQL_FOUND, 0},
-       {"free", SQL_FREE, 0},
-       {"get", SQL_GET, 0},
-       {"go", SQL_GO, 0},
-       {"goto", SQL_GOTO, 0},
-       {"identified", SQL_IDENTIFIED, 0},
-       {"indicator", SQL_INDICATOR, 0},
-       {"key_member", SQL_KEY_MEMBER, 0},
-       {"length", SQL_LENGTH, 0},
-       {"long", SQL_LONG, 0},
-       {"nullable", SQL_NULLABLE, 0},
-       {"octet_length", SQL_OCTET_LENGTH, 0},
-       {"open", SQL_OPEN, 0},
-       {"output", SQL_OUTPUT, 0},
-       {"reference", SQL_REFERENCE, 0},
-       {"returned_length", SQL_RETURNED_LENGTH, 0},
-       {"returned_octet_length", SQL_RETURNED_OCTET_LENGTH, 0},
-       {"scale", SQL_SCALE, 0},
-       {"section", SQL_SECTION, 0},
-       {"short", SQL_SHORT, 0},
-       {"signed", SQL_SIGNED, 0},
-       {"sqlerror", SQL_SQLERROR, 0},
-       {"sqlprint", SQL_SQLPRINT, 0},
-       {"sqlwarning", SQL_SQLWARNING, 0},
-       {"stop", SQL_STOP, 0},
-       {"struct", SQL_STRUCT, 0},
-       {"unsigned", SQL_UNSIGNED, 0},
-       {"var", SQL_VAR, 0},
-       {"whenever", SQL_WHENEVER, 0},
+static const uint16 ECPGScanKeywordTokens[] = {
+#include "ecpg_kwlist.h"
 };
 
+#undef PG_KEYWORD
+
+
 /*
  * ScanECPGKeywordLookup - see if a given word is a keyword
  *
- * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
+ * Returns the token value of the keyword, or -1 if no match.
+ *
+ * The backend's SQL keyword list is consulted first, so a word present in
+ * both lists would yield the token from SQLScanKeywordTokens[].  In each
+ * case the keyword number from ScanKeywordLookup() is used as the index
+ * into the token array that parallels the list searched.
+ *
  * Keywords are matched using the same case-folding rules as in the backend.
  */
-const ScanKeyword *
+int
 ScanECPGKeywordLookup(const char *text)
 {
-       const ScanKeyword *res;
+       int                     kwnum;
 
        /* First check SQL symbols defined by the backend. */
-       res = ScanKeywordLookup(text, SQLScanKeywords, NumSQLScanKeywords);
-       if (res)
-               return res;
+       kwnum = ScanKeywordLookup(text, &ScanKeywords);
+       if (kwnum >= 0)
+               return SQLScanKeywordTokens[kwnum];
 
        /* Try ECPG-specific keywords. */
-       res = ScanKeywordLookup(text, ECPGScanKeywords, lengthof(ECPGScanKeywords));
-       if (res)
-               return res;
+       kwnum = ScanKeywordLookup(text, &ScanECPGKeywords);
+       if (kwnum >= 0)
+               return ECPGScanKeywordTokens[kwnum];
 
-       return NULL;
+       return -1;
 }
diff --git a/src/interfaces/ecpg/preproc/ecpg_kwlist.h b/src/interfaces/ecpg/preproc/ecpg_kwlist.h
new file mode 100644 (file)
index 0000000..97ef254
--- /dev/null
@@ -0,0 +1,68 @@
+/*-------------------------------------------------------------------------
+ *
+ * ecpg_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/interfaces/ecpg/preproc/ecpg_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef ECPG_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *              search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("allocate", SQL_ALLOCATE)
+PG_KEYWORD("autocommit", SQL_AUTOCOMMIT)
+PG_KEYWORD("bool", SQL_BOOL)
+PG_KEYWORD("break", SQL_BREAK)
+PG_KEYWORD("cardinality", SQL_CARDINALITY)
+PG_KEYWORD("connect", SQL_CONNECT)
+PG_KEYWORD("count", SQL_COUNT)
+PG_KEYWORD("datetime_interval_code", SQL_DATETIME_INTERVAL_CODE)
+PG_KEYWORD("datetime_interval_precision", SQL_DATETIME_INTERVAL_PRECISION)
+PG_KEYWORD("describe", SQL_DESCRIBE)
+PG_KEYWORD("descriptor", SQL_DESCRIPTOR)
+PG_KEYWORD("disconnect", SQL_DISCONNECT)
+PG_KEYWORD("found", SQL_FOUND)
+PG_KEYWORD("free", SQL_FREE)
+PG_KEYWORD("get", SQL_GET)
+PG_KEYWORD("go", SQL_GO)
+PG_KEYWORD("goto", SQL_GOTO)
+PG_KEYWORD("identified", SQL_IDENTIFIED)
+PG_KEYWORD("indicator", SQL_INDICATOR)
+PG_KEYWORD("key_member", SQL_KEY_MEMBER)
+PG_KEYWORD("length", SQL_LENGTH)
+PG_KEYWORD("long", SQL_LONG)
+PG_KEYWORD("nullable", SQL_NULLABLE)
+PG_KEYWORD("octet_length", SQL_OCTET_LENGTH)
+PG_KEYWORD("open", SQL_OPEN)
+PG_KEYWORD("output", SQL_OUTPUT)
+PG_KEYWORD("reference", SQL_REFERENCE)
+PG_KEYWORD("returned_length", SQL_RETURNED_LENGTH)
+PG_KEYWORD("returned_octet_length", SQL_RETURNED_OCTET_LENGTH)
+PG_KEYWORD("scale", SQL_SCALE)
+PG_KEYWORD("section", SQL_SECTION)
+PG_KEYWORD("short", SQL_SHORT)
+PG_KEYWORD("signed", SQL_SIGNED)
+PG_KEYWORD("sqlerror", SQL_SQLERROR)
+PG_KEYWORD("sqlprint", SQL_SQLPRINT)
+PG_KEYWORD("sqlwarning", SQL_SQLWARNING)
+PG_KEYWORD("stop", SQL_STOP)
+PG_KEYWORD("struct", SQL_STRUCT)
+PG_KEYWORD("unsigned", SQL_UNSIGNED)
+PG_KEYWORD("var", SQL_VAR)
+PG_KEYWORD("whenever", SQL_WHENEVER)
index 12409e9805917de99211685d62b1a0a8c9f7e9c2..03804099d12b0f507affcc4fc380a08d8d246db3 100644 (file)
 
 /*
  * This is much trickier than it looks.  We are #include'ing kwlist.h
- * but the "value" numbers that go into the table are from preproc.h
- * not the backend's gram.h.  Therefore this table will recognize all
- * keywords known to the backend, but will supply the token numbers used
+ * but the token numbers that go into the table are from preproc.h
+ * not the backend's gram.h.  Therefore this token table will match
+ * the ScanKeywords table supplied from common/keywords.c, including all
+ * keywords known to the backend, but it will supply the token numbers used
  * by ecpg's grammar, which is what we need.  The ecpg grammar must
  * define all the same token names the backend does, else we'll get
  * undefined-symbol failures in this compile.
+ *
+ * Element N of the table is the ecpg token for keyword number N, i.e. the
+ * value ScanKeywordLookup() returns when searching ScanKeywords.
  */
 
-#include "common/keywords.h"
-
 #include "preproc_extern.h"
 #include "preproc.h"
 
+#define PG_KEYWORD(kwname, value, category) value,
 
-#define PG_KEYWORD(a,b,c) {a,b,c},
-
-const ScanKeyword SQLScanKeywords[] = {
+const uint16 SQLScanKeywordTokens[] = {
 #include "parser/kwlist.h"
 };
 
-const int      NumSQLScanKeywords = lengthof(SQLScanKeywords);
+#undef PG_KEYWORD
index a60564c6908d150394d2bc24d0892c0b78cdd776..3131f5f1479055bd24a231b3ecfb35f5ca28b8c6 100644 (file)
@@ -920,19 +920,19 @@ cppline                   {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                }
 
 {identifier}   {
-                                       const ScanKeyword  *keyword;
-
                                        if (!isdefine())
                                        {
+                                               int             kwvalue;
+
                                                /* Is it an SQL/ECPG keyword? */
-                                               keyword = ScanECPGKeywordLookup(yytext);
-                                               if (keyword != NULL)
-                                                       return keyword->value;
+                                               kwvalue = ScanECPGKeywordLookup(yytext);
+                                               if (kwvalue >= 0)
+                                                       return kwvalue;
 
                                                /* Is it a C keyword? */
-                                               keyword = ScanCKeywordLookup(yytext);
-                                               if (keyword != NULL)
-                                                       return keyword->value;
+                                               kwvalue = ScanCKeywordLookup(yytext);
+                                               if (kwvalue >= 0)
+                                                       return kwvalue;
 
                                                /*
                                                 * None of the above.  Return it as an identifier.
@@ -1010,12 +1010,11 @@ cppline                 {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                                return CPP_LINE;
                                        }
 <C>{identifier}                {
-                                               const ScanKeyword               *keyword;
-
                                                /*
                                                 * Try to detect a function name:
                                                 * look for identifiers at the global scope
-                                                * keep the last identifier before the first '(' and '{' */
+                                                * keep the last identifier before the first '(' and '{'
+                                                */
                                                if (braces_open == 0 && parenths_open == 0)
                                                {
                                                        if (current_function)
@@ -1026,9 +1025,11 @@ cppline                  {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                                /* however, some defines have to be taken care of for compatibility */
                                                if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine())
                                                {
-                                                       keyword = ScanCKeywordLookup(yytext);
-                                                       if (keyword != NULL)
-                                                               return keyword->value;
+                                                       int             kwvalue;
+
+                                                       kwvalue = ScanCKeywordLookup(yytext);
+                                                       if (kwvalue >= 0)
+                                                               return kwvalue;
                                                        else
                                                        {
                                                                base_yylval.str = mm_strdup(yytext);
index 13eda670ffe8af9b485ca4a814d74af86fec910b..97467800dc9eff9ede01d08cff43d7596ad069df 100644 (file)
@@ -59,8 +59,7 @@ extern struct when when_error,
 extern struct ECPGstruct_member *struct_member_list[STRUCT_DEPTH];
 
 /* Globals from keywords.c */
-extern const ScanKeyword SQLScanKeywords[];
-extern const int NumSQLScanKeywords;
+extern const uint16 SQLScanKeywordTokens[];
 
 /* functions */
 
@@ -102,8 +101,8 @@ extern void check_indicator(struct ECPGtype *);
 extern void remove_typedefs(int);
 extern void remove_variables(int);
 extern struct variable *new_variable(const char *, struct ECPGtype *, int);
-extern const ScanKeyword *ScanCKeywordLookup(const char *);
-extern const ScanKeyword *ScanECPGKeywordLookup(const char *text);
+extern int     ScanCKeywordLookup(const char *text);
+extern int     ScanECPGKeywordLookup(const char *text);
 extern void parser_init(void);
 extern int     filtered_base_yylex(void);
 
index ff6ac965fddf1db7d78888b2132c2133b6289035..3ab9a2243cc6d27f1466e43862bb41944465e8b3 100644 (file)
@@ -1,5 +1,7 @@
 /pl_gram.c
 /pl_gram.h
+/pl_reserved_kwlist_d.h
+/pl_unreserved_kwlist_d.h
 /plerrcodes.h
 /log/
 /results/
index 25a5a9d44856780478b75a5aef0c034a912b3d5b..9dd4a74c3468f9366e6a73d1ea61a4ba41b4a54f 100644 (file)
@@ -29,6 +29,8 @@ REGRESS_OPTS = --dbname=$(PL_TESTDB)
 REGRESS = plpgsql_call plpgsql_control plpgsql_domain plpgsql_record \
        plpgsql_cache plpgsql_transaction plpgsql_varprops
 
+GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl
+
 all: all-lib
 
 # Shared library stuff
@@ -61,6 +63,7 @@ uninstall-headers:
 
 # Force these dependencies to be known even without dependency info built:
 pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o: plpgsql.h pl_gram.h plerrcodes.h
+pl_scanner.o: pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h
 
 # See notes in src/backend/parser/Makefile about the following two rules
 pl_gram.h: pl_gram.c
@@ -72,6 +75,13 @@ pl_gram.c: BISONFLAGS += -d
 plerrcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-plerrcodes.pl
        $(PERL) $(srcdir)/generate-plerrcodes.pl $< > $@
 
+# generate keyword headers for the scanner
+pl_reserved_kwlist_d.h: pl_reserved_kwlist.h $(GEN_KEYWORDLIST)
+       $(PERL) $(GEN_KEYWORDLIST) --varname ReservedPLKeywords $<
+
+pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h $(GEN_KEYWORDLIST)
+       $(PERL) $(GEN_KEYWORDLIST) --varname UnreservedPLKeywords $<
+
 
 check: submake
        $(pg_regress_check) $(REGRESS_OPTS) $(REGRESS)
@@ -84,13 +94,14 @@ submake:
        $(MAKE) -C $(top_builddir)/src/test/regress pg_regress$(X)
 
 
-distprep: pl_gram.h pl_gram.c plerrcodes.h
+distprep: pl_gram.h pl_gram.c plerrcodes.h pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h
 
-# pl_gram.c, pl_gram.h and plerrcodes.h are in the distribution tarball,
-# so they are not cleaned here.
+# pl_gram.c, pl_gram.h, plerrcodes.h, pl_reserved_kwlist_d.h, and
+# pl_unreserved_kwlist_d.h are in the distribution tarball, so they
+# are not cleaned here.
 clean distclean: clean-lib
        rm -f $(OBJS)
        rm -rf $(pg_regress_clean_files)
 
 maintainer-clean: distclean
-       rm -f pl_gram.c pl_gram.h plerrcodes.h
+       rm -f pl_gram.c pl_gram.h plerrcodes.h pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h
diff --git a/src/pl/plpgsql/src/pl_reserved_kwlist.h b/src/pl/plpgsql/src/pl_reserved_kwlist.h
new file mode 100644 (file)
index 0000000..5c2e0c1
--- /dev/null
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * pl_reserved_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/pl/plpgsql/src/pl_reserved_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef PL_RESERVED_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * Be careful not to put the same word in both lists.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *              search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("all", K_ALL)
+PG_KEYWORD("begin", K_BEGIN)
+PG_KEYWORD("by", K_BY)
+PG_KEYWORD("case", K_CASE)
+PG_KEYWORD("declare", K_DECLARE)
+PG_KEYWORD("else", K_ELSE)
+PG_KEYWORD("end", K_END)
+PG_KEYWORD("execute", K_EXECUTE)
+PG_KEYWORD("for", K_FOR)
+PG_KEYWORD("foreach", K_FOREACH)
+PG_KEYWORD("from", K_FROM)
+PG_KEYWORD("if", K_IF)
+PG_KEYWORD("in", K_IN)
+PG_KEYWORD("into", K_INTO)
+PG_KEYWORD("loop", K_LOOP)
+PG_KEYWORD("not", K_NOT)
+PG_KEYWORD("null", K_NULL)
+PG_KEYWORD("or", K_OR)
+PG_KEYWORD("strict", K_STRICT)
+PG_KEYWORD("then", K_THEN)
+PG_KEYWORD("to", K_TO)
+PG_KEYWORD("using", K_USING)
+PG_KEYWORD("when", K_WHEN)
+PG_KEYWORD("while", K_WHILE)
index 8340628de3c8726c0d0e3e06ca8b4260d89b2814..c260438d7d40589d313809a320f055529fca5d2f 100644 (file)
 #include "pl_gram.h"                   /* must be after parser/scanner.h */
 
 
-#define PG_KEYWORD(a,b,c) {a,b,c},
-
-
 /* Klugy flag to tell scanner how to look up identifiers */
 IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 
 /*
  * A word about keywords:
  *
- * We keep reserved and unreserved keywords in separate arrays.  The
+ * We keep reserved and unreserved keywords in separate headers.  Be careful
+ * not to put the same word in both headers.  Also be sure that pl_gram.y's
+ * unreserved_keyword production agrees with the unreserved header.  The
  * reserved keywords are passed to the core scanner, so they will be
  * recognized before (and instead of) any variable name.  Unreserved words
  * are checked for separately, usually after determining that the identifier
@@ -57,130 +56,22 @@ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
  * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
  */
 
-/*
- * Lists of keyword (name, token-value, category) entries.
- *
- * !!WARNING!!: These lists must be sorted by ASCII name, because binary
- *              search is used to locate entries.
- *
- * Be careful not to put the same word in both lists.  Also be sure that
- * pl_gram.y's unreserved_keyword production agrees with the second list.
- */
+/* ScanKeywordList lookup data for PL/pgSQL keywords */
+#include "pl_reserved_kwlist_d.h"
+#include "pl_unreserved_kwlist_d.h"
+
+/* Token codes for PL/pgSQL keywords */
+#define PG_KEYWORD(kwname, value) value,
 
-static const ScanKeyword reserved_keywords[] = {
-       PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
-       PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
-       PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
-       PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
-       PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
-       PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
-       PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
-       PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
-       PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
-       PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
-       PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
-       PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
-       PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
-       PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
-       PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
-       PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
-       PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
-       PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
-       PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
-       PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
-       PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
-       PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
-       PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
-       PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
+static const uint16 ReservedPLKeywordTokens[] = {
+#include "pl_reserved_kwlist.h"
 };
 
-static const int num_reserved_keywords = lengthof(reserved_keywords);
-
-static const ScanKeyword unreserved_keywords[] = {
-       PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
-       PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
-       PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
-       PG_KEYWORD("call", K_CALL, UNRESERVED_KEYWORD)
-       PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
-       PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
-       PG_KEYWORD("commit", K_COMMIT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
-       PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
-       PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
-       PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
-       PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
-       PG_KEYWORD("do", K_DO, UNRESERVED_KEYWORD)
-       PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
-       PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
-       PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
-       PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
-       PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
-       PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
-       PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
-       PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
-       PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
-       PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
-       PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
-       PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
-       PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
-       PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
-       PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
-       PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
-       PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
-       PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
-       PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
-       PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
-       PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
-       PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
-       PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("reset", K_RESET, UNRESERVED_KEYWORD)
-       PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
-       PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("rollback", K_ROLLBACK, UNRESERVED_KEYWORD)
-       PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
-       PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
-       PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
-       PG_KEYWORD("set", K_SET, UNRESERVED_KEYWORD)
-       PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
-       PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
-       PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
-       PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
-       PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
-       PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
+static const uint16 UnreservedPLKeywordTokens[] = {
+#include "pl_unreserved_kwlist.h"
 };
 
-static const int num_unreserved_keywords = lengthof(unreserved_keywords);
+#undef PG_KEYWORD
 
 /*
  * This macro must recognize all tokens that can immediately precede a
@@ -256,7 +147,7 @@ plpgsql_yylex(void)
 {
        int                     tok1;
        TokenAuxData aux1;
-       const ScanKeyword *kw;
+       int                     kwnum;
 
        tok1 = internal_yylex(&aux1);
        if (tok1 == IDENT || tok1 == PARAM)
@@ -333,12 +224,12 @@ plpgsql_yylex(void)
                                                                           &aux1.lval.word))
                                        tok1 = T_DATUM;
                                else if (!aux1.lval.word.quoted &&
-                                                (kw = ScanKeywordLookup(aux1.lval.word.ident,
-                                                                                                unreserved_keywords,
-                                                                                                num_unreserved_keywords)))
+                                                (kwnum = ScanKeywordLookup(aux1.lval.word.ident,
+                                                                                                       &UnreservedPLKeywords)) >= 0)
                                {
-                                       aux1.lval.keyword = kw->name;
-                                       tok1 = kw->value;
+                                       aux1.lval.keyword = GetScanKeyword(kwnum,
+                                                                                                          &UnreservedPLKeywords);
+                                       tok1 = UnreservedPLKeywordTokens[kwnum];
                                }
                                else
                                        tok1 = T_WORD;
@@ -375,12 +266,12 @@ plpgsql_yylex(void)
                                                                   &aux1.lval.word))
                                tok1 = T_DATUM;
                        else if (!aux1.lval.word.quoted &&
-                                        (kw = ScanKeywordLookup(aux1.lval.word.ident,
-                                                                                        unreserved_keywords,
-                                                                                        num_unreserved_keywords)))
+                                        (kwnum = ScanKeywordLookup(aux1.lval.word.ident,
+                                                                                               &UnreservedPLKeywords)) >= 0)
                        {
-                               aux1.lval.keyword = kw->name;
-                               tok1 = kw->value;
+                               aux1.lval.keyword = GetScanKeyword(kwnum,
+                                                                                                  &UnreservedPLKeywords);
+                               tok1 = UnreservedPLKeywordTokens[kwnum];
                        }
                        else
                                tok1 = T_WORD;
@@ -497,9 +388,9 @@ plpgsql_token_is_unreserved_keyword(int token)
 {
        int                     i;
 
-       for (i = 0; i < num_unreserved_keywords; i++)
+       for (i = 0; i < lengthof(UnreservedPLKeywordTokens); i++)
        {
-               if (unreserved_keywords[i].value == token)
+               if (UnreservedPLKeywordTokens[i] == token)
                        return true;
        }
        return false;
@@ -696,7 +587,7 @@ plpgsql_scanner_init(const char *str)
 {
        /* Start up the core scanner */
        yyscanner = scanner_init(str, &core_yy,
-                                                        reserved_keywords, num_reserved_keywords);
+                                                        &ReservedPLKeywords, ReservedPLKeywordTokens);
 
        /*
         * scanorig points to the original string, which unlike the scanner's
diff --git a/src/pl/plpgsql/src/pl_unreserved_kwlist.h b/src/pl/plpgsql/src/pl_unreserved_kwlist.h
new file mode 100644 (file)
index 0000000..ef2aea0
--- /dev/null
@@ -0,0 +1,111 @@
+/*-------------------------------------------------------------------------
+ *
+ * pl_unreserved_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/pl/plpgsql/src/pl_unreserved_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef PL_UNRESERVED_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * Be careful not to put the same word into pl_reserved_kwlist.h.  Also be
+ * sure that pl_gram.y's unreserved_keyword production agrees with this list.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *              search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("absolute", K_ABSOLUTE)
+PG_KEYWORD("alias", K_ALIAS)
+PG_KEYWORD("array", K_ARRAY)
+PG_KEYWORD("assert", K_ASSERT)
+PG_KEYWORD("backward", K_BACKWARD)
+PG_KEYWORD("call", K_CALL)
+PG_KEYWORD("close", K_CLOSE)
+PG_KEYWORD("collate", K_COLLATE)
+PG_KEYWORD("column", K_COLUMN)
+PG_KEYWORD("column_name", K_COLUMN_NAME)
+PG_KEYWORD("commit", K_COMMIT)
+PG_KEYWORD("constant", K_CONSTANT)
+PG_KEYWORD("constraint", K_CONSTRAINT)
+PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME)
+PG_KEYWORD("continue", K_CONTINUE)
+PG_KEYWORD("current", K_CURRENT)
+PG_KEYWORD("cursor", K_CURSOR)
+PG_KEYWORD("datatype", K_DATATYPE)
+PG_KEYWORD("debug", K_DEBUG)
+PG_KEYWORD("default", K_DEFAULT)
+PG_KEYWORD("detail", K_DETAIL)
+PG_KEYWORD("diagnostics", K_DIAGNOSTICS)
+PG_KEYWORD("do", K_DO)
+PG_KEYWORD("dump", K_DUMP)
+PG_KEYWORD("elseif", K_ELSIF)
+PG_KEYWORD("elsif", K_ELSIF)
+PG_KEYWORD("errcode", K_ERRCODE)
+PG_KEYWORD("error", K_ERROR)
+PG_KEYWORD("exception", K_EXCEPTION)
+PG_KEYWORD("exit", K_EXIT)
+PG_KEYWORD("fetch", K_FETCH)
+PG_KEYWORD("first", K_FIRST)
+PG_KEYWORD("forward", K_FORWARD)
+PG_KEYWORD("get", K_GET)
+PG_KEYWORD("hint", K_HINT)
+PG_KEYWORD("import", K_IMPORT)
+PG_KEYWORD("info", K_INFO)
+PG_KEYWORD("insert", K_INSERT)
+PG_KEYWORD("is", K_IS)
+PG_KEYWORD("last", K_LAST)
+PG_KEYWORD("log", K_LOG)
+PG_KEYWORD("message", K_MESSAGE)
+PG_KEYWORD("message_text", K_MESSAGE_TEXT)
+PG_KEYWORD("move", K_MOVE)
+PG_KEYWORD("next", K_NEXT)
+PG_KEYWORD("no", K_NO)
+PG_KEYWORD("notice", K_NOTICE)
+PG_KEYWORD("open", K_OPEN)
+PG_KEYWORD("option", K_OPTION)
+PG_KEYWORD("perform", K_PERFORM)
+PG_KEYWORD("pg_context", K_PG_CONTEXT)
+PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME)
+PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT)
+PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL)
+PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT)
+PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS)
+PG_KEYWORD("prior", K_PRIOR)
+PG_KEYWORD("query", K_QUERY)
+PG_KEYWORD("raise", K_RAISE)
+PG_KEYWORD("relative", K_RELATIVE)
+PG_KEYWORD("reset", K_RESET)
+PG_KEYWORD("return", K_RETURN)
+PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE)
+PG_KEYWORD("reverse", K_REVERSE)
+PG_KEYWORD("rollback", K_ROLLBACK)
+PG_KEYWORD("row_count", K_ROW_COUNT)
+PG_KEYWORD("rowtype", K_ROWTYPE)
+PG_KEYWORD("schema", K_SCHEMA)
+PG_KEYWORD("schema_name", K_SCHEMA_NAME)
+PG_KEYWORD("scroll", K_SCROLL)
+PG_KEYWORD("set", K_SET)
+PG_KEYWORD("slice", K_SLICE)
+PG_KEYWORD("sqlstate", K_SQLSTATE)
+PG_KEYWORD("stacked", K_STACKED)
+PG_KEYWORD("table", K_TABLE)
+PG_KEYWORD("table_name", K_TABLE_NAME)
+PG_KEYWORD("type", K_TYPE)
+PG_KEYWORD("use_column", K_USE_COLUMN)
+PG_KEYWORD("use_variable", K_USE_VARIABLE)
+PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT)
+PG_KEYWORD("warning", K_WARNING)
diff --git a/src/tools/gen_keywordlist.pl b/src/tools/gen_keywordlist.pl
new file mode 100644 (file)
index 0000000..d764aff
--- /dev/null
@@ -0,0 +1,156 @@
+#----------------------------------------------------------------------
+#
+# gen_keywordlist.pl
+#      Perl script that transforms a list of keywords into a ScanKeywordList
+#      data structure that can be passed to ScanKeywordLookup().
+#
+# The input is a C header file containing a series of macro calls
+#      PG_KEYWORD("keyword", ...)
+# Lines not starting with PG_KEYWORD are ignored.  The keywords are
+# implicitly numbered 0..N-1 in order of appearance in the header file.
+# Currently, the keywords are required to appear in ASCII order.
+#
+# The output is a C header file that defines a "const ScanKeywordList"
+# variable named according to the -v switch ("ScanKeywords" by default).
+# The variable is marked "static" unless the -e switch is given.
+#
+#
+# Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/tools/gen_keywordlist.pl
+#
+#----------------------------------------------------------------------
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $output_path = '';
+my $extern = 0;
+my $varname = 'ScanKeywords';
+
+# Process the command-line switches; if GetOptions reports a problem,
+# print the usage text and exit.
+GetOptions(
+       'output:s' => \$output_path,
+       'extern'   => \$extern,
+       'varname:s' => \$varname) || usage();
+
+# The first remaining argument is the keyword list header to read.
+my $kw_input_file = shift @ARGV || die "No input file.\n";
+
+# Make sure output_path ends in a slash if needed.
+if ($output_path ne '' && substr($output_path, -1) ne '/')
+{
+       $output_path .= '/';
+}
+
+# The output file takes the input file's base name with _d appended, and
+# is placed under output_path (a bare relative name if that is empty).
+$kw_input_file =~ /(\w+)\.h$/ || die "Input file must be named something.h.\n";
+my $base_filename = $1 . '_d';
+my $kw_def_file = $output_path . $base_filename . '.h';
+
+# Open the keyword list for reading and the generated header for writing.
+open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!\n";
+open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!\n";
+
+# Opening boilerplate for keyword definition header.  The three printf
+# arguments fill in, in order, the file name shown in the banner and the
+# upper-cased base name used twice for the #ifndef/#define include guard.
+printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename;
+/*-------------------------------------------------------------------------
+ *
+ * %s.h
+ *    List of keywords represented as a ScanKeywordList.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES
+ *  ******************************
+ *  *** DO NOT EDIT THIS FILE! ***
+ *  ******************************
+ *
+ *  It has been GENERATED by src/tools/gen_keywordlist.pl
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef %s_H
+#define %s_H
+
+#include "common/kwlookup.h"
+
+EOM
+
+# Parse input file for keyword names.
+#
+# Keywords are implicitly numbered by their position in this list, so every
+# PG_KEYWORD entry in the header has to be picked up here.  A line that
+# starts with PG_KEYWORD but lacks the expected ("name", ...) shape is
+# reported instead of being silently skipped, since dropping it would shift
+# the numbering of all later keywords.
+my @keywords;
+while (<$kif>)
+{
+       if (/^PG_KEYWORD\("(\w+)"/)
+       {
+               push @keywords, $1;
+       }
+       elsif (/^PG_KEYWORD\b/)
+       {
+               die qq|$kw_input_file: unparsable PG_KEYWORD entry at line $.\n|;
+       }
+}
+
+# An empty list would produce an empty array initializer in the generated
+# header, which is not portable C, so refuse to continue.
+die "No keywords found in $kw_input_file\n" if !@keywords;
+
+# Error out if the keyword names are not in ASCII order.  The comparison
+# is >= 0, so duplicate names are rejected as well.
+for my $i (0..$#keywords - 1)
+{
+       die qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n|
+         if ($keywords[$i] cmp $keywords[$i + 1]) >= 0;
+}
+
+# Emit the string containing all the keywords.  Each keyword is written as
+# its own literal ending in an explicit \0; the compiler concatenates the
+# adjacent literals into one constant.
+
+printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
+print $kwdef join qq|\\0"\n\t"|, @keywords;
+print $kwdef qq|";\n\n|;
+
+# Emit an array of numerical offsets which will be used to index into the
+# keyword string.  Also determine max keyword length.
+
+printf $kwdef "static const uint16 %s_kw_offsets[] = {\n", $varname;
+
+my $offset = 0;
+my $max_len = 0;
+foreach my $name (@keywords)
+{
+       my $this_length = length($name);
+
+       # The offsets array is declared uint16, so fail loudly rather than
+       # emit an initializer that does not fit in the element type.
+       die qq|The keyword "$name" starts at offset $offset, which does not fit in uint16\n|
+         if $offset > 65535;
+
+       print $kwdef "\t$offset,\n";
+
+       # Calculate the cumulative offset of the next keyword,
+       # taking into account the null terminator.
+       $offset += $this_length + 1;
+
+       # Update max keyword length.
+       $max_len = $this_length if $max_len < $this_length;
+}
+
+print $kwdef "};\n\n";
+
+# Emit a macro defining the number of keywords.
+# (In some places it's useful to have access to that as a constant.)
+
+printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords;
+
+# Emit the struct that wraps all this lookup info into one variable.
+# The initializer lists, in order: the keyword string, the offsets array,
+# the keyword count macro, and the maximum keyword length.
+
+print $kwdef "static " if !$extern;
+printf $kwdef "const ScanKeywordList %s = {\n", $varname;
+printf $kwdef qq|\t%s_kw_string,\n|, $varname;
+printf $kwdef qq|\t%s_kw_offsets,\n|, $varname;
+printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname;
+printf $kwdef qq|\t%d\n|, $max_len;
+print $kwdef "};\n\n";
+
+printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename;
+
+# Output is buffered, so a write failure (for example a full disk) may only
+# be reported when the handle is closed.  Check it so that a truncated
+# header is not left behind with a successful exit status.
+close($kif);
+close($kwdef) || die "$kw_def_file: $!\n";
+
+
+# Report the command-line syntax.  die() writes the text to STDERR and
+# terminates the script with a failure status.  This is invoked when
+# GetOptions() rejects the arguments.
+sub usage
+{
+       die <<EOM;
+Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] input_file
+    --output   Output directory (default '.')
+    --varname  Name for ScanKeywordList variable (default 'ScanKeywords')
+    --extern   Allow the ScanKeywordList variable to be globally visible
+
+gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList.
+The output filename is derived from the input file by inserting _d,
+for example kwlist_d.h is produced from kwlist.h.
+EOM
+}
index 2921d193a16a1ac15ad08ef7c6823a989c3e9922..56192f1b20c41452a3608918f5b83147b1653ce6 100644 (file)
@@ -118,7 +118,7 @@ sub mkvcbuild
 
        our @pgcommonallfiles = qw(
          base64.c config_info.c controldata_utils.c exec.c file_perm.c ip.c
-         keywords.c link-canary.c md5.c
+         keywords.c kwlookup.c link-canary.c md5.c
          pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
          saslprep.c scram-common.c string.c unicode_norm.c username.c
          wait_error.c);
index eb2346b8d3e9c71b606a23c2980f6ef5f58b589c..937bf184e27e86141447e11cac59af0a3ffb45ff 100644 (file)
@@ -409,6 +409,42 @@ sub GenerateFiles
                chdir('../../..');
        }
 
+       if (IsNewer(
+                       'src/common/kwlist_d.h',
+                       'src/include/parser/kwlist.h'))
+       {
+               print "Generating kwlist_d.h...\n";
+               system('perl src/tools/gen_keywordlist.pl --extern -o src/common src/include/parser/kwlist.h');
+       }
+
+       if (IsNewer(
+                       'src/pl/plpgsql/src/pl_reserved_kwlist_d.h',
+                       'src/pl/plpgsql/src/pl_reserved_kwlist.h')
+               || IsNewer(
+                       'src/pl/plpgsql/src/pl_unreserved_kwlist_d.h',
+                       'src/pl/plpgsql/src/pl_unreserved_kwlist.h'))
+       {
+               print "Generating pl_reserved_kwlist_d.h and pl_unreserved_kwlist_d.h...\n";
+               chdir('src/pl/plpgsql/src');
+               system('perl ../../../tools/gen_keywordlist.pl --varname ReservedPLKeywords pl_reserved_kwlist.h');
+               system('perl ../../../tools/gen_keywordlist.pl --varname UnreservedPLKeywords pl_unreserved_kwlist.h');
+               chdir('../../../..');
+       }
+
+       if (IsNewer(
+                       'src/interfaces/ecpg/preproc/c_kwlist_d.h',
+                       'src/interfaces/ecpg/preproc/c_kwlist.h')
+               || IsNewer(
+                       'src/interfaces/ecpg/preproc/ecpg_kwlist_d.h',
+                       'src/interfaces/ecpg/preproc/ecpg_kwlist.h'))
+       {
+               print "Generating c_kwlist_d.h and ecpg_kwlist_d.h...\n";
+               chdir('src/interfaces/ecpg/preproc');
+               system('perl ../../../tools/gen_keywordlist.pl --varname ScanCKeywords c_kwlist.h');
+               system('perl ../../../tools/gen_keywordlist.pl --varname ScanECPGKeywords ecpg_kwlist.h');
+               chdir('../../../..');
+       }
+
        if (IsNewer(
                        'src/interfaces/ecpg/preproc/preproc.y',
                        'src/backend/parser/gram.y'))
index 7a23a2b55f715cf0259af5c43e65e24fbbe928e3..069d6eb569d5a76a57e113077c7aeead57f07bfe 100755 (executable)
@@ -64,6 +64,11 @@ if %DIST%==1 if exist src\pl\tcl\pltclerrcodes.h del /q src\pl\tcl\pltclerrcodes
 if %DIST%==1 if exist src\backend\utils\sort\qsort_tuple.c del /q src\backend\utils\sort\qsort_tuple.c
 if %DIST%==1 if exist src\bin\psql\sql_help.c del /q src\bin\psql\sql_help.c
 if %DIST%==1 if exist src\bin\psql\sql_help.h del /q src\bin\psql\sql_help.h
+if %DIST%==1 if exist src\common\kwlist_d.h del /q src\common\kwlist_d.h
+if %DIST%==1 if exist src\pl\plpgsql\src\pl_reserved_kwlist_d.h del /q src\pl\plpgsql\src\pl_reserved_kwlist_d.h
+if %DIST%==1 if exist src\pl\plpgsql\src\pl_unreserved_kwlist_d.h del /q src\pl\plpgsql\src\pl_unreserved_kwlist_d.h
+if %DIST%==1 if exist src\interfaces\ecpg\preproc\c_kwlist_d.h del /q src\interfaces\ecpg\preproc\c_kwlist_d.h
+if %DIST%==1 if exist src\interfaces\ecpg\preproc\ecpg_kwlist_d.h del /q src\interfaces\ecpg\preproc\ecpg_kwlist_d.h
 if %DIST%==1 if exist src\interfaces\ecpg\preproc\preproc.y del /q src\interfaces\ecpg\preproc\preproc.y
 if %DIST%==1 if exist src\backend\catalog\postgres.bki del /q src\backend\catalog\postgres.bki
 if %DIST%==1 if exist src\backend\catalog\postgres.description del /q src\backend\catalog\postgres.description