Remove plpgsql's separate lexer (finally!), in favor of using the core lexer

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 12 Nov 2009 00:13:00 +0000 (00:13 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 12 Nov 2009 00:13:00 +0000 (00:13 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 12 Nov 2009 00:13:00 +0000 (00:13 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 12 Nov 2009 00:13:00 +0000 (00:13 +0000)
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l

index 8a53221930c4323c1ffeee5354153c64eba70aba..6bc9f2db53acea57805775c54d4e06cc105457b3 100644 (file)
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -24,7 +24,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.163 2009/11/09 18:38:48 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.164 2009/11/12 00:13:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -85,6 +85,7 @@ static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
  static char *litbufdup(core_yyscan_t yyscanner);
  static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner);
  static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
+static int     process_integer_literal(const char *token, YYSTYPE *lval);
  static bool is_utf16_surrogate_first(pg_wchar c);
  static bool is_utf16_surrogate_second(pg_wchar c);
  static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
@@ -339,12 +340,15 @@ operator          {op_chars}+
   * instead we pass it separately to parser. there it gets
   * coerced via doNegate() -- Leon aug 20 1999
   *
+ * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ *
   * {realfail1} and {realfail2} are added to prevent the need for scanner
   * backup when the {real} rule fails to match completely.
   */
  
  integer                        {digit}+
  decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
+decimalfail            {digit}+\.\.
  real                   ({integer}|{decimal})[Ee][-+]?{digit}+
  realfail1              ({integer}|{decimal})[Ee]
  realfail2              ({integer}|{decimal})[Ee][-+]
@@ -846,31 +850,20 @@ other                     .
                                 }
  
  {integer}              {
-                                       long val;
-                                       char* endptr;
-
                                         SET_YYLLOC();
-                                       errno = 0;
-                                       val = strtol(yytext, &endptr, 10);
-                                       if (*endptr != '\0' || errno == ERANGE
-#ifdef HAVE_LONG_INT_64
-                                               /* if long > 32 bits, check for overflow of int4 */
-                                               || val != (long) ((int32) val)
-#endif
-                                               )
-                                       {
-                                               /* integer too large, treat it as a float */
-                                               yylval->str = pstrdup(yytext);
-                                               return FCONST;
-                                       }
-                                       yylval->ival = val;
-                                       return ICONST;
+                                       return process_integer_literal(yytext, yylval);
                                 }
  {decimal}              {
                                         SET_YYLLOC();
                                         yylval->str = pstrdup(yytext);
                                         return FCONST;
                                 }
+{decimalfail}  {
+                                       /* throw back the .., and treat as integer */
+                                       yyless(yyleng-2);
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext, yylval);
+                               }
  {real}                 {
                                         SET_YYLLOC();
                                         yylval->str = pstrdup(yytext);
@@ -1121,6 +1114,29 @@ litbufdup(core_yyscan_t yyscanner)
         return new;
  }
  
+static int     /* token code handed back to the lexer rule: ICONST or FCONST */
+process_integer_literal(const char *token, YYSTYPE *lval)
+{
+       long            val;            /* strtol() result, range-checked below */
+       char       *endptr;             /* set by strtol() to the first unparsed char */
+
+       errno = 0;              /* so an ERANGE seen below can only come from this strtol() */
+       val = strtol(token, &endptr, 10);
+       if (*endptr != '\0' || errno == ERANGE  /* unparsed tail, or out of range for long */
+#ifdef HAVE_LONG_INT_64
+               /* if long > 32 bits, check for overflow of int4 */
+               || val != (long) ((int32) val)
+#endif
+               )
+       {
+               /* not usable as an int4: store a copy of the text and report FCONST */
+               lval->str = pstrdup(token);
+               return FCONST;
+       }
+       lval->ival = val;       /* passed every check above: store the numeric value */
+       return ICONST;
+}
+
  static int
  hexval(unsigned char c)
  {
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c

index b4d8612d6c95722824945cb062d91968b280fc63..b3b5daa2213b4377d45a75fdb5c3c074f8feda06 100644 (file)
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.37 2009/01/01 17:23:46 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.38 2009/11/12 00:13:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -197,7 +197,6 @@ bool
  scanner_isspace(char ch)
  {
         /* This must match scan.l's list of {space} characters */
-       /* and plpgsql's scan.l as well */
         if (ch == ' ' ||
                 ch == '\t' ||
                 ch == '\n' ||
diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l

index 894800aaf5867d60525de9435b530bc1f335a36f..03fbc6c8438b49481c59717a6c2b100695f4e0fd 100644 (file)
--- a/src/bin/psql/psqlscan.l
+++ b/src/bin/psql/psqlscan.l
@@ -33,7 +33,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.29 2009/09/27 03:27:24 tgl Exp $
+ *       $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.30 2009/11/12 00:13:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -364,12 +364,15 @@ operator          {op_chars}+
   * instead we pass it separately to parser. there it gets
   * coerced via doNegate() -- Leon aug 20 1999
   *
+ * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ *
   * {realfail1} and {realfail2} are added to prevent the need for scanner
   * backup when the {real} rule fails to match completely.
   */
  
  integer                        {digit}+
  decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
+decimalfail            {digit}+\.\.
  real                   ({integer}|{decimal})[Ee][-+]?{digit}+
  realfail1              ({integer}|{decimal})[Ee]
  realfail2              ({integer}|{decimal})[Ee][-+]
@@ -776,6 +779,11 @@ other                      .
  {decimal}              {
                                         ECHO;
                                 }
+{decimalfail}  {
+                                       /* throw back the .., and treat as integer */
+                                       yyless(yyleng-2);
+                                       ECHO;
+                               }
  {real}                 {
                                         ECHO;
                                 }
diff --git a/src/pl/plpgsql/src/.cvsignore b/src/pl/plpgsql/src/.cvsignore

index e3b861c6fef73505149ee084fa379e5ece6738f3..fa18d49267b82f789d7853d0a45d3cfc62e8fe8e 100644 (file)
--- a/src/pl/plpgsql/src/.cvsignore
+++ b/src/pl/plpgsql/src/.cvsignore
@@ -1,3 +1,2 @@
  pl_gram.c
  pl_gram.h
-pl_scan.c
diff --git a/src/pl/plpgsql/src/Makefile b/src/pl/plpgsql/src/Makefile

index da84357e20bbaf074bdbad10da3cecfe398a7c3f..c414ac76218cf31f3bea5e0ddead9b81ac1f2b70 100644 (file)
--- a/src/pl/plpgsql/src/Makefile
+++ b/src/pl/plpgsql/src/Makefile
@@ -2,7 +2,7 @@
  #
  # Makefile for the plpgsql shared object
  #
-# $PostgreSQL: pgsql/src/pl/plpgsql/src/Makefile,v 1.34 2009/08/28 20:26:19 petere Exp $
+# $PostgreSQL: pgsql/src/pl/plpgsql/src/Makefile,v 1.35 2009/11/12 00:13:00 tgl Exp $
  #
  #-------------------------------------------------------------------------
  
@@ -17,7 +17,7 @@ override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
  SHLIB_LINK = $(filter -lintl, $(LIBS))
  rpath =
  
-OBJS = pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o
+OBJS = pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o
  
  all: all-lib
  
@@ -33,10 +33,7 @@ uninstall: uninstall-lib
  
  
  # Force these dependencies to be known even without dependency info built:
-pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o: plpgsql.h pl_gram.h
-
-# pl_scan is compiled as part of pl_gram
-pl_gram.o: pl_scan.c
+pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o: plpgsql.h pl_gram.h
  
  # See notes in src/backend/parser/Makefile about the following two rules
  
@@ -49,23 +46,12 @@ else
         @$(missing) bison $< $@
  endif
  
-# Because we use %option case-insensitive, flex's results could vary
-# depending on what the compile-time locale setting is.  Hence, force
-# it to see LC_CTYPE=C to ensure consistent build results.
-
-pl_scan.c: scan.l
-ifdef FLEX
-       LC_CTYPE=C $(FLEX) $(FLEXFLAGS) -o'$@' $<
-else
-       @$(missing) flex $< $@
-endif
-
-distprep: pl_scan.c pl_gram.h pl_gram.c
+distprep: pl_gram.h pl_gram.c
  
-# pl_gram.c, pl_gram.h, and pl_scan.c are in the distribution tarball,
+# pl_gram.c and pl_gram.h are in the distribution tarball,
  # so they are not cleaned here.
  clean distclean: clean-lib
         rm -f $(OBJS)
  
  maintainer-clean: clean
-       rm -f pl_gram.c pl_gram.h pl_scan.c
+       rm -f pl_gram.c pl_gram.h
diff --git a/src/pl/plpgsql/src/gram.y b/src/pl/plpgsql/src/gram.y

index ec269a88c55f62d51916c9d5a87d3b79645df681..6edd01c4d8d3c9fe3a6938c130d6b0c1a73d0b85 100644 (file)
--- a/src/pl/plpgsql/src/gram.y
+++ b/src/pl/plpgsql/src/gram.y
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.134 2009/11/10 02:13:13 tgl Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.135 2009/11/12 00:13:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,19 +19,10 @@
  #include "catalog/pg_type.h"
  #include "parser/parser.h"
  #include "parser/parse_type.h"
+#include "parser/scanner.h"
  #include "parser/scansup.h"
  
  
-/*
- * We track token locations in terms of byte offsets from the start of the
- * source string, not the column number/line number representation that
- * bison uses by default.  Also, to minimize overhead we track only one
- * location (usually the first token location) for each construct, not
- * the beginning and ending locations as bison does by default.  It's
- * therefore sufficient to make YYLTYPE an int.
- */
-#define YYLTYPE  int
-
  /* Location tracking support --- simpler than bison's default */
  #define YYLLOC_DEFAULT(Current, Rhs, N) \
         do { \
@@ -64,7 +55,7 @@ typedef struct
  union YYSTYPE;                                 /* need forward reference for tok_is_keyword */
  
  static bool                    tok_is_keyword(int token, union YYSTYPE *lval,
-                                                                          const char *keyword);
+                                                                          int kw_token, const char *kw_str);
  static void                    token_is_not_variable(int tok);
  static PLpgSQL_expr    *read_sql_construct(int until,
                                                                                         int until2,
@@ -75,6 +66,8 @@ static        PLpgSQL_expr    *read_sql_construct(int until,
                                                                                         bool valid_sql,
                                                                                         int *startloc,
                                                                                         int *endtoken);
+static PLpgSQL_expr    *read_sql_expression(int until,
+                                                                                        const char *expected);
  static PLpgSQL_expr    *read_sql_expression2(int until, int until2,
                                                                                           const char *expected,
                                                                                           int *endtoken);
@@ -103,7 +96,6 @@ static       void                     check_sql_expr(const char *stmt, int location,
                                                                                 int leaderlen);
  static void                     plpgsql_sql_error_callback(void *arg);
  static PLpgSQL_type    *parse_datatype(const char *string, int location);
-static char                    *parse_string_token(const char *token, int location);
  static void                     check_labels(const char *start_label,
                                                                           const char *end_label,
                                                                           int end_location);
@@ -118,6 +110,7 @@ static      List                    *read_raise_options(void);
  %locations
  
  %union {
+               core_YYSTYPE                    core_yystype;
                 /* these fields must match core_YYSTYPE: */
                 int                                             ival;
                 char                                    *str;
@@ -158,7 +151,6 @@ static      List                    *read_raise_options(void);
                 PLpgSQL_var                             *var;
                 PLpgSQL_expr                    *expr;
                 PLpgSQL_stmt                    *stmt;
-               PLpgSQL_stmt_block              *program;
                 PLpgSQL_condition               *condition;
                 PLpgSQL_exception               *exception;
                 PLpgSQL_exception_block *exception_block;
@@ -212,7 +204,10 @@ static     List                    *read_raise_options(void);
  %type <ival>   getdiag_item getdiag_target
  
  %type <ival>   opt_scrollable
-%type <fetch>   opt_fetch_direction
+%type <fetch>  opt_fetch_direction
+
+%type <keyword>        unreserved_keyword
+
  
  /*
   * Basic non-keyword token types.  These are hard-wired into the core lexer.
@@ -227,84 +222,103 @@ static   List                    *read_raise_options(void);
  %token                 TYPECAST DOT_DOT COLON_EQUALS
  
  /*
- * Other tokens recognized by plpgsql's lexer interface layer.
+ * Other tokens recognized by plpgsql's lexer interface layer (pl_scanner.c).
   */
-%token                         T_STRING
-%token                         T_NUMBER
  %token <word>          T_WORD          /* unrecognized simple identifier */
  %token <cword>         T_CWORD         /* unrecognized composite identifier */
  %token <wdatum>                T_DATUM         /* a VAR, ROW, REC, or RECFIELD variable */
-
-%token O_OPTION
-%token O_DUMP
+%token                         LESS_LESS
+%token                         GREATER_GREATER
  
  /*
- * Keyword tokens
+ * Keyword tokens.  Some of these are reserved and some are not;
+ * see pl_scanner.c for info.  Be sure unreserved keywords are listed
+ * in the "unreserved_keyword" production below.
   */
-%token K_ALIAS
-%token K_ALL
-%token K_ASSIGN
-%token K_BEGIN
-%token K_BY
-%token K_CASE
-%token K_CLOSE
-%token K_CONSTANT
-%token K_CONTINUE
-%token K_CURSOR
-%token K_DECLARE
-%token K_DEFAULT
-%token K_DIAGNOSTICS
-%token K_DOTDOT
-%token K_ELSE
-%token K_ELSIF
-%token K_END
-%token K_EXCEPTION
-%token K_EXECUTE
-%token K_EXIT
-%token K_FOR
-%token K_FETCH
-%token K_FROM
-%token K_GET
-%token K_IF
-%token K_IN
-%token K_INSERT
-%token K_INTO
-%token K_IS
-%token K_LOOP
-%token K_MOVE
-%token K_NOSCROLL
-%token K_NOT
-%token K_NULL
-%token K_OPEN
-%token K_OR
-%token K_PERFORM
-%token K_RAISE
-%token K_RETURN
-%token K_SCROLL
-%token K_STRICT
-%token K_THEN
-%token K_TO
-%token K_USING
-%token K_WHEN
-%token K_WHILE
+%token <keyword>       K_ABSOLUTE
+%token <keyword>       K_ALIAS
+%token <keyword>       K_ALL
+%token <keyword>       K_BACKWARD
+%token <keyword>       K_BEGIN
+%token <keyword>       K_BY
+%token <keyword>       K_CASE
+%token <keyword>       K_CLOSE
+%token <keyword>       K_CONSTANT
+%token <keyword>       K_CONTINUE
+%token <keyword>       K_CURSOR
+%token <keyword>       K_DEBUG
+%token <keyword>       K_DECLARE
+%token <keyword>       K_DEFAULT
+%token <keyword>       K_DETAIL
+%token <keyword>       K_DIAGNOSTICS
+%token <keyword>       K_DUMP
+%token <keyword>       K_ELSE
+%token <keyword>       K_ELSIF
+%token <keyword>       K_END
+%token <keyword>       K_ERRCODE
+%token <keyword>       K_EXCEPTION
+%token <keyword>       K_EXECUTE
+%token <keyword>       K_EXIT
+%token <keyword>       K_FETCH
+%token <keyword>       K_FIRST
+%token <keyword>       K_FOR
+%token <keyword>       K_FORWARD
+%token <keyword>       K_FROM
+%token <keyword>       K_GET
+%token <keyword>       K_HINT
+%token <keyword>       K_IF
+%token <keyword>       K_IN
+%token <keyword>       K_INFO
+%token <keyword>       K_INSERT
+%token <keyword>       K_INTO
+%token <keyword>       K_IS
+%token <keyword>       K_LAST
+%token <keyword>       K_LOG
+%token <keyword>       K_LOOP
+%token <keyword>       K_MESSAGE
+%token <keyword>       K_MOVE
+%token <keyword>       K_NEXT
+%token <keyword>       K_NO
+%token <keyword>       K_NOT
+%token <keyword>       K_NOTICE
+%token <keyword>       K_NULL
+%token <keyword>       K_OPEN
+%token <keyword>       K_OPTION
+%token <keyword>       K_OR
+%token <keyword>       K_PERFORM
+%token <keyword>       K_PRIOR
+%token <keyword>       K_QUERY
+%token <keyword>       K_RAISE
+%token <keyword>       K_RELATIVE
+%token <keyword>       K_RESULT_OID
+%token <keyword>       K_RETURN
+%token <keyword>       K_REVERSE
+%token <keyword>       K_ROWTYPE
+%token <keyword>       K_ROW_COUNT
+%token <keyword>       K_SCROLL
+%token <keyword>       K_SQLSTATE
+%token <keyword>       K_STRICT
+%token <keyword>       K_THEN
+%token <keyword>       K_TO
+%token <keyword>       K_TYPE
+%token <keyword>       K_USING
+%token <keyword>       K_WARNING
+%token <keyword>       K_WHEN
+%token <keyword>       K_WHILE
  
  %%
  
-pl_function            : comp_optsect pl_block opt_semi
+pl_function            : comp_options pl_block opt_semi
                                         {
-                                               yylval.program = (PLpgSQL_stmt_block *) $2;
+                                               plpgsql_parse_result = (PLpgSQL_stmt_block *) $2;
                                         }
                                 ;
  
-comp_optsect   :
-                               | comp_options
-                               ;
-
-comp_options   : comp_options comp_option
-                               | comp_option
+comp_options   :
+                               | comp_options comp_option
                                 ;
  
-comp_option            : O_OPTION O_DUMP
+comp_option            : '#' K_OPTION K_DUMP
                                         {
                                                 plpgsql_DumpExecTree = true;
                                         }
@@ -381,8 +395,8 @@ decl_stmts          : decl_stmts decl_stmt
                                         {       $$ = $1;        }
                                 ;
  
-decl_stmt              : '<' '<' any_identifier '>' '>'
-                                       {       $$ = $3;        }
+decl_stmt              : LESS_LESS any_identifier GREATER_GREATER
+                                       {       $$ = $2;        }
                                 | K_DECLARE
                                         {       $$ = NULL;      }
                                 | decl_statement
@@ -487,7 +501,7 @@ opt_scrollable :
                                         {
                                                 $$ = 0;
                                         }
-                               | K_NOSCROLL
+                               | K_NO K_SCROLL
                                         {
                                                 $$ = CURSOR_OPT_NO_SCROLL;
                                         }
@@ -613,6 +627,19 @@ decl_varname       : T_WORD
                                                                                           NULL) != NULL)
                                                         yyerror("duplicate declaration");
                                         }
+                               | unreserved_keyword
+                                       {
+                                               $$.name = pstrdup($1);
+                                               $$.lineno = plpgsql_location_to_lineno(@1);
+                                               /*
+                                                * Check to make sure name isn't already declared
+                                                * in the current block.
+                                                */
+                                               if (plpgsql_ns_lookup(plpgsql_ns_top(), true,
+                                                                                         $1, NULL, NULL,
+                                                                                         NULL) != NULL)
+                                                       yyerror("duplicate declaration");
+                                       }
                                 ;
  
  decl_const             :
@@ -642,14 +669,18 @@ decl_defval               : ';'
                                         { $$ = NULL; }
                                 | decl_defkey
                                         {
-                                               $$ = plpgsql_read_expression(';', ";");
+                                               $$ = read_sql_expression(';', ";");
                                         }
                                 ;
  
-decl_defkey            : K_ASSIGN
+decl_defkey            : assign_operator
                                 | K_DEFAULT
                                 ;
  
+assign_operator        : '='
+                               | COLON_EQUALS
+                               ;
+
  proc_sect              :
                                         { $$ = NIL; }
                                 | proc_stmts
@@ -725,7 +756,7 @@ stmt_perform        : K_PERFORM expr_until_semi
                                         }
                                 ;
  
-stmt_assign            : assign_var K_ASSIGN expr_until_semi
+stmt_assign            : assign_var assign_operator expr_until_semi
                                         {
                                                 PLpgSQL_stmt_assign *new;
  
@@ -762,7 +793,7 @@ getdiag_list : getdiag_list ',' getdiag_list_item
                                         }
                                 ;
  
-getdiag_list_item : getdiag_target K_ASSIGN getdiag_item
+getdiag_list_item : getdiag_target assign_operator getdiag_item
                                         {
                                                 PLpgSQL_diag_item *new;
  
@@ -778,9 +809,11 @@ getdiag_item :
                                         {
                                                 int     tok = yylex();
  
-                                               if (tok_is_keyword(tok, &yylval, "row_count"))
+                                               if (tok_is_keyword(tok, &yylval,
+                                                                                  K_ROW_COUNT, "row_count"))
                                                         $$ = PLPGSQL_GETDIAG_ROW_COUNT;
-                                               else if (tok_is_keyword(tok, &yylval, "result_oid"))
+                                               else if (tok_is_keyword(tok, &yylval,
+                                                                                               K_RESULT_OID, "result_oid"))
                                                         $$ = PLPGSQL_GETDIAG_RESULT_OID;
                                                 else
                                                         yyerror("unrecognized GET DIAGNOSTICS item");
@@ -901,7 +934,7 @@ opt_expr_until_when :
                                                 if (tok != K_WHEN)
                                                 {
                                                         plpgsql_push_back_token(tok);
-                                                       expr = plpgsql_read_expression(K_WHEN, "WHEN");
+                                                       expr = read_sql_expression(K_WHEN, "WHEN");
                                                 }
                                                 plpgsql_push_back_token(K_WHEN);
                                                 $$ = expr;
@@ -1130,7 +1163,8 @@ for_control               : for_variable K_IN
                                                          * keyword, which means it must be an
                                                          * integer loop.
                                                          */
-                                                       if (tok_is_keyword(tok, &yylval, "reverse"))
+                                                       if (tok_is_keyword(tok, &yylval,
+                                                                                          K_REVERSE, "reverse"))
                                                                 reverse = true;
                                                         else
                                                                 plpgsql_push_back_token(tok);
@@ -1142,7 +1176,7 @@ for_control               : for_variable K_IN
                                                          * statement, so we need to invoke
                                                          * read_sql_construct directly.
                                                          */
-                                                       expr1 = read_sql_construct(K_DOTDOT,
+                                                       expr1 = read_sql_construct(DOT_DOT,
                                                                                                            K_LOOP,
                                                                                                            0,
                                                                                                            "LOOP",
@@ -1152,7 +1186,7 @@ for_control               : for_variable K_IN
                                                                                                            &expr1loc,
                                                                                                            &tok);
  
-                                                       if (tok == K_DOTDOT)
+                                                       if (tok == DOT_DOT)
                                                         {
                                                                 /* Saw "..", so it must be an integer loop */
                                                                 PLpgSQL_expr            *expr2;
@@ -1170,8 +1204,8 @@ for_control               : for_variable K_IN
  
                                                                 /* Get the BY clause if any */
                                                                 if (tok == K_BY)
-                                                                       expr_by = plpgsql_read_expression(K_LOOP,
-                                                                                                                                         "LOOP");
+                                                                       expr_by = read_sql_expression(K_LOOP,
+                                                                                                                                 "LOOP");
                                                                 else
                                                                         expr_by = NULL;
  
@@ -1370,11 +1404,13 @@ stmt_return             : K_RETURN
                                                 if (tok == 0)
                                                         yyerror("unexpected end of function definition");
  
-                                               if (tok_is_keyword(tok, &yylval, "next"))
+                                               if (tok_is_keyword(tok, &yylval,
+                                                                                  K_NEXT, "next"))
                                                 {
                                                         $$ = make_return_next_stmt(@1);
                                                 }
-                                               else if (tok_is_keyword(tok, &yylval, "query"))
+                                               else if (tok_is_keyword(tok, &yylval,
+                                                                                               K_QUERY, "query"))
                                                 {
                                                         $$ = make_return_query_stmt(@1);
                                                 }
@@ -1414,32 +1450,38 @@ stmt_raise              : K_RAISE
                                                         /*
                                                          * First is an optional elog severity level.
                                                          */
-                                                       if (tok == K_EXCEPTION)
+                                                       if (tok_is_keyword(tok, &yylval,
+                                                                                          K_EXCEPTION, "exception"))
                                                         {
                                                                 new->elog_level = ERROR;
                                                                 tok = yylex();
                                                         }
-                                                       else if (tok_is_keyword(tok, &yylval, "warning"))
+                                                       else if (tok_is_keyword(tok, &yylval,
+                                                                                                       K_WARNING, "warning"))
                                                         {
                                                                 new->elog_level = WARNING;
                                                                 tok = yylex();
                                                         }
-                                                       else if (tok_is_keyword(tok, &yylval, "notice"))
+                                                       else if (tok_is_keyword(tok, &yylval,
+                                                                                                       K_NOTICE, "notice"))
                                                         {
                                                                 new->elog_level = NOTICE;
                                                                 tok = yylex();
                                                         }
-                                                       else if (tok_is_keyword(tok, &yylval, "info"))
+                                                       else if (tok_is_keyword(tok, &yylval,
+                                                                                                       K_INFO, "info"))
                                                         {
                                                                 new->elog_level = INFO;
                                                                 tok = yylex();
                                                         }
-                                                       else if (tok_is_keyword(tok, &yylval, "log"))
+                                                       else if (tok_is_keyword(tok, &yylval,
+                                                                                                       K_LOG, "log"))
                                                         {
                                                                 new->elog_level = LOG;
                                                                 tok = yylex();
                                                         }
-                                                       else if (tok_is_keyword(tok, &yylval, "debug"))
+                                                       else if (tok_is_keyword(tok, &yylval,
+                                                                                                       K_DEBUG, "debug"))
                                                         {
                                                                 new->elog_level = DEBUG1;
                                                                 tok = yylex();
@@ -1453,10 +1495,10 @@ stmt_raise              : K_RAISE
                                                          * literal that is the old-style message format,
                                                          * or USING to start the option list immediately.
                                                          */
-                                                       if (tok == T_STRING)
+                                                       if (tok == SCONST)
                                                         {
                                                                 /* old style message and parameters */
-                                                               new->message = parse_string_token(yytext, yylloc);
+                                                               new->message = yylval.str;
                                                                 /*
                                                                  * We expect either a semi-colon, which
                                                                  * indicates no parameters, or a comma that
@@ -1482,14 +1524,15 @@ stmt_raise              : K_RAISE
                                                         else if (tok != K_USING)
                                                         {
                                                                 /* must be condition name or SQLSTATE */
-                                                               if (tok_is_keyword(tok, &yylval, "sqlstate"))
+                                                               if (tok_is_keyword(tok, &yylval,
+                                                                                                  K_SQLSTATE, "sqlstate"))
                                                                 {
                                                                         /* next token should be a string literal */
                                                                         char   *sqlstatestr;
  
-                                                                       if (yylex() != T_STRING)
+                                                                       if (yylex() != SCONST)
                                                                                 yyerror("syntax error");
-                                                                       sqlstatestr = parse_string_token(yytext, yylloc);
+                                                                       sqlstatestr = yylval.str;
  
                                                                         if (strlen(sqlstatestr) != 5)
                                                                                 yyerror("invalid SQLSTATE code");
@@ -1603,12 +1646,19 @@ stmt_open               : K_OPEN cursor_variable
                                                 {
                                                         /* be nice if we could use opt_scrollable here */
                                                     tok = yylex();
-                                                       if (tok == K_NOSCROLL)
+                                                       if (tok_is_keyword(tok, &yylval,
+                                                                                          K_NO, "no"))
                                                         {
-                                                               new->cursor_options |= CURSOR_OPT_NO_SCROLL;
                                                                 tok = yylex();
+                                                               if (tok_is_keyword(tok, &yylval,
+                                                                                                  K_SCROLL, "scroll"))
+                                                               {
+                                                                       new->cursor_options |= CURSOR_OPT_NO_SCROLL;
+                                                                       tok = yylex();
+                                                               }
                                                         }
-                                                       else if (tok == K_SCROLL)
+                                                       else if (tok_is_keyword(tok, &yylval,
+                                                                                                       K_SCROLL, "scroll"))
                                                         {
                                                                 new->cursor_options |= CURSOR_OPT_SCROLL;
                                                                 tok = yylex();
@@ -1824,9 +1874,9 @@ proc_condition    : any_identifier
                                                                 char   *sqlstatestr;
  
                                                                 /* next token should be a string literal */
-                                                               if (yylex() != T_STRING)
+                                                               if (yylex() != SCONST)
                                                                         yyerror("syntax error");
-                                                               sqlstatestr = parse_string_token(yytext, yylloc);
+                                                               sqlstatestr = yylval.str;
  
                                                                 if (strlen(sqlstatestr) != 5)
                                                                         yyerror("invalid SQLSTATE code");
@@ -1849,19 +1899,19 @@ proc_condition  : any_identifier
                                 ;
  
  expr_until_semi :
-                                       { $$ = plpgsql_read_expression(';', ";"); }
+                                       { $$ = read_sql_expression(';', ";"); }
                                 ;
  
  expr_until_rightbracket :
-                                       { $$ = plpgsql_read_expression(']', "]"); }
+                                       { $$ = read_sql_expression(']', "]"); }
                                 ;
  
  expr_until_then :
-                                       { $$ = plpgsql_read_expression(K_THEN, "THEN"); }
+                                       { $$ = read_sql_expression(K_THEN, "THEN"); }
                                 ;
  
  expr_until_loop :
-                                       { $$ = plpgsql_read_expression(K_LOOP, "LOOP"); }
+                                       { $$ = read_sql_expression(K_LOOP, "LOOP"); }
                                 ;
  
  opt_block_label        :
@@ -1869,10 +1919,10 @@ opt_block_label :
                                                 plpgsql_ns_push(NULL);
                                                 $$ = NULL;
                                         }
-                               | '<' '<' any_identifier '>' '>'
+                               | LESS_LESS any_identifier GREATER_GREATER
                                         {
-                                               plpgsql_ns_push($3);
-                                               $$ = $3;
+                                               plpgsql_ns_push($2);
+                                               $$ = $2;
                                         }
                                 ;
  
@@ -1909,30 +1959,67 @@ any_identifier  : T_WORD
                                         }
                                 ;
  
+unreserved_keyword     :
+                               K_ABSOLUTE
+                               | K_ALIAS
+                               | K_BACKWARD
+                               | K_CONSTANT
+                               | K_CURSOR
+                               | K_DEBUG
+                               | K_DETAIL
+                               | K_DUMP
+                               | K_ERRCODE
+                               | K_FIRST
+                               | K_FORWARD
+                               | K_HINT
+                               | K_INFO
+                               | K_IS
+                               | K_LAST
+                               | K_LOG
+                               | K_MESSAGE
+                               | K_NEXT
+                               | K_NO
+                               | K_NOTICE
+                               | K_OPTION
+                               | K_PRIOR
+                               | K_QUERY
+                               | K_RELATIVE
+                               | K_RESULT_OID
+                               | K_REVERSE
+                               | K_ROW_COUNT
+                               | K_ROWTYPE
+                               | K_SCROLL
+                               | K_SQLSTATE
+                               | K_TYPE
+                               | K_WARNING
+                               ;
+
  %%
  
  /*
   * Check whether a token represents an "unreserved keyword".
   * We have various places where we want to recognize a keyword in preference
   * to a variable name, but not reserve that keyword in other contexts.
- * Hence, this kluge.  CAUTION: don't use this for reserved keywords;
- * it won't recognize them.
+ * Hence, this kluge.
   */
  static bool
-tok_is_keyword(int token, union YYSTYPE *lval, const char *keyword)
+tok_is_keyword(int token, union YYSTYPE *lval,
+                          int kw_token, const char *kw_str)
  {
-       if (token == T_WORD)
+       if (token == kw_token)
         {
-               /* must be unquoted and match the downcased string */
-               if (!lval->word.quoted && strcmp(lval->word.ident, keyword) == 0)
-                       return true;
+               /* Normal case, was recognized by scanner (no conflicting variable) */
+               return true;
         }
         else if (token == T_DATUM)
         {
-               /* like the T_WORD case, but also reject composite identifiers */
-               /* (hence an unreserved word followed by "." will not be recognized) */
+               /*
+                * It's a variable, so recheck the string name.  Note we will not
+                * match composite names (hence an unreserved word followed by "."
+                * will not be recognized).
+                */
                 if (!lval->word.quoted && lval->word.ident != NULL &&
-                       strcmp(lval->word.ident, keyword) == 0)
+                       strcmp(lval->word.ident, kw_str) == 0)
                         return true;
         }
         return false;                           /* not the keyword */
@@ -1963,8 +2050,8 @@ token_is_not_variable(int tok)
  }
  
  /* Convenience routine to read an expression with one possible terminator */
-PLpgSQL_expr *
-plpgsql_read_expression(int until, const char *expected)
+static PLpgSQL_expr *
+read_sql_expression(int until, const char *expected)
  {
         return read_sql_construct(until, 0, 0, expected,
                                                           "SELECT ", true, true, NULL, NULL);
@@ -2135,13 +2222,15 @@ read_datatype(int tok)
                 if (tok == '%')
                 {
                         tok = yylex();
-                       if (tok_is_keyword(tok, &yylval, "type"))
+                       if (tok_is_keyword(tok, &yylval,
+                                                          K_TYPE, "type"))
                         {
                                 result = plpgsql_parse_wordtype(dtname);
                                 if (result)
                                         return result;
                         }
-                       else if (tok_is_keyword(tok, &yylval, "rowtype"))
+                       else if (tok_is_keyword(tok, &yylval,
+                                                                       K_ROWTYPE, "rowtype"))
                         {
                                 result = plpgsql_parse_wordrowtype(dtname);
                                 if (result)
@@ -2157,13 +2246,15 @@ read_datatype(int tok)
                 if (tok == '%')
                 {
                         tok = yylex();
-                       if (tok_is_keyword(tok, &yylval, "type"))
+                       if (tok_is_keyword(tok, &yylval,
+                                                          K_TYPE, "type"))
                         {
                                 result = plpgsql_parse_cwordtype(dtnames);
                                 if (result)
                                         return result;
                         }
-                       else if (tok_is_keyword(tok, &yylval, "rowtype"))
+                       else if (tok_is_keyword(tok, &yylval,
+                                                                       K_ROWTYPE, "rowtype"))
                         {
                                 result = plpgsql_parse_cwordrowtype(dtnames);
                                 if (result)
@@ -2182,7 +2273,7 @@ read_datatype(int tok)
                                 yyerror("incomplete data type declaration");
                 }
                 /* Possible followers for datatype in a declaration */
-               if (tok == K_NOT || tok == K_ASSIGN || tok == K_DEFAULT)
+               if (tok == K_NOT || tok == '=' || tok == COLON_EQUALS || tok == K_DEFAULT)
                         break;
                 /* Possible followers for datatype in a cursor_arg list */
                 if ((tok == ',' || tok == ')') && parenlevel == 0)
@@ -2335,24 +2426,29 @@ read_fetch_direction(void)
         if (tok == 0)
                 yyerror("unexpected end of function definition");
  
-       if (tok_is_keyword(tok, &yylval, "next"))
+       if (tok_is_keyword(tok, &yylval,
+                                          K_NEXT, "next"))
         {
                 /* use defaults */
         }
-       else if (tok_is_keyword(tok, &yylval, "prior"))
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_PRIOR, "prior"))
         {
                 fetch->direction = FETCH_BACKWARD;
         }
-       else if (tok_is_keyword(tok, &yylval, "first"))
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_FIRST, "first"))
         {
                 fetch->direction = FETCH_ABSOLUTE;
         }
-       else if (tok_is_keyword(tok, &yylval, "last"))
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_LAST, "last"))
         {
                 fetch->direction = FETCH_ABSOLUTE;
                 fetch->how_many  = -1;
         }
-       else if (tok_is_keyword(tok, &yylval, "absolute"))
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_ABSOLUTE, "absolute"))
         {
                 fetch->direction = FETCH_ABSOLUTE;
                 fetch->expr = read_sql_expression2(K_FROM, K_IN,
@@ -2360,7 +2456,8 @@ read_fetch_direction(void)
                                                                                    NULL);
                 check_FROM = false;
         }
-       else if (tok_is_keyword(tok, &yylval, "relative"))
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_RELATIVE, "relative"))
         {
                 fetch->direction = FETCH_RELATIVE;
                 fetch->expr = read_sql_expression2(K_FROM, K_IN,
@@ -2368,16 +2465,19 @@ read_fetch_direction(void)
                                                                                    NULL);
                 check_FROM = false;
         }
-       else if (tok == K_ALL)
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_ALL, "all"))
         {
                 fetch->how_many = FETCH_ALL;
                 fetch->returns_multiple_rows = true;
         }
-       else if (tok_is_keyword(tok, &yylval, "forward"))
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_FORWARD, "forward"))
         {
                 complete_direction(fetch, &check_FROM);
         }
-       else if (tok_is_keyword(tok, &yylval, "backward"))
+       else if (tok_is_keyword(tok, &yylval,
+                                                       K_BACKWARD, "backward"))
         {
                 fetch->direction = FETCH_BACKWARD;
                 complete_direction(fetch, &check_FROM);
@@ -2532,7 +2632,7 @@ make_return_stmt(int location)
                  * Note that a well-formed expression is _required_ here;
                  * anything else is a compile-time error.
                  */
-               new->expr = plpgsql_read_expression(';', ";");
+               new->expr = read_sql_expression(';', ";");
         }
  
         return (PLpgSQL_stmt *) new;
@@ -2591,7 +2691,7 @@ make_return_next_stmt(int location)
                         yyerror("syntax error");
         }
         else
-               new->expr = plpgsql_read_expression(';', ";");
+               new->expr = read_sql_expression(';', ";");
  
         return (PLpgSQL_stmt *) new;
  }
@@ -2956,36 +3056,6 @@ parse_datatype(const char *string, int location)
         return plpgsql_build_datatype(type_id, typmod);
  }
  
-/*
- * Convert a string-literal token to the represented string value.
- *
- * To do this, we need to invoke the core lexer.  Here we are only concerned
- * with setting up an errcontext link, which is handled the same as
- * in check_sql_expr().
- */
-static char *
-parse_string_token(const char *token, int location)
-{
-       char       *result;
-       sql_error_callback_arg cbarg;
-       ErrorContextCallback  syntax_errcontext;
-
-       cbarg.location = location;
-       cbarg.leaderlen = 0;
-
-       syntax_errcontext.callback = plpgsql_sql_error_callback;
-       syntax_errcontext.arg = &cbarg;
-       syntax_errcontext.previous = error_context_stack;
-       error_context_stack = &syntax_errcontext;
-
-       result = pg_parse_string_token(token);
-
-       /* Restore former ereport callback */
-       error_context_stack = syntax_errcontext.previous;
-
-       return result;
-}
-
  /*
   * Check block starting and ending labels match.
   */
@@ -3052,7 +3122,7 @@ read_cursor_args(PLpgSQL_var *cursor, int until, const char *expected)
         /*
          * Read expressions until the matching ')'.
          */
-       expr = plpgsql_read_expression(')', ")");
+       expr = read_sql_expression(')', ")");
  
         /* Next we'd better find the until token */
         tok = yylex();
@@ -3080,18 +3150,23 @@ read_raise_options(void)
  
                 opt = (PLpgSQL_raise_option *) palloc(sizeof(PLpgSQL_raise_option));
  
-               if (tok_is_keyword(tok, &yylval, "errcode"))
+               if (tok_is_keyword(tok, &yylval,
+                                                  K_ERRCODE, "errcode"))
                         opt->opt_type = PLPGSQL_RAISEOPTION_ERRCODE;
-               else if (tok_is_keyword(tok, &yylval, "message"))
+               else if (tok_is_keyword(tok, &yylval,
+                                                               K_MESSAGE, "message"))
                         opt->opt_type = PLPGSQL_RAISEOPTION_MESSAGE;
-               else if (tok_is_keyword(tok, &yylval, "detail"))
+               else if (tok_is_keyword(tok, &yylval,
+                                                               K_DETAIL, "detail"))
                         opt->opt_type = PLPGSQL_RAISEOPTION_DETAIL;
-               else if (tok_is_keyword(tok, &yylval, "hint"))
+               else if (tok_is_keyword(tok, &yylval,
+                                                               K_HINT, "hint"))
                         opt->opt_type = PLPGSQL_RAISEOPTION_HINT;
                 else
                         yyerror("unrecognized RAISE statement option");
  
-               if (yylex() != K_ASSIGN)
+               tok = yylex();
+               if (tok != '=' && tok != COLON_EQUALS)
                         yyerror("syntax error, expected \"=\"");
  
                 opt->expr = read_sql_expression2(',', ';', ", or ;", &tok);
@@ -3181,9 +3256,3 @@ make_case(int location, PLpgSQL_expr *t_expr,
  
         return (PLpgSQL_stmt *) new;
  }
-
-
-/* Needed to avoid conflict between different prefix settings: */
-#undef yylex
-
-#include "pl_scan.c"
diff --git a/src/pl/plpgsql/src/nls.mk b/src/pl/plpgsql/src/nls.mk

index 9c535f35347d67ce1a88ed2267428c7fde358cf7..b9a3beab86ca1a22f4e3768c577dd1690b878d74 100644 (file)
--- a/src/pl/plpgsql/src/nls.mk
+++ b/src/pl/plpgsql/src/nls.mk
@@ -1,7 +1,7 @@
-# $PostgreSQL: pgsql/src/pl/plpgsql/src/nls.mk,v 1.11 2009/10/20 18:23:27 petere Exp $
+# $PostgreSQL: pgsql/src/pl/plpgsql/src/nls.mk,v 1.12 2009/11/12 00:13:00 tgl Exp $
  CATALOG_NAME   := plpgsql
  AVAIL_LANGUAGES        := de es fr it ja ro
-GETTEXT_FILES  := pl_comp.c pl_exec.c pl_gram.c pl_funcs.c pl_handler.c pl_scan.c
+GETTEXT_FILES  := pl_comp.c pl_exec.c pl_gram.c pl_funcs.c pl_handler.c pl_scanner.c
  GETTEXT_TRIGGERS:= _ errmsg errmsg_plural:1,2 errdetail errdetail_log errdetail_plural:1,2 errhint errcontext yyerror plpgsql_yyerror
  
  .PHONY: gettext-files
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c

index 2f69a647f8efc400373947f529ac11d480dd0bd6..bc0cde6c7674009a98eb1dc7aee9d7b98bb26cb8 100644 (file)
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.144 2009/11/10 02:13:13 tgl Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.145 2009/11/12 00:13:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -17,8 +17,6 @@
  
  #include <ctype.h>
  
-#include "pl_gram.h"
-
  #include "catalog/namespace.h"
  #include "catalog/pg_attrdef.h"
  #include "catalog/pg_attribute.h"
@@ -41,6 +39,8 @@
   * Our own local and global variables
   * ----------
   */
+PLpgSQL_stmt_block *plpgsql_parse_result;
+
  static int     datums_alloc;
  int                    plpgsql_nDatums;
  PLpgSQL_datum **plpgsql_Datums;
@@ -672,7 +672,7 @@ do_compile(FunctionCallInfo fcinfo,
         parse_rc = plpgsql_yyparse();
         if (parse_rc != 0)
                 elog(ERROR, "plpgsql parser returned %d", parse_rc);
-       function->action = plpgsql_yylval.program;
+       function->action = plpgsql_parse_result;
  
         plpgsql_scanner_finish();
         pfree(proc_source);
@@ -823,7 +823,7 @@ plpgsql_compile_inline(char *proc_source)
         parse_rc = plpgsql_yyparse();
         if (parse_rc != 0)
                 elog(ERROR, "plpgsql parser returned %d", parse_rc);
-       function->action = plpgsql_yylval.program;
+       function->action = plpgsql_parse_result;
  
         plpgsql_scanner_finish();
  
@@ -1237,18 +1237,25 @@ make_datum_param(PLpgSQL_expr *expr, int dno, int location)
  
  /* ----------
   * plpgsql_parse_word          The scanner calls this to postparse
- *                             any single word not found by a
- *                             keyword rule.
+ *                             any single word that is not a reserved keyword.
+ *
+ * word1 is the downcased/dequoted identifier; it must be palloc'd in the
+ * function's long-term memory context.
+ *
+ * yytxt is the original token text; we need this to check for quoting,
+ * so that later checks for unreserved keywords work properly.
+ *
+ * If recognized as a variable, fill in *wdatum and return TRUE;
+ * if not recognized, fill in *word and return FALSE.
+ * (Note: those two pointers actually point to members of the same union,
+ * but for notational reasons we pass them separately.)
   * ----------
   */
-int
-plpgsql_parse_word(const char *word)
+bool
+plpgsql_parse_word(char *word1, const char *yytxt,
+                                  PLwdatum *wdatum, PLword *word)
  {
-       PLpgSQL_nsitem *nse;
-       char       *cp[1];
-
-       /* Do case conversion and word separation */
-       plpgsql_convert_ident(word, cp, 1);
+       PLpgSQL_nsitem *ns;
  
         /* No lookup if disabled */
         if (plpgsql_LookupIdentifiers)
@@ -1256,26 +1263,26 @@ plpgsql_parse_word(const char *word)
                 /*
                  * Do a lookup in the current namespace stack
                  */
-               nse = plpgsql_ns_lookup(plpgsql_ns_top(), false,
-                                                               cp[0], NULL, NULL,
-                                                               NULL);
+               ns = plpgsql_ns_lookup(plpgsql_ns_top(), false,
+                                                          word1, NULL, NULL,
+                                                          NULL);
  
-               if (nse != NULL)
+               if (ns != NULL)
                 {
-                       switch (nse->itemtype)
+                       switch (ns->itemtype)
                         {
                                 case PLPGSQL_NSTYPE_VAR:
                                 case PLPGSQL_NSTYPE_ROW:
                                 case PLPGSQL_NSTYPE_REC:
-                                       plpgsql_yylval.wdatum.datum = plpgsql_Datums[nse->itemno];
-                                       plpgsql_yylval.wdatum.ident = cp[0];
-                                       plpgsql_yylval.wdatum.quoted = (word[0] == '"');
-                                       plpgsql_yylval.wdatum.idents = NIL;
-                                       return T_DATUM;
+                                       wdatum->datum = plpgsql_Datums[ns->itemno];
+                                       wdatum->ident = word1;
+                                       wdatum->quoted = (yytxt[0] == '"');
+                                       wdatum->idents = NIL;
+                                       return true;
  
                                 default:
                                         elog(ERROR, "unrecognized plpgsql itemtype: %d",
-                                                nse->itemtype);
+                                                ns->itemtype);
                         }
                 }
         }
@@ -1284,9 +1291,9 @@ plpgsql_parse_word(const char *word)
          * Nothing found - up to now it's a word without any special meaning for
          * us.
          */
-       plpgsql_yylval.word.ident = cp[0];
-       plpgsql_yylval.word.quoted = (word[0] == '"');
-       return T_WORD;
+       word->ident = word1;
+       word->quoted = (yytxt[0] == '"');
+       return false;
  }
  
  
@@ -1295,19 +1302,16 @@ plpgsql_parse_word(const char *word)
   *                                     separated by a dot.
   * ----------
   */
-int
-plpgsql_parse_dblword(const char *word)
+bool
+plpgsql_parse_dblword(char *word1, char *word2,
+                                         PLwdatum *wdatum, PLcword *cword)
  {
         PLpgSQL_nsitem *ns;
-       char       *cp[2];
         List       *idents;
         int                     nnames;
  
-       /* Do case conversion and word separation */
-       plpgsql_convert_ident(word, cp, 2);
-
-       idents = list_make2(makeString(cp[0]),
-                                               makeString(cp[1]));
+       idents = list_make2(makeString(word1),
+                                               makeString(word2));
  
         /* No lookup if disabled */
         if (plpgsql_LookupIdentifiers)
@@ -1316,7 +1320,7 @@ plpgsql_parse_dblword(const char *word)
                  * Do a lookup in the current namespace stack
                  */
                 ns = plpgsql_ns_lookup(plpgsql_ns_top(), false,
-                                                          cp[0], cp[1], NULL,
+                                                          word1, word2, NULL,
                                                            &nnames);
                 if (ns != NULL)
                 {
@@ -1324,11 +1328,11 @@ plpgsql_parse_dblword(const char *word)
                         {
                                 case PLPGSQL_NSTYPE_VAR:
                                         /* Block-qualified reference to scalar variable. */
-                                       plpgsql_yylval.wdatum.datum = plpgsql_Datums[ns->itemno];
-                                       plpgsql_yylval.wdatum.ident = NULL;
-                                       plpgsql_yylval.wdatum.quoted = false; /* not used */
-                                       plpgsql_yylval.wdatum.idents = idents;
-                                       return T_DATUM;
+                                       wdatum->datum = plpgsql_Datums[ns->itemno];
+                                       wdatum->ident = NULL;
+                                       wdatum->quoted = false; /* not used */
+                                       wdatum->idents = idents;
+                                       return true;
  
                                 case PLPGSQL_NSTYPE_REC:
                                         if (nnames == 1)
@@ -1341,22 +1345,22 @@ plpgsql_parse_dblword(const char *word)
  
                                                 new = palloc(sizeof(PLpgSQL_recfield));
                                                 new->dtype = PLPGSQL_DTYPE_RECFIELD;
-                                               new->fieldname = pstrdup(cp[1]);
+                                               new->fieldname = pstrdup(word2);
                                                 new->recparentno = ns->itemno;
  
                                                 plpgsql_adddatum((PLpgSQL_datum *) new);
  
-                                               plpgsql_yylval.wdatum.datum = (PLpgSQL_datum *) new;
+                                               wdatum->datum = (PLpgSQL_datum *) new;
                                         }
                                         else
                                         {
                                                 /* Block-qualified reference to record variable. */
-                                               plpgsql_yylval.wdatum.datum = plpgsql_Datums[ns->itemno];
+                                               wdatum->datum = plpgsql_Datums[ns->itemno];
                                         }
-                                       plpgsql_yylval.wdatum.ident = NULL;
-                                       plpgsql_yylval.wdatum.quoted = false; /* not used */
-                                       plpgsql_yylval.wdatum.idents = idents;
-                                       return T_DATUM;
+                                       wdatum->ident = NULL;
+                                       wdatum->quoted = false; /* not used */
+                                       wdatum->idents = idents;
+                                       return true;
  
                                 case PLPGSQL_NSTYPE_ROW:
                                         if (nnames == 1)
@@ -1372,28 +1376,28 @@ plpgsql_parse_dblword(const char *word)
                                                 for (i = 0; i < row->nfields; i++)
                                                 {
                                                         if (row->fieldnames[i] &&
-                                                               strcmp(row->fieldnames[i], cp[1]) == 0)
+                                                               strcmp(row->fieldnames[i], word2) == 0)
                                                         {
-                                                               plpgsql_yylval.wdatum.datum = plpgsql_Datums[row->varnos[i]];
-                                                               plpgsql_yylval.wdatum.ident = NULL;
-                                                               plpgsql_yylval.wdatum.quoted = false; /* not used */
-                                                               plpgsql_yylval.wdatum.idents = idents;
-                                                               return T_DATUM;
+                                                               wdatum->datum = plpgsql_Datums[row->varnos[i]];
+                                                               wdatum->ident = NULL;
+                                                               wdatum->quoted = false; /* not used */
+                                                               wdatum->idents = idents;
+                                                               return true;
                                                         }
                                                 }
                                                 ereport(ERROR,
                                                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
                                                                  errmsg("row \"%s\" has no field \"%s\"",
-                                                                               cp[0], cp[1])));
+                                                                               word1, word2)));
                                         }
                                         else
                                         {
                                                 /* Block-qualified reference to row variable. */
-                                               plpgsql_yylval.wdatum.datum = plpgsql_Datums[ns->itemno];
-                                               plpgsql_yylval.wdatum.ident = NULL;
-                                               plpgsql_yylval.wdatum.quoted = false; /* not used */
-                                               plpgsql_yylval.wdatum.idents = idents;
-                                               return T_DATUM;
+                                               wdatum->datum = plpgsql_Datums[ns->itemno];
+                                               wdatum->ident = NULL;
+                                               wdatum->quoted = false; /* not used */
+                                               wdatum->idents = idents;
+                                               return true;
                                         }
  
                                 default:
@@ -1403,8 +1407,8 @@ plpgsql_parse_dblword(const char *word)
         }
  
         /* Nothing found */
-       plpgsql_yylval.cword.idents = idents;
-       return T_CWORD;
+       cword->idents = idents;
+       return false;
  }
  
  
@@ -1413,20 +1417,17 @@ plpgsql_parse_dblword(const char *word)
   *                                     separated by dots.
   * ----------
   */
-int
-plpgsql_parse_tripword(const char *word)
+bool
+plpgsql_parse_tripword(char *word1, char *word2, char *word3,
+                                          PLwdatum *wdatum, PLcword *cword)
  {
         PLpgSQL_nsitem *ns;
-       char       *cp[3];
         List       *idents;
         int                     nnames;
  
-       /* Do case conversion and word separation */
-       plpgsql_convert_ident(word, cp, 3);
-
-       idents = list_make3(makeString(cp[0]),
-                                               makeString(cp[1]),
-                                               makeString(cp[2]));
+       idents = list_make3(makeString(word1),
+                                               makeString(word2),
+                                               makeString(word3));
  
         /* No lookup if disabled */
         if (plpgsql_LookupIdentifiers)
@@ -1436,7 +1437,7 @@ plpgsql_parse_tripword(const char *word)
                  * reference, else ignore.
                  */
                 ns = plpgsql_ns_lookup(plpgsql_ns_top(), false,
-                                                          cp[0], cp[1], cp[2],
+                                                          word1, word2, word3,
                                                            &nnames);
                 if (ns != NULL && nnames == 2)
                 {
@@ -1452,16 +1453,16 @@ plpgsql_parse_tripword(const char *word)
  
                                         new = palloc(sizeof(PLpgSQL_recfield));
                                         new->dtype = PLPGSQL_DTYPE_RECFIELD;
-                                       new->fieldname = pstrdup(cp[2]);
+                                       new->fieldname = pstrdup(word3);
                                         new->recparentno = ns->itemno;
  
                                         plpgsql_adddatum((PLpgSQL_datum *) new);
  
-                                       plpgsql_yylval.wdatum.datum = (PLpgSQL_datum *) new;
-                                       plpgsql_yylval.wdatum.ident = NULL;
-                                       plpgsql_yylval.wdatum.quoted = false; /* not used */
-                                       plpgsql_yylval.wdatum.idents = idents;
-                                       return T_DATUM;
+                                       wdatum->datum = (PLpgSQL_datum *) new;
+                                       wdatum->ident = NULL;
+                                       wdatum->quoted = false; /* not used */
+                                       wdatum->idents = idents;
+                                       return true;
                                 }
  
                                 case PLPGSQL_NSTYPE_ROW:
@@ -1477,19 +1478,19 @@ plpgsql_parse_tripword(const char *word)
                                         for (i = 0; i < row->nfields; i++)
                                         {
                                                 if (row->fieldnames[i] &&
-                                                       strcmp(row->fieldnames[i], cp[2]) == 0)
+                                                       strcmp(row->fieldnames[i], word3) == 0)
                                                 {
-                                                       plpgsql_yylval.wdatum.datum = plpgsql_Datums[row->varnos[i]];
-                                                       plpgsql_yylval.wdatum.ident = NULL;
-                                                       plpgsql_yylval.wdatum.quoted = false; /* not used */
-                                                       plpgsql_yylval.wdatum.idents = idents;
-                                                       return T_DATUM;
+                                                       wdatum->datum = plpgsql_Datums[row->varnos[i]];
+                                                       wdatum->ident = NULL;
+                                                       wdatum->quoted = false; /* not used */
+                                                       wdatum->idents = idents;
+                                                       return true;
                                                 }
                                         }
                                         ereport(ERROR,
                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
                                                          errmsg("row \"%s.%s\" has no field \"%s\"",
-                                                                       cp[0], cp[1], cp[2])));
+                                                                       word1, word2, word3)));
                                 }
  
                                 default:
@@ -1499,8 +1500,8 @@ plpgsql_parse_tripword(const char *word)
         }
  
         /* Nothing found */
-       plpgsql_yylval.cword.idents = idents;
-       return T_CWORD;
+       cword->idents = idents;
+       return false;
  }
  
  
diff --git a/src/pl/plpgsql/src/pl_funcs.c b/src/pl/plpgsql/src/pl_funcs.c

index 12f0ae994644801d5206578c1dda556270853e68..d1a89d002be0c7336ae4634aaed65559ef204791 100644 (file)
--- a/src/pl/plpgsql/src/pl_funcs.c
+++ b/src/pl/plpgsql/src/pl_funcs.c
@@ -8,17 +8,13 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.85 2009/11/07 00:52:26 tgl Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.86 2009/11/12 00:13:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  
  #include "plpgsql.h"
  
-#include <ctype.h>
-
-#include "parser/scansup.h"
-
  
  /* ----------
   * Local variables for namespace handling
@@ -208,104 +204,6 @@ plpgsql_ns_lookup_label(PLpgSQL_nsitem *ns_cur, const char *name)
  }
  
  
-/* ----------
- * plpgsql_convert_ident
- *
- * Convert a possibly-qualified identifier to internal form: handle
- * double quotes, translate to lower case where not inside quotes,
- * truncate to NAMEDATALEN.
- *
- * There may be several identifiers separated by dots and optional
- * whitespace. Each one is converted to a separate palloc'd string.
- * The caller passes the expected number of identifiers, as well as
- * a char* array to hold them. It is an error if we find the wrong
- * number of identifiers (cf grammar processing of fori_varname).
- *
- * NOTE: the input string has already been accepted by the flex lexer,
- * so we don't need a heckuva lot of error checking here.
- * ----------
- */
-void
-plpgsql_convert_ident(const char *s, char **output, int numidents)
-{
-       const char *sstart = s;
-       int                     identctr = 0;
-
-       /* Outer loop over identifiers */
-       while (*s)
-       {
-               char       *curident;
-               char       *cp;
-
-               /* Process current identifier */
-
-               if (*s == '"')
-               {
-                       /* Quoted identifier: copy, collapsing out doubled quotes */
-
-                       curident = palloc(strlen(s) + 1);       /* surely enough room */
-                       cp = curident;
-                       s++;
-                       while (*s)
-                       {
-                               if (*s == '"')
-                               {
-                                       if (s[1] != '"')
-                                               break;
-                                       s++;
-                               }
-                               *cp++ = *s++;
-                       }
-                       if (*s != '"')          /* should not happen if lexer checked */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                          errmsg("unterminated \" in identifier: %s", sstart)));
-                       s++;
-                       *cp = '\0';
-                       /* Truncate to NAMEDATALEN */
-                       truncate_identifier(curident, cp - curident, false);
-               }
-               else
-               {
-                       /* Normal identifier: extends till dot or whitespace */
-                       const char *thisstart = s;
-
-                       while (*s && *s != '.' && !scanner_isspace(*s))
-                               s++;
-                       /* Downcase and truncate to NAMEDATALEN */
-                       curident = downcase_truncate_identifier(thisstart, s - thisstart,
-                                                                                                       false);
-               }
-
-               /* Pass ident to caller */
-               if (identctr < numidents)
-                       output[identctr++] = curident;
-               else
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                        errmsg("qualified identifier cannot be used here: %s",
-                                                       sstart)));
-
-               /* If not done, skip whitespace, dot, whitespace */
-               if (*s)
-               {
-                       while (*s && scanner_isspace(*s))
-                               s++;
-                       if (*s++ != '.')
-                               elog(ERROR, "expected dot between identifiers: %s", sstart);
-                       while (*s && scanner_isspace(*s))
-                               s++;
-                       if (*s == '\0')
-                               elog(ERROR, "expected another identifier: %s", sstart);
-               }
-       }
-
-       if (identctr != numidents)
-               elog(ERROR, "improperly qualified identifier: %s",
-                        sstart);
-}
-
-
  /*
   * Statement type as a string, for use in error messages etc.
   */
diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c

new file mode 100644 (file)

index 0000000..523a095
--- /dev/null
+++ b/src/pl/plpgsql/src/pl_scanner.c
@@ -0,0 +1,584 @@
+/*-------------------------------------------------------------------------
+ *
+ * pl_scanner.c
+ *       lexical scanning for PL/pgSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_scanner.c,v 1.1 2009/11/12 00:13:00 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "plpgsql.h"
+
+#include "mb/pg_wchar.h"
+#include "parser/scanner.h"
+
+#include "pl_gram.h"                   /* must be after parser/scanner.h */
+
+#define PG_KEYWORD(a,b,c) {a,b,c},
+
+
+/* Klugy flag to tell scanner whether to look up identifiers */
+bool   plpgsql_LookupIdentifiers = true;
+
+/*
+ * A word about keywords:
+ *
+ * We keep reserved and unreserved keywords in separate arrays.  The
+ * reserved keywords are passed to the core scanner, so they will be
+ * recognized before (and instead of) any variable name.  Unreserved
+ * words are checked for separately, after determining that the identifier
+ * isn't a known variable name.  If plpgsql_LookupIdentifiers is off then
+ * no variable names will be recognized, so the unreserved words always work.
+ * (Note in particular that this helps us avoid reserving keywords that are
+ * only needed in DECLARE sections, since we scan those sections with
+ * plpgsql_LookupIdentifiers off.)
+ *
+ * In certain contexts it is desirable to prefer recognizing an unreserved
+ * keyword over recognizing a variable name.  Those cases are handled in
+ * gram.y using tok_is_keyword().
+ *
+ * For the most part, the reserved keywords are those that start a PL/pgSQL
+ * statement (and so would conflict with an assignment to a variable of the
+ * same name).  We also don't sweat it much about reserving keywords that
+ * are reserved in the core grammar.  Try to avoid reserving other words.
+ */
+
+/*
+ * Lists of keyword (name, token-value, category) entries.
+ *
+ * !!WARNING!!: These lists must be sorted by ASCII name, because binary
+ *              search is used to locate entries.
+ *
+ * Be careful not to put the same word in both lists.  Also be sure that
+ * gram.y's unreserved_keyword production agrees with the second list.
+ */
+
+static const ScanKeyword reserved_keywords[] = {       /* must stay sorted by name: entries are located by binary search */
+       PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
+       PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
+       PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
+       PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
+       PG_KEYWORD("close", K_CLOSE, RESERVED_KEYWORD)
+       PG_KEYWORD("continue", K_CONTINUE, RESERVED_KEYWORD)
+       PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
+       PG_KEYWORD("default", K_DEFAULT, RESERVED_KEYWORD)
+       PG_KEYWORD("diagnostics", K_DIAGNOSTICS, RESERVED_KEYWORD)
+       PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
+       PG_KEYWORD("elseif", K_ELSIF, RESERVED_KEYWORD)        /* alternate spelling: yields the same token as "elsif" */
+       PG_KEYWORD("elsif", K_ELSIF, RESERVED_KEYWORD)
+       PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
+       PG_KEYWORD("exception", K_EXCEPTION, RESERVED_KEYWORD)
+       PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
+       PG_KEYWORD("exit", K_EXIT, RESERVED_KEYWORD)
+       PG_KEYWORD("fetch", K_FETCH, RESERVED_KEYWORD)
+       PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
+       PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
+       PG_KEYWORD("get", K_GET, RESERVED_KEYWORD)
+       PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
+       PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
+       PG_KEYWORD("insert", K_INSERT, RESERVED_KEYWORD)
+       PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
+       PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
+       PG_KEYWORD("move", K_MOVE, RESERVED_KEYWORD)
+       PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
+       PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
+       PG_KEYWORD("open", K_OPEN, RESERVED_KEYWORD)
+       PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
+       PG_KEYWORD("perform", K_PERFORM, RESERVED_KEYWORD)
+       PG_KEYWORD("raise", K_RAISE, RESERVED_KEYWORD)
+       PG_KEYWORD("return", K_RETURN, RESERVED_KEYWORD)
+       PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
+       PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
+       PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
+       PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
+       PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
+       PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
+};
+
+static const int num_reserved_keywords = lengthof(reserved_keywords);  /* entry count of the table above */
+
+static const ScanKeyword unreserved_keywords[] = {     /* must stay sorted by name: entries are located by binary search */
+       PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
+       PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
+       PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
+       PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
+       PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
+       PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
+       PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
+       PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
+       PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
+       PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
+       PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
+       PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
+       PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
+       PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
+       PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
+       PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
+       PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
+       PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
+       PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
+       PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
+       PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
+       PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
+       PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
+       PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
+};
+
+static const int num_unreserved_keywords = lengthof(unreserved_keywords);      /* passed to ScanKeywordLookup() by plpgsql_yylex() */
+
+
+/* Auxiliary data about a token (other than the token type) */
+typedef struct
+{
+       YYSTYPE         lval;                   /* semantic information */
+       YYLTYPE         lloc;                   /* offset in scanbuf */
+       int                     leng;                   /* length in bytes */
+} TokenAuxData;        /* filled in by internal_yylex(); copied by value onto the pushback stack */
+
+/*
+ * Scanner working state.  At some point we might wish to fold all this
+ * into a YY_EXTRA struct.  For the moment, there is no need for plpgsql's
+ * lexer to be re-entrant, and the notational burden of passing a yyscanner
+ * pointer around is great enough to not want to do it without need.
+ */
+
+/* The stuff the core lexer needs */
+static core_yyscan_t yyscanner = NULL; /* handle handed to core_yylex() */
+static core_yy_extra_type core_yy;     /* its scanbuf member is indexed by token location to find token text */
+
+/* The original input string */
+static const char *scanorig;   /* read by plpgsql_append_source_text() and plpgsql_scanner_errposition() */
+
+/* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
+static int             plpgsql_yyleng;
+
+/* Token pushback stack */
+#define MAX_PUSHBACKS 4        /* push_back_token() raises an error once this many are stacked */
+
+static int                     num_pushbacks;  /* number of entries currently on the stack */
+static int                     pushback_token[MAX_PUSHBACKS];  /* token codes */
+static TokenAuxData    pushback_auxdata[MAX_PUSHBACKS];        /* matching aux data, same index as pushback_token[] */
+
+/* State for plpgsql_location_to_lineno() */
+static const char *cur_line_start;
+static const char *cur_line_end;
+static int     cur_line_num;
+
+/* Internal functions */
+static int     internal_yylex(TokenAuxData *auxdata);
+static void push_back_token(int token, TokenAuxData *auxdata);
+static void location_lineno_init(void);
+
+
+/*
+ * This is the yylex routine called from the PL/pgSQL grammar.
+ * It is a wrapper around the core lexer, with the ability to recognize
+ * PL/pgSQL variables and return them as special T_DATUM tokens.  If a
+ * word or compound word does not match any variable name, or if matching
+ * is turned off by plpgsql_LookupIdentifiers, it is returned as
+ * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
+ * matches one of those.
+ */
+int
+plpgsql_yylex(void)
+{
+       int                     tok1;
+       TokenAuxData aux1;
+       const ScanKeyword *kw;
+
+       tok1 = internal_yylex(&aux1);
+       if (tok1 == IDENT || tok1 == PARAM)     /* only these can begin a variable reference */
+       {
+               int                     tok2;
+               TokenAuxData aux2;
+
+               tok2 = internal_yylex(&aux2);   /* look ahead for a qualifying dot */
+               if (tok2 == '.')
+               {
+                       int                     tok3;
+                       TokenAuxData aux3;
+
+                       tok3 = internal_yylex(&aux3);
+                       if (tok3 == IDENT)      /* have A.B; see whether a third component follows */
+                       {
+                               int                     tok4;
+                               TokenAuxData aux4;
+
+                               tok4 = internal_yylex(&aux4);
+                               if (tok4 == '.')
+                               {
+                                       int                     tok5;
+                                       TokenAuxData aux5;
+
+                                       tok5 = internal_yylex(&aux5);
+                                       if (tok5 == IDENT)
+                                       {
+                                               if (plpgsql_parse_tripword(aux1.lval.str,       /* A.B.C: all five tokens are consumed */
+                                                                                                  aux3.lval.str,
+                                                                                                  aux5.lval.str,
+                                                                                                  &aux1.lval.wdatum,
+                                                                                                  &aux1.lval.cword))
+                                                       tok1 = T_DATUM;
+                                               else
+                                                       tok1 = T_CWORD;
+                                       }
+                                       else
+                                       {
+                                               /* not A.B.C, so just process A.B */
+                                               push_back_token(tok5, &aux5);   /* stack is LIFO: push in reverse so tok4 is re-read first */
+                                               push_back_token(tok4, &aux4);
+                                               if (plpgsql_parse_dblword(aux1.lval.str,
+                                                                                                 aux3.lval.str,
+                                                                                                 &aux1.lval.wdatum,
+                                                                                                 &aux1.lval.cword))
+                                                       tok1 = T_DATUM;
+                                               else
+                                                       tok1 = T_CWORD;
+                                       }
+                               }
+                               else
+                               {
+                                       /* not A.B.C, so just process A.B */
+                                       push_back_token(tok4, &aux4);
+                                       if (plpgsql_parse_dblword(aux1.lval.str,
+                                                                                         aux3.lval.str,
+                                                                                         &aux1.lval.wdatum,
+                                                                                         &aux1.lval.cword))
+                                               tok1 = T_DATUM;
+                                       else
+                                               tok1 = T_CWORD;
+                               }
+                       }
+                       else
+                       {
+                               /* not A.B, so just process A */
+                               push_back_token(tok3, &aux3);   /* pushed in reverse so the dot (tok2) is re-read first */
+                               push_back_token(tok2, &aux2);
+                               if (plpgsql_parse_word(aux1.lval.str,
+                                                                          core_yy.scanbuf + aux1.lloc, /* token's source text */
+                                                                          &aux1.lval.wdatum,
+                                                                          &aux1.lval.word))
+                                       tok1 = T_DATUM;
+                               else if (!aux1.lval.word.quoted &&      /* keyword lookup is skipped for quoted words */
+                                                (kw = ScanKeywordLookup(aux1.lval.word.ident,
+                                                                                                unreserved_keywords,
+                                                                                                num_unreserved_keywords)))
+                               {
+                                       aux1.lval.keyword = kw->name;
+                                       tok1 = kw->value;
+                               }
+                               else
+                                       tok1 = T_WORD;
+                       }
+               }
+               else
+               {
+                       /* not A.B, so just process A */
+                       push_back_token(tok2, &aux2);
+                       if (plpgsql_parse_word(aux1.lval.str,
+                                                                  core_yy.scanbuf + aux1.lloc, /* token's source text */
+                                                                  &aux1.lval.wdatum,
+                                                                  &aux1.lval.word))
+                               tok1 = T_DATUM;
+                       else if (!aux1.lval.word.quoted &&      /* keyword lookup is skipped for quoted words */
+                                        (kw = ScanKeywordLookup(aux1.lval.word.ident,
+                                                                                        unreserved_keywords,
+                                                                                        num_unreserved_keywords)))
+                       {
+                               aux1.lval.keyword = kw->name;
+                               tok1 = kw->value;
+                       }
+                       else
+                               tok1 = T_WORD;
+               }
+       }
+       else
+       {
+               /* Not a potential plpgsql variable name, just return the data */
+       }
+
+       plpgsql_yylval = aux1.lval;     /* export the first token's (possibly rewritten) aux data */
+       plpgsql_yylloc = aux1.lloc;     /* location and length below are always those of the first token read */
+       plpgsql_yyleng = aux1.leng;
+       return tok1;
+}
+
+/*
+ * Internal yylex function.  This wraps the core lexer and adds one feature:
+ * a token pushback stack.  We also make a couple of trivial single-token
+ * translations from what the core lexer does to what we want, in particular
+ * interfacing from the core_YYSTYPE to YYSTYPE union.
+ */
+static int
+internal_yylex(TokenAuxData *auxdata)
+{
+       int                     token;
+       const char *yytext;
+
+       if (num_pushbacks > 0)  /* pushed-back tokens take priority over fresh input */
+       {
+               num_pushbacks--;        /* pop the most recently pushed entry */
+               token = pushback_token[num_pushbacks];
+               *auxdata = pushback_auxdata[num_pushbacks];     /* returned exactly as stored; no translation on this path */
+       }
+       else
+       {
+               token = core_yylex(&auxdata->lval.core_yystype,
+                                                  &auxdata->lloc,
+                                                  yyscanner);
+
+               /* remember the length of yytext before it gets changed */
+               yytext = core_yy.scanbuf + auxdata->lloc;
+               auxdata->leng = strlen(yytext); /* NOTE(review): assumes the token text is NUL-terminated in scanbuf at this point -- confirm */
+
+               /* Check for << >> and #, which the core considers operators */
+               if (token == Op)
+               {
+                       if (strcmp(auxdata->lval.str, "<<") == 0)
+                               token = LESS_LESS;
+                       else if (strcmp(auxdata->lval.str, ">>") == 0)
+                               token = GREATER_GREATER;
+                       else if (strcmp(auxdata->lval.str, "#") == 0)
+                               token = '#';
+               }       /* any other operator is returned unchanged as Op */
+
+               /* The core returns PARAM as ival, but we treat it like IDENT */
+               else if (token == PARAM)
+               {
+                       auxdata->lval.str = pstrdup(yytext);    /* replace the value with a copy of the token's source text */
+               }
+       }
+
+       return token;
+}
+
+/*
+ * Push back a token to be re-read by next internal_yylex() call.
+ */
+static void
+push_back_token(int token, TokenAuxData *auxdata)
+{
+       if (num_pushbacks >= MAX_PUSHBACKS)     /* stack full */
+               elog(ERROR, "too many tokens pushed back");
+       pushback_token[num_pushbacks] = token;
+       pushback_auxdata[num_pushbacks] = *auxdata;     /* copied by value, so the caller's struct may go out of scope */
+       num_pushbacks++;        /* internal_yylex() pops in last-in, first-out order */
+}
+
+/*
+ * Push back a single token to be re-read by next plpgsql_yylex() call.
+ *
+ * NOTE: this does not cause yylval or yylloc to "back up".  Also, it
+ * is not a good idea to push back a token code other than what you read.
+ */
+void
+plpgsql_push_back_token(int token)
+{
+       TokenAuxData    auxdata;
+
+       auxdata.lval = plpgsql_yylval;  /* snapshot the current token's value, location and length ... */
+       auxdata.lloc = plpgsql_yylloc;
+       auxdata.leng = plpgsql_yyleng;
+       push_back_token(token, &auxdata);       /* ... and stack them with the caller-supplied token code */
+}
+
+/*
+ * Append the function text starting at startlocation and extending to
+ * (not including) endlocation onto the existing contents of "buf".
+ */
+void
+plpgsql_append_source_text(StringInfo buf,
+                                                  int startlocation, int endlocation)
+{
+       Assert(startlocation <= endlocation);   /* equal locations append zero bytes */
+       appendBinaryStringInfo(buf, scanorig + startlocation,   /* both locations are byte offsets into scanorig */
+                                                  endlocation - startlocation);
+}
+
+/*
+ * plpgsql_scanner_errposition
+ *             Report an error cursor position, if possible.
+ *
+ * This is expected to be used within an ereport() call.  The return value
+ * is a dummy (always 0, in fact).
+ *
+ * "location" is a byte offset into scanorig.  If it is negative, or if
+ * scanorig is NULL, nothing is reported and 0 is returned.  Otherwise the
+ * offset is converted to a 1-based character number, which is passed to
+ * internalerrposition(), and scanorig is passed to internalerrquery().
+ *
+ * Note that this can only be used for messages emitted during initial
+ * parsing of a plpgsql function, since it requires the scanorig string
+ * to still be available.
+ */
+int
+plpgsql_scanner_errposition(int location)
+{
+       int             pos;
+
+       if (location < 0 || scanorig == NULL)
+               return 0;                               /* no-op if location is unknown */
+
+       /* Convert byte offset to character number */
+       pos = pg_mbstrlen_with_len(scanorig, location) + 1;
+       /* And pass it to the ereport mechanism */
+       (void) internalerrposition(pos);
+       /* Also pass the function body string */
+       return internalerrquery(scanorig);
+}
+
+/*
+ * plpgsql_yyerror
+ *             Report a lexer or grammar error.
+ *
+ * The message's cursor position refers to the current token (the one
+ * last returned by plpgsql_yylex()).
+ * This is OK for syntax error messages from the Bison parser, because Bison
+ * parsers report an error as soon as the first unparsable token is reached.
+ * Beware of using yyerror for other purposes, as the cursor position might
+ * be misleading!
+ *
+ * If the current token position is at the terminating NUL of the core
+ * scanner's scanbuf, the message reads "<message> at end of input";
+ * otherwise it reads "<message> at or near "<token text>"".  Either way the
+ * error is reported with ERRCODE_SYNTAX_ERROR and a cursor position obtained
+ * from plpgsql_scanner_errposition().
+ */
+void
+plpgsql_yyerror(const char *message)
+{
+       char       *yytext = core_yy.scanbuf + plpgsql_yylloc;
+
+       if (*yytext == '\0')
+       {
+               ereport(ERROR,
+                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                /* translator: %s is typically the translation of "syntax error" */
+                                errmsg("%s at end of input", _(message)),
+                                plpgsql_scanner_errposition(plpgsql_yylloc)));
+       }
+       else
+       {
+               /*
+                * If we have done any lookahead then flex will have restored the
+                * character after the end-of-token.  Zap it again so that we
+                * report only the single token here.  This modifies scanbuf but
+                * we no longer care about that.
+                */
+               yytext[plpgsql_yyleng] = '\0';
+
+               ereport(ERROR,
+                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                /* translator: first %s is typically the translation of "syntax error" */
+                                errmsg("%s at or near \"%s\"", _(message), yytext),
+                                plpgsql_scanner_errposition(plpgsql_yylloc)));
+       }
+}
+
+/*
+ * Given a location (a byte offset in the function source text),
+ * return a line number.
+ *
+ * Returns 0 if the location is negative or scanorig is NULL.
+ *
+ * We expect that this is typically called for a sequence of increasing
+ * location values, so optimize accordingly by tracking the endpoints
+ * of the "current" line.  As a side effect, cur_line_start, cur_line_end
+ * and cur_line_num are advanced to describe the line containing the
+ * location; a location before the current line restarts the scan from
+ * the beginning via location_lineno_init().
+ */
+int
+plpgsql_location_to_lineno(int location)
+{
+       const char *loc;
+
+       if (location < 0 || scanorig == NULL)
+               return 0;                               /* garbage in, garbage out */
+       loc = scanorig + location;
+
+       /* be correct, but not fast, if input location goes backwards */
+       if (loc < cur_line_start)
+               location_lineno_init();
+
+       while (cur_line_end != NULL && loc > cur_line_end)
+       {
+               cur_line_start = cur_line_end + 1;
+               cur_line_num++;
+               cur_line_end = strchr(cur_line_start, '\n');
+       }
+
+       return cur_line_num;
+}
+
+/*
+ * Initialize or reset the state for plpgsql_location_to_lineno.
+ *
+ * Afterwards cur_line_num is 1, cur_line_start points at the start of
+ * scanorig (past one leading "\r", "\n" or "\r\n", if present), and
+ * cur_line_end points at the next '\n' after that, or is NULL if there
+ * is none.
+ */
+static void
+location_lineno_init(void)
+{
+       cur_line_start = scanorig;
+       cur_line_num = 1;
+
+       /*----------
+        * Hack: skip any initial newline, so that in the common coding layout
+        *              CREATE FUNCTION ... AS $$
+        *                      code body
+        *              $$ LANGUAGE plpgsql;
+        * we will think "line 1" is what the programmer thinks of as line 1.
+        *----------
+        */
+    if (*cur_line_start == '\r')
+        cur_line_start++;
+    if (*cur_line_start == '\n')
+        cur_line_start++;
+
+       cur_line_end = strchr(cur_line_start, '\n');
+}
+
+/*
+ * Return the most recently computed lineno, i.e. the current value of
+ * cur_line_num as left by plpgsql_location_to_lineno() or
+ * location_lineno_init().
+ */
+int
+plpgsql_latest_lineno(void)
+{
+       return cur_line_num;
+}
+
+
+/*
+ * Called before any actual parsing is done
+ *
+ * Starts the core scanner on "str" (using core_yy and the reserved_keywords
+ * list), remembers "str" in scanorig, sets plpgsql_LookupIdentifiers to
+ * true, empties the token pushback stack, and resets the line-number
+ * tracking state.
+ *
+ * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
+ * Although it is not fed directly to flex, we need the original string
+ * to cite in error messages.
+ */
+void
+plpgsql_scanner_init(const char *str)
+{
+       /* Start up the core scanner */
+       yyscanner = scanner_init(str, &core_yy,
+                                                        reserved_keywords, num_reserved_keywords);
+
+       /*
+        * scanorig points to the original string, which unlike the scanner's
+        * scanbuf won't be modified on-the-fly by flex.  Notice that although
+        * yytext points into scanbuf, we rely on being able to apply locations
+        * (offsets from string start) to scanorig as well.
+        */
+       scanorig = str;
+
+       /* Other setup */
+       plpgsql_LookupIdentifiers = true;
+
+       num_pushbacks = 0;
+
+       location_lineno_init();
+}
+
+/*
+ * Called after parsing is done to clean up after plpgsql_scanner_init()
+ *
+ * Shuts down the core scanner via scanner_finish() and then clears the
+ * yyscanner and scanorig pointers.
+ */
+void
+plpgsql_scanner_finish(void)
+{
+       /* release storage */
+       scanner_finish(yyscanner);
+       /* avoid leaving any dangling pointers */
+       yyscanner = NULL;
+       scanorig = NULL;
+}
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h

index 6b6669973d7dc80a00a4946830c4aa8fbf322a9b..1c05f5d4c2f152797e748beb7244c1ee09c90df9 100644 (file)
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.123 2009/11/10 02:13:13 tgl Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.124 2009/11/12 00:13:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -794,20 +794,18 @@ typedef struct
   * Global variable declarations
   **********************************************************************/
  
+extern bool plpgsql_check_syntax;
  extern bool plpgsql_DumpExecTree;
  extern bool plpgsql_LookupIdentifiers;
+
+extern PLpgSQL_stmt_block *plpgsql_parse_result;
+
  extern int     plpgsql_nDatums;
  extern PLpgSQL_datum **plpgsql_Datums;
  
  extern char *plpgsql_error_funcname;
  
-/* linkage to the real yytext variable */
-extern char *plpgsql_base_yytext;
-
-#define yytext plpgsql_base_yytext
-
  extern PLpgSQL_function *plpgsql_curr_compile;
-extern bool plpgsql_check_syntax;
  extern MemoryContext compile_tmp_cxt;
  
  extern PLpgSQL_plugin **plugin_ptr;
@@ -825,9 +823,12 @@ extern PLpgSQL_function *plpgsql_compile(FunctionCallInfo fcinfo,
  extern PLpgSQL_function *plpgsql_compile_inline(char *proc_source);
  extern void plpgsql_parser_setup(struct ParseState *pstate,
                                                                  PLpgSQL_expr *expr);
-extern int     plpgsql_parse_word(const char *word);
-extern int     plpgsql_parse_dblword(const char *word);
-extern int     plpgsql_parse_tripword(const char *word);
+extern bool plpgsql_parse_word(char *word1, const char *yytxt,
+                                                          PLwdatum *wdatum, PLword *word);
+extern bool plpgsql_parse_dblword(char *word1, char *word2,
+                                                                 PLwdatum *wdatum, PLcword *cword);
+extern bool plpgsql_parse_tripword(char *word1, char *word2, char *word3,
+                                                                  PLwdatum *wdatum, PLcword *cword);
  extern PLpgSQL_type *plpgsql_parse_wordtype(char *ident);
  extern PLpgSQL_type *plpgsql_parse_cwordtype(List *idents);
  extern PLpgSQL_type *plpgsql_parse_wordrowtype(char *ident);
@@ -889,16 +890,13 @@ extern PLpgSQL_nsitem *plpgsql_ns_lookup_label(PLpgSQL_nsitem *ns_cur,
   * Other functions in pl_funcs.c
   * ----------
   */
-extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
  extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
  extern void plpgsql_dumptree(PLpgSQL_function *func);
  
  /* ----------
- * Externs in gram.y and scan.l
+ * Scanner functions in pl_scanner.c
   * ----------
   */
-extern PLpgSQL_expr *plpgsql_read_expression(int until, const char *expected);
-extern int     plpgsql_yyparse(void);
  extern int     plpgsql_base_yylex(void);
  extern int     plpgsql_yylex(void);
  extern void plpgsql_push_back_token(int token);
@@ -911,4 +909,10 @@ extern int plpgsql_latest_lineno(void);
  extern void plpgsql_scanner_init(const char *str);
  extern void plpgsql_scanner_finish(void);
  
+/* ----------
+ * Externs in gram.y
+ * ----------
+ */
+extern int     plpgsql_yyparse(void);
+
  #endif   /* PLPGSQL_H */
diff --git a/src/pl/plpgsql/src/scan.l b/src/pl/plpgsql/src/scan.l

deleted file mode 100644 (file)

index 101559f..0000000
--- a/src/pl/plpgsql/src/scan.l
+++ /dev/null
@@ -1,605 +0,0 @@
-%{
-/*-------------------------------------------------------------------------
- *
- * scan.l              - Scanner for the PL/pgSQL procedural language
- *
- * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.76 2009/11/10 02:13:13 tgl Exp $
- *
- *-------------------------------------------------------------------------
- */
-
-#include "plpgsql.h"
-
-#include "mb/pg_wchar.h"
-
-
-/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
-#undef fprintf
-#define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
-
-/*
- * Each call to yylex must set yylloc to the location of the found token
- * (expressed as a byte offset from the start of the input text).
- * When we parse a token that requires multiple lexer rules to process,
- * this should be done in the first such rule, else yylloc will point
- * into the middle of the token.
- */
-#define SET_YYLLOC()  (yylloc = yytext - scanbuf)
-
-/* Handles to the buffer that the lexer uses internally */
-static YY_BUFFER_STATE scanbufhandle;
-static char *scanbuf;
-
-static const char *scanorig;           /* original input string */
-
-static int     pushback_token;
-static bool have_pushback_token;
-static const char *cur_line_start;
-static const char *cur_line_end;
-static int     cur_line_num;
-static int             xcdepth = 0;    /* depth of nesting in slash-star comments */
-static char    *dolqstart;      /* current $foo$ quote start string */
-
-bool plpgsql_LookupIdentifiers = true;
-
-static void location_lineno_init(void);
-
-%}
-
-%option 8bit
-%option never-interactive
-%option nodefault
-%option noinput
-%option nounput
-%option noyywrap
-%option noyyalloc
-%option noyyrealloc
-%option noyyfree
-%option warn
-%option prefix="plpgsql_base_yy"
-
-%option case-insensitive
-
-/*
- * Exclusive states are a subset of the core lexer's:
- *  <xc> extended C-style comments
- *  <xq> standard quoted strings
- *  <xe> extended quoted strings (support backslash escape sequences)
- *  <xdolq> $foo$ quoted strings
- */
-
-%x xc
-%x xe
-%x xq
-%x xdolq
-
-/*
- * Definitions --- these generally must match the core lexer, but in some
- * cases we can simplify, since we only care about identifying the token
- * boundaries and not about deriving the represented value.  Also, we
- * aren't trying to lex multicharacter operators so their interactions
- * with comments go away.
- */
-
-space                  [ \t\n\r\f]
-horiz_space            [ \t\f]
-newline                        [\n\r]
-non_newline            [^\n\r]
-
-comment                        ("--"{non_newline}*)
-
-whitespace             ({space}+|{comment})
-special_whitespace             ({space}+|{comment}{newline})
-horiz_whitespace               ({horiz_space}|{comment})
-whitespace_with_newline        ({horiz_whitespace}*{newline}{special_whitespace}*)
-
-quote                  '
-quotestop              {quote}{whitespace}*
-quotecontinue  {quote}{whitespace_with_newline}{quote}
-quotefail              {quote}{whitespace}*"-"
-
-xestart                        [eE]{quote}
-xeinside               [^\\']+
-xeescape               [\\].
-
-xqstart                        {quote}
-xqdouble               {quote}{quote}
-xqinside               [^']+
-
-dolq_start             [A-Za-z\200-\377_]
-dolq_cont              [A-Za-z\200-\377_0-9]
-dolqdelim              \$({dolq_start}{dolq_cont}*)?\$
-dolqfailed             \${dolq_start}{dolq_cont}*
-dolqinside             [^$]+
-
-xcstart                        \/\*
-xcstop                 \*+\/
-xcinside               [^*/]+
-
-digit                  [0-9]
-ident_start            [A-Za-z\200-\377_]
-ident_cont             [A-Za-z\200-\377_0-9\$]
-
-/* This is a simpler treatment of quoted identifiers than the core uses */
-quoted_ident   (\"[^\"]*\")+
-
-identifier             ({ident_start}{ident_cont}*|{quoted_ident})
-
-param                  \${digit}+
-
-%%
-    /* ----------
-     * Reset the state when entering yylex()
-     * ----------
-     */
-    BEGIN(INITIAL);
-
-    /* ----------
-     * The keyword rules
-     * ----------
-     */
-:=                             { SET_YYLLOC(); return K_ASSIGN;                }
-=                              { SET_YYLLOC(); return K_ASSIGN;                }
-\.\.                   { SET_YYLLOC(); return K_DOTDOT;                }
-alias                  { SET_YYLLOC(); return K_ALIAS;                 }
-all                            { SET_YYLLOC(); return K_ALL;                   }
-begin                  { SET_YYLLOC(); return K_BEGIN;                 }
-by                             { SET_YYLLOC(); return K_BY;                    }
-case                   { SET_YYLLOC(); return K_CASE;                  }
-close                  { SET_YYLLOC(); return K_CLOSE;                 }
-constant               { SET_YYLLOC(); return K_CONSTANT;              }
-continue               { SET_YYLLOC(); return K_CONTINUE;              }
-cursor                 { SET_YYLLOC(); return K_CURSOR;                }
-declare                        { SET_YYLLOC(); return K_DECLARE;               }
-default                        { SET_YYLLOC(); return K_DEFAULT;               }
-diagnostics            { SET_YYLLOC(); return K_DIAGNOSTICS;   }
-else                   { SET_YYLLOC(); return K_ELSE;                  }
-elseif                 { SET_YYLLOC(); return K_ELSIF;                 }
-elsif                  { SET_YYLLOC(); return K_ELSIF;                 }
-end                            { SET_YYLLOC(); return K_END;                   }
-exception              { SET_YYLLOC(); return K_EXCEPTION;             }
-execute                        { SET_YYLLOC(); return K_EXECUTE;               }
-exit                   { SET_YYLLOC(); return K_EXIT;                  }
-fetch                  { SET_YYLLOC(); return K_FETCH;                 }
-for                            { SET_YYLLOC(); return K_FOR;                   }
-from                   { SET_YYLLOC(); return K_FROM;                  }
-get                            { SET_YYLLOC(); return K_GET;                   }
-if                             { SET_YYLLOC(); return K_IF;                    }
-in                             { SET_YYLLOC(); return K_IN;                    }
-insert                 { SET_YYLLOC(); return K_INSERT;                }
-into                   { SET_YYLLOC(); return K_INTO;                  }
-is                             { SET_YYLLOC(); return K_IS;                    }
-loop                   { SET_YYLLOC(); return K_LOOP;                  }
-move                   { SET_YYLLOC(); return K_MOVE;                  }
-no{space}+scroll { SET_YYLLOC(); return K_NOSCROLL;            }
-not                            { SET_YYLLOC(); return K_NOT;                   }
-null                   { SET_YYLLOC(); return K_NULL;                  }
-open                   { SET_YYLLOC(); return K_OPEN;                  }
-or                             { SET_YYLLOC(); return K_OR;                    }
-perform                        { SET_YYLLOC(); return K_PERFORM;               }
-raise                  { SET_YYLLOC(); return K_RAISE;                 }
-return                 { SET_YYLLOC(); return K_RETURN;                }
-scroll                 { SET_YYLLOC(); return K_SCROLL;                }
-strict                 { SET_YYLLOC(); return K_STRICT;            }
-then                   { SET_YYLLOC(); return K_THEN;                  }
-to                             { SET_YYLLOC(); return K_TO;                    }
-using                  { SET_YYLLOC(); return K_USING;                 }
-when                   { SET_YYLLOC(); return K_WHEN;                  }
-while                  { SET_YYLLOC(); return K_WHILE;                 }
-
-^#option               { SET_YYLLOC(); return O_OPTION;                }
-dump                   { SET_YYLLOC(); return O_DUMP;                  }
-
-
-    /* ----------
-     * Special word rules
-     * ----------
-     */
-{identifier}                                   {
-       SET_YYLLOC();
-       return plpgsql_parse_word(yytext); }
-{identifier}{space}*\.{space}*{identifier}     {
-       SET_YYLLOC();
-       return plpgsql_parse_dblword(yytext); }
-{identifier}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}       {
-       SET_YYLLOC();
-       return plpgsql_parse_tripword(yytext); }
-{param}                                                        {
-       SET_YYLLOC();
-       return plpgsql_parse_word(yytext); }
-{param}{space}*\.{space}*{identifier}  {
-       SET_YYLLOC();
-       return plpgsql_parse_dblword(yytext); }
-{param}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}    {
-       SET_YYLLOC();
-       return plpgsql_parse_tripword(yytext); }
-
-{digit}+               { SET_YYLLOC(); return T_NUMBER;                }
-
-\".                            { SET_YYLLOC(); yyerror("unterminated quoted identifier"); }
-
-    /* ----------
-     * Comment and literal handling is mostly copied from the core lexer
-     * ----------
-     */
-{whitespace}   {
-                                       /* ignore */
-                               }
-
-{xcstart}              {
-                                       SET_YYLLOC();
-                                       xcdepth = 0;
-                                       BEGIN(xc);
-                               }
-
-<xc>{xcstart}  {
-                                       xcdepth++;
-                               }
-
-<xc>{xcstop}   {
-                                       if (xcdepth <= 0)
-                                               BEGIN(INITIAL);
-                                       else
-                                               xcdepth--;
-                               }
-
-<xc>{xcinside} {
-                                       /* ignore */
-                               }
-
-<xc>\/+                        {
-                                       /* ignore */
-                               }
-
-<xc>\*+                        {
-                                       /* ignore */
-                               }
-
-<xc><<EOF>>            { yyerror("unterminated /* comment"); }
-
-{xqstart}              {
-                                       SET_YYLLOC();
-                                       if (standard_conforming_strings)
-                                               BEGIN(xq);
-                                       else
-                                               BEGIN(xe);
-                               }
-{xestart}              {
-                                       SET_YYLLOC();
-                                       BEGIN(xe);
-                               }
-<xq,xe>{quotestop}     |
-<xq,xe>{quotefail} {
-                                       yyless(1);
-                                       BEGIN(INITIAL);
-                                       /* adjust yytext/yyleng to describe whole string token */
-                                       yyleng += (yytext - (scanbuf + yylloc));
-                                       yytext = scanbuf + yylloc;
-                                       return T_STRING;
-                               }
-<xq,xe>{xqdouble} {
-                               }
-<xq>{xqinside}  {
-                               }
-<xe>{xeinside}  {
-                               }
-<xe>{xeescape}  {
-                               }
-<xq,xe>{quotecontinue} {
-                                       /* ignore */
-                               }
-<xe>.                  {
-                                       /* This is only needed for \ just before EOF */
-                               }
-<xq,xe><<EOF>>         { yyerror("unterminated quoted string"); }
-
-{dolqdelim}            {
-                                       SET_YYLLOC();
-                                       dolqstart = pstrdup(yytext);
-                                       BEGIN(xdolq);
-                               }
-{dolqfailed}   {
-                                       /* throw back all but the initial "$" */
-                                       yyless(1);
-                                       /* and treat it as {other} */
-                                       SET_YYLLOC(); return yytext[0];
-                               }
-<xdolq>{dolqdelim} {
-                                       if (strcmp(yytext, dolqstart) == 0)
-                                       {
-                                               pfree(dolqstart);
-                                               BEGIN(INITIAL);
-                                               /* adjust yytext/yyleng to describe whole string */
-                                               yyleng += (yytext - (scanbuf + yylloc));
-                                               yytext = scanbuf + yylloc;
-                                               return T_STRING;
-                                       }
-                                       else
-                                       {
-                                               /*
-                                                * When we fail to match $...$ to dolqstart, transfer
-                                                * the $... part to the output, but put back the final
-                                                * $ for rescanning.  Consider $delim$...$junk$delim$
-                                                */
-                                               yyless(yyleng-1);
-                                       }
-                               }
-<xdolq>{dolqinside} {
-                               }
-<xdolq>{dolqfailed} {
-                               }
-<xdolq>.               {
-                                       /* This is only needed for $ inside the quoted text */
-                               }
-<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
-
-    /* ----------
-     * Any unmatched character is returned as is
-     * ----------
-     */
-.                              {
-                                       SET_YYLLOC(); return yytext[0];
-                               }
-
-%%
-
-
-/*
- * This is the yylex routine called from outside. It exists to provide
- * a one-token pushback facility.  Beware of trying to push back more;
- * for the most part, plpgsql's gram.y assumes that yytext and yylloc
- * are in step with the "current token".  In particular it is assumed that
- * those are in step with the result immediately after any yylex() call.
- */
-int
-plpgsql_yylex(void)
-{
-       if (have_pushback_token)
-       {
-               have_pushback_token = false;
-               return pushback_token;
-       }
-       return yylex();
-}
-
-/*
- * Push back a single token to be re-read by next plpgsql_yylex() call.
- *
- * NOTE: this does not cause yytext or yylloc to "back up".  Also, it
- * is not a good idea to push back a token other than what you read.
- */
-void
-plpgsql_push_back_token(int token)
-{
-       if (have_pushback_token)
-               elog(ERROR, "cannot push back multiple tokens");
-       pushback_token = token;
-       have_pushback_token = true;
-}
-
-/*
- * Append the function text starting at startlocation and extending to
- * (not including) endlocation onto the existing contents of "buf".
- */
-void
-plpgsql_append_source_text(StringInfo buf,
-                                                  int startlocation, int endlocation)
-{
-       Assert(startlocation <= endlocation);
-       appendBinaryStringInfo(buf, scanorig + startlocation,
-                                                  endlocation - startlocation);
-}
-
-/*
- * plpgsql_scanner_errposition
- *             Report an error cursor position, if possible.
- *
- * This is expected to be used within an ereport() call.  The return value
- * is a dummy (always 0, in fact).
- *
- * Note that this can only be used for messages emitted during initial
- * parsing of a plpgsql function, since it requires the scanorig string
- * to still be available.
- */
-int
-plpgsql_scanner_errposition(int location)
-{
-       int             pos;
-
-       if (location < 0 || scanorig == NULL)
-               return 0;                               /* no-op if location is unknown */
-
-       /* Convert byte offset to character number */
-       pos = pg_mbstrlen_with_len(scanorig, location) + 1;
-       /* And pass it to the ereport mechanism */
-       (void) internalerrposition(pos);
-       /* Also pass the function body string */
-       return internalerrquery(scanorig);
-}
-
-/*
- * plpgsql_yyerror
- *             Report a lexer or grammar error.
- *
- * The message's cursor position is whatever YYLLOC was last set to,
- * ie, the start of the current token if called within yylex(), or the
- * most recently lexed token if called from the grammar.
- * This is OK for syntax error messages from the Bison parser, because Bison
- * parsers report error as soon as the first unparsable token is reached.
- * Beware of using yyerror for other purposes, as the cursor position might
- * be misleading!
- */
-void
-plpgsql_yyerror(const char *message)
-{
-       const char *loc = scanbuf + yylloc;
-
-       if (*loc == YY_END_OF_BUFFER_CHAR)
-       {
-               ereport(ERROR,
-                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                /* translator: %s is typically the translation of "syntax error" */
-                                errmsg("%s at end of input", _(message)),
-                                plpgsql_scanner_errposition(yylloc)));
-       }
-       else
-       {
-               ereport(ERROR,
-                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                /* translator: first %s is typically the translation of "syntax error" */
-                                errmsg("%s at or near \"%s\"", _(message), loc),
-                                plpgsql_scanner_errposition(yylloc)));
-       }
-}
-
-/*
- * Given a location (a byte offset in the function source text),
- * return a line number.
- *
- * We expect that this is typically called for a sequence of increasing
- * location values, so optimize accordingly by tracking the endpoints
- * of the "current" line.
- */
-int
-plpgsql_location_to_lineno(int location)
-{
-       const char *loc;
-
-       if (location < 0 || scanorig == NULL)
-               return 0;                               /* garbage in, garbage out */
-       loc = scanorig + location;
-
-       /* be correct, but not fast, if input location goes backwards */
-       if (loc < cur_line_start)
-               location_lineno_init();
-
-       while (cur_line_end != NULL && loc > cur_line_end)
-       {
-               cur_line_start = cur_line_end + 1;
-               cur_line_num++;
-               cur_line_end = strchr(cur_line_start, '\n');
-       }
-
-       return cur_line_num;
-}
-
-/* initialize or reset the state for plpgsql_location_to_lineno */
-static void
-location_lineno_init(void)
-{
-       cur_line_start = scanorig;
-       cur_line_num = 1;
-
-       /*----------
-        * Hack: skip any initial newline, so that in the common coding layout
-        *              CREATE FUNCTION ... AS $$
-        *                      code body
-        *              $$ LANGUAGE plpgsql;
-        * we will think "line 1" is what the programmer thinks of as line 1.
-        *----------
-        */
-    if (*cur_line_start == '\r')
-        cur_line_start++;
-    if (*cur_line_start == '\n')
-        cur_line_start++;
-
-       cur_line_end = strchr(cur_line_start, '\n');
-}
-
-/* return the most recently computed lineno */
-int
-plpgsql_latest_lineno(void)
-{
-       return cur_line_num;
-}
-
-
-/*
- * Called before any actual parsing is done
- *
- * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
- * Although it is not fed directly to flex, we need the original string
- * to cite in error messages.
- */
-void
-plpgsql_scanner_init(const char *str)
-{
-       Size    slen = strlen(str);
-
-       /*
-        * Reset flex internal state.  Whatever data it might think it has
-        * has long since been pfree'd.
-        */
-       yy_init_globals();
-
-       /*
-        * Make a scan buffer with special termination needed by flex.
-        */
-       scanbuf = (char *) palloc(slen + 2);
-       memcpy(scanbuf, str, slen);
-       scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
-       scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
-
-       /*
-        * scanorig points to the original string, which unlike scanbuf won't
-        * be modified on-the-fly by flex.  Notice that although yytext points
-        * into scanbuf, we rely on being able to apply locations (offsets from
-        * string start) to scanorig as well.
-        */
-       scanorig = str;
-
-       /* Other setup */
-       have_pushback_token = false;
-
-       location_lineno_init();
-
-       BEGIN(INITIAL);
-       plpgsql_LookupIdentifiers = true;
-}
-
-/*
- * Called after parsing is done to clean up after plpgsql_scanner_init()
- */
-void
-plpgsql_scanner_finish(void)
-{
-       /* release storage */
-       yy_delete_buffer(scanbufhandle);
-       pfree(scanbuf);
-       /* avoid leaving any dangling pointers */
-       scanbufhandle = NULL;
-       scanbuf = NULL;
-       scanorig = NULL;
-}
-
-/*
- * Interface functions to make flex use palloc() instead of malloc().
- * It'd be better to make these static, but flex insists otherwise.
- */
-
-void *
-plpgsql_base_yyalloc(yy_size_t bytes)
-{
-       return palloc(bytes);
-}
-
-void *
-plpgsql_base_yyrealloc(void *ptr, yy_size_t bytes)
-{
-       if (ptr)
-               return repalloc(ptr, bytes);
-       else
-               return palloc(bytes);
-}
-
-void
-plpgsql_base_yyfree(void *ptr)
-{
-       if (ptr)
-               pfree(ptr);
-}
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out

index 16b7907a2f7c838352bfdc61111d46002b462d8a..877ed329a675951ae3ce4a39c671e2f04a7bebd4 100644 (file)
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@@ -3889,6 +3889,10 @@ WARNING:  nonstandard use of \\ in a string literal
  LINE 4:   return 'foo\\bar\041baz';
                   ^
  HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
+WARNING:  nonstandard use of \\ in a string literal
+LINE 4:   return 'foo\\bar\041baz';
+                 ^
+HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
  select strtest();
  NOTICE:  foo\bar!baz
  WARNING:  nonstandard use of \\ in a string literal
@@ -4020,3 +4024,19 @@ select * from conflict_test();
  (5 rows)
  
  drop function conflict_test();
+-- Check that an unreserved keyword can be used as a variable name
+create function unreserved_test() returns int as $$
+declare
+  forward int := 21;
+begin
+  forward := forward * 2;
+  return forward;
+end
+$$ language plpgsql;
+select unreserved_test();
+ unreserved_test 
+-----------------
+              42
+(1 row)
+
+drop function unreserved_test();
diff --git a/src/test/regress/sql/plpgsql.sql b/src/test/regress/sql/plpgsql.sql

index c75f037cdbc0a93cf94fd4bb9c05e0a695c46e7f..f0a77469d92abe24ef60ee9ad9104d756759c866 100644 (file)
--- a/src/test/regress/sql/plpgsql.sql
+++ b/src/test/regress/sql/plpgsql.sql
@@ -3189,3 +3189,18 @@ $$ language plpgsql;
  select * from conflict_test();
  
  drop function conflict_test();
+
+-- Check that an unreserved keyword can be used as a variable name
+
+create function unreserved_test() returns int as $$
+declare
+  forward int := 21;
+begin
+  forward := forward * 2;
+  return forward;
+end
+$$ language plpgsql;
+
+select unreserved_test();
+
+drop function unreserved_test();
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm

index 7579bf5e7deceb0b7a121ba0f7573d27a9e5c90d..9e7e6cf0066b4befb663f265cfa7c1e2bf9f0512 100644 (file)
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -3,7 +3,7 @@ package Mkvcbuild;
  #
  # Package that generates build files for msvc build
  #
-# $PostgreSQL: pgsql/src/tools/msvc/Mkvcbuild.pm,v 1.43 2009/10/01 01:58:58 tgl Exp $
+# $PostgreSQL: pgsql/src/tools/msvc/Mkvcbuild.pm,v 1.44 2009/11/12 00:13:00 tgl Exp $
  #
  use Carp;
  use Win32;
@@ -80,7 +80,7 @@ sub mkvcbuild
      $snowball->AddReference($postgres);
  
      my $plpgsql = $solution->AddProject('plpgsql','dll','PLs','src\pl\plpgsql\src');
-    $plpgsql->AddFiles('src\pl\plpgsql\src','scan.l','gram.y');
+    $plpgsql->AddFiles('src\pl\plpgsql\src', 'gram.y');
      $plpgsql->AddReference($postgres);
  
      if ($solution->{options}->{perl})
diff --git a/src/tools/msvc/Project.pm b/src/tools/msvc/Project.pm

index 53b55cd59139c9f9d3b8072c5a3e66f39c03f8d5..15732b146b8d5a257274f1420f0b16f61177c5c4 100644 (file)
--- a/src/tools/msvc/Project.pm
+++ b/src/tools/msvc/Project.pm
@@ -3,7 +3,7 @@ package Project;
  #
  # Package that encapsulates a Visual C++ project file generation
  #
-# $PostgreSQL: pgsql/src/tools/msvc/Project.pm,v 1.20 2009/07/27 07:11:15 mha Exp $
+# $PostgreSQL: pgsql/src/tools/msvc/Project.pm,v 1.21 2009/11/12 00:13:00 tgl Exp $
  #
  use Carp;
  use strict;
@@ -398,7 +398,6 @@ EOF
          {
              my $of = $f;
              $of =~ s/\.l$/.c/;
-            $of =~ s{^src\\pl\\plpgsql\\src\\scan.c$}{src\\pl\\plpgsql\\src\\pl_scan.c};
              print F '>'
                . GenerateCustomTool('Running flex on ' . $f, 'src\tools\msvc\pgflex.bat ' . $f,$of)
                . '</File>' . "\n";
diff --git a/src/tools/msvc/clean.bat b/src/tools/msvc/clean.bat

index 9beb6f33c08e19b3542ee36df27581b371576b0b..fc12cc913680697d75f0b85a9e52bb3843280229 100755 (executable)
--- a/src/tools/msvc/clean.bat
+++ b/src/tools/msvc/clean.bat
@@ -1,5 +1,5 @@
  @echo off
-REM $PostgreSQL: pgsql/src/tools/msvc/clean.bat,v 1.15 2008/08/30 02:32:24 tgl Exp $
+REM $PostgreSQL: pgsql/src/tools/msvc/clean.bat,v 1.16 2009/11/12 00:13:00 tgl Exp $
  
  set DIST=0
  if "%1"=="dist" set DIST=1
@@ -49,7 +49,6 @@ if %DIST%==1 if exist src\interfaces\ecpg\preproc\preproc.h del /q src\interface
  if exist src\port\pg_config_paths.h del /q src\port\pg_config_paths.h
  
  if exist src\pl\plperl\spi.c del /q src\pl\plperl\spi.c
-if %DIST%==1 if exist src\pl\plpgsql\src\pl_scan.c del /q src\pl\plpgsql\src\pl_scan.c
  if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.c del /q src\pl\plpgsql\src\pl_gram.c
  if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.h del /q src\pl\plpgsql\src\pl_gram.h
  
diff --git a/src/tools/msvc/pgflex.bat b/src/tools/msvc/pgflex.bat

index f7427117e346d15e68f9a20306fbdc52aacbeee2..00c675723116d8872f4264908afe762a6dbb3a5a 100755 (executable)
--- a/src/tools/msvc/pgflex.bat
+++ b/src/tools/msvc/pgflex.bat
@@ -1,5 +1,5 @@
  @echo off
-REM $PostgreSQL: pgsql/src/tools/msvc/pgflex.bat,v 1.5 2007/12/19 12:29:36 mha Exp $
+REM $PostgreSQL: pgsql/src/tools/msvc/pgflex.bat,v 1.6 2009/11/12 00:13:00 tgl Exp $
  
  IF NOT EXIST src\tools\msvc\buildenv.pl goto nobuildenv
  perl -e "require 'src/tools/msvc/buildenv.pl'; while(($k,$v) = each %ENV) { print qq[\@SET $k=$v\n]; }" > bldenv.bat
@@ -13,7 +13,6 @@ if errorlevel 1 goto noflex
  if "%1" == "src\backend\parser\scan.l" call :generate %1 src\backend\parser\scan.c -CF
  if "%1" == "src\backend\bootstrap\bootscanner.l" call :generate %1 src\backend\bootstrap\bootscanner.c
  if "%1" == "src\backend\utils\misc\guc-file.l" call :generate %1 src\backend\utils\misc\guc-file.c
-if "%1" == "src\pl\plpgsql\src\scan.l" call :generate %1 src\pl\plpgsql\src\pl_scan.c
  if "%1" == "src\interfaces\ecpg\preproc\pgc.l" call :generate %1 src\interfaces\ecpg\preproc\pgc.c
  if "%1" == "src\bin\psql\psqlscan.l" call :generate %1 src\bin\psql\psqlscan.c
  if "%1" == "contrib\cube\cubescan.l" call :generate %1 contrib\cube\cubescan.c
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 12 Nov 2009 00:13:00 +0000 (00:13 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 12 Nov 2009 00:13:00 +0000 (00:13 +0000)
src/backend/parser/scan.l		patch \| blob \| history
src/backend/parser/scansup.c		patch \| blob \| history
src/bin/psql/psqlscan.l		patch \| blob \| history
src/pl/plpgsql/src/.cvsignore		patch \| blob \| history
src/pl/plpgsql/src/Makefile		patch \| blob \| history
src/pl/plpgsql/src/gram.y		patch \| blob \| history
src/pl/plpgsql/src/nls.mk		patch \| blob \| history
src/pl/plpgsql/src/pl_comp.c		patch \| blob \| history
src/pl/plpgsql/src/pl_funcs.c		patch \| blob \| history
src/pl/plpgsql/src/pl_scanner.c	[new file with mode: 0644]	patch \| blob
src/pl/plpgsql/src/plpgsql.h		patch \| blob \| history
src/pl/plpgsql/src/scan.l	[deleted file]	patch \| blob \| history
src/test/regress/expected/plpgsql.out		patch \| blob \| history
src/test/regress/sql/plpgsql.sql		patch \| blob \| history
src/tools/msvc/Mkvcbuild.pm		patch \| blob \| history
src/tools/msvc/Project.pm		patch \| blob \| history
src/tools/msvc/clean.bat		patch \| blob \| history
src/tools/msvc/pgflex.bat		patch \| blob \| history