granicus.if.org Git - postgresql/commitdiff
Alter plpgsql's lexer so that yylineno and yymore are not used.
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 5 May 2003 16:46:28 +0000 (16:46 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 5 May 2003 16:46:28 +0000 (16:46 +0000)
This avoids 'input buffer overflow' failure on long literals, improves
performance, gives the right answer for line position in functions
containing multiline literals, suppresses annoying compiler warnings,
and generally is so much better I wonder why we didn't do it before.

src/pl/plpgsql/src/gram.y
src/pl/plpgsql/src/pl_comp.c
src/pl/plpgsql/src/plpgsql.h
src/pl/plpgsql/src/scan.l

index dd15cf80a2f83f6dd5a944940b54a163c686fa84..ef7b934f25e2b17c3bf9480fe55dc6dac8eb1f6f 100644 (file)
@@ -4,7 +4,7 @@
  *                                               procedural language
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/gram.y,v 1.42 2003/04/27 22:21:22 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/gram.y,v 1.43 2003/05/05 16:46:27 tgl Exp $
  *
  *       This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -334,7 +334,7 @@ decl_statement      : decl_varname decl_const decl_datatype decl_notnull decl_defval
                                                        /* Composite type --- treat as rowtype */
                                                        PLpgSQL_row        *row;
 
-                                                       row = build_rowtype($3->typrelid);
+                                                       row = plpgsql_build_rowtype($3->typrelid);
                                                        row->dtype              = PLPGSQL_DTYPE_ROW;
                                                        row->refname    = $1.name;
                                                        row->lineno             = $1.lineno;
@@ -486,7 +486,7 @@ decl_cursor_arglist : decl_cursor_arg
 
                                                new->dtype = PLPGSQL_DTYPE_ROW;
                                                new->refname = strdup("*internal*");
-                                               new->lineno = yylineno;
+                                               new->lineno = plpgsql_scanner_lineno();
                                                new->rowtypeclass = InvalidOid;
                                                /*
                                                 * We make temporary fieldnames/varnos arrays that
@@ -553,7 +553,7 @@ decl_aliasitem      : T_WORD
                                                nsi = plpgsql_ns_lookup(name, NULL);
                                                if (nsi == NULL)
                                                {
-                                                       plpgsql_error_lineno = yylineno;
+                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                        elog(ERROR, "function has no parameter %s", name);
                                                }
 
@@ -578,7 +578,7 @@ decl_varname        : T_WORD
                                                plpgsql_convert_ident(yytext, &name, 1);
                                                /* name should be malloc'd for use as varname */
                                                $$.name = strdup(name);
-                                               $$.lineno  = yylineno;
+                                               $$.lineno  = plpgsql_scanner_lineno();
                                                pfree(name);
                                        }
                                ;
@@ -625,7 +625,7 @@ decl_defval         : ';'
                                                PLpgSQL_dstring ds;
                                                PLpgSQL_expr    *expr;
 
-                                               lno = yylineno;
+                                               lno = plpgsql_scanner_lineno();
                                                expr = malloc(sizeof(PLpgSQL_expr));
                                                plpgsql_dstring_init(&ds);
                                                plpgsql_dstring_append(&ds, "SELECT ");
@@ -1034,7 +1034,7 @@ fori_varname      : T_VARIABLE
                                                plpgsql_convert_ident(yytext, &name, 1);
                                                /* name should be malloc'd for use as varname */
                                                $$.name = strdup(name);
-                                               $$.lineno  = yylineno;
+                                               $$.lineno  = plpgsql_scanner_lineno();
                                                pfree(name);
                                        }
                                | T_WORD
@@ -1044,7 +1044,7 @@ fori_varname      : T_VARIABLE
                                                plpgsql_convert_ident(yytext, &name, 1);
                                                /* name should be malloc'd for use as varname */
                                                $$.name = strdup(name);
-                                               $$.lineno  = yylineno;
+                                               $$.lineno  = plpgsql_scanner_lineno();
                                                pfree(name);
                                        }
                                ;
@@ -1405,7 +1405,7 @@ stmt_open         : K_OPEN lno cursor_varptr
 
                                                                if (tok != '(')
                                                                {
-                                                                       plpgsql_error_lineno = yylineno;
+                                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                                        elog(ERROR, "cursor %s has arguments",
                                                                                 $3->refname);
                                                                }
@@ -1427,7 +1427,7 @@ stmt_open         : K_OPEN lno cursor_varptr
 
                                                                if (strncmp(cp, "SELECT", 6) != 0)
                                                                {
-                                                                       plpgsql_error_lineno = yylineno;
+                                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                                        elog(ERROR, "expected 'SELECT (', got '%s' (internal error)",
                                                                                 new->argquery->query);
                                                                }
@@ -1436,7 +1436,7 @@ stmt_open         : K_OPEN lno cursor_varptr
                                                                        cp++;
                                                                if (*cp != '(')
                                                                {
-                                                                       plpgsql_error_lineno = yylineno;
+                                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                                        elog(ERROR, "expected 'SELECT (', got '%s' (internal error)",
                                                                                 new->argquery->query);
                                                                }
@@ -1454,13 +1454,13 @@ stmt_open               : K_OPEN lno cursor_varptr
 
                                                                if (tok == '(')
                                                                {
-                                                                       plpgsql_error_lineno = yylineno;
+                                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                                        elog(ERROR, "cursor %s has no arguments", $3->refname);
                                                                }
                                                                
                                                                if (tok != ';')
                                                                {
-                                                                       plpgsql_error_lineno = yylineno;
+                                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                                        elog(ERROR, "syntax error at \"%s\"", yytext);
                                                                }
                                                        }
@@ -1502,7 +1502,7 @@ cursor_varptr     : T_VARIABLE
 
                                                if (((PLpgSQL_var *) yylval.variable)->datatype->typoid != REFCURSOROID)
                                                {
-                                                       plpgsql_error_lineno = yylineno;
+                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                        elog(ERROR, "%s must be of type cursor or refcursor",
                                                                 ((PLpgSQL_var *) yylval.variable)->refname);
                                                }
@@ -1517,7 +1517,7 @@ cursor_variable   : T_VARIABLE
 
                                                if (((PLpgSQL_var *) yylval.variable)->datatype->typoid != REFCURSOROID)
                                                {
-                                                       plpgsql_error_lineno = yylineno;
+                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                        elog(ERROR, "%s must be of type refcursor",
                                                                 ((PLpgSQL_var *) yylval.variable)->refname);
                                                }
@@ -1583,8 +1583,7 @@ opt_lblname               : T_WORD
 
 lno                            :
                                        {
-                                               plpgsql_error_lineno = yylineno;
-                                               $$ = yylineno;
+                                               $$ = plpgsql_error_lineno = plpgsql_scanner_lineno();
                                        }
                                ;
 
@@ -1618,7 +1617,7 @@ read_sql_construct(int until,
        char                            buf[32];
        PLpgSQL_expr            *expr;
 
-       lno = yylineno;
+       lno = plpgsql_scanner_lineno();
        plpgsql_dstring_init(&ds);
        plpgsql_dstring_append(&ds, (char *) sqlstart);
 
@@ -1690,7 +1689,7 @@ read_datatype(int tok)
        bool                            needspace = false;
        int                                     parenlevel = 0;
 
-       lno = yylineno;
+       lno = plpgsql_scanner_lineno();
 
        /* Often there will be a lookahead token, but if not, get one */
        if (tok == YYEMPTY)
@@ -1769,14 +1768,14 @@ make_select_stmt(void)
                        break;
                if (tok == 0)
                {
-                       plpgsql_error_lineno = yylineno;
+                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                        elog(ERROR, "unexpected end of file");
                }
                if (tok == K_INTO)
                {
                        if (have_into)
                        {
-                               plpgsql_error_lineno = yylineno;
+                               plpgsql_error_lineno = plpgsql_scanner_lineno();
                                elog(ERROR, "INTO specified more than once");
                        }
                        tok = yylex();
@@ -1814,7 +1813,7 @@ make_select_stmt(void)
                                                                break;
 
                                                        default:
-                                                               plpgsql_error_lineno = yylineno;
+                                                               plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                                elog(ERROR, "plpgsql: %s is not a variable",
                                                                         yytext);
                                                }
@@ -1824,7 +1823,7 @@ make_select_stmt(void)
                                        row = malloc(sizeof(PLpgSQL_row));
                                        row->dtype = PLPGSQL_DTYPE_ROW;
                                        row->refname = strdup("*internal*");
-                                       row->lineno = yylineno;
+                                       row->lineno = plpgsql_scanner_lineno();
                                        row->rowtypeclass = InvalidOid;
                                        row->nfields = nfields;
                                        row->fieldnames = malloc(sizeof(char *) * nfields);
@@ -1945,7 +1944,7 @@ make_fetch_stmt(void)
                                                        break;
 
                                                default:
-                                                       plpgsql_error_lineno = yylineno;
+                                                       plpgsql_error_lineno = plpgsql_scanner_lineno();
                                                        elog(ERROR, "plpgsql: %s is not a variable",
                                                                 yytext);
                                        }
@@ -1955,7 +1954,7 @@ make_fetch_stmt(void)
                                row = malloc(sizeof(PLpgSQL_row));
                                row->dtype = PLPGSQL_DTYPE_ROW;
                                row->refname = strdup("*internal*");
-                               row->lineno = yylineno;
+                               row->lineno = plpgsql_scanner_lineno();
                                row->rowtypeclass = InvalidOid;
                                row->nfields = nfields;
                                row->fieldnames = malloc(sizeof(char *) * nfields);
@@ -2028,7 +2027,7 @@ check_assignable(PLpgSQL_datum *datum)
                case PLPGSQL_DTYPE_VAR:
                        if (((PLpgSQL_var *) datum)->isconst)
                        {
-                               plpgsql_error_lineno = yylineno;
+                               plpgsql_error_lineno = plpgsql_scanner_lineno();
                                elog(ERROR, "%s is declared CONSTANT",
                                         ((PLpgSQL_var *) datum)->refname);
                        }
index 5c88761e05dd11fa0c69fe2d5539f809ed9c8eec..d62b237f1145975e0e381a7dfb5edb496e2e6a6e 100644 (file)
@@ -3,7 +3,7 @@
  *                       procedural language
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.57 2003/04/27 22:21:22 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.58 2003/05/05 16:46:27 tgl Exp $
  *
  *       This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -141,7 +141,8 @@ plpgsql_compile(Oid fn_oid, int functype)
        procStruct = (Form_pg_proc) GETSTRUCT(procTup);
        proc_source = DatumGetCString(DirectFunctionCall1(textout,
                                                                  PointerGetDatum(&procStruct->prosrc)));
-       plpgsql_setinput(proc_source, functype);
+       plpgsql_scanner_init(proc_source, functype);
+       pfree(proc_source);
 
        plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
        plpgsql_error_lineno = 0;
@@ -258,7 +259,7 @@ plpgsql_compile(Oid fn_oid, int functype)
                                         * For tuple type parameters, we set up a record of
                                         * that type
                                         */
-                                       row = build_rowtype(typeStruct->typrelid);
+                                       row = plpgsql_build_rowtype(typeStruct->typrelid);
 
                                        row->refname = strdup(buf);
 
@@ -496,6 +497,8 @@ plpgsql_compile(Oid fn_oid, int functype)
        if (parse_rc != 0)
                elog(ERROR, "plpgsql: parser returned %d ???", parse_rc);
 
+       plpgsql_scanner_finish();
+
        /*
         * If that was successful, complete the functions info.
         */
@@ -1200,7 +1203,7 @@ plpgsql_parse_wordrowtype(char *word)
        /*
         * Build and return the complete row definition
         */
-       plpgsql_yylval.row = build_rowtype(classOid);
+       plpgsql_yylval.row = plpgsql_build_rowtype(classOid);
 
        pfree(cp[0]);
        pfree(cp[1]);
@@ -1241,7 +1244,7 @@ plpgsql_parse_dblwordrowtype(char *word)
        /*
         * Build and return the complete row definition
         */
-       plpgsql_yylval.row = build_rowtype(classOid);
+       plpgsql_yylval.row = plpgsql_build_rowtype(classOid);
 
        pfree(cp);
 
@@ -1252,7 +1255,7 @@ plpgsql_parse_dblwordrowtype(char *word)
  * Build a rowtype data structure given the pg_class OID.
  */
 PLpgSQL_row *
-build_rowtype(Oid classOid)
+plpgsql_build_rowtype(Oid classOid)
 {
        PLpgSQL_row *row;
        HeapTuple       classtup;
@@ -1494,6 +1497,6 @@ plpgsql_add_initdatums(int **varnos)
 void
 plpgsql_yyerror(const char *s)
 {
-       plpgsql_error_lineno = plpgsql_yylineno;
+       plpgsql_error_lineno = plpgsql_scanner_lineno();        /* line at which the current token ends */
        elog(ERROR, "%s at or near \"%s\"", s, plpgsql_yytext);
 }
index 1140cebddccf44cb911db2011ff540f0d42539f0..e10dd18ac8f64238bfe5952e43d074a9a059a72d 100644 (file)
@@ -3,7 +3,7 @@
  *                       procedural language
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.35 2003/04/27 22:21:22 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.36 2003/05/05 16:46:28 tgl Exp $
  *
  *       This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -573,13 +573,10 @@ extern PLpgSQL_datum **plpgsql_Datums;
 extern int     plpgsql_error_lineno;
 extern char *plpgsql_error_funcname;
 
-/* linkage to the real yytext and yylineno variables */
+/* linkage to the real yytext variable */
 extern char *plpgsql_base_yytext;
 
 #define plpgsql_yytext plpgsql_base_yytext
-extern int     plpgsql_base_yylineno;
-
-#define plpgsql_yylineno plpgsql_base_yylineno
 
 extern PLpgSQL_function *plpgsql_curr_compile;
 
@@ -601,7 +598,7 @@ extern int  plpgsql_parse_tripwordtype(char *word);
 extern int     plpgsql_parse_wordrowtype(char *word);
 extern int     plpgsql_parse_dblwordrowtype(char *word);
 extern PLpgSQL_type *plpgsql_parse_datatype(char *string);
-extern PLpgSQL_row *build_rowtype(Oid classOid);
+extern PLpgSQL_row *plpgsql_build_rowtype(Oid classOid);
 extern void plpgsql_adddatum(PLpgSQL_datum * new);
 extern int     plpgsql_add_initdatums(int **varnos);
 extern void plpgsql_yyerror(const char *s);
@@ -660,6 +657,8 @@ extern int  plpgsql_yyparse(void);
 extern int     plpgsql_base_yylex(void);
 extern int     plpgsql_yylex(void);
 extern void plpgsql_push_back_token(int token);
-extern void plpgsql_setinput(char *s, int functype);
+extern int     plpgsql_scanner_lineno(void);
+extern void plpgsql_scanner_init(const char *str, int functype);
+extern void plpgsql_scanner_finish(void);
 
 #endif   /* PLPGSQL_H */
index 7e89ca83098b8d9e679ed8391ece45ec3d9f0e79..5f7e162f08e08f1293a39a0409b105b52e8cc02e 100644 (file)
@@ -4,7 +4,7 @@
  *                       procedural language
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/pl/plpgsql/src/Attic/scan.l,v 1.24 2002/11/07 06:06:17 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/pl/plpgsql/src/Attic/scan.l,v 1.25 2003/05/05 16:46:28 tgl Exp $
  *
  *    This software is copyrighted by Jan Wieck - Hamburg.
  *
 #include "plpgsql.h"
 
 
-static char    *plpgsql_source;
-static int     plpgsql_bytes_left;
+/* No reason to constrain amount of data slurped */
+#define YY_READ_BUF_SIZE 16777216
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#define fprintf(file, fmt, msg)  ereport(FATAL, (errmsg_internal("%s", msg)))
+
+/* Handles to the buffer that the lexer uses internally */
+static YY_BUFFER_STATE scanbufhandle;
+static char *scanbuf;
+
 static int     scanner_functype;
 static int     scanner_typereported;
 static int     pushback_token;
 static bool have_pushback_token;
 static int     lookahead_token;        
 static bool have_lookahead_token;
+static const char *cur_line_start;
+static int     cur_line_num;
 
 int    plpgsql_SpaceScanned = 0;
-
-static void plpgsql_input(char *buf, int *result, int max);
-
-#define YY_INPUT(buf,res,max)  plpgsql_input(buf, &res, max)
 %}
 
 %option 8bit
@@ -60,7 +66,6 @@ static void plpgsql_input(char *buf, int *result, int max);
 %option nounput
 %option noyywrap
 
-%option yylineno
 %option case-insensitive
 
 
@@ -78,11 +83,12 @@ space                       [ \t\n\r\f]
 
 %%
     /* ----------
-     * Local variable in scanner to remember where
+     * Local variables in scanner to remember where
      * a string or comment started
      * ----------
      */
     int        start_lineno = 0;
+       char *start_charpos = NULL;
 
     /* ----------
      * Reset the state when entering the scanner
@@ -185,7 +191,7 @@ dump                        { return O_DUMP;                        }
 {digit}+               { return T_NUMBER;                      }
 
 \".                            {
-                               plpgsql_error_lineno = yylineno;
+                               plpgsql_error_lineno = plpgsql_scanner_lineno();
                                elog(ERROR, "unterminated quoted identifier");
                        }
 
@@ -201,7 +207,7 @@ dump                        { return O_DUMP;                        }
      */
 --[^\r\n]*             ;
 
-\/\*                   { start_lineno = yylineno;
+\/\*                   { start_lineno = plpgsql_scanner_lineno();
                          BEGIN IN_COMMENT;
                        }
 <IN_COMMENT>\*\/       { BEGIN INITIAL; plpgsql_SpaceScanned = 1; }
@@ -214,22 +220,30 @@ dump                      { return O_DUMP;                        }
 
     /* ----------
      * Collect anything inside of ''s and return one STRING
+        *
+        * Hacking yytext/yyleng here lets us avoid using yymore(), which is
+        * a win for performance.  It's safe because we know the underlying
+        * input buffer is not changing.
      * ----------
      */
-'                      { start_lineno = yylineno;
+'                      {
+                         start_lineno = plpgsql_scanner_lineno();
+                         start_charpos = yytext;
                          BEGIN IN_STRING;
-                         yymore();
                        }
-<IN_STRING>\\.         |
-<IN_STRING>''          { yymore();                             }
-<IN_STRING>'           { BEGIN INITIAL;
+<IN_STRING>\\.         { }
+<IN_STRING>''          { }
+<IN_STRING>'           {
+                         yyleng += (yytext - start_charpos); /* cover the whole literal, not just the closing quote */
+                         yytext = start_charpos;             /* back up to the opening quote */
+                         BEGIN INITIAL;
                          return T_STRING;
                        }
 <IN_STRING><<EOF>>     {
                                plpgsql_error_lineno = start_lineno;
                                elog(ERROR, "unterminated string");
                        }
-<IN_STRING>[^'\\]*     { yymore();                             }
+<IN_STRING>[^'\\]*     { }
 
     /* ----------
      * Any unmatched character is returned as is
@@ -240,26 +254,6 @@ dump                       { return O_DUMP;                        }
 %%
 
 
-static void
-plpgsql_input(char *buf, int *result, int max)
-{
-    int                n = max;
-
-    if (n > plpgsql_bytes_left)
-        n = plpgsql_bytes_left;
-
-    if (n == 0)
-       {
-        *result = YY_NULL;
-               return;
-    }
-
-    *result = n;
-    memcpy(buf, plpgsql_source, n);
-    plpgsql_source += n;
-    plpgsql_bytes_left -= n;
-}
-
 /*
  * This is the yylex routine called from outside. It exists to provide
  * a pushback facility, as well as to allow us to parse syntax that
@@ -319,17 +313,35 @@ plpgsql_push_back_token(int token)
        have_pushback_token = true;
 }
 
+/*
+ * Return the line number at which the current token ends.  This substitutes
+ * for flex's very poorly implemented yylineno facility.
+ *
+ * We assume that flex has written a '\0' over the character following the
+ * current token in scanbuf, so strchr() stops there.  Each '\n' found from
+ * cur_line_start onward bumps cur_line_num and advances cur_line_start past
+ * it, so text already counted is never rescanned on later calls.
+ */
+int
+plpgsql_scanner_lineno(void)
+{
+       const char *c;
+
+       while ((c = strchr(cur_line_start, '\n')) != NULL)
+       {
+               cur_line_start = c + 1;
+               cur_line_num++;
+       }
+       return cur_line_num;
+}
 
 /*
- * Initialize the scanner for new input.
+ * Called before any actual parsing is done
  */
 void
-plpgsql_setinput(char *source, int functype)
+plpgsql_scanner_init(const char *str, int functype)
 {
-    yyrestart(NULL);
-    yylineno = 1;
-
-    plpgsql_source = source;
+       Size    slen;
 
        /*----------
         * Hack: skip any initial newline, so that in the common coding layout
@@ -339,16 +351,47 @@ plpgsql_setinput(char *source, int functype)
         * we will think "line 1" is what the programmer thinks of as line 1.
         *----------
         */
-    if (*plpgsql_source == '\r')
-        plpgsql_source++;
-    if (*plpgsql_source == '\n')
-        plpgsql_source++;
+    if (*str == '\r')
+        str++;
+    if (*str == '\n')
+        str++;
 
-    plpgsql_bytes_left = strlen(plpgsql_source);
+       slen = strlen(str);
 
+       /*
+        * Might be left over after ereport()
+        */
+       if (YY_CURRENT_BUFFER)
+               yy_delete_buffer(YY_CURRENT_BUFFER);
+
+       /*
+        * Make a scan buffer with special termination needed by flex.
+        */
+       scanbuf = palloc(slen + 2);
+       memcpy(scanbuf, str, slen);
+       scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
+       scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
+
+       /* Other setup */
     scanner_functype     = functype;
     scanner_typereported = 0;
 
        have_pushback_token = false;
        have_lookahead_token = false;
+
+       cur_line_start = scanbuf;
+       cur_line_num = 1;
+
+       BEGIN(INITIAL);
+}
+
+
+/*
+ * Called after parsing is done to clean up after plpgsql_scanner_init()
+ */
+void
+plpgsql_scanner_finish(void)
+{
+       yy_delete_buffer(scanbufhandle);        /* drop flex's handle on the scan buffer */
+       pfree(scanbuf);                         /* free the storage palloc'd for the buffer */
 }