granicus.if.org Git - postgresql/commitdiff
SQL commands in pgbench scripts are now ended by semicolons, not newlines.
author: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 20 Mar 2016 16:58:44 +0000 (12:58 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 20 Mar 2016 16:58:51 +0000 (12:58 -0400)
To allow multiline SQL commands in scripts, adopt the same rules psql uses
to decide what is the end of a SQL command, to wit, an unquoted semicolon
not encased in parentheses.  Do this by importing the same flex lexer that
psql uses, since coping with stuff like dollar-quoted literals is hard to
get right without going the full nine yards.

This makes use of the infrastructure added in commit 0ea9efbe9ec1bf07 to
support independently-written flex lexers scanning the same PsqlScanState
input-buffer data structure.  Since that infrastructure isn't very
friendly to ad-hoc parsing code such as strtok(), improve exprscan.l
so that it can parse either whitespace-separated words or expression
tokens, on demand, and rewrite pgbench.c's backslash-command parsing
code to always use the lexer to fetch tokens.

It's still the case that pgbench backslash commands extend to the end
of the line, no more and no less.  That could be changed in a fairly
localized way now, and there was some interest in doing so, but it
seems like material for a separate patch.

In passing, make some marginal cleanups in syntax error reporting,
const-ify a few data structures that could use it, and run some of
this code through pgindent.

I can't tell whether the MSVC build scripts need to be taught explicitly
about the changes here or not, but the buildfarm will soon tell us.

Kyotaro Horiguchi and Tom Lane

doc/src/sgml/ref/pgbench.sgml
src/bin/pgbench/Makefile
src/bin/pgbench/exprparse.y
src/bin/pgbench/exprscan.l
src/bin/pgbench/pgbench.c
src/bin/pgbench/pgbench.h

index dd3fb1dce0faeafd1b01c633c76e4474d4f3a322..c6d1454b1e955f67b9920c21f65e813f8b7f6d9f 100644 (file)
@@ -743,13 +743,25 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
   </para>
 
   <para>
-   The format of a script file is one SQL command per line; multiline
-   SQL commands are not supported.  Empty lines and lines beginning with
-   <literal>--</> are ignored.  Script file lines can also be
+   A script file contains one or more SQL commands terminated by
+   semicolons.  Empty lines and lines beginning with
+   <literal>--</> are ignored.  Script files can also contain
    <quote>meta commands</>, which are interpreted by <application>pgbench</>
    itself, as described below.
   </para>
 
+  <note>
+   <para>
+    Before <productname>PostgreSQL</> 9.6, SQL commands in script files
+    were terminated by newlines, and so they could not be continued across
+    lines.  Now a semicolon is <emphasis>required</> to separate consecutive
+    SQL commands (though a SQL command does not need one if it is followed
+    by a meta command).  If you need to create a script file that works with
+    both old and new versions of <application>pgbench</>, be sure to write
+    each SQL command on a single line ending with a semicolon.
+   </para>
+  </note>
+
   <para>
    There is a simple variable-substitution facility for script files.
    Variables can be set by the command-line <option>-D</> option,
@@ -789,7 +801,8 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
    </table>
 
   <para>
-   Script file meta commands begin with a backslash (<literal>\</>).
+   Script file meta commands begin with a backslash (<literal>\</>) and
+   extend to the end of the line.
    Arguments to a meta command are separated by white space.
    These meta commands are supported:
   </para>
@@ -806,9 +819,9 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
       from <replaceable>expression</>.
       The expression may contain integer constants such as <literal>5432</>,
       references to variables <literal>:</><replaceable>variablename</>,
-      and expressions composed of unary (<literal>-</>) or binary operators
+      unary operators (<literal>+</>, <literal>-</>) and binary operators
       (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>,
-      <literal>%</>) with their usual associativity,
+      <literal>%</>) with their usual precedence and associativity,
       <link linkend="pgbench-builtin-functions">function calls</>, and
       parentheses.
      </para>
@@ -938,14 +951,15 @@ f(x) = exp(-parameter * (x - min) / (max - min + 1)) / (1.0 - exp(-parameter))
     <listitem>
      <para>
       Sets variable <replaceable>varname</> to the result of the shell command
-      <replaceable>command</>. The command must return an integer value
-      through its standard output.
+      <replaceable>command</> with the given <replaceable>argument</>(s).
+      The command must return an integer value through its standard output.
      </para>
 
-     <para><replaceable>argument</> can be either a text constant or a
-      <literal>:</><replaceable>variablename</> reference to a variable of
-      any types. If you want to use <replaceable>argument</> starting with
-      colons, you need to add an additional colon at the beginning of
+     <para>
+      <replaceable>command</> and each <replaceable>argument</> can be either
+      a text constant or a <literal>:</><replaceable>variablename</> reference
+      to a variable. If you want to use an <replaceable>argument</> starting
+      with a colon, write an additional colon at the beginning of
       <replaceable>argument</>.
      </para>
 
@@ -964,7 +978,8 @@ f(x) = exp(-parameter * (x - min) / (max - min + 1)) / (1.0 - exp(-parameter))
 
     <listitem>
      <para>
-      Same as <literal>\setshell</literal>, but the result is ignored.
+      Same as <literal>\setshell</literal>, but the result of the command
+      is discarded.
      </para>
 
      <para>
@@ -1010,7 +1025,7 @@ END;
 
    <para>
      The following functions are built into <application>pgbench</> and
-     may be used in conjunction with
+     may be used in expressions appearing in
      <link linkend="pgbench-metacommand-set"><literal>\set</literal></link>.
    </para>
 
index 560bfea6c4f0f14bd4e222f598d5a6b7820478c3..e5d22c2e7c254a925e7095b564d9c6930293b3bc 100644 (file)
@@ -7,9 +7,10 @@ subdir = src/bin/pgbench
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = pgbench.o exprparse.o $(WIN32RES)
+OBJS = pgbench.o exprparse.o psqlscan.o $(WIN32RES)
 
-override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS)
+override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) \
+       -I$(top_srcdir)/src/bin/psql $(CPPFLAGS)
 
 ifneq ($(PORTNAME), win32)
 override CFLAGS += $(PTHREAD_CFLAGS)
@@ -24,6 +25,15 @@ pgbench: $(OBJS) | submake-libpq submake-libpgport
 # exprscan is compiled as part of exprparse
 exprparse.o: exprscan.c
 
+# we import psqlscan.o as-is from psql
+submake-psqlscan:
+       $(MAKE) -C $(top_builddir)/src/bin/psql psqlscan.o
+
+psqlscan.o: | submake-psqlscan
+       rm -f $@ && $(LN_S) $(top_builddir)/src/bin/psql/psqlscan.o .
+
+.PHONY: submake-psqlscan
+
 distprep: exprparse.c exprscan.c
 
 install: all installdirs
index 55fda5b254c224b804514367b7e742659987582b..5d0d97272f7cf7877befaebd8e8e185004657136 100644 (file)
@@ -22,8 +22,8 @@ static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
 static PgBenchExpr *make_integer_constant(int64 ival);
 static PgBenchExpr *make_variable(char *varname);
 static PgBenchExpr *make_op(yyscan_t yyscanner, const char *operator,
-                                                       PgBenchExpr *lexpr, PgBenchExpr *rexpr);
-static int find_func(yyscan_t yyscanner, const char *fname);
+               PgBenchExpr *lexpr, PgBenchExpr *rexpr);
+static int     find_func(yyscan_t yyscanner, const char *fname);
 static PgBenchExpr *make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args);
 
 %}
@@ -114,28 +114,49 @@ make_op(yyscan_t yyscanner, const char *operator,
  * List of available functions:
  * - fname: function name
  * - nargs: number of arguments
- *          -1 is a special value for min & max meaning #args >= 1
+ *                     -1 is a special value for min & max meaning #args >= 1
  * - tag: function identifier from PgBenchFunction enum
  */
-static struct
+static const struct
 {
-       char * fname;
-       int nargs;
+       const char *fname;
+       int                     nargs;
        PgBenchFunction tag;
-} PGBENCH_FUNCTIONS[] = {
+}      PGBENCH_FUNCTIONS[] =
+{
        /* parsed as operators, executed as functions */
-       { "+", 2, PGBENCH_ADD },
-       { "-", 2, PGBENCH_SUB },
-       { "*", 2, PGBENCH_MUL },
-       { "/", 2, PGBENCH_DIV },
-       { "%", 2, PGBENCH_MOD },
+       {
+               "+", 2, PGBENCH_ADD
+       },
+       {
+               "-", 2, PGBENCH_SUB
+       },
+       {
+               "*", 2, PGBENCH_MUL
+       },
+       {
+               "/", 2, PGBENCH_DIV
+       },
+       {
+               "%", 2, PGBENCH_MOD
+       },
        /* actual functions */
-       { "abs", 1, PGBENCH_ABS },
-       { "min", -1, PGBENCH_MIN },
-       { "max", -1, PGBENCH_MAX },
-       { "debug", 1, PGBENCH_DEBUG },
+       {
+               "abs", 1, PGBENCH_ABS
+       },
+       {
+               "min", -1, PGBENCH_MIN
+       },
+       {
+               "max", -1, PGBENCH_MAX
+       },
+       {
+               "debug", 1, PGBENCH_DEBUG
+       },
        /* keep as last array element */
-       { NULL, 0, 0 }
+       {
+               NULL, 0, 0
+       }
 };
 
 /*
@@ -147,7 +168,7 @@ static struct
 static int
 find_func(yyscan_t yyscanner, const char *fname)
 {
-       int i = 0;
+       int                     i = 0;
 
        while (PGBENCH_FUNCTIONS[i].fname)
        {
@@ -166,7 +187,7 @@ find_func(yyscan_t yyscanner, const char *fname)
 static PgBenchExprList *
 make_elist(PgBenchExpr *expr, PgBenchExprList *list)
 {
-       PgBenchExprLink * cons;
+       PgBenchExprLink *cons;
 
        if (list == NULL)
        {
@@ -193,8 +214,8 @@ make_elist(PgBenchExpr *expr, PgBenchExprList *list)
 static int
 elist_length(PgBenchExprList *list)
 {
-       PgBenchExprLink *link = list != NULL? list->head: NULL;
-       int len = 0;
+       PgBenchExprLink *link = list != NULL ? list->head : NULL;
+       int                     len = 0;
 
        for (; link != NULL; link = link->next)
                len++;
@@ -225,7 +246,7 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
        expr->u.function.function = PGBENCH_FUNCTIONS[fnumber].tag;
 
        /* only the link is used, the head/tail is not useful anymore */
-       expr->u.function.args = args != NULL? args->head: NULL;
+       expr->u.function.args = args != NULL ? args->head : NULL;
        if (args)
                pg_free(args);
 
index 00cb74d7dadd70cc0caece072294e977abacd981..d069c5b05b0e39ce687a92a6fb0420375b4e5033 100644 (file)
@@ -2,7 +2,18 @@
 /*-------------------------------------------------------------------------
  *
  * exprscan.l
- *       a lexical scanner for a simple expression syntax
+ *       lexical scanner for pgbench backslash commands
+ *
+ * This lexer supports two operating modes:
+ *
+ * In INITIAL state, just parse off whitespace-separated words (this mode
+ * is basically equivalent to strtok(), which is what we used to use).
+ *
+ * In EXPR state, lex for the simple expression syntax of exprparse.y.
+ *
+ * In either mode, stop upon hitting newline or end of string.
+ *
+ * Note that this lexer operates within the framework created by psqlscan.l.
  *
  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *-------------------------------------------------------------------------
  */
 
-/* line and column number for error reporting */
-static int     yyline = 0, yycol = 0;
+#include "psqlscan_int.h"
 
-/* Handles to the buffer that the lexer uses internally */
-static YY_BUFFER_STATE scanbufhandle;
-static char *scanbuf;
-
-/* context information for error reporting */
+/* context information for reporting errors in expressions */
 static const char *expr_source = NULL;
-static int expr_lineno = 0;
-static const char *expr_full_line = NULL;
+static int     expr_lineno = 0;
+static int     expr_start_offset = 0;
 static const char *expr_command = NULL;
-static int expr_col = 0;
+
+/* indicates whether last yylex() call read a newline */
+static bool last_was_newline = false;
 
 /*
  * Work around a bug in flex 2.5.35: it emits a couple of functions that
@@ -48,122 +56,296 @@ extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
 %option warn
 %option prefix="expr_yy"
 
+/* Character classes */
 alpha                  [a-zA-Z_]
 digit                  [0-9]
 alnum                  [a-zA-Z0-9_]
-space                  [ \t\r\f]
+/* {space} + {nonspace} + {newline} should cover all characters */
+space                  [ \t\r\f\v]
+nonspace               [^ \t\r\f\v\n]
+newline                        [\n]
+
+/* Exclusive states */
+%x EXPR
 
 %%
 
 %{
+               /* Declare some local variables inside yylex(), for convenience */
+               PsqlScanState cur_state = yyextra;
+
                /*
-                * Force flex into the appropriate start state ... which, for this
-                * case, is always INITIAL.  This ensures that we can transition
-                * between different lexers sharing the same yyscan_t.
+                * Force flex into the state indicated by start_state.  This has a
+                * couple of purposes: it lets some of the functions below set a new
+                * starting state without ugly direct access to flex variables, and it
+                * allows us to transition from one flex lexer to another so that we
+                * can lex different parts of the source string using separate lexers.
                 */
-               BEGIN(INITIAL);
+               BEGIN(cur_state->start_state);
+
+               /* Reset was-newline flag */
+               last_was_newline = false;
 %}
 
-"+"                            { yycol += yyleng; return '+'; }
-"-"                            { yycol += yyleng; return '-'; }
-"*"                            { yycol += yyleng; return '*'; }
-"/"                            { yycol += yyleng; return '/'; }
-"%"                            { yycol += yyleng; return '%'; }
-"("                            { yycol += yyleng; return '('; }
-")"                            { yycol += yyleng; return ')'; }
-","                            { yycol += yyleng; return ','; }
+       /* INITIAL state */
+
+{nonspace}+            {
+                                       /* Found a word, emit and return it */
+                                       psqlscan_emit(cur_state, yytext, yyleng);
+                                       return 1;
+                               }
+
+{space}+               { /* ignore */ }
+
+{newline}              {
+                                       /* report end of command */
+                                       last_was_newline = true;
+                                       return 0;
+                               }
+
+       /* EXPR state */
+
+<EXPR>{
+
+"+"                            { return '+'; }
+"-"                            { return '-'; }
+"*"                            { return '*'; }
+"/"                            { return '/'; }
+"%"                            { return '%'; }
+"("                            { return '('; }
+")"                            { return ')'; }
+","                            { return ','; }
 
 :{alnum}+              {
-                                       yycol += yyleng;
                                        yylval.str = pg_strdup(yytext + 1);
                                        return VARIABLE;
                                }
 {digit}+               {
-                                       yycol += yyleng;
                                        yylval.ival = strtoint64(yytext);
                                        return INTEGER;
                                }
 {alpha}{alnum}*        {
-                                       yycol += yyleng;
                                        yylval.str = pg_strdup(yytext);
                                        return FUNCTION;
                                }
 
-[\n]                   { yycol = 0; yyline++; }
+{newline}              {
+                                       /* report end of command */
+                                       last_was_newline = true;
+                                       return 0;
+                               }
 
-{space}+               { yycol += yyleng; /* otherwise ignore */ }
+{space}+               { /* ignore */ }
 
 .                              {
-                                       yycol += yyleng;
-                                       syntax_error(expr_source, expr_lineno, expr_full_line, expr_command,
-                                                                "unexpected character", yytext, expr_col + yycol);
-                                       /* NOTREACHED, exit is called from syntax_error */
+                                       /*
+                                        * must strdup yytext so that expr_yyerror_more doesn't
+                                        * change it while finding end of line
+                                        */
+                                       expr_yyerror_more(yyscanner, "unexpected character",
+                                                                         pg_strdup(yytext));
+                                       /* NOTREACHED, syntax_error calls exit() */
                                        return 0;
                                }
+
+}
+
+<<EOF>>                        {
+                                       if (cur_state->buffer_stack == NULL)
+                                               return 0;                       /* end of input reached */
+
+                                       /*
+                                        * We were expanding a variable, so pop the inclusion
+                                        * stack and keep lexing
+                                        */
+                                       psqlscan_pop_buffer_stack(cur_state);
+                                       psqlscan_select_top_buffer(cur_state);
+                               }
+
 %%
 
 void
 expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
 {
-       syntax_error(expr_source, expr_lineno, expr_full_line, expr_command,
-                                message, more, expr_col + yycol);
+       PsqlScanState state = yyget_extra(yyscanner);
+       int                     error_detection_offset = expr_scanner_offset(state) - 1;
+       char       *full_line;
+       size_t          l;
+
+       /*
+        * While parsing an expression, we may not have collected the whole line
+        * yet from the input source.  Lex till EOL so we can report whole line.
+        * (If we're at EOF, it's okay to call yylex() an extra time.)
+        */
+       if (!last_was_newline)
+       {
+               while (yylex(yyscanner))
+                        /* skip */ ;
+       }
+
+       full_line = expr_scanner_get_substring(state,
+                                                                                  expr_start_offset,
+                                                                                  expr_scanner_offset(state));
+       /* Trim trailing newline if any */
+       l = strlen(full_line);
+       while (l > 0 && full_line[l - 1] == '\n')
+               full_line[--l] = '\0';
+
+       syntax_error(expr_source, expr_lineno, full_line, expr_command,
+                                message, more, error_detection_offset - expr_start_offset);
 }
 
 void
-yyerror(yyscan_t yyscanner, const char *message)
+expr_yyerror(yyscan_t yyscanner, const char *message)
 {
        expr_yyerror_more(yyscanner, message, NULL);
 }
 
 /*
- * Called before any actual parsing is done
+ * Collect a space-separated word from a backslash command and return it
+ * in word_buf, along with its starting string offset in *offset.
+ * Returns true if successful, false if at end of command.
  */
-yyscan_t
-expr_scanner_init(const char *str, const char *source,
-                                 int lineno, const char *line,
-                                 const char *cmd, int ecol)
+bool
+expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
 {
-       yyscan_t yyscanner;
-       Size    slen = strlen(str);
+       int                     lexresult;
 
-       /* Set up yyscan_t */
-       yylex_init(&yyscanner);
+       /* Must be scanning already */
+       Assert(state->scanbufhandle != NULL);
 
-       /* save context information for error messages */
-       expr_source = source;
-       expr_lineno = lineno;
-       expr_full_line = line;
-       expr_command = cmd;
-       expr_col = ecol;
+       /* Set current output target */
+       state->output_buf = word_buf;
+       resetPQExpBuffer(word_buf);
+
+       /* Set input source */
+       if (state->buffer_stack != NULL)
+               yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+       else
+               yy_switch_to_buffer(state->scanbufhandle, state->scanner);
 
-       /* reset error pointers for this scan */
-       yycol = yyline = 0;
+       /* Set start state */
+       state->start_state = INITIAL;
+
+       /* And lex. */
+       lexresult = yylex(state->scanner);
 
        /*
-        * Make a scan buffer with special termination needed by flex.
+        * Save start offset of word, if any.  We could do this more efficiently,
+        * but for now this seems fine.
         */
-       scanbuf = pg_malloc(slen + 2);
-       memcpy(scanbuf, str, slen);
-       scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
-       scanbufhandle = yy_scan_buffer(scanbuf, slen + 2, yyscanner);
+       if (lexresult)
+               *offset = expr_scanner_offset(state) - word_buf->len;
+       else
+               *offset = -1;
 
-       return yyscanner;
+       /*
+        * In case the caller returns to using the regular SQL lexer, reselect the
+        * appropriate initial state.
+        */
+       psql_scan_reselect_sql_lexer(state);
+
+       return (bool) lexresult;
 }
 
+/*
+ * Prepare to lex an expression via expr_yyparse().
+ *
+ * Returns the yyscan_t that is to be passed to expr_yyparse().
+ * (This is just state->scanner, but callers don't need to know that.)
+ */
+yyscan_t
+expr_scanner_init(PsqlScanState state,
+                                 const char *source, int lineno, int start_offset,
+                                 const char *command)
+{
+       /* Save error context info */
+       expr_source = source;
+       expr_lineno = lineno;
+       expr_start_offset = start_offset;
+       expr_command = command;
+
+       /* Must be scanning already */
+       Assert(state->scanbufhandle != NULL);
+
+       /* Set current output target */
+       state->output_buf = NULL;
+
+       /* Set input source */
+       if (state->buffer_stack != NULL)
+               yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+       else
+               yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+
+       /* Set start state */
+       state->start_state = EXPR;
+
+       return state->scanner;
+}
 
 /*
- * Called after parsing is done to clean up after expr_scanner_init()
+ * Finish lexing an expression.
  */
 void
 expr_scanner_finish(yyscan_t yyscanner)
 {
-       yy_delete_buffer(scanbufhandle, yyscanner);
-       pg_free(scanbuf);
-       yylex_destroy(yyscanner);
-
-       expr_source = NULL;
-       expr_lineno = 0;
-       expr_full_line = NULL;
-       expr_command = NULL;
-       expr_col = 0;
+       PsqlScanState state = yyget_extra(yyscanner);
+
+       /*
+        * Reselect appropriate initial state for SQL lexer.
+        */
+       psql_scan_reselect_sql_lexer(state);
+}
+
+/*
+ * Get offset from start of string to end of current lexer token.
+ *
+ * We rely on the knowledge that flex modifies the scan buffer by storing
+ * a NUL at the end of the current token (yytext).  Note that this might
+ * not work quite right if we were parsing a sub-buffer, but since pgbench
+ * never invokes that functionality, it doesn't matter.
+ */
+int
+expr_scanner_offset(PsqlScanState state)
+{
+       return strlen(state->scanbuf);
+}
+
+/*
+ * Get a malloc'd copy of the lexer input string from start_offset
+ * to just before end_offset.
+ */
+char *
+expr_scanner_get_substring(PsqlScanState state,
+                                                  int start_offset, int end_offset)
+{
+       char       *result;
+       int                     slen = end_offset - start_offset;
+
+       Assert(slen >= 0);
+       Assert(end_offset <= strlen(state->scanbuf));
+       result = (char *) pg_malloc(slen + 1);
+       memcpy(result, state->scanbuf + start_offset, slen);
+       result[slen] = '\0';
+
+       return result;
+}
+
+/*
+ * Get the line number associated with the given string offset
+ * (which must not be past the end of where we've lexed to).
+ */
+int
+expr_scanner_get_lineno(PsqlScanState state, int offset)
+{
+       int                     lineno = 1;
+       const char *p = state->scanbuf;
+
+       while (*p && offset > 0)
+       {
+               if (*p == '\n')
+                       lineno++;
+               p++, offset--;
+       }
+       return lineno;
 }
index dab1ed4114e96da1c4a3a6d0a92d62be846251d8..4196b0e94b01e390b95c056b56e9b40ed94f7caa 100644 (file)
@@ -291,22 +291,21 @@ static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
 
 typedef struct
 {
-       char       *line;                       /* full text of command line */
+       char       *line;                       /* text of command line */
        int                     command_num;    /* unique index of this Command struct */
        int                     type;                   /* command type (SQL_COMMAND or META_COMMAND) */
        int                     argc;                   /* number of command words */
        char       *argv[MAX_ARGS]; /* command word list */
-       int                     cols[MAX_ARGS]; /* corresponding column starting from 1 */
-       PgBenchExpr *expr;                      /* parsed expression */
+       PgBenchExpr *expr;                      /* parsed expression, if needed */
        SimpleStats stats;                      /* time spent in this command */
 } Command;
 
 typedef struct ParsedScript
 {
-       const char *desc;
-       int                     weight;
-       Command   **commands;
-       StatsData       stats;
+       const char *desc;                       /* script descriptor (eg, file name) */
+       int                     weight;                 /* selection weight */
+       Command   **commands;           /* NULL-terminated array of Commands */
+       StatsData       stats;                  /* total time spent in script */
 } ParsedScript;
 
 static ParsedScript sql_script[MAX_SCRIPTS];   /* SQL script files */
@@ -319,12 +318,12 @@ static int        debug = 0;                      /* debug flag */
 /* Builtin test scripts */
 typedef struct BuiltinScript
 {
-       char       *name;                       /* very short name for -b ... */
-       char       *desc;                       /* short description */
-       char       *script;                     /* actual pgbench script */
+       const char *name;                       /* very short name for -b ... */
+       const char *desc;                       /* short description */
+       const char *script;                     /* actual pgbench script */
 } BuiltinScript;
 
-static BuiltinScript builtin_script[] =
+static const BuiltinScript builtin_script[] =
 {
        {
                "tpcb-like",
@@ -371,16 +370,23 @@ static BuiltinScript builtin_script[] =
 
 
 /* Function prototypes */
-static void setalarm(int seconds);
-static void *threadRun(void *arg);
-
-static void processXactStats(TState *thread, CState *st, instr_time *now,
-                                bool skipped, StatsData *agg);
+static bool evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval);
 static void doLog(TState *thread, CState *st, instr_time *now,
          StatsData *agg, bool skipped, double latency, double lag);
+static void processXactStats(TState *thread, CState *st, instr_time *now,
+                                bool skipped, StatsData *agg);
+static void pgbench_error(const char *fmt,...) pg_attribute_printf(1, 2);
+static void addScript(ParsedScript script);
+static void *threadRun(void *arg);
+static void setalarm(int seconds);
 
 
-static bool evaluateExpr(CState *, PgBenchExpr *, int64 *);
+/* callback functions for our flex lexer */
+static const PsqlScanCallbacks pgbench_callbacks = {
+       NULL,                                           /* don't need get_variable functionality */
+       pgbench_error
+};
+
 
 static void
 usage(void)
@@ -2366,26 +2372,53 @@ parseQuery(Command *cmd, const char *raw_sql)
        return true;
 }
 
+/*
+ * Simple error-printing function, might be needed by lexer
+ */
+static void
+pgbench_error(const char *fmt,...)
+{
+       va_list         ap;
+
+       fflush(stdout);
+       va_start(ap, fmt);
+       vfprintf(stderr, _(fmt), ap);
+       va_end(ap);
+}
+
+/*
+ * syntax error while parsing a script (in practice, while parsing a
+ * backslash command, because we don't detect syntax errors in SQL)
+ *
+ * source: source of script (filename or builtin-script ID)
+ * lineno: line number within script (count from 1)
+ * line: whole line of backslash command, if available
+ * command: backslash command name, if available
+ * msg: the actual error message
+ * more: optional extra message
+ * column: zero-based column number, or -1 if unknown
+ */
 void
-pg_attribute_noreturn()
-syntax_error(const char *source, const int lineno,
+syntax_error(const char *source, int lineno,
                         const char *line, const char *command,
-                        const char *msg, const char *more, const int column)
+                        const char *msg, const char *more, int column)
 {
        fprintf(stderr, "%s:%d: %s", source, lineno, msg);
        if (more != NULL)
                fprintf(stderr, " (%s)", more);
-       if (column != -1)
-               fprintf(stderr, " at column %d", column);
-       fprintf(stderr, " in command \"%s\"\n", command);
+       if (column >= 0 && line == NULL)
+               fprintf(stderr, " at column %d", column + 1);
+       if (command != NULL)
+               fprintf(stderr, " in command \"%s\"", command);
+       fprintf(stderr, "\n");
        if (line != NULL)
        {
                fprintf(stderr, "%s\n", line);
-               if (column != -1)
+               if (column >= 0)
                {
                        int                     i;
 
-                       for (i = 0; i < column - 1; i++)
+                       for (i = 0; i < column; i++)
                                fprintf(stderr, " ");
                        fprintf(stderr, "^ error found here\n");
                }
@@ -2393,293 +2426,425 @@ syntax_error(const char *source, const int lineno,
        exit(1);
 }
 
-/* Parse a command; return a Command struct, or NULL if it's a comment */
+/*
+ * Parse a SQL command; return a Command struct, or NULL if it's a comment
+ *
+ * On entry, psqlscan.l has collected the command into "buf", so we don't
+ * really need to do much here except check for comment and set up a
+ * Command struct.
+ */
 static Command *
-process_commands(char *buf, const char *source, const int lineno)
+process_sql_command(PQExpBuffer buf, const char *source)
 {
-       const char      delim[] = " \f\n\r\t\v";
-       Command    *my_commands;
+       Command    *my_command;
+       char       *p;
+       char       *nlpos;
+
+       /* Skip any leading whitespace, as well as "--" style comments */
+       p = buf->data;
+       for (;;)
+       {
+               if (isspace((unsigned char) *p))
+                       p++;
+               else if (strncmp(p, "--", 2) == 0)
+               {
+                       p = strchr(p, '\n');
+                       if (p == NULL)
+                               return NULL;
+                       p++;
+               }
+               else
+                       break;
+       }
+
+       /* If there's nothing but whitespace and comments, we're done */
+       if (*p == '\0')
+               return NULL;
+
+       /* Allocate and initialize Command structure */
+       my_command = (Command *) pg_malloc0(sizeof(Command));
+       my_command->command_num = num_commands++;
+       my_command->type = SQL_COMMAND;
+       my_command->argc = 0;
+       initSimpleStats(&my_command->stats);
+
+       /*
+        * If SQL command is multi-line, we only want to save the first line as
+        * the "line" label.
+        */
+       nlpos = strchr(p, '\n');
+       if (nlpos)
+       {
+               my_command->line = pg_malloc(nlpos - p + 1);
+               memcpy(my_command->line, p, nlpos - p);
+               my_command->line[nlpos - p] = '\0';
+       }
+       else
+               my_command->line = pg_strdup(p);
+
+       switch (querymode)
+       {
+               case QUERY_SIMPLE:
+                       my_command->argv[0] = pg_strdup(p);
+                       my_command->argc++;
+                       break;
+               case QUERY_EXTENDED:
+               case QUERY_PREPARED:
+                       if (!parseQuery(my_command, p))
+                               exit(1);
+                       break;
+               default:
+                       exit(1);
+       }
+
+       return my_command;
+}
+
+/*
+ * Parse a backslash command; return a Command struct, or NULL if comment
+ *
+ * At call, we have scanned only the initial backslash.
+ */
+static Command *
+process_backslash_command(PsqlScanState sstate, const char *source)
+{
+       Command    *my_command;
+       PQExpBufferData word_buf;
+       int                     word_offset;
+       int                     offsets[MAX_ARGS];              /* offsets of argument words */
+       int                     start_offset,
+                               end_offset;
+       int                     lineno;
        int                     j;
-       char       *p,
-                          *tok;
 
-       /* Make the string buf end at the next newline */
-       if ((p = strchr(buf, '\n')) != NULL)
-               *p = '\0';
+       initPQExpBuffer(&word_buf);
 
-       /* Skip leading whitespace */
-       p = buf;
-       while (isspace((unsigned char) *p))
-               p++;
+       /* Remember location of the backslash */
+       start_offset = expr_scanner_offset(sstate) - 1;
+       lineno = expr_scanner_get_lineno(sstate, start_offset);
 
-       /* If the line is empty or actually a comment, we're done */
-       if (*p == '\0' || strncmp(p, "--", 2) == 0)
+       /* Collect first word of command */
+       if (!expr_lex_one_word(sstate, &word_buf, &word_offset))
+       {
+               termPQExpBuffer(&word_buf);
                return NULL;
+       }
 
        /* Allocate and initialize Command structure */
-       my_commands = (Command *) pg_malloc(sizeof(Command));
-       my_commands->line = pg_strdup(buf);
-       my_commands->command_num = num_commands++;
-       my_commands->type = 0;          /* until set */
-       my_commands->argc = 0;
-       initSimpleStats(&my_commands->stats);
-
-       if (*p == '\\')
+       my_command = (Command *) pg_malloc0(sizeof(Command));
+       my_command->command_num = num_commands++;
+       my_command->type = META_COMMAND;
+       my_command->argc = 0;
+       initSimpleStats(&my_command->stats);
+
+       /* Save first word (command name) */
+       j = 0;
+       offsets[j] = word_offset;
+       my_command->argv[j++] = pg_strdup(word_buf.data);
+       my_command->argc++;
+
+       if (pg_strcasecmp(my_command->argv[0], "set") == 0)
        {
-               int                     max_args = -1;
+               /* For \set, collect var name, then lex the expression. */
+               yyscan_t        yyscanner;
 
-               my_commands->type = META_COMMAND;
+               if (!expr_lex_one_word(sstate, &word_buf, &word_offset))
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "missing argument", NULL, -1);
 
-               j = 0;
-               tok = strtok(++p, delim);
+               offsets[j] = word_offset;
+               my_command->argv[j++] = pg_strdup(word_buf.data);
+               my_command->argc++;
 
-               if (tok != NULL && pg_strcasecmp(tok, "set") == 0)
-                       max_args = 2;
+               yyscanner = expr_scanner_init(sstate, source, lineno, start_offset,
+                                                                         my_command->argv[0]);
 
-               while (tok != NULL)
+               if (expr_yyparse(yyscanner) != 0)
                {
-                       my_commands->cols[j] = tok - buf + 1;
-                       my_commands->argv[j++] = pg_strdup(tok);
-                       my_commands->argc++;
-                       if (max_args >= 0 && my_commands->argc >= max_args)
-                               tok = strtok(NULL, "");
-                       else
-                               tok = strtok(NULL, delim);
+                       /* dead code: exit done from syntax_error called by yyerror */
+                       exit(1);
                }
 
-               if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
-               {
-                       /*--------
-                        * parsing:
-                        *       \setrandom variable min max [uniform]
-                        *       \setrandom variable min max (gaussian|exponential) parameter
-                        */
+               my_command->expr = expr_parse_result;
 
-                       if (my_commands->argc < 4)
-                       {
-                               syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                        "missing arguments", NULL, -1);
-                       }
+               /* Get location of the ending newline */
+               end_offset = expr_scanner_offset(sstate) - 1;
 
-                       /* argc >= 4 */
+               /* Save line */
+               my_command->line = expr_scanner_get_substring(sstate,
+                                                                                                         start_offset,
+                                                                                                         end_offset);
 
-                       if (my_commands->argc == 4 ||           /* uniform without/with
-                                                                                                * "uniform" keyword */
-                               (my_commands->argc == 5 &&
-                                pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
-                       {
-                               /* nothing to do */
-                       }
-                       else if (                       /* argc >= 5 */
-                                        (pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
-                                  (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
-                       {
-                               if (my_commands->argc < 6)
-                               {
-                                       syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                         "missing parameter", my_commands->argv[4], -1);
-                               }
-                               else if (my_commands->argc > 6)
-                               {
-                                       syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                                "too many arguments", my_commands->argv[4],
-                                                                my_commands->cols[6]);
-                               }
-                       }
-                       else    /* cannot parse, unexpected arguments */
-                       {
-                               syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                        "unexpected argument", my_commands->argv[4],
-                                                        my_commands->cols[4]);
-                       }
-               }
-               else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
-               {
-                       yyscan_t        yyscanner;
+               expr_scanner_finish(yyscanner);
 
-                       if (my_commands->argc < 3)
-                       {
-                               syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                        "missing argument", NULL, -1);
-                       }
+               termPQExpBuffer(&word_buf);
 
-                       yyscanner = expr_scanner_init(my_commands->argv[2],
-                                                                                 source,
-                                                                                 lineno,
-                                                                                 my_commands->line,
-                                                                                 my_commands->argv[0],
-                                                                                 my_commands->cols[2] - 1);
+               return my_command;
+       }
 
-                       if (expr_yyparse(yyscanner) != 0)
-                       {
-                               /* dead code: exit done from syntax_error called by yyerror */
-                               exit(1);
-                       }
+       /* For all other commands, collect remaining words. */
+       while (expr_lex_one_word(sstate, &word_buf, &word_offset))
+       {
+               if (j >= MAX_ARGS)
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "too many arguments", NULL, -1);
 
-                       my_commands->expr = expr_parse_result;
+               offsets[j] = word_offset;
+               my_command->argv[j++] = pg_strdup(word_buf.data);
+               my_command->argc++;
+       }
 
-                       expr_scanner_finish(yyscanner);
-               }
-               else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0)
-               {
-                       if (my_commands->argc < 2)
-                       {
-                               syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                        "missing argument", NULL, -1);
-                       }
+       /* Get location of the ending newline */
+       end_offset = expr_scanner_offset(sstate) - 1;
 
-                       /*
-                        * Split argument into number and unit to allow "sleep 1ms" etc.
-                        * We don't have to terminate the number argument with null
-                        * because it will be parsed with atoi, which ignores trailing
-                        * non-digit characters.
-                        */
-                       if (my_commands->argv[1][0] != ':')
-                       {
-                               char       *c = my_commands->argv[1];
+       /* Save line */
+       my_command->line = expr_scanner_get_substring(sstate,
+                                                                                                 start_offset,
+                                                                                                 end_offset);
 
-                               while (isdigit((unsigned char) *c))
-                                       c++;
-                               if (*c)
-                               {
-                                       my_commands->argv[2] = c;
-                                       if (my_commands->argc < 3)
-                                               my_commands->argc = 3;
-                               }
-                       }
+       if (pg_strcasecmp(my_command->argv[0], "setrandom") == 0)
+       {
+               /*--------
+                * parsing:
+                *       \setrandom variable min max [uniform]
+                *       \setrandom variable min max (gaussian|exponential) parameter
+                */
 
-                       if (my_commands->argc >= 3)
-                       {
-                               if (pg_strcasecmp(my_commands->argv[2], "us") != 0 &&
-                                       pg_strcasecmp(my_commands->argv[2], "ms") != 0 &&
-                                       pg_strcasecmp(my_commands->argv[2], "s") != 0)
-                               {
-                                       syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                                "unknown time unit, must be us, ms or s",
-                                                                my_commands->argv[2], my_commands->cols[2]);
-                               }
-                       }
+               if (my_command->argc < 4)
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "missing arguments", NULL, -1);
 
-                       /* this should be an error?! */
-                       for (j = 3; j < my_commands->argc; j++)
-                               fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
-                                               my_commands->argv[0], my_commands->argv[j]);
+               if (my_command->argc == 4 ||    /* uniform without/with "uniform"
+                                                                                * keyword */
+                       (my_command->argc == 5 &&
+                        pg_strcasecmp(my_command->argv[4], "uniform") == 0))
+               {
+                       /* nothing to do */
                }
-               else if (pg_strcasecmp(my_commands->argv[0], "setshell") == 0)
+               else if (                               /* argc >= 5 */
+                                (pg_strcasecmp(my_command->argv[4], "gaussian") == 0) ||
+                                (pg_strcasecmp(my_command->argv[4], "exponential") == 0))
                {
-                       if (my_commands->argc < 3)
-                       {
-                               syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                        "missing argument", NULL, -1);
-                       }
+                       if (my_command->argc < 6)
+                               syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                        "missing parameter", NULL, -1);
+                       else if (my_command->argc > 6)
+                               syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                        "too many arguments", NULL,
+                                                        offsets[6] - start_offset);
                }
-               else if (pg_strcasecmp(my_commands->argv[0], "shell") == 0)
+               else    /* unrecognized distribution argument */
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "unexpected argument", my_command->argv[4],
+                                                offsets[4] - start_offset);
+       }
+       else if (pg_strcasecmp(my_command->argv[0], "sleep") == 0)
+       {
+               if (my_command->argc < 2)
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "missing argument", NULL, -1);
+
+               if (my_command->argc > 3)
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "too many arguments", NULL,
+                                                offsets[3] - start_offset);
+
+               /*
+                * Split argument into number and unit to allow "sleep 1ms" etc. We
+                * don't have to terminate the number argument with null because it
+                * will be parsed with atoi, which ignores trailing non-digit
+                * characters.
+                */
+               if (my_command->argc == 2 && my_command->argv[1][0] != ':')
                {
-                       if (my_commands->argc < 1)
+                       char       *c = my_command->argv[1];
+
+                       while (isdigit((unsigned char) *c))
+                               c++;
+                       if (*c)
                        {
-                               syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                        "missing command", NULL, -1);
+                               my_command->argv[2] = c;
+                               offsets[2] = offsets[1] + (c - my_command->argv[1]);
+                               my_command->argc = 3;
                        }
                }
-               else
+
+               if (my_command->argc == 3)
                {
-                       syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-                                                "invalid command", NULL, -1);
+                       if (pg_strcasecmp(my_command->argv[2], "us") != 0 &&
+                               pg_strcasecmp(my_command->argv[2], "ms") != 0 &&
+                               pg_strcasecmp(my_command->argv[2], "s") != 0)
+                               syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                        "unrecognized time unit, must be us, ms or s",
+                                                        my_command->argv[2], offsets[2] - start_offset);
                }
        }
+       else if (pg_strcasecmp(my_command->argv[0], "setshell") == 0)
+       {
+               if (my_command->argc < 3)
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "missing argument", NULL, -1);
+       }
+       else if (pg_strcasecmp(my_command->argv[0], "shell") == 0)
+       {
+               if (my_command->argc < 2)
+                       syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                                "missing command", NULL, -1);
+       }
        else
        {
-               my_commands->type = SQL_COMMAND;
+               syntax_error(source, lineno, my_command->line, my_command->argv[0],
+                                        "invalid command", NULL, -1);
+       }
+
+       termPQExpBuffer(&word_buf);
+
+       return my_command;
+}
+
+/*
+ * Parse a script (either the contents of a file, or a built-in script)
+ * and add it to the list of scripts.
+ */
+static void
+ParseScript(const char *script, const char *desc, int weight)
+{
+       ParsedScript ps;
+       PsqlScanState sstate;
+       PQExpBufferData line_buf;
+       int                     alloc_num;
+       int                     index;
+
+#define COMMANDS_ALLOC_NUM 128
+       alloc_num = COMMANDS_ALLOC_NUM;
+
+       /* Initialize all fields of ps */
+       ps.desc = desc;
+       ps.weight = weight;
+       ps.commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
+       initStats(&ps.stats, 0.0);
+
+       /* Prepare to parse script */
+       sstate = psql_scan_create(&pgbench_callbacks);
+
+       /*
+        * Ideally, we'd scan scripts using the encoding and stdstrings settings
+        * we get from a DB connection.  However, without major rearrangement of
+        * pgbench's argument parsing, we can't have a DB connection at the time
+        * we parse scripts.  Using SQL_ASCII (encoding 0) should work well enough
+        * with any backend-safe encoding, though conceivably we could be fooled
+        * if a script file uses a client-only encoding.  We also assume that
+        * stdstrings should be true, which is a bit riskier.
+        */
+       psql_scan_setup(sstate, script, strlen(script), 0, true);
+
+       initPQExpBuffer(&line_buf);
+
+       index = 0;
+
+       for (;;)
+       {
+               PsqlScanResult sr;
+               promptStatus_t prompt;
+               Command    *command;
+
+               resetPQExpBuffer(&line_buf);
 
-               switch (querymode)
+               sr = psql_scan(sstate, &line_buf, &prompt);
+
+               /* If we collected a SQL command, process that */
+               command = process_sql_command(&line_buf, desc);
+               if (command)
                {
-                       case QUERY_SIMPLE:
-                               my_commands->argv[0] = pg_strdup(p);
-                               my_commands->argc++;
-                               break;
-                       case QUERY_EXTENDED:
-                       case QUERY_PREPARED:
-                               if (!parseQuery(my_commands, p))
-                                       exit(1);
-                               break;
-                       default:
-                               exit(1);
+                       ps.commands[index] = command;
+                       index++;
+
+                       if (index >= alloc_num)
+                       {
+                               alloc_num += COMMANDS_ALLOC_NUM;
+                               ps.commands = (Command **)
+                                       pg_realloc(ps.commands, sizeof(Command *) * alloc_num);
+                       }
                }
+
+               /* If we reached a backslash, process that */
+               if (sr == PSCAN_BACKSLASH)
+               {
+                       command = process_backslash_command(sstate, desc);
+                       if (command)
+                       {
+                               ps.commands[index] = command;
+                               index++;
+
+                               if (index >= alloc_num)
+                               {
+                                       alloc_num += COMMANDS_ALLOC_NUM;
+                                       ps.commands = (Command **)
+                                               pg_realloc(ps.commands, sizeof(Command *) * alloc_num);
+                               }
+                       }
+               }
+
+               /* Done if we reached EOF */
+               if (sr == PSCAN_INCOMPLETE || sr == PSCAN_EOL)
+                       break;
        }
 
-       return my_commands;
+       ps.commands[index] = NULL;
+
+       addScript(ps);
+
+       termPQExpBuffer(&line_buf);
+       psql_scan_finish(sstate);
+       psql_scan_destroy(sstate);
 }
 
 /*
- * Read a line from fd, and return it in a malloc'd buffer.
- * Return NULL at EOF.
+ * Read the entire contents of file fd, and return it in a malloc'd buffer.
  *
  * The buffer will typically be larger than necessary, but we don't care
- * in this program, because we'll free it as soon as we've parsed the line.
+ * in this program, because we'll free it as soon as we've parsed the script.
  */
 static char *
-read_line_from_file(FILE *fd)
+read_file_contents(FILE *fd)
 {
-       char            tmpbuf[BUFSIZ];
        char       *buf;
        size_t          buflen = BUFSIZ;
        size_t          used = 0;
 
-       buf = (char *) palloc(buflen);
-       buf[0] = '\0';
+       buf = (char *) pg_malloc(buflen);
 
-       while (fgets(tmpbuf, BUFSIZ, fd) != NULL)
+       for (;;)
        {
-               size_t          thislen = strlen(tmpbuf);
+               size_t          nread;
 
-               /* Append tmpbuf to whatever we had already */
-               memcpy(buf + used, tmpbuf, thislen + 1);
-               used += thislen;
-
-               /* Done if we collected a newline */
-               if (thislen > 0 && tmpbuf[thislen - 1] == '\n')
+               nread = fread(buf + used, 1, BUFSIZ, fd);
+               used += nread;
+               /* If fread() read less than requested, must be EOF or error */
+               if (nread < BUFSIZ)
                        break;
-
-               /* Else, enlarge buf to ensure we can append next bufferload */
+               /* Enlarge buf so we can read some more */
                buflen += BUFSIZ;
                buf = (char *) pg_realloc(buf, buflen);
        }
+       /* There is surely room for a terminator */
+       buf[used] = '\0';
 
-       if (used > 0)
-               return buf;
-
-       /* Reached EOF */
-       free(buf);
-       return NULL;
+       return buf;
 }
 
 /*
- * Initialize a ParsedScript
+ * Given a file name, read it and add its script to the list.
+ * "-" means to read stdin.
+ * NB: filename must be storage that won't disappear.
  */
 static void
-initParsedScript(ParsedScript *ps, const char *desc, int alloc_num, int weight)
+process_file(const char *filename, int weight)
 {
-       ps->commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
-       ps->desc = desc;
-       ps->weight = weight;
-       initStats(&ps->stats, 0.0);
-}
-
-/*
- * Given a file name, read it and return its ParsedScript representation.  "-"
- * means to read stdin.
- */
-static ParsedScript
-process_file(char *filename, int weight)
-{
-#define COMMANDS_ALLOC_NUM 128
-       ParsedScript ps;
        FILE       *fd;
-       int                     lineno,
-                               index;
        char       *buf;
-       int                     alloc_num;
 
+       /* Slurp the file contents into "buf" */
        if (strcmp(filename, "-") == 0)
                fd = stdin;
        else if ((fd = fopen(filename, "r")) == NULL)
@@ -2689,95 +2854,28 @@ process_file(char *filename, int weight)
                exit(1);
        }
 
-       alloc_num = COMMANDS_ALLOC_NUM;
-       initParsedScript(&ps, filename, alloc_num, weight);
+       buf = read_file_contents(fd);
 
-       lineno = 0;
-       index = 0;
-
-       while ((buf = read_line_from_file(fd)) != NULL)
+       if (ferror(fd))
        {
-               Command    *command;
-
-               lineno += 1;
-
-               command = process_commands(buf, filename, lineno);
-
-               free(buf);
-
-               if (command == NULL)
-                       continue;
-
-               ps.commands[index] = command;
-               index++;
-
-               if (index >= alloc_num)
-               {
-                       alloc_num += COMMANDS_ALLOC_NUM;
-                       ps.commands = pg_realloc(ps.commands, sizeof(Command *) * alloc_num);
-               }
+               fprintf(stderr, "could not read file \"%s\": %s\n",
+                               filename, strerror(errno));
+               exit(1);
        }
-       fclose(fd);
 
-       ps.commands[index] = NULL;
+       if (fd != stdin)
+               fclose(fd);
 
-       return ps;
+       ParseScript(buf, filename, weight);
+
+       free(buf);
 }
 
-/* Parse the given builtin script and return the parsed representation */
-static ParsedScript
-process_builtin(BuiltinScript *bi, int weight)
+/* Parse the given builtin script and add it to the list. */
+static void
+process_builtin(const BuiltinScript *bi, int weight)
 {
-       int                     lineno,
-                               index;
-       char            buf[BUFSIZ];
-       int                     alloc_num;
-       char       *tb = bi->script;
-       ParsedScript ps;
-
-       alloc_num = COMMANDS_ALLOC_NUM;
-       initParsedScript(&ps, bi->desc, alloc_num, weight);
-
-       lineno = 0;
-       index = 0;
-
-       for (;;)
-       {
-               char       *p;
-               Command    *command;
-
-               /* buffer overflow check? */
-               p = buf;
-               while (*tb && *tb != '\n')
-                       *p++ = *tb++;
-
-               if (*tb == '\0')
-                       break;
-
-               if (*tb == '\n')
-                       tb++;
-
-               *p = '\0';
-
-               lineno += 1;
-
-               command = process_commands(buf, bi->desc, lineno);
-               if (command == NULL)
-                       continue;
-
-               ps.commands[index] = command;
-               index++;
-
-               if (index >= alloc_num)
-               {
-                       alloc_num += COMMANDS_ALLOC_NUM;
-                       ps.commands = pg_realloc(ps.commands, sizeof(Command *) * alloc_num);
-               }
-       }
-
-       ps.commands[index] = NULL;
-
-       return ps;
+       ParseScript(bi->script, bi->desc, weight);
 }
 
 /* show available builtin scripts */
@@ -2792,14 +2890,14 @@ listAvailableScripts(void)
        fprintf(stderr, "\n");
 }
 
-/* return builtin script "name" if unambiguous, of fails if not found */
-static BuiltinScript *
+/* return builtin script "name" if unambiguous, fails if not found */
+static const BuiltinScript *
 findBuiltin(const char *name)
 {
        int                     i,
                                found = 0,
                                len = strlen(name);
-       BuiltinScript *result = NULL;
+       const BuiltinScript *result = NULL;
 
        for (i = 0; i < lengthof(builtin_script); i++)
        {
@@ -3264,24 +3362,24 @@ main(int argc, char **argv)
                                }
 
                                weight = parseScriptWeight(optarg, &script);
-                               addScript(process_builtin(findBuiltin(script), weight));
+                               process_builtin(findBuiltin(script), weight);
                                benchmarking_option_set = true;
                                internal_script_used = true;
                                break;
 
                        case 'S':
-                               addScript(process_builtin(findBuiltin("select-only"), 1));
+                               process_builtin(findBuiltin("select-only"), 1);
                                benchmarking_option_set = true;
                                internal_script_used = true;
                                break;
                        case 'N':
-                               addScript(process_builtin(findBuiltin("simple-update"), 1));
+                               process_builtin(findBuiltin("simple-update"), 1);
                                benchmarking_option_set = true;
                                internal_script_used = true;
                                break;
                        case 'f':
                                weight = parseScriptWeight(optarg, &script);
-                               addScript(process_file(script, weight));
+                               process_file(script, weight);
                                benchmarking_option_set = true;
                                break;
                        case 'D':
@@ -3419,7 +3517,7 @@ main(int argc, char **argv)
        /* set default script if none */
        if (num_scripts == 0 && !is_init_mode)
        {
-               addScript(process_builtin(findBuiltin("tpcb-like"), 1));
+               process_builtin(findBuiltin("tpcb-like"), 1);
                benchmarking_option_set = true;
                internal_script_used = true;
        }
index ba2c51acc96d547cbd731e4cead364e4f07d560a..a9db9c5b8317bc6497d0c2f64c51ef7db24fb0a2 100644 (file)
@@ -11,6 +11,8 @@
 #ifndef PGBENCH_H
 #define PGBENCH_H
 
+#include "psqlscan.h"
+
 /*
  * This file is included outside exprscan.l, in places where we can't see
  * flex's definition of typedef yyscan_t.  Fortunately, it's documented as
@@ -84,16 +86,23 @@ extern PgBenchExpr *expr_parse_result;
 
 extern int     expr_yyparse(yyscan_t yyscanner);
 extern int     expr_yylex(yyscan_t yyscanner);
-extern void expr_yyerror(yyscan_t yyscanner, const char *str);
+extern void expr_yyerror(yyscan_t yyscanner, const char *str) pg_attribute_noreturn();
 extern void expr_yyerror_more(yyscan_t yyscanner, const char *str,
-                                 const char *more);
-extern yyscan_t expr_scanner_init(const char *str, const char *source,
-                                 int lineno, const char *line,
-                                 const char *cmd, int ecol);
-extern void syntax_error(const char *source, const int lineno, const char *line,
-                        const char *cmd, const char *msg, const char *more,
-                        const int col);
+                                 const char *more) pg_attribute_noreturn();
+extern bool expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf,
+                                 int *offset);
+extern yyscan_t expr_scanner_init(PsqlScanState state,
+                                 const char *source, int lineno, int start_offset,
+                                 const char *command);
 extern void expr_scanner_finish(yyscan_t yyscanner);
+extern int     expr_scanner_offset(PsqlScanState state);
+extern char *expr_scanner_get_substring(PsqlScanState state,
+                                                  int start_offset, int end_offset);
+extern int     expr_scanner_get_lineno(PsqlScanState state, int offset);
+
+extern void syntax_error(const char *source, int lineno, const char *line,
+                        const char *cmd, const char *msg,
+                        const char *more, int col) pg_attribute_noreturn();
 
 extern int64 strtoint64(const char *str);