]> granicus.if.org Git - postgresql/blob - src/bin/pgbench/pgbench.c
Fix incorrect tps number calculation in "excluding connections establishing".
[postgresql] / src / bin / pgbench / pgbench.c
1 /*
2  * pgbench.c
3  *
4  * A simple benchmark program for PostgreSQL
5  * Originally written by Tatsuo Ishii and enhanced by many contributors.
6  *
7  * src/bin/pgbench/pgbench.c
8  * Copyright (c) 2000-2015, PostgreSQL Global Development Group
9  * ALL RIGHTS RESERVED;
10  *
11  * Permission to use, copy, modify, and distribute this software and its
12  * documentation for any purpose, without fee, and without a written agreement
13  * is hereby granted, provided that the above copyright notice and this
14  * paragraph and the following two paragraphs appear in all copies.
15  *
16  * IN NO EVENT SHALL THE AUTHOR OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
17  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
18  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
19  * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE
20  * POSSIBILITY OF SUCH DAMAGE.
21  *
22  * THE AUTHOR AND DISTRIBUTORS SPECIFICALLY DISCLAIMS ANY WARRANTIES,
23  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
24  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
25  * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO
26  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
27  *
28  */
29
30 #ifdef WIN32
31 #define FD_SETSIZE 1024                 /* set before winsock2.h is included */
32 #endif   /* ! WIN32 */
33
34 #include "postgres_fe.h"
35
36 #include "getopt_long.h"
37 #include "libpq-fe.h"
38 #include "portability/instr_time.h"
39
40 #include <ctype.h>
41 #include <math.h>
42 #include <signal.h>
43 #include <sys/time.h>
44 #ifdef HAVE_SYS_SELECT_H
45 #include <sys/select.h>
46 #endif
47
48 #ifdef HAVE_SYS_RESOURCE_H
49 #include <sys/resource.h>               /* for getrlimit */
50 #endif
51
52 #ifndef M_PI
53 #define M_PI 3.14159265358979323846
54 #endif
55
56 #include "pgbench.h"
57
58 #define ERRCODE_UNDEFINED_TABLE  "42P01"
59
60 /*
61  * Multi-platform pthread implementations
62  */
63
#ifdef WIN32
/*
 * Use native win32 threads on Windows.  Only the two pthread entry points
 * declared here are emulated; pthread_t is a pointer to the emulation's own
 * (opaque at this point) win32_pthread structure.
 */
typedef struct win32_pthread *pthread_t;
typedef int pthread_attr_t;

static int      pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
static int      pthread_join(pthread_t th, void **thread_return);
#elif defined(ENABLE_THREAD_SAFETY)
/* Use platform-dependent pthread capability */
#include <pthread.h>
#else
/*
 * No threads implementation, use none (-j 1).  pthread_t is still defined so
 * that structures holding a thread handle keep compiling.
 */
#define pthread_t void *
#endif
78
79
80 /********************************************************************
81  * some configurable parameters */
82
83 /* max number of clients allowed */
/*
 * Upper bound on the number of clients: FD_SETSIZE minus 10 when FD_SETSIZE
 * is known, otherwise a fixed 1024.
 * NOTE(review): the margin of 10 is presumably kept for descriptors that are
 * not client sockets -- confirm.
 */
#ifdef FD_SETSIZE
#define MAXCLIENTS      (FD_SETSIZE - 10)
#else
#define MAXCLIENTS      1024
#endif

#define LOG_STEP_SECONDS        5       /* seconds between log messages */
#define DEFAULT_NXACTS  10              /* default nxacts */

/*
 * Minimum threshold for the gaussian distribution; getGaussianRand()'s
 * looping-probability estimate is computed for this value.
 */
#define MIN_GAUSSIAN_THRESHOLD          2.0 /* minimum threshold for gauss */

int                     nxacts = 0;                     /* number of transactions per client */
int                     duration = 0;           /* duration in seconds */

/*
 * scaling factor. for example, scale = 10 will make 1000000 tuples in
 * pgbench_accounts table (naccounts, i.e. 100000, rows per unit of scale).
 */
int                     scale = 1;

/*
 * fillfactor. for example, fillfactor = 90 will use only 90 percent
 * space during inserts and leave 10 percent free.
 */
int                     fillfactor = 100;

/*
 * create foreign key constraints on the tables?
 */
int                     foreign_keys = 0;

/*
 * use unlogged tables?
 */
int                     unlogged_tables = 0;

/*
 * log sampling rate (1.0 = log everything, 0.0 = option not given)
 */
double          sample_rate = 0.0;

/*
 * When threads are throttled to a given rate limit, this is the target delay
 * to reach that rate in usec.  0 is the default and means no throttling.
 */
int64           throttle_delay = 0;

/*
 * Transactions which take longer than this limit (in usec) are counted as
 * late, and reported as such, although they are completed anyway. When
 * throttling is enabled, execution time slots that are more than this late
 * are skipped altogether, and counted separately.
 */
int64           latency_limit = 0;

/*
 * tablespace selection (NULL until one is chosen)
 */
char       *tablespace = NULL;
char       *index_tablespace = NULL;
144
145 /*
146  * end of configurable parameters
147  *********************************************************************/
148
/*
 * Rows per unit of scale in each standard table.  The built-in scripts
 * multiply these by :scale.
 */
#define nbranches       1                       /* Makes little sense to change this.  Change
                                                                 * -s instead */
#define ntellers        10
#define naccounts       100000

/*
 * The scale factor at/beyond which the account ids (up to naccounts * scale)
 * are treated as no longer fitting in a 32-bit integer.
 *
 * Although the actual threshold is 21474 (naccounts * 21475 exceeds 2^31 - 1),
 * we use 20000 because it is easier to document and remember, and isn't that
 * far away from the real threshold.
 */
#define SCALE_32BIT_THRESHOLD 20000

bool            use_log;                        /* log transaction latencies to a file */
bool            use_quiet;                      /* quiet logging onto stderr */
int                     agg_interval;           /* log aggregates instead of individual
                                                                 * transactions */
int                     progress = 0;           /* thread progress report every this seconds */
bool            progress_timestamp = false; /* progress report with Unix time */
int                     progress_nclients = 0;          /* number of clients for progress
                                                                                 * report */
int                     progress_nthreads = 0;          /* number of threads for progress
                                                                                 * report */
bool            is_connect;                     /* establish connection for each transaction */
bool            is_latencies;           /* report per-command latencies */
int                     main_pid;                       /* main process id used in log filename */

/* connection parameters, passed to libpq by doConnect() */
char       *pghost = "";
char       *pgport = "";
char       *login = NULL;
char       *dbName;
const char *progname;                   /* also used as fallback_application_name */

volatile bool timer_exceeded = false;   /* flag from signal handler */
184
185 /* variable definitions */
/*
 * One script variable: a name/value pair, both kept as separately allocated
 * strings (see putVariable).  A client's variables are held in an array that
 * is kept sorted by name so it can be searched with bsearch().
 */
typedef struct
{
        char       *name;                       /* variable name */
        char       *value;                      /* its value */
} Variable;

#define MAX_FILES               128             /* max number of SQL script files allowed */
#define SHELL_COMMAND_SIZE      256 /* maximum size allowed for shell command */
194
195 /*
196  * structures used in custom query mode
197  */
198
/*
 * Per-client state: the connection, the position within the running script,
 * the client's private variables, and the statistics gathered for it.
 */
typedef struct
{
        PGconn     *con;                        /* connection handle to DB */
        int                     id;                             /* client No. */
        int                     state;                  /* state No. */
        int                     listen;                 /* 0 indicates that an async query has been
                                                                 * sent */
        int                     sleeping;               /* 1 indicates that the client is napping */
        bool            throttling;             /* whether nap is for throttling */
        Variable   *variables;          /* array of variable definitions, sorted by
                                                                 * name */
        int                     nvariables;             /* number of entries in variables[] */
        int64           txn_scheduled;  /* scheduled start time of transaction (usec) */
        instr_time      txn_begin;              /* used for measuring schedule lag times */
        instr_time      stmt_begin;             /* used for measuring statement latencies */
        bool            is_throttled;   /* whether transaction throttling is done */
        int                     use_file;               /* index in sql_files for this client */
        bool            prepared[MAX_FILES];    /* one flag per script file */

        /* per client collected stats */
        int                     cnt;                    /* xacts count */
        int                     ecnt;                   /* error count */
        int64           txn_latencies;  /* cumulated latencies */
        int64           txn_sqlats;             /* cumulated square latencies */
} CState;
223
/*
 * Thread state: the slice of clients handled by one thread, that thread's
 * private random-number state, and the statistics gathered per thread.
 */
typedef struct
{
        int                     tid;                    /* thread id */
        pthread_t       thread;                 /* thread handle */
        CState     *state;                      /* array of CState */
        int                     nstate;                 /* length of state[] */
        instr_time      start_time;             /* thread start time */
        instr_time *exec_elapsed;       /* time spent executing cmds (per Command) */
        int                *exec_count;         /* number of cmd executions (per Command) */
        unsigned short random_state[3];         /* separate randomness for each thread;
                                                                                 * passed to pg_erand48() */
        int64           throttle_trigger;               /* previous/next throttling (us) */

        /* per thread collected stats */
        instr_time      conn_time;
        int64           throttle_lag;   /* total transaction lag behind throttling */
        int64           throttle_lag_max;               /* max transaction lag */
        int64           throttle_latency_skipped;               /* lagging transactions
                                                                                                 * skipped */
        int64           latency_late;   /* late transactions */
} TState;

/* thread handle value meaning "no thread" */
#define INVALID_THREAD          ((pthread_t) 0)
249
250 /*
251  * queries read from files
252  */
/* values stored in Command.type */
#define SQL_COMMAND             1
#define META_COMMAND    2
/* capacity of Command.argv[] and Command.cols[] */
#define MAX_ARGS                10

/*
 * Protocol used to submit queries.  NUM_QUERYMODE is not a mode; it is the
 * number of modes.
 */
typedef enum QueryMode
{
        QUERY_SIMPLE,                           /* simple query */
        QUERY_EXTENDED,                         /* extended query */
        QUERY_PREPARED,                         /* extended query with prepared statements */
        NUM_QUERYMODE
} QueryMode;

static QueryMode querymode = QUERY_SIMPLE;
/* printable names, in the same order as the QueryMode values */
static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
267
/*
 * One parsed script command, either an SQL statement or a backslash
 * meta-command.
 */
typedef struct
{
        char       *line;                       /* full text of command line */
        int                     command_num;    /* unique index of this Command struct */
        int                     type;                   /* command type (SQL_COMMAND or META_COMMAND) */
        int                     argc;                   /* number of command words */
        char       *argv[MAX_ARGS]; /* command word list */
        int                     cols[MAX_ARGS]; /* corresponding column starting from 1 */
        PgBenchExpr *expr;                      /* parsed expression */
} Command;
278
/*
 * Running totals for one aggregation interval of the transaction log.
 * NOTE(review): the units of the latency and lag fields are not visible in
 * this part of the file -- confirm against doLog().
 */
typedef struct
{

        long            start_time;             /* when does the interval start */
        int                     cnt;                    /* number of transactions */
        int                     skipped;                /* number of transactions skipped under --rate
                                                                 * and --latency-limit */

        double          min_latency;    /* min/max latencies */
        double          max_latency;
        double          sum_latency;    /* sum(latency), sum(latency^2) - for
                                                                 * estimates */
        double          sum2_latency;

        double          min_lag;                /* min/max lag */
        double          max_lag;
        double          sum_lag;                /* sum(lag) */
        double          sum2_lag;               /* sum(lag*lag) */
} AggVals;
298
/* each entry is one script, held as an array of Command pointers */
static Command **sql_files[MAX_FILES];  /* SQL script files */
static int      num_files;                      /* number of script files */
static int      num_commands = 0;       /* total number of Command structs */
static int      debug = 0;                      /* debug flag */
303
/*
 * default scenario: derive the table sizes from :scale, pick a random
 * account, branch, teller and delta, then update all three balance tables
 * and insert a history row inside one BEGIN ... END block.
 */
static char *tpc_b = {
        "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
        "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
        "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
        "\\setrandom aid 1 :naccounts\n"
        "\\setrandom bid 1 :nbranches\n"
        "\\setrandom tid 1 :ntellers\n"
        "\\setrandom delta -5000 5000\n"
        "BEGIN;\n"
        "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
        "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
        "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n"
        "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n"
        "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
        "END;\n"
};

/*
 * -N case: same as the default scenario, minus the updates of
 * pgbench_tellers and pgbench_branches.
 */
static char *simple_update = {
        "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
        "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
        "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
        "\\setrandom aid 1 :naccounts\n"
        "\\setrandom bid 1 :nbranches\n"
        "\\setrandom tid 1 :ntellers\n"
        "\\setrandom delta -5000 5000\n"
        "BEGIN;\n"
        "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
        "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
        "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
        "END;\n"
};

/* -S case: a single SELECT of one random account's balance */
static char *select_only = {
        "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
        "\\setrandom aid 1 :naccounts\n"
        "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
};
344
/*
 * Function prototypes for routines that are used before their definitions,
 * which are not in this part of the file.
 */
static void setalarm(int seconds);
static void *threadRun(void *arg);  /* signature matches a pthread start routine */

static void doLog(TState *thread, CState *st, FILE *logfile, instr_time *now,
          AggVals *agg, bool skipped);
351
/*
 * usage -- print the command-line help text on stdout.
 *
 * The text is one format string; progname is substituted twice (in the
 * description line and in the usage line).  "%%" in the --sampling-rate
 * line prints a literal percent sign.
 */
static void
usage(void)
{
        printf("%s is a benchmarking tool for PostgreSQL.\n\n"
                   "Usage:\n"
                   "  %s [OPTION]... [DBNAME]\n"
                   "\nInitialization options:\n"
                   "  -i, --initialize         invokes initialization mode\n"
                   "  -F, --fillfactor=NUM     set fill factor\n"
                "  -n, --no-vacuum          do not run VACUUM after initialization\n"
        "  -q, --quiet              quiet logging (one message each 5 seconds)\n"
                   "  -s, --scale=NUM          scaling factor\n"
                   "  --foreign-keys           create foreign key constraints between tables\n"
                   "  --index-tablespace=TABLESPACE\n"
        "                           create indexes in the specified tablespace\n"
         "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
                   "  --unlogged-tables        create tables as unlogged tables\n"
                   "\nBenchmarking options:\n"
                   "  -c, --client=NUM         number of concurrent database clients (default: 1)\n"
                   "  -C, --connect            establish new connection for each transaction\n"
                   "  -D, --define=VARNAME=VALUE\n"
          "                           define variable for use by custom script\n"
                 "  -f, --file=FILENAME      read transaction script from FILENAME\n"
                   "  -j, --jobs=NUM           number of threads (default: 1)\n"
                   "  -l, --log                write transaction times to log file\n"
        "  -L, --latency-limit=NUM  count transactions lasting more than NUM ms as late\n"
                   "  -M, --protocol=simple|extended|prepared\n"
                   "                           protocol for submitting queries (default: simple)\n"
                   "  -n, --no-vacuum          do not run VACUUM before tests\n"
                   "  -N, --skip-some-updates  skip updates of pgbench_tellers and pgbench_branches\n"
                   "  -P, --progress=NUM       show thread progress report every NUM seconds\n"
                   "  -r, --report-latencies   report average latency per command\n"
                "  -R, --rate=NUM           target rate in transactions per second\n"
                   "  -s, --scale=NUM          report this scale factor in output\n"
                   "  -S, --select-only        perform SELECT-only transactions\n"
                   "  -t, --transactions=NUM   number of transactions each client runs (default: 10)\n"
                 "  -T, --time=NUM           duration of benchmark test in seconds\n"
                   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
                   "  --aggregate-interval=NUM aggregate data over NUM seconds\n"
                   "  --sampling-rate=NUM      fraction of transactions to log (e.g. 0.01 for 1%%)\n"
                   "  --progress-timestamp     use Unix epoch timestamps for progress\n"
                   "\nCommon options:\n"
                   "  -d, --debug              print debugging output\n"
          "  -h, --host=HOSTNAME      database server host or socket directory\n"
                   "  -p, --port=PORT          database server port number\n"
                   "  -U, --username=USERNAME  connect as specified database user\n"
                 "  -V, --version            output version information, then exit\n"
                   "  -?, --help               show this help, then exit\n"
                   "\n"
                   "Report bugs to <pgsql-bugs@postgresql.org>.\n",
                   progname, progname);
}
404
405 /*
406  * strtoint64 -- convert a string to 64-bit integer
407  *
408  * This function is a modified version of scanint8() from
409  * src/backend/utils/adt/int8.c.
410  */
411 int64
412 strtoint64(const char *str)
413 {
414         const char *ptr = str;
415         int64           result = 0;
416         int                     sign = 1;
417
418         /*
419          * Do our own scan, rather than relying on sscanf which might be broken
420          * for long long.
421          */
422
423         /* skip leading spaces */
424         while (*ptr && isspace((unsigned char) *ptr))
425                 ptr++;
426
427         /* handle sign */
428         if (*ptr == '-')
429         {
430                 ptr++;
431
432                 /*
433                  * Do an explicit check for INT64_MIN.  Ugly though this is, it's
434                  * cleaner than trying to get the loop below to handle it portably.
435                  */
436                 if (strncmp(ptr, "9223372036854775808", 19) == 0)
437                 {
438                         result = PG_INT64_MIN;
439                         ptr += 19;
440                         goto gotdigits;
441                 }
442                 sign = -1;
443         }
444         else if (*ptr == '+')
445                 ptr++;
446
447         /* require at least one digit */
448         if (!isdigit((unsigned char) *ptr))
449                 fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
450
451         /* process digits */
452         while (*ptr && isdigit((unsigned char) *ptr))
453         {
454                 int64           tmp = result * 10 + (*ptr++ - '0');
455
456                 if ((tmp / 10) != result)               /* overflow? */
457                         fprintf(stderr, "value \"%s\" is out of range for type bigint\n", str);
458                 result = tmp;
459         }
460
461 gotdigits:
462
463         /* allow trailing whitespace, but not other trailing chars */
464         while (*ptr != '\0' && isspace((unsigned char) *ptr))
465                 ptr++;
466
467         if (*ptr != '\0')
468                 fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
469
470         return ((sign < 0) ? -result : result);
471 }
472
473 /* random number generator: uniform distribution from min to max inclusive */
474 static int64
475 getrand(TState *thread, int64 min, int64 max)
476 {
477         /*
478          * Odd coding is so that min and max have approximately the same chance of
479          * being selected as do numbers between them.
480          *
481          * pg_erand48() is thread-safe and concurrent, which is why we use it
482          * rather than random(), which in glibc is non-reentrant, and therefore
483          * protected by a mutex, and therefore a bottleneck on machines with many
484          * CPUs.
485          */
486         return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
487 }
488
489 /*
490  * random number generator: exponential distribution from min to max inclusive.
491  * the threshold is so that the density of probability for the last cut-off max
492  * value is exp(-threshold).
493  */
494 static int64
495 getExponentialRand(TState *thread, int64 min, int64 max, double threshold)
496 {
497         double          cut,
498                                 uniform,
499                                 rand;
500
501         Assert(threshold > 0.0);
502         cut = exp(-threshold);
503         /* erand in [0, 1), uniform in (0, 1] */
504         uniform = 1.0 - pg_erand48(thread->random_state);
505
506         /*
507          * inner expresion in (cut, 1] (if threshold > 0), rand in [0, 1)
508          */
509         Assert((1.0 - cut) != 0.0);
510         rand = -log(cut + (1.0 - cut) * uniform) / threshold;
511         /* return int64 random number within between min and max */
512         return min + (int64) ((max - min + 1) * rand);
513 }
514
515 /* random number generator: gaussian distribution from min to max inclusive */
516 static int64
517 getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
518 {
519         double          stdev;
520         double          rand;
521
522         /*
523          * Get user specified random number from this loop, with -threshold <
524          * stdev <= threshold
525          *
526          * This loop is executed until the number is in the expected range.
527          *
528          * As the minimum threshold is 2.0, the probability of looping is low:
529          * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the
530          * average sinus multiplier as 2/pi, we have a 8.6% looping probability in
531          * the worst case. For a 5.0 threshold value, the looping probability is
532          * about e^{-5} * 2 / pi ~ 0.43%.
533          */
534         do
535         {
536                 /*
537                  * pg_erand48 generates [0,1), but for the basic version of the
538                  * Box-Muller transform the two uniformly distributed random numbers
539                  * are expected in (0, 1] (see
540                  * http://en.wikipedia.org/wiki/Box_muller)
541                  */
542                 double          rand1 = 1.0 - pg_erand48(thread->random_state);
543                 double          rand2 = 1.0 - pg_erand48(thread->random_state);
544
545                 /* Box-Muller basic form transform */
546                 double          var_sqrt = sqrt(-2.0 * log(rand1));
547
548                 stdev = var_sqrt * sin(2.0 * M_PI * rand2);
549
550                 /*
551                  * we may try with cos, but there may be a bias induced if the
552                  * previous value fails the test. To be on the safe side, let us try
553                  * over.
554                  */
555         }
556         while (stdev < -threshold || stdev >= threshold);
557
558         /* stdev is in [-threshold, threshold), normalization to [0,1) */
559         rand = (stdev + threshold) / (threshold * 2.0);
560
561         /* return int64 random number within between min and max */
562         return min + (int64) ((max - min + 1) * rand);
563 }
564
565 /*
566  * random number generator: generate a value, such that the series of values
567  * will approximate a Poisson distribution centered on the given value.
568  */
569 static int64
570 getPoissonRand(TState *thread, int64 center)
571 {
572         /*
573          * Use inverse transform sampling to generate a value > 0, such that the
574          * expected (i.e. average) value is the given argument.
575          */
576         double          uniform;
577
578         /* erand in [0, 1), uniform in (0, 1] */
579         uniform = 1.0 - pg_erand48(thread->random_state);
580
581         return (int64) (-log(uniform) * ((double) center) + 0.5);
582 }
583
584 /* call PQexec() and exit() on failure */
585 static void
586 executeStatement(PGconn *con, const char *sql)
587 {
588         PGresult   *res;
589
590         res = PQexec(con, sql);
591         if (PQresultStatus(res) != PGRES_COMMAND_OK)
592         {
593                 fprintf(stderr, "%s", PQerrorMessage(con));
594                 exit(1);
595         }
596         PQclear(res);
597 }
598
599 /* call PQexec() and complain, but without exiting, on failure */
600 static void
601 tryExecuteStatement(PGconn *con, const char *sql)
602 {
603         PGresult   *res;
604
605         res = PQexec(con, sql);
606         if (PQresultStatus(res) != PGRES_COMMAND_OK)
607         {
608                 fprintf(stderr, "%s", PQerrorMessage(con));
609                 fprintf(stderr, "(ignoring this error and continuing anyway)\n");
610         }
611         PQclear(res);
612 }
613
614 /* set up a connection to the backend */
615 static PGconn *
616 doConnect(void)
617 {
618         PGconn     *conn;
619         static char *password = NULL;
620         bool            new_pass;
621
622         /*
623          * Start the connection.  Loop until we have a password if requested by
624          * backend.
625          */
626         do
627         {
628 #define PARAMS_ARRAY_SIZE       7
629
630                 const char *keywords[PARAMS_ARRAY_SIZE];
631                 const char *values[PARAMS_ARRAY_SIZE];
632
633                 keywords[0] = "host";
634                 values[0] = pghost;
635                 keywords[1] = "port";
636                 values[1] = pgport;
637                 keywords[2] = "user";
638                 values[2] = login;
639                 keywords[3] = "password";
640                 values[3] = password;
641                 keywords[4] = "dbname";
642                 values[4] = dbName;
643                 keywords[5] = "fallback_application_name";
644                 values[5] = progname;
645                 keywords[6] = NULL;
646                 values[6] = NULL;
647
648                 new_pass = false;
649
650                 conn = PQconnectdbParams(keywords, values, true);
651
652                 if (!conn)
653                 {
654                         fprintf(stderr, "connection to database \"%s\" failed\n",
655                                         dbName);
656                         return NULL;
657                 }
658
659                 if (PQstatus(conn) == CONNECTION_BAD &&
660                         PQconnectionNeedsPassword(conn) &&
661                         password == NULL)
662                 {
663                         PQfinish(conn);
664                         password = simple_prompt("Password: ", 100, false);
665                         new_pass = true;
666                 }
667         } while (new_pass);
668
669         /* check to see that the backend connection was successfully made */
670         if (PQstatus(conn) == CONNECTION_BAD)
671         {
672                 fprintf(stderr, "connection to database \"%s\" failed:\n%s",
673                                 dbName, PQerrorMessage(conn));
674                 PQfinish(conn);
675                 return NULL;
676         }
677
678         return conn;
679 }
680
681 /* throw away response from backend */
682 static void
683 discard_response(CState *state)
684 {
685         PGresult   *res;
686
687         do
688         {
689                 res = PQgetResult(state->con);
690                 if (res)
691                         PQclear(res);
692         } while (res);
693 }
694
695 static int
696 compareVariables(const void *v1, const void *v2)
697 {
698         return strcmp(((const Variable *) v1)->name,
699                                   ((const Variable *) v2)->name);
700 }
701
702 static char *
703 getVariable(CState *st, char *name)
704 {
705         Variable        key,
706                            *var;
707
708         /* On some versions of Solaris, bsearch of zero items dumps core */
709         if (st->nvariables <= 0)
710                 return NULL;
711
712         key.name = name;
713         var = (Variable *) bsearch((void *) &key,
714                                                            (void *) st->variables,
715                                                            st->nvariables,
716                                                            sizeof(Variable),
717                                                            compareVariables);
718         if (var != NULL)
719                 return var->value;
720         else
721                 return NULL;
722 }
723
/*
 * isLegalVariableName -- check whether the name consists only of
 * alphanumeric characters and underscores.
 *
 * An empty name contains no offending character and is therefore accepted.
 */
static bool
isLegalVariableName(const char *name)
{
        const char *p;

        for (p = name; *p != '\0'; p++)
        {
                if (*p == '_' || isalnum((unsigned char) *p))
                        continue;

                return false;
        }

        return true;
}
738
739 static int
740 putVariable(CState *st, const char *context, char *name, char *value)
741 {
742         Variable        key,
743                            *var;
744
745         key.name = name;
746         /* On some versions of Solaris, bsearch of zero items dumps core */
747         if (st->nvariables > 0)
748                 var = (Variable *) bsearch((void *) &key,
749                                                                    (void *) st->variables,
750                                                                    st->nvariables,
751                                                                    sizeof(Variable),
752                                                                    compareVariables);
753         else
754                 var = NULL;
755
756         if (var == NULL)
757         {
758                 Variable   *newvars;
759
760                 /*
761                  * Check for the name only when declaring a new variable to avoid
762                  * overhead.
763                  */
764                 if (!isLegalVariableName(name))
765                 {
766                         fprintf(stderr, "%s: invalid variable name: \"%s\"\n",
767                                         context, name);
768                         return false;
769                 }
770
771                 if (st->variables)
772                         newvars = (Variable *) pg_realloc(st->variables,
773                                                                         (st->nvariables + 1) * sizeof(Variable));
774                 else
775                         newvars = (Variable *) pg_malloc(sizeof(Variable));
776
777                 st->variables = newvars;
778
779                 var = &newvars[st->nvariables];
780
781                 var->name = pg_strdup(name);
782                 var->value = pg_strdup(value);
783
784                 st->nvariables++;
785
786                 qsort((void *) st->variables, st->nvariables, sizeof(Variable),
787                           compareVariables);
788         }
789         else
790         {
791                 char       *val;
792
793                 /* dup then free, in case value is pointing at this variable */
794                 val = pg_strdup(value);
795
796                 free(var->value);
797                 var->value = val;
798         }
799
800         return true;
801 }
802
/*
 * Extract the variable name that follows the character at sql[0]; sql[0]
 * itself is skipped without being examined (callers pass a pointer to
 * the ':' that introduces the reference).
 *
 * On success, returns a newly allocated copy of the name and stores in
 * *eaten the number of characters covered, including sql[0].  Returns NULL
 * and leaves *eaten alone when sql[1] is not a name character.
 */
static char *
parseVariable(const char *sql, int *eaten)
{
	int			end = 1;
	int			namelen;
	char	   *name;

	/* advance over the run of name characters starting at sql[1] */
	while (isalnum((unsigned char) sql[end]) || sql[end] == '_')
		end++;

	namelen = end - 1;
	if (namelen == 0)
		return NULL;

	name = pg_malloc(namelen + 1);
	memcpy(name, sql + 1, namelen);
	name[namelen] = '\0';

	*eaten = end;
	return name;
}
823
/*
 * Overwrite the "len" characters starting at "param" (which points into
 * the string *sql) with the string "value".
 *
 * If the value is longer than the text it replaces, *sql is reallocated and
 * updated, so the caller must not keep other pointers into the old string.
 *
 * Returns a pointer to the character just after the inserted value, within
 * the (possibly moved) string.
 */
static char *
replaceVariable(char **sql, char *param, int len, char *value)
{
	int			newlen = strlen(value);
	int			growth = newlen - len;

	if (growth > 0)
	{
		/* remember where we are; the buffer may move */
		size_t		pos = param - *sql;

		*sql = pg_realloc(*sql, strlen(*sql) + growth + 1);
		param = *sql + pos;
	}

	/* shift the tail (terminator included) to its final position */
	if (growth != 0)
		memmove(param + newlen, param + len, strlen(param + len) + 1);

	memcpy(param, value, newlen);

	return param + newlen;
}
843
844 static char *
845 assignVariables(CState *st, char *sql)
846 {
847         char       *p,
848                            *name,
849                            *val;
850
851         p = sql;
852         while ((p = strchr(p, ':')) != NULL)
853         {
854                 int                     eaten;
855
856                 name = parseVariable(p, &eaten);
857                 if (name == NULL)
858                 {
859                         while (*p == ':')
860                         {
861                                 p++;
862                         }
863                         continue;
864                 }
865
866                 val = getVariable(st, name);
867                 free(name);
868                 if (val == NULL)
869                 {
870                         p++;
871                         continue;
872                 }
873
874                 p = replaceVariable(&sql, p, eaten, val);
875         }
876
877         return sql;
878 }
879
880 static void
881 getQueryParams(CState *st, const Command *command, const char **params)
882 {
883         int                     i;
884
885         for (i = 0; i < command->argc - 1; i++)
886                 params[i] = getVariable(st, command->argv[i + 1]);
887 }
888
889 /*
890  * Recursive evaluation of an expression in a pgbench script
891  * using the current state of variables.
892  * Returns whether the evaluation was ok,
893  * the value itself is returned through the retval pointer.
894  */
895 static bool
896 evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
897 {
898         switch (expr->etype)
899         {
900                 case ENODE_INTEGER_CONSTANT:
901                         {
902                                 *retval = expr->u.integer_constant.ival;
903                                 return true;
904                         }
905
906                 case ENODE_VARIABLE:
907                         {
908                                 char       *var;
909
910                                 if ((var = getVariable(st, expr->u.variable.varname)) == NULL)
911                                 {
912                                         fprintf(stderr, "undefined variable \"%s\"\n",
913                                                         expr->u.variable.varname);
914                                         return false;
915                                 }
916                                 *retval = strtoint64(var);
917                                 return true;
918                         }
919
920                 case ENODE_OPERATOR:
921                         {
922                                 int64           lval;
923                                 int64           rval;
924
925                                 if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
926                                         return false;
927                                 if (!evaluateExpr(st, expr->u.operator.rexpr, &rval))
928                                         return false;
929                                 switch (expr->u.operator.operator)
930                                 {
931                                         case '+':
932                                                 *retval = lval + rval;
933                                                 return true;
934
935                                         case '-':
936                                                 *retval = lval - rval;
937                                                 return true;
938
939                                         case '*':
940                                                 *retval = lval * rval;
941                                                 return true;
942
943                                         case '/':
944                                                 if (rval == 0)
945                                                 {
946                                                         fprintf(stderr, "division by zero\n");
947                                                         return false;
948                                                 }
949                                                 *retval = lval / rval;
950                                                 return true;
951
952                                         case '%':
953                                                 if (rval == 0)
954                                                 {
955                                                         fprintf(stderr, "division by zero\n");
956                                                         return false;
957                                                 }
958                                                 *retval = lval % rval;
959                                                 return true;
960                                 }
961
962                                 fprintf(stderr, "bad operator\n");
963                                 return false;
964                         }
965
966                 default:
967                         break;
968         }
969
970         fprintf(stderr, "bad expression\n");
971         return false;
972 }
973
974 /*
975  * Run a shell command. The result is assigned to the variable if not NULL.
976  * Return true if succeeded, or false on error.
977  */
978 static bool
979 runShellCommand(CState *st, char *variable, char **argv, int argc)
980 {
981         char            command[SHELL_COMMAND_SIZE];
982         int                     i,
983                                 len = 0;
984         FILE       *fp;
985         char            res[64];
986         char       *endptr;
987         int                     retval;
988
989         /*----------
990          * Join arguments with whitespace separators. Arguments starting with
991          * exactly one colon are treated as variables:
992          *      name - append a string "name"
993          *      :var - append a variable named 'var'
994          *      ::name - append a string ":name"
995          *----------
996          */
997         for (i = 0; i < argc; i++)
998         {
999                 char       *arg;
1000                 int                     arglen;
1001
1002                 if (argv[i][0] != ':')
1003                 {
1004                         arg = argv[i];          /* a string literal */
1005                 }
1006                 else if (argv[i][1] == ':')
1007                 {
1008                         arg = argv[i] + 1;      /* a string literal starting with colons */
1009                 }
1010                 else if ((arg = getVariable(st, argv[i] + 1)) == NULL)
1011                 {
1012                         fprintf(stderr, "%s: undefined variable \"%s\"\n",
1013                                         argv[0], argv[i]);
1014                         return false;
1015                 }
1016
1017                 arglen = strlen(arg);
1018                 if (len + arglen + (i > 0 ? 1 : 0) >= SHELL_COMMAND_SIZE - 1)
1019                 {
1020                         fprintf(stderr, "%s: shell command is too long\n", argv[0]);
1021                         return false;
1022                 }
1023
1024                 if (i > 0)
1025                         command[len++] = ' ';
1026                 memcpy(command + len, arg, arglen);
1027                 len += arglen;
1028         }
1029
1030         command[len] = '\0';
1031
1032         /* Fast path for non-assignment case */
1033         if (variable == NULL)
1034         {
1035                 if (system(command))
1036                 {
1037                         if (!timer_exceeded)
1038                                 fprintf(stderr, "%s: could not launch shell command\n", argv[0]);
1039                         return false;
1040                 }
1041                 return true;
1042         }
1043
1044         /* Execute the command with pipe and read the standard output. */
1045         if ((fp = popen(command, "r")) == NULL)
1046         {
1047                 fprintf(stderr, "%s: could not launch shell command\n", argv[0]);
1048                 return false;
1049         }
1050         if (fgets(res, sizeof(res), fp) == NULL)
1051         {
1052                 if (!timer_exceeded)
1053                         fprintf(stderr, "%s: could not read result of shell command\n", argv[0]);
1054                 (void) pclose(fp);
1055                 return false;
1056         }
1057         if (pclose(fp) < 0)
1058         {
1059                 fprintf(stderr, "%s: could not close shell command\n", argv[0]);
1060                 return false;
1061         }
1062
1063         /* Check whether the result is an integer and assign it to the variable */
1064         retval = (int) strtol(res, &endptr, 10);
1065         while (*endptr != '\0' && isspace((unsigned char) *endptr))
1066                 endptr++;
1067         if (*res == '\0' || *endptr != '\0')
1068         {
1069                 fprintf(stderr, "%s: shell command must return an integer (not \"%s\")\n",
1070                                 argv[0], res);
1071                 return false;
1072         }
1073         snprintf(res, sizeof(res), "%d", retval);
1074         if (!putVariable(st, "setshell", variable, res))
1075                 return false;
1076
1077 #ifdef DEBUG
1078         printf("shell parameter name: \"%s\", value: \"%s\"\n", argv[1], res);
1079 #endif
1080         return true;
1081 }
1082
#define MAX_PREPARE_NAME		32

/*
 * Write the name of a prepared statement into "buffer", in the form
 * "P<file>_<state>".
 *
 * "buffer" must provide at least MAX_PREPARE_NAME bytes; the write is
 * bounded to that size so the name can never run past such a buffer.
 */
static void
preparedStatementName(char *buffer, int file, int state)
{
	snprintf(buffer, MAX_PREPARE_NAME, "P%d_%d", file, state);
}
1089
1090 static bool
1091 clientDone(CState *st, bool ok)
1092 {
1093         (void) ok;                                      /* unused */
1094
1095         if (st->con != NULL)
1096         {
1097                 PQfinish(st->con);
1098                 st->con = NULL;
1099         }
1100         return false;                           /* always false */
1101 }
1102
1103 static void
1104 agg_vals_init(AggVals *aggs, instr_time start)
1105 {
1106         /* basic counters */
1107         aggs->cnt = 0;                          /* number of transactions (includes skipped) */
1108         aggs->skipped = 0;                      /* xacts skipped under --rate --latency-limit */
1109
1110         aggs->sum_latency = 0;          /* SUM(latency) */
1111         aggs->sum2_latency = 0;         /* SUM(latency*latency) */
1112
1113         /* min and max transaction duration */
1114         aggs->min_latency = 0;
1115         aggs->max_latency = 0;
1116
1117         /* schedule lag counters */
1118         aggs->sum_lag = 0;
1119         aggs->sum2_lag = 0;
1120         aggs->min_lag = 0;
1121         aggs->max_lag = 0;
1122
1123         /* start of the current interval */
1124         aggs->start_time = INSTR_TIME_GET_DOUBLE(start);
1125 }
1126
1127 /* return false iff client should be disconnected */
1128 static bool
1129 doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVals *agg)
1130 {
1131         PGresult   *res;
1132         Command   **commands;
1133         bool            trans_needs_throttle = false;
1134         instr_time      now;
1135
1136         /*
1137          * gettimeofday() isn't free, so we get the current timestamp lazily the
1138          * first time it's needed, and reuse the same value throughout this
1139          * function after that. This also ensures that e.g. the calculated latency
1140          * reported in the log file and in the totals are the same. Zero means
1141          * "not set yet". Reset "now" when we step to the next command with "goto
1142          * top", though.
1143          */
1144 top:
1145         INSTR_TIME_SET_ZERO(now);
1146
1147         commands = sql_files[st->use_file];
1148
1149         /*
1150          * Handle throttling once per transaction by sleeping.  It is simpler to
1151          * do this here rather than at the end, because so much complicated logic
1152          * happens below when statements finish.
1153          */
1154         if (throttle_delay && !st->is_throttled)
1155         {
1156                 /*
1157                  * Generate a delay such that the series of delays will approximate a
1158                  * Poisson distribution centered on the throttle_delay time.
1159                  *
1160                  * If transactions are too slow or a given wait is shorter than a
1161                  * transaction, the next transaction will start right away.
1162                  */
1163                 int64           wait = getPoissonRand(thread, throttle_delay);
1164
1165                 thread->throttle_trigger += wait;
1166                 st->txn_scheduled = thread->throttle_trigger;
1167
1168                 /*
1169                  * If this --latency-limit is used, and this slot is already late so
1170                  * that the transaction will miss the latency limit even if it
1171                  * completed immediately, we skip this time slot and iterate till the
1172                  * next slot that isn't late yet.
1173                  */
1174                 if (latency_limit)
1175                 {
1176                         int64           now_us;
1177
1178                         if (INSTR_TIME_IS_ZERO(now))
1179                                 INSTR_TIME_SET_CURRENT(now);
1180                         now_us = INSTR_TIME_GET_MICROSEC(now);
1181                         while (thread->throttle_trigger < now_us - latency_limit)
1182                         {
1183                                 thread->throttle_latency_skipped++;
1184
1185                                 if (logfile)
1186                                         doLog(thread, st, logfile, &now, agg, true);
1187
1188                                 wait = getPoissonRand(thread, throttle_delay);
1189                                 thread->throttle_trigger += wait;
1190                                 st->txn_scheduled = thread->throttle_trigger;
1191                         }
1192                 }
1193
1194                 st->sleeping = 1;
1195                 st->throttling = true;
1196                 st->is_throttled = true;
1197                 if (debug)
1198                         fprintf(stderr, "client %d throttling " INT64_FORMAT " us\n",
1199                                         st->id, wait);
1200         }
1201
1202         if (st->sleeping)
1203         {                                                       /* are we sleeping? */
1204                 int64           now_us;
1205
1206                 if (INSTR_TIME_IS_ZERO(now))
1207                         INSTR_TIME_SET_CURRENT(now);
1208                 now_us = INSTR_TIME_GET_MICROSEC(now);
1209                 if (st->txn_scheduled <= now_us)
1210                 {
1211                         st->sleeping = 0;       /* Done sleeping, go ahead with next command */
1212                         if (st->throttling)
1213                         {
1214                                 /* Measure lag of throttled transaction relative to target */
1215                                 int64           lag = now_us - st->txn_scheduled;
1216
1217                                 thread->throttle_lag += lag;
1218                                 if (lag > thread->throttle_lag_max)
1219                                         thread->throttle_lag_max = lag;
1220                                 st->throttling = false;
1221                         }
1222                 }
1223                 else
1224                         return true;            /* Still sleeping, nothing to do here */
1225         }
1226
1227         if (st->listen)
1228         {                                                       /* are we receiver? */
1229                 if (commands[st->state]->type == SQL_COMMAND)
1230                 {
1231                         if (debug)
1232                                 fprintf(stderr, "client %d receiving\n", st->id);
1233                         if (!PQconsumeInput(st->con))
1234                         {                                       /* there's something wrong */
1235                                 fprintf(stderr, "client %d aborted in state %d; perhaps the backend died while processing\n", st->id, st->state);
1236                                 return clientDone(st, false);
1237                         }
1238                         if (PQisBusy(st->con))
1239                                 return true;    /* don't have the whole result yet */
1240                 }
1241
1242                 /*
1243                  * command finished: accumulate per-command execution times in
1244                  * thread-local data structure, if per-command latencies are requested
1245                  */
1246                 if (is_latencies)
1247                 {
1248                         int                     cnum = commands[st->state]->command_num;
1249
1250                         if (INSTR_TIME_IS_ZERO(now))
1251                                 INSTR_TIME_SET_CURRENT(now);
1252                         INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum],
1253                                                                   now, st->stmt_begin);
1254                         thread->exec_count[cnum]++;
1255                 }
1256
1257                 /* transaction finished: calculate latency and log the transaction */
1258                 if (commands[st->state + 1] == NULL)
1259                 {
1260                         /* only calculate latency if an option is used that needs it */
1261                         if (progress || throttle_delay || latency_limit)
1262                         {
1263                                 int64           latency;
1264
1265                                 if (INSTR_TIME_IS_ZERO(now))
1266                                         INSTR_TIME_SET_CURRENT(now);
1267
1268                                 latency = INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled;
1269
1270                                 st->txn_latencies += latency;
1271
1272                                 /*
1273                                  * XXX In a long benchmark run of high-latency transactions,
1274                                  * this int64 addition eventually overflows.  For example, 100
1275                                  * threads running 10s transactions will overflow it in 2.56
1276                                  * hours.  With a more-typical OLTP workload of .1s
1277                                  * transactions, overflow would take 256 hours.
1278                                  */
1279                                 st->txn_sqlats += latency * latency;
1280
1281                                 /* record over the limit transactions if needed. */
1282                                 if (latency_limit && latency > latency_limit)
1283                                         thread->latency_late++;
1284                         }
1285
1286                         /* record the time it took in the log */
1287                         if (logfile)
1288                                 doLog(thread, st, logfile, &now, agg, false);
1289                 }
1290
1291                 if (commands[st->state]->type == SQL_COMMAND)
1292                 {
1293                         /*
1294                          * Read and discard the query result; note this is not included in
1295                          * the statement latency numbers.
1296                          */
1297                         res = PQgetResult(st->con);
1298                         switch (PQresultStatus(res))
1299                         {
1300                                 case PGRES_COMMAND_OK:
1301                                 case PGRES_TUPLES_OK:
1302                                         break;          /* OK */
1303                                 default:
1304                                         fprintf(stderr, "client %d aborted in state %d: %s",
1305                                                         st->id, st->state, PQerrorMessage(st->con));
1306                                         PQclear(res);
1307                                         return clientDone(st, false);
1308                         }
1309                         PQclear(res);
1310                         discard_response(st);
1311                 }
1312
1313                 if (commands[st->state + 1] == NULL)
1314                 {
1315                         if (is_connect)
1316                         {
1317                                 PQfinish(st->con);
1318                                 st->con = NULL;
1319                         }
1320
1321                         ++st->cnt;
1322                         if ((st->cnt >= nxacts && duration <= 0) || timer_exceeded)
1323                                 return clientDone(st, true);    /* exit success */
1324                 }
1325
1326                 /* increment state counter */
1327                 st->state++;
1328                 if (commands[st->state] == NULL)
1329                 {
1330                         st->state = 0;
1331                         st->use_file = (int) getrand(thread, 0, num_files - 1);
1332                         commands = sql_files[st->use_file];
1333                         st->is_throttled = false;
1334
1335                         /*
1336                          * No transaction is underway anymore, which means there is
1337                          * nothing to listen to right now.  When throttling rate limits
1338                          * are active, a sleep will happen next, as the next transaction
1339                          * starts.  And then in any case the next SQL command will set
1340                          * listen back to 1.
1341                          */
1342                         st->listen = 0;
1343                         trans_needs_throttle = (throttle_delay > 0);
1344                 }
1345         }
1346
1347         if (st->con == NULL)
1348         {
1349                 instr_time      start,
1350                                         end;
1351
1352                 INSTR_TIME_SET_CURRENT(start);
1353                 if ((st->con = doConnect()) == NULL)
1354                 {
1355                         fprintf(stderr, "client %d aborted while establishing connection\n",
1356                                         st->id);
1357                         return clientDone(st, false);
1358                 }
1359                 INSTR_TIME_SET_CURRENT(end);
1360                 INSTR_TIME_ACCUM_DIFF(*conn_time, end, start);
1361         }
1362
1363         /*
1364          * This ensures that a throttling delay is inserted before proceeding with
1365          * sql commands, after the first transaction. The first transaction
1366          * throttling is performed when first entering doCustom.
1367          */
1368         if (trans_needs_throttle)
1369         {
1370                 trans_needs_throttle = false;
1371                 goto top;
1372         }
1373
1374         /* Record transaction start time under logging, progress or throttling */
1375         if ((logfile || progress || throttle_delay || latency_limit) && st->state == 0)
1376         {
1377                 INSTR_TIME_SET_CURRENT(st->txn_begin);
1378
1379                 /*
1380                  * When not throttling, this is also the transaction's scheduled start
1381                  * time.
1382                  */
1383                 if (!throttle_delay)
1384                         st->txn_scheduled = INSTR_TIME_GET_MICROSEC(st->txn_begin);
1385         }
1386
1387         /* Record statement start time if per-command latencies are requested */
1388         if (is_latencies)
1389                 INSTR_TIME_SET_CURRENT(st->stmt_begin);
1390
1391         if (commands[st->state]->type == SQL_COMMAND)
1392         {
1393                 const Command *command = commands[st->state];
1394                 int                     r;
1395
1396                 if (querymode == QUERY_SIMPLE)
1397                 {
1398                         char       *sql;
1399
1400                         sql = pg_strdup(command->argv[0]);
1401                         sql = assignVariables(st, sql);
1402
1403                         if (debug)
1404                                 fprintf(stderr, "client %d sending %s\n", st->id, sql);
1405                         r = PQsendQuery(st->con, sql);
1406                         free(sql);
1407                 }
1408                 else if (querymode == QUERY_EXTENDED)
1409                 {
1410                         const char *sql = command->argv[0];
1411                         const char *params[MAX_ARGS];
1412
1413                         getQueryParams(st, command, params);
1414
1415                         if (debug)
1416                                 fprintf(stderr, "client %d sending %s\n", st->id, sql);
1417                         r = PQsendQueryParams(st->con, sql, command->argc - 1,
1418                                                                   NULL, params, NULL, NULL, 0);
1419                 }
1420                 else if (querymode == QUERY_PREPARED)
1421                 {
1422                         char            name[MAX_PREPARE_NAME];
1423                         const char *params[MAX_ARGS];
1424
1425                         if (!st->prepared[st->use_file])
1426                         {
1427                                 int                     j;
1428
1429                                 for (j = 0; commands[j] != NULL; j++)
1430                                 {
1431                                         PGresult   *res;
1432                                         char            name[MAX_PREPARE_NAME];
1433
1434                                         if (commands[j]->type != SQL_COMMAND)
1435                                                 continue;
1436                                         preparedStatementName(name, st->use_file, j);
1437                                         res = PQprepare(st->con, name,
1438                                                   commands[j]->argv[0], commands[j]->argc - 1, NULL);
1439                                         if (PQresultStatus(res) != PGRES_COMMAND_OK)
1440                                                 fprintf(stderr, "%s", PQerrorMessage(st->con));
1441                                         PQclear(res);
1442                                 }
1443                                 st->prepared[st->use_file] = true;
1444                         }
1445
1446                         getQueryParams(st, command, params);
1447                         preparedStatementName(name, st->use_file, st->state);
1448
1449                         if (debug)
1450                                 fprintf(stderr, "client %d sending %s\n", st->id, name);
1451                         r = PQsendQueryPrepared(st->con, name, command->argc - 1,
1452                                                                         params, NULL, NULL, 0);
1453                 }
1454                 else    /* unknown sql mode */
1455                         r = 0;
1456
1457                 if (r == 0)
1458                 {
1459                         if (debug)
1460                                 fprintf(stderr, "client %d could not send %s\n",
1461                                                 st->id, command->argv[0]);
1462                         st->ecnt++;
1463                 }
1464                 else
1465                         st->listen = 1;         /* flags that should be listened */
1466         }
1467         else if (commands[st->state]->type == META_COMMAND)
1468         {
1469                 int                     argc = commands[st->state]->argc,
1470                                         i;
1471                 char      **argv = commands[st->state]->argv;
1472
1473                 if (debug)
1474                 {
1475                         fprintf(stderr, "client %d executing \\%s", st->id, argv[0]);
1476                         for (i = 1; i < argc; i++)
1477                                 fprintf(stderr, " %s", argv[i]);
1478                         fprintf(stderr, "\n");
1479                 }
1480
1481                 if (pg_strcasecmp(argv[0], "setrandom") == 0)
1482                 {
1483                         char       *var;
1484                         int64           min,
1485                                                 max;
1486                         double          threshold = 0;
1487                         char            res[64];
1488
1489                         if (*argv[2] == ':')
1490                         {
1491                                 if ((var = getVariable(st, argv[2] + 1)) == NULL)
1492                                 {
1493                                         fprintf(stderr, "%s: undefined variable \"%s\"\n",
1494                                                         argv[0], argv[2]);
1495                                         st->ecnt++;
1496                                         return true;
1497                                 }
1498                                 min = strtoint64(var);
1499                         }
1500                         else
1501                                 min = strtoint64(argv[2]);
1502
1503                         if (*argv[3] == ':')
1504                         {
1505                                 if ((var = getVariable(st, argv[3] + 1)) == NULL)
1506                                 {
1507                                         fprintf(stderr, "%s: undefined variable \"%s\"\n",
1508                                                         argv[0], argv[3]);
1509                                         st->ecnt++;
1510                                         return true;
1511                                 }
1512                                 max = strtoint64(var);
1513                         }
1514                         else
1515                                 max = strtoint64(argv[3]);
1516
1517                         if (max < min)
1518                         {
1519                                 fprintf(stderr, "%s: \\setrandom maximum is less than minimum\n",
1520                                                 argv[0]);
1521                                 st->ecnt++;
1522                                 return true;
1523                         }
1524
1525                         /*
1526                          * Generate random number functions need to be able to subtract
1527                          * max from min and add one to the result without overflowing.
1528                          * Since we know max > min, we can detect overflow just by
1529                          * checking for a negative result. But we must check both that the
1530                          * subtraction doesn't overflow, and that adding one to the result
1531                          * doesn't overflow either.
1532                          */
1533                         if (max - min < 0 || (max - min) + 1 < 0)
1534                         {
1535                                 fprintf(stderr, "%s: \\setrandom range is too large\n",
1536                                                 argv[0]);
1537                                 st->ecnt++;
1538                                 return true;
1539                         }
1540
1541                         if (argc == 4 ||        /* uniform without or with "uniform" keyword */
1542                                 (argc == 5 && pg_strcasecmp(argv[4], "uniform") == 0))
1543                         {
1544 #ifdef DEBUG
1545                                 printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
1546 #endif
1547                                 snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
1548                         }
1549                         else if (argc == 6 &&
1550                                          ((pg_strcasecmp(argv[4], "gaussian") == 0) ||
1551                                           (pg_strcasecmp(argv[4], "exponential") == 0)))
1552                         {
1553                                 if (*argv[5] == ':')
1554                                 {
1555                                         if ((var = getVariable(st, argv[5] + 1)) == NULL)
1556                                         {
1557                                                 fprintf(stderr, "%s: invalid threshold number: \"%s\"\n",
1558                                                                 argv[0], argv[5]);
1559                                                 st->ecnt++;
1560                                                 return true;
1561                                         }
1562                                         threshold = strtod(var, NULL);
1563                                 }
1564                                 else
1565                                         threshold = strtod(argv[5], NULL);
1566
1567                                 if (pg_strcasecmp(argv[4], "gaussian") == 0)
1568                                 {
1569                                         if (threshold < MIN_GAUSSIAN_THRESHOLD)
1570                                         {
1571                                                 fprintf(stderr, "gaussian threshold must be at least %f (not \"%s\")\n", MIN_GAUSSIAN_THRESHOLD, argv[5]);
1572                                                 st->ecnt++;
1573                                                 return true;
1574                                         }
1575 #ifdef DEBUG
1576                                         printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, threshold));
1577 #endif
1578                                         snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, threshold));
1579                                 }
1580                                 else if (pg_strcasecmp(argv[4], "exponential") == 0)
1581                                 {
1582                                         if (threshold <= 0.0)
1583                                         {
1584                                                 fprintf(stderr, "exponential threshold must be greater than zero (not \"%s\")\n", argv[5]);
1585                                                 st->ecnt++;
1586                                                 return true;
1587                                         }
1588 #ifdef DEBUG
1589                                         printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, threshold));
1590 #endif
1591                                         snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold));
1592                                 }
1593                         }
1594                         else    /* this means an error somewhere in the parsing phase... */
1595                         {
1596                                 fprintf(stderr, "%s: invalid arguments for \\setrandom\n",
1597                                                 argv[0]);
1598                                 st->ecnt++;
1599                                 return true;
1600                         }
1601
1602                         if (!putVariable(st, argv[0], argv[1], res))
1603                         {
1604                                 st->ecnt++;
1605                                 return true;
1606                         }
1607
1608                         st->listen = 1;
1609                 }
1610                 else if (pg_strcasecmp(argv[0], "set") == 0)
1611                 {
1612                         char            res[64];
1613                         PgBenchExpr *expr = commands[st->state]->expr;
1614                         int64           result;
1615
1616                         if (!evaluateExpr(st, expr, &result))
1617                         {
1618                                 st->ecnt++;
1619                                 return true;
1620                         }
1621                         sprintf(res, INT64_FORMAT, result);
1622
1623                         if (!putVariable(st, argv[0], argv[1], res))
1624                         {
1625                                 st->ecnt++;
1626                                 return true;
1627                         }
1628
1629                         st->listen = 1;
1630                 }
1631                 else if (pg_strcasecmp(argv[0], "sleep") == 0)
1632                 {
1633                         char       *var;
1634                         int                     usec;
1635                         instr_time      now;
1636
1637                         if (*argv[1] == ':')
1638                         {
1639                                 if ((var = getVariable(st, argv[1] + 1)) == NULL)
1640                                 {
1641                                         fprintf(stderr, "%s: undefined variable \"%s\"\n",
1642                                                         argv[0], argv[1]);
1643                                         st->ecnt++;
1644                                         return true;
1645                                 }
1646                                 usec = atoi(var);
1647                         }
1648                         else
1649                                 usec = atoi(argv[1]);
1650
1651                         if (argc > 2)
1652                         {
1653                                 if (pg_strcasecmp(argv[2], "ms") == 0)
1654                                         usec *= 1000;
1655                                 else if (pg_strcasecmp(argv[2], "s") == 0)
1656                                         usec *= 1000000;
1657                         }
1658                         else
1659                                 usec *= 1000000;
1660
1661                         INSTR_TIME_SET_CURRENT(now);
1662                         st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now) + usec;
1663                         st->sleeping = 1;
1664
1665                         st->listen = 1;
1666                 }
1667                 else if (pg_strcasecmp(argv[0], "setshell") == 0)
1668                 {
1669                         bool            ret = runShellCommand(st, argv[1], argv + 2, argc - 2);
1670
1671                         if (timer_exceeded) /* timeout */
1672                                 return clientDone(st, true);
1673                         else if (!ret)          /* on error */
1674                         {
1675                                 st->ecnt++;
1676                                 return true;
1677                         }
1678                         else    /* succeeded */
1679                                 st->listen = 1;
1680                 }
1681                 else if (pg_strcasecmp(argv[0], "shell") == 0)
1682                 {
1683                         bool            ret = runShellCommand(st, NULL, argv + 1, argc - 1);
1684
1685                         if (timer_exceeded) /* timeout */
1686                                 return clientDone(st, true);
1687                         else if (!ret)          /* on error */
1688                         {
1689                                 st->ecnt++;
1690                                 return true;
1691                         }
1692                         else    /* succeeded */
1693                                 st->listen = 1;
1694                 }
1695                 goto top;
1696         }
1697
1698         return true;
1699 }
1700
1701 /*
1702  * print log entry after completing one transaction.
1703  */
1704 static void
1705 doLog(TState *thread, CState *st, FILE *logfile, instr_time *now, AggVals *agg,
1706           bool skipped)
1707 {
1708         double          lag;
1709         double          latency;
1710
1711         /*
1712          * Skip the log entry if sampling is enabled and this row doesn't belong
1713          * to the random sample.
1714          */
1715         if (sample_rate != 0.0 &&
1716                 pg_erand48(thread->random_state) > sample_rate)
1717                 return;
1718
1719         if (INSTR_TIME_IS_ZERO(*now))
1720                 INSTR_TIME_SET_CURRENT(*now);
1721
1722         latency = (double) (INSTR_TIME_GET_MICROSEC(*now) - st->txn_scheduled);
1723         if (skipped)
1724                 lag = latency;
1725         else
1726                 lag = (double) (INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled);
1727
1728         /* should we aggregate the results or not? */
1729         if (agg_interval > 0)
1730         {
1731                 /*
1732                  * Are we still in the same interval? If yes, accumulate the values
1733                  * (print them otherwise)
1734                  */
1735                 if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(*now))
1736                 {
1737                         agg->cnt += 1;
1738                         if (skipped)
1739                         {
1740                                 /*
1741                                  * there is no latency to record if the transaction was
1742                                  * skipped
1743                                  */
1744                                 agg->skipped += 1;
1745                         }
1746                         else
1747                         {
1748                                 agg->sum_latency += latency;
1749                                 agg->sum2_latency += latency * latency;
1750
1751                                 /* first in this aggregation interval */
1752                                 if ((agg->cnt == 1) || (latency < agg->min_latency))
1753                                         agg->min_latency = latency;
1754
1755                                 if ((agg->cnt == 1) || (latency > agg->max_latency))
1756                                         agg->max_latency = latency;
1757
1758                                 /* and the same for schedule lag */
1759                                 if (throttle_delay)
1760                                 {
1761                                         agg->sum_lag += lag;
1762                                         agg->sum2_lag += lag * lag;
1763
1764                                         if ((agg->cnt == 1) || (lag < agg->min_lag))
1765                                                 agg->min_lag = lag;
1766                                         if ((agg->cnt == 1) || (lag > agg->max_lag))
1767                                                 agg->max_lag = lag;
1768                                 }
1769                         }
1770                 }
1771                 else
1772                 {
1773                         /*
1774                          * Loop until we reach the interval of the current transaction
1775                          * (and print all the empty intervals in between).
1776                          */
1777                         while (agg->start_time + agg_interval < INSTR_TIME_GET_DOUBLE(*now))
1778                         {
1779                                 /*
1780                                  * This is a non-Windows branch (thanks to the ifdef in
1781                                  * usage), so we don't need to handle this in a special way
1782                                  * (see below).
1783                                  */
1784                                 fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f",
1785                                                 agg->start_time,
1786                                                 agg->cnt,
1787                                                 agg->sum_latency,
1788                                                 agg->sum2_latency,
1789                                                 agg->min_latency,
1790                                                 agg->max_latency);
1791                                 if (throttle_delay)
1792                                 {
1793                                         fprintf(logfile, " %.0f %.0f %.0f %.0f",
1794                                                         agg->sum_lag,
1795                                                         agg->sum2_lag,
1796                                                         agg->min_lag,
1797                                                         agg->max_lag);
1798                                         if (latency_limit)
1799                                                 fprintf(logfile, " %d", agg->skipped);
1800                                 }
1801                                 fputc('\n', logfile);
1802
1803                                 /* move to the next inteval */
1804                                 agg->start_time = agg->start_time + agg_interval;
1805
1806                                 /* reset for "no transaction" intervals */
1807                                 agg->cnt = 0;
1808                                 agg->skipped = 0;
1809                                 agg->min_latency = 0;
1810                                 agg->max_latency = 0;
1811                                 agg->sum_latency = 0;
1812                                 agg->sum2_latency = 0;
1813                                 agg->min_lag = 0;
1814                                 agg->max_lag = 0;
1815                                 agg->sum_lag = 0;
1816                                 agg->sum2_lag = 0;
1817                         }
1818
1819                         /* reset the values to include only the current transaction. */
1820                         agg->cnt = 1;
1821                         agg->skipped = skipped ? 1 : 0;
1822                         agg->min_latency = latency;
1823                         agg->max_latency = latency;
1824                         agg->sum_latency = skipped ? 0.0 : latency;
1825                         agg->sum2_latency = skipped ? 0.0 : latency * latency;
1826                         agg->min_lag = lag;
1827                         agg->max_lag = lag;
1828                         agg->sum_lag = lag;
1829                         agg->sum2_lag = lag * lag;
1830                 }
1831         }
1832         else
1833         {
1834                 /* no, print raw transactions */
1835 #ifndef WIN32
1836
1837                 /* This is more than we really ought to know about instr_time */
1838                 if (skipped)
1839                         fprintf(logfile, "%d %d skipped %d %ld %ld",
1840                                         st->id, st->cnt, st->use_file,
1841                                         (long) now->tv_sec, (long) now->tv_usec);
1842                 else
1843                         fprintf(logfile, "%d %d %.0f %d %ld %ld",
1844                                         st->id, st->cnt, latency, st->use_file,
1845                                         (long) now->tv_sec, (long) now->tv_usec);
1846 #else
1847
1848                 /* On Windows, instr_time doesn't provide a timestamp anyway */
1849                 if (skipped)
1850                         fprintf(logfile, "%d %d skipped %d 0 0",
1851                                         st->id, st->cnt, st->use_file);
1852                 else
1853                         fprintf(logfile, "%d %d %.0f %d 0 0",
1854                                         st->id, st->cnt, latency, st->use_file);
1855 #endif
1856                 if (throttle_delay)
1857                         fprintf(logfile, " %.0f", lag);
1858                 fputc('\n', logfile);
1859         }
1860 }
1861
1862 /* discard connections */
1863 static void
1864 disconnect_all(CState *state, int length)
1865 {
1866         int                     i;
1867
1868         for (i = 0; i < length; i++)
1869         {
1870                 if (state[i].con)
1871                 {
1872                         PQfinish(state[i].con);
1873                         state[i].con = NULL;
1874                 }
1875         }
1876 }
1877
1878 /* create tables and setup data */
1879 static void
1880 init(bool is_no_vacuum)
1881 {
1882 /*
1883  * The scale factor at/beyond which 32-bit integers are insufficient for
1884  * storing TPC-B account IDs.
1885  *
1886  * Although the actual threshold is 21474, we use 20000 because it is easier to
1887  * document and remember, and isn't that far away from the real threshold.
1888  */
1889 #define SCALE_32BIT_THRESHOLD 20000
1890
1891         /*
1892          * Note: TPC-B requires at least 100 bytes per row, and the "filler"
1893          * fields in these table declarations were intended to comply with that.
1894          * The pgbench_accounts table complies with that because the "filler"
1895          * column is set to blank-padded empty string. But for all other tables
1896          * the columns default to NULL and so don't actually take any space.  We
1897          * could fix that by giving them non-null default values.  However, that
1898          * would completely break comparability of pgbench results with prior
1899          * versions. Since pgbench has never pretended to be fully TPC-B compliant
1900          * anyway, we stick with the historical behavior.
1901          */
1902         struct ddlinfo
1903         {
1904                 const char *table;              /* table name */
1905                 const char *smcols;             /* column decls if accountIDs are 32 bits */
1906                 const char *bigcols;    /* column decls if accountIDs are 64 bits */
1907                 int                     declare_fillfactor;
1908         };
1909         static const struct ddlinfo DDLs[] = {
1910                 {
1911                         "pgbench_history",
1912                         "tid int,bid int,aid    int,delta int,mtime timestamp,filler char(22)",
1913                         "tid int,bid int,aid bigint,delta int,mtime timestamp,filler char(22)",
1914                         0
1915                 },
1916                 {
1917                         "pgbench_tellers",
1918                         "tid int not null,bid int,tbalance int,filler char(84)",
1919                         "tid int not null,bid int,tbalance int,filler char(84)",
1920                         1
1921                 },
1922                 {
1923                         "pgbench_accounts",
1924                         "aid    int not null,bid int,abalance int,filler char(84)",
1925                         "aid bigint not null,bid int,abalance int,filler char(84)",
1926                         1
1927                 },
1928                 {
1929                         "pgbench_branches",
1930                         "bid int not null,bbalance int,filler char(88)",
1931                         "bid int not null,bbalance int,filler char(88)",
1932                         1
1933                 }
1934         };
1935         static const char *const DDLINDEXes[] = {
1936                 "alter table pgbench_branches add primary key (bid)",
1937                 "alter table pgbench_tellers add primary key (tid)",
1938                 "alter table pgbench_accounts add primary key (aid)"
1939         };
1940         static const char *const DDLKEYs[] = {
1941                 "alter table pgbench_tellers add foreign key (bid) references pgbench_branches",
1942                 "alter table pgbench_accounts add foreign key (bid) references pgbench_branches",
1943                 "alter table pgbench_history add foreign key (bid) references pgbench_branches",
1944                 "alter table pgbench_history add foreign key (tid) references pgbench_tellers",
1945                 "alter table pgbench_history add foreign key (aid) references pgbench_accounts"
1946         };
1947
1948         PGconn     *con;
1949         PGresult   *res;
1950         char            sql[256];
1951         int                     i;
1952         int64           k;
1953
1954         /* used to track elapsed time and estimate of the remaining time */
1955         instr_time      start,
1956                                 diff;
1957         double          elapsed_sec,
1958                                 remaining_sec;
1959         int                     log_interval = 1;
1960
1961         if ((con = doConnect()) == NULL)
1962                 exit(1);
1963
1964         for (i = 0; i < lengthof(DDLs); i++)
1965         {
1966                 char            opts[256];
1967                 char            buffer[256];
1968                 const struct ddlinfo *ddl = &DDLs[i];
1969                 const char *cols;
1970
1971                 /* Remove old table, if it exists. */
1972                 snprintf(buffer, sizeof(buffer), "drop table if exists %s", ddl->table);
1973                 executeStatement(con, buffer);
1974
1975                 /* Construct new create table statement. */
1976                 opts[0] = '\0';
1977                 if (ddl->declare_fillfactor)
1978                         snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
1979                                          " with (fillfactor=%d)", fillfactor);
1980                 if (tablespace != NULL)
1981                 {
1982                         char       *escape_tablespace;
1983
1984                         escape_tablespace = PQescapeIdentifier(con, tablespace,
1985                                                                                                    strlen(tablespace));
1986                         snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
1987                                          " tablespace %s", escape_tablespace);
1988                         PQfreemem(escape_tablespace);
1989                 }
1990
1991                 cols = (scale >= SCALE_32BIT_THRESHOLD) ? ddl->bigcols : ddl->smcols;
1992
1993                 snprintf(buffer, sizeof(buffer), "create%s table %s(%s)%s",
1994                                  unlogged_tables ? " unlogged" : "",
1995                                  ddl->table, cols, opts);
1996
1997                 executeStatement(con, buffer);
1998         }
1999
2000         executeStatement(con, "begin");
2001
2002         for (i = 0; i < nbranches * scale; i++)
2003         {
2004                 /* "filler" column defaults to NULL */
2005                 snprintf(sql, sizeof(sql),
2006                                  "insert into pgbench_branches(bid,bbalance) values(%d,0)",
2007                                  i + 1);
2008                 executeStatement(con, sql);
2009         }
2010
2011         for (i = 0; i < ntellers * scale; i++)
2012         {
2013                 /* "filler" column defaults to NULL */
2014                 snprintf(sql, sizeof(sql),
2015                         "insert into pgbench_tellers(tid,bid,tbalance) values (%d,%d,0)",
2016                                  i + 1, i / ntellers + 1);
2017                 executeStatement(con, sql);
2018         }
2019
2020         executeStatement(con, "commit");
2021
2022         /*
2023          * fill the pgbench_accounts table with some data
2024          */
2025         fprintf(stderr, "creating tables...\n");
2026
2027         executeStatement(con, "begin");
2028         executeStatement(con, "truncate pgbench_accounts");
2029
2030         res = PQexec(con, "copy pgbench_accounts from stdin");
2031         if (PQresultStatus(res) != PGRES_COPY_IN)
2032         {
2033                 fprintf(stderr, "%s", PQerrorMessage(con));
2034                 exit(1);
2035         }
2036         PQclear(res);
2037
2038         INSTR_TIME_SET_CURRENT(start);
2039
2040         for (k = 0; k < (int64) naccounts * scale; k++)
2041         {
2042                 int64           j = k + 1;
2043
2044                 /* "filler" column defaults to blank padded empty string */
2045                 snprintf(sql, sizeof(sql),
2046                                  INT64_FORMAT "\t" INT64_FORMAT "\t%d\t\n",
2047                                  j, k / naccounts + 1, 0);
2048                 if (PQputline(con, sql))
2049                 {
2050                         fprintf(stderr, "PQputline failed\n");
2051                         exit(1);
2052                 }
2053
2054                 /*
2055                  * If we want to stick with the original logging, print a message each
2056                  * 100k inserted rows.
2057                  */
2058                 if ((!use_quiet) && (j % 100000 == 0))
2059                 {
2060                         INSTR_TIME_SET_CURRENT(diff);
2061                         INSTR_TIME_SUBTRACT(diff, start);
2062
2063                         elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
2064                         remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
2065
2066                         fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)\n",
2067                                         j, (int64) naccounts * scale,
2068                                         (int) (((int64) j * 100) / (naccounts * (int64) scale)),
2069                                         elapsed_sec, remaining_sec);
2070                 }
2071                 /* let's not call the timing for each row, but only each 100 rows */
2072                 else if (use_quiet && (j % 100 == 0))
2073                 {
2074                         INSTR_TIME_SET_CURRENT(diff);
2075                         INSTR_TIME_SUBTRACT(diff, start);
2076
2077                         elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
2078                         remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
2079
2080                         /* have we reached the next interval (or end)? */
2081                         if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
2082                         {
2083                                 fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)\n",
2084                                                 j, (int64) naccounts * scale,
2085                                                 (int) (((int64) j * 100) / (naccounts * (int64) scale)), elapsed_sec, remaining_sec);
2086
2087                                 /* skip to the next interval */
2088                                 log_interval = (int) ceil(elapsed_sec / LOG_STEP_SECONDS);
2089                         }
2090                 }
2091
2092         }
2093         if (PQputline(con, "\\.\n"))
2094         {
2095                 fprintf(stderr, "very last PQputline failed\n");
2096                 exit(1);
2097         }
2098         if (PQendcopy(con))
2099         {
2100                 fprintf(stderr, "PQendcopy failed\n");
2101                 exit(1);
2102         }
2103         executeStatement(con, "commit");
2104
2105         /* vacuum */
2106         if (!is_no_vacuum)
2107         {
2108                 fprintf(stderr, "vacuum...\n");
2109                 executeStatement(con, "vacuum analyze pgbench_branches");
2110                 executeStatement(con, "vacuum analyze pgbench_tellers");
2111                 executeStatement(con, "vacuum analyze pgbench_accounts");
2112                 executeStatement(con, "vacuum analyze pgbench_history");
2113         }
2114
2115         /*
2116          * create indexes
2117          */
2118         fprintf(stderr, "set primary keys...\n");
2119         for (i = 0; i < lengthof(DDLINDEXes); i++)
2120         {
2121                 char            buffer[256];
2122
2123                 strlcpy(buffer, DDLINDEXes[i], sizeof(buffer));
2124
2125                 if (index_tablespace != NULL)
2126                 {
2127                         char       *escape_tablespace;
2128
2129                         escape_tablespace = PQescapeIdentifier(con, index_tablespace,
2130                                                                                                    strlen(index_tablespace));
2131                         snprintf(buffer + strlen(buffer), sizeof(buffer) - strlen(buffer),
2132                                          " using index tablespace %s", escape_tablespace);
2133                         PQfreemem(escape_tablespace);
2134                 }
2135
2136                 executeStatement(con, buffer);
2137         }
2138
2139         /*
2140          * create foreign keys
2141          */
2142         if (foreign_keys)
2143         {
2144                 fprintf(stderr, "set foreign keys...\n");
2145                 for (i = 0; i < lengthof(DDLKEYs); i++)
2146                 {
2147                         executeStatement(con, DDLKEYs[i]);
2148                 }
2149         }
2150
2151         fprintf(stderr, "done.\n");
2152         PQfinish(con);
2153 }
2154
2155 /*
2156  * Parse the raw sql and replace :param to $n.
2157  */
2158 static bool
2159 parseQuery(Command *cmd, const char *raw_sql)
2160 {
2161         char       *sql,
2162                            *p;
2163
2164         sql = pg_strdup(raw_sql);
2165         cmd->argc = 1;
2166
2167         p = sql;
2168         while ((p = strchr(p, ':')) != NULL)
2169         {
2170                 char            var[12];
2171                 char       *name;
2172                 int                     eaten;
2173
2174                 name = parseVariable(p, &eaten);
2175                 if (name == NULL)
2176                 {
2177                         while (*p == ':')
2178                         {
2179                                 p++;
2180                         }
2181                         continue;
2182                 }
2183
2184                 if (cmd->argc >= MAX_ARGS)
2185                 {
2186                         fprintf(stderr, "statement has too many arguments (maximum is %d): %s\n", MAX_ARGS - 1, raw_sql);
2187                         pg_free(name);
2188                         return false;
2189                 }
2190
2191                 sprintf(var, "$%d", cmd->argc);
2192                 p = replaceVariable(&sql, p, eaten, var);
2193
2194                 cmd->argv[cmd->argc] = name;
2195                 cmd->argc++;
2196         }
2197
2198         cmd->argv[0] = sql;
2199         return true;
2200 }
2201
2202 void pg_attribute_noreturn()
2203 syntax_error(const char *source, const int lineno,
2204                          const char *line, const char *command,
2205                          const char *msg, const char *more, const int column)
2206 {
2207         fprintf(stderr, "%s:%d: %s", source, lineno, msg);
2208         if (more != NULL)
2209                 fprintf(stderr, " (%s)", more);
2210         if (column != -1)
2211                 fprintf(stderr, " at column %d", column);
2212         fprintf(stderr, " in command \"%s\"\n", command);
2213         if (line != NULL)
2214         {
2215                 fprintf(stderr, "%s\n", line);
2216                 if (column != -1)
2217                 {
2218                         int                     i;
2219
2220                         for (i = 0; i < column - 1; i++)
2221                                 fprintf(stderr, " ");
2222                         fprintf(stderr, "^ error found here\n");
2223                 }
2224         }
2225         exit(1);
2226 }
2227
2228 /* Parse a command; return a Command struct, or NULL if it's a comment */
2229 static Command *
2230 process_commands(char *buf, const char *source, const int lineno)
2231 {
2232         const char      delim[] = " \f\n\r\t\v";
2233
2234         Command    *my_commands;
2235         int                     j;
2236         char       *p,
2237                            *tok;
2238
2239         /* Make the string buf end at the next newline */
2240         if ((p = strchr(buf, '\n')) != NULL)
2241                 *p = '\0';
2242
2243         /* Skip leading whitespace */
2244         p = buf;
2245         while (isspace((unsigned char) *p))
2246                 p++;
2247
2248         /* If the line is empty or actually a comment, we're done */
2249         if (*p == '\0' || strncmp(p, "--", 2) == 0)
2250                 return NULL;
2251
2252         /* Allocate and initialize Command structure */
2253         my_commands = (Command *) pg_malloc(sizeof(Command));
2254         my_commands->line = pg_strdup(buf);
2255         my_commands->command_num = num_commands++;
2256         my_commands->type = 0;          /* until set */
2257         my_commands->argc = 0;
2258
2259         if (*p == '\\')
2260         {
2261                 int                     max_args = -1;
2262
2263                 my_commands->type = META_COMMAND;
2264
2265                 j = 0;
2266                 tok = strtok(++p, delim);
2267
2268                 if (tok != NULL && pg_strcasecmp(tok, "set") == 0)
2269                         max_args = 2;
2270
2271                 while (tok != NULL)
2272                 {
2273                         my_commands->cols[j] = tok - buf + 1;
2274                         my_commands->argv[j++] = pg_strdup(tok);
2275                         my_commands->argc++;
2276                         if (max_args >= 0 && my_commands->argc >= max_args)
2277                                 tok = strtok(NULL, "");
2278                         else
2279                                 tok = strtok(NULL, delim);
2280                 }
2281
2282                 if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
2283                 {
2284                         /*
2285                          * parsing: \setrandom variable min max [uniform] \setrandom
2286                          * variable min max (gaussian|exponential) threshold
2287                          */
2288
2289                         if (my_commands->argc < 4)
2290                         {
2291                                 syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2292                                                          "missing arguments", NULL, -1);
2293                         }
2294
2295                         /* argc >= 4 */
2296
2297                         if (my_commands->argc == 4 ||           /* uniform without/with
2298                                                                                                  * "uniform" keyword */
2299                                 (my_commands->argc == 5 &&
2300                                  pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
2301                         {
2302                                 /* nothing to do */
2303                         }
2304                         else if (                       /* argc >= 5 */
2305                                          (pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
2306                                    (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
2307                         {
2308                                 if (my_commands->argc < 6)
2309                                 {
2310                                         syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2311                                          "missing threshold argument", my_commands->argv[4], -1);
2312                                 }
2313                                 else if (my_commands->argc > 6)
2314                                 {
2315                                         syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2316                                                                  "too many arguments", my_commands->argv[4],
2317                                                                  my_commands->cols[6]);
2318                                 }
2319                         }
2320                         else    /* cannot parse, unexpected arguments */
2321                         {
2322                                 syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2323                                                          "unexpected argument", my_commands->argv[4],
2324                                                          my_commands->cols[4]);
2325                         }
2326                 }
2327                 else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
2328                 {
2329                         if (my_commands->argc < 3)
2330                         {
2331                                 syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2332                                                          "missing argument", NULL, -1);
2333                         }
2334
2335                         expr_scanner_init(my_commands->argv[2], source, lineno,
2336                                                           my_commands->line, my_commands->argv[0],
2337                                                           my_commands->cols[2] - 1);
2338
2339                         if (expr_yyparse() != 0)
2340                         {
2341                                 /* dead code: exit done from syntax_error called by yyerror */
2342                                 exit(1);
2343                         }
2344
2345                         my_commands->expr = expr_parse_result;
2346
2347                         expr_scanner_finish();
2348                 }
2349                 else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0)
2350                 {
2351                         if (my_commands->argc < 2)
2352                         {
2353                                 syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2354                                                          "missing argument", NULL, -1);
2355                         }
2356
2357                         /*
2358                          * Split argument into number and unit to allow "sleep 1ms" etc.
2359                          * We don't have to terminate the number argument with null
2360                          * because it will be parsed with atoi, which ignores trailing
2361                          * non-digit characters.
2362                          */
2363                         if (my_commands->argv[1][0] != ':')
2364                         {
2365                                 char       *c = my_commands->argv[1];
2366
2367                                 while (isdigit((unsigned char) *c))
2368                                         c++;
2369                                 if (*c)
2370                                 {
2371                                         my_commands->argv[2] = c;
2372                                         if (my_commands->argc < 3)
2373                                                 my_commands->argc = 3;
2374                                 }
2375                         }
2376
2377                         if (my_commands->argc >= 3)
2378                         {
2379                                 if (pg_strcasecmp(my_commands->argv[2], "us") != 0 &&
2380                                         pg_strcasecmp(my_commands->argv[2], "ms") != 0 &&
2381                                         pg_strcasecmp(my_commands->argv[2], "s") != 0)
2382                                 {
2383                                         syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2384                                                                  "unknown time unit, must be us, ms or s",
2385                                                                  my_commands->argv[2], my_commands->cols[2]);
2386                                 }
2387                         }
2388
2389                         /* this should be an error?! */
2390                         for (j = 3; j < my_commands->argc; j++)
2391                                 fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
2392                                                 my_commands->argv[0], my_commands->argv[j]);
2393                 }
2394                 else if (pg_strcasecmp(my_commands->argv[0], "setshell") == 0)
2395                 {
2396                         if (my_commands->argc < 3)
2397                         {
2398                                 syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2399                                                          "missing argument", NULL, -1);
2400                         }
2401                 }
2402                 else if (pg_strcasecmp(my_commands->argv[0], "shell") == 0)
2403                 {
2404                         if (my_commands->argc < 1)
2405                         {
2406                                 syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2407                                                          "missing command", NULL, -1);
2408                         }
2409                 }
2410                 else
2411                 {
2412                         syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
2413                                                  "invalid command", NULL, -1);
2414                 }
2415         }
2416         else
2417         {
2418                 my_commands->type = SQL_COMMAND;
2419
2420                 switch (querymode)
2421                 {
2422                         case QUERY_SIMPLE:
2423                                 my_commands->argv[0] = pg_strdup(p);
2424                                 my_commands->argc++;
2425                                 break;
2426                         case QUERY_EXTENDED:
2427                         case QUERY_PREPARED:
2428                                 if (!parseQuery(my_commands, p))
2429                                         exit(1);
2430                                 break;
2431                         default:
2432                                 exit(1);
2433                 }
2434         }
2435
2436         return my_commands;
2437 }
2438
/*
 * Read one line from fd and return it in an allocated buffer, including the
 * trailing newline if there was one.  Lines of any length are handled by
 * reading BUFSIZ-sized pieces and growing the result as needed.
 *
 * Returns NULL when nothing at all could be read (EOF).
 *
 * The buffer will typically be larger than necessary, but we don't care
 * in this program, because we'll free it as soon as we've parsed the line.
 */
static char *
read_line_from_file(FILE *fd)
{
	char		chunk[BUFSIZ];
	size_t		capacity = BUFSIZ;
	size_t		len = 0;
	char	   *line;

	line = (char *) palloc(capacity);
	line[0] = '\0';

	for (;;)
	{
		size_t		got;

		if (fgets(chunk, BUFSIZ, fd) == NULL)
			break;

		/* Append this piece, terminator included, to what we have so far */
		got = strlen(chunk);
		memcpy(line + len, chunk, got + 1);
		len += got;

		/* A trailing newline means the line is complete */
		if (got > 0 && chunk[got - 1] == '\n')
			break;

		/* Otherwise make room for one more full piece and keep reading */
		capacity += BUFSIZ;
		line = (char *) pg_realloc(line, capacity);
	}

	if (len == 0)
	{
		/* Reached EOF without reading anything */
		free(line);
		return NULL;
	}

	return line;
}
2481
2482 static int
2483 process_file(char *filename)
2484 {
2485 #define COMMANDS_ALLOC_NUM 128
2486
2487         Command   **my_commands;
2488         FILE       *fd;
2489         int                     lineno,
2490                                 index;
2491         char       *buf;
2492         int                     alloc_num;
2493
2494         if (num_files >= MAX_FILES)
2495         {
2496                 fprintf(stderr, "at most %d SQL files are allowed\n", MAX_FILES);
2497                 exit(1);
2498         }
2499
2500         alloc_num = COMMANDS_ALLOC_NUM;
2501         my_commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
2502
2503         if (strcmp(filename, "-") == 0)
2504                 fd = stdin;
2505         else if ((fd = fopen(filename, "r")) == NULL)
2506         {
2507                 fprintf(stderr, "could not open file \"%s\": %s\n",
2508                                 filename, strerror(errno));
2509                 pg_free(my_commands);
2510                 return false;
2511         }
2512
2513         lineno = 0;
2514         index = 0;
2515
2516         while ((buf = read_line_from_file(fd)) != NULL)
2517         {
2518                 Command    *command;
2519
2520                 lineno += 1;
2521
2522                 command = process_commands(buf, filename, lineno);
2523
2524                 free(buf);
2525
2526                 if (command == NULL)
2527                         continue;
2528
2529                 my_commands[index] = command;
2530                 index++;
2531
2532                 if (index >= alloc_num)
2533                 {
2534                         alloc_num += COMMANDS_ALLOC_NUM;
2535                         my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
2536                 }
2537         }
2538         fclose(fd);
2539
2540         my_commands[index] = NULL;
2541
2542         sql_files[num_files++] = my_commands;
2543
2544         return true;
2545 }
2546
2547 static Command **
2548 process_builtin(char *tb, const char *source)
2549 {
2550 #define COMMANDS_ALLOC_NUM 128
2551
2552         Command   **my_commands;
2553         int                     lineno,
2554                                 index;
2555         char            buf[BUFSIZ];
2556         int                     alloc_num;
2557
2558         alloc_num = COMMANDS_ALLOC_NUM;
2559         my_commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
2560
2561         lineno = 0;
2562         index = 0;
2563
2564         for (;;)
2565         {
2566                 char       *p;
2567                 Command    *command;
2568
2569                 p = buf;
2570                 while (*tb && *tb != '\n')
2571                         *p++ = *tb++;
2572
2573                 if (*tb == '\0')
2574                         break;
2575
2576                 if (*tb == '\n')
2577                         tb++;
2578
2579                 *p = '\0';
2580
2581                 lineno += 1;
2582
2583                 command = process_commands(buf, source, lineno);
2584                 if (command == NULL)
2585                         continue;
2586
2587                 my_commands[index] = command;
2588                 index++;
2589
2590                 if (index >= alloc_num)
2591                 {
2592                         alloc_num += COMMANDS_ALLOC_NUM;
2593                         my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
2594                 }
2595         }
2596
2597         my_commands[index] = NULL;
2598
2599         return my_commands;
2600 }
2601
2602 /* print out results */
2603 static void
2604 printResults(int ttype, int64 normal_xacts, int nclients,
2605                          TState *threads, int nthreads,
2606                          instr_time total_time, instr_time conn_total_time,
2607                          int64 total_latencies, int64 total_sqlats,
2608                          int64 throttle_lag, int64 throttle_lag_max,
2609                          int64 throttle_latency_skipped, int64 latency_late)
2610 {
2611         double          time_include,
2612                                 tps_include,
2613                                 tps_exclude;
2614         char       *s;
2615
2616         time_include = INSTR_TIME_GET_DOUBLE(total_time);
2617         tps_include = normal_xacts / time_include;
2618         tps_exclude = normal_xacts / (time_include -
2619                                                 (INSTR_TIME_GET_DOUBLE(conn_total_time) / nclients));
2620
2621         if (ttype == 0)
2622                 s = "TPC-B (sort of)";
2623         else if (ttype == 2)
2624                 s = "Update only pgbench_accounts";
2625         else if (ttype == 1)
2626                 s = "SELECT only";
2627         else
2628                 s = "Custom query";
2629
2630         printf("transaction type: %s\n", s);
2631         printf("scaling factor: %d\n", scale);
2632         printf("query mode: %s\n", QUERYMODE[querymode]);
2633         printf("number of clients: %d\n", nclients);
2634         printf("number of threads: %d\n", nthreads);
2635         if (duration <= 0)
2636         {
2637                 printf("number of transactions per client: %d\n", nxacts);
2638                 printf("number of transactions actually processed: " INT64_FORMAT "/" INT64_FORMAT "\n",
2639                            normal_xacts, (int64) nxacts * nclients);
2640         }
2641         else
2642         {
2643                 printf("duration: %d s\n", duration);
2644                 printf("number of transactions actually processed: " INT64_FORMAT "\n",
2645                            normal_xacts);
2646         }
2647
2648         /* Remaining stats are nonsensical if we failed to execute any xacts */
2649         if (normal_xacts <= 0)
2650                 return;
2651
2652         if (throttle_delay && latency_limit)
2653                 printf("number of transactions skipped: " INT64_FORMAT " (%.3f %%)\n",
2654                            throttle_latency_skipped,
2655                            100.0 * throttle_latency_skipped / (throttle_latency_skipped + normal_xacts));
2656
2657         if (latency_limit)
2658                 printf("number of transactions above the %.1f ms latency limit: " INT64_FORMAT " (%.3f %%)\n",
2659                            latency_limit / 1000.0, latency_late,
2660                    100.0 * latency_late / (throttle_latency_skipped + normal_xacts));
2661
2662         if (throttle_delay || progress || latency_limit)
2663         {
2664                 /* compute and show latency average and standard deviation */
2665                 double          latency = 0.001 * total_latencies / normal_xacts;
2666                 double          sqlat = (double) total_sqlats / normal_xacts;
2667
2668                 printf("latency average: %.3f ms\n"
2669                            "latency stddev: %.3f ms\n",
2670                            latency, 0.001 * sqrt(sqlat - 1000000.0 * latency * latency));
2671         }
2672         else
2673         {
2674                 /* only an average latency computed from the duration is available */
2675                 printf("latency average: %.3f ms\n",
2676                            1000.0 * duration * nclients / normal_xacts);
2677         }
2678
2679         if (throttle_delay)
2680         {
2681                 /*
2682                  * Report average transaction lag under rate limit throttling.  This
2683                  * is the delay between scheduled and actual start times for the
2684                  * transaction.  The measured lag may be caused by thread/client load,
2685                  * the database load, or the Poisson throttling process.
2686                  */
2687                 printf("rate limit schedule lag: avg %.3f (max %.3f) ms\n",
2688                            0.001 * throttle_lag / normal_xacts, 0.001 * throttle_lag_max);
2689         }
2690
2691         printf("tps = %f (including connections establishing)\n", tps_include);
2692         printf("tps = %f (excluding connections establishing)\n", tps_exclude);
2693
2694         /* Report per-command latencies */
2695         if (is_latencies)
2696         {
2697                 int                     i;
2698
2699                 for (i = 0; i < num_files; i++)
2700                 {
2701                         Command   **commands;
2702
2703                         if (num_files > 1)
2704                                 printf("statement latencies in milliseconds, file %d:\n", i + 1);
2705                         else
2706                                 printf("statement latencies in milliseconds:\n");
2707
2708                         for (commands = sql_files[i]; *commands != NULL; commands++)
2709                         {
2710                                 Command    *command = *commands;
2711                                 int                     cnum = command->command_num;
2712                                 double          total_time;
2713                                 instr_time      total_exec_elapsed;
2714                                 int                     total_exec_count;
2715                                 int                     t;
2716
2717                                 /* Accumulate per-thread data for command */
2718                                 INSTR_TIME_SET_ZERO(total_exec_elapsed);
2719                                 total_exec_count = 0;
2720                                 for (t = 0; t < nthreads; t++)
2721                                 {
2722                                         TState     *thread = &threads[t];
2723
2724                                         INSTR_TIME_ADD(total_exec_elapsed,
2725                                                                    thread->exec_elapsed[cnum]);
2726                                         total_exec_count += thread->exec_count[cnum];
2727                                 }
2728
2729                                 if (total_exec_count > 0)
2730                                         total_time = INSTR_TIME_GET_MILLISEC(total_exec_elapsed) / (double) total_exec_count;
2731                                 else
2732                                         total_time = 0.0;
2733
2734                                 printf("\t%f\t%s\n", total_time, command->line);
2735                         }
2736                 }
2737         }
2738 }
2739
2740
2741 int
2742 main(int argc, char **argv)
2743 {
2744         static struct option long_options[] = {
2745                 /* systematic long/short named options */
2746                 {"client", required_argument, NULL, 'c'},
2747                 {"connect", no_argument, NULL, 'C'},
2748                 {"debug", no_argument, NULL, 'd'},
2749                 {"define", required_argument, NULL, 'D'},
2750                 {"file", required_argument, NULL, 'f'},
2751                 {"fillfactor", required_argument, NULL, 'F'},
2752                 {"host", required_argument, NULL, 'h'},
2753                 {"initialize", no_argument, NULL, 'i'},
2754                 {"jobs", required_argument, NULL, 'j'},
2755                 {"log", no_argument, NULL, 'l'},
2756                 {"no-vacuum", no_argument, NULL, 'n'},
2757                 {"port", required_argument, NULL, 'p'},
2758                 {"progress", required_argument, NULL, 'P'},
2759                 {"protocol", required_argument, NULL, 'M'},
2760                 {"quiet", no_argument, NULL, 'q'},
2761                 {"report-latencies", no_argument, NULL, 'r'},
2762                 {"scale", required_argument, NULL, 's'},
2763                 {"select-only", no_argument, NULL, 'S'},
2764                 {"skip-some-updates", no_argument, NULL, 'N'},
2765                 {"time", required_argument, NULL, 'T'},
2766                 {"transactions", required_argument, NULL, 't'},
2767                 {"username", required_argument, NULL, 'U'},
2768                 {"vacuum-all", no_argument, NULL, 'v'},
2769                 /* long-named only options */
2770                 {"foreign-keys", no_argument, &foreign_keys, 1},
2771                 {"index-tablespace", required_argument, NULL, 3},
2772                 {"tablespace", required_argument, NULL, 2},
2773                 {"unlogged-tables", no_argument, &unlogged_tables, 1},
2774                 {"sampling-rate", required_argument, NULL, 4},
2775                 {"aggregate-interval", required_argument, NULL, 5},
2776                 {"rate", required_argument, NULL, 'R'},
2777                 {"latency-limit", required_argument, NULL, 'L'},
2778                 {"progress-timestamp", no_argument, NULL, 6},
2779                 {NULL, 0, NULL, 0}
2780         };
2781
2782         int                     c;
2783         int                     nclients = 1;   /* default number of simulated clients */
2784         int                     nthreads = 1;   /* default number of threads */
2785         int                     is_init_mode = 0;               /* initialize mode? */
2786         int                     is_no_vacuum = 0;               /* no vacuum at all before testing? */
2787         int                     do_vacuum_accounts = 0; /* do vacuum accounts before testing? */
2788         int                     ttype = 0;              /* transaction type. 0: TPC-B, 1: SELECT only,
2789                                                                  * 2: skip update of branches and tellers */
2790         int                     optindex;
2791         char       *filename = NULL;
2792         bool            scale_given = false;
2793
2794         bool            benchmarking_option_set = false;
2795         bool            initialization_option_set = false;
2796
2797         CState     *state;                      /* status of clients */
2798         TState     *threads;            /* array of thread */
2799
2800         instr_time      start_time;             /* start up time */
2801         instr_time      total_time;
2802         instr_time      conn_total_time;
2803         int64           total_xacts = 0;
2804         int64           total_latencies = 0;
2805         int64           total_sqlats = 0;
2806         int64           throttle_lag = 0;
2807         int64           throttle_lag_max = 0;
2808         int64           throttle_latency_skipped = 0;
2809         int64           latency_late = 0;
2810
2811         int                     i;
2812         int                     nclients_dealt;
2813
2814 #ifdef HAVE_GETRLIMIT
2815         struct rlimit rlim;
2816 #endif
2817
2818         PGconn     *con;
2819         PGresult   *res;
2820         char       *env;
2821
2822         char            val[64];
2823
2824         progname = get_progname(argv[0]);
2825
2826         if (argc > 1)
2827         {
2828                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
2829                 {
2830                         usage();
2831                         exit(0);
2832                 }
2833                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
2834                 {
2835                         puts("pgbench (PostgreSQL) " PG_VERSION);
2836                         exit(0);
2837                 }
2838         }
2839
2840 #ifdef WIN32
2841         /* stderr is buffered on Win32. */
2842         setvbuf(stderr, NULL, _IONBF, 0);
2843 #endif
2844
2845         if ((env = getenv("PGHOST")) != NULL && *env != '\0')
2846                 pghost = env;
2847         if ((env = getenv("PGPORT")) != NULL && *env != '\0')
2848                 pgport = env;
2849         else if ((env = getenv("PGUSER")) != NULL && *env != '\0')
2850                 login = env;
2851
2852         state = (CState *) pg_malloc(sizeof(CState));
2853         memset(state, 0, sizeof(CState));
2854
2855         while ((c = getopt_long(argc, argv, "ih:nvp:dqSNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1)
2856         {
2857                 switch (c)
2858                 {
2859                         case 'i':
2860                                 is_init_mode++;
2861                                 break;
2862                         case 'h':
2863                                 pghost = pg_strdup(optarg);
2864                                 break;
2865                         case 'n':
2866                                 is_no_vacuum++;
2867                                 break;
2868                         case 'v':
2869                                 do_vacuum_accounts++;
2870                                 break;
2871                         case 'p':
2872                                 pgport = pg_strdup(optarg);
2873                                 break;
2874                         case 'd':
2875                                 debug++;
2876                                 break;
2877                         case 'S':
2878                                 ttype = 1;
2879                                 benchmarking_option_set = true;
2880                                 break;
2881                         case 'N':
2882                                 ttype = 2;
2883                                 benchmarking_option_set = true;
2884                                 break;
2885                         case 'c':
2886                                 benchmarking_option_set = true;
2887                                 nclients = atoi(optarg);
2888                                 if (nclients <= 0 || nclients > MAXCLIENTS)
2889                                 {
2890                                         fprintf(stderr, "invalid number of clients: \"%s\"\n",
2891                                                         optarg);
2892                                         exit(1);
2893                                 }
2894 #ifdef HAVE_GETRLIMIT
2895 #ifdef RLIMIT_NOFILE                    /* most platforms use RLIMIT_NOFILE */
2896                                 if (getrlimit(RLIMIT_NOFILE, &rlim) == -1)
2897 #else                                                   /* but BSD doesn't ... */
2898                                 if (getrlimit(RLIMIT_OFILE, &rlim) == -1)
2899 #endif   /* RLIMIT_NOFILE */
2900                                 {
2901                                         fprintf(stderr, "getrlimit failed: %s\n", strerror(errno));
2902                                         exit(1);
2903                                 }
2904                                 if (rlim.rlim_cur < nclients + 3)
2905                                 {
2906                                         fprintf(stderr, "need at least %d open files, but system limit is %ld\n",
2907                                                         nclients + 3, (long) rlim.rlim_cur);
2908                                         fprintf(stderr, "Reduce number of clients, or use limit/ulimit to increase the system limit.\n");
2909                                         exit(1);
2910                                 }
2911 #endif   /* HAVE_GETRLIMIT */
2912                                 break;
2913                         case 'j':                       /* jobs */
2914                                 benchmarking_option_set = true;
2915                                 nthreads = atoi(optarg);
2916                                 if (nthreads <= 0)
2917                                 {
2918                                         fprintf(stderr, "invalid number of threads: \"%s\"\n",
2919                                                         optarg);
2920                                         exit(1);
2921                                 }
2922 #ifndef ENABLE_THREAD_SAFETY
2923                                 if (nthreads != 1)
2924                                 {
2925                                         fprintf(stderr, "threads are not supported on this platform; use -j1\n");
2926                                         exit(1);
2927                                 }
2928 #endif   /* !ENABLE_THREAD_SAFETY */
2929                                 break;
2930                         case 'C':
2931                                 benchmarking_option_set = true;
2932                                 is_connect = true;
2933                                 break;
2934                         case 'r':
2935                                 benchmarking_option_set = true;
2936                                 is_latencies = true;
2937                                 break;
2938                         case 's':
2939                                 scale_given = true;
2940                                 scale = atoi(optarg);
2941                                 if (scale <= 0)
2942                                 {
2943                                         fprintf(stderr, "invalid scaling factor: \"%s\"\n", optarg);
2944                                         exit(1);
2945                                 }
2946                                 break;
2947                         case 't':
2948                                 benchmarking_option_set = true;
2949                                 if (duration > 0)
2950                                 {
2951                                         fprintf(stderr, "specify either a number of transactions (-t) or a duration (-T), not both\n");
2952                                         exit(1);
2953                                 }
2954                                 nxacts = atoi(optarg);
2955                                 if (nxacts <= 0)
2956                                 {
2957                                         fprintf(stderr, "invalid number of transactions: \"%s\"\n",
2958                                                         optarg);
2959                                         exit(1);
2960                                 }
2961                                 break;
2962                         case 'T':
2963                                 benchmarking_option_set = true;
2964                                 if (nxacts > 0)
2965                                 {
2966                                         fprintf(stderr, "specify either a number of transactions (-t) or a duration (-T), not both\n");
2967                                         exit(1);
2968                                 }
2969                                 duration = atoi(optarg);
2970                                 if (duration <= 0)
2971                                 {
2972                                         fprintf(stderr, "invalid duration: \"%s\"\n", optarg);
2973                                         exit(1);
2974                                 }
2975                                 break;
2976                         case 'U':
2977                                 login = pg_strdup(optarg);
2978                                 break;
2979                         case 'l':
2980                                 benchmarking_option_set = true;
2981                                 use_log = true;
2982                                 break;
2983                         case 'q':
2984                                 initialization_option_set = true;
2985                                 use_quiet = true;
2986                                 break;
2987                         case 'f':
2988                                 benchmarking_option_set = true;
2989                                 ttype = 3;
2990                                 filename = pg_strdup(optarg);
2991                                 if (process_file(filename) == false || *sql_files[num_files - 1] == NULL)
2992                                         exit(1);
2993                                 break;
2994                         case 'D':
2995                                 {
2996                                         char       *p;
2997
2998                                         benchmarking_option_set = true;
2999
3000                                         if ((p = strchr(optarg, '=')) == NULL || p == optarg || *(p + 1) == '\0')
3001                                         {
3002                                                 fprintf(stderr, "invalid variable definition: \"%s\"\n",
3003                                                                 optarg);
3004                                                 exit(1);
3005                                         }
3006
3007                                         *p++ = '\0';
3008                                         if (!putVariable(&state[0], "option", optarg, p))
3009                                                 exit(1);
3010                                 }
3011                                 break;
3012                         case 'F':
3013                                 initialization_option_set = true;
3014                                 fillfactor = atoi(optarg);
3015                                 if (fillfactor < 10 || fillfactor > 100)
3016                                 {
3017                                         fprintf(stderr, "invalid fillfactor: \"%s\"\n", optarg);
3018                                         exit(1);
3019                                 }
3020                                 break;
3021                         case 'M':
3022                                 benchmarking_option_set = true;
3023                                 if (num_files > 0)
3024                                 {
3025                                         fprintf(stderr, "query mode (-M) should be specified before any transaction scripts (-f)\n");
3026                                         exit(1);
3027                                 }
3028                                 for (querymode = 0; querymode < NUM_QUERYMODE; querymode++)
3029                                         if (strcmp(optarg, QUERYMODE[querymode]) == 0)
3030                                                 break;
3031                                 if (querymode >= NUM_QUERYMODE)
3032                                 {
3033                                         fprintf(stderr, "invalid query mode (-M): \"%s\"\n",
3034                                                         optarg);
3035                                         exit(1);
3036                                 }
3037                                 break;
3038                         case 'P':
3039                                 benchmarking_option_set = true;
3040                                 progress = atoi(optarg);
3041                                 if (progress <= 0)
3042                                 {
3043                                         fprintf(stderr, "invalid thread progress delay: \"%s\"\n",
3044                                                         optarg);
3045                                         exit(1);
3046                                 }
3047                                 break;
3048                         case 'R':
3049                                 {
3050                                         /* get a double from the beginning of option value */
3051                                         double          throttle_value = atof(optarg);
3052
3053                                         benchmarking_option_set = true;
3054
3055                                         if (throttle_value <= 0.0)
3056                                         {
3057                                                 fprintf(stderr, "invalid rate limit: \"%s\"\n", optarg);
3058                                                 exit(1);
3059                                         }
3060                                         /* Invert rate limit into a time offset */
3061                                         throttle_delay = (int64) (1000000.0 / throttle_value);
3062                                 }
3063                                 break;
3064                         case 'L':
3065                                 {
3066                                         double          limit_ms = atof(optarg);
3067
3068                                         if (limit_ms <= 0.0)
3069                                         {
3070                                                 fprintf(stderr, "invalid latency limit: \"%s\"\n",
3071                                                                 optarg);
3072                                                 exit(1);
3073                                         }
3074                                         benchmarking_option_set = true;
3075                                         latency_limit = (int64) (limit_ms * 1000);
3076                                 }
3077                                 break;
3078                         case 0:
3079                                 /* This covers long options which take no argument. */
3080                                 if (foreign_keys || unlogged_tables)
3081                                         initialization_option_set = true;
3082                                 break;
3083                         case 2:                         /* tablespace */
3084                                 initialization_option_set = true;
3085                                 tablespace = pg_strdup(optarg);
3086                                 break;
3087                         case 3:                         /* index-tablespace */
3088                                 initialization_option_set = true;
3089                                 index_tablespace = pg_strdup(optarg);
3090                                 break;
3091                         case 4:
3092                                 benchmarking_option_set = true;
3093                                 sample_rate = atof(optarg);
3094                                 if (sample_rate <= 0.0 || sample_rate > 1.0)
3095                                 {
3096                                         fprintf(stderr, "invalid sampling rate: \"%s\"\n", optarg);
3097                                         exit(1);
3098                                 }
3099                                 break;
3100                         case 5:
3101 #ifdef WIN32
3102                                 fprintf(stderr, "--aggregate-interval is not currently supported on Windows\n");
3103                                 exit(1);
3104 #else
3105                                 benchmarking_option_set = true;
3106                                 agg_interval = atoi(optarg);
3107                                 if (agg_interval <= 0)
3108                                 {
3109                                         fprintf(stderr, "invalid number of seconds for aggregation: \"%s\"\n",
3110                                                         optarg);
3111                                         exit(1);
3112                                 }
3113 #endif
3114                                 break;
3115                         case 6:
3116                                 progress_timestamp = true;
3117                                 benchmarking_option_set = true;
3118                                 break;
3119                         default:
3120                                 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
3121                                 exit(1);
3122                                 break;
3123                 }
3124         }
3125
3126         /*
3127          * Don't need more threads than there are clients.  (This is not merely an
3128          * optimization; throttle_delay is calculated incorrectly below if some
3129          * threads have no clients assigned to them.)
3130          */
3131         if (nthreads > nclients)
3132                 nthreads = nclients;
3133
3134         /* compute a per thread delay */
3135         throttle_delay *= nthreads;
3136
3137         if (argc > optind)
3138                 dbName = argv[optind];
3139         else
3140         {
3141                 if ((env = getenv("PGDATABASE")) != NULL && *env != '\0')
3142                         dbName = env;
3143                 else if (login != NULL && *login != '\0')
3144                         dbName = login;
3145                 else
3146                         dbName = "";
3147         }
3148
3149         if (is_init_mode)
3150         {
3151                 if (benchmarking_option_set)
3152                 {
3153                         fprintf(stderr, "some of the specified options cannot be used in initialization (-i) mode\n");
3154                         exit(1);
3155                 }
3156
3157                 init(is_no_vacuum);
3158                 exit(0);
3159         }
3160         else
3161         {
3162                 if (initialization_option_set)
3163                 {
3164                         fprintf(stderr, "some of the specified options cannot be used in benchmarking mode\n");
3165                         exit(1);
3166                 }
3167         }
3168
3169         /* Use DEFAULT_NXACTS if neither nxacts nor duration is specified. */
3170         if (nxacts <= 0 && duration <= 0)
3171                 nxacts = DEFAULT_NXACTS;
3172
3173         /* --sampling-rate may be used only with -l */
3174         if (sample_rate > 0.0 && !use_log)
3175         {
3176                 fprintf(stderr, "log sampling (--sampling-rate) is allowed only when logging transactions (-l)\n");
3177                 exit(1);
3178         }
3179
3180         /* --sampling-rate must not be used with --aggregate-interval */
3181         if (sample_rate > 0.0 && agg_interval > 0)
3182         {
3183                 fprintf(stderr, "log sampling (--sampling-rate) and aggregation (--aggregate-interval) cannot be used at the same time\n");
3184                 exit(1);
3185         }
3186
3187         if (agg_interval > 0 && !use_log)
3188         {
3189                 fprintf(stderr, "log aggregation is allowed only when actually logging transactions\n");
3190                 exit(1);
3191         }
3192
3193         if (duration > 0 && agg_interval > duration)
3194         {
3195                 fprintf(stderr, "number of seconds for aggregation (%d) must not be higher than test duration (%d)\n", agg_interval, duration);
3196                 exit(1);
3197         }
3198
3199         if (duration > 0 && agg_interval > 0 && duration % agg_interval != 0)
3200         {
3201                 fprintf(stderr, "duration (%d) must be a multiple of aggregation interval (%d)\n", duration, agg_interval);
3202                 exit(1);
3203         }
3204
3205         /*
3206          * save main process id in the global variable because process id will be
3207          * changed after fork.
3208          */
3209         main_pid = (int) getpid();
3210         progress_nclients = nclients;
3211         progress_nthreads = nthreads;
3212
3213         if (nclients > 1)
3214         {
3215                 state = (CState *) pg_realloc(state, sizeof(CState) * nclients);
3216                 memset(state + 1, 0, sizeof(CState) * (nclients - 1));
3217
3218                 /* copy any -D switch values to all clients */
3219                 for (i = 1; i < nclients; i++)
3220                 {
3221                         int                     j;
3222
3223                         state[i].id = i;
3224                         for (j = 0; j < state[0].nvariables; j++)
3225                         {
3226                                 if (!putVariable(&state[i], "startup", state[0].variables[j].name, state[0].variables[j].value))
3227                                         exit(1);
3228                         }
3229                 }
3230         }
3231
3232         if (debug)
3233         {
3234                 if (duration <= 0)
3235                         printf("pghost: %s pgport: %s nclients: %d nxacts: %d dbName: %s\n",
3236                                    pghost, pgport, nclients, nxacts, dbName);
3237                 else
3238                         printf("pghost: %s pgport: %s nclients: %d duration: %d dbName: %s\n",
3239                                    pghost, pgport, nclients, duration, dbName);
3240         }
3241
3242         /* opening connection... */
3243         con = doConnect();
3244         if (con == NULL)
3245                 exit(1);
3246
3247         if (PQstatus(con) == CONNECTION_BAD)
3248         {
3249                 fprintf(stderr, "connection to database \"%s\" failed\n", dbName);
3250                 fprintf(stderr, "%s", PQerrorMessage(con));
3251                 exit(1);
3252         }
3253
3254         if (ttype != 3)
3255         {
3256                 /*
3257                  * get the scaling factor that should be same as count(*) from
3258                  * pgbench_branches if this is not a custom query
3259                  */
3260                 res = PQexec(con, "select count(*) from pgbench_branches");
3261                 if (PQresultStatus(res) != PGRES_TUPLES_OK)
3262                 {
3263                         char       *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
3264
3265                         fprintf(stderr, "%s", PQerrorMessage(con));
3266                         if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
3267                         {
3268                                 fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
3269                         }
3270
3271                         exit(1);
3272                 }
3273                 scale = atoi(PQgetvalue(res, 0, 0));
3274                 if (scale < 0)
3275                 {
3276                         fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
3277                                         PQgetvalue(res, 0, 0));
3278                         exit(1);
3279                 }
3280                 PQclear(res);
3281
3282                 /* warn if we override user-given -s switch */
3283                 if (scale_given)
3284                         fprintf(stderr,
3285                                         "scale option ignored, using count from pgbench_branches table (%d)\n",
3286                                         scale);
3287         }
3288
3289         /*
3290          * :scale variables normally get -s or database scale, but don't override
3291          * an explicit -D switch
3292          */
3293         if (getVariable(&state[0], "scale") == NULL)
3294         {
3295                 snprintf(val, sizeof(val), "%d", scale);
3296                 for (i = 0; i < nclients; i++)
3297                 {
3298                         if (!putVariable(&state[i], "startup", "scale", val))
3299                                 exit(1);
3300                 }
3301         }
3302
3303         /*
3304          * Define a :client_id variable that is unique per connection. But don't
3305          * override an explicit -D switch.
3306          */
3307         if (getVariable(&state[0], "client_id") == NULL)
3308         {
3309                 for (i = 0; i < nclients; i++)
3310                 {
3311                         snprintf(val, sizeof(val), "%d", i);
3312                         if (!putVariable(&state[i], "startup", "client_id", val))
3313                                 exit(1);
3314                 }
3315         }
3316
3317         if (!is_no_vacuum)
3318         {
3319                 fprintf(stderr, "starting vacuum...");
3320                 tryExecuteStatement(con, "vacuum pgbench_branches");
3321                 tryExecuteStatement(con, "vacuum pgbench_tellers");
3322                 tryExecuteStatement(con, "truncate pgbench_history");
3323                 fprintf(stderr, "end.\n");
3324
3325                 if (do_vacuum_accounts)
3326                 {
3327                         fprintf(stderr, "starting vacuum pgbench_accounts...");
3328                         tryExecuteStatement(con, "vacuum analyze pgbench_accounts");
3329                         fprintf(stderr, "end.\n");
3330                 }
3331         }
3332         PQfinish(con);
3333
3334         /* set random seed */
3335         INSTR_TIME_SET_CURRENT(start_time);
3336         srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
3337
3338         /* process builtin SQL scripts */
3339         switch (ttype)
3340         {
3341                 case 0:
3342                         sql_files[0] = process_builtin(tpc_b,
3343                                                                                    "<builtin: TPC-B (sort of)>");
3344                         num_files = 1;
3345                         break;
3346
3347                 case 1:
3348                         sql_files[0] = process_builtin(select_only,
3349                                                                                    "<builtin: select only>");
3350                         num_files = 1;
3351                         break;
3352
3353                 case 2:
3354                         sql_files[0] = process_builtin(simple_update,
3355                                                                                    "<builtin: simple update>");
3356                         num_files = 1;
3357                         break;
3358
3359                 default:
3360                         break;
3361         }
3362
3363         /* set up thread data structures */
3364         threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
3365         nclients_dealt = 0;
3366
3367         for (i = 0; i < nthreads; i++)
3368         {
3369                 TState     *thread = &threads[i];
3370
3371                 thread->tid = i;
3372                 thread->state = &state[nclients_dealt];
3373                 thread->nstate =
3374                         (nclients - nclients_dealt + nthreads - i - 1) / (nthreads - i);
3375                 thread->random_state[0] = random();
3376                 thread->random_state[1] = random();
3377                 thread->random_state[2] = random();
3378                 thread->throttle_latency_skipped = 0;
3379                 thread->latency_late = 0;
3380
3381                 nclients_dealt += thread->nstate;
3382
3383                 if (is_latencies)
3384                 {
3385                         /* Reserve memory for the thread to store per-command latencies */
3386                         int                     t;
3387
3388                         thread->exec_elapsed = (instr_time *)
3389                                 pg_malloc(sizeof(instr_time) * num_commands);
3390                         thread->exec_count = (int *)
3391                                 pg_malloc(sizeof(int) * num_commands);
3392
3393                         for (t = 0; t < num_commands; t++)
3394                         {
3395                                 INSTR_TIME_SET_ZERO(thread->exec_elapsed[t]);
3396                                 thread->exec_count[t] = 0;
3397                         }
3398                 }
3399                 else
3400                 {
3401                         thread->exec_elapsed = NULL;
3402                         thread->exec_count = NULL;
3403                 }
3404         }
3405
3406         /* all clients must be assigned to a thread */
3407         Assert(nclients_dealt == nclients);
3408
3409         /* get start up time */
3410         INSTR_TIME_SET_CURRENT(start_time);
3411
3412         /* set alarm if duration is specified. */
3413         if (duration > 0)
3414                 setalarm(duration);
3415
3416         /* start threads */
3417 #ifdef ENABLE_THREAD_SAFETY
3418         for (i = 0; i < nthreads; i++)
3419         {
3420                 TState     *thread = &threads[i];
3421
3422                 INSTR_TIME_SET_CURRENT(thread->start_time);
3423
3424                 /* the first thread (i = 0) is executed by main thread */
3425                 if (i > 0)
3426                 {
3427                         int                     err = pthread_create(&thread->thread, NULL, threadRun, thread);
3428
3429                         if (err != 0 || thread->thread == INVALID_THREAD)
3430                         {
3431                                 fprintf(stderr, "could not create thread: %s\n", strerror(err));
3432                                 exit(1);
3433                         }
3434                 }
3435                 else
3436                 {
3437                         thread->thread = INVALID_THREAD;
3438                 }
3439         }
3440 #else
3441         INSTR_TIME_SET_CURRENT(threads[0].start_time);
3442         threads[0].thread = INVALID_THREAD;
3443 #endif   /* ENABLE_THREAD_SAFETY */
3444
3445         /* wait for threads and accumulate results */
3446         INSTR_TIME_SET_ZERO(conn_total_time);
3447         for (i = 0; i < nthreads; i++)
3448         {
3449                 TState     *thread = &threads[i];
3450                 int                     j;
3451
3452 #ifdef ENABLE_THREAD_SAFETY
3453                 if (threads[i].thread == INVALID_THREAD)
3454                         /* actually run this thread directly in the main thread */
3455                         (void) threadRun(thread);
3456                 else
3457                         /* wait for other threads; should we check that 0 is returned? */
3458                         pthread_join(thread->thread, NULL);
3459 #else
3460                 (void) threadRun(thread);
3461 #endif   /* ENABLE_THREAD_SAFETY */
3462
3463                 /* thread level stats */
3464                 throttle_lag += thread->throttle_lag;
3465                 throttle_latency_skipped += threads->throttle_latency_skipped;
3466                 latency_late += thread->latency_late;
3467                 if (throttle_lag_max > thread->throttle_lag_max)
3468                         throttle_lag_max = thread->throttle_lag_max;
3469                 INSTR_TIME_ADD(conn_total_time, thread->conn_time);
3470
3471                 /* client-level stats */
3472                 for (j = 0; j < thread->nstate; j++)
3473                 {
3474                         total_xacts += thread->state[j].cnt;
3475                         total_latencies += thread->state[j].txn_latencies;
3476                         total_sqlats += thread->state[j].txn_sqlats;
3477                 }
3478         }
3479         disconnect_all(state, nclients);
3480
3481         /*
3482          * XXX We compute results as though every client of every thread started
3483          * and finished at the same time.  That model can diverge noticeably from
3484          * reality for a short benchmark run involving relatively many threads.
3485          * The first thread may process notably many transactions before the last
3486          * thread begins.  Improving the model alone would bring limited benefit,
3487          * because performance during those periods of partial thread count can
3488          * easily exceed steady state performance.  This is one of the many ways
3489          * short runs convey deceptive performance figures.
3490          */
3491         INSTR_TIME_SET_CURRENT(total_time);
3492         INSTR_TIME_SUBTRACT(total_time, start_time);
3493         printResults(ttype, total_xacts, nclients, threads, nthreads,
3494                                  total_time, conn_total_time, total_latencies, total_sqlats,
3495                                  throttle_lag, throttle_lag_max, throttle_latency_skipped,
3496                                  latency_late);
3497
3498         return 0;
3499 }
3500
3501 static void *
3502 threadRun(void *arg)
3503 {
3504         TState     *thread = (TState *) arg;
3505         CState     *state = thread->state;
3506         FILE       *logfile = NULL; /* per-thread log file */
3507         instr_time      start,
3508                                 end;
3509         int                     nstate = thread->nstate;
3510         int                     remains = nstate;               /* number of remaining clients */
3511         int                     i;
3512
3513         /* for reporting progress: */
3514         int64           thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time);
3515         int64           last_report = thread_start;
3516         int64           next_report = last_report + (int64) progress * 1000000;
3517         int64           last_count = 0,
3518                                 last_lats = 0,
3519                                 last_sqlats = 0,
3520                                 last_lags = 0,
3521                                 last_skipped = 0;
3522
3523         AggVals         aggs;
3524
3525         /*
3526          * Initialize throttling rate target for all of the thread's clients.  It
3527          * might be a little more accurate to reset thread->start_time here too.
3528          * The possible drift seems too small relative to typical throttle delay
3529          * times to worry about it.
3530          */
3531         INSTR_TIME_SET_CURRENT(start);
3532         thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);
3533         thread->throttle_lag = 0;
3534         thread->throttle_lag_max = 0;
3535
3536         INSTR_TIME_SET_ZERO(thread->conn_time);
3537
3538         /* open log file if requested */
3539         if (use_log)
3540         {
3541                 char            logpath[64];
3542
3543                 if (thread->tid == 0)
3544                         snprintf(logpath, sizeof(logpath), "pgbench_log.%d", main_pid);
3545                 else
3546                         snprintf(logpath, sizeof(logpath), "pgbench_log.%d.%d", main_pid, thread->tid);
3547                 logfile = fopen(logpath, "w");
3548
3549                 if (logfile == NULL)
3550                 {
3551                         fprintf(stderr, "could not open logfile \"%s\": %s\n",
3552                                         logpath, strerror(errno));
3553                         goto done;
3554                 }
3555         }
3556
3557         if (!is_connect)
3558         {
3559                 /* make connections to the database */
3560                 for (i = 0; i < nstate; i++)
3561                 {
3562                         if ((state[i].con = doConnect()) == NULL)
3563                                 goto done;
3564                 }
3565         }
3566
3567         /* time after thread and connections set up */
3568         INSTR_TIME_SET_CURRENT(thread->conn_time);
3569         INSTR_TIME_SUBTRACT(thread->conn_time, thread->start_time);
3570
3571         agg_vals_init(&aggs, thread->start_time);
3572
3573         /* send start up queries in async manner */
3574         for (i = 0; i < nstate; i++)
3575         {
3576                 CState     *st = &state[i];
3577                 Command   **commands = sql_files[st->use_file];
3578                 int                     prev_ecnt = st->ecnt;
3579
3580                 st->use_file = getrand(thread, 0, num_files - 1);
3581                 if (!doCustom(thread, st, &thread->conn_time, logfile, &aggs))
3582                         remains--;                      /* I've aborted */
3583
3584                 if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
3585                 {
3586                         fprintf(stderr, "client %d aborted in state %d; execution of meta-command failed\n",
3587                                         i, st->state);
3588                         remains--;                      /* I've aborted */
3589                         PQfinish(st->con);
3590                         st->con = NULL;
3591                 }
3592         }
3593
3594         while (remains > 0)
3595         {
3596                 fd_set          input_mask;
3597                 int                     maxsock;        /* max socket number to be waited */
3598                 int64           now_usec = 0;
3599                 int64           min_usec;
3600
3601                 FD_ZERO(&input_mask);
3602
3603                 maxsock = -1;
3604                 min_usec = PG_INT64_MAX;
3605                 for (i = 0; i < nstate; i++)
3606                 {
3607                         CState     *st = &state[i];
3608                         Command   **commands = sql_files[st->use_file];
3609                         int                     sock;
3610
3611                         if (st->con == NULL)
3612                         {
3613                                 continue;
3614                         }
3615                         else if (st->sleeping)
3616                         {
3617                                 if (st->throttling && timer_exceeded)
3618                                 {
3619                                         /* interrupt client which has not started a transaction */
3620                                         remains--;
3621                                         st->sleeping = 0;
3622                                         st->throttling = false;
3623                                         PQfinish(st->con);
3624                                         st->con = NULL;
3625                                         continue;
3626                                 }
3627                                 else    /* just a nap from the script */
3628                                 {
3629                                         int                     this_usec;
3630
3631                                         if (min_usec == PG_INT64_MAX)
3632                                         {
3633                                                 instr_time      now;
3634
3635                                                 INSTR_TIME_SET_CURRENT(now);
3636                                                 now_usec = INSTR_TIME_GET_MICROSEC(now);
3637                                         }
3638
3639                                         this_usec = st->txn_scheduled - now_usec;
3640                                         if (min_usec > this_usec)
3641                                                 min_usec = this_usec;
3642                                 }
3643                         }
3644                         else if (commands[st->state]->type == META_COMMAND)
3645                         {
3646                                 min_usec = 0;   /* the connection is ready to run */
3647                                 break;
3648                         }
3649
3650                         sock = PQsocket(st->con);
3651                         if (sock < 0)
3652                         {
3653                                 fprintf(stderr, "bad socket: %s\n", strerror(errno));
3654                                 goto done;
3655                         }
3656
3657                         FD_SET(sock, &input_mask);
3658
3659                         if (maxsock < sock)
3660                                 maxsock = sock;
3661                 }
3662
3663                 /* also wake up to print the next progress report on time */
3664                 if (progress && min_usec > 0 && thread->tid == 0)
3665                 {
3666                         /* get current time if needed */
3667                         if (now_usec == 0)
3668                         {
3669                                 instr_time      now;
3670
3671                                 INSTR_TIME_SET_CURRENT(now);
3672                                 now_usec = INSTR_TIME_GET_MICROSEC(now);
3673                         }
3674
3675                         if (now_usec >= next_report)
3676                                 min_usec = 0;
3677                         else if ((next_report - now_usec) < min_usec)
3678                                 min_usec = next_report - now_usec;
3679                 }
3680
3681                 /*
3682                  * Sleep until we receive data from the server, or a nap-time
3683                  * specified in the script ends, or it's time to print a progress
3684                  * report.
3685                  */
3686                 if (min_usec > 0 && maxsock != -1)
3687                 {
3688                         int                     nsocks; /* return from select(2) */
3689
3690                         if (min_usec != PG_INT64_MAX)
3691                         {
3692                                 struct timeval timeout;
3693
3694                                 timeout.tv_sec = min_usec / 1000000;
3695                                 timeout.tv_usec = min_usec % 1000000;
3696                                 nsocks = select(maxsock + 1, &input_mask, NULL, NULL, &timeout);
3697                         }
3698                         else
3699                                 nsocks = select(maxsock + 1, &input_mask, NULL, NULL, NULL);
3700                         if (nsocks < 0)
3701                         {
3702                                 if (errno == EINTR)
3703                                         continue;
3704                                 /* must be something wrong */
3705                                 fprintf(stderr, "select() failed: %s\n", strerror(errno));
3706                                 goto done;
3707                         }
3708                 }
3709
3710                 /* ok, backend returns reply */
3711                 for (i = 0; i < nstate; i++)
3712                 {
3713                         CState     *st = &state[i];
3714                         Command   **commands = sql_files[st->use_file];
3715                         int                     prev_ecnt = st->ecnt;
3716
3717                         if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
3718                                                         || commands[st->state]->type == META_COMMAND))
3719                         {
3720                                 if (!doCustom(thread, st, &thread->conn_time, logfile, &aggs))
3721                                         remains--;      /* I've aborted */
3722                         }
3723
3724                         if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
3725                         {
3726                                 fprintf(stderr, "client %d aborted in state %d; execution of meta-command failed\n",
3727                                                 i, st->state);
3728                                 remains--;              /* I've aborted */
3729                                 PQfinish(st->con);
3730                                 st->con = NULL;
3731                         }
3732                 }
3733
3734                 /* progress report by thread 0 for all threads */
3735                 if (progress && thread->tid == 0)
3736                 {
3737                         instr_time      now_time;
3738                         int64           now;
3739
3740                         INSTR_TIME_SET_CURRENT(now_time);
3741                         now = INSTR_TIME_GET_MICROSEC(now_time);
3742                         if (now >= next_report)
3743                         {
3744                                 /* generate and show report */
3745                                 int64           count = 0,
3746                                                         lats = 0,
3747                                                         sqlats = 0,
3748                                                         lags = 0,
3749                                                         skipped = 0;
3750                                 int64           run = now - last_report;
3751                                 double          tps,
3752                                                         total_run,
3753                                                         latency,
3754                                                         sqlat,
3755                                                         lag,
3756                                                         stdev;
3757                                 char            tbuf[64];
3758
3759                                 /*
3760                                  * Add up the statistics of all threads.
3761                                  *
3762                                  * XXX: No locking. There is no guarantee that we get an
3763                                  * atomic snapshot of the transaction count and latencies, so
3764                                  * these figures can well be off by a small amount. The
3765                                  * progress is report's purpose is to give a quick overview of
3766                                  * how the test is going, so that shouldn't matter too much.
3767                                  * (If a read from a 64-bit integer is not atomic, you might
3768                                  * get a "torn" read and completely bogus latencies though!)
3769                                  */
3770                                 for (i = 0; i < progress_nclients; i++)
3771                                 {
3772                                         count += state[i].cnt;
3773                                         lats += state[i].txn_latencies;
3774                                         sqlats += state[i].txn_sqlats;
3775                                 }
3776
3777                                 for (i = 0; i < progress_nthreads; i++)
3778                                 {
3779                                         skipped += thread[i].throttle_latency_skipped;
3780                                         lags += thread[i].throttle_lag;
3781                                 }
3782
3783                                 total_run = (now - thread_start) / 1000000.0;
3784                                 tps = 1000000.0 * (count - last_count) / run;
3785                                 latency = 0.001 * (lats - last_lats) / (count - last_count);
3786                                 sqlat = 1.0 * (sqlats - last_sqlats) / (count - last_count);
3787                                 stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency);
3788                                 lag = 0.001 * (lags - last_lags) / (count - last_count);
3789
3790                                 if (progress_timestamp)
3791                                         sprintf(tbuf, "%.03f s",
3792                                                         INSTR_TIME_GET_MILLISEC(now_time) / 1000.0);
3793                                 else
3794                                         sprintf(tbuf, "%.1f s", total_run);
3795
3796                                 fprintf(stderr,
3797                                                 "progress: %s, %.1f tps, lat %.3f ms stddev %.3f",
3798                                                 tbuf, tps, latency, stdev);
3799
3800                                 if (throttle_delay)
3801                                 {
3802                                         fprintf(stderr, ", lag %.3f ms", lag);
3803                                         if (latency_limit)
3804                                                 fprintf(stderr, ", " INT64_FORMAT " skipped",
3805                                                                 skipped - last_skipped);
3806                                 }
3807                                 fprintf(stderr, "\n");
3808
3809                                 last_count = count;
3810                                 last_lats = lats;
3811                                 last_sqlats = sqlats;
3812                                 last_lags = lags;
3813                                 last_report = now;
3814                                 last_skipped = skipped;
3815
3816                                 /*
3817                                  * Ensure that the next report is in the future, in case
3818                                  * pgbench/postgres got stuck somewhere.
3819                                  */
3820                                 do
3821                                 {
3822                                         next_report += (int64) progress *1000000;
3823                                 } while (now >= next_report);
3824                         }
3825                 }
3826         }
3827
3828 done:
3829         INSTR_TIME_SET_CURRENT(start);
3830         disconnect_all(state, nstate);
3831         INSTR_TIME_SET_CURRENT(end);
3832         INSTR_TIME_ACCUM_DIFF(thread->conn_time, end, start);
3833         if (logfile)
3834                 fclose(logfile);
3835         return NULL;
3836 }
3837
3838 /*
3839  * Support for duration option: set timer_exceeded after so many seconds.
3840  */
3841
3842 #ifndef WIN32
3843
3844 static void
3845 handle_sig_alarm(SIGNAL_ARGS)
3846 {
3847         timer_exceeded = true;
3848 }
3849
3850 static void
3851 setalarm(int seconds)
3852 {
3853         pqsignal(SIGALRM, handle_sig_alarm);
3854         alarm(seconds);
3855 }
3856
3857 #else                                                   /* WIN32 */
3858
3859 static VOID CALLBACK
3860 win32_timer_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
3861 {
3862         timer_exceeded = true;
3863 }
3864
3865 static void
3866 setalarm(int seconds)
3867 {
3868         HANDLE          queue;
3869         HANDLE          timer;
3870
3871         /* This function will be called at most once, so we can cheat a bit. */
3872         queue = CreateTimerQueue();
3873         if (seconds > ((DWORD) -1) / 1000 ||
3874                 !CreateTimerQueueTimer(&timer, queue,
3875                                                            win32_timer_callback, NULL, seconds * 1000, 0,
3876                                                            WT_EXECUTEINTIMERTHREAD | WT_EXECUTEONLYONCE))
3877         {
3878                 fprintf(stderr, "failed to set timer\n");
3879                 exit(1);
3880         }
3881 }
3882
3883 /* partial pthread implementation for Windows */
3884
3885 typedef struct win32_pthread
3886 {
3887         HANDLE          handle;
3888         void       *(*routine) (void *);
3889         void       *arg;
3890         void       *result;
3891 } win32_pthread;
3892
3893 static unsigned __stdcall
3894 win32_pthread_run(void *arg)
3895 {
3896         win32_pthread *th = (win32_pthread *) arg;
3897
3898         th->result = th->routine(th->arg);
3899
3900         return 0;
3901 }
3902
3903 static int
3904 pthread_create(pthread_t *thread,
3905                            pthread_attr_t *attr,
3906                            void *(*start_routine) (void *),
3907                            void *arg)
3908 {
3909         int                     save_errno;
3910         win32_pthread *th;
3911
3912         th = (win32_pthread *) pg_malloc(sizeof(win32_pthread));
3913         th->routine = start_routine;
3914         th->arg = arg;
3915         th->result = NULL;
3916
3917         th->handle = (HANDLE) _beginthreadex(NULL, 0, win32_pthread_run, th, 0, NULL);
3918         if (th->handle == NULL)
3919         {
3920                 save_errno = errno;
3921                 free(th);
3922                 return save_errno;
3923         }
3924
3925         *thread = th;
3926         return 0;
3927 }
3928
3929 static int
3930 pthread_join(pthread_t th, void **thread_return)
3931 {
3932         if (th == NULL || th->handle == NULL)
3933                 return errno = EINVAL;
3934
3935         if (WaitForSingleObject(th->handle, INFINITE) != WAIT_OBJECT_0)
3936         {
3937                 _dosmaperr(GetLastError());
3938                 return errno;
3939         }
3940
3941         if (thread_return)
3942                 *thread_return = th->result;
3943
3944         CloseHandle(th->handle);
3945         free(th);
3946         return 0;
3947 }
3948
3949 #endif   /* WIN32 */