]> granicus.if.org Git - postgresql/blob - contrib/pgbench/pgbench.c
Centralize definition of integer limits.
[postgresql] / contrib / pgbench / pgbench.c
1 /*
2  * pgbench.c
3  *
4  * A simple benchmark program for PostgreSQL
5  * Originally written by Tatsuo Ishii and enhanced by many contributors.
6  *
7  * contrib/pgbench/pgbench.c
8  * Copyright (c) 2000-2015, PostgreSQL Global Development Group
9  * ALL RIGHTS RESERVED;
10  *
11  * Permission to use, copy, modify, and distribute this software and its
12  * documentation for any purpose, without fee, and without a written agreement
13  * is hereby granted, provided that the above copyright notice and this
14  * paragraph and the following two paragraphs appear in all copies.
15  *
16  * IN NO EVENT SHALL THE AUTHOR OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
17  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
18  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
19  * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE
20  * POSSIBILITY OF SUCH DAMAGE.
21  *
22  * THE AUTHOR AND DISTRIBUTORS SPECIFICALLY DISCLAIMS ANY WARRANTIES,
23  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
24  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
25  * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO
26  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
27  *
28  */
29
30 #ifdef WIN32
31 #define FD_SETSIZE 1024                 /* set before winsock2.h is included */
#endif   /* WIN32 */
33
34 #include "postgres_fe.h"
35
36 #include "getopt_long.h"
37 #include "libpq-fe.h"
38 #include "portability/instr_time.h"
39
40 #include <ctype.h>
41 #include <math.h>
42 #include <signal.h>
43 #include <sys/time.h>
44 #ifdef HAVE_SYS_SELECT_H
45 #include <sys/select.h>
46 #endif
47
48 #ifdef HAVE_SYS_RESOURCE_H
49 #include <sys/resource.h>               /* for getrlimit */
50 #endif
51
52 #ifndef M_PI
53 #define M_PI 3.14159265358979323846
54 #endif
55
56 #include "pgbench.h"
57
58 /*
59  * Multi-platform pthread implementations
60  */
61
62 #ifdef WIN32
63 /* Use native win32 threads on Windows */
64 typedef struct win32_pthread *pthread_t;
65 typedef int pthread_attr_t;
66
67 static int      pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
68 static int      pthread_join(pthread_t th, void **thread_return);
69 #elif defined(ENABLE_THREAD_SAFETY)
70 /* Use platform-dependent pthread capability */
71 #include <pthread.h>
72 #else
73 /* Use emulation with fork. Rename pthread identifiers to avoid conflicts */
74 #define PTHREAD_FORK_EMULATION
75 #include <sys/wait.h>
76
77 #define pthread_t                               pg_pthread_t
78 #define pthread_attr_t                  pg_pthread_attr_t
79 #define pthread_create                  pg_pthread_create
80 #define pthread_join                    pg_pthread_join
81
82 typedef struct fork_pthread *pthread_t;
83 typedef int pthread_attr_t;
84
85 static int      pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
86 static int      pthread_join(pthread_t th, void **thread_return);
87 #endif
88
89
90 /********************************************************************
91  * some configurable parameters */
92
93 /* max number of clients allowed */
94 #ifdef FD_SETSIZE
95 #define MAXCLIENTS      (FD_SETSIZE - 10)
96 #else
97 #define MAXCLIENTS      1024
98 #endif
99
100 #define LOG_STEP_SECONDS        5       /* seconds between log messages */
101 #define DEFAULT_NXACTS  10              /* default nxacts */
102
103 #define MIN_GAUSSIAN_THRESHOLD          2.0     /* minimum threshold for gauss */
104
105 int                     nxacts = 0;                     /* number of transactions per client */
106 int                     duration = 0;           /* duration in seconds */
107
108 /*
109  * scaling factor. for example, scale = 10 will make 1000000 tuples in
110  * pgbench_accounts table.
111  */
112 int                     scale = 1;
113
114 /*
115  * fillfactor. for example, fillfactor = 90 will use only 90 percent
116  * space during inserts and leave 10 percent free.
117  */
118 int                     fillfactor = 100;
119
120 /*
121  * create foreign key constraints on the tables?
122  */
123 int                     foreign_keys = 0;
124
125 /*
126  * use unlogged tables?
127  */
128 int                     unlogged_tables = 0;
129
130 /*
131  * log sampling rate (1.0 = log everything, 0.0 = option not given)
132  */
133 double          sample_rate = 0.0;
134
135 /*
136  * When threads are throttled to a given rate limit, this is the target delay
137  * to reach that rate in usec.  0 is the default and means no throttling.
138  */
139 int64           throttle_delay = 0;
140
141 /*
142  * Transactions which take longer than this limit (in usec) are counted as
143  * late, and reported as such, although they are completed anyway. When
144  * throttling is enabled, execution time slots that are more than this late
145  * are skipped altogether, and counted separately.
146  */
147 int64           latency_limit = 0;
148
149 /*
150  * tablespace selection
151  */
152 char       *tablespace = NULL;
153 char       *index_tablespace = NULL;
154
155 /*
156  * end of configurable parameters
157  *********************************************************************/
158
159 #define nbranches       1                       /* Makes little sense to change this.  Change
160                                                                  * -s instead */
161 #define ntellers        10
162 #define naccounts       100000
163
/*
 * The scale factor at/beyond which 32-bit integers are insufficient for
 * storing account IDs: there are naccounts (100000) accounts per unit of
 * scale, so naccounts * scale no longer fits in a signed 32-bit integer.
 *
 * Although the actual threshold is 21474, we use 20000 because it is easier to
 * document and remember, and isn't that far away from the real threshold.
 */
171 #define SCALE_32BIT_THRESHOLD 20000
172
173 bool            use_log;                        /* log transaction latencies to a file */
174 bool            use_quiet;                      /* quiet logging onto stderr */
175 int                     agg_interval;           /* log aggregates instead of individual
176                                                                  * transactions */
177 int                     progress = 0;           /* thread progress report every this seconds */
178 int                     progress_nclients = 0;          /* number of clients for progress
179                                                                                  * report */
180 int                     progress_nthreads = 0;          /* number of threads for progress
181                                                                                  * report */
182 bool            is_connect;                     /* establish connection for each transaction */
183 bool            is_latencies;           /* report per-command latencies */
184 int                     main_pid;                       /* main process id used in log filename */
185
186 char       *pghost = "";
187 char       *pgport = "";
188 char       *login = NULL;
189 char       *dbName;
190 const char *progname;
191
192 volatile bool timer_exceeded = false;   /* flag from signal handler */
193
/*
 * variable definitions
 *
 * One name/value pair belonging to a client (see CState.variables).
 * putVariable() stores pg_strdup()'d copies of both strings and keeps the
 * owning array sorted by name so that lookups can use bsearch().
 */
typedef struct
{
        char       *name;                       /* variable name */
        char       *value;                      /* its value */
} Variable;
200
201 #define MAX_FILES               128             /* max number of SQL script files allowed */
202 #define SHELL_COMMAND_SIZE      256 /* maximum size allowed for shell command */
203
204 /*
205  * structures used in custom query mode
206  */
207
/*
 * State of one simulated client: its connection, progress counters, script
 * variables and latency bookkeeping.
 */
typedef struct
{
        PGconn     *con;                        /* connection handle to DB */
        int                     id;                             /* client No. */
        int                     state;                  /* state No. */
        int                     cnt;                    /* xacts count */
        int                     ecnt;                   /* error count */
        int                     listen;                 /* 0 indicates that an async query has been
                                                                 * sent */
        int                     sleeping;               /* 1 indicates that the client is napping */
        bool            throttling;             /* whether nap is for throttling */
        Variable   *variables;          /* array of variable definitions */
        int                     nvariables;             /* number of entries in variables[] */
        int64           txn_scheduled;  /* scheduled start time of transaction (usec) */
        instr_time      txn_begin;              /* used for measuring schedule lag times */
        instr_time      stmt_begin;             /* used for measuring statement latencies */
        int64           txn_latencies;  /* cumulated latencies */
        int64           txn_sqlats;             /* cumulated square latencies */
        bool            is_throttled;   /* whether transaction throttling is done */
        int                     use_file;               /* index in sql_files for this client */
        bool            prepared[MAX_FILES];    /* one flag per script file; presumably
                                                                                 * "statements already prepared" --
                                                                                 * TODO confirm against the users */
} CState;
230
/*
 * Thread state and result
 *
 * Each thread owns a slice of the client array (state[0 .. nstate-1]) and
 * its own random_state, which the getrand()-family helpers hand to
 * pg_erand48() so that threads do not share random number generator state.
 */
typedef struct
{
        int                     tid;                    /* thread id */
        pthread_t       thread;                 /* thread handle */
        CState     *state;                      /* array of CState */
        int                     nstate;                 /* length of state[] */
        instr_time      start_time;             /* thread start time */
        instr_time *exec_elapsed;       /* time spent executing cmds (per Command) */
        int                *exec_count;         /* number of cmd executions (per Command) */
        unsigned short random_state[3];         /* separate randomness for each thread */
        int64           throttle_trigger;               /* previous/next throttling (us) */
        int64           throttle_lag;   /* total transaction lag behind throttling */
        int64           throttle_lag_max;               /* max transaction lag */
        int64           throttle_latency_skipped; /* lagging transactions skipped */
        int64           latency_late;   /* late transactions */
} TState;
250
251 #define INVALID_THREAD          ((pthread_t) 0)
252
/*
 * Totals reported for one thread.
 *
 * NOTE(review): nothing in this part of the file shows where a TResult is
 * filled in; the field names mirror the per-client/per-thread counters in
 * CState and TState, so the notes below are assumptions to be confirmed.
 */
typedef struct
{
        instr_time      conn_time;              /* presumably time spent connecting */
        int64           xacts;                  /* presumably number of transactions */
        int64           latencies;              /* cf. CState.txn_latencies */
        int64           sqlats;                 /* cf. CState.txn_sqlats */
        int64           throttle_lag;   /* cf. TState.throttle_lag */
        int64           throttle_lag_max;       /* cf. TState.throttle_lag_max */
        int64           throttle_latency_skipped;       /* cf. TState.throttle_latency_skipped */
        int64           latency_late;   /* cf. TState.latency_late */
} TResult;
264
265 /*
266  * queries read from files
267  */
268 #define SQL_COMMAND             1
269 #define META_COMMAND    2
270 #define MAX_ARGS                10
271
/*
 * Protocol used for submitting queries (the -M/--protocol option, see
 * usage()).  NUM_QUERYMODE is not a mode: being the last enumerator, it
 * equals the number of real modes.
 */
typedef enum QueryMode
{
        QUERY_SIMPLE,                           /* simple query */
        QUERY_EXTENDED,                         /* extended query */
        QUERY_PREPARED,                         /* extended query with prepared statements */
        NUM_QUERYMODE
} QueryMode;

/* currently selected protocol; simple is the default */
static QueryMode querymode = QUERY_SIMPLE;
/* option spellings, listed in the same order as the QueryMode enumerators */
static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
282
/*
 * One command of a script: either an SQL statement or a backslash
 * meta-command, as told apart by "type".  At most MAX_ARGS words are kept
 * in argv[].
 */
typedef struct
{
        char       *line;                       /* full text of command line */
        int                     command_num;    /* unique index of this Command struct */
        int                     type;                   /* command type (SQL_COMMAND or META_COMMAND) */
        int                     argc;                   /* number of command words */
        char       *argv[MAX_ARGS]; /* command word list */
        PgBenchExpr *expr;                      /* parsed expression */
} Command;
292
/*
 * Running statistics for one aggregation interval of the transaction log
 * (passed to doLog()).  Sums and sums of squares are kept so that averages
 * and deviations can be estimated without storing individual samples.
 */
typedef struct
{

        long            start_time;             /* when does the interval start */
        int                     cnt;                    /* number of transactions */
        int                     skipped;                /* number of transactions skipped under
                                                                 * --rate and --latency-limit */

        double          min_latency;    /* min/max latencies */
        double          max_latency;
        double          sum_latency;    /* sum(latency), sum(latency^2) - for
                                                                 * estimates */
        double          sum2_latency;

        double          min_lag;                /* min/max lag */
        double          max_lag;
        double          sum_lag;                /* sum(lag) */
        double          sum2_lag;               /* sum(lag*lag) */
} AggVals;
312
313 static Command **sql_files[MAX_FILES];  /* SQL script files */
314 static int      num_files;                      /* number of script files */
315 static int      num_commands = 0;       /* total number of Command structs */
316 static int      debug = 0;                      /* debug flag */
317
/*
 * default scenario
 *
 * Table sizes are derived from :scale, then one random account, branch and
 * teller plus a random delta are picked.  Inside a single transaction the
 * script updates the account, reads its balance back, updates the teller
 * and the branch, and records the operation in pgbench_history.
 */
static char *tpc_b = {
        "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
        "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
        "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
        "\\setrandom aid 1 :naccounts\n"
        "\\setrandom bid 1 :nbranches\n"
        "\\setrandom tid 1 :ntellers\n"
        "\\setrandom delta -5000 5000\n"
        "BEGIN;\n"
        "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
        "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
        "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n"
        "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n"
        "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
        "END;\n"
};
335
/*
 * -N case
 *
 * Same as the default scenario, except that the UPDATEs of pgbench_tellers
 * and pgbench_branches are left out (cf. --skip-some-updates in usage()).
 */
static char *simple_update = {
        "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
        "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
        "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
        "\\setrandom aid 1 :naccounts\n"
        "\\setrandom bid 1 :nbranches\n"
        "\\setrandom tid 1 :ntellers\n"
        "\\setrandom delta -5000 5000\n"
        "BEGIN;\n"
        "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
        "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
        "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
        "END;\n"
};
351
/*
 * -S case
 *
 * Read-only scenario: picks a random account and selects its balance
 * (cf. --select-only in usage()).
 */
static char *select_only = {
        "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
        "\\setrandom aid 1 :naccounts\n"
        "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
};
358
359 /* Function prototypes */
360 static void setalarm(int seconds);
361 static void *threadRun(void *arg);
362
363 static void doLog(TState *thread, CState *st, FILE *logfile, instr_time *now,
364           AggVals *agg, bool skipped);
365
/*
 * usage -- print the command-line help text on stdout.
 *
 * The whole text is a single printf() format string; progname is substituted
 * twice (title line and synopsis), and the literal percent sign in the
 * --sampling-rate line is therefore written as "%%".
 */
static void
usage(void)
{
        printf("%s is a benchmarking tool for PostgreSQL.\n\n"
                   "Usage:\n"
                   "  %s [OPTION]... [DBNAME]\n"
                   "\nInitialization options:\n"
                   "  -i, --initialize         invokes initialization mode\n"
                   "  -F, --fillfactor=NUM     set fill factor\n"
                "  -n, --no-vacuum          do not run VACUUM after initialization\n"
        "  -q, --quiet              quiet logging (one message each 5 seconds)\n"
                   "  -s, --scale=NUM          scaling factor\n"
                   "  --foreign-keys           create foreign key constraints between tables\n"
                   "  --index-tablespace=TABLESPACE\n"
        "                           create indexes in the specified tablespace\n"
         "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
                   "  --unlogged-tables        create tables as unlogged tables\n"
                   "\nBenchmarking options:\n"
                   "  -c, --client=NUM         number of concurrent database clients (default: 1)\n"
                   "  -C, --connect            establish new connection for each transaction\n"
                   "  -D, --define=VARNAME=VALUE\n"
          "                           define variable for use by custom script\n"
                 "  -f, --file=FILENAME      read transaction script from FILENAME\n"
                   "  -j, --jobs=NUM           number of threads (default: 1)\n"
                   "  -l, --log                write transaction times to log file\n"
                   "  -L, --latency-limit=NUM  count transactions lasting more than NUM ms\n"
                   "                           as late.\n"
                   "  -M, --protocol=simple|extended|prepared\n"
                   "                           protocol for submitting queries (default: simple)\n"
                   "  -n, --no-vacuum          do not run VACUUM before tests\n"
                   "  -N, --skip-some-updates  skip updates of pgbench_tellers and pgbench_branches\n"
                   "  -P, --progress=NUM       show thread progress report every NUM seconds\n"
                   "  -r, --report-latencies   report average latency per command\n"
                "  -R, --rate=NUM           target rate in transactions per second\n"
                   "  -s, --scale=NUM          report this scale factor in output\n"
                   "  -S, --select-only        perform SELECT-only transactions\n"
                   "  -t, --transactions=NUM   number of transactions each client runs (default: 10)\n"
                 "  -T, --time=NUM           duration of benchmark test in seconds\n"
                   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
                   "  --aggregate-interval=NUM aggregate data over NUM seconds\n"
                   "  --sampling-rate=NUM      fraction of transactions to log (e.g. 0.01 for 1%%)\n"
                   "\nCommon options:\n"
                   "  -d, --debug              print debugging output\n"
          "  -h, --host=HOSTNAME      database server host or socket directory\n"
                   "  -p, --port=PORT          database server port number\n"
                   "  -U, --username=USERNAME  connect as specified database user\n"
                 "  -V, --version            output version information, then exit\n"
                   "  -?, --help               show this help, then exit\n"
                   "\n"
                   "Report bugs to <pgsql-bugs@postgresql.org>.\n",
                   progname, progname);
}
418
/*
 * strtoint64 -- convert a string to 64-bit integer
 *
 * This function is a modified version of scanint8() from
 * src/backend/utils/adt/int8.c.
 *
 * Accepted syntax: optional leading whitespace, an optional '+' or '-',
 * one or more decimal digits, optional trailing whitespace.
 *
 * Problems are reported on stderr but are never fatal; the caller always
 * gets a value back:
 *	- no digit found: one message, returns 0;
 *	- value does not fit in an int64: one message, the result is clamped to
 *	  INT64_MAX (or -INT64_MAX for negative input);
 *	- junk after the digits: one message, returns the value parsed so far.
 */
int64
strtoint64(const char *str)
{
        const char *ptr = str;
        int64           result = 0;
        int                     sign = 1;

        /*
         * Do our own scan, rather than relying on sscanf which might be broken
         * for long long.
         */

        /* skip leading spaces */
        while (*ptr && isspace((unsigned char) *ptr))
                ptr++;

        /* handle sign */
        if (*ptr == '-')
        {
                ptr++;

                /*
                 * Do an explicit check for INT64_MIN.  Ugly though this is, it's
                 * cleaner than trying to get the loop below to handle it portably.
                 * The match only counts if no further digit follows; otherwise the
                 * number is even smaller and must be reported as out of range by
                 * the digit loop.  (ptr[19] is safe to read: strncmp() matched 19
                 * characters, so at worst it is the terminator.)
                 */
                if (strncmp(ptr, "9223372036854775808", 19) == 0 &&
                        !isdigit((unsigned char) ptr[19]))
                {
                        result = INT64_MIN;
                        ptr += 19;
                        goto gotdigits;
                }
                sign = -1;
        }
        else if (*ptr == '+')
                ptr++;

        /* require at least one digit */
        if (!isdigit((unsigned char) *ptr))
        {
                fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
                /* nothing was parsed; bail out so the error is reported once */
                return 0;
        }

        /* process digits */
        while (isdigit((unsigned char) *ptr))
        {
                int                     digit = *ptr++ - '0';

                /*
                 * Test for overflow *before* doing the arithmetic: signed overflow
                 * is undefined behavior, so it cannot be detected reliably after
                 * the fact.
                 */
                if (result > (INT64_MAX - digit) / 10)
                {
                        fprintf(stderr, "value \"%s\" is out of range for type bigint\n", str);
                        result = INT64_MAX;

                        /* swallow the remaining digits so they aren't flagged as junk */
                        while (isdigit((unsigned char) *ptr))
                                ptr++;
                        break;
                }
                result = result * 10 + digit;
        }

gotdigits:

        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
                ptr++;

        if (*ptr != '\0')
                fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);

        /* sign is still +1 on the INT64_MIN path, so no negation happens there */
        return ((sign < 0) ? -result : result);
}
486
487 /* random number generator: uniform distribution from min to max inclusive */
488 static int64
489 getrand(TState *thread, int64 min, int64 max)
490 {
491         /*
492          * Odd coding is so that min and max have approximately the same chance of
493          * being selected as do numbers between them.
494          *
495          * pg_erand48() is thread-safe and concurrent, which is why we use it
496          * rather than random(), which in glibc is non-reentrant, and therefore
497          * protected by a mutex, and therefore a bottleneck on machines with many
498          * CPUs.
499          */
500         return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
501 }
502
503 /*
504  * random number generator: exponential distribution from min to max inclusive.
505  * the threshold is so that the density of probability for the last cut-off max
506  * value is exp(-threshold).
507  */
508 static int64
509 getExponentialRand(TState *thread, int64 min, int64 max, double threshold)
510 {
511         double cut, uniform, rand;
512         Assert(threshold > 0.0);
513         cut = exp(-threshold);
514         /* erand in [0, 1), uniform in (0, 1] */
515         uniform = 1.0 - pg_erand48(thread->random_state);
516         /*
517          * inner expresion in (cut, 1] (if threshold > 0),
518          * rand in [0, 1)
519          */
520         Assert((1.0 - cut) != 0.0);
521         rand = - log(cut + (1.0 - cut) * uniform) / threshold;
522         /* return int64 random number within between min and max */
523         return min + (int64)((max - min + 1) * rand);
524 }
525
526 /* random number generator: gaussian distribution from min to max inclusive */
527 static int64
528 getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
529 {
530         double          stdev;
531         double          rand;
532
533         /*
534          * Get user specified random number from this loop, with
535          * -threshold < stdev <= threshold
536          *
537          * This loop is executed until the number is in the expected range.
538          *
539          * As the minimum threshold is 2.0, the probability of looping is low:
540          * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the average
541          * sinus multiplier as 2/pi, we have a 8.6% looping probability in the
542          * worst case. For a 5.0 threshold value, the looping probability
543          * is about e^{-5} * 2 / pi ~ 0.43%.
544          */
545         do
546         {
547                 /*
548                  * pg_erand48 generates [0,1), but for the basic version of the
549                  * Box-Muller transform the two uniformly distributed random numbers
550                  * are expected in (0, 1] (see http://en.wikipedia.org/wiki/Box_muller)
551                  */
552                 double rand1 = 1.0 - pg_erand48(thread->random_state);
553                 double rand2 = 1.0 - pg_erand48(thread->random_state);
554
555                 /* Box-Muller basic form transform */
556                 double var_sqrt = sqrt(-2.0 * log(rand1));
557                 stdev = var_sqrt * sin(2.0 * M_PI * rand2);
558
559                 /*
560                  * we may try with cos, but there may be a bias induced if the previous
561                  * value fails the test. To be on the safe side, let us try over.
562                  */
563         }
564         while (stdev < -threshold || stdev >= threshold);
565
566         /* stdev is in [-threshold, threshold), normalization to [0,1) */
567         rand = (stdev + threshold) / (threshold * 2.0);
568
569         /* return int64 random number within between min and max */
570         return min + (int64)((max - min + 1) * rand);
571 }
572
573 /*
574  * random number generator: generate a value, such that the series of values
575  * will approximate a Poisson distribution centered on the given value.
576  */
577 static int64
578 getPoissonRand(TState *thread, int64 center)
579 {
580         /*
581          * Use inverse transform sampling to generate a value > 0, such that the
582          * expected (i.e. average) value is the given argument.
583          */
584         double uniform;
585
586         /* erand in [0, 1), uniform in (0, 1] */
587         uniform = 1.0 - pg_erand48(thread->random_state);
588
589         return (int64) (-log(uniform) * ((double) center) + 0.5);
590 }
591
592 /* call PQexec() and exit() on failure */
593 static void
594 executeStatement(PGconn *con, const char *sql)
595 {
596         PGresult   *res;
597
598         res = PQexec(con, sql);
599         if (PQresultStatus(res) != PGRES_COMMAND_OK)
600         {
601                 fprintf(stderr, "%s", PQerrorMessage(con));
602                 exit(1);
603         }
604         PQclear(res);
605 }
606
607 /* set up a connection to the backend */
608 static PGconn *
609 doConnect(void)
610 {
611         PGconn     *conn;
612         static char *password = NULL;
613         bool            new_pass;
614
615         /*
616          * Start the connection.  Loop until we have a password if requested by
617          * backend.
618          */
619         do
620         {
621 #define PARAMS_ARRAY_SIZE       7
622
623                 const char *keywords[PARAMS_ARRAY_SIZE];
624                 const char *values[PARAMS_ARRAY_SIZE];
625
626                 keywords[0] = "host";
627                 values[0] = pghost;
628                 keywords[1] = "port";
629                 values[1] = pgport;
630                 keywords[2] = "user";
631                 values[2] = login;
632                 keywords[3] = "password";
633                 values[3] = password;
634                 keywords[4] = "dbname";
635                 values[4] = dbName;
636                 keywords[5] = "fallback_application_name";
637                 values[5] = progname;
638                 keywords[6] = NULL;
639                 values[6] = NULL;
640
641                 new_pass = false;
642
643                 conn = PQconnectdbParams(keywords, values, true);
644
645                 if (!conn)
646                 {
647                         fprintf(stderr, "Connection to database \"%s\" failed\n",
648                                         dbName);
649                         return NULL;
650                 }
651
652                 if (PQstatus(conn) == CONNECTION_BAD &&
653                         PQconnectionNeedsPassword(conn) &&
654                         password == NULL)
655                 {
656                         PQfinish(conn);
657                         password = simple_prompt("Password: ", 100, false);
658                         new_pass = true;
659                 }
660         } while (new_pass);
661
662         /* check to see that the backend connection was successfully made */
663         if (PQstatus(conn) == CONNECTION_BAD)
664         {
665                 fprintf(stderr, "Connection to database \"%s\" failed:\n%s",
666                                 dbName, PQerrorMessage(conn));
667                 PQfinish(conn);
668                 return NULL;
669         }
670
671         return conn;
672 }
673
674 /* throw away response from backend */
675 static void
676 discard_response(CState *state)
677 {
678         PGresult   *res;
679
680         do
681         {
682                 res = PQgetResult(state->con);
683                 if (res)
684                         PQclear(res);
685         } while (res);
686 }
687
688 static int
689 compareVariables(const void *v1, const void *v2)
690 {
691         return strcmp(((const Variable *) v1)->name,
692                                   ((const Variable *) v2)->name);
693 }
694
695 static char *
696 getVariable(CState *st, char *name)
697 {
698         Variable        key,
699                            *var;
700
701         /* On some versions of Solaris, bsearch of zero items dumps core */
702         if (st->nvariables <= 0)
703                 return NULL;
704
705         key.name = name;
706         var = (Variable *) bsearch((void *) &key,
707                                                            (void *) st->variables,
708                                                            st->nvariables,
709                                                            sizeof(Variable),
710                                                            compareVariables);
711         if (var != NULL)
712                 return var->value;
713         else
714                 return NULL;
715 }
716
/*
 * isLegalVariableName -- check whether "name" can be used as a variable
 * name.
 *
 * A legal name is non-empty and consists only of alphabets, numerals and
 * underscores.  The empty string is rejected explicitly: a variable with no
 * name could be defined but never referenced, because parseVariable() only
 * recognizes references that contain at least one name character.
 */
static bool
isLegalVariableName(const char *name)
{
        /* go through unsigned char, as the <ctype.h> functions require */
        const unsigned char *p = (const unsigned char *) name;

        /* mustn't be zero-length */
        if (*p == '\0')
                return false;

        for (; *p != '\0'; p++)
        {
                if (!isalnum(*p) && *p != '_')
                        return false;
        }

        return true;
}
731
732 static int
733 putVariable(CState *st, const char *context, char *name, char *value)
734 {
735         Variable        key,
736                            *var;
737
738         key.name = name;
739         /* On some versions of Solaris, bsearch of zero items dumps core */
740         if (st->nvariables > 0)
741                 var = (Variable *) bsearch((void *) &key,
742                                                                    (void *) st->variables,
743                                                                    st->nvariables,
744                                                                    sizeof(Variable),
745                                                                    compareVariables);
746         else
747                 var = NULL;
748
749         if (var == NULL)
750         {
751                 Variable   *newvars;
752
753                 /*
754                  * Check for the name only when declaring a new variable to avoid
755                  * overhead.
756                  */
757                 if (!isLegalVariableName(name))
758                 {
759                         fprintf(stderr, "%s: invalid variable name '%s'\n", context, name);
760                         return false;
761                 }
762
763                 if (st->variables)
764                         newvars = (Variable *) pg_realloc(st->variables,
765                                                                         (st->nvariables + 1) * sizeof(Variable));
766                 else
767                         newvars = (Variable *) pg_malloc(sizeof(Variable));
768
769                 st->variables = newvars;
770
771                 var = &newvars[st->nvariables];
772
773                 var->name = pg_strdup(name);
774                 var->value = pg_strdup(value);
775
776                 st->nvariables++;
777
778                 qsort((void *) st->variables, st->nvariables, sizeof(Variable),
779                           compareVariables);
780         }
781         else
782         {
783                 char       *val;
784
785                 /* dup then free, in case value is pointing at this variable */
786                 val = pg_strdup(value);
787
788                 free(var->value);
789                 var->value = val;
790         }
791
792         return true;
793 }
794
/*
 * Extract the variable name that follows the first character of "sql".
 *
 * sql[0] itself is never examined (assignVariables() passes a pointer to a
 * ':').  The name is the longest run of alphanumeric characters and
 * underscores starting at sql[1].
 *
 * Returns a newly allocated, NUL-terminated copy of the name and stores in
 * *eaten the number of characters covered (the skipped first character plus
 * the name).  Returns NULL, leaving *eaten untouched, if no name character
 * follows sql[0].
 */
static char *
parseVariable(const char *sql, int *eaten)
{
    int         end = 1;        /* index just past the name */
    char       *name;

    while (isalnum((unsigned char) sql[end]) || sql[end] == '_')
        end++;

    /* nothing name-like after the leading character */
    if (end == 1)
        return NULL;

    /* "end" bytes hold the end - 1 name characters plus the terminator */
    name = pg_malloc(end);
    memcpy(name, sql + 1, end - 1);
    name[end - 1] = '\0';

    *eaten = end;
    return name;
}
815
/*
 * Replace the "len" characters at "param" (which points into the string
 * *sql) with the text of "value".
 *
 * If the replacement is longer than the text it replaces, *sql is enlarged
 * with pg_realloc() and may therefore move; "param" is re-based onto the new
 * buffer.  The remainder of the string, including its terminator, is shifted
 * to follow the inserted value.
 *
 * Returns a pointer to the first character after the inserted value.
 */
static char *
replaceVariable(char **sql, char *param, int len, char *value)
{
    int         newlen = strlen(value);
    char       *rest;

    if (newlen > len)
    {
        /* remember where we are as an offset, since the buffer may move */
        size_t      pos = param - *sql;

        *sql = pg_realloc(*sql, strlen(*sql) - len + newlen + 1);
        param = *sql + pos;
    }

    rest = param + newlen;

    /* move the tail (with its '\0') only when the lengths differ */
    if (newlen != len)
        memmove(rest, param + len, strlen(param + len) + 1);
    memcpy(param, value, newlen);

    return rest;
}
835
836 static char *
837 assignVariables(CState *st, char *sql)
838 {
839         char       *p,
840                            *name,
841                            *val;
842
843         p = sql;
844         while ((p = strchr(p, ':')) != NULL)
845         {
846                 int                     eaten;
847
848                 name = parseVariable(p, &eaten);
849                 if (name == NULL)
850                 {
851                         while (*p == ':')
852                         {
853                                 p++;
854                         }
855                         continue;
856                 }
857
858                 val = getVariable(st, name);
859                 free(name);
860                 if (val == NULL)
861                 {
862                         p++;
863                         continue;
864                 }
865
866                 p = replaceVariable(&sql, p, eaten, val);
867         }
868
869         return sql;
870 }
871
872 static void
873 getQueryParams(CState *st, const Command *command, const char **params)
874 {
875         int                     i;
876
877         for (i = 0; i < command->argc - 1; i++)
878                 params[i] = getVariable(st, command->argv[i + 1]);
879 }
880
881 /*
882  * Recursive evaluation of an expression in a pgbench script
883  * using the current state of variables.
884  * Returns whether the evaluation was ok,
885  * the value itself is returned through the retval pointer.
886  */
887 static bool
888 evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
889 {
890         switch (expr->etype)
891         {
892                 case ENODE_INTEGER_CONSTANT:
893                         {
894                                 *retval = expr->u.integer_constant.ival;
895                                 return true;
896                         }
897
898                 case ENODE_VARIABLE:
899                         {
900                                 char       *var;
901
902                                 if ((var = getVariable(st, expr->u.variable.varname)) == NULL)
903                                 {
904                                         fprintf(stderr, "undefined variable %s\n",
905                                                 expr->u.variable.varname);
906                                         return false;
907                                 }
908                                 *retval = strtoint64(var);
909                                 return true;
910                         }
911
912                 case ENODE_OPERATOR:
913                         {
914                                 int64   lval;
915                                 int64   rval;
916
917                                 if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
918                                         return false;
919                                 if (!evaluateExpr(st, expr->u.operator.rexpr, &rval))
920                                         return false;
921                                 switch (expr->u.operator.operator)
922                                 {
923                                         case '+':
924                                                 *retval = lval + rval;
925                                                 return true;
926
927                                         case '-':
928                                                 *retval = lval - rval;
929                                                 return true;
930
931                                         case '*':
932                                                 *retval = lval * rval;
933                                                 return true;
934
935                                         case '/':
936                                                 if (rval == 0)
937                                                 {
938                                                         fprintf(stderr, "division by zero\n");
939                                                         return false;
940                                                 }
941                                                 *retval = lval / rval;
942                                                 return true;
943
944                                         case '%':
945                                                 if (rval == 0)
946                                                 {
947                                                         fprintf(stderr, "division by zero\n");
948                                                         return false;
949                                                 }
950                                                 *retval = lval % rval;
951                                                 return true;
952                                 }
953
954                                 fprintf(stderr, "bad operator\n");
955                                 return false;
956                         }
957
958                 default:
959                         break;
960         }
961
962         fprintf(stderr, "bad expression\n");
963         return false;
964 }
965
966 /*
967  * Run a shell command. The result is assigned to the variable if not NULL.
968  * Return true if succeeded, or false on error.
969  */
970 static bool
971 runShellCommand(CState *st, char *variable, char **argv, int argc)
972 {
973         char            command[SHELL_COMMAND_SIZE];
974         int                     i,
975                                 len = 0;
976         FILE       *fp;
977         char            res[64];
978         char       *endptr;
979         int                     retval;
980
981         /*----------
982          * Join arguments with whitespace separators. Arguments starting with
983          * exactly one colon are treated as variables:
984          *      name - append a string "name"
985          *      :var - append a variable named 'var'
986          *      ::name - append a string ":name"
987          *----------
988          */
989         for (i = 0; i < argc; i++)
990         {
991                 char       *arg;
992                 int                     arglen;
993
994                 if (argv[i][0] != ':')
995                 {
996                         arg = argv[i];          /* a string literal */
997                 }
998                 else if (argv[i][1] == ':')
999                 {
1000                         arg = argv[i] + 1;      /* a string literal starting with colons */
1001                 }
1002                 else if ((arg = getVariable(st, argv[i] + 1)) == NULL)
1003                 {
1004                         fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[i]);
1005                         return false;
1006                 }
1007
1008                 arglen = strlen(arg);
1009                 if (len + arglen + (i > 0 ? 1 : 0) >= SHELL_COMMAND_SIZE - 1)
1010                 {
1011                         fprintf(stderr, "%s: too long shell command\n", argv[0]);
1012                         return false;
1013                 }
1014
1015                 if (i > 0)
1016                         command[len++] = ' ';
1017                 memcpy(command + len, arg, arglen);
1018                 len += arglen;
1019         }
1020
1021         command[len] = '\0';
1022
1023         /* Fast path for non-assignment case */
1024         if (variable == NULL)
1025         {
1026                 if (system(command))
1027                 {
1028                         if (!timer_exceeded)
1029                                 fprintf(stderr, "%s: cannot launch shell command\n", argv[0]);
1030                         return false;
1031                 }
1032                 return true;
1033         }
1034
1035         /* Execute the command with pipe and read the standard output. */
1036         if ((fp = popen(command, "r")) == NULL)
1037         {
1038                 fprintf(stderr, "%s: cannot launch shell command\n", argv[0]);
1039                 return false;
1040         }
1041         if (fgets(res, sizeof(res), fp) == NULL)
1042         {
1043                 if (!timer_exceeded)
1044                         fprintf(stderr, "%s: cannot read the result\n", argv[0]);
1045                 (void) pclose(fp);
1046                 return false;
1047         }
1048         if (pclose(fp) < 0)
1049         {
1050                 fprintf(stderr, "%s: cannot close shell command\n", argv[0]);
1051                 return false;
1052         }
1053
1054         /* Check whether the result is an integer and assign it to the variable */
1055         retval = (int) strtol(res, &endptr, 10);
1056         while (*endptr != '\0' && isspace((unsigned char) *endptr))
1057                 endptr++;
1058         if (*res == '\0' || *endptr != '\0')
1059         {
1060                 fprintf(stderr, "%s: must return an integer ('%s' returned)\n", argv[0], res);
1061                 return false;
1062         }
1063         snprintf(res, sizeof(res), "%d", retval);
1064         if (!putVariable(st, "setshell", variable, res))
1065                 return false;
1066
1067 #ifdef DEBUG
1068         printf("shell parameter name: %s, value: %s\n", argv[1], res);
1069 #endif
1070         return true;
1071 }
1072
/* size of the buffers that hold a prepared statement name */
#define MAX_PREPARE_NAME        32

/*
 * Write into "buffer" the name of the prepared statement for command number
 * "state" of script number "file", in the form "P<file>_<state>".
 *
 * "buffer" must have room for MAX_PREPARE_NAME bytes.  The write is bounded
 * to that size; the longest possible output for 32-bit ints is 24 characters
 * plus the terminator, so nothing is ever truncated.
 */
static void
preparedStatementName(char *buffer, int file, int state)
{
    snprintf(buffer, MAX_PREPARE_NAME, "P%d_%d", file, state);
}
1079
1080 static bool
1081 clientDone(CState *st, bool ok)
1082 {
1083         (void) ok;                                      /* unused */
1084
1085         if (st->con != NULL)
1086         {
1087                 PQfinish(st->con);
1088                 st->con = NULL;
1089         }
1090         return false;                           /* always false */
1091 }
1092
1093 static
1094 void
1095 agg_vals_init(AggVals *aggs, instr_time start)
1096 {
1097         /* basic counters */
1098         aggs->cnt = 0;                          /* number of transactions (includes skipped) */
1099         aggs->skipped = 0;                      /* xacts skipped under --rate --latency-limit */
1100
1101         aggs->sum_latency = 0;          /* SUM(latency) */
1102         aggs->sum2_latency = 0;                         /* SUM(latency*latency) */
1103
1104         /* min and max transaction duration */
1105         aggs->min_latency = 0;
1106         aggs->max_latency = 0;
1107
1108         /* schedule lag counters */
1109         aggs->sum_lag = 0;
1110         aggs->sum2_lag = 0;
1111         aggs->min_lag = 0;
1112         aggs->max_lag = 0;
1113
1114         /* start of the current interval */
1115         aggs->start_time = INSTR_TIME_GET_DOUBLE(start);
1116 }
1117
1118 /* return false iff client should be disconnected */
1119 static bool
1120 doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVals *agg)
1121 {
1122         PGresult   *res;
1123         Command   **commands;
1124         bool            trans_needs_throttle = false;
1125         instr_time      now;
1126
1127         /*
1128          * gettimeofday() isn't free, so we get the current timestamp lazily the
1129          * first time it's needed, and reuse the same value throughout this
1130          * function after that. This also ensures that e.g. the calculated latency
1131          * reported in the log file and in the totals are the same. Zero means
1132          * "not set yet".
1133          */
1134         INSTR_TIME_SET_ZERO(now);
1135
1136 top:
1137         commands = sql_files[st->use_file];
1138
1139         /*
1140          * Handle throttling once per transaction by sleeping.  It is simpler to
1141          * do this here rather than at the end, because so much complicated logic
1142          * happens below when statements finish.
1143          */
1144         if (throttle_delay && !st->is_throttled)
1145         {
1146                 /*
1147                  * Generate a delay such that the series of delays will approximate a
1148                  * Poisson distribution centered on the throttle_delay time.
1149                  *
1150                  * If transactions are too slow or a given wait is shorter than a
1151                  * transaction, the next transaction will start right away.
1152                  */
1153                 int64           wait = getPoissonRand(thread, throttle_delay);
1154
1155                 thread->throttle_trigger += wait;
1156                 st->txn_scheduled = thread->throttle_trigger;
1157
1158                 /*
1159                  * If this --latency-limit is used, and this slot is already late so
1160                  * that the transaction will miss the latency limit even if it
1161                  * completed immediately, we skip this time slot and iterate till the
1162                  * next slot that isn't late yet.
1163                  */
1164                 if (latency_limit)
1165                 {
1166                         int64           now_us;
1167
1168                         if (INSTR_TIME_IS_ZERO(now))
1169                                 INSTR_TIME_SET_CURRENT(now);
1170                         now_us = INSTR_TIME_GET_MICROSEC(now);
1171                         while (thread->throttle_trigger < now_us - latency_limit)
1172                         {
1173                                 thread->throttle_latency_skipped++;
1174
1175                                 if (logfile)
1176                                         doLog(thread, st, logfile, &now, agg, true);
1177
1178                                 wait = getPoissonRand(thread, throttle_delay);
1179                                 thread->throttle_trigger += wait;
1180                                 st->txn_scheduled = thread->throttle_trigger;
1181                         }
1182                 }
1183
1184                 st->sleeping = 1;
1185                 st->throttling = true;
1186                 st->is_throttled = true;
1187                 if (debug)
1188                         fprintf(stderr, "client %d throttling " INT64_FORMAT " us\n",
1189                                         st->id, wait);
1190         }
1191
1192         if (st->sleeping)
1193         {                                                       /* are we sleeping? */
1194                 int64           now_us;
1195
1196                 if (INSTR_TIME_IS_ZERO(now))
1197                         INSTR_TIME_SET_CURRENT(now);
1198                 now_us = INSTR_TIME_GET_MICROSEC(now);
1199                 if (st->txn_scheduled <= now_us)
1200                 {
1201                         st->sleeping = 0;       /* Done sleeping, go ahead with next command */
1202                         if (st->throttling)
1203                         {
1204                                 /* Measure lag of throttled transaction relative to target */
1205                                 int64           lag = now_us - st->txn_scheduled;
1206
1207                                 thread->throttle_lag += lag;
1208                                 if (lag > thread->throttle_lag_max)
1209                                         thread->throttle_lag_max = lag;
1210                                 st->throttling = false;
1211                         }
1212                 }
1213                 else
1214                         return true;            /* Still sleeping, nothing to do here */
1215         }
1216
1217         if (st->listen)
1218         {                                                       /* are we receiver? */
1219                 if (commands[st->state]->type == SQL_COMMAND)
1220                 {
1221                         if (debug)
1222                                 fprintf(stderr, "client %d receiving\n", st->id);
1223                         if (!PQconsumeInput(st->con))
1224                         {                                       /* there's something wrong */
1225                                 fprintf(stderr, "Client %d aborted in state %d. Probably the backend died while processing.\n", st->id, st->state);
1226                                 return clientDone(st, false);
1227                         }
1228                         if (PQisBusy(st->con))
1229                                 return true;    /* don't have the whole result yet */
1230                 }
1231
1232                 /*
1233                  * command finished: accumulate per-command execution times in
1234                  * thread-local data structure, if per-command latencies are requested
1235                  */
1236                 if (is_latencies)
1237                 {
1238                         int                     cnum = commands[st->state]->command_num;
1239
1240                         if (INSTR_TIME_IS_ZERO(now))
1241                                 INSTR_TIME_SET_CURRENT(now);
1242                         INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum],
1243                                                                   now, st->stmt_begin);
1244                         thread->exec_count[cnum]++;
1245                 }
1246
1247                 /* transaction finished: calculate latency and log the transaction */
1248                 if (commands[st->state + 1] == NULL)
1249                 {
1250                         /* only calculate latency if an option is used that needs it */
1251                         if (progress || throttle_delay || latency_limit)
1252                         {
1253                                 int64           latency;
1254
1255                                 if (INSTR_TIME_IS_ZERO(now))
1256                                         INSTR_TIME_SET_CURRENT(now);
1257
1258                                 latency = INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled;
1259
1260                                 st->txn_latencies += latency;
1261
1262                                 /*
1263                                  * XXX In a long benchmark run of high-latency transactions,
1264                                  * this int64 addition eventually overflows.  For example, 100
1265                                  * threads running 10s transactions will overflow it in 2.56
1266                                  * hours.  With a more-typical OLTP workload of .1s
1267                                  * transactions, overflow would take 256 hours.
1268                                  */
1269                                 st->txn_sqlats += latency * latency;
1270
1271                                 /* record over the limit transactions if needed. */
1272                                 if (latency_limit && latency > latency_limit)
1273                                         thread->latency_late++;
1274                         }
1275
1276                         /* record the time it took in the log */
1277                         if (logfile)
1278                                 doLog(thread, st, logfile, &now, agg, false);
1279                 }
1280
1281                 if (commands[st->state]->type == SQL_COMMAND)
1282                 {
1283                         /*
1284                          * Read and discard the query result; note this is not included in
1285                          * the statement latency numbers.
1286                          */
1287                         res = PQgetResult(st->con);
1288                         switch (PQresultStatus(res))
1289                         {
1290                                 case PGRES_COMMAND_OK:
1291                                 case PGRES_TUPLES_OK:
1292                                         break;          /* OK */
1293                                 default:
1294                                         fprintf(stderr, "Client %d aborted in state %d: %s",
1295                                                         st->id, st->state, PQerrorMessage(st->con));
1296                                         PQclear(res);
1297                                         return clientDone(st, false);
1298                         }
1299                         PQclear(res);
1300                         discard_response(st);
1301                 }
1302
1303                 if (commands[st->state + 1] == NULL)
1304                 {
1305                         if (is_connect)
1306                         {
1307                                 PQfinish(st->con);
1308                                 st->con = NULL;
1309                         }
1310
1311                         ++st->cnt;
1312                         if ((st->cnt >= nxacts && duration <= 0) || timer_exceeded)
1313                                 return clientDone(st, true);    /* exit success */
1314                 }
1315
1316                 /* increment state counter */
1317                 st->state++;
1318                 if (commands[st->state] == NULL)
1319                 {
1320                         st->state = 0;
1321                         st->use_file = (int) getrand(thread, 0, num_files - 1);
1322                         commands = sql_files[st->use_file];
1323                         st->is_throttled = false;
1324
1325                         /*
1326                          * No transaction is underway anymore, which means there is
1327                          * nothing to listen to right now.  When throttling rate limits
1328                          * are active, a sleep will happen next, as the next transaction
1329                          * starts.  And then in any case the next SQL command will set
1330                          * listen back to 1.
1331                          */
1332                         st->listen = 0;
1333                         trans_needs_throttle = (throttle_delay > 0);
1334                 }
1335         }
1336
1337         if (st->con == NULL)
1338         {
1339                 instr_time      start,
1340                                         end;
1341
1342                 INSTR_TIME_SET_CURRENT(start);
1343                 if ((st->con = doConnect()) == NULL)
1344                 {
1345                         fprintf(stderr, "Client %d aborted in establishing connection.\n", st->id);
1346                         return clientDone(st, false);
1347                 }
1348                 INSTR_TIME_SET_CURRENT(end);
1349                 INSTR_TIME_ACCUM_DIFF(*conn_time, end, start);
1350         }
1351
1352         /*
1353          * This ensures that a throttling delay is inserted before proceeding with
1354          * sql commands, after the first transaction. The first transaction
1355          * throttling is performed when first entering doCustom.
1356          */
1357         if (trans_needs_throttle)
1358         {
1359                 trans_needs_throttle = false;
1360                 goto top;
1361         }
1362
1363         /* Record transaction start time under logging, progress or throttling */
1364         if ((logfile || progress || throttle_delay || latency_limit) && st->state == 0)
1365         {
1366                 INSTR_TIME_SET_CURRENT(st->txn_begin);
1367
1368                 /*
1369                  * When not throttling, this is also the transaction's scheduled start
1370                  * time.
1371                  */
1372                 if (!throttle_delay)
1373                         st->txn_scheduled = INSTR_TIME_GET_MICROSEC(st->txn_begin);
1374         }
1375
1376         /* Record statement start time if per-command latencies are requested */
1377         if (is_latencies)
1378                 INSTR_TIME_SET_CURRENT(st->stmt_begin);
1379
1380         if (commands[st->state]->type == SQL_COMMAND)
1381         {
1382                 const Command *command = commands[st->state];
1383                 int                     r;
1384
1385                 if (querymode == QUERY_SIMPLE)
1386                 {
1387                         char       *sql;
1388
1389                         sql = pg_strdup(command->argv[0]);
1390                         sql = assignVariables(st, sql);
1391
1392                         if (debug)
1393                                 fprintf(stderr, "client %d sending %s\n", st->id, sql);
1394                         r = PQsendQuery(st->con, sql);
1395                         free(sql);
1396                 }
1397                 else if (querymode == QUERY_EXTENDED)
1398                 {
1399                         const char *sql = command->argv[0];
1400                         const char *params[MAX_ARGS];
1401
1402                         getQueryParams(st, command, params);
1403
1404                         if (debug)
1405                                 fprintf(stderr, "client %d sending %s\n", st->id, sql);
1406                         r = PQsendQueryParams(st->con, sql, command->argc - 1,
1407                                                                   NULL, params, NULL, NULL, 0);
1408                 }
1409                 else if (querymode == QUERY_PREPARED)
1410                 {
1411                         char            name[MAX_PREPARE_NAME];
1412                         const char *params[MAX_ARGS];
1413
1414                         if (!st->prepared[st->use_file])
1415                         {
1416                                 int                     j;
1417
1418                                 for (j = 0; commands[j] != NULL; j++)
1419                                 {
1420                                         PGresult   *res;
1421                                         char            name[MAX_PREPARE_NAME];
1422
1423                                         if (commands[j]->type != SQL_COMMAND)
1424                                                 continue;
1425                                         preparedStatementName(name, st->use_file, j);
1426                                         res = PQprepare(st->con, name,
1427                                                   commands[j]->argv[0], commands[j]->argc - 1, NULL);
1428                                         if (PQresultStatus(res) != PGRES_COMMAND_OK)
1429                                                 fprintf(stderr, "%s", PQerrorMessage(st->con));
1430                                         PQclear(res);
1431                                 }
1432                                 st->prepared[st->use_file] = true;
1433                         }
1434
1435                         getQueryParams(st, command, params);
1436                         preparedStatementName(name, st->use_file, st->state);
1437
1438                         if (debug)
1439                                 fprintf(stderr, "client %d sending %s\n", st->id, name);
1440                         r = PQsendQueryPrepared(st->con, name, command->argc - 1,
1441                                                                         params, NULL, NULL, 0);
1442                 }
1443                 else    /* unknown sql mode */
1444                         r = 0;
1445
1446                 if (r == 0)
1447                 {
1448                         if (debug)
1449                                 fprintf(stderr, "client %d cannot send %s\n", st->id, command->argv[0]);
1450                         st->ecnt++;
1451                 }
1452                 else
1453                         st->listen = 1;         /* flags that should be listened */
1454         }
1455         else if (commands[st->state]->type == META_COMMAND)
1456         {
1457                 int                     argc = commands[st->state]->argc,
1458                                         i;
1459                 char      **argv = commands[st->state]->argv;
1460
1461                 if (debug)
1462                 {
1463                         fprintf(stderr, "client %d executing \\%s", st->id, argv[0]);
1464                         for (i = 1; i < argc; i++)
1465                                 fprintf(stderr, " %s", argv[i]);
1466                         fprintf(stderr, "\n");
1467                 }
1468
1469                 if (pg_strcasecmp(argv[0], "setrandom") == 0)
1470                 {
1471                         char       *var;
1472                         int64           min,
1473                                                 max;
1474                         double          threshold = 0;
1475                         char            res[64];
1476
1477                         if (*argv[2] == ':')
1478                         {
1479                                 if ((var = getVariable(st, argv[2] + 1)) == NULL)
1480                                 {
1481                                         fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]);
1482                                         st->ecnt++;
1483                                         return true;
1484                                 }
1485                                 min = strtoint64(var);
1486                         }
1487                         else
1488                                 min = strtoint64(argv[2]);
1489
1490 #ifdef NOT_USED
1491                         if (min < 0)
1492                         {
1493                                 fprintf(stderr, "%s: invalid minimum number %d\n", argv[0], min);
1494                                 st->ecnt++;
1495                                 return;
1496                         }
1497 #endif
1498
1499                         if (*argv[3] == ':')
1500                         {
1501                                 if ((var = getVariable(st, argv[3] + 1)) == NULL)
1502                                 {
1503                                         fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[3]);
1504                                         st->ecnt++;
1505                                         return true;
1506                                 }
1507                                 max = strtoint64(var);
1508                         }
1509                         else
1510                                 max = strtoint64(argv[3]);
1511
1512                         if (max < min)
1513                         {
1514                                 fprintf(stderr, "%s: maximum is less than minimum\n", argv[0]);
1515                                 st->ecnt++;
1516                                 return true;
1517                         }
1518
1519                         /*
1520                          * Generate random number functions need to be able to subtract
1521                          * max from min and add one to the result without overflowing.
1522                          * Since we know max > min, we can detect overflow just by checking
1523                          * for a negative result. But we must check both that the subtraction
1524                          * doesn't overflow, and that adding one to the result doesn't overflow either.
1525                          */
1526                         if (max - min < 0 || (max - min) + 1 < 0)
1527                         {
1528                                 fprintf(stderr, "%s: range too large\n", argv[0]);
1529                                 st->ecnt++;
1530                                 return true;
1531                         }
1532
1533                         if (argc == 4 || /* uniform without or with "uniform" keyword */
1534                                 (argc == 5 && pg_strcasecmp(argv[4], "uniform") == 0))
1535                         {
1536 #ifdef DEBUG
1537                                 printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
1538 #endif
1539                                 snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
1540                         }
1541                         else if (argc == 6 &&
1542                                          ((pg_strcasecmp(argv[4], "gaussian") == 0) ||
1543                                           (pg_strcasecmp(argv[4], "exponential") == 0)))
1544                         {
1545                                 if (*argv[5] == ':')
1546                                 {
1547                                         if ((var = getVariable(st, argv[5] + 1)) == NULL)
1548                                         {
1549                                                 fprintf(stderr, "%s: invalid threshold number %s\n", argv[0], argv[5]);
1550                                                 st->ecnt++;
1551                                                 return true;
1552                                         }
1553                                         threshold = strtod(var, NULL);
1554                                 }
1555                                 else
1556                                         threshold = strtod(argv[5], NULL);
1557
1558                                 if (pg_strcasecmp(argv[4], "gaussian") == 0)
1559                                 {
1560                                         if (threshold < MIN_GAUSSIAN_THRESHOLD)
1561                                         {
1562                                                 fprintf(stderr, "%s: gaussian threshold must be at least %f\n,", argv[5], MIN_GAUSSIAN_THRESHOLD);
1563                                                 st->ecnt++;
1564                                                 return true;
1565                                         }
1566 #ifdef DEBUG
1567                                         printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, threshold));
1568 #endif
1569                                         snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, threshold));
1570                                 }
1571                                 else if (pg_strcasecmp(argv[4], "exponential") == 0)
1572                                 {
1573                                         if (threshold <= 0.0)
1574                                         {
1575                                                 fprintf(stderr, "%s: exponential threshold must be strictly positive\n,", argv[5]);
1576                                                 st->ecnt++;
1577                                                 return true;
1578                                         }
1579 #ifdef DEBUG
1580                                         printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, threshold));
1581 #endif
1582                                         snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold));
1583                                 }
1584                         }
1585                         else /* this means an error somewhere in the parsing phase... */
1586                         {
1587                                 fprintf(stderr, "%s: unexpected arguments\n", argv[0]);
1588                                 st->ecnt++;
1589                                 return true;
1590                         }
1591
1592                         if (!putVariable(st, argv[0], argv[1], res))
1593                         {
1594                                 st->ecnt++;
1595                                 return true;
1596                         }
1597
1598                         st->listen = 1;
1599                 }
1600                 else if (pg_strcasecmp(argv[0], "set") == 0)
1601                 {
1602                         char            res[64];
1603                         PgBenchExpr *expr = commands[st->state]->expr;
1604                         int64           result;
1605
1606                         if (!evaluateExpr(st, expr, &result))
1607                         {
1608                                 st->ecnt++;
1609                                 return true;
1610                         }
1611                         sprintf(res, INT64_FORMAT, result);
1612
1613                         if (!putVariable(st, argv[0], argv[1], res))
1614                         {
1615                                 st->ecnt++;
1616                                 return true;
1617                         }
1618
1619                         st->listen = 1;
1620                 }
1621                 else if (pg_strcasecmp(argv[0], "sleep") == 0)
1622                 {
1623                         char       *var;
1624                         int                     usec;
1625                         instr_time      now;
1626
1627                         if (*argv[1] == ':')
1628                         {
1629                                 if ((var = getVariable(st, argv[1] + 1)) == NULL)
1630                                 {
1631                                         fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[1]);
1632                                         st->ecnt++;
1633                                         return true;
1634                                 }
1635                                 usec = atoi(var);
1636                         }
1637                         else
1638                                 usec = atoi(argv[1]);
1639
1640                         if (argc > 2)
1641                         {
1642                                 if (pg_strcasecmp(argv[2], "ms") == 0)
1643                                         usec *= 1000;
1644                                 else if (pg_strcasecmp(argv[2], "s") == 0)
1645                                         usec *= 1000000;
1646                         }
1647                         else
1648                                 usec *= 1000000;
1649
1650                         INSTR_TIME_SET_CURRENT(now);
1651                         st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now) + usec;
1652                         st->sleeping = 1;
1653
1654                         st->listen = 1;
1655                 }
1656                 else if (pg_strcasecmp(argv[0], "setshell") == 0)
1657                 {
1658                         bool            ret = runShellCommand(st, argv[1], argv + 2, argc - 2);
1659
1660                         if (timer_exceeded) /* timeout */
1661                                 return clientDone(st, true);
1662                         else if (!ret)          /* on error */
1663                         {
1664                                 st->ecnt++;
1665                                 return true;
1666                         }
1667                         else    /* succeeded */
1668                                 st->listen = 1;
1669                 }
1670                 else if (pg_strcasecmp(argv[0], "shell") == 0)
1671                 {
1672                         bool            ret = runShellCommand(st, NULL, argv + 1, argc - 1);
1673
1674                         if (timer_exceeded) /* timeout */
1675                                 return clientDone(st, true);
1676                         else if (!ret)          /* on error */
1677                         {
1678                                 st->ecnt++;
1679                                 return true;
1680                         }
1681                         else    /* succeeded */
1682                                 st->listen = 1;
1683                 }
1684                 goto top;
1685         }
1686
1687         return true;
1688 }
1689
1690 /*
1691  * print log entry after completing one transaction.
1692  */
1693 static void
1694 doLog(TState *thread, CState *st, FILE *logfile, instr_time *now, AggVals *agg,
1695           bool skipped)
1696 {
1697         double          lag;
1698         double          latency;
1699
1700         /*
1701          * Skip the log entry if sampling is enabled and this row doesn't belong
1702          * to the random sample.
1703          */
1704         if (sample_rate != 0.0 &&
1705                 pg_erand48(thread->random_state) > sample_rate)
1706                 return;
1707
1708         if (INSTR_TIME_IS_ZERO(*now))
1709                 INSTR_TIME_SET_CURRENT(*now);
1710
1711         latency = (double) (INSTR_TIME_GET_MICROSEC(*now) - st->txn_scheduled);
1712         if (skipped)
1713                 lag = latency;
1714         else
1715                 lag = (double) (INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled);
1716
1717         /* should we aggregate the results or not? */
1718         if (agg_interval > 0)
1719         {
1720                 /*
1721                  * Are we still in the same interval? If yes, accumulate the values
1722                  * (print them otherwise)
1723                  */
1724                 if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(*now))
1725                 {
1726                         agg->cnt += 1;
1727                         if (skipped)
1728                         {
1729                                 /* there is no latency to record if the transaction was skipped */
1730                                 agg->skipped += 1;
1731                         }
1732                         else
1733                         {
1734                                 agg->sum_latency += latency;
1735                                 agg->sum2_latency += latency * latency;
1736
1737                                 /* first in this aggregation interval */
1738                                 if ((agg->cnt == 1) || (latency < agg->min_latency))
1739                                         agg->min_latency = latency;
1740
1741                                 if ((agg->cnt == 1) || (latency > agg->max_latency))
1742                                         agg->max_latency = latency;
1743
1744                                 /* and the same for schedule lag */
1745                                 if (throttle_delay)
1746                                 {
1747                                         agg->sum_lag += lag;
1748                                         agg->sum2_lag += lag * lag;
1749
1750                                         if ((agg->cnt == 1) || (lag < agg->min_lag))
1751                                                 agg->min_lag = lag;
1752                                         if ((agg->cnt == 1) || (lag > agg->max_lag))
1753                                                 agg->max_lag = lag;
1754                                 }
1755                         }
1756                 }
1757                 else
1758                 {
1759                         /*
1760                          * Loop until we reach the interval of the current transaction
1761                          * (and print all the empty intervals in between).
1762                          */
1763                         while (agg->start_time + agg_interval < INSTR_TIME_GET_DOUBLE(*now))
1764                         {
1765                                 /*
1766                                  * This is a non-Windows branch (thanks to the
1767                                  * ifdef in usage), so we don't need to handle
1768                                  * this in a special way (see below).
1769                                  */
1770                                 fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f",
1771                                                 agg->start_time,
1772                                                 agg->cnt,
1773                                                 agg->sum_latency,
1774                                                 agg->sum2_latency,
1775                                                 agg->min_latency,
1776                                                 agg->max_latency);
1777                                 if (throttle_delay)
1778                                 {
1779                                         fprintf(logfile, " %.0f %.0f %.0f %.0f",
1780                                                         agg->sum_lag,
1781                                                         agg->sum2_lag,
1782                                                         agg->min_lag,
1783                                                         agg->max_lag);
1784                                         if (latency_limit)
1785                                                 fprintf(logfile, " %d", agg->skipped);
1786                                 }
1787                                 fputc('\n', logfile);
1788
1789                                 /* move to the next inteval */
1790                                 agg->start_time = agg->start_time + agg_interval;
1791
1792                                 /* reset for "no transaction" intervals */
1793                                 agg->cnt = 0;
1794                                 agg->skipped = 0;
1795                                 agg->min_latency = 0;
1796                                 agg->max_latency = 0;
1797                                 agg->sum_latency = 0;
1798                                 agg->sum2_latency = 0;
1799                                 agg->min_lag = 0;
1800                                 agg->max_lag = 0;
1801                                 agg->sum_lag = 0;
1802                                 agg->sum2_lag = 0;
1803                         }
1804
1805                         /* reset the values to include only the current transaction. */
1806                         agg->cnt = 1;
1807                         agg->skipped = skipped ? 1 : 0;
1808                         agg->min_latency = latency;
1809                         agg->max_latency = latency;
1810                         agg->sum_latency = skipped ? 0.0 : latency;
1811                         agg->sum2_latency = skipped ? 0.0 : latency * latency;
1812                         agg->min_lag = lag;
1813                         agg->max_lag = lag;
1814                         agg->sum_lag = lag;
1815                         agg->sum2_lag = lag * lag;
1816                 }
1817         }
1818         else
1819         {
1820                 /* no, print raw transactions */
1821 #ifndef WIN32
1822
1823                 /* This is more than we really ought to know about instr_time */
1824                 if (skipped)
1825                         fprintf(logfile, "%d %d skipped %d %ld %ld",
1826                                         st->id, st->cnt, st->use_file,
1827                                         (long) now->tv_sec, (long) now->tv_usec);
1828                 else
1829                         fprintf(logfile, "%d %d %.0f %d %ld %ld",
1830                                         st->id, st->cnt, latency, st->use_file,
1831                                         (long) now->tv_sec, (long) now->tv_usec);
1832 #else
1833
1834                 /* On Windows, instr_time doesn't provide a timestamp anyway */
1835                 if (skipped)
1836                         fprintf(logfile, "%d %d skipped %d 0 0",
1837                                         st->id, st->cnt, st->use_file);
1838                 else
1839                         fprintf(logfile, "%d %d %.0f %d 0 0",
1840                                         st->id, st->cnt, latency, st->use_file);
1841 #endif
1842                 if (throttle_delay)
1843                         fprintf(logfile, " %.0f", lag);
1844                 fputc('\n', logfile);
1845         }
1846 }
1847
1848 /* discard connections */
1849 static void
1850 disconnect_all(CState *state, int length)
1851 {
1852         int                     i;
1853
1854         for (i = 0; i < length; i++)
1855         {
1856                 if (state[i].con)
1857                 {
1858                         PQfinish(state[i].con);
1859                         state[i].con = NULL;
1860                 }
1861         }
1862 }
1863
1864 /* create tables and setup data */
1865 static void
1866 init(bool is_no_vacuum)
1867 {
1868 /*
1869  * The scale factor at/beyond which 32-bit integers are insufficient for
1870  * storing TPC-B account IDs.
1871  *
1872  * Although the actual threshold is 21474, we use 20000 because it is easier to
1873  * document and remember, and isn't that far away from the real threshold.
1874  */
1875 #define SCALE_32BIT_THRESHOLD 20000
1876
1877         /*
1878          * Note: TPC-B requires at least 100 bytes per row, and the "filler"
1879          * fields in these table declarations were intended to comply with that.
1880          * The pgbench_accounts table complies with that because the "filler"
1881          * column is set to blank-padded empty string. But for all other tables
1882          * the columns default to NULL and so don't actually take any space.  We
1883          * could fix that by giving them non-null default values.  However, that
1884          * would completely break comparability of pgbench results with prior
1885          * versions. Since pgbench has never pretended to be fully TPC-B compliant
1886          * anyway, we stick with the historical behavior.
1887          */
1888         struct ddlinfo
1889         {
1890                 const char *table;              /* table name */
1891                 const char *smcols;             /* column decls if accountIDs are 32 bits */
1892                 const char *bigcols;    /* column decls if accountIDs are 64 bits */
1893                 int                     declare_fillfactor;
1894         };
1895         static const struct ddlinfo DDLs[] = {
1896                 {
1897                         "pgbench_history",
1898                         "tid int,bid int,aid    int,delta int,mtime timestamp,filler char(22)",
1899                         "tid int,bid int,aid bigint,delta int,mtime timestamp,filler char(22)",
1900                         0
1901                 },
1902                 {
1903                         "pgbench_tellers",
1904                         "tid int not null,bid int,tbalance int,filler char(84)",
1905                         "tid int not null,bid int,tbalance int,filler char(84)",
1906                         1
1907                 },
1908                 {
1909                         "pgbench_accounts",
1910                         "aid    int not null,bid int,abalance int,filler char(84)",
1911                         "aid bigint not null,bid int,abalance int,filler char(84)",
1912                         1
1913                 },
1914                 {
1915                         "pgbench_branches",
1916                         "bid int not null,bbalance int,filler char(88)",
1917                         "bid int not null,bbalance int,filler char(88)",
1918                         1
1919                 }
1920         };
1921         static const char *const DDLINDEXes[] = {
1922                 "alter table pgbench_branches add primary key (bid)",
1923                 "alter table pgbench_tellers add primary key (tid)",
1924                 "alter table pgbench_accounts add primary key (aid)"
1925         };
1926         static const char *const DDLKEYs[] = {
1927                 "alter table pgbench_tellers add foreign key (bid) references pgbench_branches",
1928                 "alter table pgbench_accounts add foreign key (bid) references pgbench_branches",
1929                 "alter table pgbench_history add foreign key (bid) references pgbench_branches",
1930                 "alter table pgbench_history add foreign key (tid) references pgbench_tellers",
1931                 "alter table pgbench_history add foreign key (aid) references pgbench_accounts"
1932         };
1933
1934         PGconn     *con;
1935         PGresult   *res;
1936         char            sql[256];
1937         int                     i;
1938         int64           k;
1939
1940         /* used to track elapsed time and estimate of the remaining time */
1941         instr_time      start,
1942                                 diff;
1943         double          elapsed_sec,
1944                                 remaining_sec;
1945         int                     log_interval = 1;
1946
1947         if ((con = doConnect()) == NULL)
1948                 exit(1);
1949
1950         for (i = 0; i < lengthof(DDLs); i++)
1951         {
1952                 char            opts[256];
1953                 char            buffer[256];
1954                 const struct ddlinfo *ddl = &DDLs[i];
1955                 const char *cols;
1956
1957                 /* Remove old table, if it exists. */
1958                 snprintf(buffer, sizeof(buffer), "drop table if exists %s", ddl->table);
1959                 executeStatement(con, buffer);
1960
1961                 /* Construct new create table statement. */
1962                 opts[0] = '\0';
1963                 if (ddl->declare_fillfactor)
1964                         snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
1965                                          " with (fillfactor=%d)", fillfactor);
1966                 if (tablespace != NULL)
1967                 {
1968                         char       *escape_tablespace;
1969
1970                         escape_tablespace = PQescapeIdentifier(con, tablespace,
1971                                                                                                    strlen(tablespace));
1972                         snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
1973                                          " tablespace %s", escape_tablespace);
1974                         PQfreemem(escape_tablespace);
1975                 }
1976
1977                 cols = (scale >= SCALE_32BIT_THRESHOLD) ? ddl->bigcols : ddl->smcols;
1978
1979                 snprintf(buffer, sizeof(buffer), "create%s table %s(%s)%s",
1980                                  unlogged_tables ? " unlogged" : "",
1981                                  ddl->table, cols, opts);
1982
1983                 executeStatement(con, buffer);
1984         }
1985
1986         executeStatement(con, "begin");
1987
1988         for (i = 0; i < nbranches * scale; i++)
1989         {
1990                 /* "filler" column defaults to NULL */
1991                 snprintf(sql, sizeof(sql),
1992                                  "insert into pgbench_branches(bid,bbalance) values(%d,0)",
1993                                  i + 1);
1994                 executeStatement(con, sql);
1995         }
1996
1997         for (i = 0; i < ntellers * scale; i++)
1998         {
1999                 /* "filler" column defaults to NULL */
2000                 snprintf(sql, sizeof(sql),
2001                         "insert into pgbench_tellers(tid,bid,tbalance) values (%d,%d,0)",
2002                                  i + 1, i / ntellers + 1);
2003                 executeStatement(con, sql);
2004         }
2005
2006         executeStatement(con, "commit");
2007
2008         /*
2009          * fill the pgbench_accounts table with some data
2010          */
2011         fprintf(stderr, "creating tables...\n");
2012
2013         executeStatement(con, "begin");
2014         executeStatement(con, "truncate pgbench_accounts");
2015
2016         res = PQexec(con, "copy pgbench_accounts from stdin");
2017         if (PQresultStatus(res) != PGRES_COPY_IN)
2018         {
2019                 fprintf(stderr, "%s", PQerrorMessage(con));
2020                 exit(1);
2021         }
2022         PQclear(res);
2023
2024         INSTR_TIME_SET_CURRENT(start);
2025
2026         for (k = 0; k < (int64) naccounts * scale; k++)
2027         {
2028                 int64           j = k + 1;
2029
2030                 /* "filler" column defaults to blank padded empty string */
2031                 snprintf(sql, sizeof(sql),
2032                                  INT64_FORMAT "\t" INT64_FORMAT "\t%d\t\n",
2033                                  j, k / naccounts + 1, 0);
2034                 if (PQputline(con, sql))
2035                 {
2036                         fprintf(stderr, "PQputline failed\n");
2037                         exit(1);
2038                 }
2039
2040                 /*
2041                  * If we want to stick with the original logging, print a message each
2042                  * 100k inserted rows.
2043                  */
2044                 if ((!use_quiet) && (j % 100000 == 0))
2045                 {
2046                         INSTR_TIME_SET_CURRENT(diff);
2047                         INSTR_TIME_SUBTRACT(diff, start);
2048
2049                         elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
2050                         remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
2051
2052                         fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)\n",
2053                                         j, (int64) naccounts * scale,
2054                                         (int) (((int64) j * 100) / (naccounts * (int64) scale)),
2055                                         elapsed_sec, remaining_sec);
2056                 }
2057                 /* let's not call the timing for each row, but only each 100 rows */
2058                 else if (use_quiet && (j % 100 == 0))
2059                 {
2060                         INSTR_TIME_SET_CURRENT(diff);
2061                         INSTR_TIME_SUBTRACT(diff, start);
2062
2063                         elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
2064                         remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
2065
2066                         /* have we reached the next interval (or end)? */
2067                         if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
2068                         {
2069                                 fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)\n",
2070                                                 j, (int64) naccounts * scale,
2071                                                 (int) (((int64) j * 100) / (naccounts * (int64) scale)), elapsed_sec, remaining_sec);
2072
2073                                 /* skip to the next interval */
2074                                 log_interval = (int) ceil(elapsed_sec / LOG_STEP_SECONDS);
2075                         }
2076                 }
2077
2078         }
2079         if (PQputline(con, "\\.\n"))
2080         {
2081                 fprintf(stderr, "very last PQputline failed\n");
2082                 exit(1);
2083         }
2084         if (PQendcopy(con))
2085         {
2086                 fprintf(stderr, "PQendcopy failed\n");
2087                 exit(1);
2088         }
2089         executeStatement(con, "commit");
2090
2091         /* vacuum */
2092         if (!is_no_vacuum)
2093         {
2094                 fprintf(stderr, "vacuum...\n");
2095                 executeStatement(con, "vacuum analyze pgbench_branches");
2096                 executeStatement(con, "vacuum analyze pgbench_tellers");
2097                 executeStatement(con, "vacuum analyze pgbench_accounts");
2098                 executeStatement(con, "vacuum analyze pgbench_history");
2099         }
2100
2101         /*
2102          * create indexes
2103          */
2104         fprintf(stderr, "set primary keys...\n");
2105         for (i = 0; i < lengthof(DDLINDEXes); i++)
2106         {
2107                 char            buffer[256];
2108
2109                 strlcpy(buffer, DDLINDEXes[i], sizeof(buffer));
2110
2111                 if (index_tablespace != NULL)
2112                 {
2113                         char       *escape_tablespace;
2114
2115                         escape_tablespace = PQescapeIdentifier(con, index_tablespace,
2116                                                                                                    strlen(index_tablespace));
2117                         snprintf(buffer + strlen(buffer), sizeof(buffer) - strlen(buffer),
2118                                          " using index tablespace %s", escape_tablespace);
2119                         PQfreemem(escape_tablespace);
2120                 }
2121
2122                 executeStatement(con, buffer);
2123         }
2124
2125         /*
2126          * create foreign keys
2127          */
2128         if (foreign_keys)
2129         {
2130                 fprintf(stderr, "set foreign keys...\n");
2131                 for (i = 0; i < lengthof(DDLKEYs); i++)
2132                 {
2133                         executeStatement(con, DDLKEYs[i]);
2134                 }
2135         }
2136
2137         fprintf(stderr, "done.\n");
2138         PQfinish(con);
2139 }
2140
2141 /*
2142  * Parse the raw sql and replace :param to $n.
2143  */
2144 static bool
2145 parseQuery(Command *cmd, const char *raw_sql)
2146 {
2147         char       *sql,
2148                            *p;
2149
2150         sql = pg_strdup(raw_sql);
2151         cmd->argc = 1;
2152
2153         p = sql;
2154         while ((p = strchr(p, ':')) != NULL)
2155         {
2156                 char            var[12];
2157                 char       *name;
2158                 int                     eaten;
2159
2160                 name = parseVariable(p, &eaten);
2161                 if (name == NULL)
2162                 {
2163                         while (*p == ':')
2164                         {
2165                                 p++;
2166                         }
2167                         continue;
2168                 }
2169
2170                 if (cmd->argc >= MAX_ARGS)
2171                 {
2172                         fprintf(stderr, "statement has too many arguments (maximum is %d): %s\n", MAX_ARGS - 1, raw_sql);
2173                         pg_free(name);
2174                         return false;
2175                 }
2176
2177                 sprintf(var, "$%d", cmd->argc);
2178                 p = replaceVariable(&sql, p, eaten, var);
2179
2180                 cmd->argv[cmd->argc] = name;
2181                 cmd->argc++;
2182         }
2183
2184         cmd->argv[0] = sql;
2185         return true;
2186 }
2187
2188 /* Parse a command; return a Command struct, or NULL if it's a comment */
2189 static Command *
2190 process_commands(char *buf, const char *source, const int lineno)
2191 {
2192         const char      delim[] = " \f\n\r\t\v";
2193
2194         Command    *my_commands;
2195         int                     j;
2196         char       *p,
2197                            *tok;
2198
2199         /* Make the string buf end at the next newline */
2200         if ((p = strchr(buf, '\n')) != NULL)
2201                 *p = '\0';
2202
2203         /* Skip leading whitespace */
2204         p = buf;
2205         while (isspace((unsigned char) *p))
2206                 p++;
2207
2208         /* If the line is empty or actually a comment, we're done */
2209         if (*p == '\0' || strncmp(p, "--", 2) == 0)
2210                 return NULL;
2211
2212         /* Allocate and initialize Command structure */
2213         my_commands = (Command *) pg_malloc(sizeof(Command));
2214         my_commands->line = pg_strdup(buf);
2215         my_commands->command_num = num_commands++;
2216         my_commands->type = 0;          /* until set */
2217         my_commands->argc = 0;
2218
2219         if (*p == '\\')
2220         {
2221                 int             max_args = -1;
2222                 my_commands->type = META_COMMAND;
2223
2224                 j = 0;
2225                 tok = strtok(++p, delim);
2226
2227                 if (tok != NULL && pg_strcasecmp(tok, "set") == 0)
2228                         max_args = 2;
2229
2230                 while (tok != NULL)
2231                 {
2232                         my_commands->argv[j++] = pg_strdup(tok);
2233                         my_commands->argc++;
2234                         if (max_args >= 0 && my_commands->argc >= max_args)
2235                                 tok = strtok(NULL, "");
2236                         else
2237                                 tok = strtok(NULL, delim);
2238                 }
2239
2240                 if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
2241                 {
2242                         /* parsing:
2243                          * \setrandom variable min max [uniform]
2244                          * \setrandom variable min max (gaussian|exponential) threshold
2245                          */
2246
2247                         if (my_commands->argc < 4)
2248                         {
2249                                 fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
2250                                 exit(1);
2251                         }
2252                         /* argc >= 4 */
2253
2254                         if (my_commands->argc == 4 || /* uniform without/with "uniform" keyword */
2255                                 (my_commands->argc == 5 &&
2256                                  pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
2257                         {
2258                                 /* nothing to do */
2259                         }
2260                         else if (/* argc >= 5 */
2261                                          (pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
2262                                          (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
2263                         {
2264                                 if (my_commands->argc < 6)
2265                                 {
2266                                         fprintf(stderr, "%s(%s): missing threshold argument\n", my_commands->argv[0], my_commands->argv[4]);
2267                                         exit(1);
2268                                 }
2269                                 else if (my_commands->argc > 6)
2270                                 {
2271                                         fprintf(stderr, "%s(%s): too many arguments (extra:",
2272                                                         my_commands->argv[0], my_commands->argv[4]);
2273                                         for (j = 6; j < my_commands->argc; j++)
2274                                                 fprintf(stderr, " %s", my_commands->argv[j]);
2275                                         fprintf(stderr, ")\n");
2276                                         exit(1);
2277                                 }
2278                         }
2279                         else /* cannot parse, unexpected arguments */
2280                         {
2281                                 fprintf(stderr, "%s: unexpected arguments (bad:", my_commands->argv[0]);
2282                                 for (j = 4; j < my_commands->argc; j++)
2283                                         fprintf(stderr, " %s", my_commands->argv[j]);
2284                                 fprintf(stderr, ")\n");
2285                                 exit(1);
2286                         }
2287                 }
2288                 else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
2289                 {
2290                         if (my_commands->argc < 3)
2291                         {
2292                                 fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
2293                                 exit(1);
2294                         }
2295
2296                         expr_scanner_init(my_commands->argv[2]);
2297
2298                         if (expr_yyparse() != 0)
2299                         {
2300                                 fprintf(stderr, "%s: parse error\n", my_commands->argv[0]);
2301                                 exit(1);
2302                         }
2303
2304                         my_commands->expr = expr_parse_result;
2305
2306                         expr_scanner_finish();
2307                 }
2308                 else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0)
2309                 {
2310                         if (my_commands->argc < 2)
2311                         {
2312                                 fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
2313                                 exit(1);
2314                         }
2315
2316                         /*
2317                          * Split argument into number and unit to allow "sleep 1ms" etc.
2318                          * We don't have to terminate the number argument with null
2319                          * because it will be parsed with atoi, which ignores trailing
2320                          * non-digit characters.
2321                          */
2322                         if (my_commands->argv[1][0] != ':')
2323                         {
2324                                 char       *c = my_commands->argv[1];
2325
2326                                 while (isdigit((unsigned char) *c))
2327                                         c++;
2328                                 if (*c)
2329                                 {
2330                                         my_commands->argv[2] = c;
2331                                         if (my_commands->argc < 3)
2332                                                 my_commands->argc = 3;
2333                                 }
2334                         }
2335
2336                         if (my_commands->argc >= 3)
2337                         {
2338                                 if (pg_strcasecmp(my_commands->argv[2], "us") != 0 &&
2339                                         pg_strcasecmp(my_commands->argv[2], "ms") != 0 &&
2340                                         pg_strcasecmp(my_commands->argv[2], "s") != 0)
2341                                 {
2342                                         fprintf(stderr, "%s: unknown time unit '%s' - must be us, ms or s\n",
2343                                                         my_commands->argv[0], my_commands->argv[2]);
2344                                         exit(1);
2345                                 }
2346                         }
2347
2348                         for (j = 3; j < my_commands->argc; j++)
2349                                 fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
2350                                                 my_commands->argv[0], my_commands->argv[j]);
2351                 }
2352                 else if (pg_strcasecmp(my_commands->argv[0], "setshell") == 0)
2353                 {
2354                         if (my_commands->argc < 3)
2355                         {
2356                                 fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
2357                                 exit(1);
2358                         }
2359                 }
2360                 else if (pg_strcasecmp(my_commands->argv[0], "shell") == 0)
2361                 {
2362                         if (my_commands->argc < 1)
2363                         {
2364                                 fprintf(stderr, "%s: missing command\n", my_commands->argv[0]);
2365                                 exit(1);
2366                         }
2367                 }
2368                 else
2369                 {
2370                         fprintf(stderr, "Invalid command %s\n", my_commands->argv[0]);
2371                         exit(1);
2372                 }
2373         }
2374         else
2375         {
2376                 my_commands->type = SQL_COMMAND;
2377
2378                 switch (querymode)
2379                 {
2380                         case QUERY_SIMPLE:
2381                                 my_commands->argv[0] = pg_strdup(p);
2382                                 my_commands->argc++;
2383                                 break;
2384                         case QUERY_EXTENDED:
2385                         case QUERY_PREPARED:
2386                                 if (!parseQuery(my_commands, p))
2387                                         exit(1);
2388                                 break;
2389                         default:
2390                                 exit(1);
2391                 }
2392         }
2393
2394         return my_commands;
2395 }
2396
/*
 * Fetch the next line of fd into a freshly allocated buffer.
 *
 * The line is assembled from BUFSIZ-sized fgets() reads, so it may be of
 * any length; a trailing newline, when present, is kept.  Returns NULL once
 * nothing more can be read.
 *
 * The buffer is usually bigger than the line itself.  That is fine for this
 * program, since callers release it right after parsing the line.
 */
static char *
read_line_from_file(FILE *fd)
{
	char		chunk[BUFSIZ];
	size_t		capacity = BUFSIZ;
	size_t		length = 0;
	char	   *line;

	line = (char *) palloc(capacity);
	line[0] = '\0';

	for (;;)
	{
		size_t		got;

		if (fgets(chunk, BUFSIZ, fd) == NULL)
			break;

		/* Tack this piece (terminator included) onto the line so far */
		got = strlen(chunk);
		memcpy(line + length, chunk, got + 1);
		length += got;

		/* A newline at the end of the piece completes the line */
		if (got > 0 && chunk[got - 1] == '\n')
			break;

		/* Otherwise make room for one more full piece */
		capacity += BUFSIZ;
		line = (char *) pg_realloc(line, capacity);
	}

	if (length == 0)
	{
		/* Nothing was read: end of input */
		free(line);
		return NULL;
	}

	return line;
}
2439
2440 static int
2441 process_file(char *filename)
2442 {
2443 #define COMMANDS_ALLOC_NUM 128
2444
2445         Command   **my_commands;
2446         FILE       *fd;
2447         int                     lineno, index;
2448         char       *buf;
2449         int                     alloc_num;
2450
2451         if (num_files >= MAX_FILES)
2452         {
2453                 fprintf(stderr, "Up to only %d SQL files are allowed\n", MAX_FILES);
2454                 exit(1);
2455         }
2456
2457         alloc_num = COMMANDS_ALLOC_NUM;
2458         my_commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
2459
2460         if (strcmp(filename, "-") == 0)
2461                 fd = stdin;
2462         else if ((fd = fopen(filename, "r")) == NULL)
2463         {
2464                 fprintf(stderr, "%s: %s\n", filename, strerror(errno));
2465                 pg_free(my_commands);
2466                 return false;
2467         }
2468
2469         lineno = 0;
2470         index = 0;
2471
2472         while ((buf = read_line_from_file(fd)) != NULL)
2473         {
2474                 Command    *command;
2475                 lineno += 1;
2476
2477                 command = process_commands(buf, filename, lineno);
2478
2479                 free(buf);
2480
2481                 if (command == NULL)
2482                         continue;
2483
2484                 my_commands[index] = command;
2485                 index++;
2486
2487                 if (index >= alloc_num)
2488                 {
2489                         alloc_num += COMMANDS_ALLOC_NUM;
2490                         my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
2491                 }
2492         }
2493         fclose(fd);
2494
2495         my_commands[index] = NULL;
2496
2497         sql_files[num_files++] = my_commands;
2498
2499         return true;
2500 }
2501
2502 static Command **
2503 process_builtin(char *tb, const char *source)
2504 {
2505 #define COMMANDS_ALLOC_NUM 128
2506
2507         Command   **my_commands;
2508         int                     lineno, index;
2509         char            buf[BUFSIZ];
2510         int                     alloc_num;
2511
2512         alloc_num = COMMANDS_ALLOC_NUM;
2513         my_commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
2514
2515         lineno = 0;
2516         index = 0;
2517
2518         for (;;)
2519         {
2520                 char       *p;
2521                 Command    *command;
2522
2523                 p = buf;
2524                 while (*tb && *tb != '\n')
2525                         *p++ = *tb++;
2526
2527                 if (*tb == '\0')
2528                         break;
2529
2530                 if (*tb == '\n')
2531                         tb++;
2532
2533                 *p = '\0';
2534
2535                 lineno += 1;
2536
2537                 command = process_commands(buf, source, lineno);
2538                 if (command == NULL)
2539                         continue;
2540
2541                 my_commands[index] = command;
2542                 index++;
2543
2544                 if (index >= alloc_num)
2545                 {
2546                         alloc_num += COMMANDS_ALLOC_NUM;
2547                         my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
2548                 }
2549         }
2550
2551         my_commands[index] = NULL;
2552
2553         return my_commands;
2554 }
2555
2556 /* print out results */
2557 static void
2558 printResults(int ttype, int64 normal_xacts, int nclients,
2559                          TState *threads, int nthreads,
2560                          instr_time total_time, instr_time conn_total_time,
2561                          int64 total_latencies, int64 total_sqlats,
2562                          int64 throttle_lag, int64 throttle_lag_max,
2563                          int64 throttle_latency_skipped, int64 latency_late)
2564 {
2565         double          time_include,
2566                                 tps_include,
2567                                 tps_exclude;
2568         char       *s;
2569
2570         time_include = INSTR_TIME_GET_DOUBLE(total_time);
2571         tps_include = normal_xacts / time_include;
2572         tps_exclude = normal_xacts / (time_include -
2573                                                 (INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads));
2574
2575         if (ttype == 0)
2576                 s = "TPC-B (sort of)";
2577         else if (ttype == 2)
2578                 s = "Update only pgbench_accounts";
2579         else if (ttype == 1)
2580                 s = "SELECT only";
2581         else
2582                 s = "Custom query";
2583
2584         printf("transaction type: %s\n", s);
2585         printf("scaling factor: %d\n", scale);
2586         printf("query mode: %s\n", QUERYMODE[querymode]);
2587         printf("number of clients: %d\n", nclients);
2588         printf("number of threads: %d\n", nthreads);
2589         if (duration <= 0)
2590         {
2591                 printf("number of transactions per client: %d\n", nxacts);
2592                 printf("number of transactions actually processed: " INT64_FORMAT "/" INT64_FORMAT "\n",
2593                            normal_xacts, (int64) nxacts * nclients);
2594         }
2595         else
2596         {
2597                 printf("duration: %d s\n", duration);
2598                 printf("number of transactions actually processed: " INT64_FORMAT "\n",
2599                            normal_xacts);
2600         }
2601
2602         /* Remaining stats are nonsensical if we failed to execute any xacts */
2603         if (normal_xacts <= 0)
2604                 return;
2605
2606         if (throttle_delay && latency_limit)
2607                 printf("number of transactions skipped: " INT64_FORMAT " (%.3f %%)\n",
2608                            throttle_latency_skipped,
2609                            100.0 * throttle_latency_skipped / (throttle_latency_skipped + normal_xacts));
2610
2611         if (latency_limit)
2612                 printf("number of transactions above the %.1f ms latency limit: " INT64_FORMAT " (%.3f %%)\n",
2613                            latency_limit / 1000.0, latency_late,
2614                            100.0 * latency_late / (throttle_latency_skipped + normal_xacts));
2615
2616         if (throttle_delay || progress || latency_limit)
2617         {
2618                 /* compute and show latency average and standard deviation */
2619                 double          latency = 0.001 * total_latencies / normal_xacts;
2620                 double          sqlat = (double) total_sqlats / normal_xacts;
2621
2622                 printf("latency average: %.3f ms\n"
2623                            "latency stddev: %.3f ms\n",
2624                            latency, 0.001 * sqrt(sqlat - 1000000.0 * latency * latency));
2625         }
2626         else
2627         {
2628                 /* only an average latency computed from the duration is available */
2629                 printf("latency average: %.3f ms\n",
2630                            1000.0 * duration * nclients / normal_xacts);
2631         }
2632
2633         if (throttle_delay)
2634         {
2635                 /*
2636                  * Report average transaction lag under rate limit throttling.  This
2637                  * is the delay between scheduled and actual start times for the
2638                  * transaction.  The measured lag may be caused by thread/client load,
2639                  * the database load, or the Poisson throttling process.
2640                  */
2641                 printf("rate limit schedule lag: avg %.3f (max %.3f) ms\n",
2642                            0.001 * throttle_lag / normal_xacts, 0.001 * throttle_lag_max);
2643         }
2644
2645         printf("tps = %f (including connections establishing)\n", tps_include);
2646         printf("tps = %f (excluding connections establishing)\n", tps_exclude);
2647
2648         /* Report per-command latencies */
2649         if (is_latencies)
2650         {
2651                 int                     i;
2652
2653                 for (i = 0; i < num_files; i++)
2654                 {
2655                         Command   **commands;
2656
2657                         if (num_files > 1)
2658                                 printf("statement latencies in milliseconds, file %d:\n", i + 1);
2659                         else
2660                                 printf("statement latencies in milliseconds:\n");
2661
2662                         for (commands = sql_files[i]; *commands != NULL; commands++)
2663                         {
2664                                 Command    *command = *commands;
2665                                 int                     cnum = command->command_num;
2666                                 double          total_time;
2667                                 instr_time      total_exec_elapsed;
2668                                 int                     total_exec_count;
2669                                 int                     t;
2670
2671                                 /* Accumulate per-thread data for command */
2672                                 INSTR_TIME_SET_ZERO(total_exec_elapsed);
2673                                 total_exec_count = 0;
2674                                 for (t = 0; t < nthreads; t++)
2675                                 {
2676                                         TState     *thread = &threads[t];
2677
2678                                         INSTR_TIME_ADD(total_exec_elapsed,
2679                                                                    thread->exec_elapsed[cnum]);
2680                                         total_exec_count += thread->exec_count[cnum];
2681                                 }
2682
2683                                 if (total_exec_count > 0)
2684                                         total_time = INSTR_TIME_GET_MILLISEC(total_exec_elapsed) / (double) total_exec_count;
2685                                 else
2686                                         total_time = 0.0;
2687
2688                                 printf("\t%f\t%s\n", total_time, command->line);
2689                         }
2690                 }
2691         }
2692 }
2693
2694
2695 int
2696 main(int argc, char **argv)
2697 {
2698         static struct option long_options[] = {
2699                 /* systematic long/short named options */
2700                 {"client", required_argument, NULL, 'c'},
2701                 {"connect", no_argument, NULL, 'C'},
2702                 {"debug", no_argument, NULL, 'd'},
2703                 {"define", required_argument, NULL, 'D'},
2704                 {"file", required_argument, NULL, 'f'},
2705                 {"fillfactor", required_argument, NULL, 'F'},
2706                 {"host", required_argument, NULL, 'h'},
2707                 {"initialize", no_argument, NULL, 'i'},
2708                 {"jobs", required_argument, NULL, 'j'},
2709                 {"log", no_argument, NULL, 'l'},
2710                 {"no-vacuum", no_argument, NULL, 'n'},
2711                 {"port", required_argument, NULL, 'p'},
2712                 {"progress", required_argument, NULL, 'P'},
2713                 {"protocol", required_argument, NULL, 'M'},
2714                 {"quiet", no_argument, NULL, 'q'},
2715                 {"report-latencies", no_argument, NULL, 'r'},
2716                 {"scale", required_argument, NULL, 's'},
2717                 {"select-only", no_argument, NULL, 'S'},
2718                 {"skip-some-updates", no_argument, NULL, 'N'},
2719                 {"time", required_argument, NULL, 'T'},
2720                 {"transactions", required_argument, NULL, 't'},
2721                 {"username", required_argument, NULL, 'U'},
2722                 {"vacuum-all", no_argument, NULL, 'v'},
2723                 /* long-named only options */
2724                 {"foreign-keys", no_argument, &foreign_keys, 1},
2725                 {"index-tablespace", required_argument, NULL, 3},
2726                 {"tablespace", required_argument, NULL, 2},
2727                 {"unlogged-tables", no_argument, &unlogged_tables, 1},
2728                 {"sampling-rate", required_argument, NULL, 4},
2729                 {"aggregate-interval", required_argument, NULL, 5},
2730                 {"rate", required_argument, NULL, 'R'},
2731                 {"latency-limit", required_argument, NULL, 'L'},
2732                 {NULL, 0, NULL, 0}
2733         };
2734
2735         int                     c;
2736         int                     nclients = 1;   /* default number of simulated clients */
2737         int                     nthreads = 1;   /* default number of threads */
2738         int                     is_init_mode = 0;               /* initialize mode? */
2739         int                     is_no_vacuum = 0;               /* no vacuum at all before testing? */
2740         int                     do_vacuum_accounts = 0; /* do vacuum accounts before testing? */
2741         int                     ttype = 0;              /* transaction type. 0: TPC-B, 1: SELECT only,
2742                                                                  * 2: skip update of branches and tellers */
2743         int                     optindex;
2744         char       *filename = NULL;
2745         bool            scale_given = false;
2746
2747         bool            benchmarking_option_set = false;
2748         bool            initialization_option_set = false;
2749
2750         CState     *state;                      /* status of clients */
2751         TState     *threads;            /* array of thread */
2752
2753         instr_time      start_time;             /* start up time */
2754         instr_time      total_time;
2755         instr_time      conn_total_time;
2756         int64           total_xacts = 0;
2757         int64           total_latencies = 0;
2758         int64           total_sqlats = 0;
2759         int64           throttle_lag = 0;
2760         int64           throttle_lag_max = 0;
2761         int64           throttle_latency_skipped = 0;
2762         int64           latency_late = 0;
2763
2764         int                     i;
2765
2766 #ifdef HAVE_GETRLIMIT
2767         struct rlimit rlim;
2768 #endif
2769
2770         PGconn     *con;
2771         PGresult   *res;
2772         char       *env;
2773
2774         char            val[64];
2775
2776         progname = get_progname(argv[0]);
2777
2778         if (argc > 1)
2779         {
2780                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
2781                 {
2782                         usage();
2783                         exit(0);
2784                 }
2785                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
2786                 {
2787                         puts("pgbench (PostgreSQL) " PG_VERSION);
2788                         exit(0);
2789                 }
2790         }
2791
2792 #ifdef WIN32
2793         /* stderr is buffered on Win32. */
2794         setvbuf(stderr, NULL, _IONBF, 0);
2795 #endif
2796
2797         if ((env = getenv("PGHOST")) != NULL && *env != '\0')
2798                 pghost = env;
2799         if ((env = getenv("PGPORT")) != NULL && *env != '\0')
2800                 pgport = env;
2801         else if ((env = getenv("PGUSER")) != NULL && *env != '\0')
2802                 login = env;
2803
2804         state = (CState *) pg_malloc(sizeof(CState));
2805         memset(state, 0, sizeof(CState));
2806
2807         while ((c = getopt_long(argc, argv, "ih:nvp:dqSNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1)
2808         {
2809                 switch (c)
2810                 {
2811                         case 'i':
2812                                 is_init_mode++;
2813                                 break;
2814                         case 'h':
2815                                 pghost = pg_strdup(optarg);
2816                                 break;
2817                         case 'n':
2818                                 is_no_vacuum++;
2819                                 break;
2820                         case 'v':
2821                                 do_vacuum_accounts++;
2822                                 break;
2823                         case 'p':
2824                                 pgport = pg_strdup(optarg);
2825                                 break;
2826                         case 'd':
2827                                 debug++;
2828                                 break;
2829                         case 'S':
2830                                 ttype = 1;
2831                                 benchmarking_option_set = true;
2832                                 break;
2833                         case 'N':
2834                                 ttype = 2;
2835                                 benchmarking_option_set = true;
2836                                 break;
2837                         case 'c':
2838                                 benchmarking_option_set = true;
2839                                 nclients = atoi(optarg);
2840                                 if (nclients <= 0 || nclients > MAXCLIENTS)
2841                                 {
2842                                         fprintf(stderr, "invalid number of clients: %d\n", nclients);
2843                                         exit(1);
2844                                 }
2845 #ifdef HAVE_GETRLIMIT
2846 #ifdef RLIMIT_NOFILE                    /* most platforms use RLIMIT_NOFILE */
2847                                 if (getrlimit(RLIMIT_NOFILE, &rlim) == -1)
2848 #else                                                   /* but BSD doesn't ... */
2849                                 if (getrlimit(RLIMIT_OFILE, &rlim) == -1)
2850 #endif   /* RLIMIT_NOFILE */
2851                                 {
2852                                         fprintf(stderr, "getrlimit failed: %s\n", strerror(errno));
2853                                         exit(1);
2854                                 }
2855                                 if (rlim.rlim_cur <= (nclients + 2))
2856                                 {
2857                                         fprintf(stderr, "You need at least %d open files but you are only allowed to use %ld.\n", nclients + 2, (long) rlim.rlim_cur);
2858                                         fprintf(stderr, "Use limit/ulimit to increase the limit before using pgbench.\n");
2859                                         exit(1);
2860                                 }
2861 #endif   /* HAVE_GETRLIMIT */
2862                                 break;
2863                         case 'j':                       /* jobs */
2864                                 benchmarking_option_set = true;
2865                                 nthreads = atoi(optarg);
2866                                 if (nthreads <= 0)
2867                                 {
2868                                         fprintf(stderr, "invalid number of threads: %d\n", nthreads);
2869                                         exit(1);
2870                                 }
2871                                 break;
2872                         case 'C':
2873                                 benchmarking_option_set = true;
2874                                 is_connect = true;
2875                                 break;
2876                         case 'r':
2877                                 benchmarking_option_set = true;
2878                                 is_latencies = true;
2879                                 break;
2880                         case 's':
2881                                 scale_given = true;
2882                                 scale = atoi(optarg);
2883                                 if (scale <= 0)
2884                                 {
2885                                         fprintf(stderr, "invalid scaling factor: %d\n", scale);
2886                                         exit(1);
2887                                 }
2888                                 break;
2889                         case 't':
2890                                 benchmarking_option_set = true;
2891                                 if (duration > 0)
2892                                 {
2893                                         fprintf(stderr, "specify either a number of transactions (-t) or a duration (-T), not both.\n");
2894                                         exit(1);
2895                                 }
2896                                 nxacts = atoi(optarg);
2897                                 if (nxacts <= 0)
2898                                 {
2899                                         fprintf(stderr, "invalid number of transactions: %d\n", nxacts);
2900                                         exit(1);
2901                                 }
2902                                 break;
2903                         case 'T':
2904                                 benchmarking_option_set = true;
2905                                 if (nxacts > 0)
2906                                 {
2907                                         fprintf(stderr, "specify either a number of transactions (-t) or a duration (-T), not both.\n");
2908                                         exit(1);
2909                                 }
2910                                 duration = atoi(optarg);
2911                                 if (duration <= 0)
2912                                 {
2913                                         fprintf(stderr, "invalid duration: %d\n", duration);
2914                                         exit(1);
2915                                 }
2916                                 break;
2917                         case 'U':
2918                                 login = pg_strdup(optarg);
2919                                 break;
2920                         case 'l':
2921                                 benchmarking_option_set = true;
2922                                 use_log = true;
2923                                 break;
2924                         case 'q':
2925                                 initialization_option_set = true;
2926                                 use_quiet = true;
2927                                 break;
2928                         case 'f':
2929                                 benchmarking_option_set = true;
2930                                 ttype = 3;
2931                                 filename = pg_strdup(optarg);
2932                                 if (process_file(filename) == false || *sql_files[num_files - 1] == NULL)
2933                                         exit(1);
2934                                 break;
2935                         case 'D':
2936                                 {
2937                                         char       *p;
2938
2939                                         benchmarking_option_set = true;
2940
2941                                         if ((p = strchr(optarg, '=')) == NULL || p == optarg || *(p + 1) == '\0')
2942                                         {
2943                                                 fprintf(stderr, "invalid variable definition: %s\n", optarg);
2944                                                 exit(1);
2945                                         }
2946
2947                                         *p++ = '\0';
2948                                         if (!putVariable(&state[0], "option", optarg, p))
2949                                                 exit(1);
2950                                 }
2951                                 break;
2952                         case 'F':
2953                                 initialization_option_set = true;
2954                                 fillfactor = atoi(optarg);
2955                                 if ((fillfactor < 10) || (fillfactor > 100))
2956                                 {
2957                                         fprintf(stderr, "invalid fillfactor: %d\n", fillfactor);
2958                                         exit(1);
2959                                 }
2960                                 break;
2961                         case 'M':
2962                                 benchmarking_option_set = true;
2963                                 if (num_files > 0)
2964                                 {
2965                                         fprintf(stderr, "query mode (-M) should be specified before transaction scripts (-f)\n");
2966                                         exit(1);
2967                                 }
2968                                 for (querymode = 0; querymode < NUM_QUERYMODE; querymode++)
2969                                         if (strcmp(optarg, QUERYMODE[querymode]) == 0)
2970                                                 break;
2971                                 if (querymode >= NUM_QUERYMODE)
2972                                 {
2973                                         fprintf(stderr, "invalid query mode (-M): %s\n", optarg);
2974                                         exit(1);
2975                                 }
2976                                 break;
2977                         case 'P':
2978                                 benchmarking_option_set = true;
2979                                 progress = atoi(optarg);
2980                                 if (progress <= 0)
2981                                 {
2982                                         fprintf(stderr,
2983                                                 "thread progress delay (-P) must be positive (%s)\n",
2984                                                         optarg);
2985                                         exit(1);
2986                                 }
2987                                 break;
2988                         case 'R':
2989                                 {
2990                                         /* get a double from the beginning of option value */
2991                                         double          throttle_value = atof(optarg);
2992
2993                                         benchmarking_option_set = true;
2994
2995                                         if (throttle_value <= 0.0)
2996                                         {
2997                                                 fprintf(stderr, "invalid rate limit: %s\n", optarg);
2998                                                 exit(1);
2999                                         }
3000                                         /* Invert rate limit into a time offset */
3001                                         throttle_delay = (int64) (1000000.0 / throttle_value);
3002                                 }
3003                                 break;
3004                         case 'L':
3005                                 {
3006                                         double limit_ms = atof(optarg);
3007                                         if (limit_ms <= 0.0)
3008                                         {
3009                                                 fprintf(stderr, "invalid latency limit: %s\n", optarg);
3010                                                 exit(1);
3011                                         }
3012                                         benchmarking_option_set = true;
3013                                         latency_limit = (int64) (limit_ms * 1000);
3014                                 }
3015                                 break;
3016                         case 0:
3017                                 /* This covers long options which take no argument. */
3018                                 if (foreign_keys || unlogged_tables)
3019                                         initialization_option_set = true;
3020                                 break;
3021                         case 2:                         /* tablespace */
3022                                 initialization_option_set = true;
3023                                 tablespace = pg_strdup(optarg);
3024                                 break;
3025                         case 3:                         /* index-tablespace */
3026                                 initialization_option_set = true;
3027                                 index_tablespace = pg_strdup(optarg);
3028                                 break;
3029                         case 4:
3030                                 benchmarking_option_set = true;
3031                                 sample_rate = atof(optarg);
3032                                 if (sample_rate <= 0.0 || sample_rate > 1.0)
3033                                 {
3034                                         fprintf(stderr, "invalid sampling rate: %f\n", sample_rate);
3035                                         exit(1);
3036                                 }
3037                                 break;
3038                         case 5:
3039 #ifdef WIN32
3040                                 fprintf(stderr, "--aggregate-interval is not currently supported on Windows");
3041                                 exit(1);
3042 #else
3043                                 benchmarking_option_set = true;
3044                                 agg_interval = atoi(optarg);
3045                                 if (agg_interval <= 0)
3046                                 {
3047                                         fprintf(stderr, "invalid number of seconds for aggregation: %d\n", agg_interval);
3048                                         exit(1);
3049                                 }
3050 #endif
3051                                 break;
3052                         default:
3053                                 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
3054                                 exit(1);
3055                                 break;
3056                 }
3057         }
3058
3059         /* compute a per thread delay */
3060         throttle_delay *= nthreads;
3061
3062         if (argc > optind)
3063                 dbName = argv[optind];
3064         else
3065         {
3066                 if ((env = getenv("PGDATABASE")) != NULL && *env != '\0')
3067                         dbName = env;
3068                 else if (login != NULL && *login != '\0')
3069                         dbName = login;
3070                 else
3071                         dbName = "";
3072         }
3073
3074         if (is_init_mode)
3075         {
3076                 if (benchmarking_option_set)
3077                 {
3078                         fprintf(stderr, "some options cannot be used in initialization (-i) mode\n");
3079                         exit(1);
3080                 }
3081
3082                 init(is_no_vacuum);
3083                 exit(0);
3084         }
3085         else
3086         {
3087                 if (initialization_option_set)
3088                 {
3089                         fprintf(stderr, "some options cannot be used in benchmarking mode\n");
3090                         exit(1);
3091                 }
3092         }
3093
3094         /* Use DEFAULT_NXACTS if neither nxacts nor duration is specified. */
3095         if (nxacts <= 0 && duration <= 0)
3096                 nxacts = DEFAULT_NXACTS;
3097
3098         if (nclients % nthreads != 0)
3099         {
3100                 fprintf(stderr, "number of clients (%d) must be a multiple of number of threads (%d)\n", nclients, nthreads);
3101                 exit(1);
3102         }
3103
3104         /* --sampling-rate may be used only with -l */
3105         if (sample_rate > 0.0 && !use_log)
3106         {
3107                 fprintf(stderr, "log sampling rate is allowed only when logging transactions (-l) \n");
3108                 exit(1);
3109         }
3110
3111         /* --sampling-rate must not be used with --aggregate-interval */
3112         if (sample_rate > 0.0 && agg_interval > 0)
3113         {
3114                 fprintf(stderr, "log sampling (--sampling-rate) and aggregation (--aggregate-interval) can't be used at the same time\n");
3115                 exit(1);
3116         }
3117
3118         if (agg_interval > 0 && (!use_log))
3119         {
3120                 fprintf(stderr, "log aggregation is allowed only when actually logging transactions\n");
3121                 exit(1);
3122         }
3123
3124         if ((duration > 0) && (agg_interval > duration))
3125         {
3126                 fprintf(stderr, "number of seconds for aggregation (%d) must not be higher that test duration (%d)\n", agg_interval, duration);
3127                 exit(1);
3128         }
3129
3130         if ((duration > 0) && (agg_interval > 0) && (duration % agg_interval != 0))
3131         {
3132                 fprintf(stderr, "duration (%d) must be a multiple of aggregation interval (%d)\n", duration, agg_interval);
3133                 exit(1);
3134         }
3135
3136         /*
3137          * is_latencies only works with multiple threads in thread-based
3138          * implementations, not fork-based ones, because it supposes that the
3139          * parent can see changes made to the per-thread execution stats by child
3140          * threads.  It seems useful enough to accept despite this limitation, but
3141          * perhaps we should FIXME someday (by passing the stats data back up
3142          * through the parent-to-child pipes).
3143          */
3144 #ifndef ENABLE_THREAD_SAFETY
3145         if (is_latencies && nthreads > 1)
3146         {
3147                 fprintf(stderr, "-r does not work with -j larger than 1 on this platform.\n");
3148                 exit(1);
3149         }
3150 #endif
3151
3152         /*
3153          * save main process id in the global variable because process id will be
3154          * changed after fork.
3155          */
3156         main_pid = (int) getpid();
3157         progress_nclients = nclients;
3158         progress_nthreads = nthreads;
3159
3160         if (nclients > 1)
3161         {
3162                 state = (CState *) pg_realloc(state, sizeof(CState) * nclients);
3163                 memset(state + 1, 0, sizeof(CState) * (nclients - 1));
3164
3165                 /* copy any -D switch values to all clients */
3166                 for (i = 1; i < nclients; i++)
3167                 {
3168                         int                     j;
3169
3170                         state[i].id = i;
3171                         for (j = 0; j < state[0].nvariables; j++)
3172                         {
3173                                 if (!putVariable(&state[i], "startup", state[0].variables[j].name, state[0].variables[j].value))
3174                                         exit(1);
3175                         }
3176                 }
3177         }
3178
3179         if (debug)
3180         {
3181                 if (duration <= 0)
3182                         printf("pghost: %s pgport: %s nclients: %d nxacts: %d dbName: %s\n",
3183                                    pghost, pgport, nclients, nxacts, dbName);
3184                 else
3185                         printf("pghost: %s pgport: %s nclients: %d duration: %d dbName: %s\n",
3186                                    pghost, pgport, nclients, duration, dbName);
3187         }
3188
3189         /* opening connection... */
3190         con = doConnect();
3191         if (con == NULL)
3192                 exit(1);
3193
3194         if (PQstatus(con) == CONNECTION_BAD)
3195         {
3196                 fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
3197                 fprintf(stderr, "%s", PQerrorMessage(con));
3198                 exit(1);
3199         }
3200
3201         if (ttype != 3)
3202         {
3203                 /*
3204                  * get the scaling factor that should be same as count(*) from
3205                  * pgbench_branches if this is not a custom query
3206                  */
3207                 res = PQexec(con, "select count(*) from pgbench_branches");
3208                 if (PQresultStatus(res) != PGRES_TUPLES_OK)
3209                 {
3210                         fprintf(stderr, "%s", PQerrorMessage(con));
3211                         exit(1);
3212                 }
3213                 scale = atoi(PQgetvalue(res, 0, 0));
3214                 if (scale < 0)
3215                 {
3216                         fprintf(stderr, "count(*) from pgbench_branches invalid (%d)\n", scale);
3217                         exit(1);
3218                 }
3219                 PQclear(res);
3220
3221                 /* warn if we override user-given -s switch */
3222                 if (scale_given)
3223                         fprintf(stderr,
3224                         "Scale option ignored, using pgbench_branches table count = %d\n",
3225                                         scale);
3226         }
3227
3228         /*
3229          * :scale variables normally get -s or database scale, but don't override
3230          * an explicit -D switch
3231          */
3232         if (getVariable(&state[0], "scale") == NULL)
3233         {
3234                 snprintf(val, sizeof(val), "%d", scale);
3235                 for (i = 0; i < nclients; i++)
3236                 {
3237                         if (!putVariable(&state[i], "startup", "scale", val))
3238                                 exit(1);
3239                 }
3240         }
3241
3242         /*
3243          * Define a :client_id variable that is unique per connection. But don't
3244          * override an explicit -D switch.
3245          */
3246         if (getVariable(&state[0], "client_id") == NULL)
3247         {
3248                 for (i = 0; i < nclients; i++)
3249                 {
3250                         snprintf(val, sizeof(val), "%d", i);
3251                         if (!putVariable(&state[i], "startup", "client_id", val))
3252                                 exit(1);
3253                 }
3254         }
3255
3256         if (!is_no_vacuum)
3257         {
3258                 fprintf(stderr, "starting vacuum...");
3259                 executeStatement(con, "vacuum pgbench_branches");
3260                 executeStatement(con, "vacuum pgbench_tellers");
3261                 executeStatement(con, "truncate pgbench_history");
3262                 fprintf(stderr, "end.\n");
3263
3264                 if (do_vacuum_accounts)
3265                 {
3266                         fprintf(stderr, "starting vacuum pgbench_accounts...");
3267                         executeStatement(con, "vacuum analyze pgbench_accounts");
3268                         fprintf(stderr, "end.\n");
3269                 }
3270         }
3271         PQfinish(con);
3272
3273         /* set random seed */
3274         INSTR_TIME_SET_CURRENT(start_time);
3275         srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
3276
3277         /* process builtin SQL scripts */
3278         switch (ttype)
3279         {
3280                 case 0:
3281                         sql_files[0] = process_builtin(tpc_b,
3282                                                                                    "<builtin: TPC-B (sort of)>");
3283                         num_files = 1;
3284                         break;
3285
3286                 case 1:
3287                         sql_files[0] = process_builtin(select_only,
3288                                                                                    "<builtin: select only>");
3289                         num_files = 1;
3290                         break;
3291
3292                 case 2:
3293                         sql_files[0] = process_builtin(simple_update,
3294                                                                                    "<builtin: simple update>");
3295                         num_files = 1;
3296                         break;
3297
3298                 default:
3299                         break;
3300         }
3301
3302         /* set up thread data structures */
3303         threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
3304         for (i = 0; i < nthreads; i++)
3305         {
3306                 TState     *thread = &threads[i];
3307
3308                 thread->tid = i;
3309                 thread->state = &state[nclients / nthreads * i];
3310                 thread->nstate = nclients / nthreads;
3311                 thread->random_state[0] = random();
3312                 thread->random_state[1] = random();
3313                 thread->random_state[2] = random();
3314                 thread->throttle_latency_skipped = 0;
3315                 thread->latency_late = 0;
3316
3317                 if (is_latencies)
3318                 {
3319                         /* Reserve memory for the thread to store per-command latencies */
3320                         int                     t;
3321
3322                         thread->exec_elapsed = (instr_time *)
3323                                 pg_malloc(sizeof(instr_time) * num_commands);
3324                         thread->exec_count = (int *)
3325                                 pg_malloc(sizeof(int) * num_commands);
3326
3327                         for (t = 0; t < num_commands; t++)
3328                         {
3329                                 INSTR_TIME_SET_ZERO(thread->exec_elapsed[t]);
3330                                 thread->exec_count[t] = 0;
3331                         }
3332                 }
3333                 else
3334                 {
3335                         thread->exec_elapsed = NULL;
3336                         thread->exec_count = NULL;
3337                 }
3338         }
3339
3340         /* get start up time */
3341         INSTR_TIME_SET_CURRENT(start_time);
3342
3343         /* set alarm if duration is specified. */
3344         if (duration > 0)
3345                 setalarm(duration);
3346
3347         /* start threads */
3348         for (i = 0; i < nthreads; i++)
3349         {
3350                 TState     *thread = &threads[i];
3351
3352                 INSTR_TIME_SET_CURRENT(thread->start_time);
3353
3354                 /* the first thread (i = 0) is executed by main thread */
3355                 if (i > 0)
3356                 {
3357                         int                     err = pthread_create(&thread->thread, NULL, threadRun, thread);
3358
3359                         if (err != 0 || thread->thread == INVALID_THREAD)
3360                         {
3361                                 fprintf(stderr, "cannot create thread: %s\n", strerror(err));
3362                                 exit(1);
3363                         }
3364                 }
3365                 else
3366                 {
3367                         thread->thread = INVALID_THREAD;
3368                 }
3369         }
3370
3371         /* wait for threads and accumulate results */
3372         INSTR_TIME_SET_ZERO(conn_total_time);
3373         for (i = 0; i < nthreads; i++)
3374         {
3375                 void       *ret = NULL;
3376
3377                 if (threads[i].thread == INVALID_THREAD)
3378                         ret = threadRun(&threads[i]);
3379                 else
3380                         pthread_join(threads[i].thread, &ret);
3381
3382                 if (ret != NULL)
3383                 {
3384                         TResult    *r = (TResult *) ret;
3385
3386                         total_xacts += r->xacts;
3387                         total_latencies += r->latencies;
3388                         total_sqlats += r->sqlats;
3389                         throttle_lag += r->throttle_lag;
3390                         throttle_latency_skipped += r->throttle_latency_skipped;
3391                         latency_late += r->latency_late;
3392                         if (r->throttle_lag_max > throttle_lag_max)
3393                                 throttle_lag_max = r->throttle_lag_max;
3394                         INSTR_TIME_ADD(conn_total_time, r->conn_time);
3395                         free(ret);
3396                 }
3397         }
3398         disconnect_all(state, nclients);
3399
3400         /*
3401          * XXX We compute results as though every client of every thread started
3402          * and finished at the same time.  That model can diverge noticeably from
3403          * reality for a short benchmark run involving relatively many threads.
3404          * The first thread may process notably many transactions before the last
3405          * thread begins.  Improving the model alone would bring limited benefit,
3406          * because performance during those periods of partial thread count can
3407          * easily exceed steady state performance.  This is one of the many ways
3408          * short runs convey deceptive performance figures.
3409          */
3410         INSTR_TIME_SET_CURRENT(total_time);
3411         INSTR_TIME_SUBTRACT(total_time, start_time);
3412         printResults(ttype, total_xacts, nclients, threads, nthreads,
3413                                  total_time, conn_total_time, total_latencies, total_sqlats,
3414                                  throttle_lag, throttle_lag_max, throttle_latency_skipped,
3415                                  latency_late);
3416
3417         return 0;
3418 }
3419
3420 static void *
3421 threadRun(void *arg)
3422 {
3423         TState     *thread = (TState *) arg;
3424         CState     *state = thread->state;
3425         TResult    *result;
3426         FILE       *logfile = NULL; /* per-thread log file */
3427         instr_time      start,
3428                                 end;
3429         int                     nstate = thread->nstate;
3430         int                     remains = nstate;               /* number of remaining clients */
3431         int                     i;
3432
3433         /* for reporting progress: */
3434         int64           thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time);
3435         int64           last_report = thread_start;
3436         int64           next_report = last_report + (int64) progress * 1000000;
3437         int64           last_count = 0,
3438                                 last_lats = 0,
3439                                 last_sqlats = 0,
3440                                 last_lags = 0,
3441                                 last_skipped = 0;
3442
3443         AggVals         aggs;
3444
3445         /*
3446          * Initialize throttling rate target for all of the thread's clients.  It
3447          * might be a little more accurate to reset thread->start_time here too.
3448          * The possible drift seems too small relative to typical throttle delay
3449          * times to worry about it.
3450          */
3451         INSTR_TIME_SET_CURRENT(start);
3452         thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);
3453         thread->throttle_lag = 0;
3454         thread->throttle_lag_max = 0;
3455
3456         result = pg_malloc(sizeof(TResult));
3457
3458         INSTR_TIME_SET_ZERO(result->conn_time);
3459
3460         /* open log file if requested */
3461         if (use_log)
3462         {
3463                 char            logpath[64];
3464
3465                 if (thread->tid == 0)
3466                         snprintf(logpath, sizeof(logpath), "pgbench_log.%d", main_pid);
3467                 else
3468                         snprintf(logpath, sizeof(logpath), "pgbench_log.%d.%d", main_pid, thread->tid);
3469                 logfile = fopen(logpath, "w");
3470
3471                 if (logfile == NULL)
3472                 {
3473                         fprintf(stderr, "Couldn't open logfile \"%s\": %s", logpath, strerror(errno));
3474                         goto done;
3475                 }
3476         }
3477
3478         if (!is_connect)
3479         {
3480                 /* make connections to the database */
3481                 for (i = 0; i < nstate; i++)
3482                 {
3483                         if ((state[i].con = doConnect()) == NULL)
3484                                 goto done;
3485                 }
3486         }
3487
3488         /* time after thread and connections set up */
3489         INSTR_TIME_SET_CURRENT(result->conn_time);
3490         INSTR_TIME_SUBTRACT(result->conn_time, thread->start_time);
3491
3492         agg_vals_init(&aggs, thread->start_time);
3493
3494         /* send start up queries in async manner */
3495         for (i = 0; i < nstate; i++)
3496         {
3497                 CState     *st = &state[i];
3498                 Command   **commands = sql_files[st->use_file];
3499                 int                     prev_ecnt = st->ecnt;
3500
3501                 st->use_file = getrand(thread, 0, num_files - 1);
3502                 if (!doCustom(thread, st, &result->conn_time, logfile, &aggs))
3503                         remains--;                      /* I've aborted */
3504
3505                 if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
3506                 {
3507                         fprintf(stderr, "Client %d aborted in state %d. Execution meta-command failed.\n", i, st->state);
3508                         remains--;                      /* I've aborted */
3509                         PQfinish(st->con);
3510                         st->con = NULL;
3511                 }
3512         }
3513
3514         while (remains > 0)
3515         {
3516                 fd_set          input_mask;
3517                 int                     maxsock;        /* max socket number to be waited */
3518                 int64           now_usec = 0;
3519                 int64           min_usec;
3520
3521                 FD_ZERO(&input_mask);
3522
3523                 maxsock = -1;
3524                 min_usec = INT64_MAX;
3525                 for (i = 0; i < nstate; i++)
3526                 {
3527                         CState     *st = &state[i];
3528                         Command   **commands = sql_files[st->use_file];
3529                         int                     sock;
3530
3531                         if (st->con == NULL)
3532                         {
3533                                 continue;
3534                         }
3535                         else if (st->sleeping)
3536                         {
3537                                 if (st->throttling && timer_exceeded)
3538                                 {
3539                                         /* interrupt client which has not started a transaction */
3540                                         remains--;
3541                                         st->sleeping = 0;
3542                                         st->throttling = false;
3543                                         PQfinish(st->con);
3544                                         st->con = NULL;
3545                                         continue;
3546                                 }
3547                                 else    /* just a nap from the script */
3548                                 {
3549                                         int                     this_usec;
3550
3551                                         if (min_usec == INT64_MAX)
3552                                         {
3553                                                 instr_time      now;
3554
3555                                                 INSTR_TIME_SET_CURRENT(now);
3556                                                 now_usec = INSTR_TIME_GET_MICROSEC(now);
3557                                         }
3558
3559                                         this_usec = st->txn_scheduled - now_usec;
3560                                         if (min_usec > this_usec)
3561                                                 min_usec = this_usec;
3562                                 }
3563                         }
3564                         else if (commands[st->state]->type == META_COMMAND)
3565                         {
3566                                 min_usec = 0;   /* the connection is ready to run */
3567                                 break;
3568                         }
3569
3570                         sock = PQsocket(st->con);
3571                         if (sock < 0)
3572                         {
3573                                 fprintf(stderr, "bad socket: %s\n", strerror(errno));
3574                                 goto done;
3575                         }
3576
3577                         FD_SET(sock, &input_mask);
3578
3579                         if (maxsock < sock)
3580                                 maxsock = sock;
3581                 }
3582
3583                 if (min_usec > 0 && maxsock != -1)
3584                 {
3585                         int                     nsocks; /* return from select(2) */
3586
3587                         if (min_usec != INT64_MAX)
3588                         {
3589                                 struct timeval timeout;
3590
3591                                 timeout.tv_sec = min_usec / 1000000;
3592                                 timeout.tv_usec = min_usec % 1000000;
3593                                 nsocks = select(maxsock + 1, &input_mask, NULL, NULL, &timeout);
3594                         }
3595                         else
3596                                 nsocks = select(maxsock + 1, &input_mask, NULL, NULL, NULL);
3597                         if (nsocks < 0)
3598                         {
3599                                 if (errno == EINTR)
3600                                         continue;
3601                                 /* must be something wrong */
3602                                 fprintf(stderr, "select failed: %s\n", strerror(errno));
3603                                 goto done;
3604                         }
3605                 }
3606
3607                 /* ok, backend returns reply */
3608                 for (i = 0; i < nstate; i++)
3609                 {
3610                         CState     *st = &state[i];
3611                         Command   **commands = sql_files[st->use_file];
3612                         int                     prev_ecnt = st->ecnt;
3613
3614                         if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
3615                                                         || commands[st->state]->type == META_COMMAND))
3616                         {
3617                                 if (!doCustom(thread, st, &result->conn_time, logfile, &aggs))
3618                                         remains--;      /* I've aborted */
3619                         }
3620
3621                         if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
3622                         {
3623                                 fprintf(stderr, "Client %d aborted in state %d. Execution of meta-command failed.\n", i, st->state);
3624                                 remains--;              /* I've aborted */
3625                                 PQfinish(st->con);
3626                                 st->con = NULL;
3627                         }
3628                 }
3629
3630 #ifdef PTHREAD_FORK_EMULATION
3631                 /* each process reports its own progression */
3632                 if (progress)
3633                 {
3634                         instr_time      now_time;
3635                         int64           now;
3636
3637                         INSTR_TIME_SET_CURRENT(now_time);
3638                         now = INSTR_TIME_GET_MICROSEC(now_time);
3639                         if (now >= next_report)
3640                         {
3641                                 /* generate and show report */
3642                                 int64           count = 0,
3643                                                         lats = 0,
3644                                                         sqlats = 0,
3645                                                         skipped = 0;
3646                                 int64           lags = thread->throttle_lag;
3647                                 int64           run = now - last_report;
3648                                 double          tps,
3649                                                         total_run,
3650                                                         latency,
3651                                                         sqlat,
3652                                                         stdev,
3653                                                         lag;
3654
3655                                 for (i = 0; i < nstate; i++)
3656                                 {
3657                                         count += state[i].cnt;
3658                                         lats += state[i].txn_latencies;
3659                                         sqlats += state[i].txn_sqlats;
3660                                 }
3661
3662                                 total_run = (now - thread_start) / 1000000.0;
3663                                 tps = 1000000.0 * (count - last_count) / run;
3664                                 latency = 0.001 * (lats - last_lats) / (count - last_count);
3665                                 sqlat = 1.0 * (sqlats - last_sqlats) / (count - last_count);
3666                                 stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency);
3667                                 lag = 0.001 * (lags - last_lags) / (count - last_count);
3668                                 skipped = thread->throttle_latency_skipped - last_skipped;
3669
3670                                 fprintf(stderr,
3671                                                 "progress %d: %.1f s, %.1f tps, "
3672                                                 "lat %.3f ms stddev %.3f",
3673                                                 thread->tid, total_run, tps, latency, stdev);
3674                                 if (throttle_delay)
3675                                 {
3676                                         fprintf(stderr, ", lag %.3f ms", lag);
3677                                         if (latency_limit)
3678                                                 fprintf(stderr, ", skipped " INT64_FORMAT, skipped);
3679                                 }
3680                                 fprintf(stderr, "\n");
3681
3682                                 last_count = count;
3683                                 last_lats = lats;
3684                                 last_sqlats = sqlats;
3685                                 last_lags = lags;
3686                                 last_report = now;
3687                                 last_skipped = thread->throttle_latency_skipped;
3688                                 next_report += (int64) progress *1000000;
3689                         }
3690                 }
3691 #else
3692                 /* progress report by thread 0 for all threads */
3693                 if (progress && thread->tid == 0)
3694                 {
3695                         instr_time      now_time;
3696                         int64           now;
3697
3698                         INSTR_TIME_SET_CURRENT(now_time);
3699                         now = INSTR_TIME_GET_MICROSEC(now_time);
3700                         if (now >= next_report)
3701                         {
3702                                 /* generate and show report */
3703                                 int64           count = 0,
3704                                                         lats = 0,
3705                                                         sqlats = 0,
3706                                                         lags = 0,
3707                                                         skipped = 0;
3708                                 int64           run = now - last_report;
3709                                 double          tps,
3710                                                         total_run,
3711                                                         latency,
3712                                                         sqlat,
3713                                                         lag,
3714                                                         stdev;
3715
3716                                 for (i = 0; i < progress_nclients; i++)
3717                                 {
3718                                         count += state[i].cnt;
3719                                         lats += state[i].txn_latencies;
3720                                         sqlats += state[i].txn_sqlats;
3721                                 }
3722
3723                                 for (i = 0; i < progress_nthreads; i++)
3724                                         lags += thread[i].throttle_lag;
3725
3726                                 total_run = (now - thread_start) / 1000000.0;
3727                                 tps = 1000000.0 * (count - last_count) / run;
3728                                 latency = 0.001 * (lats - last_lats) / (count - last_count);
3729                                 sqlat = 1.0 * (sqlats - last_sqlats) / (count - last_count);
3730                                 stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency);
3731                                 lag = 0.001 * (lags - last_lags) / (count - last_count);
3732                                 skipped = thread->throttle_latency_skipped - last_skipped;
3733
3734                                 fprintf(stderr,
3735                                                 "progress: %.1f s, %.1f tps, "
3736                                                 "lat %.3f ms stddev %.3f",
3737                                                 total_run, tps, latency, stdev);
3738                                 if (throttle_delay)
3739                                 {
3740                                         fprintf(stderr, ", lag %.3f ms", lag);
3741                                         if (latency_limit)
3742                                                 fprintf(stderr, ", " INT64_FORMAT " skipped", skipped);
3743                                 }
3744                                 fprintf(stderr, "\n");
3745
3746                                 last_count = count;
3747                                 last_lats = lats;
3748                                 last_sqlats = sqlats;
3749                                 last_lags = lags;
3750                                 last_report = now;
3751                                 last_skipped = thread->throttle_latency_skipped;
3752                                 next_report += (int64) progress *1000000;
3753                         }
3754                 }
3755 #endif   /* PTHREAD_FORK_EMULATION */
3756         }
3757
3758 done:
3759         INSTR_TIME_SET_CURRENT(start);
3760         disconnect_all(state, nstate);
3761         result->xacts = 0;
3762         result->latencies = 0;
3763         result->sqlats = 0;
3764         for (i = 0; i < nstate; i++)
3765         {
3766                 result->xacts += state[i].cnt;
3767                 result->latencies += state[i].txn_latencies;
3768                 result->sqlats += state[i].txn_sqlats;
3769         }
3770         result->throttle_lag = thread->throttle_lag;
3771         result->throttle_lag_max = thread->throttle_lag_max;
3772         result->throttle_latency_skipped = thread->throttle_latency_skipped;
3773         result->latency_late = thread->latency_late;
3774
3775         INSTR_TIME_SET_CURRENT(end);
3776         INSTR_TIME_ACCUM_DIFF(result->conn_time, end, start);
3777         if (logfile)
3778                 fclose(logfile);
3779         return result;
3780 }
3781
3782 /*
3783  * Support for duration option: set timer_exceeded after so many seconds.
3784  */
3785
3786 #ifndef WIN32
3787
3788 static void
3789 handle_sig_alarm(SIGNAL_ARGS)
3790 {
3791         timer_exceeded = true;
3792 }
3793
3794 static void
3795 setalarm(int seconds)
3796 {
3797         pqsignal(SIGALRM, handle_sig_alarm);
3798         alarm(seconds);
3799 }
3800
3801 #ifndef ENABLE_THREAD_SAFETY
3802
3803 /*
3804  * implements pthread using fork.
3805  */
3806
/*
 * Bookkeeping for one fork-emulated "thread": the child's process id plus
 * the pipe the child uses to hand its result back to the parent.
 */
typedef struct fork_pthread
{
	pid_t		pid;			/* value returned by fork() */
	int			pipes[2];		/* from pipe(): [0] is read by the parent,
								 * [1] is written by the child */
} fork_pthread;
3812
3813 static int
3814 pthread_create(pthread_t *thread,
3815                            pthread_attr_t *attr,
3816                            void *(*start_routine) (void *),
3817                            void *arg)
3818 {
3819         fork_pthread *th;
3820         void       *ret;
3821         int                     rc;
3822
3823         th = (fork_pthread *) pg_malloc(sizeof(fork_pthread));
3824         if (pipe(th->pipes) < 0)
3825         {
3826                 free(th);
3827                 return errno;
3828         }
3829
3830         th->pid = fork();
3831         if (th->pid == -1)                      /* error */
3832         {
3833                 free(th);
3834                 return errno;
3835         }
3836         if (th->pid != 0)                       /* in parent process */
3837         {
3838                 close(th->pipes[1]);
3839                 *thread = th;
3840                 return 0;
3841         }
3842
3843         /* in child process */
3844         close(th->pipes[0]);
3845
3846         /* set alarm again because the child does not inherit timers */
3847         if (duration > 0)
3848                 setalarm(duration);
3849
3850         ret = start_routine(arg);
3851         rc = write(th->pipes[1], ret, sizeof(TResult));
3852         (void) rc;
3853         close(th->pipes[1]);
3854         free(th);
3855         exit(0);
3856 }
3857
3858 static int
3859 pthread_join(pthread_t th, void **thread_return)
3860 {
3861         int                     status;
3862
3863         while (waitpid(th->pid, &status, 0) != th->pid)
3864         {
3865                 if (errno != EINTR)
3866                         return errno;
3867         }
3868
3869         if (thread_return != NULL)
3870         {
3871                 /* assume result is TResult */
3872                 *thread_return = pg_malloc(sizeof(TResult));
3873                 if (read(th->pipes[0], *thread_return, sizeof(TResult)) != sizeof(TResult))
3874                 {
3875                         free(*thread_return);
3876                         *thread_return = NULL;
3877                 }
3878         }
3879         close(th->pipes[0]);
3880
3881         free(th);
3882         return 0;
3883 }
3884 #endif
3885 #else                                                   /* WIN32 */
3886
3887 static VOID CALLBACK
3888 win32_timer_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
3889 {
3890         timer_exceeded = true;
3891 }
3892
3893 static void
3894 setalarm(int seconds)
3895 {
3896         HANDLE          queue;
3897         HANDLE          timer;
3898
3899         /* This function will be called at most once, so we can cheat a bit. */
3900         queue = CreateTimerQueue();
3901         if (seconds > ((DWORD) -1) / 1000 ||
3902                 !CreateTimerQueueTimer(&timer, queue,
3903                                                            win32_timer_callback, NULL, seconds * 1000, 0,
3904                                                            WT_EXECUTEINTIMERTHREAD | WT_EXECUTEONLYONCE))
3905         {
3906                 fprintf(stderr, "Failed to set timer\n");
3907                 exit(1);
3908         }
3909 }
3910
3911 /* partial pthread implementation for Windows */
3912
3913 typedef struct win32_pthread
3914 {
3915         HANDLE          handle;
3916         void       *(*routine) (void *);
3917         void       *arg;
3918         void       *result;
3919 } win32_pthread;
3920
3921 static unsigned __stdcall
3922 win32_pthread_run(void *arg)
3923 {
3924         win32_pthread *th = (win32_pthread *) arg;
3925
3926         th->result = th->routine(th->arg);
3927
3928         return 0;
3929 }
3930
3931 static int
3932 pthread_create(pthread_t *thread,
3933                            pthread_attr_t *attr,
3934                            void *(*start_routine) (void *),
3935                            void *arg)
3936 {
3937         int                     save_errno;
3938         win32_pthread *th;
3939
3940         th = (win32_pthread *) pg_malloc(sizeof(win32_pthread));
3941         th->routine = start_routine;
3942         th->arg = arg;
3943         th->result = NULL;
3944
3945         th->handle = (HANDLE) _beginthreadex(NULL, 0, win32_pthread_run, th, 0, NULL);
3946         if (th->handle == NULL)
3947         {
3948                 save_errno = errno;
3949                 free(th);
3950                 return save_errno;
3951         }
3952
3953         *thread = th;
3954         return 0;
3955 }
3956
3957 static int
3958 pthread_join(pthread_t th, void **thread_return)
3959 {
3960         if (th == NULL || th->handle == NULL)
3961                 return errno = EINVAL;
3962
3963         if (WaitForSingleObject(th->handle, INFINITE) != WAIT_OBJECT_0)
3964         {
3965                 _dosmaperr(GetLastError());
3966                 return errno;
3967         }
3968
3969         if (thread_return)
3970                 *thread_return = th->result;
3971
3972         CloseHandle(th->handle);
3973         free(th);
3974         return 0;
3975 }
3976
3977 #endif   /* WIN32 */