* A simple benchmark program for PostgreSQL
* Originally written by Tatsuo Ishii and enhanced by many contributors.
*
- * $PostgreSQL: pgsql/contrib/pgbench/pgbench.c,v 1.99 2010/07/06 19:18:55 momjian Exp $
+ * $PostgreSQL: pgsql/contrib/pgbench/pgbench.c,v 1.100 2010/08/12 20:39:39 tgl Exp $
* Copyright (c) 2000-2010, PostgreSQL Global Development Group
* ALL RIGHTS RESERVED;
*
bool use_log; /* log transaction latencies to a file */
bool is_connect; /* establish connection for each transaction */
+bool is_latencies; /* report per-command latencies */
int main_pid; /* main process id used in log filename */
char *pghost = "";
int64 until; /* napping until (usec) */
Variable *variables; /* array of variable definitions */
int nvariables;
- instr_time txn_begin; /* used for measuring latencies */
+ instr_time txn_begin; /* used for measuring transaction latencies */
+ instr_time stmt_begin; /* used for measuring statement latencies */
int use_file; /* index in sql_files for this client */
bool prepared[MAX_FILES];
} CState;
CState *state; /* array of CState */
int nstate; /* length of state[] */
instr_time start_time; /* thread start time */
+ instr_time *exec_elapsed; /* time spent executing cmds (per Command) */
+ int *exec_count; /* number of cmd executions (per Command) */
} TState;
#define INVALID_THREAD ((pthread_t) 0)
typedef struct
{
+ char *line; /* full text of command line */
+ int command_num; /* unique index of this Command struct */
int type; /* command type (SQL_COMMAND or META_COMMAND) */
- int argc; /* number of commands */
- char *argv[MAX_ARGS]; /* command list */
+ int argc; /* number of command words */
+ char *argv[MAX_ARGS]; /* command word list */
} Command;
static Command **sql_files[MAX_FILES]; /* SQL script files */
static int num_files; /* number of script files */
+static int num_commands = 0; /* total number of Command structs */
static int debug = 0; /* debug flag */
/* default scenario */
" define variable for use by custom script\n"
" -f FILENAME read transaction script from FILENAME\n"
" -j NUM number of threads (default: 1)\n"
+ " -r report average latency per command\n"
" -l write transaction times to log file\n"
" -M {simple|extended|prepared}\n"
" protocol for submitting queries to server (default: simple)\n"
char *endptr;
int retval;
- /*
- * Join arguments with whilespace separaters. Arguments starting with
- * exactly one colon are treated as variables: name - append a string
- * "name" :var - append a variable named 'var'. ::name - append a string
- * ":name"
+ /*----------
+ * Join arguments with whitespace separators. Arguments starting with
+ * exactly one colon are treated as variables:
+ * name - append a string "name"
+ * :var - append a variable named 'var'
+ * ::name - append a string ":name"
+ *----------
*/
for (i = 0; i < argc; i++)
{
/* return false iff client should be disconnected */
static bool
-doCustom(CState *st, instr_time *conn_time, FILE *logfile)
+doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile)
{
PGresult *res;
Command **commands;
}
/*
- * transaction finished: record the time it took in the log
+ * command finished: accumulate per-command execution times in
+ * thread-local data structure, if per-command latencies are requested
+ */
+ if (is_latencies)
+ {
+ instr_time now;
+ int cnum = commands[st->state]->command_num;
+
+ INSTR_TIME_SET_CURRENT(now);
+ INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum],
+ now, st->stmt_begin);
+ thread->exec_count[cnum]++;
+ }
+
+ /*
+ * if transaction finished, record the time it took in the log
*/
if (logfile && commands[st->state + 1] == NULL)
{
if (commands[st->state]->type == SQL_COMMAND)
{
+ /*
+ * Read and discard the query result; note this is not included
+ * in the statement latency numbers.
+ */
res = PQgetResult(st->con);
switch (PQresultStatus(res))
{
INSTR_TIME_ACCUM_DIFF(*conn_time, end, start);
}
+ /* Record transaction start time if logging is enabled */
if (logfile && st->state == 0)
INSTR_TIME_SET_CURRENT(st->txn_begin);
+ /* Record statement start time if per-command latencies are requested */
+ if (is_latencies)
+ INSTR_TIME_SET_CURRENT(st->stmt_begin);
+
if (commands[st->state]->type == SQL_COMMAND)
{
const Command *command = commands[st->state];
return true;
}
+/* Parse a command; return a Command struct, or NULL if it's a comment */
static Command *
process_commands(char *buf)
{
char *p,
*tok;
+ /* Make the string buf end at the next newline */
if ((p = strchr(buf, '\n')) != NULL)
*p = '\0';
+ /* Skip leading whitespace */
p = buf;
while (isspace((unsigned char) *p))
p++;
+ /* If the line is empty or actually a comment, we're done */
if (*p == '\0' || strncmp(p, "--", 2) == 0)
- {
return NULL;
- }
+ /* Allocate and initialize Command structure */
my_commands = (Command *) malloc(sizeof(Command));
if (my_commands == NULL)
- {
return NULL;
- }
-
+ my_commands->line = strdup(buf);
+ if (my_commands->line == NULL)
+ return NULL;
+ my_commands->command_num = num_commands++;
+ my_commands->type = 0; /* until set */
my_commands->argc = 0;
if (*p == '\\')
while (fgets(buf, sizeof(buf), fd) != NULL)
{
- Command *commands;
- int i;
+ Command *command;
- i = 0;
- while (isspace((unsigned char) buf[i]))
- i++;
-
- if (buf[i] != '\0' && strncmp(&buf[i], "--", 2) != 0)
- {
- commands = process_commands(&buf[i]);
- if (commands == NULL)
- {
- fclose(fd);
- return false;
- }
- }
- else
+ command = process_commands(buf);
+ if (command == NULL)
continue;
- my_commands[lineno] = commands;
+ my_commands[lineno] = command;
lineno++;
if (lineno >= alloc_num)
for (;;)
{
char *p;
- Command *commands;
+ Command *command;
p = buf;
while (*tb && *tb != '\n')
*p = '\0';
- commands = process_commands(buf);
- if (commands == NULL)
- {
- return NULL;
- }
+ command = process_commands(buf);
+ if (command == NULL)
+ continue;
- my_commands[lineno] = commands;
+ my_commands[lineno] = command;
lineno++;
if (lineno >= alloc_num)
/* print out results */
static void
-printResults(int ttype, int normal_xacts, int nclients, int nthreads,
+printResults(int ttype, int normal_xacts, int nclients,
+ TState *threads, int nthreads,
instr_time total_time, instr_time conn_total_time)
{
double time_include,
}
printf("tps = %f (including connections establishing)\n", tps_include);
printf("tps = %f (excluding connections establishing)\n", tps_exclude);
+
+ /* Report per-command latencies */
+ if (is_latencies)
+ {
+ int i;
+
+ for (i = 0; i < num_files; i++)
+ {
+ Command **commands;
+
+ if (num_files > 1)
+ printf("statement latencies in milliseconds, file %d:\n", i+1);
+ else
+ printf("statement latencies in milliseconds:\n");
+
+ for (commands = sql_files[i]; *commands != NULL; commands++)
+ {
+ Command *command = *commands;
+ int cnum = command->command_num;
+ double total_time;
+ instr_time total_exec_elapsed;
+ int total_exec_count;
+ int t;
+
+ /* Accumulate per-thread data for command */
+ INSTR_TIME_SET_ZERO(total_exec_elapsed);
+ total_exec_count = 0;
+ for (t = 0; t < nthreads; t++)
+ {
+ TState *thread = &threads[t];
+
+ INSTR_TIME_ADD(total_exec_elapsed,
+ thread->exec_elapsed[cnum]);
+ total_exec_count += thread->exec_count[cnum];
+ }
+
+ if (total_exec_count > 0)
+ total_time = INSTR_TIME_GET_MILLISEC(total_exec_elapsed) / (double) total_exec_count;
+ else
+ total_time = 0.0;
+
+ printf("\t%f\t%s\n", total_time, command->line);
+ }
+ }
+ }
}
memset(state, 0, sizeof(*state));
- while ((c = getopt(argc, argv, "ih:nvp:dSNc:Cs:t:T:U:lf:D:F:M:j:")) != -1)
+ while ((c = getopt(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:")) != -1)
{
switch (c)
{
case 'C':
is_connect = true;
break;
+ case 'r':
+ is_latencies = true;
+ break;
case 's':
scale_given = true;
scale = atoi(optarg);
exit(1);
}
+ /*
+ * is_latencies only works with multiple threads in thread-based
+ * implementations, not fork-based ones, because it supposes that the
+ * parent can see changes made to the per-thread execution stats by child
+ * threads. It seems useful enough to accept despite this limitation,
+ * but perhaps we should FIXME someday (by passing the stats data back
+ * up through the parent-to-child pipes).
+ */
+#ifndef ENABLE_THREAD_SAFETY
+ if (is_latencies && nthreads > 1)
+ {
+ fprintf(stderr, "-r does not work with -j larger than 1 on this platform.\n");
+ exit(1);
+ }
+#endif
+
/*
* save main process id in the global variable because process id will be
* changed after fork.
break;
}
+ /* set up thread data structures */
+ threads = (TState *) malloc(sizeof(TState) * nthreads);
+ for (i = 0; i < nthreads; i++)
+ {
+ TState *thread = &threads[i];
+
+ thread->tid = i;
+ thread->state = &state[nclients / nthreads * i];
+ thread->nstate = nclients / nthreads;
+
+ if (is_latencies)
+ {
+ /* Reserve memory for the thread to store per-command latencies */
+ int t;
+
+ thread->exec_elapsed = (instr_time *)
+ malloc(sizeof(instr_time) * num_commands);
+ thread->exec_count = (int *)
+ malloc(sizeof(int) * num_commands);
+
+ for (t = 0; t < num_commands; t++)
+ {
+ INSTR_TIME_SET_ZERO(thread->exec_elapsed[t]);
+ thread->exec_count[t] = 0;
+ }
+ }
+ else
+ {
+ thread->exec_elapsed = NULL;
+ thread->exec_count = NULL;
+ }
+ }
+
/* get start up time */
INSTR_TIME_SET_CURRENT(start_time);
setalarm(duration);
/* start threads */
- threads = (TState *) malloc(sizeof(TState) * nthreads);
for (i = 0; i < nthreads; i++)
{
- threads[i].tid = i;
- threads[i].state = &state[nclients / nthreads * i];
- threads[i].nstate = nclients / nthreads;
- INSTR_TIME_SET_CURRENT(threads[i].start_time);
+ TState *thread = &threads[i];
+
+ INSTR_TIME_SET_CURRENT(thread->start_time);
/* the first thread (i = 0) is executed by main thread */
if (i > 0)
{
- int err = pthread_create(&threads[i].thread, NULL, threadRun, &threads[i]);
+ int err = pthread_create(&thread->thread, NULL, threadRun, thread);
- if (err != 0 || threads[i].thread == INVALID_THREAD)
+ if (err != 0 || thread->thread == INVALID_THREAD)
{
fprintf(stderr, "cannot create thread: %s\n", strerror(err));
exit(1);
}
else
{
- threads[i].thread = INVALID_THREAD;
+ thread->thread = INVALID_THREAD;
}
}
/* get end time */
INSTR_TIME_SET_CURRENT(total_time);
INSTR_TIME_SUBTRACT(total_time, start_time);
- printResults(ttype, total_xacts, nclients, nthreads, total_time, conn_total_time);
+ printResults(ttype, total_xacts, nclients, threads, nthreads,
+ total_time, conn_total_time);
return 0;
}
int prev_ecnt = st->ecnt;
st->use_file = getrand(0, num_files - 1);
- if (!doCustom(st, &result->conn_time, logfile))
+ if (!doCustom(thread, st, &result->conn_time, logfile))
remains--; /* I've aborted */
if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
|| commands[st->state]->type == META_COMMAND))
{
- if (!doCustom(st, &result->conn_time, logfile))
+ if (!doCustom(thread, st, &result->conn_time, logfile))
remains--; /* I've aborted */
}
-<!-- $PostgreSQL: pgsql/doc/src/sgml/pgbench.sgml,v 1.17 2010/07/29 19:34:40 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/pgbench.sgml,v 1.18 2010/08/12 20:39:39 tgl Exp $ -->
<sect1 id="pgbench">
<title>pgbench</title>
settings. The next line reports the number of transactions completed
and intended (the latter being just the product of number of clients
and number of transactions per client); these will be equal unless the run
- failed before completion. The last two lines report the TPS rate,
+ failed before completion. (In <literal>-T</> mode, only the actual
+ number of transactions is printed.)
+ The last two lines report the TPS rate,
figured with and without counting the time to start database sessions.
</para>
<variablelist>
<varlistentry>
- <term><option>-F</option> <replaceable>fillfactor</></term>
+ <term><option>-i</option></term>
<listitem>
<para>
- Create the <structname>pgbench_accounts</>,
- <structname>pgbench_tellers</> and
- <structname>pgbench_branches</> tables with the given fillfactor.
- Default is 100.
+ Required to invoke initialization mode.
</para>
</listitem>
</varlistentry>
<varlistentry>
- <term><option>-i</option></term>
+ <term><option>-F</option> <replaceable>fillfactor</></term>
<listitem>
<para>
- Required to invoke initialization mode.
+ Create the <structname>pgbench_accounts</>,
+ <structname>pgbench_tellers</> and
+ <structname>pgbench_branches</> tables with the given fillfactor.
+ Default is 100.
</para>
</listitem>
</varlistentry>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>-r</option></term>
+ <listitem>
+ <para>
+ Report the average per-statement latency (execution time from the
+ perspective of the client) of each command after the benchmark
+ finishes. See below for details.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><option>-s</option> <replaceable>scale_factor</></term>
<listitem>
<replaceable>nnn</> is the PID of the pgbench process.
If the <literal>-j</> option is 2 or higher, creating multiple worker
threads, each will have its own log file. The first worker will use the
- same name for its log file as in the standard single worker case.
+ same name for its log file as in the standard single worker case.
The additional log files for the other workers will be named
<filename>pgbench_log.<replaceable>nnn</>.<replaceable>mmm</></filename>,
where <replaceable>mmm</> is a sequential number for each worker starting
<replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</>
</synopsis>
- where <replaceable>time</> is the elapsed transaction time in microseconds,
+ where <replaceable>time</> is the total elapsed transaction time in microseconds,
<replaceable>file_no</> identifies which script file was used
(useful when multiple scripts were specified with <literal>-f</>),
and <replaceable>time_epoch</>/<replaceable>time_us</> are a
</para>
</sect2>
+ <sect2>
+ <title>Per-statement latencies</title>
+
+ <para>
+ With the <literal>-r</> option, <application>pgbench</> collects
+ the elapsed transaction time of each statement executed by every
+ client. It then reports an average of those values, referred to
+ as the latency for each statement, after the benchmark has finished.
+ </para>
+
+ <para>
+ For the default script, the output will look similar to this:
+<screen>
+starting vacuum...end.
+transaction type: TPC-B (sort of)
+scaling factor: 1
+query mode: simple
+number of clients: 10
+number of threads: 1
+number of transactions per client: 1000
+number of transactions actually processed: 10000/10000
+tps = 618.764555 (including connections establishing)
+tps = 622.977698 (excluding connections establishing)
+statement latencies in milliseconds:
+ 0.004386 \set nbranches 1 * :scale
+ 0.001343 \set ntellers 10 * :scale
+ 0.001212 \set naccounts 100000 * :scale
+ 0.001310 \setrandom aid 1 :naccounts
+ 0.001073 \setrandom bid 1 :nbranches
+ 0.001005 \setrandom tid 1 :ntellers
+ 0.001078 \setrandom delta -5000 5000
+ 0.326152 BEGIN;
+ 0.603376 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+ 0.454643 SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+ 5.528491 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+ 7.335435 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+ 0.371851 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
+ 1.212976 END;
+</screen>
+ </para>
+
+ <para>
+ If multiple script files are specified, the averages are reported
+ separately for each script file.
+ </para>
+
+ <para>
+ Note that collecting the additional timing information needed for
+ per-statement latency computation adds some overhead. This will slow
+ average execution speed and lower the computed TPS. The amount
+ of slowdown varies significantly depending on platform and hardware.
+ Comparing average TPS values with and without latency reporting enabled
+ is a good way to measure if the timing overhead is significant.
+ </para>
+ </sect2>
+
<sect2>
<title>Good Practices</title>