]> granicus.if.org Git - postgresql/commitdiff
Add --aggregate-interval option.
authorTatsuo Ishii <ishii@postgresql.org>
Thu, 31 Jan 2013 06:53:58 +0000 (15:53 +0900)
committerTatsuo Ishii <ishii@postgresql.org>
Thu, 31 Jan 2013 06:53:58 +0000 (15:53 +0900)
The new option specifies length of aggregation interval (in
seconds). May be used only together with -l. With this option, the log
contains per-interval summary (number of transactions, min/max latency
and two additional fields useful for variance estimation).

Patch contributed by Tomas Vondra, reviewed by Pavel Stehule. Slight
change by Tatsuo Ishii, suggested by Robert Hass to emit an error
message indicating that the option is not currently supported on
Windows.

contrib/pgbench/pgbench.c
doc/src/sgml/pgbench.sgml

index 32d3be65085b2a1589f088c16855072458face65..11c006209facf2aad8ec6c9e946243a7f7e35883 100644 (file)
@@ -162,6 +162,7 @@ char           *index_tablespace = NULL;
 
 bool           use_log;                        /* log transaction latencies to a file */
 bool           use_quiet;                      /* quiet logging onto stderr */
+int                    agg_interval;           /* log aggregates instead of individual transactions */
 bool           is_connect;                     /* establish connection for each transaction */
 bool           is_latencies;           /* report per-command latencies */
 int                    main_pid;                       /* main process id used in log filename */
@@ -257,6 +258,18 @@ typedef struct
        char       *argv[MAX_ARGS]; /* command word list */
 } Command;
 
+typedef struct
+{
+
+       long    start_time;                     /* when does the interval start */
+       int     cnt;                            /* number of transactions */
+       double  min_duration;           /* min/max durations */
+       double  max_duration;
+       double  sum;                            /* sum(duration), sum(duration^2) - for estimates */
+       double  sum2;
+       
+} AggVals;
+
 static Command **sql_files[MAX_FILES]; /* SQL script files */
 static int     num_files;                      /* number of script files */
 static int     num_commands = 0;       /* total number of Command structs */
@@ -390,6 +403,8 @@ usage(void)
                   "  -l           write transaction times to log file\n"
                   "  --sampling-rate NUM\n"
                   "               fraction of transactions to log (e.g. 0.01 for 1%% sample)\n"
+                  "  --aggregate-interval NUM\n"
+                  "               aggregate data over NUM seconds\n"
                   "  -M simple|extended|prepared\n"
                   "               protocol for submitting queries to server (default: simple)\n"
                   "  -n           do not run VACUUM before tests\n"
@@ -911,9 +926,25 @@ clientDone(CState *st, bool ok)
        return false;                           /* always false */
 }
 
+static
+void agg_vals_init(AggVals * aggs, instr_time start)
+{
+       /* basic counters */
+       aggs->cnt = 0;          /* number of transactions */
+       aggs->sum = 0;          /* SUM(duration) */
+       aggs->sum2 = 0;         /* SUM(duration*duration) */
+
+       /* min and max transaction duration */
+       aggs->min_duration = 0;
+       aggs->max_duration = 0;
+
+       /* start of the current interval */
+       aggs->start_time = INSTR_TIME_GET_DOUBLE(start);
+}
+
 /* return false iff client should be disconnected */
 static bool
-doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile)
+doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVals * agg)
 {
        PGresult   *res;
        Command   **commands;
@@ -978,22 +1009,74 @@ top:
                        if (sample_rate == 0.0 ||
                                pg_erand48(thread->random_state) <= sample_rate)
                        {
-
                                INSTR_TIME_SET_CURRENT(now);
                                diff = now;
                                INSTR_TIME_SUBTRACT(diff, st->txn_begin);
                                usec = (double) INSTR_TIME_GET_MICROSEC(diff);
 
+                               /* should we aggregate the results or not? */
+                               if (agg_interval > 0)
+                               {
+                                       /* are we still in the same interval? if yes, accumulate the
+                                       * values (print them otherwise) */
+                                       if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(now))
+                                       {
+                                               agg->cnt += 1;
+                                               agg->sum  += usec;
+                                               agg->sum2 += usec * usec;
+
+                                               /* first in this aggregation interval */
+                                               if ((agg->cnt == 1) || (usec < agg->min_duration))
+                                                       agg->min_duration =  usec;
+
+                                               if ((agg->cnt == 1) || (usec > agg->max_duration))
+                                                       agg->max_duration = usec;
+                                       }
+                                       else
+                                       {
+                                               /* Loop until we reach the interval of the current transaction (and
+                                                * print all the empty intervals in between). */
+                                               while (agg->start_time + agg_interval < INSTR_TIME_GET_DOUBLE(now))
+                                               {
+                                                       /* This is a non-Windows branch (thanks to the ifdef in usage), so
+                                                        * we don't need to handle this in a special way (see below). */
+                                                       fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f\n",
+                                                                       agg->start_time, agg->cnt, agg->sum, agg->sum2,
+                                                                       agg->min_duration, agg->max_duration);
+
+                                                       /* move to the next inteval */
+                                                       agg->start_time = agg->start_time + agg_interval;
+
+                                                       /* reset for "no transaction" intervals */
+                                                       agg->cnt = 0;
+                                                       agg->min_duration = 0;
+                                                       agg->max_duration = 0;
+                                                       agg->sum = 0;
+                                                       agg->sum2 = 0;
+                                               }
+
+                                               /* and now update the reset values (include the current) */
+                                               agg->cnt = 1;
+                                               agg->min_duration = usec;
+                                               agg->max_duration = usec;
+                                               agg->sum = usec;
+                                               agg->sum2 = usec * usec;
+                                       }
+                               }
+                               else
+                               {
+                                       /* no, print raw transactions */
 #ifndef WIN32
-                               /* This is more than we really ought to know about instr_time */
-                               fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
-                                               st->id, st->cnt, usec, st->use_file,
-                                               (long) now.tv_sec, (long) now.tv_usec);
+                                       /* This is more than we really ought to know about instr_time */
+                                       fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
+                                                       st->id, st->cnt, usec, st->use_file,
+                                                       (long) now.tv_sec, (long) now.tv_usec);
 #else
-                               /* On Windows, instr_time doesn't provide a timestamp anyway */
-                               fprintf(logfile, "%d %d %.0f %d 0 0\n",
-                                               st->id, st->cnt, usec, st->use_file);
+                                       /* On Windows, instr_time doesn't provide a timestamp anyway */
+                                       fprintf(logfile, "%d %d %.0f %d 0 0\n",
+                                                       st->id, st->cnt, usec, st->use_file);
 #endif
+                               }
                        }
                }
 
@@ -2055,6 +2138,7 @@ main(int argc, char **argv)
                {"tablespace", required_argument, NULL, 2},
                {"unlogged-tables", no_argument, &unlogged_tables, 1},
                {"sampling-rate", required_argument, NULL, 4},
+               {"aggregate-interval", required_argument, NULL, 5},
                {NULL, 0, NULL, 0}
        };
 
@@ -2293,6 +2377,19 @@ main(int argc, char **argv)
                                        exit(1);
                                }
                                break;
+                       case 5:
+#ifdef WIN32
+                               fprintf(stderr, "--aggregate-interval is not currently supported on Windows");
+                               exit(1);
+#else
+                               agg_interval = atoi(optarg);
+                               if (agg_interval <= 0)
+                               {
+                                       fprintf(stderr, "invalid number of seconds for aggregation: %d\n", agg_interval);
+                                       exit(1);
+                               }
+#endif
+                               break;
                        default:
                                fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
                                exit(1);
@@ -2342,6 +2439,28 @@ main(int argc, char **argv)
                exit(1);
        }
 
+       /* --sampling-rate may must not be used with --aggregate-interval */
+       if (sample_rate > 0.0 && agg_interval > 0)
+       {
+               fprintf(stderr, "log sampling (--sampling-rate) and aggregation (--aggregate-interval) can't be used at the same time\n");
+               exit(1);
+       }
+
+       if (agg_interval > 0 && (! use_log)) {
+               fprintf(stderr, "log aggregation is allowed only when actually logging transactions\n");
+               exit(1);
+       }
+
+       if ((duration > 0) && (agg_interval > duration)) {
+               fprintf(stderr, "number of seconds for aggregation (%d) must not be higher that test duration (%d)\n", agg_interval, duration);
+               exit(1);
+       }
+
+       if ((duration > 0) && (agg_interval > 0) && (duration % agg_interval != 0)) {
+               fprintf(stderr, "duration (%d) must be a multiple of aggregation interval (%d)\n", duration, agg_interval);
+               exit(1);
+       }
+
        /*
         * is_latencies only works with multiple threads in thread-based
         * implementations, not fork-based ones, because it supposes that the
@@ -2601,7 +2720,10 @@ threadRun(void *arg)
        int                     remains = nstate;               /* number of remaining clients */
        int                     i;
 
+       AggVals         aggs;
+
        result = pg_malloc(sizeof(TResult));
+       
        INSTR_TIME_SET_ZERO(result->conn_time);
 
        /* open log file if requested */
@@ -2636,6 +2758,8 @@ threadRun(void *arg)
        INSTR_TIME_SET_CURRENT(result->conn_time);
        INSTR_TIME_SUBTRACT(result->conn_time, thread->start_time);
 
+       agg_vals_init(&aggs, thread->start_time);
+       
        /* send start up queries in async manner */
        for (i = 0; i < nstate; i++)
        {
@@ -2644,7 +2768,7 @@ threadRun(void *arg)
                int                     prev_ecnt = st->ecnt;
 
                st->use_file = getrand(thread, 0, num_files - 1);
-               if (!doCustom(thread, st, &result->conn_time, logfile))
+               if (!doCustom(thread, st, &result->conn_time, logfile, &aggs))
                        remains--;                      /* I've aborted */
 
                if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
@@ -2746,7 +2870,7 @@ threadRun(void *arg)
                        if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
                                                        || commands[st->state]->type == META_COMMAND))
                        {
-                               if (!doCustom(thread, st, &result->conn_time, logfile))
+                               if (!doCustom(thread, st, &result->conn_time, logfile, &aggs))
                                        remains--;      /* I've aborted */
                        }
 
@@ -2773,7 +2897,6 @@ done:
        return result;
 }
 
-
 /*
  * Support for duration option: set timer_exceeded after so many seconds.
  */
index 9ed8b76963cabb9d0d43440342205e53fbbee936..79b4baffbf873489c6366d15b5be7f396904639d 100644 (file)
@@ -350,6 +350,21 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--aggregate-interval</option> <replaceable>seconds</></term>
+      <listitem>
+       <para>
+        Length of aggregation interval (in seconds). May be used only together
+        with <application>-l</application> - with this option, the log contains
+        per-interval summary (number of transactions, min/max latency and two
+        additional fields useful for variance estimation).
+       </para>
+       <para>
+        This option is not currently supported on Windows.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-M</option> <replaceable>querymode</></term>
       <listitem>
@@ -746,8 +761,9 @@ END;
   <title>Per-Transaction Logging</title>
 
   <para>
-   With the <option>-l</> option, <application>pgbench</> writes the time
-   taken by each transaction to a log file.  The log file will be named
+   With the <option>-l</> option but without the <option>--aggregate-interval</option>,
+   <application>pgbench</> writes the time taken by each transaction
+   to a log file.  The log file will be named
    <filename>pgbench_log.<replaceable>nnn</></filename>, where
    <replaceable>nnn</> is the PID of the pgbench process.
    If the <option>-j</> option is 2 or higher, creating multiple worker
@@ -792,6 +808,45 @@ END;
   </para>
  </refsect2>
 
+ <refsect2>
+  <title>Aggregated Logging</title>
+  
+  <para>
+   With the <option>--aggregate-interval</option> option, the logs use a bit different format:
+
+<synopsis>
+<replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</>
+</synopsis>
+
+   where <replaceable>interval_start</> is the start of the interval (UNIX epoch
+   format timestamp), <replaceable>num_of_transactions</> is the number of transactions
+   within the interval, <replaceable>latency_sum</replaceable> is a sum of latencies
+   (so you can compute average latency easily). The following two fields are useful
+   for variance estimation - <replaceable>latency_sum</> is a sum of latencies and
+   <replaceable>latency_2_sum</> is a sum of 2nd powers of latencies. The last two
+   fields are <replaceable>min_latency</> - a minimum latency within the interval, and
+   <replaceable>max_latency</> - maximum latency within the interval. A transaction is
+   counted into the interval when it was committed.
+  </para>
+
+  <para>
+   Here is example outputs:
+<screen>
+1345828501 5601 1542744 483552416 61 2573
+1345828503 7884 1979812 565806736 60 1479
+1345828505 7208 1979422 567277552 59 1391
+1345828507 7685 1980268 569784714 60 1398
+1345828509 7073 1979779 573489941 236 1411
+</screen></para>
+
+  <para>
+   Notice that while the plain (unaggregated) log file contains index
+   of the custom script files, the aggregated log does not. Therefore if
+   you need per script data, you need to aggregate the data on your own.
+  </para>
+
+ </refsect2>
+
  <refsect2>
   <title>Per-Statement Latencies</title>