Allow pgbench to use a scale larger than 21474.

author Heikki Linnakangas <heikki.linnakangas@iki.fi>

Tue, 29 Jan 2013 09:49:40 +0000 (11:49 +0200)

committer Heikki Linnakangas <heikki.linnakangas@iki.fi>

Tue, 29 Jan 2013 10:05:55 +0000 (12:05 +0200)
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 29 Jan 2013 09:49:40 +0000 (11:49 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 29 Jan 2013 10:05:55 +0000 (12:05 +0200)
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c

index 3ca120fa68b784e9b0b7d66f186f09945440e213..32d3be65085b2a1589f088c16855072458face65 100644 (file)
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -151,6 +151,15 @@ char          *index_tablespace = NULL;
  #define ntellers       10
  #define naccounts      100000
  
+/*
+ * The scale factor at/beyond which account ids are stored as 64-bit integers
+ * (bigint), because naccounts * scale is close to overflowing a 32-bit integer.
+ *
+ * Although the actual threshold is 21474, we use 20000 because it is easier to
+ * document and remember, and isn't that far away from the real threshold.
+ */
+#define SCALE_32BIT_THRESHOLD 20000
+
  bool           use_log;                        /* log transaction latencies to a file */
  bool           use_quiet;                      /* quiet logging onto stderr */
  bool           is_connect;                     /* establish connection for each transaction */
@@ -403,9 +412,77 @@ usage(void)
                    progname, progname);
  }
  
+/*
+ * strtoint64 -- convert a string to 64-bit integer
+ *
+ * This function is a modified version of scanint8() from
+ * src/backend/utils/adt/int8.c.
+ *
+ * Accepted input is: optional leading whitespace, an optional '+' or '-'
+ * sign, one or more decimal digits, and optional trailing whitespace.
+ *
+ * Errors (no digits, overflow, trailing non-whitespace characters) are only
+ * reported on stderr.  Parsing is not aborted: the function carries on and
+ * still returns whatever value it has accumulated, so callers cannot tell
+ * from the result that the input was bad.
+ */
+static int64
+strtoint64(const char *str)
+{
+       const char *ptr = str;
+       int64           result = 0;
+       int                     sign = 1;
+
+       /*
+        * Do our own scan, rather than relying on sscanf which might be broken
+        * for long long.
+        */
+
+       /* skip leading whitespace */
+       while (*ptr && isspace((unsigned char) *ptr))
+               ptr++;
+
+       /* handle sign */
+       if (*ptr == '-')
+       {
+               ptr++;
+
+               /*
+                * Do an explicit check for INT64_MIN.  Ugly though this is, it's
+                * cleaner than trying to get the loop below to handle it portably.
+                *
+                * On this path result already holds the negative value, so sign is
+                * deliberately left at +1 and the digit loop is skipped entirely.
+                */
+               if (strncmp(ptr, "9223372036854775808", 19) == 0)
+               {
+                       result = -INT64CONST(0x7fffffffffffffff) - 1;
+                       ptr += 19;
+                       goto gotdigits;
+               }
+               sign = -1;
+       }
+       else if (*ptr == '+')
+               ptr++;
+
+       /* require at least one digit (a failure is reported but is not fatal) */
+       if (!isdigit((unsigned char) *ptr))
+               fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
+
+       /* process digits, accumulating the magnitude in result */
+       while (*ptr && isdigit((unsigned char) *ptr))
+       {
+               int64           tmp = result * 10 + (*ptr++ - '0');
+
+               if ((tmp / 10) != result)               /* overflow? */
+                       fprintf(stderr, "value \"%s\" is out of range for type bigint\n", str);
+               result = tmp;
+       }
+
+gotdigits:
+
+       /* allow trailing whitespace, but not other trailing chars */
+       while (*ptr != '\0' && isspace((unsigned char) *ptr))
+               ptr++;
+
+       if (*ptr != '\0')
+               fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
+
+       return ((sign < 0) ? -result : result);
+}
+
  /* random number generator: uniform distribution from min to max inclusive */
-static int
-getrand(TState *thread, int min, int max)
+static int64
+getrand(TState *thread, int64 min, int64 max)
  {
         /*
          * Odd coding is so that min and max have approximately the same chance of
@@ -416,7 +493,7 @@ getrand(TState *thread, int min, int max)
          * protected by a mutex, and therefore a bottleneck on machines with many
          * CPUs.
          */
-       return min + (int) ((max - min + 1) * pg_erand48(thread->random_state));
+       return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
  }
  
  /* call PQexec() and exit() on failure */
@@ -960,7 +1037,7 @@ top:
                 if (commands[st->state] == NULL)
                 {
                         st->state = 0;
-                       st->use_file = getrand(thread, 0, num_files - 1);
+                       st->use_file = (int) getrand(thread, 0, num_files - 1);
                         commands = sql_files[st->use_file];
                 }
         }
@@ -1080,7 +1157,7 @@ top:
                 if (pg_strcasecmp(argv[0], "setrandom") == 0)
                 {
                         char       *var;
-                       int                     min,
+                       int64           min,
                                                 max;
                         char            res[64];
  
@@ -1092,10 +1169,10 @@ top:
                                         st->ecnt++;
                                         return true;
                                 }
-                               min = atoi(var);
+                               min = strtoint64(var);
                         }
                         else
-                               min = atoi(argv[2]);
+                               min = strtoint64(argv[2]);
  
  #ifdef NOT_USED
                         if (min < 0)
@@ -1114,10 +1191,10 @@ top:
                                         st->ecnt++;
                                         return true;
                                 }
-                               max = atoi(var);
+                               max = strtoint64(var);
                         }
                         else
-                               max = atoi(argv[3]);
+                               max = strtoint64(argv[3]);
  
                         if (max < min)
                         {
@@ -1127,8 +1204,8 @@ top:
                         }
  
                         /*
-                        * getrand() neeeds to be able to subtract max from min and add
-                        * one the result without overflowing.  Since we know max > min,
+                        * getrand() needs to be able to subtract min from max and add
+                        * one to the result without overflowing.  Since we know max >= min,
                          * we can detect overflow just by checking for a negative result.
                          * But we must check both that the subtraction doesn't overflow,
                          * and that adding one to the result doesn't overflow either.
@@ -1141,9 +1218,9 @@ top:
                         }
  
  #ifdef DEBUG
-                       printf("min: %d max: %d random: %d\n", min, max, getrand(thread, min, max));
+                       printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
  #endif
-                       snprintf(res, sizeof(res), "%d", getrand(thread, min, max));
+                       snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
  
                         if (!putVariable(st, argv[0], argv[1], res))
                         {
@@ -1156,7 +1233,7 @@ top:
                 else if (pg_strcasecmp(argv[0], "set") == 0)
                 {
                         char       *var;
-                       int                     ope1,
+                       int64           ope1,
                                                 ope2;
                         char            res[64];
  
@@ -1168,13 +1245,13 @@ top:
                                         st->ecnt++;
                                         return true;
                                 }
-                               ope1 = atoi(var);
+                               ope1 = strtoint64(var);
                         }
                         else
-                               ope1 = atoi(argv[2]);
+                               ope1 = strtoint64(argv[2]);
  
                         if (argc < 5)
-                               snprintf(res, sizeof(res), "%d", ope1);
+                               snprintf(res, sizeof(res), INT64_FORMAT, ope1);
                         else
                         {
                                 if (*argv[4] == ':')
@@ -1185,17 +1262,17 @@ top:
                                                 st->ecnt++;
                                                 return true;
                                         }
-                                       ope2 = atoi(var);
+                                       ope2 = strtoint64(var);
                                 }
                                 else
-                                       ope2 = atoi(argv[4]);
+                                       ope2 = strtoint64(argv[4]);
  
                                 if (strcmp(argv[3], "+") == 0)
-                                       snprintf(res, sizeof(res), "%d", ope1 + ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 + ope2);
                                 else if (strcmp(argv[3], "-") == 0)
-                                       snprintf(res, sizeof(res), "%d", ope1 - ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 - ope2);
                                 else if (strcmp(argv[3], "*") == 0)
-                                       snprintf(res, sizeof(res), "%d", ope1 * ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 * ope2);
                                 else if (strcmp(argv[3], "/") == 0)
                                 {
                                         if (ope2 == 0)
@@ -1204,7 +1281,7 @@ top:
                                                 st->ecnt++;
                                                 return true;
                                         }
-                                       snprintf(res, sizeof(res), "%d", ope1 / ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 / ope2);
                                 }
                                 else
                                 {
@@ -1311,6 +1388,15 @@ disconnect_all(CState *state, int length)
  static void
  init(bool is_no_vacuum)
  {
+
+/* The scale factor at/beyond which account ids are stored as 64-bit integers
+ * (bigint), because naccounts * scale is close to overflowing a 32-bit integer.
+ *
+ * Although the actual threshold is 21474, we use 20000 because it is easier to
+ * document and remember, and isn't that far away from the real threshold.
+ */
+#define SCALE_32BIT_THRESHOLD 20000
+
         /*
          * Note: TPC-B requires at least 100 bytes per row, and the "filler"
          * fields in these table declarations were intended to comply with that.
@@ -1329,7 +1415,9 @@ init(bool is_no_vacuum)
         struct ddlinfo DDLs[] = {
                 {
                         "pgbench_history",
-                       "tid int,bid int,aid int,delta int,mtime timestamp,filler char(22)",
+                       scale >= SCALE_32BIT_THRESHOLD
+                               ? "tid int,bid int,aid bigint,delta int,mtime timestamp,filler char(22)"
+                               : "tid int,bid int,aid    int,delta int,mtime timestamp,filler char(22)",
                         0
                 },
                 {
@@ -1339,7 +1427,9 @@ init(bool is_no_vacuum)
                 },
                 {
                         "pgbench_accounts",
-                       "aid int not null,bid int,abalance int,filler char(84)",
+                       scale >= SCALE_32BIT_THRESHOLD
+                               ? "aid bigint not null,bid int,abalance int,filler char(84)"
+                               : "aid    int not null,bid int,abalance int,filler char(84)",
                         1
                 },
                 {
@@ -1365,6 +1455,7 @@ init(bool is_no_vacuum)
         PGresult   *res;
         char            sql[256];
         int                     i;
+       int64           k;
  
         /* used to track elapsed time and estimate of the remaining time */
         instr_time      start, diff;
@@ -1441,11 +1532,11 @@ init(bool is_no_vacuum)
  
         INSTR_TIME_SET_CURRENT(start);
  
-       for (i = 0; i < naccounts * scale; i++)
+       for (k = 0; k < (int64) naccounts * scale; k++)
         {
-               int                     j = i + 1;
+               int64           j = k + 1;
  
-               snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0);
+               snprintf(sql, 256, INT64_FORMAT "\t" INT64_FORMAT "\t%d\t\n", j, k / naccounts + 1, 0);
                 if (PQputline(con, sql))
                 {
                         fprintf(stderr, "PQputline failed\n");
@@ -1462,8 +1553,8 @@ init(bool is_no_vacuum)
                         elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
                         remaining_sec = (scale * naccounts - j) * elapsed_sec / j;
  
-                       fprintf(stderr, "%d of %d tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
-                                                       j, naccounts * scale,
+                       fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
+                                                       j, (int64)naccounts * scale,
                                                         (int) (((int64) j * 100) / (naccounts * scale)),
                                                         elapsed_sec, remaining_sec);
                 }
@@ -1479,8 +1570,8 @@ init(bool is_no_vacuum)
                         /* have we reached the next interval (or end)? */
                         if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS)) {
  
-                               fprintf(stderr, "%d of %d tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
-                                               j, naccounts * scale,
+                               fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
+                                               j, (int64)naccounts * scale,
                                                 (int) (((int64) j * 100) / (naccounts * scale)), elapsed_sec, remaining_sec);
  
                                 /* skip to the next interval */
diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml

index 58686b1a8b5b7c135e89fc28789e966ed39a4cbb..9ed8b76963cabb9d0d43440342205e53fbbee936 100644 (file)
--- a/doc/src/sgml/pgbench.sgml
+++ b/doc/src/sgml/pgbench.sgml
@@ -185,6 +185,11 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
          Multiply the number of rows generated by the scale factor.
          For example, <literal>-s 100</> will create 10,000,000 rows
          in the <structname>pgbench_accounts</> table. Default is 1.
+        When the scale is 20,000 or larger, the columns used to
+        hold account identifiers (<structfield>aid</structfield> columns)
+        will switch to using larger integers (<type>bigint</type>),
+        in order to be big enough to hold the range of account
+        identifiers.
         </para>
        </listitem>
       </varlistentry>
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Tue, 29 Jan 2013 09:49:40 +0000 (11:49 +0200)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Tue, 29 Jan 2013 10:05:55 +0000 (12:05 +0200)
contrib/pgbench/pgbench.c		patch \| blob \| history
doc/src/sgml/pgbench.sgml		patch \| blob \| history