]> granicus.if.org Git - postgresql/commitdiff
Allow pgbench to use a scale larger than 21474.
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 29 Jan 2013 09:49:40 +0000 (11:49 +0200)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 29 Jan 2013 10:05:55 +0000 (12:05 +0200)
Beyond 21474, the number of accounts exceeds the range for int4. Change the
initialization code to use bigint for account id columns when scale is large
enough, and switch to using int64s for the variables in pgbench code. The
threshold where we switch to bigints is set at 20000, because that's easier
to remember and document than 21474, and ensures that there is some headroom
when int4s are used.

Greg Smith, with various changes by Euler Taveira de Oliveira, Gurjeet
Singh and Satoshi Nagayasu.

contrib/pgbench/pgbench.c
doc/src/sgml/pgbench.sgml

index 3ca120fa68b784e9b0b7d66f186f09945440e213..32d3be65085b2a1589f088c16855072458face65 100644 (file)
@@ -151,6 +151,15 @@ char          *index_tablespace = NULL;
 #define ntellers       10
 #define naccounts      100000
 
+/*
+ * The scale factor at/beyond which the account id (aid) columns are created
+ * as bigint instead of int, because naccounts * scale account ids would no
+ * longer be guaranteed to fit in a 32bit integer.
+ *
+ * Although the actual threshold is 21474, we use 20000 because it is easier to
+ * document and remember, and isn't that far away from the real threshold.
+ */
+#define SCALE_32BIT_THRESHOLD 20000
+
 bool           use_log;                        /* log transaction latencies to a file */
 bool           use_quiet;                      /* quiet logging onto stderr */
 bool           is_connect;                     /* establish connection for each transaction */
@@ -403,9 +412,77 @@ usage(void)
                   progname, progname);
 }
 
+/*
+ * strtoint64 -- convert a string to 64-bit integer
+ *
+ * This function is a modified version of scanint8() from
+ * src/backend/utils/adt/int8.c.
+ *
+ * Accepted input is optional leading whitespace, an optional plus or minus
+ * sign, one or more decimal digits, and optional trailing whitespace.
+ *
+ * str: NUL-terminated string to parse.
+ *
+ * Returns the parsed value.  Malformed or out-of-range input is only
+ * reported with a message on stderr: the function neither aborts nor
+ * returns an error indicator, and still returns whatever value it has
+ * accumulated, so callers cannot detect a failure from the return value.
+ */
+static int64
+strtoint64(const char *str)
+{
+       const char *ptr = str;
+       int64           result = 0;
+       int                     sign = 1;
+
+       /*
+        * Do our own scan, rather than relying on sscanf which might be broken
+        * for long long.
+        */
+
+       /* skip leading spaces (any character accepted by isspace) */
+       while (*ptr && isspace((unsigned char) *ptr))
+               ptr++;
+
+       /* handle sign */
+       if (*ptr == '-')
+       {
+               ptr++;
+
+               /*
+                * Do an explicit check for INT64_MIN.  Ugly though this is, it's
+                * cleaner than trying to get the loop below to handle it portably.
+                *
+                * Only the first 19 characters are compared here; anything that
+                * follows them is vetted by the trailing-character check after
+                * the gotdigits label.  sign is deliberately still 1 on this path,
+                * so the final return does not negate the already-negative result.
+                */
+               if (strncmp(ptr, "9223372036854775808", 19) == 0)
+               {
+                       result = -INT64CONST(0x7fffffffffffffff) - 1;
+                       ptr += 19;
+                       goto gotdigits;
+               }
+               sign = -1;
+       }
+       else if (*ptr == '+')
+               ptr++;
+
+       /*
+        * require at least one digit; a violation is only reported, and the
+        * scan carries on with result still 0
+        */
+       if (!isdigit((unsigned char) *ptr))
+               fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
+
+       /*
+        * process digits
+        *
+        * Overflow is detected after the fact, by checking that dividing the
+        * new value by 10 gives back the previous one.  It is reported but the
+        * loop keeps going with the new value.
+        * NOTE(review): this test depends on how the signed multiplication
+        * behaves once it overflows, which the C standard leaves undefined.
+        */
+       while (*ptr && isdigit((unsigned char) *ptr))
+       {
+               int64           tmp = result * 10 + (*ptr++ - '0');
+
+               if ((tmp / 10) != result)               /* overflow? */
+                       fprintf(stderr, "value \"%s\" is out of range for type bigint\n", str);
+               result = tmp;
+       }
+
+gotdigits:
+
+       /* allow trailing whitespace, but not other trailing chars */
+       while (*ptr != '\0' && isspace((unsigned char) *ptr))
+               ptr++;
+
+       if (*ptr != '\0')
+               fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
+
+       return ((sign < 0) ? -result : result);
+}
+
 /* random number generator: uniform distribution from min to max inclusive */
-static int
-getrand(TState *thread, int min, int max)
+static int64
+getrand(TState *thread, int64 min, int64 max)
 {
        /*
         * Odd coding is so that min and max have approximately the same chance of
@@ -416,7 +493,7 @@ getrand(TState *thread, int min, int max)
         * protected by a mutex, and therefore a bottleneck on machines with many
         * CPUs.
         */
-       return min + (int) ((max - min + 1) * pg_erand48(thread->random_state));
+       return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
 }
 
 /* call PQexec() and exit() on failure */
@@ -960,7 +1037,7 @@ top:
                if (commands[st->state] == NULL)
                {
                        st->state = 0;
-                       st->use_file = getrand(thread, 0, num_files - 1);
+                       st->use_file = (int) getrand(thread, 0, num_files - 1);
                        commands = sql_files[st->use_file];
                }
        }
@@ -1080,7 +1157,7 @@ top:
                if (pg_strcasecmp(argv[0], "setrandom") == 0)
                {
                        char       *var;
-                       int                     min,
+                       int64           min,
                                                max;
                        char            res[64];
 
@@ -1092,10 +1169,10 @@ top:
                                        st->ecnt++;
                                        return true;
                                }
-                               min = atoi(var);
+                               min = strtoint64(var);
                        }
                        else
-                               min = atoi(argv[2]);
+                               min = strtoint64(argv[2]);
 
 #ifdef NOT_USED
                        if (min < 0)
@@ -1114,10 +1191,10 @@ top:
                                        st->ecnt++;
                                        return true;
                                }
-                               max = atoi(var);
+                               max = strtoint64(var);
                        }
                        else
-                               max = atoi(argv[3]);
+                               max = strtoint64(argv[3]);
 
                        if (max < min)
                        {
@@ -1127,8 +1204,8 @@ top:
                        }
 
                        /*
-                        * getrand() neeeds to be able to subtract max from min and add
-                        * one the result without overflowing.  Since we know max > min,
+                        * getrand() needs to be able to subtract min from max and add
+                        * one to the result without overflowing.  Since we know max >= min,
                         * we can detect overflow just by checking for a negative result.
                         * But we must check both that the subtraction doesn't overflow,
                         * and that adding one to the result doesn't overflow either.
@@ -1141,9 +1218,9 @@ top:
                        }
 
 #ifdef DEBUG
-                       printf("min: %d max: %d random: %d\n", min, max, getrand(thread, min, max));
+                       printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
 #endif
-                       snprintf(res, sizeof(res), "%d", getrand(thread, min, max));
+                       snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
 
                        if (!putVariable(st, argv[0], argv[1], res))
                        {
@@ -1156,7 +1233,7 @@ top:
                else if (pg_strcasecmp(argv[0], "set") == 0)
                {
                        char       *var;
-                       int                     ope1,
+                       int64           ope1,
                                                ope2;
                        char            res[64];
 
@@ -1168,13 +1245,13 @@ top:
                                        st->ecnt++;
                                        return true;
                                }
-                               ope1 = atoi(var);
+                               ope1 = strtoint64(var);
                        }
                        else
-                               ope1 = atoi(argv[2]);
+                               ope1 = strtoint64(argv[2]);
 
                        if (argc < 5)
-                               snprintf(res, sizeof(res), "%d", ope1);
+                               snprintf(res, sizeof(res), INT64_FORMAT, ope1);
                        else
                        {
                                if (*argv[4] == ':')
@@ -1185,17 +1262,17 @@ top:
                                                st->ecnt++;
                                                return true;
                                        }
-                                       ope2 = atoi(var);
+                                       ope2 = strtoint64(var);
                                }
                                else
-                                       ope2 = atoi(argv[4]);
+                                       ope2 = strtoint64(argv[4]);
 
                                if (strcmp(argv[3], "+") == 0)
-                                       snprintf(res, sizeof(res), "%d", ope1 + ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 + ope2);
                                else if (strcmp(argv[3], "-") == 0)
-                                       snprintf(res, sizeof(res), "%d", ope1 - ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 - ope2);
                                else if (strcmp(argv[3], "*") == 0)
-                                       snprintf(res, sizeof(res), "%d", ope1 * ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 * ope2);
                                else if (strcmp(argv[3], "/") == 0)
                                {
                                        if (ope2 == 0)
@@ -1204,7 +1281,7 @@ top:
                                                st->ecnt++;
                                                return true;
                                        }
-                                       snprintf(res, sizeof(res), "%d", ope1 / ope2);
+                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 / ope2);
                                }
                                else
                                {
@@ -1311,6 +1388,15 @@ disconnect_all(CState *state, int length)
 static void
 init(bool is_no_vacuum)
 {
+
+/* The scale factor at/beyond which the account id (aid) columns are created
+ * as bigint, because the account ids may no longer fit in 32bit integers.
+ *
+ * Although the actual threshold is 21474, we use 20000 because it is easier to
+ * document and remember, and isn't that far away from the real threshold.
+ */
+#define SCALE_32BIT_THRESHOLD 20000
+
        /*
         * Note: TPC-B requires at least 100 bytes per row, and the "filler"
         * fields in these table declarations were intended to comply with that.
@@ -1329,7 +1415,9 @@ init(bool is_no_vacuum)
        struct ddlinfo DDLs[] = {
                {
                        "pgbench_history",
-                       "tid int,bid int,aid int,delta int,mtime timestamp,filler char(22)",
+                       scale >= SCALE_32BIT_THRESHOLD
+                               ? "tid int,bid int,aid bigint,delta int,mtime timestamp,filler char(22)"
+                               : "tid int,bid int,aid    int,delta int,mtime timestamp,filler char(22)",
                        0
                },
                {
@@ -1339,7 +1427,9 @@ init(bool is_no_vacuum)
                },
                {
                        "pgbench_accounts",
-                       "aid int not null,bid int,abalance int,filler char(84)",
+                       scale >= SCALE_32BIT_THRESHOLD
+                               ? "aid bigint not null,bid int,abalance int,filler char(84)"
+                               : "aid    int not null,bid int,abalance int,filler char(84)",
                        1
                },
                {
@@ -1365,6 +1455,7 @@ init(bool is_no_vacuum)
        PGresult   *res;
        char            sql[256];
        int                     i;
+       int64           k;
 
        /* used to track elapsed time and estimate of the remaining time */
        instr_time      start, diff;
@@ -1441,11 +1532,11 @@ init(bool is_no_vacuum)
 
        INSTR_TIME_SET_CURRENT(start);
 
-       for (i = 0; i < naccounts * scale; i++)
+       for (k = 0; k < (int64) naccounts * scale; k++)
        {
-               int                     j = i + 1;
+               int64           j = k + 1;
 
-               snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0);
+               snprintf(sql, 256, INT64_FORMAT "\t" INT64_FORMAT "\t%d\t\n", j, k / naccounts + 1, 0);
                if (PQputline(con, sql))
                {
                        fprintf(stderr, "PQputline failed\n");
@@ -1462,8 +1553,8 @@ init(bool is_no_vacuum)
                        elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
                        remaining_sec = (scale * naccounts - j) * elapsed_sec / j;
 
-                       fprintf(stderr, "%d of %d tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
-                                                       j, naccounts * scale,
+                       fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
+                                                       j, (int64)naccounts * scale,
                                                        (int) (((int64) j * 100) / (naccounts * scale)),
                                                        elapsed_sec, remaining_sec);
                }
@@ -1479,8 +1570,8 @@ init(bool is_no_vacuum)
                        /* have we reached the next interval (or end)? */
                        if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS)) {
 
-                               fprintf(stderr, "%d of %d tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
-                                               j, naccounts * scale,
+                               fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
+                                               j, (int64)naccounts * scale,
                                                (int) (((int64) j * 100) / (naccounts * scale)), elapsed_sec, remaining_sec);
 
                                /* skip to the next interval */
index 58686b1a8b5b7c135e89fc28789e966ed39a4cbb..9ed8b76963cabb9d0d43440342205e53fbbee936 100644 (file)
@@ -185,6 +185,11 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
         Multiply the number of rows generated by the scale factor.
         For example, <literal>-s 100</> will create 10,000,000 rows
         in the <structname>pgbench_accounts</> table. Default is 1.
+        When the scale is 20,000 or larger, the columns used to
+        hold account identifiers (<structfield>aid</structfield> columns)
+        will switch to using larger integers (<type>bigint</type>),
+        in order to be big enough to hold the range of account
+        identifiers.
        </para>
       </listitem>
      </varlistentry>