]> granicus.if.org Git - postgresql/commitdiff
pgbench: add --partitions and --partition-method options.
authorAmit Kapila <akapila@postgresql.org>
Tue, 1 Oct 2019 04:20:26 +0000 (09:50 +0530)
committerAmit Kapila <akapila@postgresql.org>
Thu, 3 Oct 2019 02:57:07 +0000 (08:27 +0530)
These new options allow users to partition the pgbench_accounts table by
specifying the number of partitions and partitioning method.  The values
allowed for partitioning method are range and hash.

This feature allows users to measure the overhead of partitioning, if any.

Author: Fabien COELHO
Reviewed-by: Amit Kapila, Amit Langote, Dilip Kumar, Asif Rehman, and
Alvaro Herrera
Discussion: https://postgr.es/m/alpine.DEB.2.21.1907230826190.7008@lancre

doc/src/sgml/ref/pgbench.sgml
src/bin/pgbench/pgbench.c
src/bin/pgbench/t/001_pgbench_with_server.pl
src/bin/pgbench/t/002_pgbench_no_server.pl

index c857aa3cbac14a88bf235fb2280f6d47afa4192b..e3a0abb4c704365c93055ed3200a283551a6e706 100644 (file)
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table using
+        the <replaceable>NAME</replaceable> partitioning method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
index ed7652bfbf6358b44f96196ded99d0b0c6fefdb0..e72ad0036e18a4d3e673a1a20548a38829d10777 100644 (file)
@@ -186,6 +186,23 @@ int64              latency_limit = 0;
 char      *tablespace = NULL;
 char      *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions.  0 is the default and means no
+ * partitioning.
+ */
+static int     partitions = 0;
+
+/*
+ * partitioning strategy for "pgbench_accounts"
+ *
+ * The values are used as indexes into PARTITION_METHOD[], so the order of
+ * the members here must match the order of the names in that array.
+ */
+typedef enum
+{
+       PART_NONE,                                      /* no partitioning */
+       PART_RANGE,                                     /* range partitioning */
+       PART_HASH                                       /* hash partitioning */
+}                      partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64          random_seed = -1;
 
@@ -582,6 +599,7 @@ static void doLog(TState *thread, CState *st,
                                  StatsData *agg, bool skipped, double latency, double lag);
 static void processXactStats(TState *thread, CState *st, instr_time *now,
                                                         bool skipped, StatsData *agg);
+static void append_fillfactor(char *opts, int len);
 static void addScript(ParsedScript script);
 static void *threadRun(void *arg);
 static void finishCon(CState *st);
@@ -617,6 +635,9 @@ usage(void)
                   "  --foreign-keys           create foreign key constraints between tables\n"
                   "  --index-tablespace=TABLESPACE\n"
                   "                           create indexes in the specified tablespace\n"
+                  "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+                  "  --partition-method=(range|hash)\n"
+                  "                           partition pgbench_accounts with this method (default: range)\n"
                   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
                   "  --unlogged-tables        create tables as unlogged tables\n"
                   "\nOptions to select what to run:\n"
@@ -3601,6 +3622,77 @@ initDropTables(PGconn *con)
                                         "pgbench_tellers");
 }
 
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so we choose to partition it.
+ */
+static void
+createPartitions(PGconn *con)
+{
+       char            ff[64];
+
+       ff[0] = '\0';
+
+       /*
+        * Per ddlinfo in initCreateTables, fillfactor is needed on table
+        * pgbench_accounts.
+        */
+       append_fillfactor(ff, sizeof(ff));
+
+       /* there must be at least one partition to create */
+       Assert(partitions > 0);
+
+       fprintf(stderr, "creating %d partitions...\n", partitions);
+
+       for (int p = 1; p <= partitions; p++)
+       {
+               char            query[256];
+
+               if (partition_method == PART_RANGE)
+               {
+                       /* accounts per partition, rounded up so all accounts are covered */
+                       int64           part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+                       char            minvalue[32],
+                                               maxvalue[32];
+
+                       /*
+                        * For RANGE, we use open-ended partitions at the beginning and
+                        * end to allow any valid value for the primary key.  Although the
+                        * actual minimum and maximum values can be derived from the
+                        * scale, it is more generic and the performance is better.
+                        */
+                       if (p == 1)
+                               snprintf(minvalue, sizeof(minvalue), "minvalue");
+                       else
+                               snprintf(minvalue, sizeof(minvalue), INT64_FORMAT,
+                                                (p - 1) * part_size + 1);
+
+                       /*
+                        * The upper bound of a range partition is exclusive, hence it
+                        * is the same value as the lower bound of the next partition.
+                        */
+                       if (p < partitions)
+                               snprintf(maxvalue, sizeof(maxvalue), INT64_FORMAT,
+                                                p * part_size + 1);
+                       else
+                               snprintf(maxvalue, sizeof(maxvalue), "maxvalue");
+
+                       snprintf(query, sizeof(query),
+                                        "create%s table pgbench_accounts_%d\n"
+                                        "  partition of pgbench_accounts\n"
+                                        "  for values from (%s) to (%s)%s\n",
+                                        unlogged_tables ? " unlogged" : "", p,
+                                        minvalue, maxvalue, ff);
+               }
+               else if (partition_method == PART_HASH)
+                       snprintf(query, sizeof(query),
+                                        "create%s table pgbench_accounts_%d\n"
+                                        "  partition of pgbench_accounts\n"
+                                        "  for values with (modulus %d, remainder %d)%s\n",
+                                        unlogged_tables ? " unlogged" : "", p,
+                                        partitions, p - 1, ff);
+               else
+               {
+                       /*
+                        * Cannot get there.  Still, bail out explicitly so that a
+                        * build without assertions never executes an uninitialized
+                        * query buffer.
+                        */
+                       Assert(0);
+                       fprintf(stderr, "unexpected partition method\n");
+                       exit(1);
+               }
+
+               executeStatement(con, query);
+       }
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3756,15 @@ initCreateTables(PGconn *con)
 
                /* Construct new create table statement. */
                opts[0] = '\0';
-               if (ddl->declare_fillfactor)
+
+               /* Partition pgbench_accounts table */
+               if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
                        snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-                                        " with (fillfactor=%d)", fillfactor);
+                                        " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+               else if (ddl->declare_fillfactor)
+                       /* fillfactor is only expected on actual tables */
+                       append_fillfactor(opts, sizeof(opts));
+
                if (tablespace != NULL)
                {
                        char       *escape_tablespace;
@@ -3686,6 +3784,21 @@ initCreateTables(PGconn *con)
 
                executeStatement(con, buffer);
        }
+
+       if (partition_method != PART_NONE)
+               createPartitions(con);
+}
+
+/*
+ * add fillfactor percent option.
+ *
+ * XXX - As default is 100, it could be removed in this case.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+       size_t          used = strlen(opts);
+
+       /*
+        * Nothing can be appended if the buffer is already full.  Checking
+        * this first also keeps the size computation below from wrapping
+        * around, as it is done with unsigned arithmetic.
+        */
+       if (len <= 0 || used >= (size_t) len)
+               return;
+
+       snprintf(opts + used, (size_t) len - used,
+                        " with (fillfactor=%d)", fillfactor);
 }
 
 /*
@@ -4010,6 +4123,121 @@ runInitSteps(const char *initialize_steps)
        termPQExpBuffer(&stats);
 }
 
+/*
+ * Extract pgbench table information into global variables scale,
+ * partition_method and partitions.
+ *
+ * "con" is the connection used to run the catalog queries.  "scale_given"
+ * tells whether the user specified a scale on the command line; it only
+ * triggers a warning, as the scale is always taken from the database.
+ *
+ * The process exits with status 1 if pgbench_branches cannot be queried,
+ * if its row count is invalid, if no pgbench_accounts table is found, or
+ * if the partition strategy is not one of the known ones.
+ */
+static void
+GetTableInfo(PGconn *con, bool scale_given)
+{
+       PGresult   *res;
+
+       /*
+        * get the scaling factor that should be same as count(*) from
+        * pgbench_branches if this is not a custom query
+        */
+       res = PQexec(con, "select count(*) from pgbench_branches");
+       if (PQresultStatus(res) != PGRES_TUPLES_OK)
+       {
+               char       *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+
+               fprintf(stderr, "%s", PQerrorMessage(con));
+               /* give a hint when the failure is a missing table */
+               if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
+               {
+                       fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+               }
+
+               exit(1);
+       }
+       scale = atoi(PQgetvalue(res, 0, 0));
+       if (scale < 0)
+       {
+               fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
+                               PQgetvalue(res, 0, 0));
+               exit(1);
+       }
+       PQclear(res);
+
+       /* warn if we override user-given -s switch */
+       if (scale_given)
+               fprintf(stderr,
+                               "scale option ignored, using count from pgbench_branches table (%d)\n",
+                               scale);
+
+       /*
+        * Get the partition information for the first "pgbench_accounts" table
+        * found in search_path.
+        *
+        * The result is empty if no "pgbench_accounts" is found.
+        *
+        * Otherwise, it always returns one row even if the table is not
+        * partitioned (in which case the partition strategy is NULL).
+        *
+        * The number of partitions can be 0 even for partitioned tables, if no
+        * partition is attached.
+        *
+        * We assume no partitioning on any failure, so as to avoid failing on an
+        * old version without "pg_partitioned_table".
+        *
+        * Result columns: 0 is the position of the schema in the search path,
+        * 1 is the partition strategy, 2 is the number of partitions.
+        */
+       res = PQexec(con,
+                                "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+                                "from pg_catalog.pg_class as c "
+                                "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+                                "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+                                "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+                                "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+                                "where c.relname = 'pgbench_accounts' and o.n is not null "
+                                "group by 1, 2 "
+                                "order by 1 asc "
+                                "limit 1");
+
+       if (PQresultStatus(res) != PGRES_TUPLES_OK)
+       {
+               /* probably an older version, coldly assume no partitioning */
+               partition_method = PART_NONE;
+               partitions = 0;
+       }
+       else if (PQntuples(res) == 0)
+       {
+               /*
+                * This case is unlikely as pgbench already found "pgbench_branches"
+                * above to compute the scale.
+                */
+               fprintf(stderr,
+                               "no pgbench_accounts table found in search_path\n"
+                               "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\".\n", PQdb(con));
+               exit(1);
+       }
+       else                                            /* PQntuples(res) == 1 */
+       {
+               /* normal case, extract partition information */
+               if (PQgetisnull(res, 0, 1))
+                       partition_method = PART_NONE;
+               else
+               {
+                       char       *ps = PQgetvalue(res, 0, 1);
+
+                       /* column must be there */
+                       Assert(ps != NULL);
+
+                       /* map the catalog strategy code to our enum */
+                       if (strcmp(ps, "r") == 0)
+                               partition_method = PART_RANGE;
+                       else if (strcmp(ps, "h") == 0)
+                               partition_method = PART_HASH;
+                       else
+                       {
+                               /* possibly a newer version with new partition method */
+                               fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+                               exit(1);
+                       }
+               }
+
+               /* number of attached partitions, from the count() column */
+               partitions = atoi(PQgetvalue(res, 0, 2));
+       }
+
+       PQclear(res);
+}
+
 /*
  * Replace :param with $n throughout the command's SQL text, which
  * is a modifiable string in cmd->lines.
@@ -4919,6 +5147,10 @@ printResults(StatsData *total, instr_time total_time,
        printf("transaction type: %s\n",
                   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
        printf("scaling factor: %d\n", scale);
+       /* only print partitioning information if some partitioning was detected */
+       if (partition_method != PART_NONE)
+               printf("partition method: %s\npartitions: %d\n",
+                          PARTITION_METHOD[partition_method], partitions);
        printf("query mode: %s\n", QUERYMODE[querymode]);
        printf("number of clients: %d\n", nclients);
        printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5358,8 @@ main(int argc, char **argv)
                {"foreign-keys", no_argument, NULL, 8},
                {"random-seed", required_argument, NULL, 9},
                {"show-script", required_argument, NULL, 10},
+               {"partitions", required_argument, NULL, 11},
+               {"partition-method", required_argument, NULL, 12},
                {NULL, 0, NULL, 0}
        };
 
@@ -5160,7 +5394,6 @@ main(int argc, char **argv)
 #endif
 
        PGconn     *con;
-       PGresult   *res;
        char       *env;
 
        int                     exit_code = 0;
@@ -5486,6 +5719,29 @@ main(int argc, char **argv)
                                        exit(0);
                                }
                                break;
+                       case 11:                        /* partitions */
+                               initialization_option_set = true;
+                               partitions = atoi(optarg);
+                               if (partitions < 0)
+                               {
+                                       fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+                                                       optarg);
+                                       exit(1);
+                               }
+                               break;
+                       case 12:                        /* partition-method */
+                               initialization_option_set = true;
+                               if (pg_strcasecmp(optarg, "range") == 0)
+                                       partition_method = PART_RANGE;
+                               else if (pg_strcasecmp(optarg, "hash") == 0)
+                                       partition_method = PART_HASH;
+                               else
+                               {
+                                       fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+                                                       " got: \"%s\"\n", optarg);
+                                       exit(1);
+                               }
+                               break;
                        default:
                                fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
                                exit(1);
@@ -5567,6 +5823,16 @@ main(int argc, char **argv)
                        exit(1);
                }
 
+               if (partitions == 0 && partition_method != PART_NONE)
+               {
+                       fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+                       exit(1);
+               }
+
+               /* set default method */
+               if (partitions > 0 && partition_method == PART_NONE)
+                       partition_method = PART_RANGE;
+
                if (initialize_steps == NULL)
                        initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5724,39 +5990,7 @@ main(int argc, char **argv)
        }
 
        if (internal_script_used)
-       {
-               /*
-                * get the scaling factor that should be same as count(*) from
-                * pgbench_branches if this is not a custom query
-                */
-               res = PQexec(con, "select count(*) from pgbench_branches");
-               if (PQresultStatus(res) != PGRES_TUPLES_OK)
-               {
-                       char       *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-
-                       fprintf(stderr, "%s", PQerrorMessage(con));
-                       if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
-                       {
-                               fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
-                       }
-
-                       exit(1);
-               }
-               scale = atoi(PQgetvalue(res, 0, 0));
-               if (scale < 0)
-               {
-                       fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
-                                       PQgetvalue(res, 0, 0));
-                       exit(1);
-               }
-               PQclear(res);
-
-               /* warn if we override user-given -s switch */
-               if (scale_given)
-                       fprintf(stderr,
-                                       "scale option ignored, using count from pgbench_branches table (%d)\n",
-                                       scale);
-       }
+               GetTableInfo(con, scale_given);
 
        /*
         * :scale variables normally get -s or database scale, but don't override
index b82d3f65c4f4714ff45c3cfe051ad52fd352f713..c441626d7cd7da81848fef4fe22705e5e0baeae4 100644 (file)
@@ -58,6 +58,19 @@ sub pgbench
        return;
 }
 
+# tablespace for testing, because partitioned tables cannot use pg_default
+# explicitly and we want to test that table creation with tablespace works
+# for partitioned tables.
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+# this takes care of WIN-specific path issues
+my $ets = TestLib::perl2host($ts);
+
+# the next commands will issue a syntax error if the path contains a "'"
+$node->safe_psql('postgres',
+       "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +113,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-       '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+       '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
        0,
        [qr{^$}i],
        [
                qr{dropping old tables},
                qr{creating tables},
+               qr{creating 2 partitions},
                qr{vacuuming},
                qr{creating primary keys},
                qr{creating foreign keys},
@@ -116,12 +130,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-       '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+       '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
        0,
        [qr{^$}],
        [
                qr{dropping old tables},
                qr{creating tables},
+               qr{creating 3 partitions},
                qr{creating primary keys},
                qr{.* of .* tuples \(.*\) done},
                qr{creating foreign keys},
@@ -910,5 +925,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
        qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
 # done
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
 $node->stop;
 done_testing();
index f7fa18418b42c29e09db07e8b7bde33d769849ef..1e9542af3f257dc4aa106ba2cd2cb37838a25bb6 100644 (file)
@@ -157,6 +157,13 @@ my @options = (
                        qr{error while setting random seed from --random-seed option}
                ]
        ],
+       [ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+       [ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+       [
+               'partition method without partitioning',
+               '-i --partition-method=hash',
+               [ qr{partition-method requires greater than zero --partitions} ]
+       ],
 
        # logging sub-options
        [