From b1c1aa53182372e907f3f7f090e7eb5f432a4c9a Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 1 Oct 2019 09:50:26 +0530 Subject: [PATCH] pgbench: add --partitions and --partition-method options. These new options allow users to partition the pgbench_accounts table by specifying the number of partitions and partitioning method. The values allowed for partitioning method are range and hash. This feature allows users to measure the overhead of partitioning if any. Author: Fabien COELHO Reviewed-by: Amit Kapila, Amit Langote, Dilip Kumar, Asif Rehman, and Alvaro Herrera Discussion: https://postgr.es/m/alpine.DEB.2.21.1907230826190.7008@lancre --- doc/src/sgml/ref/pgbench.sgml | 25 ++ src/bin/pgbench/pgbench.c | 306 ++++++++++++++++--- src/bin/pgbench/t/001_pgbench_with_server.pl | 20 +- src/bin/pgbench/t/002_pgbench_no_server.pl | 7 + 4 files changed, 320 insertions(+), 38 deletions(-) diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index c857aa3cba..e3a0abb4c7 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -306,6 +306,31 @@ pgbench options d + + + + + Create a partitioned pgbench_accounts table with + NUM partitions of nearly equal size for + the scaled number of accounts. + Default is 0, meaning no partitioning. + + + + + + + + + Create a partitioned pgbench_accounts table with + NAME method. + Expected values are range or hash. + This option requires that is set to non-zero. + If unspecified, default is range. + + + + diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index ed7652bfbf..e72ad0036e 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -186,6 +186,23 @@ int64 latency_limit = 0; char *tablespace = NULL; char *index_tablespace = NULL; +/* + * Number of "pgbench_accounts" partitions. 0 is the default and means no + * partitioning. + */ +static int partitions = 0; + +/* partitioning strategy for "pgbench_accounts" */ +typedef enum +{ + PART_NONE, /* no partitioning */ + PART_RANGE, /* range partitioning */ + PART_HASH /* hash partitioning */ +} partition_method_t; + +static partition_method_t partition_method = PART_NONE; +static const char *PARTITION_METHOD[] = {"none", "range", "hash"}; + /* random seed used to initialize base_random_sequence */ int64 random_seed = -1; @@ -582,6 +599,7 @@ static void doLog(TState *thread, CState *st, StatsData *agg, bool skipped, double latency, double lag); static void processXactStats(TState *thread, CState *st, instr_time *now, bool skipped, StatsData *agg); +static void append_fillfactor(char *opts, int len); static void addScript(ParsedScript script); static void *threadRun(void *arg); static void finishCon(CState *st); @@ -617,6 +635,9 @@ usage(void) " --foreign-keys create foreign key constraints between tables\n" " --index-tablespace=TABLESPACE\n" " create indexes in the specified tablespace\n" + " --partitions=NUM partition pgbench_accounts in NUM parts (default: 0)\n" + " --partition-method=(range|hash)\n" + " partition pgbench_accounts with this method (default: range)\n" " --tablespace=TABLESPACE create tables in the specified tablespace\n" " --unlogged-tables create tables as unlogged tables\n" "\nOptions to select what to run:\n" @@ -3601,6 +3622,77 @@ initDropTables(PGconn *con) "pgbench_tellers"); } +/* + * Create "pgbench_accounts" partitions if needed. + * + * This is the larger table of pgbench default tpc-b like schema + * with a known size, so we choose to partition it. + */ +static void +createPartitions(PGconn *con) +{ + char ff[64]; + + ff[0] = '\0'; + + /* + * Per ddlinfo in initCreateTables, fillfactor is needed on table + * pgbench_accounts. + */ + append_fillfactor(ff, sizeof(ff)); + + /* we must have to create some partitions */ + Assert(partitions > 0); + + fprintf(stderr, "creating %d partitions...\n", partitions); + + for (int p = 1; p <= partitions; p++) + { + char query[256]; + + if (partition_method == PART_RANGE) + { + int64 part_size = (naccounts * (int64) scale + partitions - 1) / partitions; + char minvalue[32], + maxvalue[32]; + + /* + * For RANGE, we use open-ended partitions at the beginning and + * end to allow any valid value for the primary key. Although the + * actual minimum and maximum values can be derived from the + * scale, it is more generic and the performance is better. + */ + if (p == 1) + sprintf(minvalue, "minvalue"); + else + sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1); + + if (p < partitions) + sprintf(maxvalue, INT64_FORMAT, p * part_size + 1); + else + sprintf(maxvalue, "maxvalue"); + + snprintf(query, sizeof(query), + "create%s table pgbench_accounts_%d\n" + " partition of pgbench_accounts\n" + " for values from (%s) to (%s)%s\n", + unlogged_tables ? " unlogged" : "", p, + minvalue, maxvalue, ff); + } + else if (partition_method == PART_HASH) + snprintf(query, sizeof(query), + "create%s table pgbench_accounts_%d\n" + " partition of pgbench_accounts\n" + " for values with (modulus %d, remainder %d)%s\n", + unlogged_tables ? " unlogged" : "", p, + partitions, p - 1, ff); + else /* cannot get there */ + Assert(0); + + executeStatement(con, query); + } +} + /* * Create pgbench's standard tables */ @@ -3664,9 +3756,15 @@ initCreateTables(PGconn *con) /* Construct new create table statement. */ opts[0] = '\0'; - if (ddl->declare_fillfactor) + + /* Partition pgbench_accounts table */ + if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0) snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts), - " with (fillfactor=%d)", fillfactor); + " partition by %s (aid)", PARTITION_METHOD[partition_method]); + else if (ddl->declare_fillfactor) + /* fillfactor is only expected on actual tables */ + append_fillfactor(opts, sizeof(opts)); + if (tablespace != NULL) { char *escape_tablespace; @@ -3686,6 +3784,21 @@ initCreateTables(PGconn *con) executeStatement(con, buffer); } + + if (partition_method != PART_NONE) + createPartitions(con); +} + +/* + * add fillfactor percent option. + * + * XXX - As default is 100, it could be removed in this case. + */ +static void +append_fillfactor(char *opts, int len) +{ + snprintf(opts + strlen(opts), len - strlen(opts), + " with (fillfactor=%d)", fillfactor); } /* @@ -4010,6 +4123,121 @@ runInitSteps(const char *initialize_steps) termPQExpBuffer(&stats); } +/* + * Extract pgbench table informations into global variables scale, + * partition_method and partitions. + */ +static void +GetTableInfo(PGconn *con, bool scale_given) +{ + PGresult *res; + + /* + * get the scaling factor that should be same as count(*) from + * pgbench_branches if this is not a custom query + */ + res = PQexec(con, "select count(*) from pgbench_branches"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE); + + fprintf(stderr, "%s", PQerrorMessage(con)); + if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0) + { + fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con)); + } + + exit(1); + } + scale = atoi(PQgetvalue(res, 0, 0)); + if (scale < 0) + { + fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n", + PQgetvalue(res, 0, 0)); + exit(1); + } + PQclear(res); + + /* warn if we override user-given -s switch */ + if (scale_given) + fprintf(stderr, + "scale option ignored, using count from pgbench_branches table (%d)\n", + scale); + + /* + * Get the partition information for the first "pgbench_accounts" table + * found in search_path. + * + * The result is empty if no "pgbench_accounts" is found. + * + * Otherwise, it always returns one row even if the table is not + * partitioned (in which case the partition strategy is NULL). + * + * The number of partitions can be 0 even for partitioned tables, if no + * partition is attached. + * + * We assume no partitioning on any failure, so as to avoid failing on an + * old version without "pg_partitioned_table". + */ + res = PQexec(con, + "select o.n, p.partstrat, pg_catalog.count(i.inhparent) " + "from pg_catalog.pg_class as c " + "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) " + "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) " + "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) " + "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) " + "where c.relname = 'pgbench_accounts' and o.n is not null " + "group by 1, 2 " + "order by 1 asc " + "limit 1"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + /* probably an older version, coldly assume no partitioning */ + partition_method = PART_NONE; + partitions = 0; + } + else if (PQntuples(res) == 0) + { + /* + * This case is unlikely as pgbench already found "pgbench_branches" + * above to compute the scale. + */ + fprintf(stderr, + "no pgbench_accounts table found in search_path\n" + "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\".\n", PQdb(con)); + exit(1); + } + else /* PQntupes(res) == 1 */ + { + /* normal case, extract partition information */ + if (PQgetisnull(res, 0, 1)) + partition_method = PART_NONE; + else + { + char *ps = PQgetvalue(res, 0, 1); + + /* column must be there */ + Assert(ps != NULL); + + if (strcmp(ps, "r") == 0) + partition_method = PART_RANGE; + else if (strcmp(ps, "h") == 0) + partition_method = PART_HASH; + else + { + /* possibly a newer version with new partition method */ + fprintf(stderr, "unexpected partition method: \"%s\"\n", ps); + exit(1); + } + } + + partitions = atoi(PQgetvalue(res, 0, 2)); + } + + PQclear(res); +} + /* * Replace :param with $n throughout the command's SQL text, which * is a modifiable string in cmd->lines. @@ -4919,6 +5147,10 @@ printResults(StatsData *total, instr_time total_time, printf("transaction type: %s\n", num_scripts == 1 ? sql_script[0].desc : "multiple scripts"); printf("scaling factor: %d\n", scale); + /* only print partitioning information if some partitioning was detected */ + if (partition_method != PART_NONE) + printf("partition method: %s\npartitions: %d\n", + PARTITION_METHOD[partition_method], partitions); printf("query mode: %s\n", QUERYMODE[querymode]); printf("number of clients: %d\n", nclients); printf("number of threads: %d\n", nthreads); @@ -5126,6 +5358,8 @@ main(int argc, char **argv) {"foreign-keys", no_argument, NULL, 8}, {"random-seed", required_argument, NULL, 9}, {"show-script", required_argument, NULL, 10}, + {"partitions", required_argument, NULL, 11}, + {"partition-method", required_argument, NULL, 12}, {NULL, 0, NULL, 0} }; @@ -5160,7 +5394,6 @@ main(int argc, char **argv) #endif PGconn *con; - PGresult *res; char *env; int exit_code = 0; @@ -5486,6 +5719,29 @@ main(int argc, char **argv) exit(0); } break; + case 11: /* partitions */ + initialization_option_set = true; + partitions = atoi(optarg); + if (partitions < 0) + { + fprintf(stderr, "invalid number of partitions: \"%s\"\n", + optarg); + exit(1); + } + break; + case 12: /* partition-method */ + initialization_option_set = true; + if (pg_strcasecmp(optarg, "range") == 0) + partition_method = PART_RANGE; + else if (pg_strcasecmp(optarg, "hash") == 0) + partition_method = PART_HASH; + else + { + fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\"," + " got: \"%s\"\n", optarg); + exit(1); + } + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -5567,6 +5823,16 @@ main(int argc, char **argv) exit(1); } + if (partitions == 0 && partition_method != PART_NONE) + { + fprintf(stderr, "--partition-method requires greater than zero --partitions\n"); + exit(1); + } + + /* set default method */ + if (partitions > 0 && partition_method == PART_NONE) + partition_method = PART_RANGE; + if (initialize_steps == NULL) initialize_steps = pg_strdup(DEFAULT_INIT_STEPS); @@ -5724,39 +5990,7 @@ main(int argc, char **argv) } if (internal_script_used) - { - /* - * get the scaling factor that should be same as count(*) from - * pgbench_branches if this is not a custom query - */ - res = PQexec(con, "select count(*) from pgbench_branches"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE); - - fprintf(stderr, "%s", PQerrorMessage(con)); - if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0) - { - fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con)); - } - - exit(1); - } - scale = atoi(PQgetvalue(res, 0, 0)); - if (scale < 0) - { - fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n", - PQgetvalue(res, 0, 0)); - exit(1); - } - PQclear(res); - - /* warn if we override user-given -s switch */ - if (scale_given) - fprintf(stderr, - "scale option ignored, using count from pgbench_branches table (%d)\n", - scale); - } + GetTableInfo(con, scale_given); /* * :scale variables normally get -s or database scale, but don't override diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index b82d3f65c4..c441626d7c 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -58,6 +58,19 @@ sub pgbench return; } +# tablespace for testing, because partitioned tables cannot use pg_default +# explicitly and we want to test that table creation with tablespace works +# for partitioned tables. +my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir'; +mkdir $ts or die "cannot create directory $ts"; +# this takes care of WIN-specific path issues +my $ets = TestLib::perl2host($ts); + +# the next commands will issue a syntax error if the path contains a "'" +$node->safe_psql('postgres', + "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';" +); + # Test concurrent OID generation via pg_enum_oid_index. This indirectly # exercises LWLock and spinlock concurrency. my $labels = join ',', map { "'l$_'" } 1 .. 1000; @@ -100,12 +113,13 @@ pgbench( # Again, with all possible options pgbench( - '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default', + '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash', 0, [qr{^$}i], [ qr{dropping old tables}, qr{creating tables}, + qr{creating 2 partitions}, qr{vacuuming}, qr{creating primary keys}, qr{creating foreign keys}, @@ -116,12 +130,13 @@ pgbench( # Test interaction of --init-steps with legacy step-selection options pgbench( - '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables', + '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', 0, [qr{^$}], [ qr{dropping old tables}, qr{creating tables}, + qr{creating 3 partitions}, qr{creating primary keys}, qr{.* of .* tuples \(.*\) done}, qr{creating foreign keys}, @@ -910,5 +925,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10, qr{^\d \d{1,2} \d+ \d \d+ \d+$}); # done +$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts'); $node->stop; done_testing(); diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index f7fa18418b..1e9542af3f 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -157,6 +157,13 @@ my @options = ( qr{error while setting random seed from --random-seed option} ] ], + [ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ], + [ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ], + [ + 'partition method without partitioning', + '-i --partition-method=hash', + [ qr{partition-method requires greater than zero --partitions} ] + ], # logging sub-options [ -- 2.40.0