From: Teodor Sigaev Date: Mon, 26 Mar 2018 15:26:27 +0000 (+0300) Subject: Set random seed for pgbench. X-Git-Tag: REL_11_BETA1~477 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=64f85894ad2730fb1449a8e81dd8026604e9a546;p=postgresql Set random seed for pgbench. Setting the random seed could increase reproducibility of tests in some cases. The patch provides three sources for the seed: time (default), strong random generator (if available) and unsigned constant. The seed can be set from the command line or an environment variable. Author: Fabien Coelho Reviewed by: Chapman Flack Discussion: https://www.postgresql.org/message-id/flat/20160407082711.q7iq3ykffqxcszkv@alap3.anarazel.de --- diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index d52d324bf0..41d9030098 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -679,6 +679,43 @@ pgbench options d + + SEED + + + Set random generator seed. Seeds the system random number generator, + which then produces a sequence of initial generator states, one for + each thread. + Values for SEED may be: + time (the default, the seed is based on the current time), + rand (use a strong random source, failing if none + is available), or an unsigned decimal integer value. + The random generator is invoked explicitly from a pgbench script + (random... functions) or implicitly (for instance option + uses it to schedule transactions). + When explicitly set, the value used for seeding is shown on the terminal. + Any value allowed for SEED may also be + provided through the environment variable + PGBENCH_RANDOM_SEED. + To ensure that the provided seed impacts all possible uses, put this option + first or use the environment variable. + + + Setting the seed explicitly allows reproducing a pgbench + run exactly, as far as random numbers are concerned. 
+ As the random state is managed per thread, this means the exact same + pgbench run for an identical invocation if there is one + client per thread and there are no external or data dependencies. + From a statistical viewpoint reproducing runs exactly is a bad idea because + it can hide the performance variability or improve performance unduly, + e.g. by hitting the same pages as a previous run. + However, it may also be of great help for debugging, for instance + re-running a tricky case which leads to an error. + Use wisely. + + + + @@ -883,6 +920,11 @@ pgbench options d seed used in hash functions by default + + random_seed + random generator seed (unless overwritten with ) + + scale current scale factor diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 89e4bf5f28..8529e7dc47 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -155,6 +155,9 @@ int64 latency_limit = 0; char *tablespace = NULL; char *index_tablespace = NULL; +/* random seed used when calling srandom() */ +int64 random_seed = -1; + /* * end of configurable parameters *********************************************************************/ @@ -579,6 +582,7 @@ usage(void) " --log-prefix=PREFIX prefix for transaction time log file\n" " (default: \"pgbench_log\")\n" " --progress-timestamp use Unix epoch timestamps for progress\n" + " --random-seed=SEED set random seed (\"time\", \"rand\", integer)\n" " --sampling-rate=NUM fraction of transactions to log (e.g., 0.01 for 1%%)\n" "\nCommon options:\n" " -d, --debug print debugging output\n" @@ -4664,6 +4668,49 @@ printResults(TState *threads, StatsData *total, instr_time total_time, } } +/* call srandom based on some seed. NULL triggers the default behavior. 
*/ +static void +set_random_seed(const char *seed, const char *origin) +{ + /* srandom expects an unsigned int */ + unsigned int iseed; + + if (seed == NULL || strcmp(seed, "time") == 0) + { + /* rely on current time */ + instr_time now; + INSTR_TIME_SET_CURRENT(now); + iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now); + } + else if (strcmp(seed, "rand") == 0) + { + /* use some "strong" random source */ + if (!pg_strong_random(&iseed, sizeof(iseed))) + { + fprintf(stderr, "cannot seed random from a strong source\n"); + exit(1); + } + } + else + { + /* parse seed unsigned int value */ + char garbage; + if (sscanf(seed, "%u%c", &iseed, &garbage) != 1) + { + fprintf(stderr, + "error while scanning '%s' from %s, expecting an unsigned integer, 'time' or 'rand'\n", + seed, origin); + exit(1); + } + } + + if (seed != NULL) + fprintf(stderr, "setting random seed to %u\n", iseed); + srandom(iseed); + /* no precision loss: 32 bit unsigned int cast to 64 bit int */ + random_seed = iseed; +} + int main(int argc, char **argv) @@ -4706,6 +4753,7 @@ main(int argc, char **argv) {"progress-timestamp", no_argument, NULL, 6}, {"log-prefix", required_argument, NULL, 7}, {"foreign-keys", no_argument, NULL, 8}, + {"random-seed", required_argument, NULL, 9}, {NULL, 0, NULL, 0} }; @@ -4774,6 +4822,9 @@ main(int argc, char **argv) state = (CState *) pg_malloc(sizeof(CState)); memset(state, 0, sizeof(CState)); + /* set random seed early, because it may be used while parsing scripts. 
*/ + set_random_seed(getenv("PGBENCH_RANDOM_SEED"), "PGBENCH_RANDOM_SEED environment variable"); + while ((c = getopt_long(argc, argv, "iI:h:nvp:dqb:SNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1) { char *script; @@ -5046,6 +5097,10 @@ main(int argc, char **argv) initialization_option_set = true; foreign_keys = true; break; + case 9: /* random-seed */ + benchmarking_option_set = true; + set_random_seed(optarg, "--random-seed option"); + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -5280,10 +5335,6 @@ main(int argc, char **argv) exit(1); } - /* set random seed */ - INSTR_TIME_SET_CURRENT(start_time); - srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time)); - if (internal_script_used) { /* @@ -5339,10 +5390,8 @@ main(int argc, char **argv) if (lookupVariable(&state[0], "client_id") == NULL) { for (i = 0; i < nclients; i++) - { if (!putVariableInt(&state[i], "startup", "client_id", i)) exit(1); - } } /* set default seed for hash functions */ @@ -5358,6 +5407,14 @@ main(int argc, char **argv) exit(1); } + /* set random seed unless overwritten */ + if (lookupVariable(&state[0], "random_seed") == NULL) + { + for (i = 0; i < nclients; i++) + if (!putVariableInt(&state[i], "startup", "random_seed", random_seed)) + exit(1); + } + if (!is_no_vacuum) { fprintf(stderr, "starting vacuum..."); diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 7448a96150..0929418d30 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -29,6 +29,12 @@ sub pgbench $filename =~ s/\@\d+$//; #push @filenames, $filename; + # filenames are expected to be unique on a test + if (-e $filename) + { + ok(0, "$filename must not already exists"); + unlink $filename or die "cannot unlink $filename: $!"; + } append_to_file($filename, $$files{$fn}); } } @@ -210,14 +216,18 @@ COMMIT; } }); # test expressions +# 
command 1..3 and 20 depend on random seed which is used to call srandom. pgbench( - '-t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0', + '--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0', 0, [ qr{type: .*/001_pgbench_expressions}, qr{processed: 1/1} ], - [ qr{command=1.: int 1\d\b}, - qr{command=2.: int 1\d\d\b}, - qr{command=3.: int 1\d\d\d\b}, - qr{command=4.: int 4\b}, + [ qr{setting random seed to 5432\b}, + # After explicit seeding, the four * random checks (1-3,20) should be + # deterministic, but not necessarily portable. + qr{command=1.: int 1\d\b}, # uniform random: 12 on linux + qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux + qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux + qr{command=4.: int 4\b}, qr{command=5.: int 5\b}, qr{command=6.: int 6\b}, qr{command=7.: int 7\b}, @@ -230,7 +240,7 @@ pgbench( qr{command=16.: double 16\b}, qr{command=17.: double 17\b}, qr{command=18.: int 9223372036854775807\b}, - qr{command=20.: int [1-9]\b}, + qr{command=20.: int \d\b}, # zipfian random: 1 on linux qr{command=21.: double -27\b}, qr{command=22.: double 1024\b}, qr{command=23.: double 1\b}, @@ -270,6 +280,9 @@ pgbench( qr{command=86.: int 86\b}, qr{command=93.: int 93\b}, qr{command=95.: int 0\b}, + qr{command=96.: int 1\b}, # :scale + qr{command=97.: int 0\b}, # :client_id + qr{command=98.: int 5432\b}, # :random_seed ], 'pgbench expressions', { '001_pgbench_expressions' => q{-- integer functions @@ -390,8 +403,52 @@ SELECT :v0, :v1, :v2, :v3; \endif -- must be zero if false branches where skipped \set nope debug(:nope) +-- check automatic variables +\set sc debug(:scale) +\set ci debug(:client_id) +\set rs debug(:random_seed) } }); +# random determinism when seeded +$node->safe_psql('postgres', + 'CREATE UNLOGGED TABLE seeded_random(seed INT8 NOT NULL, rand TEXT NOT NULL, val INTEGER NOT NULL);'); + +# same value to 
check for determinism +my $seed = int(rand(1000000000)); +for my $i (1, 2) +{ + pgbench("--random-seed=$seed -t 1", + 0, + [qr{processed: 1/1}], + [qr{setting random seed to $seed\b}], + "random seeded with $seed", + { "001_pgbench_random_seed_$i" => q{-- test random functions +\set ur random(1000, 1999) +\set er random_exponential(2000, 2999, 2.0) +\set gr random_gaussian(3000, 3999, 3.0) +\set zr random_zipfian(4000, 4999, 2.5) +INSERT INTO seeded_random(seed, rand, val) VALUES + (:random_seed, 'uniform', :ur), + (:random_seed, 'exponential', :er), + (:random_seed, 'gaussian', :gr), + (:random_seed, 'zipfian', :zr); +} }); +} + +# check that all runs generated the same 4 values +my ($ret, $out, $err) = + $node->psql('postgres', + 'SELECT seed, rand, val, COUNT(*) FROM seeded_random GROUP BY seed, rand, val'); + +ok($ret == 0, "psql seeded_random count ok"); +ok($err eq '', "psql seeded_random count stderr is empty"); +ok($out =~ /\b$seed\|uniform\|1\d\d\d\|2/, "psql seeded_random count uniform"); +ok($out =~ /\b$seed\|exponential\|2\d\d\d\|2/, "psql seeded_random count exponential"); +ok($out =~ /\b$seed\|gaussian\|3\d\d\d\|2/, "psql seeded_random count gaussian"); +ok($out =~ /\b$seed\|zipfian\|4\d\d\d\|2/, "psql seeded_random count zipfian"); + +$node->safe_psql('postgres', 'DROP TABLE seeded_random;'); + # backslash commands pgbench( '-t 1', 0, diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index 80c5aed435..682bc2280d 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -110,6 +110,8 @@ my @options = ( [ 'invalid init step', '-i -I dta', [qr{unrecognized initialization step}, qr{allowed steps are} ] ], + [ 'bad random seed', '--random-seed=one', + [qr{error while scanning 'one' from --random-seed option, expecting an unsigned integer} ] ], # loging sub-options [ 'sampling => log', '--sampling-rate=0.01',