From e51a04840a1c45db101686bef0b7025d5014c74b Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Wed, 21 Mar 2018 18:01:23 +0300 Subject: [PATCH] Add general purpose hasing functions to pgbench. Hashing function is useful for simulating real-world workload in test like WEB workload, as an example - YCSB benchmarks. Author: Ildar Musin with minor editorization by me Reviewed by: Fabien Coelho, me Discussion: https://www.postgresql.org/message-id/flat/0e8bd39e-dfcd-2879-f88f-272799ad7ef2@postgrespro.ru --- doc/src/sgml/ref/pgbench.sgml | 59 ++++++++++- src/bin/pgbench/exprparse.y | 98 +++++++++++++----- src/bin/pgbench/pgbench.c | 101 ++++++++++++++++++- src/bin/pgbench/pgbench.h | 4 +- src/bin/pgbench/t/001_pgbench_with_server.pl | 11 ++ 5 files changed, 239 insertions(+), 34 deletions(-) diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 5f28023799..f07ddf1226 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -874,13 +874,18 @@ pgbench options d - scale - current scale factor + client_id + unique number identifying the client session (starts from zero) - client_id - unique number identifying the client session (starts from zero) + default_seed + seed used in hash functions by default + + + + scale + current scale factor @@ -1245,6 +1250,27 @@ pgbench options d greatest(5, 4, 3, 2) 5 + + hash(a [, seed ] ) + integer + alias for hash_murmur2() + hash(10, 5432) + -5817877081768721676 + + + hash_fnv1a(a [, seed ] ) + integer + FNV-1a hash + hash_fnv1a(10, 5432) + -7793829335365542153 + + + hash_murmur2(a [, seed ] ) + integer + MurmurHash2 hash + hash_murmur2(10, 5432) + -5817877081768721676 + int(x) integer @@ -1423,6 +1449,31 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / + + Hash functions hash, hash_murmur2 and + hash_fnv1a accept an input value and an optional seed parameter. + In case the seed isn't provided the value of :default_seed + is used, which is initialized randomly unless set by the command-line + -D option. Hash functions can be used to scatter the + distribution of random functions such as random_zipfian or + random_exponential. For instance, the following pgbench + script simulates possible real world workload typical for social media and + blogging platforms where few accounts generate excessive load: + + +\set r random_zipfian(0, 100000000, 1.07) +\set k abs(hash(:r)) % 1000000 + + + In some cases several distinct distributions are needed which don't correlate + with each other and this is when implicit seed parameter comes in handy: + + +\set k1 abs(hash(:r), :default_seed + 123) % 1000000 +\set k2 abs(hash(:r), :default_seed + 321) % 1000000 + + + As an example, the full definition of the built-in TPC-B-like transaction is: diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index e23ca51786..8447e14d14 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -16,6 +16,10 @@ #include "pgbench.h" +#define PGBENCH_NARGS_VARIABLE (-1) +#define PGBENCH_NARGS_CASE (-2) +#define PGBENCH_NARGS_HASH (-3) + PgBenchExpr *expr_parse_result; static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list); @@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, PgBenchExpr *expr) /* * List of available functions: * - fname: function name, "!..." for special internal functions - * - nargs: number of arguments - * -1 is a special value for least & greatest meaning #args >= 1 - * -2 is for the "CASE WHEN ..." function, which has #args >= 3 and odd + * - nargs: number of arguments. Special cases: + * - PGBENCH_NARGS_VARIABLE is a special value for least & greatest + * meaning #args >= 1; + * - PGBENCH_NARGS_CASE is for the "CASE WHEN ..." function, which + * has #args >= 3 and odd; + * - PGBENCH_NARGS_HASH is for hash functions, which have one required + * and one optional argument; * - tag: function identifier from PgBenchFunction enum */ static const struct @@ -259,10 +267,10 @@ static const struct "abs", 1, PGBENCH_ABS }, { - "least", -1, PGBENCH_LEAST + "least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST }, { - "greatest", -1, PGBENCH_GREATEST + "greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST }, { "debug", 1, PGBENCH_DEBUG @@ -347,7 +355,25 @@ static const struct }, /* "case when ... then ... else ... end" construction */ { - "!case_end", -2, PGBENCH_CASE + "!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE + }, + { + "hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2 + }, + { + "hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2 + }, + { + "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A + }, + { + "hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2 + }, + { + "hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2 + }, + { + "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A }, /* keep as last array element */ { @@ -423,29 +449,51 @@ elist_length(PgBenchExprList *list) static PgBenchExpr * make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args) { + int len = elist_length(args); + PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr)); Assert(fnumber >= 0); - if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 && - PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args)) - expr_yyerror_more(yyscanner, "unexpected number of arguments", - PGBENCH_FUNCTIONS[fnumber].fname); - - /* check at least one arg for least & greatest */ - if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 && - elist_length(args) == 0) - expr_yyerror_more(yyscanner, "at least one argument expected", - PGBENCH_FUNCTIONS[fnumber].fname); - /* special case: case (when ... then ...)+ (else ...)? end */ - if (PGBENCH_FUNCTIONS[fnumber].nargs == -2) - { - int len = elist_length(args); - - /* 'else' branch is always present, but could be a NULL-constant */ - if (len < 3 || len % 2 != 1) - expr_yyerror_more(yyscanner, "odd and >= 3 number of arguments expected", - "case control structure"); + /* validate arguments number including few special cases */ + switch (PGBENCH_FUNCTIONS[fnumber].nargs) + { + /* check at least one arg for least & greatest */ + case PGBENCH_NARGS_VARIABLE: + if (len == 0) + expr_yyerror_more(yyscanner, "at least one argument expected", + PGBENCH_FUNCTIONS[fnumber].fname); + break; + + /* case (when ... then ...)+ (else ...)? end */ + case PGBENCH_NARGS_CASE: + /* 'else' branch is always present, but could be a NULL-constant */ + if (len < 3 || len % 2 != 1) + expr_yyerror_more(yyscanner, + "odd and >= 3 number of arguments expected", + "case control structure"); + break; + + /* hash functions with optional seed argument */ + case PGBENCH_NARGS_HASH: + if (len > 2) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + if (len == 1) + { + PgBenchExpr *var = make_variable("default_seed"); + args = make_elist(var, args); + } + break; + + /* common case: positive arguments number */ + default: + Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0); + + if (PGBENCH_FUNCTIONS[fnumber].nargs != len) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); } expr->etype = ENODE_FUNCTION; diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 29d69de4d1..a15aa06b19 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -60,6 +60,14 @@ #define ERRCODE_UNDEFINED_TABLE "42P01" +/* + * Hashing constants + */ +#define FNV_PRIME 0x100000001b3 +#define FNV_OFFSET_BASIS 0xcbf29ce484222325 +#define MM2_MUL 0xc6a4a7935bd1e995 +#define MM2_ROT 47 + /* * Multi-platform pthread implementations */ @@ -915,6 +923,54 @@ getZipfianRand(TState *thread, int64 min, int64 max, double s) : computeHarmonicZipfian(thread, n, s)); } +/* + * FNV-1a hash function + */ +static int64 +getHashFnv1a(int64 val, uint64 seed) +{ + int64 result; + int i; + + result = FNV_OFFSET_BASIS ^ seed; + for (i = 0; i < 8; ++i) + { + int32 octet = val & 0xff; + + val = val >> 8; + result = result ^ octet; + result = result * FNV_PRIME; + } + + return result; +} + +/* + * Murmur2 hash function + * + * Based on original work of Austin Appleby + * https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp + */ +static int64 +getHashMurmur2(int64 val, uint64 seed) +{ + uint64 result = seed ^ (sizeof(int64) * MM2_MUL); + uint64 k = (uint64) val; + + k *= MM2_MUL; + k ^= k >> MM2_ROT; + k *= MM2_MUL; + + result ^= k; + result *= MM2_MUL; + + result ^= result >> MM2_ROT; + result *= MM2_MUL; + result ^= result >> MM2_ROT; + + return (int64) result; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -2211,6 +2267,30 @@ evalStandardFunc(TState *thread, CState *st, return true; } + /* hashing */ + case PGBENCH_HASH_FNV1A: + case PGBENCH_HASH_MURMUR2: + { + int64 val, + seed; + + Assert(nargs == 2); + + if (!coerceToInt(&vargs[0], &val) || + !coerceToInt(&vargs[1], &seed)) + return false; + + if (func == PGBENCH_HASH_MURMUR2) + setIntValue(retval, getHashMurmur2(val, seed)); + else if (func == PGBENCH_HASH_FNV1A) + setIntValue(retval, getHashFnv1a(val, seed)); + else + /* cannot get here */ + Assert(0); + + return true; + } + default: /* cannot get here */ Assert(0); @@ -4963,6 +5043,10 @@ main(int argc, char **argv) exit(1); } + /* set random seed */ + INSTR_TIME_SET_CURRENT(start_time); + srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time)); + if (internal_script_used) { /* @@ -5024,6 +5108,19 @@ main(int argc, char **argv) } } + /* set default seed for hash functions */ + if (lookupVariable(&state[0], "default_seed") == NULL) + { + uint64 seed = ((uint64) (random() & 0xFFFF) << 48) | + ((uint64) (random() & 0xFFFF) << 32) | + ((uint64) (random() & 0xFFFF) << 16) | + (uint64) (random() & 0xFFFF); + + for (i = 0; i < nclients; i++) + if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed)) + exit(1); + } + if (!is_no_vacuum) { fprintf(stderr, "starting vacuum..."); @@ -5041,10 +5138,6 @@ main(int argc, char **argv) } PQfinish(con); - /* set random seed */ - INSTR_TIME_SET_CURRENT(start_time); - srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time)); - /* set up thread data structures */ threads = (TState *) pg_malloc(sizeof(TState) * nthreads); nclients_dealt = 0; diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index 0705ccdf0d..6983865b92 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -97,7 +97,9 @@ typedef enum PgBenchFunction PGBENCH_LE, PGBENCH_LT, PGBENCH_IS, - PGBENCH_CASE + PGBENCH_CASE, + PGBENCH_HASH_FNV1A, + PGBENCH_HASH_MURMUR2 } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 0c23d2fc58..50cbb23f20 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -259,6 +259,11 @@ pgbench( qr{command=46.: int 46\b}, qr{command=47.: boolean true\b}, qr{command=48.: boolean true\b}, + qr{command=49.: int -5817877081768721676\b}, + qr{command=50.: boolean true\b}, + qr{command=51.: int -7793829335365542153\b}, + qr{command=52.: int -?\d+\b}, + qr{command=53.: boolean true\b}, ], 'pgbench expressions', { '001_pgbench_expressions' => q{-- integer functions @@ -327,6 +332,12 @@ pgbench( \set n6 debug(:n IS NULL AND NOT :f AND :t) -- conditional truth \set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND CASE WHEN :n THEN NULL ELSE TRUE END) +-- hash functions +\set h0 debug(hash(10, 5432)) +\set h1 debug(:h0 = hash_murmur2(10, 5432)) +\set h3 debug(hash_fnv1a(10, 5432)) +\set h4 debug(hash(10)) +\set h5 debug(hash(10) = hash(10, :default_seed)) -- lazy evaluation \set zy 0 \set yz debug(case when :zy = 0 then -1 else (1 / :zy) end) -- 2.40.0