middle quarter (1.0 / 4.0) of the interval (i.e. from
<literal>3.0 / 8.0</literal> to <literal>5.0 / 8.0</literal>) and 95% from
the middle half (<literal>2.0 / 4.0</literal>) of the interval (second and third
- quartiles). The minimum <replaceable>parameter</replaceable> is 2.0 for performance
- of the Box-Muller transform.
+ quartiles). The minimum allowed <replaceable>parameter</replaceable>
+ value is 2.0.
</para>
</listitem>
<listitem>
<para>
- <literal>random_zipfian</literal> generates an approximated bounded Zipfian
- distribution. For <replaceable>parameter</replaceable> in (0, 1), an
- approximated algorithm is taken from
- "Quickly Generating Billion-Record Synthetic Databases",
- Jim Gray et al, SIGMOD 1994. For <replaceable>parameter</replaceable>
- in (1, 1000), a rejection method is used, based on
- "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
- Springer 1986. The distribution is not defined when the parameter's
- value is 1.0. The function's performance is poor for parameter values
- close and above 1.0 and on a small range.
- </para>
- <para>
+ <literal>random_zipfian</literal> generates a bounded Zipfian
+ distribution.
<replaceable>parameter</replaceable> defines how skewed the distribution
is. The larger the <replaceable>parameter</replaceable>, the more
frequently values closer to the beginning of the interval are drawn.
- The closer to 0 <replaceable>parameter</replaceable> is,
- the flatter (more uniform) the output distribution.
The distribution is such that, assuming the range starts from 1,
the ratio of the probability of drawing <replaceable>k</replaceable>
versus drawing <replaceable>k+1</replaceable> is
itself is produced <literal>(3/2)*2.5 = 2.76</literal> times more
frequently than <literal>3</literal>, and so on.
</para>
+ <para>
+ <application>pgbench</application>'s implementation is based on
+ "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
+ Springer 1986. Due to limitations of that algorithm,
+ the <replaceable>parameter</replaceable> value is restricted to
+ the range [1.001, 1000].
+ </para>
</listitem>
</itemizedlist>
#define LOG_STEP_SECONDS 5 /* seconds between log messages */
#define DEFAULT_NXACTS 10 /* default nxacts */
-#define ZIPF_CACHE_SIZE 15 /* cache cells number */
-
#define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */
-#define MAX_ZIPFIAN_PARAM 1000 /* maximum parameter for zipfian */
+
+#define MIN_ZIPFIAN_PARAM 1.001 /* minimum parameter for zipfian */
+#define MAX_ZIPFIAN_PARAM 1000.0 /* maximum parameter for zipfian */
int nxacts = 0; /* number of transactions per client */
int duration = 0; /* duration in seconds */
int ecnt; /* error count */
} CState;
-/*
- * Cache cell for random_zipfian call
- */
-typedef struct
-{
- /* cell keys */
- double s; /* s - parameter of random_zipfian function */
- int64 n; /* number of elements in range (max - min + 1) */
-
- double harmonicn; /* generalizedHarmonicNumber(n, s) */
- double alpha;
- double beta;
- double eta;
-
- uint64 last_used; /* last used logical time */
-} ZipfCell;
-
-/*
- * Zipf cache for zeta values
- */
-typedef struct
-{
- uint64 current; /* counter for LRU cache replacement algorithm */
-
- int nb_cells; /* number of filled cells */
- int overflowCount; /* number of cache overflows */
- ZipfCell cells[ZIPF_CACHE_SIZE];
-} ZipfCache;
-
/*
* Thread state
*/
int64 throttle_trigger; /* previous/next throttling (us) */
FILE *logfile; /* where to log, or NULL */
- ZipfCache zipf_cache; /* for thread-safe zipfian random number
- * generation */
/* per thread collected stats */
instr_time start_time; /* thread start time */
return (int64) (-log(uniform) * center + 0.5);
}
-/* helper function for getZipfianRand */
-static double
-generalizedHarmonicNumber(int64 n, double s)
-{
- int i;
- double ans = 0.0;
-
- for (i = n; i > 1; i--)
- ans += pow(i, -s);
- return ans + 1.0;
-}
-
-/* set harmonicn and other parameters to cache cell */
-static void
-zipfSetCacheCell(ZipfCell *cell, int64 n, double s)
-{
- double harmonic2;
-
- cell->n = n;
- cell->s = s;
-
- harmonic2 = generalizedHarmonicNumber(2, s);
- cell->harmonicn = generalizedHarmonicNumber(n, s);
-
- cell->alpha = 1.0 / (1.0 - s);
- cell->beta = pow(0.5, s);
- cell->eta = (1.0 - pow(2.0 / n, 1.0 - s)) / (1.0 - harmonic2 / cell->harmonicn);
-}
-
-/*
- * search for cache cell with keys (n, s)
- * and create new cell if it does not exist
- */
-static ZipfCell *
-zipfFindOrCreateCacheCell(ZipfCache *cache, int64 n, double s)
-{
- int i,
- least_recently_used = 0;
- ZipfCell *cell;
-
- /* search cached cell for given parameters */
- for (i = 0; i < cache->nb_cells; i++)
- {
- cell = &cache->cells[i];
- if (cell->n == n && cell->s == s)
- return &cache->cells[i];
-
- if (cell->last_used < cache->cells[least_recently_used].last_used)
- least_recently_used = i;
- }
-
- /* create new one if it does not exist */
- if (cache->nb_cells < ZIPF_CACHE_SIZE)
- i = cache->nb_cells++;
- else
- {
- /* replace LRU cell if cache is full */
- i = least_recently_used;
- cache->overflowCount++;
- }
-
- zipfSetCacheCell(&cache->cells[i], n, s);
-
- cache->cells[i].last_used = cache->current++;
- return &cache->cells[i];
-}
-
/*
* Computing zipfian using rejection method, based on
* "Non-Uniform Random Variate Generation",
* Luc Devroye, p. 550-551, Springer 1986.
+ *
+ * This works for s > 1.0, but may perform badly for s very close to 1.0.
*/
static int64
computeIterativeZipfian(RandomState *random_state, int64 n, double s)
u,
v;
+ /* Ensure n is sane */
+ if (n <= 1)
+ return 1;
+
while (true)
{
/* random variates */
return (int64) x;
}
-/*
- * Computing zipfian using harmonic numbers, based on algorithm described in
- * "Quickly Generating Billion-Record Synthetic Databases",
- * Jim Gray et al, SIGMOD 1994
- */
-static int64
-computeHarmonicZipfian(ZipfCache *zipf_cache, RandomState *random_state,
- int64 n, double s)
-{
- ZipfCell *cell = zipfFindOrCreateCacheCell(zipf_cache, n, s);
- double uniform = pg_erand48(random_state->xseed);
- double uz = uniform * cell->harmonicn;
-
- if (uz < 1.0)
- return 1;
- if (uz < 1.0 + cell->beta)
- return 2;
- return 1 + (int64) (cell->n * pow(cell->eta * uniform - cell->eta + 1.0, cell->alpha));
-}
-
/* random number generator: zipfian distribution from min to max inclusive */
static int64
-getZipfianRand(ZipfCache *zipf_cache, RandomState *random_state, int64 min,
- int64 max, double s)
+getZipfianRand(RandomState *random_state, int64 min, int64 max, double s)
{
int64 n = max - min + 1;
/* abort if parameter is invalid */
- Assert(s > 0.0 && s != 1.0 && s <= MAX_ZIPFIAN_PARAM);
+ Assert(MIN_ZIPFIAN_PARAM <= s && s <= MAX_ZIPFIAN_PARAM);
- return min - 1 + ((s > 1)
- ? computeIterativeZipfian(random_state, n, s)
- : computeHarmonicZipfian(zipf_cache, random_state, n, s));
+ return min - 1 + computeIterativeZipfian(random_state, n, s);
}
/*
}
else if (func == PGBENCH_RANDOM_ZIPFIAN)
{
- if (param <= 0.0 || param == 1.0 || param > MAX_ZIPFIAN_PARAM)
+ if (param < MIN_ZIPFIAN_PARAM || param > MAX_ZIPFIAN_PARAM)
{
fprintf(stderr,
- "zipfian parameter must be in range (0, 1) U (1, %d]"
- " (got %f)\n", MAX_ZIPFIAN_PARAM, param);
+ "zipfian parameter must be in range [%.3f, %.0f]"
+ " (not %f)\n",
+ MIN_ZIPFIAN_PARAM, MAX_ZIPFIAN_PARAM, param);
return false;
}
+
setIntValue(retval,
- getZipfianRand(&thread->zipf_cache,
- &st->cs_func_rs,
- imin, imax, param));
+ getZipfianRand(&st->cs_func_rs, imin, imax, param));
}
else /* exponential */
{
{
fprintf(stderr,
"exponential parameter must be greater than zero"
- " (got %f)\n", param);
+ " (not %f)\n", param);
return false;
}
tps_include,
tps_exclude;
int64 ntx = total->cnt - total->skipped;
- int i,
- totalCacheOverflows = 0;
time_include = INSTR_TIME_GET_DOUBLE(total_time);
printf("number of transactions actually processed: " INT64_FORMAT "\n",
ntx);
}
- /* Report zipfian cache overflow */
- for (i = 0; i < nthreads; i++)
- {
- totalCacheOverflows += threads[i].zipf_cache.overflowCount;
- }
- if (totalCacheOverflows > 0)
- {
- printf("zipfian cache array overflowed %d time(s)\n", totalCacheOverflows);
- }
/* Remaining stats are nonsensical if we failed to execute any xacts */
if (total->cnt <= 0)
initRandomState(&thread->ts_sample_rs);
thread->logfile = NULL; /* filled in later */
thread->latency_late = 0;
- thread->zipf_cache.nb_cells = 0;
- thread->zipf_cache.current = 0;
- thread->zipf_cache.overflowCount = 0;
initStats(&thread->stats, 0);
nclients_dealt += thread->nstate;
[
'set zipfian param to 1',
2,
- [qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}],
+ [qr{zipfian parameter must be in range \[1\.001, 1000\]}],
q{\set i random_zipfian(0, 10, 1)}
],
[
'set zipfian param too large',
2,
- [qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}],
+ [qr{zipfian parameter must be in range \[1\.001, 1000\]}],
q{\set i random_zipfian(0, 10, 1000000)}
],
[
{ $n => $script });
}
-# zipfian cache array overflow
-pgbench(
- '-t 1', 0,
- [ qr{processed: 1/1}, qr{zipfian cache array overflowed 1 time\(s\)} ],
- [qr{^}],
- 'pgbench zipfian array overflow on random_zipfian',
- {
- '001_pgbench_random_zipfian' => q{
-\set i random_zipfian(1, 100, 0.5)
-\set i random_zipfian(2, 100, 0.5)
-\set i random_zipfian(3, 100, 0.5)
-\set i random_zipfian(4, 100, 0.5)
-\set i random_zipfian(5, 100, 0.5)
-\set i random_zipfian(6, 100, 0.5)
-\set i random_zipfian(7, 100, 0.5)
-\set i random_zipfian(8, 100, 0.5)
-\set i random_zipfian(9, 100, 0.5)
-\set i random_zipfian(10, 100, 0.5)
-\set i random_zipfian(11, 100, 0.5)
-\set i random_zipfian(12, 100, 0.5)
-\set i random_zipfian(13, 100, 0.5)
-\set i random_zipfian(14, 100, 0.5)
-\set i random_zipfian(15, 100, 0.5)
-\set i random_zipfian(16, 100, 0.5)
-}
- });
-
# throttling
pgbench(
'-t 100 -S --rate=100000 --latency-limit=1000000 -c 2 -n -r',