#define LOG_STEP_SECONDS 5 /* seconds between log messages */
#define DEFAULT_NXACTS 10 /* default number of transactions per client */
+#define MIN_GAUSSIAN_THRESHOLD 2.0 /* minimum threshold accepted for the gaussian distribution */
+
int nxacts = 0; /* number of transactions per client */
int duration = 0; /* duration in seconds */
return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
}
+/*
+ * random number generator: exponential distribution from min to max inclusive.
+ * the threshold is so that the density of probability for the last cut-off max
+ * value is exp(-threshold).
+ *
+ * thread: supplies the random_state consumed by pg_erand48.
+ * min, max: bounds of the returned value; the caller must make sure that
+ * (max - min + 1) can be computed without overflow.
+ * threshold: must be strictly positive (checked by assertion only).
+ *
+ * One uniform draw is taken per call; it is mapped through the inverse of
+ * the truncated exponential so that the result is a fraction in [0, 1),
+ * which is then scaled onto the integer range.
+ */
+static int64
+getExponentialRand(TState *thread, int64 min, int64 max, double threshold)
+{
+ double cut, uniform, rand;
+ Assert(threshold > 0.0);
+ cut = exp(-threshold);
+ /* erand in [0, 1), uniform in (0, 1] */
+ uniform = 1.0 - pg_erand48(thread->random_state);
+ /*
+ * inner expression in (cut, 1] (if threshold > 0),
+ * rand in [0, 1)
+ *
+ * If (1.0 - cut) were 0, the inner expression would always be equal to
+ * cut whatever the uniform draw, hence the assertion.
+ */
+ Assert((1.0 - cut) != 0.0);
+ rand = - log(cut + (1.0 - cut) * uniform) / threshold;
+ /* scale the [0, 1) fraction to an int64 between min and max inclusive */
+ return min + (int64)((max - min + 1) * rand);
+}
+
+/*
+ * random number generator: gaussian distribution from min to max inclusive.
+ *
+ * thread: supplies the random_state consumed by pg_erand48.
+ * min, max: bounds of the returned value; the caller must make sure that
+ * (max - min + 1) can be computed without overflow.
+ * threshold: half-width, in standard deviations, of the accepted part of
+ * the normal distribution. Draws outside [-threshold, threshold) are
+ * rejected and redrawn, so small thresholds mean more retries.
+ *
+ * The accepted draw is rescaled to a fraction in [0, 1) and then onto the
+ * integer range.
+ */
+static int64
+getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
+{
+ double stdev;
+ double rand;
+
+ /*
+ * Get user specified random number from this loop, with
+ * -threshold <= stdev < threshold
+ *
+ * This loop is executed until the number is in the expected range.
+ *
+ * As the minimum threshold is 2.0, the probability of looping is low:
+ * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the average
+ * sine multiplier as 2/pi, we have a 8.6% looping probability in the
+ * worst case. For a 5.0 threshold value, the looping probability
+ * is about e^{-5} * 2 / pi ~ 0.43%.
+ */
+ do
+ {
+ /*
+ * pg_erand48 generates [0,1), but for the basic version of the
+ * Box-Muller transform the two uniformly distributed random numbers
+ * are expected in (0, 1] (see http://en.wikipedia.org/wiki/Box_muller)
+ *
+ * In particular rand1 must not be 0, since log(rand1) is taken below.
+ */
+ double rand1 = 1.0 - pg_erand48(thread->random_state);
+ double rand2 = 1.0 - pg_erand48(thread->random_state);
+
+ /* Box-Muller basic form transform */
+ double var_sqrt = sqrt(-2.0 * log(rand1));
+ stdev = var_sqrt * sin(2.0 * M_PI * rand2);
+
+ /*
+ * We could also try the cosine variant of the transform here, but
+ * there may be a bias induced if the previous value fails the test.
+ * To be on the safe side, let us start over with two fresh draws.
+ */
+ }
+ while (stdev < -threshold || stdev >= threshold);
+
+ /* stdev is in [-threshold, threshold), normalization to [0,1) */
+ rand = (stdev + threshold) / (threshold * 2.0);
+
+ /* scale the [0, 1) fraction to an int64 between min and max inclusive */
+ return min + (int64)((max - min + 1) * rand);
+}
+
/* call PQexec() and exit() on failure */
static void
executeStatement(PGconn *con, const char *sql)
char *var;
int64 min,
max;
+ double threshold = 0;
char res[64];
if (*argv[2] == ':')
}
/*
- * getrand() needs to be able to subtract max from min and add one
- * to the result without overflowing. Since we know max > min, we
- * can detect overflow just by checking for a negative result. But
- * we must check both that the subtraction doesn't overflow, and
- * that adding one to the result doesn't overflow either.
+ * The random number generator functions need to be able to subtract
+ * min from max and add one to the result without overflowing.
+ * Since we know max > min, we can detect overflow just by checking
+ * for a negative result. But we must check both that the subtraction
+ * doesn't overflow, and that adding one to the result doesn't overflow
+ * either.
*/
if (max - min < 0 || (max - min) + 1 < 0)
{
return true;
}
+ if (argc == 4 || /* uniform without or with "uniform" keyword */
+ (argc == 5 && pg_strcasecmp(argv[4], "uniform") == 0))
+ {
+#ifdef DEBUG
+ printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
+#endif
+ snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+ }
+ else if (argc == 6 &&
+ ((pg_strcasecmp(argv[4], "gaussian") == 0) ||
+ (pg_strcasecmp(argv[4], "exponential") == 0)))
+ {
+ if (*argv[5] == ':')
+ {
+ if ((var = getVariable(st, argv[5] + 1)) == NULL)
+ {
+ fprintf(stderr, "%s: invalid threshold number %s\n", argv[0], argv[5]);
+ st->ecnt++;
+ return true;
+ }
+ threshold = strtod(var, NULL);
+ }
+ else
+ threshold = strtod(argv[5], NULL);
+
+ if (pg_strcasecmp(argv[4], "gaussian") == 0)
+ {
+ if (threshold < MIN_GAUSSIAN_THRESHOLD)
+ {
+ fprintf(stderr, "%s: gaussian threshold must be at least %f\n,", argv[5], MIN_GAUSSIAN_THRESHOLD);
+ st->ecnt++;
+ return true;
+ }
+#ifdef DEBUG
+ printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, threshold));
+#endif
+ snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, threshold));
+ }
+ else if (pg_strcasecmp(argv[4], "exponential") == 0)
+ {
+ if (threshold <= 0.0)
+ {
+ fprintf(stderr, "%s: exponential threshold must be strictly positive\n,", argv[5]);
+ st->ecnt++;
+ return true;
+ }
#ifdef DEBUG
- printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
+ printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, threshold));
#endif
- snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+ snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold));
+ }
+ }
+ else /* this means an error somewhere in the parsing phase... */
+ {
+ fprintf(stderr, "%s: unexpected arguments\n", argv[0]);
+ st->ecnt++;
+ return true;
+ }
if (!putVariable(st, argv[0], argv[1], res))
{
if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
{
+ /* parsing:
+ * \setrandom variable min max [uniform]
+ * \setrandom variable min max (gaussian|exponential) threshold
+ */
+
if (my_commands->argc < 4)
{
fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
exit(1);
}
+ /* argc >= 4 */
- for (j = 4; j < my_commands->argc; j++)
- fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
- my_commands->argv[0], my_commands->argv[j]);
+ if (my_commands->argc == 4 || /* uniform without/with "uniform" keyword */
+ (my_commands->argc == 5 &&
+ pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
+ {
+ /* nothing to do */
+ }
+ else if (/* argc >= 5 */
+ (pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
+ (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
+ {
+ if (my_commands->argc < 6)
+ {
+ fprintf(stderr, "%s(%s): missing threshold argument\n", my_commands->argv[0], my_commands->argv[4]);
+ exit(1);
+ }
+ else if (my_commands->argc > 6)
+ {
+ fprintf(stderr, "%s(%s): too many arguments (extra:",
+ my_commands->argv[0], my_commands->argv[4]);
+ for (j = 6; j < my_commands->argc; j++)
+ fprintf(stderr, " %s", my_commands->argv[j]);
+ fprintf(stderr, ")\n");
+ exit(1);
+ }
+ }
+ else /* cannot parse, unexpected arguments */
+ {
+ fprintf(stderr, "%s: unexpected arguments (bad:", my_commands->argv[0]);
+ for (j = 4; j < my_commands->argc; j++)
+ fprintf(stderr, " %s", my_commands->argv[j]);
+ fprintf(stderr, ")\n");
+ exit(1);
+ }
}
else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
{
<varlistentry>
<term>
- <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</></literal>
- </term>
+ <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</> [ uniform | { gaussian | exponential } <replaceable>threshold</> ]</literal>
+ </term>
<listitem>
<para>
having an integer value.
</para>
+ <para>
+ By default, or when <literal>uniform</> is specified, all values in the
+ range are drawn with equal probability. Specifying <literal>gaussian</>
+ or <literal>exponential</> options modifies this behavior; each
+ requires a mandatory threshold which determines the precise shape of the
+ distribution.
+ </para>
+
+ <para>
+ For a Gaussian distribution, the interval is mapped onto a standard
+ normal distribution (the classical bell-shaped Gaussian curve) truncated
+ at <literal>-threshold</> on the left and <literal>+threshold</>
+ on the right.
+ To be precise, if <literal>PHI(x)</> is the cumulative distribution
+ function of the standard normal distribution, and <literal>mu</> is the
+ middle of the interval, defined as <literal>(max + min) / 2.0</>,
+ then value <replaceable>i</>
+ between <replaceable>min</> and <replaceable>max</> inclusive is drawn
+ with probability:
+ <literal>
+ (PHI(2.0 * threshold * (i - mu + 0.5) / (max - min + 1)) -
+ PHI(2.0 * threshold * (i - mu - 0.5) / (max - min + 1))) /
+ (2.0 * PHI(threshold) - 1.0)
+ </>.
+ Intuitively, the larger the <replaceable>threshold</>, the more
+ frequently values close to the middle of the interval are drawn, and the
+ less frequently values close to the <replaceable>min</> and
+ <replaceable>max</> bounds.
+ About 67% of values are drawn from the middle <literal>1.0 / threshold</>
+ and 95% in the middle <literal>2.0 / threshold</>; for instance, if
+ <replaceable>threshold</> is 4.0, 67% of values are drawn from the middle
+ quarter and 95% from the middle half of the interval.
+ The minimum <replaceable>threshold</> is 2.0 for performance of
+ the Box-Muller transform.
+ </para>
+
+ <para>
+ For an exponential distribution, the <replaceable>threshold</>
+ parameter controls the distribution by truncating a quickly-decreasing
+ exponential distribution at <replaceable>threshold</>, and then
+ projecting onto integers between the bounds.
+ To be precise, value <replaceable>i</> between <replaceable>min</> and
+ <replaceable>max</> inclusive is drawn with probability:
+ <literal>(exp(-threshold*(i-min)/(max+1-min)) -
+ exp(-threshold*(i+1-min)/(max+1-min))) / (1.0 - exp(-threshold))</>.
+ Intuitively, the larger the <replaceable>threshold</>, the more
+ frequently values close to <replaceable>min</> are accessed, and the
+ less frequently values close to <replaceable>max</> are accessed.
+ The closer to 0 the threshold, the flatter (more uniform) the access
+ distribution.
+ A crude approximation of the distribution is that the most frequent 1%
+ values in the range, close to <replaceable>min</>, are drawn
+ <replaceable>threshold</>% of the time.
+ The <replaceable>threshold</> value must be strictly positive.
+ </para>
+
<para>
Example:
<programlisting>
-\setrandom aid 1 :naccounts
+\setrandom aid 1 :naccounts gaussian 5.0
</programlisting></para>
</listitem>
</varlistentry>