Commit e6c3ba7f authored by Tom Lane

Fix portability problem in pgbench.

The pgbench regression test supposed that srandom() with a specific value
would result in deterministic output from random(), as required by POSIX.
It emerges however that OpenBSD is too smart to be constrained by mere
standards, so their random() emits nondeterministic output anyway.
While a workaround does exist, what seems like a better fix is to stop
relying on the platform's srandom()/random() altogether, so that what
you get from --random-seed=N is not merely deterministic but platform
independent.  Hence, use a separate pg_jrand48() random sequence in
place of random().

Also adjust the regression test case that's supposed to detect
nondeterminism so that it's more likely to detect it; the original
choice of random_zipfian parameter tended to produce the same output
all the time even if the underlying behavior wasn't deterministic.

In passing, improve pgbench's docs about random_zipfian().

Back-patch to v11 where this code was introduced.

Fabien Coelho and Tom Lane

Discussion: https://postgr.es/m/4615.1547792324@sss.pgh.pa.us
parent 19184fcc
...@@ -1604,15 +1604,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / ...@@ -1604,15 +1604,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
in (1, 1000), a rejection method is used, based on in (1, 1000), a rejection method is used, based on
"Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551, "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
Springer 1986. The distribution is not defined when the parameter's Springer 1986. The distribution is not defined when the parameter's
value is 1.0. The drawing performance is poor for parameter values value is 1.0. The function's performance is poor for parameter values
close and above 1.0 and on a small range. close and above 1.0 and on a small range.
</para> </para>
<para> <para>
<replaceable>parameter</replaceable> <replaceable>parameter</replaceable> defines how skewed the distribution
defines how skewed the distribution is. The larger the <replaceable>parameter</replaceable>, the more is. The larger the <replaceable>parameter</replaceable>, the more
frequently values to the beginning of the interval are drawn. frequently values closer to the beginning of the interval are drawn.
The closer to 0 <replaceable>parameter</replaceable> is, The closer to 0 <replaceable>parameter</replaceable> is,
the flatter (more uniform) the access distribution. the flatter (more uniform) the output distribution.
The distribution is such that, assuming the range starts from 1,
the ratio of the probability of drawing <replaceable>k</replaceable>
versus drawing <replaceable>k+1</replaceable> is
<literal>((<replaceable>k</replaceable>+1)/<replaceable>k</replaceable>)**<replaceable>parameter</replaceable></literal>.
For example, <literal>random_zipfian(1, ..., 2.5)</literal> produces
the value <literal>1</literal> about <literal>(2/1)**2.5 =
5.66</literal> times more frequently than <literal>2</literal>, which
itself is produced <literal>(3/2)**2.5 = 2.76</literal> times more
frequently than <literal>3</literal>, and so on.
</para> </para>
</listitem> </listitem>
</itemizedlist> </itemizedlist>
......
...@@ -185,7 +185,7 @@ int64 latency_limit = 0; ...@@ -185,7 +185,7 @@ int64 latency_limit = 0;
char *tablespace = NULL; char *tablespace = NULL;
char *index_tablespace = NULL; char *index_tablespace = NULL;
/* random seed used when calling srandom() */ /* random seed used to initialize base_random_sequence */
int64 random_seed = -1; int64 random_seed = -1;
/* /*
...@@ -287,6 +287,9 @@ typedef struct RandomState ...@@ -287,6 +287,9 @@ typedef struct RandomState
unsigned short xseed[3]; unsigned short xseed[3];
} RandomState; } RandomState;
/* Various random sequences are initialized from this one. */
static RandomState base_random_sequence;
/* /*
* Connection state machine states. * Connection state machine states.
*/ */
...@@ -833,16 +836,28 @@ strtodouble(const char *str, bool errorOK, double *dv) ...@@ -833,16 +836,28 @@ strtodouble(const char *str, bool errorOK, double *dv)
/* /*
* Initialize a random state struct. * Initialize a random state struct.
*
* We derive the seed from base_random_sequence, which must be set up already.
*/ */
static void static void
initRandomState(RandomState *random_state) initRandomState(RandomState *random_state)
{ {
random_state->xseed[0] = random(); random_state->xseed[0] = (unsigned short)
random_state->xseed[1] = random(); (pg_jrand48(base_random_sequence.xseed) & 0xFFFF);
random_state->xseed[2] = random(); random_state->xseed[1] = (unsigned short)
(pg_jrand48(base_random_sequence.xseed) & 0xFFFF);
random_state->xseed[2] = (unsigned short)
(pg_jrand48(base_random_sequence.xseed) & 0xFFFF);
} }
/* random number generator: uniform distribution from min to max inclusive */ /*
* Random number generator: uniform distribution from min to max inclusive.
*
* Although the limits are expressed as int64, you can't generate the full
* int64 range in one call, because the difference of the limits mustn't
* overflow int64. In practice it's unwise to ask for more than an int32
* range, because of the limited precision of pg_erand48().
*/
static int64 static int64
getrand(RandomState *random_state, int64 min, int64 max) getrand(RandomState *random_state, int64 min, int64 max)
{ {
...@@ -5126,12 +5141,14 @@ printResults(TState *threads, StatsData *total, instr_time total_time, ...@@ -5126,12 +5141,14 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
} }
} }
/* call srandom based on some seed. NULL triggers the default behavior. */ /*
* Set up a random seed according to seed parameter (NULL means default),
* and initialize base_random_sequence for use in initializing other sequences.
*/
static bool static bool
set_random_seed(const char *seed) set_random_seed(const char *seed)
{ {
/* srandom expects an unsigned int */ uint64 iseed;
unsigned int iseed;
if (seed == NULL || strcmp(seed, "time") == 0) if (seed == NULL || strcmp(seed, "time") == 0)
{ {
...@@ -5139,7 +5156,7 @@ set_random_seed(const char *seed) ...@@ -5139,7 +5156,7 @@ set_random_seed(const char *seed)
instr_time now; instr_time now;
INSTR_TIME_SET_CURRENT(now); INSTR_TIME_SET_CURRENT(now);
iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now); iseed = (uint64) INSTR_TIME_GET_MICROSEC(now);
} }
else if (strcmp(seed, "rand") == 0) else if (strcmp(seed, "rand") == 0)
{ {
...@@ -5155,7 +5172,7 @@ set_random_seed(const char *seed) ...@@ -5155,7 +5172,7 @@ set_random_seed(const char *seed)
/* parse seed unsigned int value */ /* parse seed unsigned int value */
char garbage; char garbage;
if (sscanf(seed, "%u%c", &iseed, &garbage) != 1) if (sscanf(seed, UINT64_FORMAT "%c", &iseed, &garbage) != 1)
{ {
fprintf(stderr, fprintf(stderr,
"unrecognized random seed option \"%s\": expecting an unsigned integer, \"time\" or \"rand\"\n", "unrecognized random seed option \"%s\": expecting an unsigned integer, \"time\" or \"rand\"\n",
...@@ -5165,10 +5182,14 @@ set_random_seed(const char *seed) ...@@ -5165,10 +5182,14 @@ set_random_seed(const char *seed)
} }
if (seed != NULL) if (seed != NULL)
fprintf(stderr, "setting random seed to %u\n", iseed); fprintf(stderr, "setting random seed to " UINT64_FORMAT "\n", iseed);
srandom(iseed);
/* no precision loss: 32 bit unsigned int cast to 64 bit int */
random_seed = iseed; random_seed = iseed;
/* Fill base_random_sequence with low-order bits of seed */
base_random_sequence.xseed[0] = iseed & 0xFFFF;
base_random_sequence.xseed[1] = (iseed >> 16) & 0xFFFF;
base_random_sequence.xseed[2] = (iseed >> 32) & 0xFFFF;
return true; return true;
} }
...@@ -5862,10 +5883,9 @@ main(int argc, char **argv) ...@@ -5862,10 +5883,9 @@ main(int argc, char **argv)
/* set default seed for hash functions */ /* set default seed for hash functions */
if (lookupVariable(&state[0], "default_seed") == NULL) if (lookupVariable(&state[0], "default_seed") == NULL)
{ {
uint64 seed = ((uint64) (random() & 0xFFFF) << 48) | uint64 seed =
((uint64) (random() & 0xFFFF) << 32) | ((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) |
((uint64) (random() & 0xFFFF) << 16) | (((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) << 32);
(uint64) (random() & 0xFFFF);
for (i = 0; i < nclients; i++) for (i = 0; i < nclients; i++)
if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed)) if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
......
...@@ -259,11 +259,11 @@ pgbench( ...@@ -259,11 +259,11 @@ pgbench(
[ [
qr{setting random seed to 5432\b}, qr{setting random seed to 5432\b},
# After explicit seeding, the four * random checks (1-3,20) should be # After explicit seeding, the four random checks (1-3,20) are
# deterministic, but not necessarily portable. # deterministic
qr{command=1.: int 1\d\b}, # uniform random: 12 on linux qr{command=1.: int 13\b}, # uniform random
qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux qr{command=2.: int 116\b}, # exponential random
qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux qr{command=3.: int 1498\b}, # gaussian random
qr{command=4.: int 4\b}, qr{command=4.: int 4\b},
qr{command=5.: int 5\b}, qr{command=5.: int 5\b},
qr{command=6.: int 6\b}, qr{command=6.: int 6\b},
...@@ -276,7 +276,7 @@ pgbench( ...@@ -276,7 +276,7 @@ pgbench(
qr{command=15.: double 15\b}, qr{command=15.: double 15\b},
qr{command=16.: double 16\b}, qr{command=16.: double 16\b},
qr{command=17.: double 17\b}, qr{command=17.: double 17\b},
qr{command=20.: int \d\b}, # zipfian random: 1 on linux qr{command=20.: int 1\b}, # zipfian random
qr{command=21.: double -27\b}, qr{command=21.: double -27\b},
qr{command=22.: double 1024\b}, qr{command=22.: double 1024\b},
qr{command=23.: double 1\b}, qr{command=23.: double 1\b},
...@@ -471,7 +471,7 @@ for my $i (1, 2) ...@@ -471,7 +471,7 @@ for my $i (1, 2)
\set ur random(1000, 1999) \set ur random(1000, 1999)
\set er random_exponential(2000, 2999, 2.0) \set er random_exponential(2000, 2999, 2.0)
\set gr random_gaussian(3000, 3999, 3.0) \set gr random_gaussian(3000, 3999, 3.0)
\set zr random_zipfian(4000, 4999, 2.5) \set zr random_zipfian(4000, 4999, 1.5)
INSERT INTO seeded_random(seed, rand, val) VALUES INSERT INTO seeded_random(seed, rand, val) VALUES
(:random_seed, 'uniform', :ur), (:random_seed, 'uniform', :ur),
(:random_seed, 'exponential', :er), (:random_seed, 'exponential', :er),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment