Commit e1be1df5 authored by Heikki Linnakangas

Add --sampling-rate option to pgbench.

This allows logging only some fraction of transactions, greatly reducing
the amount of log generated.

Tomas Vondra, reviewed by Robert Haas and Jeff Janes.
parent 7ae18159
......@@ -129,6 +129,11 @@ int foreign_keys = 0;
*/
int unlogged_tables = 0;
/*
* log sampling rate: the fraction of transactions written to the log
* (1.0 = log everything, 0.0 = option not given).  A value supplied with
* --sampling-rate must be greater than 0.0 and no larger than 1.0, and the
* option is accepted only when transaction logging (-l) is enabled.
*/
double sample_rate = 0.0;
/*
* tablespace selection
*/
......@@ -370,6 +375,8 @@ usage(void)
" -f FILENAME read transaction script from FILENAME\n"
" -j NUM number of threads (default: 1)\n"
" -l write transaction times to log file\n"
" --sampling-rate NUM\n"
" fraction of transactions to log (e.g. 0.01 for 1%% sample)\n"
" -M simple|extended|prepared\n"
" protocol for submitting queries to server (default: simple)\n"
" -n do not run VACUUM before tests\n"
......@@ -883,6 +890,14 @@ top:
instr_time diff;
double usec;
/*
* write the log entry if this row belongs to the random sample,
* or if no sampling rate was given, which means log everything.
*/
if (sample_rate == 0.0 ||
pg_erand48(thread->random_state) <= sample_rate)
{
INSTR_TIME_SET_CURRENT(now);
diff = now;
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
......@@ -899,6 +914,7 @@ top:
st->id, st->cnt, usec, st->use_file);
#endif
}
}
if (commands[st->state]->type == SQL_COMMAND)
{
......@@ -1926,6 +1942,7 @@ main(int argc, char **argv)
{"index-tablespace", required_argument, NULL, 3},
{"tablespace", required_argument, NULL, 2},
{"unlogged-tables", no_argument, &unlogged_tables, 1},
{"sampling-rate", required_argument, NULL, 4},
{NULL, 0, NULL, 0}
};
......@@ -2131,6 +2148,14 @@ main(int argc, char **argv)
case 3: /* index-tablespace */
index_tablespace = optarg;
break;
case 4:
sample_rate = atof(optarg);
if (sample_rate <= 0.0 || sample_rate > 1.0)
{
fprintf(stderr, "invalid sampling rate: %f\n", sample_rate);
exit(1);
}
break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
......@@ -2166,6 +2191,13 @@ main(int argc, char **argv)
exit(1);
}
/* --sampling-rate may be used only with -l */
if (sample_rate > 0.0 && !use_log)
{
fprintf(stderr, "log sampling rate is allowed only when logging transactions (-l) \n");
exit(1);
}
/*
* is_latencies only works with multiple threads in thread-based
* implementations, not fork-based ones, because it supposes that the
......
......@@ -316,6 +316,24 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--sampling-rate</option> <replaceable>rate</></term>
<listitem>
<para>
Sampling rate, used when writing data into the log, to reduce the
amount of log generated. If this option is given, only the specified
fraction of transactions are logged. 1.0 means all transactions will
be logged; 0.05 means only 5% of the transactions will be logged.
</para>
<para>
Remember to take the sampling rate into account when processing the
log file. For example, when computing tps values, you need to scale
the numbers by the inverse of the sampling rate (e.g. with a 0.01 sample
rate, the log reflects only 1/100 of the actual tps, so multiply by 100).
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-M</option> <replaceable>querymode</></term>
<listitem>
......@@ -750,6 +768,12 @@ END;
0 201 2513 0 1175850569 608
0 202 2038 0 1175850569 2663
</screen></para>
<para>
When running a long test on hardware that can handle a lot of transactions,
the log files can become very large. The <option>--sampling-rate</> option
can be used to log only a random sample of transactions.
</para>
</refsect2>
<refsect2>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment