Commit e1be1df5 authored by Heikki Linnakangas

Add --sampling-rate option to pgbench.

This allows logging only some fraction of transactions, greatly reducing
the amount of log generated.

Tomas Vondra, reviewed by Robert Haas and Jeff Janes.
parent 7ae18159
...@@ -129,6 +129,11 @@ int foreign_keys = 0; ...@@ -129,6 +129,11 @@ int foreign_keys = 0;
*/ */
int unlogged_tables = 0; int unlogged_tables = 0;
/*
* log sampling rate: the fraction of transactions written to the
* transaction log.
*
* 0.0 means the --sampling-rate option was not given, in which case every
* transaction is logged. Values accepted from the command line are
* greater than 0.0 and at most 1.0; 1.0 likewise logs everything.
*/
double sample_rate = 0.0;
/* /*
* tablespace selection * tablespace selection
*/ */
...@@ -370,6 +375,8 @@ usage(void) ...@@ -370,6 +375,8 @@ usage(void)
" -f FILENAME read transaction script from FILENAME\n" " -f FILENAME read transaction script from FILENAME\n"
" -j NUM number of threads (default: 1)\n" " -j NUM number of threads (default: 1)\n"
" -l write transaction times to log file\n" " -l write transaction times to log file\n"
" --sampling-rate NUM\n"
" fraction of transactions to log (e.g. 0.01 for 1%% sample)\n"
" -M simple|extended|prepared\n" " -M simple|extended|prepared\n"
" protocol for submitting queries to server (default: simple)\n" " protocol for submitting queries to server (default: simple)\n"
" -n do not run VACUUM before tests\n" " -n do not run VACUUM before tests\n"
...@@ -883,21 +890,30 @@ top: ...@@ -883,21 +890,30 @@ top:
instr_time diff; instr_time diff;
double usec; double usec;
INSTR_TIME_SET_CURRENT(now); /*
diff = now; * write the log entry if this row belongs to the random sample,
INSTR_TIME_SUBTRACT(diff, st->txn_begin); * or no sampling rate was given which means log everything.
usec = (double) INSTR_TIME_GET_MICROSEC(diff); */
if (sample_rate == 0.0 ||
pg_erand48(thread->random_state) <= sample_rate)
{
INSTR_TIME_SET_CURRENT(now);
diff = now;
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
usec = (double) INSTR_TIME_GET_MICROSEC(diff);
#ifndef WIN32 #ifndef WIN32
/* This is more than we really ought to know about instr_time */ /* This is more than we really ought to know about instr_time */
fprintf(logfile, "%d %d %.0f %d %ld %ld\n", fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
st->id, st->cnt, usec, st->use_file, st->id, st->cnt, usec, st->use_file,
(long) now.tv_sec, (long) now.tv_usec); (long) now.tv_sec, (long) now.tv_usec);
#else #else
/* On Windows, instr_time doesn't provide a timestamp anyway */ /* On Windows, instr_time doesn't provide a timestamp anyway */
fprintf(logfile, "%d %d %.0f %d 0 0\n", fprintf(logfile, "%d %d %.0f %d 0 0\n",
st->id, st->cnt, usec, st->use_file); st->id, st->cnt, usec, st->use_file);
#endif #endif
}
} }
if (commands[st->state]->type == SQL_COMMAND) if (commands[st->state]->type == SQL_COMMAND)
...@@ -1926,6 +1942,7 @@ main(int argc, char **argv) ...@@ -1926,6 +1942,7 @@ main(int argc, char **argv)
{"index-tablespace", required_argument, NULL, 3}, {"index-tablespace", required_argument, NULL, 3},
{"tablespace", required_argument, NULL, 2}, {"tablespace", required_argument, NULL, 2},
{"unlogged-tables", no_argument, &unlogged_tables, 1}, {"unlogged-tables", no_argument, &unlogged_tables, 1},
{"sampling-rate", required_argument, NULL, 4},
{NULL, 0, NULL, 0} {NULL, 0, NULL, 0}
}; };
...@@ -2131,6 +2148,14 @@ main(int argc, char **argv) ...@@ -2131,6 +2148,14 @@ main(int argc, char **argv)
case 3: /* index-tablespace */ case 3: /* index-tablespace */
index_tablespace = optarg; index_tablespace = optarg;
break; break;
case 4:
sample_rate = atof(optarg);
if (sample_rate <= 0.0 || sample_rate > 1.0)
{
fprintf(stderr, "invalid sampling rate: %f\n", sample_rate);
exit(1);
}
break;
default: default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1); exit(1);
...@@ -2166,6 +2191,13 @@ main(int argc, char **argv) ...@@ -2166,6 +2191,13 @@ main(int argc, char **argv)
exit(1); exit(1);
} }
/* --sampling-rate may be used only with -l */
if (sample_rate > 0.0 && !use_log)
{
fprintf(stderr, "log sampling rate is allowed only when logging transactions (-l) \n");
exit(1);
}
/* /*
* is_latencies only works with multiple threads in thread-based * is_latencies only works with multiple threads in thread-based
* implementations, not fork-based ones, because it supposes that the * implementations, not fork-based ones, because it supposes that the
......
...@@ -316,6 +316,24 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> ...@@ -316,6 +316,24 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><option>--sampling-rate</option> <replaceable>rate</></term>
<listitem>
<para>
Sampling rate, used when writing data into the log, to reduce the
amount of log generated. If this option is given, only the specified
fraction of transactions is logged. 1.0 means all transactions will
be logged, 0.05 means only 5% of the transactions will be logged.
</para>
<para>
Remember to take the sampling rate into account when processing the
log file. For example, when computing tps values, you need to scale
the numbers by the inverse of the sampling rate (e.g. with 0.01 sample
rate, the log contains only 1/100 of the transactions, so a tps value
computed from it must be multiplied by 100).
</para>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term><option>-M</option> <replaceable>querymode</></term> <term><option>-M</option> <replaceable>querymode</></term>
<listitem> <listitem>
...@@ -750,6 +768,12 @@ END; ...@@ -750,6 +768,12 @@ END;
0 201 2513 0 1175850569 608 0 201 2513 0 1175850569 608
0 202 2038 0 1175850569 2663 0 202 2038 0 1175850569 2663
</screen></para> </screen></para>
<para>
When running a long test on hardware that can handle a lot of transactions,
the log files can become very large. The <option>--sampling-rate</> option
can be used to log only a random sample of transactions.
</para>
</refsect2> </refsect2>
<refsect2> <refsect2>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment