Commit 02e3bcc6 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Change the way latency is calculated with pgbench --rate option.

The reported latency values now include the "schedule lag" time, that is,
the time between the transaction's scheduled start time and the time it
actually started. This relates better to a model where requests arrive at a
certain rate, and we are interested in the response time to the end user or
application, rather than the response time of the database itself.

Also, when --rate is used, include the schedule lag time in the log output.

The --rate option is new in 9.4, so backpatch to 9.4. It seems better to
make this change in 9.4, while we're still in the beta period, than ship a
9.4 version that calculates the values differently than 9.5.
parent 75717ce8
...@@ -210,10 +210,10 @@ typedef struct ...@@ -210,10 +210,10 @@ typedef struct
* sent */ * sent */
int sleeping; /* 1 indicates that the client is napping */ int sleeping; /* 1 indicates that the client is napping */
bool throttling; /* whether nap is for throttling */ bool throttling; /* whether nap is for throttling */
int64 until; /* napping until (usec) */
Variable *variables; /* array of variable definitions */ Variable *variables; /* array of variable definitions */
int nvariables; int nvariables;
instr_time txn_begin; /* used for measuring transaction latencies */ int64 txn_scheduled; /* scheduled start time of transaction (usec) */
instr_time txn_begin; /* used for measuring schedule lag times */
instr_time stmt_begin; /* used for measuring statement latencies */ instr_time stmt_begin; /* used for measuring statement latencies */
int64 txn_latencies; /* cumulated latencies */ int64 txn_latencies; /* cumulated latencies */
int64 txn_sqlats; /* cumulated square latencies */ int64 txn_sqlats; /* cumulated square latencies */
...@@ -284,12 +284,17 @@ typedef struct ...@@ -284,12 +284,17 @@ typedef struct
long start_time; /* when does the interval start */ long start_time; /* when does the interval start */
int cnt; /* number of transactions */ int cnt; /* number of transactions */
double min_duration; /* min/max durations */
double max_duration; double min_latency; /* min/max latencies */
double sum; /* sum(duration), sum(duration^2) - for double max_latency;
double sum_latency; /* sum(latency), sum(latency^2) - for
* estimates */ * estimates */
double sum2; double sum2_latency;
double min_lag;
double max_lag;
double sum_lag; /* sum(lag) */
double sum2_lag; /* sum(lag*lag) */
} AggVals; } AggVals;
static Command **sql_files[MAX_FILES]; /* SQL script files */ static Command **sql_files[MAX_FILES]; /* SQL script files */
...@@ -968,12 +973,18 @@ agg_vals_init(AggVals *aggs, instr_time start) ...@@ -968,12 +973,18 @@ agg_vals_init(AggVals *aggs, instr_time start)
{ {
/* basic counters */ /* basic counters */
aggs->cnt = 0; /* number of transactions */ aggs->cnt = 0; /* number of transactions */
aggs->sum = 0; /* SUM(duration) */ aggs->sum_latency = 0; /* SUM(latency) */
aggs->sum2 = 0; /* SUM(duration*duration) */ aggs->sum2_latency = 0; /* SUM(latency*latency) */
/* min and max transaction duration */ /* min and max transaction duration */
aggs->min_duration = 0; aggs->min_latency = 0;
aggs->max_duration = 0; aggs->max_latency = 0;
/* schedule lag counters */
aggs->sum_lag = 0;
aggs->sum2_lag = 0;
aggs->min_lag = 0;
aggs->max_lag = 0;
/* start of the current interval */ /* start of the current interval */
aggs->start_time = INSTR_TIME_GET_DOUBLE(start); aggs->start_time = INSTR_TIME_GET_DOUBLE(start);
...@@ -1016,7 +1027,7 @@ top: ...@@ -1016,7 +1027,7 @@ top:
thread->throttle_trigger += wait; thread->throttle_trigger += wait;
st->until = thread->throttle_trigger; st->txn_scheduled = thread->throttle_trigger;
st->sleeping = 1; st->sleeping = 1;
st->throttling = true; st->throttling = true;
st->is_throttled = true; st->is_throttled = true;
...@@ -1032,13 +1043,13 @@ top: ...@@ -1032,13 +1043,13 @@ top:
INSTR_TIME_SET_CURRENT(now); INSTR_TIME_SET_CURRENT(now);
now_us = INSTR_TIME_GET_MICROSEC(now); now_us = INSTR_TIME_GET_MICROSEC(now);
if (st->until <= now_us) if (st->txn_scheduled <= now_us)
{ {
st->sleeping = 0; /* Done sleeping, go ahead with next command */ st->sleeping = 0; /* Done sleeping, go ahead with next command */
if (st->throttling) if (st->throttling)
{ {
/* Measure lag of throttled transaction relative to target */ /* Measure lag of throttled transaction relative to target */
int64 lag = now_us - st->until; int64 lag = now_us - st->txn_scheduled;
thread->throttle_lag += lag; thread->throttle_lag += lag;
if (lag > thread->throttle_lag_max) if (lag > thread->throttle_lag_max)
...@@ -1052,6 +1063,11 @@ top: ...@@ -1052,6 +1063,11 @@ top:
if (st->listen) if (st->listen)
{ /* are we receiver? */ { /* are we receiver? */
instr_time now;
bool now_valid = false;
INSTR_TIME_SET_ZERO(now); /* initialize to keep compiler quiet */
if (commands[st->state]->type == SQL_COMMAND) if (commands[st->state]->type == SQL_COMMAND)
{ {
if (debug) if (debug)
...@@ -1071,10 +1087,13 @@ top: ...@@ -1071,10 +1087,13 @@ top:
*/ */
if (is_latencies) if (is_latencies)
{ {
instr_time now;
int cnum = commands[st->state]->command_num; int cnum = commands[st->state]->command_num;
INSTR_TIME_SET_CURRENT(now); if (!now_valid)
{
INSTR_TIME_SET_CURRENT(now);
now_valid = true;
}
INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum], INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum],
now, st->stmt_begin); now, st->stmt_begin);
thread->exec_count[cnum]++; thread->exec_count[cnum]++;
...@@ -1083,12 +1102,16 @@ top: ...@@ -1083,12 +1102,16 @@ top:
/* transaction finished: record latency under progress or throttling */ /* transaction finished: record latency under progress or throttling */
if ((progress || throttle_delay) && commands[st->state + 1] == NULL) if ((progress || throttle_delay) && commands[st->state + 1] == NULL)
{ {
instr_time diff;
int64 latency; int64 latency;
INSTR_TIME_SET_CURRENT(diff); if (!now_valid)
INSTR_TIME_SUBTRACT(diff, st->txn_begin); {
latency = INSTR_TIME_GET_MICROSEC(diff); INSTR_TIME_SET_CURRENT(now);
now_valid = true;
}
latency = INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled;
st->txn_latencies += latency; st->txn_latencies += latency;
/* /*
...@@ -1106,9 +1129,8 @@ top: ...@@ -1106,9 +1129,8 @@ top:
*/ */
if (logfile && commands[st->state + 1] == NULL) if (logfile && commands[st->state + 1] == NULL)
{ {
instr_time now; double lag;
instr_time diff; double latency;
double usec;
/* /*
* write the log entry if this row belongs to the random sample, * write the log entry if this row belongs to the random sample,
...@@ -1117,10 +1139,13 @@ top: ...@@ -1117,10 +1139,13 @@ top:
if (sample_rate == 0.0 || if (sample_rate == 0.0 ||
pg_erand48(thread->random_state) <= sample_rate) pg_erand48(thread->random_state) <= sample_rate)
{ {
INSTR_TIME_SET_CURRENT(now); if (!now_valid)
diff = now; {
INSTR_TIME_SUBTRACT(diff, st->txn_begin); INSTR_TIME_SET_CURRENT(now);
usec = (double) INSTR_TIME_GET_MICROSEC(diff); now_valid = true;
}
latency = (double) (INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled);
lag = (double) (INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled);
/* should we aggregate the results or not? */ /* should we aggregate the results or not? */
if (agg_interval > 0) if (agg_interval > 0)
...@@ -1132,15 +1157,27 @@ top: ...@@ -1132,15 +1157,27 @@ top:
if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(now)) if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(now))
{ {
agg->cnt += 1; agg->cnt += 1;
agg->sum += usec; agg->sum_latency += latency;
agg->sum2 += usec * usec; agg->sum2_latency += latency * latency;
/* first in this aggregation interval */ /* first in this aggregation interval */
if ((agg->cnt == 1) || (usec < agg->min_duration)) if ((agg->cnt == 1) || (latency < agg->min_latency))
agg->min_duration = usec; agg->min_latency = latency;
if ((agg->cnt == 1) || (latency > agg->max_latency))
agg->max_latency = latency;
/* and the same for schedule lag */
if (throttle_delay)
{
agg->sum_lag += lag;
agg->sum2_lag += lag * lag;
if ((agg->cnt == 1) || (usec > agg->max_duration)) if ((agg->cnt == 1) || (lag < agg->min_lag))
agg->max_duration = usec; agg->min_lag = lag;
if ((agg->cnt == 1) || (lag > agg->max_lag))
agg->max_lag = lag;
}
} }
else else
{ {
...@@ -1156,23 +1193,34 @@ top: ...@@ -1156,23 +1193,34 @@ top:
* ifdef in usage), so we don't need to handle * ifdef in usage), so we don't need to handle
* this in a special way (see below). * this in a special way (see below).
*/ */
fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f\n", fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f",
agg->start_time, agg->start_time,
agg->cnt, agg->cnt,
agg->sum, agg->sum_latency,
agg->sum2, agg->sum2_latency,
agg->min_duration, agg->min_latency,
agg->max_duration); agg->max_latency);
if (throttle_delay)
fprintf(logfile, " %.0f %.0f %.0f %.0f",
agg->sum_lag,
agg->sum2_lag,
agg->min_lag,
agg->max_lag);
fputc('\n', logfile);
/* move to the next inteval */ /* move to the next inteval */
agg->start_time = agg->start_time + agg_interval; agg->start_time = agg->start_time + agg_interval;
/* reset for "no transaction" intervals */ /* reset for "no transaction" intervals */
agg->cnt = 0; agg->cnt = 0;
agg->min_duration = 0; agg->min_latency = 0;
agg->max_duration = 0; agg->max_latency = 0;
agg->sum = 0; agg->sum_latency = 0;
agg->sum2 = 0; agg->sum2_latency = 0;
agg->min_lag = 0;
agg->max_lag = 0;
agg->sum_lag = 0;
agg->sum2_lag = 0;
} }
/* /*
...@@ -1180,10 +1228,14 @@ top: ...@@ -1180,10 +1228,14 @@ top:
* current) * current)
*/ */
agg->cnt = 1; agg->cnt = 1;
agg->min_duration = usec; agg->min_latency = latency;
agg->max_duration = usec; agg->max_latency = latency;
agg->sum = usec; agg->sum_latency = latency;
agg->sum2 = usec * usec; agg->sum2_latency = latency * latency;
agg->min_lag = lag;
agg->max_lag = lag;
agg->sum_lag = lag;
agg->sum2_lag = lag * lag;
} }
} }
else else
...@@ -1195,8 +1247,8 @@ top: ...@@ -1195,8 +1247,8 @@ top:
* This is more than we really ought to know about * This is more than we really ought to know about
* instr_time * instr_time
*/ */
fprintf(logfile, "%d %d %.0f %d %ld %ld\n", fprintf(logfile, "%d %d %.0f %d %ld %ld",
st->id, st->cnt, usec, st->use_file, st->id, st->cnt, latency, st->use_file,
(long) now.tv_sec, (long) now.tv_usec); (long) now.tv_sec, (long) now.tv_usec);
#else #else
...@@ -1204,9 +1256,12 @@ top: ...@@ -1204,9 +1256,12 @@ top:
* On Windows, instr_time doesn't provide a timestamp * On Windows, instr_time doesn't provide a timestamp
* anyway * anyway
*/ */
fprintf(logfile, "%d %d %.0f %d 0 0\n", fprintf(logfile, "%d %d %.0f %d 0 0",
st->id, st->cnt, usec, st->use_file); st->id, st->cnt, usec, st->use_file);
#endif #endif
if (throttle_delay)
fprintf(logfile, " %.0f", lag);
fputc('\n', logfile);
} }
} }
} }
...@@ -1295,8 +1350,17 @@ top: ...@@ -1295,8 +1350,17 @@ top:
/* Record transaction start time under logging, progress or throttling */ /* Record transaction start time under logging, progress or throttling */
if ((logfile || progress || throttle_delay) && st->state == 0) if ((logfile || progress || throttle_delay) && st->state == 0)
{
INSTR_TIME_SET_CURRENT(st->txn_begin); INSTR_TIME_SET_CURRENT(st->txn_begin);
/*
* When not throttling, this is also the transaction's scheduled start
* time.
*/
if (!throttle_delay)
st->txn_scheduled = INSTR_TIME_GET_MICROSEC(st->txn_begin);
}
/* Record statement start time if per-command latencies are requested */ /* Record statement start time if per-command latencies are requested */
if (is_latencies) if (is_latencies)
INSTR_TIME_SET_CURRENT(st->stmt_begin); INSTR_TIME_SET_CURRENT(st->stmt_begin);
...@@ -1620,7 +1684,7 @@ top: ...@@ -1620,7 +1684,7 @@ top:
usec *= 1000000; usec *= 1000000;
INSTR_TIME_SET_CURRENT(now); INSTR_TIME_SET_CURRENT(now);
st->until = INSTR_TIME_GET_MICROSEC(now) + usec; st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now) + usec;
st->sleeping = 1; st->sleeping = 1;
st->listen = 1; st->listen = 1;
...@@ -3309,7 +3373,7 @@ threadRun(void *arg) ...@@ -3309,7 +3373,7 @@ threadRun(void *arg)
now_usec = INSTR_TIME_GET_MICROSEC(now); now_usec = INSTR_TIME_GET_MICROSEC(now);
} }
this_usec = st->until - now_usec; this_usec = st->txn_scheduled - now_usec;
if (min_usec > this_usec) if (min_usec > this_usec)
min_usec = this_usec; min_usec = this_usec;
} }
......
...@@ -403,8 +403,10 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> ...@@ -403,8 +403,10 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
Show progress report every <literal>sec</> seconds. The report Show progress report every <literal>sec</> seconds. The report
includes the time since the beginning of the run, the tps since the includes the time since the beginning of the run, the tps since the
last report, and the transaction latency average and standard last report, and the transaction latency average and standard
deviation since the last report. Under throttling (<option>-R</>), it deviation since the last report. Under throttling (<option>-R</>),
also includes the average schedule lag time since the last report. the latency is computed with respect to the transaction scheduled
start time, not the actual transaction beginning time, thus it also
includes the average schedule lag time.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -440,29 +442,25 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> ...@@ -440,29 +442,25 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
possible for later ones to catch up again. possible for later ones to catch up again.
</para> </para>
<para> <para>
When throttling is active, the average and maximum transaction When throttling is active, the transaction latency reported at the
schedule lag time are reported in ms. This is the delay between end of the run is calculated from the scheduled start times, so it
the original scheduled transaction time and the actual transaction includes the time each transaction had to wait for the previous
start times. The schedule lag shows whether a transaction was transaction to finish. The wait time is called the schedule lag time,
started on time or late. Once a client starts running behind its and its average and maximum are also reported separately. The
schedule, every following transaction can continue to be penalized transaction latency with respect to the actual transaction start time,
for schedule lag. If faster transactions are able to catch up, it's i.e. the time spent executing the transaction in the database, can be
possible for them to get back on schedule again. The lag measurement computed by subtracting the schedule lag time from the reported
of every transaction is shown when pgbench is run with debugging latency.
output.
</para> </para>
<para> <para>
High rate limit schedule lag values, that is lag values that are large A high schedule lag time is an indication that the system cannot
compared to the actual transaction latency, indicate that something is process transactions at the specified rate, with the chosen number of
amiss in the throttling process. High schedule lag can highlight a subtle clients and threads. When the average transaction execution time is
problem there even if the target rate limit is met in the end. One longer than the scheduled interval between each transaction, each
possible cause of schedule lag is insufficient pgbench threads to successive transaction will fall further behind, and the schedule lag
handle all of the clients. To improve that, consider reducing the time will keep increasing the longer the test run is. When that
number of clients, increasing the number of threads in pgbench, or happens, you will have to reduce the specified transaction rate.
running pgbench on a separate host. Another possibility is that the
database is not keeping up with the load at some point. When that
happens, you will have to reduce the expected transaction rate to
lower schedule lag.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -942,7 +940,7 @@ END; ...@@ -942,7 +940,7 @@ END;
The format of the log is: The format of the log is:
<synopsis> <synopsis>
<replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</> <replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</> [<replaceable>schedule_lag</replaceable>]
</synopsis> </synopsis>
where <replaceable>time</> is the total elapsed transaction time in microseconds, where <replaceable>time</> is the total elapsed transaction time in microseconds,
...@@ -952,7 +950,10 @@ END; ...@@ -952,7 +950,10 @@ END;
UNIX epoch format timestamp and an offset UNIX epoch format timestamp and an offset
in microseconds (suitable for creating an ISO 8601 in microseconds (suitable for creating an ISO 8601
timestamp with fractional seconds) showing when timestamp with fractional seconds) showing when
the transaction completed. the transaction completed. The last field, <replaceable>schedule_lag</>, is
the difference between the transaction's scheduled start time, and the
time it actually started, in microseconds. It is only present when the
<option>--rate</> option is used.
</para> </para>
<para> <para>
...@@ -978,7 +979,7 @@ END; ...@@ -978,7 +979,7 @@ END;
With the <option>--aggregate-interval</option> option, the logs use a bit different format: With the <option>--aggregate-interval</option> option, the logs use a bit different format:
<synopsis> <synopsis>
<replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</> <replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</> [<replaceable>lag_sum</> <replaceable>lag_2_sum</> <replaceable>min_lag</> <replaceable>max_lag</>]
</synopsis> </synopsis>
where <replaceable>interval_start</> is the start of the interval (UNIX epoch where <replaceable>interval_start</> is the start of the interval (UNIX epoch
...@@ -989,7 +990,11 @@ END; ...@@ -989,7 +990,11 @@ END;
<replaceable>latency_2_sum</> is a sum of 2nd powers of latencies. The last two <replaceable>latency_2_sum</> is a sum of 2nd powers of latencies. The last two
fields are <replaceable>min_latency</> - a minimum latency within the interval, and fields are <replaceable>min_latency</> - a minimum latency within the interval, and
<replaceable>max_latency</> - maximum latency within the interval. A transaction is <replaceable>max_latency</> - maximum latency within the interval. A transaction is
counted into the interval when it was committed. counted into the interval when it was committed. The last four fields,
<replaceable>lag_sum</>, <replaceable>lag_2_sum</>, <replaceable>min_lag</>, and <replaceable>max_lag</>, are only present if the --rate option is used.
They are calculated from the time each transaction had to wait for the
previous one to finish, i.e. the difference between each transaction's
scheduled start time and the time it actually started.
</para> </para>
<para> <para>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment