Commit 40b9b957 authored by Robert Haas's avatar Robert Haas

New GUC, track_iotiming, to track I/O timings.

Currently, the only way to see the numbers this gathers is via
EXPLAIN (ANALYZE, BUFFERS), but the plan is to add visibility through
the stats collector and pg_stat_statements in subsequent patches.

Ants Aasma, reviewed by Greg Smith, with some further changes by me.
parent 98316e21
......@@ -4284,6 +4284,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
<varlistentry id="guc-track-iotiming" xreflabel="track_iotiming">
<term><varname>track_iotiming</varname> (<type>boolean</type>)</term>
<indexterm>
<primary><varname>track_iotiming</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Enables timing of database I/O calls. This parameter is off by
default, because it will repeatedly query the operating system for
the current time, which may cause significant overhead on some
platforms. Only superusers can change this setting.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-track-functions" xreflabel="track_functions">
<term><varname>track_functions</varname> (<type>enum</type>)</term>
<indexterm>
......
......@@ -1236,6 +1236,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
usage->local_blks_written > 0);
bool has_temp = (usage->temp_blks_read > 0 ||
usage->temp_blks_written > 0);
bool has_timing = (!INSTR_TIME_IS_ZERO(usage->time_read) ||
!INSTR_TIME_IS_ZERO(usage->time_write));
/* Show only positive counter values. */
if (has_shared || has_local || has_temp)
......@@ -1291,6 +1293,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
}
appendStringInfoChar(es->str, '\n');
}
/* As above, show only positive counter values. */
if (has_timing)
{
appendStringInfoSpaces(es->str, es->indent * 2);
appendStringInfoString(es->str, "I/O Timings:");
if (!INSTR_TIME_IS_ZERO(usage->time_read))
appendStringInfo(es->str, " read=%0.2f",
INSTR_TIME_GET_MILLISEC(usage->time_read));
if (!INSTR_TIME_IS_ZERO(usage->time_write))
appendStringInfo(es->str, " write=%0.2f",
INSTR_TIME_GET_MILLISEC(usage->time_write));
appendStringInfoChar(es->str, '\n');
}
}
else
{
......@@ -1304,6 +1320,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es);
ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es);
ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es);
ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->time_read), 3, es);
ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->time_write), 3, es);
}
}
......
......@@ -145,4 +145,6 @@ BufferUsageAccumDiff(BufferUsage *dst,
dst->local_blks_written += add->local_blks_written - sub->local_blks_written;
dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read;
dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written;
INSTR_TIME_ACCUM_DIFF(dst->time_read, add->time_read, sub->time_read);
INSTR_TIME_ACCUM_DIFF(dst->time_write, add->time_write, sub->time_write);
}
......@@ -67,6 +67,7 @@
bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100;
double bgwriter_lru_multiplier = 2.0;
bool track_iotiming = false;
/*
* How many buffers PrefetchBuffer callers should try to stay ahead of their
......@@ -437,8 +438,21 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
MemSet((char *) bufBlock, 0, BLCKSZ);
else
{
instr_time io_start,
io_time;
if (track_iotiming)
INSTR_TIME_SET_CURRENT(io_start);
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
if (track_iotiming)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
INSTR_TIME_ADD(pgBufferUsage.time_read, io_time);
}
/* check for garbage data */
if (!PageHeaderIsValid((PageHeader) bufBlock))
{
......@@ -1874,6 +1888,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
{
XLogRecPtr recptr;
ErrorContextCallback errcontext;
instr_time io_start, io_end;
/*
* Acquire the buffer's io_in_progress lock. If StartBufferIO returns
......@@ -1921,12 +1936,21 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
buf->flags &= ~BM_JUST_DIRTIED;
UnlockBufHdr(buf);
if (track_iotiming)
INSTR_TIME_SET_CURRENT(io_start);
smgrwrite(reln,
buf->tag.forkNum,
buf->tag.blockNum,
(char *) BufHdrGetBlock(buf),
false);
if (track_iotiming)
{
INSTR_TIME_SET_CURRENT(io_end);
INSTR_TIME_ACCUM_DIFF(pgBufferUsage.time_write, io_end, io_start);
}
pgBufferUsage.shared_blks_written++;
/*
......
......@@ -1018,6 +1018,15 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
{
{"track_iotiming", PGC_SUSET, STATS_COLLECTOR,
gettext_noop("Collects timing information for database IO activity."),
NULL
},
&track_iotiming,
false,
NULL, NULL, NULL
},
{
{"update_process_title", PGC_SUSET, STATS_COLLECTOR,
......
......@@ -424,6 +424,7 @@
#track_activities = on
#track_counts = on
#track_iotiming = off
#track_functions = none # none, pl, all
#track_activity_query_size = 1024 # (change requires restart)
#update_process_title = on
......
......@@ -28,6 +28,8 @@ typedef struct BufferUsage
long local_blks_written; /* # of local disk blocks written */
long temp_blks_read; /* # of temp blocks read */
long temp_blks_written; /* # of temp blocks written */
instr_time time_read; /* time spent reading */
instr_time time_write; /* time spent writing */
} BufferUsage;
/* Flag bits included in InstrAlloc's instrument_options bitmask */
......
......@@ -48,6 +48,7 @@ extern PGDLLIMPORT int NBuffers;
extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier;
extern bool track_iotiming;
extern int target_prefetch_pages;
/* in buf_init.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment