Commit 249cf070 authored by Robert Haas

Create and use wait events for read, write, and fsync operations.

Previous commits, notably 53be0b1a and
6f3bd98e, made it possible to see from
pg_stat_activity when a backend was stuck waiting for another backend,
but it's also fairly common for a backend to be stuck waiting for an
I/O.  Add wait events for those operations, too.

Rushabh Lathia, with further hacking by me.  Reviewed and tested by
Michael Paquier, Amit Kapila, Rajkumar Raghuwanshi, and Rahila Syed.

Discussion: http://postgr.es/m/CAGPqQf0LsYHXREPAZqYGVkDqHSyjf=KsD=k0GTVPAuzyThh-VQ@mail.gmail.com
parent 928250ae
This diff is collapsed.
......@@ -119,6 +119,8 @@
#include "lib/ilist.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/slot.h"
......@@ -916,7 +918,8 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
* Note that we deviate from the usual WAL coding practices here,
* check the above "Logical rewrite support" comment for reasoning.
*/
written = FileWrite(src->vfd, waldata_start, len);
written = FileWrite(src->vfd, waldata_start, len,
WAIT_EVENT_LOGICAL_REWRITE_WRITE);
if (written != len)
ereport(ERROR,
(errcode_for_file_access(),
......@@ -957,7 +960,7 @@ logical_end_heap_rewrite(RewriteState state)
hash_seq_init(&seq_status, state->rs_logical_mappings);
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{
if (FileSync(src->vfd) != 0)
if (FileSync(src->vfd, WAIT_EVENT_LOGICAL_REWRITE_SYNC) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", src->path)));
......@@ -1141,11 +1144,13 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
* Truncate all data that's not guaranteed to have been safely fsynced (by
* previous record or by the last checkpoint).
*/
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE);
if (ftruncate(fd, xlrec->offset) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate file \"%s\" to %u: %m",
path, (uint32) xlrec->offset)));
pgstat_report_wait_end();
/* now seek to the position we want to write our data to */
if (lseek(fd, xlrec->offset, SEEK_SET) != xlrec->offset)
......@@ -1159,20 +1164,24 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
len = xlrec->num_mappings * sizeof(LogicalRewriteMappingData);
/* write out tail end of mapping file (again) */
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE);
if (write(fd, data, len) != len)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", path)));
pgstat_report_wait_end();
/*
* Now fsync all previously written data. We could improve things and only
* do this for the last write to a file, but the required bookkeeping
* doesn't seem worth the trouble.
*/
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();
CloseTransientFile(fd);
}
......@@ -1266,10 +1275,12 @@ CheckPointLogicalRewriteHeap(void)
* changed or have only been created since the checkpoint's start,
* but it's currently not deemed worth the effort.
*/
else if (pg_fsync(fd) != 0)
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();
CloseTransientFile(fd);
}
}
......
......@@ -54,6 +54,7 @@
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "miscadmin.h"
......@@ -675,13 +676,16 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
}
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
{
pgstat_report_wait_end();
slru_errcause = SLRU_READ_FAILED;
slru_errno = errno;
CloseTransientFile(fd);
return false;
}
pgstat_report_wait_end();
if (CloseTransientFile(fd))
{
......@@ -834,8 +838,10 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
}
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
{
pgstat_report_wait_end();
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
......@@ -845,6 +851,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
CloseTransientFile(fd);
return false;
}
pgstat_report_wait_end();
/*
* If not part of Flush, need to fsync now. We assume this happens
......@@ -852,13 +859,16 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
*/
if (!fdata)
{
pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
if (ctl->do_fsync && pg_fsync(fd))
{
pgstat_report_wait_end();
slru_errcause = SLRU_FSYNC_FAILED;
slru_errno = errno;
CloseTransientFile(fd);
return false;
}
pgstat_report_wait_end();
if (CloseTransientFile(fd))
{
......@@ -1126,6 +1136,7 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
ok = true;
for (i = 0; i < fdata.num_files; i++)
{
pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
{
slru_errcause = SLRU_FSYNC_FAILED;
......@@ -1133,6 +1144,7 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
ok = false;
}
pgstat_report_wait_end();
if (CloseTransientFile(fdata.fd[i]))
{
......
......@@ -38,6 +38,7 @@
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xlogdefs.h"
#include "pgstat.h"
#include "storage/fd.h"
/*
......@@ -338,7 +339,9 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
for (;;)
{
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
nbytes = (int) read(srcfd, buffer, sizeof(buffer));
pgstat_report_wait_end();
if (nbytes < 0 || errno != 0)
ereport(ERROR,
(errcode_for_file_access(),
......@@ -346,6 +349,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
if (nbytes == 0)
break;
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
if ((int) write(fd, buffer, nbytes) != nbytes)
{
int save_errno = errno;
......@@ -365,6 +369,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
}
CloseTransientFile(srcfd);
}
......@@ -400,10 +405,12 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
if (CloseTransientFile(fd))
ereport(ERROR,
......@@ -460,6 +467,7 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
errmsg("could not create file \"%s\": %m", tmppath)));
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
if ((int) write(fd, content, size) != size)
{
int save_errno = errno;
......@@ -475,11 +483,14 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
if (CloseTransientFile(fd))
ereport(ERROR,
......
......@@ -1200,8 +1200,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
*/
buf = (char *) palloc(stat.st_size);
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
if (read(fd, buf, stat.st_size) != stat.st_size)
{
pgstat_report_wait_end();
CloseTransientFile(fd);
if (give_warnings)
ereport(WARNING,
......@@ -1212,6 +1214,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
return NULL;
}
pgstat_report_wait_end();
CloseTransientFile(fd);
hdr = (TwoPhaseFileHeader *) buf;
......@@ -1542,8 +1545,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
path)));
/* Write content and CRC */
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
if (write(fd, content, len) != len)
{
pgstat_report_wait_end();
CloseTransientFile(fd);
ereport(ERROR,
(errcode_for_file_access(),
......@@ -1551,16 +1556,19 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
}
if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
{
pgstat_report_wait_end();
CloseTransientFile(fd);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write two-phase state file: %m")));
}
pgstat_report_wait_end();
/*
* We must fsync the file because the end-of-replay checkpoint will not do
* so, there being no GXACT in shared memory yet to tell it to.
*/
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_SYNC);
if (pg_fsync(fd) != 0)
{
CloseTransientFile(fd);
......@@ -1568,6 +1576,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
(errcode_for_file_access(),
errmsg("could not fsync two-phase state file: %m")));
}
pgstat_report_wait_end();
if (CloseTransientFile(fd) != 0)
ereport(ERROR,
......
......@@ -2456,7 +2456,9 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
do
{
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
written = write(openLogFile, from, nleft);
pgstat_report_wait_end();
if (written <= 0)
{
if (errno == EINTR)
......@@ -3207,6 +3209,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
{
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
{
int save_errno = errno;
......@@ -3225,8 +3228,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
}
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
if (pg_fsync(fd) != 0)
{
close(fd);
......@@ -3234,6 +3239,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
if (close(fd))
ereport(ERROR,
......@@ -3360,6 +3366,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
if (nread > sizeof(buffer))
nread = sizeof(buffer);
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
if (read(srcfd, buffer, nread) != nread)
{
if (errno != 0)
......@@ -3372,8 +3379,10 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
(errmsg("not enough data in file \"%s\"",
path)));
}
pgstat_report_wait_end();
}
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
{
int save_errno = errno;
......@@ -3389,12 +3398,15 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
}
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
if (CloseTransientFile(fd))
ereport(ERROR,
......@@ -4414,6 +4426,7 @@ WriteControlFile(void)
XLOG_CONTROL_FILE)));
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
{
/* if write didn't set errno, assume problem is no disk space */
......@@ -4423,11 +4436,14 @@ WriteControlFile(void)
(errcode_for_file_access(),
errmsg("could not write to control file: %m")));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
if (pg_fsync(fd) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync control file: %m")));
pgstat_report_wait_end();
if (close(fd))
ereport(PANIC,
......@@ -4453,10 +4469,12 @@ ReadControlFile(void)
errmsg("could not open control file \"%s\": %m",
XLOG_CONTROL_FILE)));
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
if (read(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData))
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not read from control file: %m")));
pgstat_report_wait_end();
close(fd);
......@@ -4634,6 +4652,7 @@ UpdateControlFile(void)
XLOG_CONTROL_FILE)));
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE);
if (write(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData))
{
/* if write didn't set errno, assume problem is no disk space */
......@@ -4643,11 +4662,14 @@ UpdateControlFile(void)
(errcode_for_file_access(),
errmsg("could not write to control file: %m")));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE);
if (pg_fsync(fd) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync control file: %m")));
pgstat_report_wait_end();
if (close(fd))
ereport(PANIC,
......@@ -5036,6 +5058,7 @@ BootStrapXLOG(void)
/* Write the first page with the initial record */
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
/* if write didn't set errno, assume problem is no disk space */
......@@ -5045,11 +5068,14 @@ BootStrapXLOG(void)
(errcode_for_file_access(),
errmsg("could not write bootstrap transaction log file: %m")));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
if (pg_fsync(openLogFile) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync bootstrap transaction log file: %m")));
pgstat_report_wait_end();
if (close(openLogFile))
ereport(PANIC,
......@@ -9999,11 +10025,13 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
*/
if (openLogFile >= 0)
{
pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
if (pg_fsync(openLogFile) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync log segment %s: %m",
XLogFileNameP(ThisTimeLineID, openLogSegNo))));
pgstat_report_wait_end();
if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
XLogFileClose();
}
......@@ -11456,10 +11484,12 @@ retry:
goto next_record_is_invalid;
}
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
char fname[MAXFNAMELEN];
pgstat_report_wait_end();
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
(errcode_for_file_access(),
......@@ -11467,6 +11497,7 @@ retry:
fname, readOff)));
goto next_record_is_invalid;
}
pgstat_report_wait_end();
Assert(targetSegNo == readSegNo);
Assert(targetPageOff == readOff);
......
......@@ -24,6 +24,7 @@
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
#include "utils/guc.h"
#include "utils/hsearch.h"
......@@ -728,7 +729,9 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
else
segbytes = nbytes;
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
readbytes = read(sendFile, p, segbytes);
pgstat_report_wait_end();
if (readbytes <= 0)
{
char path[MAXPGPATH];
......
......@@ -280,6 +280,7 @@ static const char *pgstat_get_wait_activity(WaitEventActivity w);
static const char *pgstat_get_wait_client(WaitEventClient w);
static const char *pgstat_get_wait_ipc(WaitEventIPC w);
static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
static const char *pgstat_get_wait_io(WaitEventIO w);
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
static void pgstat_send(void *msg, int len);
......@@ -3176,6 +3177,9 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
case PG_WAIT_TIMEOUT:
event_type = "Timeout";
break;
case PG_WAIT_IO:
event_type = "IO";
break;
default:
event_type = "???";
break;
......@@ -3246,6 +3250,13 @@ pgstat_get_wait_event(uint32 wait_event_info)
event_name = pgstat_get_wait_timeout(w);
break;
}
case PG_WAIT_IO:
{
WaitEventIO w = (WaitEventIO) wait_event_info;
event_name = pgstat_get_wait_io(w);
break;
}
default:
event_name = "unknown wait event";
break;
......@@ -3438,6 +3449,228 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
return event_name;
}
/* ----------
 * pgstat_get_wait_io() -
 *
 * Convert WaitEventIO to string.
 *
 * Returns a pointer to a constant string naming the given I/O wait event.
 * If the value matches none of the cases below, the placeholder
 * "unknown wait event" is returned instead.
 *
 * All names are CamelCase renderings of the corresponding WAIT_EVENT_*
 * identifier; keep new entries consistent with that, since these strings
 * are what pgstat_get_wait_event() hands back for PG_WAIT_IO events.
 * ----------
 */
static const char *
pgstat_get_wait_io(WaitEventIO w)
{
	/* fallback used when w matches no case in the switch */
	const char *event_name = "unknown wait event";

	switch (w)
	{
		case WAIT_EVENT_BUFFILE_READ:
			event_name = "BufFileRead";
			break;
		case WAIT_EVENT_BUFFILE_WRITE:
			event_name = "BufFileWrite";
			break;
		case WAIT_EVENT_CONTROL_FILE_READ:
			event_name = "ControlFileRead";
			break;
		case WAIT_EVENT_CONTROL_FILE_SYNC:
			event_name = "ControlFileSync";
			break;
		case WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE:
			event_name = "ControlFileSyncUpdate";
			break;
		case WAIT_EVENT_CONTROL_FILE_WRITE:
			event_name = "ControlFileWrite";
			break;
		case WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE:
			event_name = "ControlFileWriteUpdate";
			break;
		case WAIT_EVENT_COPY_FILE_READ:
			event_name = "CopyFileRead";
			break;
		case WAIT_EVENT_COPY_FILE_WRITE:
			event_name = "CopyFileWrite";
			break;
		case WAIT_EVENT_DATA_FILE_EXTEND:
			event_name = "DataFileExtend";
			break;
		case WAIT_EVENT_DATA_FILE_FLUSH:
			event_name = "DataFileFlush";
			break;
		case WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC:
			event_name = "DataFileImmediateSync";
			break;
		case WAIT_EVENT_DATA_FILE_PREFETCH:
			event_name = "DataFilePrefetch";
			break;
		case WAIT_EVENT_DATA_FILE_READ:
			event_name = "DataFileRead";
			break;
		case WAIT_EVENT_DATA_FILE_SYNC:
			event_name = "DataFileSync";
			break;
		case WAIT_EVENT_DATA_FILE_TRUNCATE:
			event_name = "DataFileTruncate";
			break;
		case WAIT_EVENT_DATA_FILE_WRITE:
			event_name = "DataFileWrite";
			break;
		case WAIT_EVENT_DSM_FILL_ZERO_WRITE:
			event_name = "DSMFillZeroWrite";
			break;
		case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ:
			event_name = "LockFileAddToDataDirRead";
			break;
		case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC:
			event_name = "LockFileAddToDataDirSync";
			break;
		case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE:
			event_name = "LockFileAddToDataDirWrite";
			break;
		case WAIT_EVENT_LOCK_FILE_CREATE_READ:
			event_name = "LockFileCreateRead";
			break;
		case WAIT_EVENT_LOCK_FILE_CREATE_SYNC:
			event_name = "LockFileCreateSync";
			break;
		case WAIT_EVENT_LOCK_FILE_CREATE_WRITE:
			/* spelled "Write", matching every other *Write event name */
			event_name = "LockFileCreateWrite";
			break;
		case WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ:
			event_name = "LockFileReCheckDataDirRead";
			break;
		case WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC:
			event_name = "LogicalRewriteCheckpointSync";
			break;
		case WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC:
			event_name = "LogicalRewriteMappingSync";
			break;
		case WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE:
			event_name = "LogicalRewriteMappingWrite";
			break;
		case WAIT_EVENT_LOGICAL_REWRITE_SYNC:
			event_name = "LogicalRewriteSync";
			break;
		case WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE:
			event_name = "LogicalRewriteTruncate";
			break;
		case WAIT_EVENT_LOGICAL_REWRITE_WRITE:
			event_name = "LogicalRewriteWrite";
			break;
		case WAIT_EVENT_RELATION_MAP_READ:
			event_name = "RelationMapRead";
			break;
		case WAIT_EVENT_RELATION_MAP_SYNC:
			event_name = "RelationMapSync";
			break;
		case WAIT_EVENT_RELATION_MAP_WRITE:
			event_name = "RelationMapWrite";
			break;
		case WAIT_EVENT_REORDER_BUFFER_READ:
			event_name = "ReorderBufferRead";
			break;
		case WAIT_EVENT_REORDER_BUFFER_WRITE:
			event_name = "ReorderBufferWrite";
			break;
		case WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ:
			event_name = "ReorderLogicalMappingRead";
			break;
		case WAIT_EVENT_REPLICATION_SLOT_READ:
			event_name = "ReplicationSlotRead";
			break;
		case WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC:
			event_name = "ReplicationSlotRestoreSync";
			break;
		case WAIT_EVENT_REPLICATION_SLOT_SYNC:
			event_name = "ReplicationSlotSync";
			break;
		case WAIT_EVENT_REPLICATION_SLOT_WRITE:
			event_name = "ReplicationSlotWrite";
			break;
		case WAIT_EVENT_SLRU_FLUSH_SYNC:
			event_name = "SLRUFlushSync";
			break;
		case WAIT_EVENT_SLRU_READ:
			event_name = "SLRURead";
			break;
		case WAIT_EVENT_SLRU_SYNC:
			event_name = "SLRUSync";
			break;
		case WAIT_EVENT_SLRU_WRITE:
			event_name = "SLRUWrite";
			break;
		case WAIT_EVENT_SNAPBUILD_READ:
			event_name = "SnapbuildRead";
			break;
		case WAIT_EVENT_SNAPBUILD_SYNC:
			event_name = "SnapbuildSync";
			break;
		case WAIT_EVENT_SNAPBUILD_WRITE:
			event_name = "SnapbuildWrite";
			break;
		case WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC:
			event_name = "TimelineHistoryFileSync";
			break;
		case WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE:
			event_name = "TimelineHistoryFileWrite";
			break;
		case WAIT_EVENT_TIMELINE_HISTORY_READ:
			event_name = "TimelineHistoryRead";
			break;
		case WAIT_EVENT_TIMELINE_HISTORY_SYNC:
			event_name = "TimelineHistorySync";
			break;
		case WAIT_EVENT_TIMELINE_HISTORY_WRITE:
			event_name = "TimelineHistoryWrite";
			break;
		case WAIT_EVENT_TWOPHASE_FILE_READ:
			event_name = "TwophaseFileRead";
			break;
		case WAIT_EVENT_TWOPHASE_FILE_SYNC:
			event_name = "TwophaseFileSync";
			break;
		case WAIT_EVENT_TWOPHASE_FILE_WRITE:
			event_name = "TwophaseFileWrite";
			break;
		case WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ:
			event_name = "WALSenderTimelineHistoryRead";
			break;
		case WAIT_EVENT_WAL_BOOTSTRAP_SYNC:
			event_name = "WALBootstrapSync";
			break;
		case WAIT_EVENT_WAL_BOOTSTRAP_WRITE:
			event_name = "WALBootstrapWrite";
			break;
		case WAIT_EVENT_WAL_COPY_READ:
			event_name = "WALCopyRead";
			break;
		case WAIT_EVENT_WAL_COPY_SYNC:
			event_name = "WALCopySync";
			break;
		case WAIT_EVENT_WAL_COPY_WRITE:
			event_name = "WALCopyWrite";
			break;
		case WAIT_EVENT_WAL_INIT_SYNC:
			event_name = "WALInitSync";
			break;
		case WAIT_EVENT_WAL_INIT_WRITE:
			event_name = "WALInitWrite";
			break;
		case WAIT_EVENT_WAL_READ:
			event_name = "WALRead";
			break;
		case WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN:
			event_name = "WALSyncMethodAssign";
			break;
		case WAIT_EVENT_WAL_WRITE:
			event_name = "WALWrite";
			break;

			/* no default case, so that compiler will warn */
	}

	return event_name;
}
/* ----------
* pgstat_get_backend_current_activity() -
*
......
......@@ -58,6 +58,7 @@
#include "catalog/catalog.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/slot.h"
......@@ -2275,6 +2276,7 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
ondisk->size = sz;
pgstat_report_wait_start(WAIT_EVENT_REORDER_BUFFER_WRITE);
if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
{
int save_errno = errno;
......@@ -2286,6 +2288,7 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
errmsg("could not write to data file for XID %u: %m",
txn->xid)));
}
pgstat_report_wait_end();
Assert(ondisk->change.action == change->action);
}
......@@ -2366,7 +2369,9 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
* end of this file.
*/
ReorderBufferSerializeReserve(rb, sizeof(ReorderBufferDiskChange));
pgstat_report_wait_start(WAIT_EVENT_REORDER_BUFFER_READ);
readBytes = read(*fd, rb->outbuf, sizeof(ReorderBufferDiskChange));
pgstat_report_wait_end();
/* eof */
if (readBytes == 0)
......@@ -2393,8 +2398,10 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
sizeof(ReorderBufferDiskChange) + ondisk->size);
ondisk = (ReorderBufferDiskChange *) rb->outbuf;
pgstat_report_wait_start(WAIT_EVENT_REORDER_BUFFER_READ);
readBytes = read(*fd, rb->outbuf + sizeof(ReorderBufferDiskChange),
ondisk->size - sizeof(ReorderBufferDiskChange));
pgstat_report_wait_end();
if (readBytes < 0)
ereport(ERROR,
......@@ -3047,7 +3054,9 @@ ApplyLogicalMappingFile(HTAB *tuplecid_data, Oid relid, const char *fname)
memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
/* read all mappings till the end of the file */
pgstat_report_wait_start(WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ);
readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
pgstat_report_wait_end();
if (readBytes < 0)
ereport(ERROR,
......
......@@ -115,6 +115,8 @@
#include "access/transam.h"
#include "access/xact.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/snapbuild.h"
......@@ -1580,6 +1582,7 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
ereport(ERROR,
(errmsg("could not open file \"%s\": %m", path)));
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE);
if ((write(fd, ondisk, needed_length)) != needed_length)
{
CloseTransientFile(fd);
......@@ -1587,6 +1590,7 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
/*
* fsync the file before renaming so that even if we crash after this we
......@@ -1596,6 +1600,7 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
* some noticeable overhead since it's performed synchronously during
* decoding?
*/
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_SYNC);
if (pg_fsync(fd) != 0)
{
CloseTransientFile(fd);
......@@ -1603,6 +1608,7 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
CloseTransientFile(fd);
fsync_fname("pg_logical/snapshots", true);
......@@ -1677,7 +1683,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
/* read statically sized portion of snapshot */
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
pgstat_report_wait_end();
if (readBytes != SnapBuildOnDiskConstantSize)
{
CloseTransientFile(fd);
......@@ -1703,7 +1711,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
/* read SnapBuild */
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
pgstat_report_wait_end();
if (readBytes != sizeof(SnapBuild))
{
CloseTransientFile(fd);
......@@ -1717,7 +1727,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
/* restore running xacts information */
sz = sizeof(TransactionId) * ondisk.builder.running.xcnt_space;
ondisk.builder.running.xip = MemoryContextAllocZero(builder->context, sz);
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
readBytes = read(fd, ondisk.builder.running.xip, sz);
pgstat_report_wait_end();
if (readBytes != sz)
{
CloseTransientFile(fd);
......@@ -1731,7 +1743,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
/* restore committed xacts information */
sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
ondisk.builder.committed.xip = MemoryContextAllocZero(builder->context, sz);
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
readBytes = read(fd, ondisk.builder.committed.xip, sz);
pgstat_report_wait_end();
if (readBytes != sz)
{
CloseTransientFile(fd);
......
......@@ -43,6 +43,7 @@
#include "access/xlog_internal.h"
#include "common/string.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/slot.h"
#include "storage/fd.h"
#include "storage/proc.h"
......@@ -1100,10 +1101,12 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
SnapBuildOnDiskChecksummedSize);
FIN_CRC32C(cp.checksum);
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
{
int save_errno = errno;
pgstat_report_wait_end();
CloseTransientFile(fd);
errno = save_errno;
ereport(elevel,
......@@ -1112,12 +1115,15 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
tmppath)));
return;
}
pgstat_report_wait_end();
/* fsync the temporary file */
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_SYNC);
if (pg_fsync(fd) != 0)
{
int save_errno = errno;
pgstat_report_wait_end();
CloseTransientFile(fd);
errno = save_errno;
ereport(elevel,
......@@ -1126,6 +1132,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
tmppath)));
return;
}
pgstat_report_wait_end();
CloseTransientFile(fd);
......@@ -1202,6 +1209,7 @@ RestoreSlotFromDisk(const char *name)
* Sync state file before we're reading from it. We might have crashed
* while it wasn't synced yet and we shouldn't continue on that basis.
*/
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
if (pg_fsync(fd) != 0)
{
CloseTransientFile(fd);
......@@ -1210,6 +1218,7 @@ RestoreSlotFromDisk(const char *name)
errmsg("could not fsync file \"%s\": %m",
path)));
}
pgstat_report_wait_end();
/* Also sync the parent directory */
START_CRIT_SECTION();
......@@ -1217,7 +1226,9 @@ RestoreSlotFromDisk(const char *name)
END_CRIT_SECTION();
/* read part of statefile that's guaranteed to be version independent */
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
pgstat_report_wait_end();
if (readBytes != ReplicationSlotOnDiskConstantSize)
{
int saved_errno = errno;
......@@ -1253,9 +1264,11 @@ RestoreSlotFromDisk(const char *name)
path, cp.length)));
/* Now that we know the size, read the entire file */
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
readBytes = read(fd,
(char *) &cp + ReplicationSlotOnDiskConstantSize,
cp.length);
pgstat_report_wait_end();
if (readBytes != cp.length)
{
int saved_errno = errno;
......
......@@ -463,7 +463,9 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
char rbuf[BLCKSZ];
int nread;
pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
nread = read(fd, rbuf, sizeof(rbuf));
pgstat_report_wait_end();
if (nread <= 0)
ereport(ERROR,
(errcode_for_file_access(),
......@@ -2126,7 +2128,9 @@ retry:
else
segbytes = nbytes;
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
readbytes = read(sendFile, p, segbytes);
pgstat_report_wait_end();
if (readbytes <= 0)
{
ereport(ERROR,
......
......@@ -37,6 +37,7 @@
#include "postgres.h"
#include "executor/instrument.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/buffile.h"
#include "storage/buf_internals.h"
......@@ -254,7 +255,10 @@ BufFileLoadBuffer(BufFile *file)
/*
* Read whatever we can get, up to a full bufferload.
*/
file->nbytes = FileRead(thisfile, file->buffer, sizeof(file->buffer));
file->nbytes = FileRead(thisfile,
file->buffer,
sizeof(file->buffer),
WAIT_EVENT_BUFFILE_READ);
if (file->nbytes < 0)
file->nbytes = 0;
file->offsets[file->curFile] += file->nbytes;
......@@ -317,7 +321,10 @@ BufFileDumpBuffer(BufFile *file)
return; /* seek failed, give up */
file->offsets[file->curFile] = file->curOffset;
}
bytestowrite = FileWrite(thisfile, file->buffer + wpos, bytestowrite);
bytestowrite = FileWrite(thisfile,
file->buffer + wpos,
bytestowrite,
WAIT_EVENT_BUFFILE_WRITE);
if (bytestowrite <= 0)
return; /* failed to write */
file->offsets[file->curFile] += bytestowrite;
......
......@@ -25,7 +25,7 @@
#include "storage/copydir.h"
#include "storage/fd.h"
#include "miscadmin.h"
#include "pgstat.h"
/*
* copydir: copy a directory
......@@ -169,7 +169,9 @@ copy_file(char *fromfile, char *tofile)
/* If we got a cancel signal during the copy of the file, quit */
CHECK_FOR_INTERRUPTS();
pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_READ);
nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
pgstat_report_wait_end();
if (nbytes < 0)
ereport(ERROR,
(errcode_for_file_access(),
......@@ -177,8 +179,10 @@ copy_file(char *fromfile, char *tofile)
if (nbytes == 0)
break;
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_WRITE);
if ((int) write(dstfd, buffer, nbytes) != nbytes)
{
pgstat_report_wait_end();
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
......@@ -186,6 +190,7 @@ copy_file(char *fromfile, char *tofile)
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tofile)));
}
pgstat_report_wait_end();
/*
* We fsync the files later but first flush them to avoid spamming the
......
......@@ -1550,7 +1550,7 @@ FileClose(File file)
* to read into.
*/
int
FilePrefetch(File file, off_t offset, int amount)
FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info)
{
#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED)
int returnCode;
......@@ -1565,8 +1565,10 @@ FilePrefetch(File file, off_t offset, int amount)
if (returnCode < 0)
return returnCode;
pgstat_report_wait_start(wait_event_info);
returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
POSIX_FADV_WILLNEED);
pgstat_report_wait_end();
return returnCode;
#else
......@@ -1576,7 +1578,7 @@ FilePrefetch(File file, off_t offset, int amount)
}
void
FileWriteback(File file, off_t offset, off_t nbytes)
FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
{
int returnCode;
......@@ -1597,11 +1599,13 @@ FileWriteback(File file, off_t offset, off_t nbytes)
if (returnCode < 0)
return;
pgstat_report_wait_start(wait_event_info);
pg_flush_data(VfdCache[file].fd, offset, nbytes);
pgstat_report_wait_end();
}
int
FileRead(File file, char *buffer, int amount)
FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
{
int returnCode;
Vfd *vfdP;
......@@ -1620,7 +1624,9 @@ FileRead(File file, char *buffer, int amount)
vfdP = &VfdCache[file];
retry:
pgstat_report_wait_start(wait_event_info);
returnCode = read(vfdP->fd, buffer, amount);
pgstat_report_wait_end();
if (returnCode >= 0)
{
......@@ -1663,7 +1669,7 @@ retry:
}
int
FileWrite(File file, char *buffer, int amount)
FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
{
int returnCode;
Vfd *vfdP;
......@@ -1721,7 +1727,9 @@ FileWrite(File file, char *buffer, int amount)
retry:
errno = 0;
pgstat_report_wait_start(wait_event_info);
returnCode = write(vfdP->fd, buffer, amount);
pgstat_report_wait_end();
/* if write didn't set errno, assume problem is no disk space */
if (returnCode != amount && errno == 0)
......@@ -1782,7 +1790,7 @@ retry:
}
int
FileSync(File file)
FileSync(File file, uint32 wait_event_info)
{
int returnCode;
......@@ -1795,7 +1803,11 @@ FileSync(File file)
if (returnCode < 0)
return returnCode;
return pg_fsync(VfdCache[file].fd);
pgstat_report_wait_start(wait_event_info);
returnCode = pg_fsync(VfdCache[file].fd);
pgstat_report_wait_end();
return returnCode;
}
off_t
......@@ -1887,7 +1899,7 @@ FileTell(File file)
#endif
int
FileTruncate(File file, off_t offset)
FileTruncate(File file, off_t offset, uint32 wait_event_info)
{
int returnCode;
......@@ -1900,7 +1912,9 @@ FileTruncate(File file, off_t offset)
if (returnCode < 0)
return returnCode;
pgstat_report_wait_start(wait_event_info);
returnCode = ftruncate(VfdCache[file].fd, offset);
pgstat_report_wait_end();
if (returnCode == 0 && VfdCache[file].fileSize > offset)
{
......
......@@ -60,6 +60,7 @@
#ifdef HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif
#include "pgstat.h"
#include "portability/mem.h"
#include "storage/dsm_impl.h"
......@@ -911,10 +912,12 @@ dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
if (goal > ZBUFFER_SIZE)
goal = ZBUFFER_SIZE;
pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
if (write(fd, zbuffer, goal) == goal)
remaining -= goal;
else
success = false;
pgstat_report_wait_end();
}
if (!success)
......
......@@ -28,6 +28,7 @@
#include "miscadmin.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/bgwriter.h"
#include "storage/fd.h"
......@@ -536,7 +537,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
errmsg("could not seek to block %u in file \"%s\": %m",
blocknum, FilePathName(v->mdfd_vfd))));
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
{
if (nbytes < 0)
ereport(ERROR,
......@@ -667,7 +668,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ);
(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
#endif /* USE_PREFETCH */
}
......@@ -716,7 +717,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush);
FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
nblocks -= nflush;
blocknum += nflush;
......@@ -753,7 +754,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
errmsg("could not seek to block %u in file \"%s\": %m",
blocknum, FilePathName(v->mdfd_vfd))));
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_READ);
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
reln->smgr_rnode.node.spcNode,
......@@ -829,7 +830,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
errmsg("could not seek to block %u in file \"%s\": %m",
blocknum, FilePathName(v->mdfd_vfd))));
nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ);
nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_WRITE);
TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
reln->smgr_rnode.node.spcNode,
......@@ -967,7 +968,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* This segment is no longer active. We truncate the file, but do
* not delete it, for reasons explained in the header comments.
*/
if (FileTruncate(v->mdfd_vfd, 0) < 0)
if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate file \"%s\": %m",
......@@ -993,7 +994,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
*/
BlockNumber lastsegblocks = nblocks - priorblocks;
if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0)
if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate file \"%s\" to %u blocks: %m",
......@@ -1037,7 +1038,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
{
MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
if (FileSync(v->mdfd_vfd) < 0)
if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
......@@ -1232,7 +1233,7 @@ mdsync(void)
INSTR_TIME_SET_CURRENT(sync_start);
if (seg != NULL &&
FileSync(seg->mdfd_vfd) >= 0)
FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) >= 0)
{
/* Success; update statistics about sync timing */
INSTR_TIME_SET_CURRENT(sync_end);
......@@ -1443,7 +1444,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
ereport(DEBUG1,
(errmsg("could not forward fsync request because request queue is full")));
if (FileSync(seg->mdfd_vfd) < 0)
if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
......
......@@ -50,6 +50,7 @@
#include "catalog/pg_tablespace.h"
#include "catalog/storage.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/lwlock.h"
#include "utils/inval.h"
......@@ -658,11 +659,13 @@ load_relmap_file(bool shared)
* look, the sinval signaling mechanism will make us re-read it before we
* are able to access any relation that's affected by the change.
*/
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not read relation mapping file \"%s\": %m",
mapfilename)));
pgstat_report_wait_end();
CloseTransientFile(fd);
......@@ -774,6 +777,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
}
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile))
{
/* if write didn't set errno, assume problem is no disk space */
......@@ -784,6 +788,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
errmsg("could not write to relation mapping file \"%s\": %m",
mapfilename)));
}
pgstat_report_wait_end();
/*
* We choose to fsync the data to disk before considering the task done.
......@@ -791,11 +796,13 @@ write_relmap_file(bool shared, RelMapFile *newmap,
* issue, but it would complicate checkpointing --- see notes for
* CheckPointRelationMap.
*/
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync relation mapping file \"%s\": %m",
mapfilename)));
pgstat_report_wait_end();
if (CloseTransientFile(fd))
ereport(ERROR,
......
......@@ -35,6 +35,7 @@
#include "libpq/libpq.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
......@@ -856,11 +857,13 @@ CreateLockFile(const char *filename, bool amPostmaster,
errmsg("could not open lock file \"%s\": %m",
filename)));
}
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_CREATE_READ);
if ((len = read(fd, buffer, sizeof(buffer) - 1)) < 0)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not read lock file \"%s\": %m",
filename)));
pgstat_report_wait_end();
close(fd);
if (len == 0)
......@@ -1009,6 +1012,7 @@ CreateLockFile(const char *filename, bool amPostmaster,
strlcat(buffer, "\n", sizeof(buffer));
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_CREATE_WRITE);
if (write(fd, buffer, strlen(buffer)) != strlen(buffer))
{
int save_errno = errno;
......@@ -1021,6 +1025,9 @@ CreateLockFile(const char *filename, bool amPostmaster,
(errcode_for_file_access(),
errmsg("could not write lock file \"%s\": %m", filename)));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_CREATE_SYNC);
if (pg_fsync(fd) != 0)
{
int save_errno = errno;
......@@ -1032,6 +1039,7 @@ CreateLockFile(const char *filename, bool amPostmaster,
(errcode_for_file_access(),
errmsg("could not write lock file \"%s\": %m", filename)));
}
pgstat_report_wait_end();
if (close(fd) != 0)
{
int save_errno = errno;
......@@ -1164,7 +1172,9 @@ AddToDataDirLockFile(int target_line, const char *str)
DIRECTORY_LOCK_FILE)));
return;
}
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ);
len = read(fd, srcbuffer, sizeof(srcbuffer) - 1);
pgstat_report_wait_end();
if (len < 0)
{
ereport(LOG,
......@@ -1217,9 +1227,11 @@ AddToDataDirLockFile(int target_line, const char *str)
*/
len = strlen(destbuffer);
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE);
if (lseek(fd, (off_t) 0, SEEK_SET) != 0 ||
(int) write(fd, destbuffer, len) != len)
{
pgstat_report_wait_end();
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
......@@ -1230,6 +1242,8 @@ AddToDataDirLockFile(int target_line, const char *str)
close(fd);
return;
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC);
if (pg_fsync(fd) != 0)
{
ereport(LOG,
......@@ -1237,6 +1251,7 @@ AddToDataDirLockFile(int target_line, const char *str)
errmsg("could not write to file \"%s\": %m",
DIRECTORY_LOCK_FILE)));
}
pgstat_report_wait_end();
if (close(fd) != 0)
{
ereport(LOG,
......@@ -1293,7 +1308,9 @@ RecheckDataDirLockFile(void)
return true;
}
}
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ);
len = read(fd, buffer, sizeof(buffer) - 1);
pgstat_report_wait_end();
if (len < 0)
{
ereport(LOG,
......
......@@ -723,6 +723,7 @@ typedef enum BackendState
#define PG_WAIT_EXTENSION 0x07000000U
#define PG_WAIT_IPC 0x08000000U
#define PG_WAIT_TIMEOUT 0x09000000U
#define PG_WAIT_IO 0x0A000000U
/* ----------
* Wait Events - Activity
......@@ -805,6 +806,83 @@ typedef enum
WAIT_EVENT_RECOVERY_APPLY_DELAY
} WaitEventTimeout;
/* ----------
 * Wait Events - IO
 *
 * Use this category when a process is waiting for an I/O operation
 * (read, write, sync, truncate, prefetch, ...) to complete.
 *
 * The first enumerator must be the one anchored at PG_WAIT_IO so that
 * every member of this enum is numbered upward from the I/O class value.
 * Any enumerator placed ahead of the anchor would be numbered from zero
 * and would no longer carry the I/O class bits.  Keep the list in
 * alphabetical order and add new events after the anchored first entry.
 * ----------
 */
typedef enum
{
	WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
	WAIT_EVENT_BUFFILE_WRITE,
	WAIT_EVENT_CONTROL_FILE_READ,
	WAIT_EVENT_CONTROL_FILE_SYNC,
	WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE,
	WAIT_EVENT_CONTROL_FILE_WRITE,
	WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE,
	WAIT_EVENT_COPY_FILE_READ,
	WAIT_EVENT_COPY_FILE_WRITE,
	WAIT_EVENT_DATA_FILE_EXTEND,
	WAIT_EVENT_DATA_FILE_FLUSH,
	WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC,
	WAIT_EVENT_DATA_FILE_PREFETCH,
	WAIT_EVENT_DATA_FILE_READ,
	WAIT_EVENT_DATA_FILE_SYNC,
	WAIT_EVENT_DATA_FILE_TRUNCATE,
	WAIT_EVENT_DATA_FILE_WRITE,
	WAIT_EVENT_DSM_FILL_ZERO_WRITE,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE,
	WAIT_EVENT_LOCK_FILE_CREATE_READ,
	WAIT_EVENT_LOCK_FILE_CREATE_SYNC,
	WAIT_EVENT_LOCK_FILE_CREATE_WRITE,
	WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ,
	WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE,
	WAIT_EVENT_LOGICAL_REWRITE_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE,
	WAIT_EVENT_LOGICAL_REWRITE_WRITE,
	WAIT_EVENT_RELATION_MAP_READ,
	WAIT_EVENT_RELATION_MAP_SYNC,
	WAIT_EVENT_RELATION_MAP_WRITE,
	WAIT_EVENT_REORDER_BUFFER_READ,
	WAIT_EVENT_REORDER_BUFFER_WRITE,
	WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ,
	WAIT_EVENT_REPLICATION_SLOT_READ,
	WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC,
	WAIT_EVENT_REPLICATION_SLOT_SYNC,
	WAIT_EVENT_REPLICATION_SLOT_WRITE,
	WAIT_EVENT_SLRU_FLUSH_SYNC,
	WAIT_EVENT_SLRU_READ,
	WAIT_EVENT_SLRU_SYNC,
	WAIT_EVENT_SLRU_WRITE,
	WAIT_EVENT_SNAPBUILD_READ,
	WAIT_EVENT_SNAPBUILD_SYNC,
	WAIT_EVENT_SNAPBUILD_WRITE,
	WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC,
	WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE,
	WAIT_EVENT_TIMELINE_HISTORY_READ,
	WAIT_EVENT_TIMELINE_HISTORY_SYNC,
	WAIT_EVENT_TIMELINE_HISTORY_WRITE,
	WAIT_EVENT_TWOPHASE_FILE_READ,
	WAIT_EVENT_TWOPHASE_FILE_SYNC,
	WAIT_EVENT_TWOPHASE_FILE_WRITE,
	WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ,
	WAIT_EVENT_WAL_BOOTSTRAP_SYNC,
	WAIT_EVENT_WAL_BOOTSTRAP_WRITE,
	WAIT_EVENT_WAL_COPY_READ,
	WAIT_EVENT_WAL_COPY_SYNC,
	WAIT_EVENT_WAL_COPY_WRITE,
	WAIT_EVENT_WAL_INIT_SYNC,
	WAIT_EVENT_WAL_INIT_WRITE,
	WAIT_EVENT_WAL_READ,
	WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN,
	WAIT_EVENT_WAL_WRITE
} WaitEventIO;
/* ----------
* Command type for progress reporting purposes
* ----------
......
......@@ -68,13 +68,13 @@ extern int max_safe_fds;
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file);
extern int FilePrefetch(File file, off_t offset, int amount);
extern int FileRead(File file, char *buffer, int amount);
extern int FileWrite(File file, char *buffer, int amount);
extern int FileSync(File file);
extern int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info);
extern int FileRead(File file, char *buffer, int amount, uint32 wait_event_info);
extern int FileWrite(File file, char *buffer, int amount, uint32 wait_event_info);
extern int FileSync(File file, uint32 wait_event_info);
extern off_t FileSeek(File file, off_t offset, int whence);
extern int FileTruncate(File file, off_t offset);
extern void FileWriteback(File file, off_t offset, off_t nbytes);
extern int FileTruncate(File file, off_t offset, uint32 wait_event_info);
extern void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info);
extern char *FilePathName(File file);
extern int FileGetRawDesc(File file);
extern int FileGetRawFlags(File file);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment