Commit 475861b2 authored by Thomas Munro

Add wal_recycle and wal_init_zero GUCs.

On at least ZFS, it can be beneficial to create new WAL files every
time and not to bother zero-filling them.  Since it's not clear which
other filesystems might benefit from one or both of those things,
add individual GUCs to control those two behaviors independently and
make only very general statements in the docs.

Author: Jerry Jelinek, with some adjustments by Thomas Munro
Reviewed-by: Alvaro Herrera, Andres Freund, Tomas Vondra, Robert Haas and others
Discussion: https://postgr.es/m/CACPQ5Fo00QR7LNAcd1ZjgoBi4y97%2BK760YABs0vQHH5dLdkkMA%40mail.gmail.com
parent 4b826641
......@@ -3590,6 +3590,41 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
</listitem>
</varlistentry>
<varlistentry id="guc-wal-init-zero" xreflabel="wal_init_zero">
<term><varname>wal_init_zero</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>wal_init_zero</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
If set to <literal>on</literal> (the default), this option causes new
WAL files to be filled with zeroes. On some filesystems, this ensures
that space is allocated before we need to write WAL records. However,
<firstterm>Copy-On-Write</firstterm> (COW) filesystems may not benefit
from this technique, so the option is given to skip the unnecessary
work. If set to <literal>off</literal>, only the final byte is written
when the file is created so that it has the expected size.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-recycle" xreflabel="wal_recycle">
<term><varname>wal_recycle</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>wal_recycle</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
If set to <literal>on</literal> (the default), this option causes WAL
files to be recycled by renaming them, avoiding the need to create new
ones. On COW filesystems, it may be faster to create new ones, so the
option is given to disable this behavior.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-sender-timeout" xreflabel="wal_sender_timeout">
<term><varname>wal_sender_timeout</varname> (<type>integer</type>)
<indexterm>
......
......@@ -95,6 +95,8 @@ bool wal_log_hints = false;
bool wal_compression = false;
char *wal_consistency_checking_string = NULL;
bool *wal_consistency_checking = NULL;
/* wal_init_zero setting: when true, new WAL files are zero-filled on creation */
bool wal_init_zero = true;
/* wal_recycle setting: when true, old WAL files are recycled by renaming them */
bool wal_recycle = true;
bool log_checkpoints = false;
int sync_method = DEFAULT_SYNC_METHOD;
int wal_level = WAL_LEVEL_MINIMAL;
......@@ -3209,6 +3211,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
XLogSegNo max_segno;
int fd;
int nbytes;
int save_errno;
XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size);
......@@ -3248,39 +3251,61 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(),
errmsg("could not create file \"%s\": %m", tmppath)));
/*
* Zero-fill the file. We have to do this the hard way to ensure that all
* the file space has really been allocated --- on platforms that allow
* "holes" in files, just seeking to the end doesn't allocate intermediate
* space. This way, we know that we have all the space and (after the
* fsync below) that all the indirect blocks are down on disk. Therefore,
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
* log file.
*/
memset(zbuffer.data, 0, XLOG_BLCKSZ);
for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
save_errno = 0;
if (wal_init_zero)
{
/*
* Zero-fill the file. With this setting, we do this the hard way to
* ensure that all the file space has really been allocated. On
* platforms that allow "holes" in files, just seeking to the end
* doesn't allocate intermediate space. This way, we know that we
* have all the space and (after the fsync below) that all the
* indirect blocks are down on disk. Therefore, fdatasync(2) or
* O_DSYNC will be sufficient to sync future writes to the log file.
*/
for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
{
errno = 0;
if (write(fd, zbuffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
/* if write didn't set errno, assume no disk space */
save_errno = errno ? errno : ENOSPC;
break;
}
}
}
else
{
/*
* Otherwise, seeking to the end and writing a solitary byte is
* enough.
*/
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
{
int save_errno = errno;
/* if write didn't set errno, assume no disk space */
save_errno = errno ? errno : ENOSPC;
}
}
pgstat_report_wait_end();
/*
* If we fail to make the file, delete it to release disk space
*/
unlink(tmppath);
if (save_errno)
{
/*
* If we fail to make the file, delete it to release disk space
*/
unlink(tmppath);
close(fd);
close(fd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
errno = save_errno;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
......@@ -4049,14 +4074,19 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
XLogSegNo endlogSegNo;
XLogSegNo recycleSegNo;
/*
* Initialize info about where to try to recycle to.
*/
XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
if (RedoRecPtr == InvalidXLogRecPtr)
recycleSegNo = endlogSegNo + 10;
if (wal_recycle)
{
/*
* Initialize info about where to try to recycle to.
*/
XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
if (RedoRecPtr == InvalidXLogRecPtr)
recycleSegNo = endlogSegNo + 10;
else
recycleSegNo = XLOGfileslop(RedoRecPtr);
}
else
recycleSegNo = XLOGfileslop(RedoRecPtr);
recycleSegNo = 0; /* keep compiler quiet */
snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
......@@ -4065,7 +4095,8 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
* segment. Only recycle normal files, pg_standby for example can create
* symbolic links pointing to a separate archive directory.
*/
if (endlogSegNo <= recycleSegNo &&
if (wal_recycle &&
endlogSegNo <= recycleSegNo &&
lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
InstallXLogFileSegment(&endlogSegNo, path,
true, recycleSegNo, true))
......
......@@ -1174,6 +1174,26 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
{
{"wal_init_zero", PGC_SUSET, WAL_SETTINGS,
gettext_noop("Writes zeroes to new WAL files before first use."),
NULL
},
&wal_init_zero,
true,
NULL, NULL, NULL
},
{
{"wal_recycle", PGC_SUSET, WAL_SETTINGS,
gettext_noop("Recycles WAL files by renaming them."),
NULL
},
&wal_recycle,
true,
NULL, NULL, NULL
},
{
{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
gettext_noop("Logs each checkpoint."),
......
......@@ -206,6 +206,8 @@
#wal_compression = off # enable compression of full-page writes
#wal_log_hints = off # also do full page writes of non-critical updates
# (change requires restart)
#wal_init_zero = on # zero-fill new WAL files
#wal_recycle = on # recycle WAL files
#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers
# (change requires restart)
#wal_writer_delay = 200ms # 1-10000 milliseconds
......
......@@ -116,6 +116,8 @@ extern bool EnableHotStandby;
extern bool fullPageWrites;
extern bool wal_log_hints;
extern bool wal_compression;
/* Settings for zero-filling new WAL files and recycling old ones (both default to on) */
extern bool wal_init_zero;
extern bool wal_recycle;
extern bool *wal_consistency_checking;
extern char *wal_consistency_checking_string;
extern bool log_checkpoints;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment