Commit e57cd7f0 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Change the logic to decide when to delete old WAL segments, so that it

doesn't take into account how far the WAL senders are. This way a hung
WAL sender doesn't prevent old WAL segments from being recycled/removed
in the primary, ultimately causing the disk to fill up. Instead add
standby_keep_segments setting to control how many old WAL segments are
kept in the primary. This also makes it more reliable to use streaming
replication without WAL archiving, assuming that you set
standby_keep_segments high enough.
parent 93f35f09
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.262 2010/04/03 07:22:53 petere Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.263 2010/04/12 09:52:29 heikki Exp $ -->
<chapter Id="runtime-config">
<title>Server Configuration</title>
......@@ -1823,6 +1823,34 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-standby-keep-segments" xreflabel="standby_keep_segments">
<term><varname>standby_keep_segments</varname> (<type>integer</type>)</term>
<indexterm>
<primary><varname>standby_keep_segments</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Specifies the number of log file segments kept in the <filename>pg_xlog</>
directory, in case a standby server needs to fetch them via streaming
replication. Each segment is normally 16 megabytes. If a standby
server connected to the primary falls behind more than
<varname>standby_keep_segments</> segments, the primary might remove
a WAL segment still needed by the standby and the replication
connection will be terminated.
This sets only the minimum number of segments retained for standby
purposes; the system might need to retain more segments for WAL
archival or to recover from a checkpoint. If <varname>standby_keep_segments</>
is zero (the default), the system doesn't keep any extra segments
for standby purposes, and the number of old WAL segments available
for standbys is determined based only on the location of the previous
checkpoint and status of WAL archival.
This parameter can only be set in the <filename>postgresql.conf</>
file or on the server command line.
</para>
</listitem>
</varlistentry>
</variablelist>
</sect2>
<sect2 id="runtime-config-standby">
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.58 2010/04/03 07:22:54 petere Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.59 2010/04/12 09:52:29 heikki Exp $ -->
<chapter id="high-availability">
<title>High Availability, Load Balancing, and Replication</title>
......@@ -732,7 +732,12 @@ trigger_file = '/path/to/trigger_file'
Streaming replication relies on file-based continuous archiving for
making the base backup and for allowing the standby to catch up if it is
disconnected from the primary for long enough for the primary to
delete old WAL files still required by the standby.
delete old WAL files still required by the standby. It is possible
to use streaming replication without WAL archiving, but if a standby
falls behind too much, the primary will delete old WAL files still
needed by the standby, and the standby will have to be manually restored
from a base backup. You can control how many old WAL segments the primary
retains using the <varname>standby_keep_segments</> setting.
</para>
<para>
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.391 2010/04/07 10:58:49 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.392 2010/04/12 09:52:29 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -66,6 +66,7 @@
/* User-settable parameters */
int CheckPointSegments = 3;
int StandbySegments = 0;
int XLOGbuffers = 8;
int XLogArchiveTimeout = 0;
bool XLogArchiveMode = false;
......@@ -356,6 +357,8 @@ typedef struct XLogCtlData
uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */
TransactionId ckptXid;
XLogRecPtr asyncCommitLSN; /* LSN of newest async commit */
uint32 lastRemovedLog; /* latest removed/recycled XLOG segment */
uint32 lastRemovedSeg;
/* Protected by WALWriteLock: */
XLogCtlWrite Write;
......@@ -3149,6 +3152,22 @@ PreallocXlogFiles(XLogRecPtr endptr)
}
}
/*
 * XLogGetLastRemoved
 *
 * Report the log id and segment number of the most recently removed or
 * recycled WAL segment, as recorded in shared memory.
 *
 * log, seg: output parameters receiving lastRemovedLog / lastRemovedSeg.
 *
 * Both fields are read while holding info_lck so the pair is taken as a
 * consistent snapshot.  Both outputs are 0 if no WAL segments have been
 * removed since startup.
 */
void
XLogGetLastRemoved(uint32 *log, uint32 *seg)
{
	/* volatile pointer keeps the accesses from being rearranged */
	volatile XLogCtlData *shared = XLogCtl;
	uint32		removedLog;
	uint32		removedSeg;

	/* Take the snapshot under the spinlock ... */
	SpinLockAcquire(&shared->info_lck);
	removedLog = shared->lastRemovedLog;
	removedSeg = shared->lastRemovedSeg;
	SpinLockRelease(&shared->info_lck);

	/* ... and hand it to the caller once the lock is released. */
	*log = removedLog;
	*seg = removedSeg;
}
/*
* Recycle or remove all log files older or equal to passed log/seg#
*
......@@ -3170,6 +3189,20 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
char newpath[MAXPGPATH];
#endif
struct stat statbuf;
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
/* Update the last removed location in shared memory first */
SpinLockAcquire(&xlogctl->info_lck);
if (log > xlogctl->lastRemovedLog ||
(log == xlogctl->lastRemovedLog && seg > xlogctl->lastRemovedSeg))
{
xlogctl->lastRemovedLog = log;
xlogctl->lastRemovedSeg = seg;
}
SpinLockRelease(&xlogctl->info_lck);
elog(DEBUG1, "removing WAL segments older than %X/%X", log, seg);
/*
* Initialize info about where to try to recycle to. We allow recycling
......@@ -7172,36 +7205,51 @@ CreateCheckPoint(int flags)
smgrpostckpt();
/*
* If there's connected standby servers doing XLOG streaming, don't delete
* XLOG files that have not been streamed to all of them yet. This does
* nothing to prevent them from being deleted when the standby is
* disconnected (e.g because of network problems), but at least it avoids
* an open replication connection from failing because of that.
* Delete old log files (those no longer needed even for previous
* checkpoint or the standbys in XLOG streaming).
*/
if ((_logId || _logSeg) && max_wal_senders > 0)
if (_logId || _logSeg)
{
XLogRecPtr oldest;
uint32 log;
uint32 seg;
oldest = GetOldestWALSendPointer();
if (oldest.xlogid != 0 || oldest.xrecoff != 0)
/*
* Calculate the last segment that we need to retain because of
* standby_keep_segments, by subtracting StandbySegments from the
* new checkpoint location.
*/
if (StandbySegments > 0)
{
XLByteToSeg(oldest, log, seg);
uint32 log;
uint32 seg;
int d_log;
int d_seg;
XLByteToSeg(recptr, log, seg);
d_seg = StandbySegments % XLogSegsPerFile;
d_log = StandbySegments / XLogSegsPerFile;
if (seg < d_seg)
{
d_log += 1;
seg = seg - d_seg + XLogSegsPerFile;
}
else
seg = seg - d_seg;
/* avoid underflow, don't go below (0,1) */
if (log < d_log || (log == d_log && seg == 0))
{
log = 0;
seg = 1;
}
else
log = log - d_log;
/* don't delete WAL segments newer than the calculated segment */
if (log < _logId || (log == _logId && seg < _logSeg))
{
_logId = log;
_logSeg = seg;
}
}
}
/*
* Delete old log files (those no longer needed even for previous
* checkpoint or the standbys in XLOG streaming).
*/
if (_logId || _logSeg)
{
PrevLogSeg(_logId, _logSeg);
RemoveOldXlogFiles(_logId, _logSeg, recptr);
}
......
......@@ -30,7 +30,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.14 2010/04/01 00:43:29 rhaas Exp $
* $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.15 2010/04/12 09:52:29 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -508,6 +508,10 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
{
char path[MAXPGPATH];
uint32 startoff;
uint32 lastRemovedLog;
uint32 lastRemovedSeg;
uint32 log;
uint32 seg;
while (nbytes > 0)
{
......@@ -527,10 +531,27 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
if (sendFile < 0)
ereport(FATAL, /* XXX: Why FATAL? */
(errcode_for_file_access(),
errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
path, sendId, sendSeg)));
{
/*
* If the file is not found, assume it's because the
* standby asked for a too old WAL segment that has already
* been removed or recycled.
*/
if (errno == ENOENT)
{
char filename[MAXFNAMELEN];
XLogFileName(filename, ThisTimeLineID, sendId, sendSeg);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("requested WAL segment %s has already been removed",
filename)));
}
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
path, sendId, sendSeg)));
}
sendOff = 0;
}
......@@ -538,7 +559,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
if (sendOff != startoff)
{
if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
ereport(FATAL,
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not seek in log file %u, segment %u to offset %u: %m",
sendId, sendSeg, startoff)));
......@@ -553,7 +574,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
readbytes = read(sendFile, buf, segbytes);
if (readbytes <= 0)
ereport(FATAL,
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from log file %u, segment %u, offset %u, "
"length %lu: %m",
......@@ -566,6 +587,26 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
nbytes -= readbytes;
buf += readbytes;
}
/*
* After reading into the buffer, check that what we read was valid.
* We do this after reading, because even though the segment was present
* when we opened it, it might get recycled or removed while we read it.
* The read() succeeds in that case, but the data we tried to read might
* already have been overwritten with new WAL records.
*/
XLogGetLastRemoved(&lastRemovedLog, &lastRemovedSeg);
XLByteToPrevSeg(recptr, log, seg);
if (log < lastRemovedLog ||
(log == lastRemovedLog && seg <= lastRemovedSeg))
{
char filename[MAXFNAMELEN];
XLogFileName(filename, ThisTimeLineID, log, seg);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("requested WAL segment %s has already been removed",
filename)));
}
}
/*
......@@ -801,6 +842,12 @@ WalSndShmemInit(void)
}
}
/*
* This isn't currently used for anything. Monitoring tools might be
* interested in it, and we'll need something like this in the future
* for synchronous replication.
*/
#ifdef NOT_USED
/*
* Returns the oldest Send position among walsenders. Or InvalidXLogRecPtr
* if none.
......@@ -834,3 +881,4 @@ GetOldestWALSendPointer(void)
}
return oldest;
}
#endif
......@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.546 2010/04/01 00:43:29 rhaas Exp $
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.547 2010/04/12 09:52:29 heikki Exp $
*
*--------------------------------------------------------------------
*/
......@@ -1647,6 +1647,15 @@ static struct config_int ConfigureNamesInt[] =
0, 0, 60, NULL, NULL
},
{
{"standby_keep_segments", PGC_SIGHUP, WAL_CHECKPOINTS,
gettext_noop("Sets the number of WAL files held for standby servers"),
NULL
},
&StandbySegments,
0, 0, INT_MAX, NULL, NULL
},
{
{"checkpoint_segments", PGC_SIGHUP, WAL_CHECKPOINTS,
gettext_noop("Sets the maximum distance in log segments between automatic WAL checkpoints."),
......
......@@ -193,6 +193,7 @@
#max_wal_senders = 0 # max number of walsender processes
#wal_sender_delay = 200ms # 1-10000 milliseconds
#standby_keep_segments = 0 # in logfile segments, 16MB each; 0 disables
#------------------------------------------------------------------------------
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.105 2010/04/01 00:43:29 rhaas Exp $
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.106 2010/04/12 09:52:29 heikki Exp $
*/
#ifndef XLOG_H
#define XLOG_H
......@@ -187,6 +187,7 @@ extern XLogRecPtr XactLastRecEnd;
/* these variables are GUC parameters related to XLOG */
extern int CheckPointSegments;
extern int StandbySegments;
extern int XLOGbuffers;
extern bool XLogArchiveMode;
extern char *XLogArchiveCommand;
......@@ -267,6 +268,7 @@ extern int XLogFileInit(uint32 log, uint32 seg,
extern int XLogFileOpen(uint32 log, uint32 seg);
extern void XLogGetLastRemoved(uint32 *log, uint32 *seg);
extern void XLogSetAsyncCommitLSN(XLogRecPtr record);
extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment