Commit 6ef2eba3 authored by Andres Freund

Skip checkpoints, archiving on idle systems.

Some background activity (like checkpoints, archive timeout, standby
snapshots) is not supposed to happen on an idle system. Unfortunately
so far it was not easy to determine when a system is idle, which
defeated some of the attempts to avoid redundant activity on an idle
system.

To make that easier, allow marking individual WAL insertions as not
being "important". By checking whether any important activity happened
since the last time an activity was performed, it now is easy to check
whether some action needs to be repeated.

Use the new facility for checkpoints, archive timeout and standby
snapshots.

The lack of such a facility causes some issues in older releases, but in my
opinion the consequences (superfluous checkpoints / archived segments)
aren't grave enough to warrant backpatching.

Author: Michael Paquier, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Amit Kapila, Kyotaro HORIGUCHI
Bug: #13685
Discussion:
    https://www.postgresql.org/message-id/20151016203031.3019.72930@wrigleys.postgresql.org
    https://www.postgresql.org/message-id/CAB7nPqQcPqxEM3S735Bd2RzApNqSNJVietAC=6kfkYv_45dKwA@mail.gmail.com
Backpatch: -
parent 097e4143
...@@ -2852,12 +2852,10 @@ include_dir 'conf.d' ...@@ -2852,12 +2852,10 @@ include_dir 'conf.d'
parameter is greater than zero, the server will switch to a new parameter is greater than zero, the server will switch to a new
segment file whenever this many seconds have elapsed since the last segment file whenever this many seconds have elapsed since the last
segment file switch, and there has been any database activity, segment file switch, and there has been any database activity,
including a single checkpoint. (Increasing including a single checkpoint (checkpoints are skipped if there is
<varname>checkpoint_timeout</> will reduce unnecessary no database activity). Note that archived files that are closed
checkpoints on an idle system.) early due to a forced switch are still the same length as completely
Note that archived files that are closed early full files. Therefore, it is unwise to use a very short
due to a forced switch are still the same length as completely full
files. Therefore, it is unwise to use a very short
<varname>archive_timeout</> &mdash; it will bloat your archive <varname>archive_timeout</> &mdash; it will bloat your archive
storage. <varname>archive_timeout</> settings of a minute or so are storage. <varname>archive_timeout</> settings of a minute or so are
usually reasonable. You should consider using streaming replication, usually reasonable. You should consider using streaming replication,
......
...@@ -2507,7 +2507,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, ...@@ -2507,7 +2507,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
heaptup->t_len - SizeofHeapTupleHeader); heaptup->t_len - SizeofHeapTupleHeader);
/* filtering by origin on a row level is much more efficient */ /* filtering by origin on a row level is much more efficient */
XLogIncludeOrigin(); XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP_ID, info); recptr = XLogInsert(RM_HEAP_ID, info);
...@@ -2846,7 +2846,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, ...@@ -2846,7 +2846,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
XLogRegisterBufData(0, tupledata, totaldatalen); XLogRegisterBufData(0, tupledata, totaldatalen);
/* filtering by origin on a row level is much more efficient */ /* filtering by origin on a row level is much more efficient */
XLogIncludeOrigin(); XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP2_ID, info); recptr = XLogInsert(RM_HEAP2_ID, info);
...@@ -3308,7 +3308,7 @@ l1: ...@@ -3308,7 +3308,7 @@ l1:
} }
/* filtering by origin on a row level is much more efficient */ /* filtering by origin on a row level is much more efficient */
XLogIncludeOrigin(); XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
...@@ -6035,7 +6035,7 @@ heap_finish_speculative(Relation relation, HeapTuple tuple) ...@@ -6035,7 +6035,7 @@ heap_finish_speculative(Relation relation, HeapTuple tuple)
XLogBeginInsert(); XLogBeginInsert();
/* We want the same filtering on this as on a plain insert */ /* We want the same filtering on this as on a plain insert */
XLogIncludeOrigin(); XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm); XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
...@@ -7703,7 +7703,7 @@ log_heap_update(Relation reln, Buffer oldbuf, ...@@ -7703,7 +7703,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
} }
/* filtering by origin on a row level is much more efficient */ /* filtering by origin on a row level is much more efficient */
XLogIncludeOrigin(); XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP_ID, info); recptr = XLogInsert(RM_HEAP_ID, info);
......
...@@ -5234,7 +5234,7 @@ XactLogCommitRecord(TimestampTz commit_time, ...@@ -5234,7 +5234,7 @@ XactLogCommitRecord(TimestampTz commit_time,
XLogRegisterData((char *) (&xl_origin), sizeof(xl_xact_origin)); XLogRegisterData((char *) (&xl_origin), sizeof(xl_xact_origin));
/* we allow filtering by xacts */ /* we allow filtering by xacts */
XLogIncludeOrigin(); XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
return XLogInsert(RM_XACT_ID, info); return XLogInsert(RM_XACT_ID, info);
} }
......
...@@ -442,11 +442,21 @@ typedef struct XLogwrtResult ...@@ -442,11 +442,21 @@ typedef struct XLogwrtResult
* the WAL record is just copied to the page and the lock is released. But * the WAL record is just copied to the page and the lock is released. But
* to avoid the deadlock-scenario explained above, the indicator is always * to avoid the deadlock-scenario explained above, the indicator is always
* updated before sleeping while holding an insertion lock. * updated before sleeping while holding an insertion lock.
*
* lastImportantAt contains the LSN of the last important WAL record inserted
* using a given lock. This value is used to detect if there has been
* important WAL activity since the last time some action, like a checkpoint,
* was performed - allowing to not repeat the action if not. The LSN is
* updated for all insertions, unless the XLOG_MARK_UNIMPORTANT flag was
* set. lastImportantAt is never cleared, only overwritten by the LSN of newer
* records. Tracking the WAL activity directly in WALInsertLock has the
* advantage of not needing any additional locks to update the value.
*/ */
typedef struct typedef struct
{ {
LWLock lock; LWLock lock;
XLogRecPtr insertingAt; XLogRecPtr insertingAt;
XLogRecPtr lastImportantAt;
} WALInsertLock; } WALInsertLock;
/* /*
...@@ -541,8 +551,9 @@ typedef struct XLogCtlData ...@@ -541,8 +551,9 @@ typedef struct XLogCtlData
XLogRecPtr unloggedLSN; XLogRecPtr unloggedLSN;
slock_t ulsn_lck; slock_t ulsn_lck;
/* Time of last xlog segment switch. Protected by WALWriteLock. */ /* Time and LSN of last xlog segment switch. Protected by WALWriteLock. */
pg_time_t lastSegSwitchTime; pg_time_t lastSegSwitchTime;
XLogRecPtr lastSegSwitchLSN;
/* /*
* Protected by info_lck and WALWriteLock (you must hold either lock to * Protected by info_lck and WALWriteLock (you must hold either lock to
...@@ -884,6 +895,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); ...@@ -884,6 +895,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
* which pages need a full-page image, and retry. If fpw_lsn is invalid, the * which pages need a full-page image, and retry. If fpw_lsn is invalid, the
* record is always inserted. * record is always inserted.
* *
* 'flags' gives more in-depth control on the record being inserted. See
* XLogSetRecordFlags() for details.
*
* The first XLogRecData in the chain must be for the record header, and its * The first XLogRecData in the chain must be for the record header, and its
* data must be MAXALIGNed. XLogInsertRecord fills in the xl_prev and * data must be MAXALIGNed. XLogInsertRecord fills in the xl_prev and
* xl_crc fields in the header, the rest of the header must already be filled * xl_crc fields in the header, the rest of the header must already be filled
...@@ -896,7 +910,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); ...@@ -896,7 +910,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
* WAL rule "write the log before the data".) * WAL rule "write the log before the data".)
*/ */
XLogRecPtr XLogRecPtr
XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) XLogInsertRecord(XLogRecData *rdata,
XLogRecPtr fpw_lsn,
uint8 flags)
{ {
XLogCtlInsert *Insert = &XLogCtl->Insert; XLogCtlInsert *Insert = &XLogCtl->Insert;
pg_crc32c rdata_crc; pg_crc32c rdata_crc;
...@@ -1013,6 +1029,18 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) ...@@ -1013,6 +1029,18 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
*/ */
CopyXLogRecordToWAL(rechdr->xl_tot_len, isLogSwitch, rdata, CopyXLogRecordToWAL(rechdr->xl_tot_len, isLogSwitch, rdata,
StartPos, EndPos); StartPos, EndPos);
/*
* Unless record is flagged as not important, update LSN of last
* important record in the current slot. When holding all locks, just
* update the first one.
*/
if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
{
int lockno = holdingAllLocks ? 0 : MyLockNo;
WALInsertLocks[lockno].l.lastImportantAt = StartPos;
}
} }
else else
{ {
...@@ -2332,6 +2360,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) ...@@ -2332,6 +2360,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
XLogArchiveNotifySeg(openLogSegNo); XLogArchiveNotifySeg(openLogSegNo);
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
/* /*
* Request a checkpoint if we've consumed too much xlog since * Request a checkpoint if we've consumed too much xlog since
...@@ -4715,6 +4744,7 @@ XLOGShmemInit(void) ...@@ -4715,6 +4744,7 @@ XLOGShmemInit(void)
{ {
LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT); LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr; WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr;
WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;
} }
/* /*
...@@ -7431,8 +7461,9 @@ StartupXLOG(void) ...@@ -7431,8 +7461,9 @@ StartupXLOG(void)
*/ */
InRecovery = false; InRecovery = false;
/* start the archive_timeout timer running */ /* start the archive_timeout timer and LSN running */
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
XLogCtl->lastSegSwitchLSN = EndOfLog;
/* also initialize latestCompletedXid, to nextXid - 1 */ /* also initialize latestCompletedXid, to nextXid - 1 */
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
...@@ -7994,16 +8025,51 @@ GetFlushRecPtr(void) ...@@ -7994,16 +8025,51 @@ GetFlushRecPtr(void)
} }
/* /*
* Get the time of the last xlog segment switch * GetLastImportantRecPtr -- Returns the LSN of the last important record
* inserted. All records not explicitly marked as unimportant are considered
* important.
*
* The LSN is determined by computing the maximum of
* WALInsertLocks[i].lastImportantAt.
*/
XLogRecPtr
GetLastImportantRecPtr(void)
{
/* Running maximum; stays InvalidXLogRecPtr unless some slot compares greater. */
XLogRecPtr res = InvalidXLogRecPtr;
int i;
/*
* Visit every WAL insertion lock slot in turn. Each slot's lock is released
* before the next one is taken, so the locks are never all held at once here.
*/
for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
{
XLogRecPtr last_important;
/*
* Need to take a lock to prevent torn reads of the LSN, which are
* possible on some of the supported platforms. WAL insert locks only
* support exclusive mode, so we have to use that.
*/
LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
last_important = WALInsertLocks[i].l.lastImportantAt;
LWLockRelease(&WALInsertLocks[i].l.lock);
/* Keep the largest lastImportantAt value seen so far. */
if (res < last_important)
res = last_important;
}
return res;
}
/*
* Get the time and LSN of the last xlog segment switch
*/ */
pg_time_t pg_time_t
GetLastSegSwitchTime(void) GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN)
{ {
pg_time_t result; pg_time_t result;
/* Need WALWriteLock, but shared lock is sufficient */ /* Need WALWriteLock, but shared lock is sufficient */
LWLockAcquire(WALWriteLock, LW_SHARED); LWLockAcquire(WALWriteLock, LW_SHARED);
result = XLogCtl->lastSegSwitchTime; result = XLogCtl->lastSegSwitchTime;
*lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
LWLockRelease(WALWriteLock); LWLockRelease(WALWriteLock);
return result; return result;
...@@ -8065,7 +8131,7 @@ ShutdownXLOG(int code, Datum arg) ...@@ -8065,7 +8131,7 @@ ShutdownXLOG(int code, Datum arg)
* record will go to the next XLOG file and won't be archived (yet). * record will go to the next XLOG file and won't be archived (yet).
*/ */
if (XLogArchivingActive() && XLogArchiveCommandSet()) if (XLogArchivingActive() && XLogArchiveCommandSet())
RequestXLogSwitch(); RequestXLogSwitch(false);
CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
} }
...@@ -8253,7 +8319,7 @@ CreateCheckPoint(int flags) ...@@ -8253,7 +8319,7 @@ CreateCheckPoint(int flags)
uint32 freespace; uint32 freespace;
XLogRecPtr PriorRedoPtr; XLogRecPtr PriorRedoPtr;
XLogRecPtr curInsert; XLogRecPtr curInsert;
XLogRecPtr prevPtr; XLogRecPtr last_important_lsn;
VirtualTransactionId *vxids; VirtualTransactionId *vxids;
int nvxids; int nvxids;
...@@ -8333,39 +8399,34 @@ CreateCheckPoint(int flags) ...@@ -8333,39 +8399,34 @@ CreateCheckPoint(int flags)
else else
checkPoint.oldestActiveXid = InvalidTransactionId; checkPoint.oldestActiveXid = InvalidTransactionId;
/*
* Get location of last important record before acquiring insert locks (as
* GetLastImportantRecPtr() also locks WAL locks).
*/
last_important_lsn = GetLastImportantRecPtr();
/* /*
* We must block concurrent insertions while examining insert state to * We must block concurrent insertions while examining insert state to
* determine the checkpoint REDO pointer. * determine the checkpoint REDO pointer.
*/ */
WALInsertLockAcquireExclusive(); WALInsertLockAcquireExclusive();
curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos); curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos);
/* /*
* If this isn't a shutdown or forced checkpoint, and we have not inserted * If this isn't a shutdown or forced checkpoint, and if there has been no
* any XLOG records since the start of the last checkpoint, skip the * WAL activity requiring a checkpoint, skip it. The idea here is to
* checkpoint. The idea here is to avoid inserting duplicate checkpoints * avoid inserting duplicate checkpoints when the system is idle.
* when the system is idle. That wastes log space, and more importantly it
* exposes us to possible loss of both current and previous checkpoint
* records if the machine crashes just as we're writing the update.
* (Perhaps it'd make even more sense to checkpoint only when the previous
* checkpoint record is in a different xlog page?)
*
* If the previous checkpoint crossed a WAL segment, however, we create
* the checkpoint anyway, to have the latest checkpoint fully contained in
* the new segment. This is for a little bit of extra robustness: it's
* better if you don't need to keep two WAL segments around to recover the
* checkpoint.
*/ */
if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
CHECKPOINT_FORCE)) == 0) CHECKPOINT_FORCE)) == 0)
{ {
if (prevPtr == ControlFile->checkPointCopy.redo && if (last_important_lsn == ControlFile->checkPoint)
prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE)
{ {
WALInsertLockRelease(); WALInsertLockRelease();
LWLockRelease(CheckpointLock); LWLockRelease(CheckpointLock);
END_CRIT_SECTION(); END_CRIT_SECTION();
ereport(DEBUG1,
(errmsg("checkpoint skipped due to an idle system")));
return; return;
} }
} }
...@@ -9122,12 +9183,15 @@ XLogPutNextOid(Oid nextOid) ...@@ -9122,12 +9183,15 @@ XLogPutNextOid(Oid nextOid)
* write a switch record because we are already at segment start. * write a switch record because we are already at segment start.
*/ */
XLogRecPtr XLogRecPtr
RequestXLogSwitch(void) RequestXLogSwitch(bool mark_unimportant)
{ {
XLogRecPtr RecPtr; XLogRecPtr RecPtr;
/* XLOG SWITCH has no data */ /* XLOG SWITCH has no data */
XLogBeginInsert(); XLogBeginInsert();
if (mark_unimportant)
XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH); RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
return RecPtr; return RecPtr;
...@@ -9997,7 +10061,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ...@@ -9997,7 +10061,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
* recovery case described above. * recovery case described above.
*/ */
if (!backup_started_in_recovery) if (!backup_started_in_recovery)
RequestXLogSwitch(); RequestXLogSwitch(false);
do do
{ {
...@@ -10582,7 +10646,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ...@@ -10582,7 +10646,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
* Force a switch to a new xlog segment file, so that the backup is valid * Force a switch to a new xlog segment file, so that the backup is valid
* as soon as archiver moves out the current segment file. * as soon as archiver moves out the current segment file.
*/ */
RequestXLogSwitch(); RequestXLogSwitch(false);
XLByteToPrevSeg(stoppoint, _logSegNo); XLByteToPrevSeg(stoppoint, _logSegNo);
XLogFileName(stopxlogfilename, ThisTimeLineID, _logSegNo); XLogFileName(stopxlogfilename, ThisTimeLineID, _logSegNo);
......
...@@ -293,7 +293,7 @@ pg_switch_xlog(PG_FUNCTION_ARGS) ...@@ -293,7 +293,7 @@ pg_switch_xlog(PG_FUNCTION_ARGS)
errmsg("recovery is in progress"), errmsg("recovery is in progress"),
errhint("WAL control functions cannot be executed during recovery."))); errhint("WAL control functions cannot be executed during recovery.")));
switchpoint = RequestXLogSwitch(); switchpoint = RequestXLogSwitch(false);
/* /*
* As a convenience, return the WAL location of the switch record * As a convenience, return the WAL location of the switch record
......
...@@ -73,8 +73,8 @@ static XLogRecData *mainrdata_head; ...@@ -73,8 +73,8 @@ static XLogRecData *mainrdata_head;
static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head; static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
static uint32 mainrdata_len; /* total # of bytes in chain */ static uint32 mainrdata_len; /* total # of bytes in chain */
/* Should the in-progress insertion log the origin? */ /* flags for the in-progress insertion */
static bool include_origin = false; static uint8 curinsert_flags = 0;
/* /*
* These are used to hold the record header while constructing a record. * These are used to hold the record header while constructing a record.
...@@ -201,7 +201,7 @@ XLogResetInsertion(void) ...@@ -201,7 +201,7 @@ XLogResetInsertion(void)
max_registered_block_id = 0; max_registered_block_id = 0;
mainrdata_len = 0; mainrdata_len = 0;
mainrdata_last = (XLogRecData *) &mainrdata_head; mainrdata_last = (XLogRecData *) &mainrdata_head;
include_origin = false; curinsert_flags = 0;
begininsert_called = false; begininsert_called = false;
} }
...@@ -384,13 +384,20 @@ XLogRegisterBufData(uint8 block_id, char *data, int len) ...@@ -384,13 +384,20 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
} }
/* /*
* Should this record include the replication origin if one is set up? * Set insert status flags for the upcoming WAL record.
*
* The flags that can be used here are:
* - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be
* included in the record.
* - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for
* durability, which allows to avoid triggering WAL archiving and other
* background activity.
*/ */
void void
XLogIncludeOrigin(void) XLogSetRecordFlags(uint8 flags)
{ {
Assert(begininsert_called); Assert(begininsert_called);
include_origin = true; curinsert_flags = flags;
} }
/* /*
...@@ -450,7 +457,7 @@ XLogInsert(RmgrId rmid, uint8 info) ...@@ -450,7 +457,7 @@ XLogInsert(RmgrId rmid, uint8 info)
rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
&fpw_lsn); &fpw_lsn);
EndPos = XLogInsertRecord(rdt, fpw_lsn); EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags);
} while (EndPos == InvalidXLogRecPtr); } while (EndPos == InvalidXLogRecPtr);
XLogResetInsertion(); XLogResetInsertion();
...@@ -701,7 +708,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, ...@@ -701,7 +708,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
} }
/* followed by the record's origin, if any */ /* followed by the record's origin, if any */
if (include_origin && replorigin_session_origin != InvalidRepOriginId) if ((curinsert_flags & XLOG_INCLUDE_ORIGIN) &&
replorigin_session_origin != InvalidRepOriginId)
{ {
*(scratch++) = XLR_BLOCK_ID_ORIGIN; *(scratch++) = XLR_BLOCK_ID_ORIGIN;
memcpy(scratch, &replorigin_session_origin, sizeof(replorigin_session_origin)); memcpy(scratch, &replorigin_session_origin, sizeof(replorigin_session_origin));
......
...@@ -310,7 +310,7 @@ BackgroundWriterMain(void) ...@@ -310,7 +310,7 @@ BackgroundWriterMain(void)
* check whether there has been any WAL inserted since the last time * check whether there has been any WAL inserted since the last time
* we've logged a running xacts. * we've logged a running xacts.
* *
* We do this logging in the bgwriter as its the only process that is * We do this logging in the bgwriter as it is the only process that is
* run regularly and returns to its mainloop all the time. E.g. * run regularly and returns to its mainloop all the time. E.g.
* Checkpointer, when active, is barely ever in its mainloop and thus * Checkpointer, when active, is barely ever in its mainloop and thus
* makes it hard to log regularly. * makes it hard to log regularly.
...@@ -324,11 +324,11 @@ BackgroundWriterMain(void) ...@@ -324,11 +324,11 @@ BackgroundWriterMain(void)
LOG_SNAPSHOT_INTERVAL_MS); LOG_SNAPSHOT_INTERVAL_MS);
/* /*
* only log if enough time has passed and some xlog record has * Only log if enough time has passed and interesting records have
* been inserted. * been inserted since the last snapshot.
*/ */
if (now >= timeout && if (now >= timeout &&
last_snapshot_lsn != GetXLogInsertRecPtr()) last_snapshot_lsn < GetLastImportantRecPtr())
{ {
last_snapshot_lsn = LogStandbySnapshot(); last_snapshot_lsn = LogStandbySnapshot();
last_snapshot_ts = now; last_snapshot_ts = now;
......
...@@ -573,15 +573,21 @@ CheckpointerMain(void) ...@@ -573,15 +573,21 @@ CheckpointerMain(void)
/* /*
* CheckArchiveTimeout -- check for archive_timeout and switch xlog files * CheckArchiveTimeout -- check for archive_timeout and switch xlog files
* *
* This will switch to a new WAL file and force an archive file write * This will switch to a new WAL file and force an archive file write if
* if any activity is recorded in the current WAL file, including just * meaningful activity is recorded in the current WAL file. This includes most
* a single checkpoint record. * writes, including just a single checkpoint record, but excludes WAL records
* that were inserted with the XLOG_MARK_UNIMPORTANT flag being set (like
* snapshots of running transactions). Such records, depending on
* configuration, occur on regular intervals and don't contain important
* information. This avoids generating archives with a few unimportant
* records.
*/ */
static void static void
CheckArchiveTimeout(void) CheckArchiveTimeout(void)
{ {
pg_time_t now; pg_time_t now;
pg_time_t last_time; pg_time_t last_time;
XLogRecPtr last_switch_lsn;
if (XLogArchiveTimeout <= 0 || RecoveryInProgress()) if (XLogArchiveTimeout <= 0 || RecoveryInProgress())
return; return;
...@@ -596,26 +602,33 @@ CheckArchiveTimeout(void) ...@@ -596,26 +602,33 @@ CheckArchiveTimeout(void)
* Update local state ... note that last_xlog_switch_time is the last time * Update local state ... note that last_xlog_switch_time is the last time
* a switch was performed *or requested*. * a switch was performed *or requested*.
*/ */
last_time = GetLastSegSwitchTime(); last_time = GetLastSegSwitchData(&last_switch_lsn);
last_xlog_switch_time = Max(last_xlog_switch_time, last_time); last_xlog_switch_time = Max(last_xlog_switch_time, last_time);
/* Now we can do the real check */ /* Now we can do the real checks */
if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout) if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
{ {
XLogRecPtr switchpoint;
/* OK, it's time to switch */
switchpoint = RequestXLogSwitch();
/* /*
* If the returned pointer points exactly to a segment boundary, * Switch segment only when "important" WAL has been logged since the
* assume nothing happened. * last segment switch.
*/ */
if ((switchpoint % XLogSegSize) != 0) if (GetLastImportantRecPtr() > last_switch_lsn)
ereport(DEBUG1, {
(errmsg("transaction log switch forced (archive_timeout=%d)", XLogRecPtr switchpoint;
XLogArchiveTimeout)));
/* mark switch as unimportant, avoids triggering checkpoints */
switchpoint = RequestXLogSwitch(true);
/*
* If the returned pointer points exactly to a segment boundary,
* assume nothing happened.
*/
if ((switchpoint % XLogSegSize) != 0)
ereport(DEBUG1,
(errmsg("transaction log switch forced (archive_timeout=%d)",
XLogArchiveTimeout)));
}
/* /*
* Update state in any case, so we don't retry constantly when the * Update state in any case, so we don't retry constantly when the
......
...@@ -73,7 +73,7 @@ LogLogicalMessage(const char *prefix, const char *message, size_t size, ...@@ -73,7 +73,7 @@ LogLogicalMessage(const char *prefix, const char *message, size_t size,
XLogRegisterData((char *) message, size); XLogRegisterData((char *) message, size);
/* allow origin filtering */ /* allow origin filtering */
XLogIncludeOrigin(); XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
return XLogInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE); return XLogInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE);
} }
......
...@@ -961,10 +961,11 @@ LogStandbySnapshot(void) ...@@ -961,10 +961,11 @@ LogStandbySnapshot(void)
/* /*
* Record an enhanced snapshot of running transactions into WAL. * Record an enhanced snapshot of running transactions into WAL.
* *
* The definitions of RunningTransactionsData and xl_xact_running_xacts * The definitions of RunningTransactionsData and xl_xact_running_xacts are
* are similar. We keep them separate because xl_xact_running_xacts * similar. We keep them separate because xl_xact_running_xacts is a
* is a contiguous chunk of memory and never exists fully until it is * contiguous chunk of memory and never exists fully until it is assembled in
* assembled in WAL. * WAL. The inserted records are marked as not being important for durability,
* to avoid triggering superfluous checkpoint / archiving activity.
*/ */
static XLogRecPtr static XLogRecPtr
LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
...@@ -981,6 +982,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) ...@@ -981,6 +982,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
/* Header */ /* Header */
XLogBeginInsert(); XLogBeginInsert();
XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts); XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
/* array of TransactionIds */ /* array of TransactionIds */
...@@ -1035,6 +1037,7 @@ LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks) ...@@ -1035,6 +1037,7 @@ LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
XLogBeginInsert(); XLogBeginInsert();
XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks)); XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock)); XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
(void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
} }
......
...@@ -184,6 +184,13 @@ extern bool XLOG_DEBUG; ...@@ -184,6 +184,13 @@ extern bool XLOG_DEBUG;
#define CHECKPOINT_CAUSE_XLOG 0x0040 /* XLOG consumption */ #define CHECKPOINT_CAUSE_XLOG 0x0040 /* XLOG consumption */
#define CHECKPOINT_CAUSE_TIME 0x0080 /* Elapsed time */ #define CHECKPOINT_CAUSE_TIME 0x0080 /* Elapsed time */
/*
* Flag bits for the record being inserted, set using XLogSetRecordFlags().
*/
#define XLOG_INCLUDE_ORIGIN 0x01 /* include the replication origin */
#define XLOG_MARK_UNIMPORTANT 0x02 /* record not important for durability */
/* Checkpoint statistics */ /* Checkpoint statistics */
typedef struct CheckpointStatsData typedef struct CheckpointStatsData
{ {
...@@ -211,7 +218,9 @@ extern CheckpointStatsData CheckpointStats; ...@@ -211,7 +218,9 @@ extern CheckpointStatsData CheckpointStats;
struct XLogRecData; struct XLogRecData;
extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn); extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
XLogRecPtr fpw_lsn,
uint8 flags);
extern void XLogFlush(XLogRecPtr RecPtr); extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void); extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
...@@ -262,6 +271,7 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p) ...@@ -262,6 +271,7 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetRedoRecPtr(void);
extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void);
extern XLogRecPtr GetFlushRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void);
extern XLogRecPtr GetLastImportantRecPtr(void);
extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
extern void RemovePromoteSignalFiles(void); extern void RemovePromoteSignalFiles(void);
......
...@@ -283,8 +283,8 @@ extern const RmgrData RmgrTable[]; ...@@ -283,8 +283,8 @@ extern const RmgrData RmgrTable[];
/* /*
* Exported to support xlog switching from checkpointer * Exported to support xlog switching from checkpointer
*/ */
extern pg_time_t GetLastSegSwitchTime(void); extern pg_time_t GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN);
extern XLogRecPtr RequestXLogSwitch(void); extern XLogRecPtr RequestXLogSwitch(bool mark_uninmportant);
extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli);
......
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
/* prototypes for public functions in xloginsert.c: */ /* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void); extern void XLogBeginInsert(void);
extern void XLogIncludeOrigin(void); extern void XLogSetRecordFlags(uint8 flags);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
extern void XLogEnsureRecordSpace(int nbuffers, int ndatas); extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
extern void XLogRegisterData(char *data, int len); extern void XLogRegisterData(char *data, int len);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment