Commit 8eace46d authored by Alvaro Herrera's avatar Alvaro Herrera

Fix race condition in reading commit timestamps

If a user requests the commit timestamp for a transaction old enough
that its data is concurrently being truncated away by vacuum at just the
right time, they would receive an ugly internal file-not-found error
message from slru.c rather than the expected NULL return value.

In a primary server, the window for the race is very small: the lookup
has to occur exactly between the two calls by vacuum, and there's not a
lot that happens between them (mostly just a multixact truncate).  In a
standby server, however, the window is larger because the truncation is
executed as soon as the WAL record for it is replayed, but the advance
of the oldest-Xid is not executed until the next checkpoint record.

To fix in the primary, simply reverse the order of operations in
vac_truncate_clog.  To fix in the standby, augment the WAL truncation
record so that the standby is aware of the new oldest-XID value and can
apply the update immediately.  WAL version bumped because of this.

No backpatch, because of the low importance of the bug and its rarity.

Author: Craig Ringer
Reviewed-By: Petr Jelínek, Peter Eisentraut
Discussion: https://postgr.es/m/CAMsr+YFhVtRQT1VAwC+WGbbxZZRzNou=N9Ed-FrCqkwQ8H8oJQ@mail.gmail.com
parent 8b0fec93
...@@ -33,10 +33,10 @@ commit_ts_desc(StringInfo buf, XLogReaderState *record) ...@@ -33,10 +33,10 @@ commit_ts_desc(StringInfo buf, XLogReaderState *record)
} }
else if (info == COMMIT_TS_TRUNCATE) else if (info == COMMIT_TS_TRUNCATE)
{ {
int pageno; xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) rec;
memcpy(&pageno, rec, sizeof(int)); appendStringInfo(buf, "pageno %d, oldestXid %u",
appendStringInfo(buf, "%d", pageno); trunc->pageno, trunc->oldestXid);
} }
else if (info == COMMIT_TS_SETTS) else if (info == COMMIT_TS_SETTS)
{ {
......
...@@ -113,7 +113,7 @@ static bool CommitTsPagePrecedes(int page1, int page2); ...@@ -113,7 +113,7 @@ static bool CommitTsPagePrecedes(int page1, int page2);
static void ActivateCommitTs(void); static void ActivateCommitTs(void);
static void DeactivateCommitTs(void); static void DeactivateCommitTs(void);
static void WriteZeroPageXlogRec(int pageno); static void WriteZeroPageXlogRec(int pageno);
static void WriteTruncateXlogRec(int pageno); static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid);
static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
TransactionId *subxids, TimestampTz timestamp, TransactionId *subxids, TimestampTz timestamp,
RepOriginId nodeid); RepOriginId nodeid);
...@@ -824,7 +824,7 @@ TruncateCommitTs(TransactionId oldestXact) ...@@ -824,7 +824,7 @@ TruncateCommitTs(TransactionId oldestXact)
return; /* nothing to remove */ return; /* nothing to remove */
/* Write XLOG record */ /* Write XLOG record */
WriteTruncateXlogRec(cutoffPage); WriteTruncateXlogRec(cutoffPage, oldestXact);
/* Now we can remove the old CommitTs segment(s) */ /* Now we can remove the old CommitTs segment(s) */
SimpleLruTruncate(CommitTsCtl, cutoffPage); SimpleLruTruncate(CommitTsCtl, cutoffPage);
...@@ -910,10 +910,15 @@ WriteZeroPageXlogRec(int pageno) ...@@ -910,10 +910,15 @@ WriteZeroPageXlogRec(int pageno)
* Write a TRUNCATE xlog record * Write a TRUNCATE xlog record
*/ */
static void static void
WriteTruncateXlogRec(int pageno) WriteTruncateXlogRec(int pageno, TransactionId oldestXid)
{ {
xl_commit_ts_truncate xlrec;
xlrec.pageno = pageno;
xlrec.oldestXid = oldestXid;
XLogBeginInsert(); XLogBeginInsert();
XLogRegisterData((char *) (&pageno), sizeof(int)); XLogRegisterData((char *) (&xlrec), SizeOfCommitTsTruncate);
(void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE); (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE);
} }
...@@ -967,17 +972,17 @@ commit_ts_redo(XLogReaderState *record) ...@@ -967,17 +972,17 @@ commit_ts_redo(XLogReaderState *record)
} }
else if (info == COMMIT_TS_TRUNCATE) else if (info == COMMIT_TS_TRUNCATE)
{ {
int pageno; xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) XLogRecGetData(record);
memcpy(&pageno, XLogRecGetData(record), sizeof(int)); AdvanceOldestCommitTsXid(trunc->oldestXid);
/* /*
* During XLOG replay, latest_page_number isn't set up yet; insert a * During XLOG replay, latest_page_number isn't set up yet; insert a
* suitable value to bypass the sanity test in SimpleLruTruncate. * suitable value to bypass the sanity test in SimpleLruTruncate.
*/ */
CommitTsCtl->shared->latest_page_number = pageno; CommitTsCtl->shared->latest_page_number = trunc->pageno;
SimpleLruTruncate(CommitTsCtl, pageno); SimpleLruTruncate(CommitTsCtl, trunc->pageno);
} }
else if (info == COMMIT_TS_SETTS) else if (info == COMMIT_TS_SETTS)
{ {
......
...@@ -1152,6 +1152,15 @@ vac_truncate_clog(TransactionId frozenXID, ...@@ -1152,6 +1152,15 @@ vac_truncate_clog(TransactionId frozenXID,
if (bogus) if (bogus)
return; return;
/*
* Advance the oldest value for commit timestamps before truncating, so
* that if a user requests a timestamp for a transaction we're truncating
* away right after this point, they get NULL instead of an ugly "file not
* found" error from slru.c. This doesn't matter for xact/multixact
* because they are not subject to arbitrary lookups from users.
*/
AdvanceOldestCommitTsXid(frozenXID);
/* /*
* Truncate CLOG, multixact and CommitTs to the oldest computed value. * Truncate CLOG, multixact and CommitTs to the oldest computed value.
*/ */
...@@ -1167,7 +1176,6 @@ vac_truncate_clog(TransactionId frozenXID, ...@@ -1167,7 +1176,6 @@ vac_truncate_clog(TransactionId frozenXID,
*/ */
SetTransactionIdLimit(frozenXID, oldestxid_datoid); SetTransactionIdLimit(frozenXID, oldestxid_datoid);
SetMultiXactIdLimit(minMulti, minmulti_datoid); SetMultiXactIdLimit(minMulti, minmulti_datoid);
AdvanceOldestCommitTsXid(frozenXID);
} }
......
...@@ -61,6 +61,14 @@ typedef struct xl_commit_ts_set ...@@ -61,6 +61,14 @@ typedef struct xl_commit_ts_set
#define SizeOfCommitTsSet (offsetof(xl_commit_ts_set, mainxid) + \ #define SizeOfCommitTsSet (offsetof(xl_commit_ts_set, mainxid) + \
sizeof(TransactionId)) sizeof(TransactionId))
typedef struct xl_commit_ts_truncate
{
int pageno;
TransactionId oldestXid;
} xl_commit_ts_truncate;
#define SizeOfCommitTsTruncate (offsetof(xl_commit_ts_truncate, oldestXid) + \
sizeof(TransactionId))
extern void commit_ts_redo(XLogReaderState *record); extern void commit_ts_redo(XLogReaderState *record);
extern void commit_ts_desc(StringInfo buf, XLogReaderState *record); extern void commit_ts_desc(StringInfo buf, XLogReaderState *record);
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD093 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD094 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment