Commit aa8bd089 authored by Alvaro Herrera's avatar Alvaro Herrera

Revert "Avoid creating archive status ".ready" files too early"

This reverts commit 515e3d84a0b5 and equivalent commits in back
branches.  This solution to the problem has a number of problems, so
we'll try again with a different approach.

Per note from Andres Freund

Discussion: https://postgr.es/m/20210831042949.52eqp5xwbxgrfank@alap3.anarazel.de
parent 69d670e6
...@@ -733,18 +733,6 @@ typedef struct XLogCtlData ...@@ -733,18 +733,6 @@ typedef struct XLogCtlData
XLogRecPtr lastFpwDisableRecPtr; XLogRecPtr lastFpwDisableRecPtr;
slock_t info_lck; /* locks shared variables shown above */ slock_t info_lck; /* locks shared variables shown above */
/*
* Variables used to track segment-boundary-crossing WAL records. See
* RegisterSegmentBoundary. Protected by segtrack_lck.
*/
XLogSegNo lastNotifiedSeg;
XLogSegNo earliestSegBoundary;
XLogRecPtr earliestSegBoundaryEndPtr;
XLogSegNo latestSegBoundary;
XLogRecPtr latestSegBoundaryEndPtr;
slock_t segtrack_lck; /* locks shared variables shown above */
} XLogCtlData; } XLogCtlData;
static XLogCtlData *XLogCtl = NULL; static XLogCtlData *XLogCtl = NULL;
...@@ -943,7 +931,6 @@ static void RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo, ...@@ -943,7 +931,6 @@ static void RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo,
XLogSegNo *endlogSegNo); XLogSegNo *endlogSegNo);
static void UpdateLastRemovedPtr(char *filename); static void UpdateLastRemovedPtr(char *filename);
static void ValidateXLOGDirectoryStructure(void); static void ValidateXLOGDirectoryStructure(void);
static void RegisterSegmentBoundary(XLogSegNo seg, XLogRecPtr pos);
static void CleanupBackupHistory(void); static void CleanupBackupHistory(void);
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force); static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
static XLogRecord *ReadRecord(XLogReaderState *xlogreader, static XLogRecord *ReadRecord(XLogReaderState *xlogreader,
...@@ -1178,56 +1165,23 @@ XLogInsertRecord(XLogRecData *rdata, ...@@ -1178,56 +1165,23 @@ XLogInsertRecord(XLogRecData *rdata,
END_CRIT_SECTION(); END_CRIT_SECTION();
/* /*
* If we crossed page boundary, update LogwrtRqst.Write; if we crossed * Update shared LogwrtRqst.Write, if we crossed page boundary.
* segment boundary, register that and wake up walwriter.
*/ */
if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ) if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
{ {
XLogSegNo StartSeg;
XLogSegNo EndSeg;
XLByteToSeg(StartPos, StartSeg, wal_segment_size);
XLByteToSeg(EndPos, EndSeg, wal_segment_size);
/*
* Register our crossing the segment boundary if that occurred.
*
* Note that we did not use XLByteToPrevSeg() for determining the
* ending segment. This is so that a record that fits perfectly into
* the end of the segment causes the latter to get marked ready for
* archival immediately.
*/
if (StartSeg != EndSeg && XLogArchivingActive())
RegisterSegmentBoundary(EndSeg, EndPos);
/*
* Advance LogwrtRqst.Write so that it includes new block(s).
*
* We do this after registering the segment boundary so that the
* comparison with the flushed pointer below can use the latest value
* known globally.
*/
SpinLockAcquire(&XLogCtl->info_lck); SpinLockAcquire(&XLogCtl->info_lck);
/* advance global request to include new block(s) */
if (XLogCtl->LogwrtRqst.Write < EndPos) if (XLogCtl->LogwrtRqst.Write < EndPos)
XLogCtl->LogwrtRqst.Write = EndPos; XLogCtl->LogwrtRqst.Write = EndPos;
/* update local result copy while I have the chance */ /* update local result copy while I have the chance */
LogwrtResult = XLogCtl->LogwrtResult; LogwrtResult = XLogCtl->LogwrtResult;
SpinLockRelease(&XLogCtl->info_lck); SpinLockRelease(&XLogCtl->info_lck);
/*
* There's a chance that the record was already flushed to disk and we
* missed marking segments as ready for archive. If this happens, we
* nudge the WALWriter, which will take care of notifying segments as
* needed.
*/
if (StartSeg != EndSeg && XLogArchivingActive() &&
LogwrtResult.Flush >= EndPos && ProcGlobal->walwriterLatch)
SetLatch(ProcGlobal->walwriterLatch);
} }
/* /*
* If this was an XLOG_SWITCH record, flush the record and the empty * If this was an XLOG_SWITCH record, flush the record and the empty
* padding space that fills the rest of the segment. * padding space that fills the rest of the segment, and perform
* end-of-segment actions (eg, notifying archiver).
*/ */
if (isLogSwitch) if (isLogSwitch)
{ {
...@@ -2479,7 +2433,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) ...@@ -2479,7 +2433,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
/* We should always be inside a critical section here */ /* We should always be inside a critical section here */
Assert(CritSectionCount > 0); Assert(CritSectionCount > 0);
Assert(LWLockHeldByMe(WALWriteLock));
/* /*
* Update local LogwrtResult (caller probably did this already, but...) * Update local LogwrtResult (caller probably did this already, but...)
...@@ -2646,12 +2599,11 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) ...@@ -2646,12 +2599,11 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
* later. Doing it here ensures that one and only one backend will * later. Doing it here ensures that one and only one backend will
* perform this fsync. * perform this fsync.
* *
* If WAL archiving is active, we attempt to notify the archiver * This is also the right place to notify the Archiver that the
* of any segments that are now ready for archival. * segment is ready to copy to archival storage, and to update the
* * timer for archive_timeout, and to signal for a checkpoint if
* This is also the right place to update the timer for * too many logfile segments have been used since the last
* archive_timeout and to signal for a checkpoint if too many * checkpoint.
* logfile segments have been used since the last checkpoint.
*/ */
if (finishing_seg) if (finishing_seg)
{ {
...@@ -2663,7 +2615,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) ...@@ -2663,7 +2615,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
LogwrtResult.Flush = LogwrtResult.Write; /* end of page */ LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
if (XLogArchivingActive()) if (XLogArchivingActive())
NotifySegmentsReadyForArchive(LogwrtResult.Flush); XLogArchiveNotifySeg(openLogSegNo);
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush; XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
...@@ -2751,9 +2703,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) ...@@ -2751,9 +2703,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush; XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
SpinLockRelease(&XLogCtl->info_lck); SpinLockRelease(&XLogCtl->info_lck);
} }
if (XLogArchivingActive())
NotifySegmentsReadyForArchive(LogwrtResult.Flush);
} }
/* /*
...@@ -4375,129 +4324,6 @@ ValidateXLOGDirectoryStructure(void) ...@@ -4375,129 +4324,6 @@ ValidateXLOGDirectoryStructure(void)
} }
} }
/*
* RegisterSegmentBoundary
*
* WAL records that are split across a segment boundary require special
* treatment for archiving: the initial segment must not be archived until
* the end segment has been flushed, in case we crash before we have
* the chance to flush the end segment (because after recovery we would
* overwrite that WAL record with a different one, and so the file we
* archived no longer represents truth.) This also applies to streaming
* physical replication.
*
* To handle this, we keep track of the LSN of WAL records that cross
* segment boundaries. Two such are sufficient: the ones with the
* earliest and the latest end pointers we know about, since the flush
* position advances monotonically. WAL record writers register
* boundary-crossing records here, which is used by .ready file creation
* to delay until the end segment is known flushed.
*/
static void
RegisterSegmentBoundary(XLogSegNo seg, XLogRecPtr endpos)
{
XLogSegNo segno PG_USED_FOR_ASSERTS_ONLY;
/* verify caller computed segment number correctly */
AssertArg((XLByteToSeg(endpos, segno, wal_segment_size), segno == seg));
SpinLockAcquire(&XLogCtl->segtrack_lck);
/*
* If no segment boundaries are registered, store the new segment boundary
* in earliestSegBoundary. Otherwise, store the greater segment
* boundaries in latestSegBoundary.
*/
if (XLogCtl->earliestSegBoundary == MaxXLogSegNo)
{
XLogCtl->earliestSegBoundary = seg;
XLogCtl->earliestSegBoundaryEndPtr = endpos;
}
else if (seg > XLogCtl->earliestSegBoundary &&
(XLogCtl->latestSegBoundary == MaxXLogSegNo ||
seg > XLogCtl->latestSegBoundary))
{
XLogCtl->latestSegBoundary = seg;
XLogCtl->latestSegBoundaryEndPtr = endpos;
}
SpinLockRelease(&XLogCtl->segtrack_lck);
}
/*
* NotifySegmentsReadyForArchive
*
* Mark segments as ready for archival, given that it is safe to do so.
* This function is idempotent.
*/
void
NotifySegmentsReadyForArchive(XLogRecPtr flushRecPtr)
{
XLogSegNo latest_boundary_seg;
XLogSegNo last_notified;
XLogSegNo flushed_seg;
XLogSegNo seg;
bool keep_latest;
XLByteToSeg(flushRecPtr, flushed_seg, wal_segment_size);
SpinLockAcquire(&XLogCtl->segtrack_lck);
if (XLogCtl->latestSegBoundary <= flushed_seg &&
XLogCtl->latestSegBoundaryEndPtr <= flushRecPtr)
{
latest_boundary_seg = XLogCtl->latestSegBoundary;
keep_latest = false;
}
else if (XLogCtl->earliestSegBoundary <= flushed_seg &&
XLogCtl->earliestSegBoundaryEndPtr <= flushRecPtr)
{
latest_boundary_seg = XLogCtl->earliestSegBoundary;
keep_latest = true;
}
else
{
SpinLockRelease(&XLogCtl->segtrack_lck);
return;
}
last_notified = XLogCtl->lastNotifiedSeg;
/*
* Update shared memory and discard segment boundaries that are no longer
* needed.
*
* It is safe to update shared memory before we attempt to create the
* .ready files. If our calls to XLogArchiveNotifySeg() fail,
* RemoveOldXlogFiles() will retry it as needed.
*/
if (last_notified < latest_boundary_seg - 1)
XLogCtl->lastNotifiedSeg = latest_boundary_seg - 1;
if (keep_latest)
{
XLogCtl->earliestSegBoundary = XLogCtl->latestSegBoundary;
XLogCtl->earliestSegBoundaryEndPtr = XLogCtl->latestSegBoundaryEndPtr;
}
else
{
XLogCtl->earliestSegBoundary = MaxXLogSegNo;
XLogCtl->earliestSegBoundaryEndPtr = InvalidXLogRecPtr;
}
XLogCtl->latestSegBoundary = MaxXLogSegNo;
XLogCtl->latestSegBoundaryEndPtr = InvalidXLogRecPtr;
SpinLockRelease(&XLogCtl->segtrack_lck);
/*
* Notify archiver about segments that are ready for archival (by creating
* the corresponding .ready files).
*/
for (seg = last_notified + 1; seg < latest_boundary_seg; seg++)
XLogArchiveNotifySeg(seg);
}
/* /*
* Remove previous backup history files. This also retries creation of * Remove previous backup history files. This also retries creation of
* .ready files for any backup history files for which XLogArchiveNotify * .ready files for any backup history files for which XLogArchiveNotify
...@@ -5399,17 +5225,9 @@ XLOGShmemInit(void) ...@@ -5399,17 +5225,9 @@ XLOGShmemInit(void)
SpinLockInit(&XLogCtl->Insert.insertpos_lck); SpinLockInit(&XLogCtl->Insert.insertpos_lck);
SpinLockInit(&XLogCtl->info_lck); SpinLockInit(&XLogCtl->info_lck);
SpinLockInit(&XLogCtl->segtrack_lck);
SpinLockInit(&XLogCtl->ulsn_lck); SpinLockInit(&XLogCtl->ulsn_lck);
InitSharedLatch(&XLogCtl->recoveryWakeupLatch); InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
ConditionVariableInit(&XLogCtl->recoveryNotPausedCV); ConditionVariableInit(&XLogCtl->recoveryNotPausedCV);
/* Initialize stuff for marking segments as ready for archival. */
XLogCtl->lastNotifiedSeg = MaxXLogSegNo;
XLogCtl->earliestSegBoundary = MaxXLogSegNo;
XLogCtl->earliestSegBoundaryEndPtr = InvalidXLogRecPtr;
XLogCtl->latestSegBoundary = MaxXLogSegNo;
XLogCtl->latestSegBoundaryEndPtr = InvalidXLogRecPtr;
} }
/* /*
...@@ -8040,20 +7858,6 @@ StartupXLOG(void) ...@@ -8040,20 +7858,6 @@ StartupXLOG(void)
XLogCtl->LogwrtRqst.Write = EndOfLog; XLogCtl->LogwrtRqst.Write = EndOfLog;
XLogCtl->LogwrtRqst.Flush = EndOfLog; XLogCtl->LogwrtRqst.Flush = EndOfLog;
/*
* Initialize XLogCtl->lastNotifiedSeg to the previous WAL file.
*/
if (XLogArchivingActive())
{
XLogSegNo EndOfLogSeg;
XLByteToSeg(EndOfLog, EndOfLogSeg, wal_segment_size);
SpinLockAcquire(&XLogCtl->segtrack_lck);
XLogCtl->lastNotifiedSeg = EndOfLogSeg - 1;
SpinLockRelease(&XLogCtl->segtrack_lck);
}
/* /*
* Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
* record before resource manager writes cleanup WAL records or checkpoint * record before resource manager writes cleanup WAL records or checkpoint
......
...@@ -248,13 +248,6 @@ WalWriterMain(void) ...@@ -248,13 +248,6 @@ WalWriterMain(void)
/* Process any signals received recently */ /* Process any signals received recently */
HandleWalWriterInterrupts(); HandleWalWriterInterrupts();
/*
* Notify the archiver of any WAL segments that are ready. We do this
* here to handle a race condition where WAL is flushed to disk prior
* to registering the segment boundary.
*/
NotifySegmentsReadyForArchive(GetFlushRecPtr());
/* /*
* Do what we're here for; then, if XLogBackgroundFlush() found useful * Do what we're here for; then, if XLogBackgroundFlush() found useful
* work to do, reset hibernation counter. * work to do, reset hibernation counter.
......
...@@ -349,7 +349,6 @@ extern XLogRecPtr GetInsertRecPtr(void); ...@@ -349,7 +349,6 @@ extern XLogRecPtr GetInsertRecPtr(void);
extern XLogRecPtr GetFlushRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void);
extern XLogRecPtr GetLastImportantRecPtr(void); extern XLogRecPtr GetLastImportantRecPtr(void);
extern void RemovePromoteSignalFiles(void); extern void RemovePromoteSignalFiles(void);
extern void NotifySegmentsReadyForArchive(XLogRecPtr flushRecPtr);
extern bool PromoteIsTriggered(void); extern bool PromoteIsTriggered(void);
extern bool CheckPromoteSignal(void); extern bool CheckPromoteSignal(void);
......
...@@ -46,7 +46,6 @@ typedef uint64 XLogRecPtr; ...@@ -46,7 +46,6 @@ typedef uint64 XLogRecPtr;
* XLogSegNo - physical log file sequence number. * XLogSegNo - physical log file sequence number.
*/ */
typedef uint64 XLogSegNo; typedef uint64 XLogSegNo;
#define MaxXLogSegNo ((XLogSegNo) 0xFFFFFFFFFFFFFFFF)
/* /*
* TimeLineID (TLI) - identifies different database histories to prevent * TimeLineID (TLI) - identifies different database histories to prevent
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment