Commit 06f82b29 authored by Heikki Linnakangas

Write an end-of-backup WAL record at pg_stop_backup(), and wait for it at
recovery instead of reading the backup history file. This is more robust,
as it stops you from prematurely starting up an inconsistent cluster if the
backup history file is lost for some reason, or if the base backup was
never finished with pg_stop_backup().

This also paves the way for a simpler streaming replication patch, which
doesn't need to care about backup history files anymore.

The backup history file is still created and archived as before, but it's
not used by the system anymore. It's just for informational purposes now.

Bump PG_CONTROL_VERSION as the location of the backup startpoint is now
written to a new field in pg_control, and catversion because initdb is
required.

Original patch by Fujii Masao per Simon's idea, with further fixes by me.
parent 40608e7f
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.356 2010/01/02 16:57:35 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.357 2010/01/04 12:50:49 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -515,8 +515,7 @@ static void xlog_outrec(StringInfo buf, XLogRecord *record); ...@@ -515,8 +515,7 @@ static void xlog_outrec(StringInfo buf, XLogRecord *record);
#endif #endif
static void issue_xlog_fsync(void); static void issue_xlog_fsync(void);
static void pg_start_backup_callback(int code, Datum arg); static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc, static bool read_backup_label(XLogRecPtr *checkPointLoc);
XLogRecPtr *minRecoveryLoc);
static void rm_redo_error_callback(void *arg); static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method); static int get_sync_bit(int method);
...@@ -5355,7 +5354,6 @@ StartupXLOG(void) ...@@ -5355,7 +5354,6 @@ StartupXLOG(void)
bool haveBackupLabel = false; bool haveBackupLabel = false;
XLogRecPtr RecPtr, XLogRecPtr RecPtr,
checkPointLoc, checkPointLoc,
backupStopLoc,
EndOfLog; EndOfLog;
uint32 endLogId; uint32 endLogId;
uint32 endLogSeg; uint32 endLogSeg;
...@@ -5454,7 +5452,7 @@ StartupXLOG(void) ...@@ -5454,7 +5452,7 @@ StartupXLOG(void)
recoveryTargetTLI, recoveryTargetTLI,
ControlFile->checkPointCopy.ThisTimeLineID))); ControlFile->checkPointCopy.ThisTimeLineID)));
if (read_backup_label(&checkPointLoc, &backupStopLoc)) if (read_backup_label(&checkPointLoc))
{ {
/* /*
* When a backup_label file is present, we want to roll forward from * When a backup_label file is present, we want to roll forward from
...@@ -5597,11 +5595,23 @@ StartupXLOG(void) ...@@ -5597,11 +5595,23 @@ StartupXLOG(void)
ControlFile->prevCheckPoint = ControlFile->checkPoint; ControlFile->prevCheckPoint = ControlFile->checkPoint;
ControlFile->checkPoint = checkPointLoc; ControlFile->checkPoint = checkPointLoc;
ControlFile->checkPointCopy = checkPoint; ControlFile->checkPointCopy = checkPoint;
if (backupStopLoc.xlogid != 0 || backupStopLoc.xrecoff != 0) if (InArchiveRecovery)
{ {
if (XLByteLT(ControlFile->minRecoveryPoint, backupStopLoc)) /* initialize minRecoveryPoint if not set yet */
ControlFile->minRecoveryPoint = backupStopLoc; if (XLByteLT(ControlFile->minRecoveryPoint, checkPoint.redo))
ControlFile->minRecoveryPoint = checkPoint.redo;
} }
else
{
XLogRecPtr InvalidXLogRecPtr = {0, 0};
ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
}
/*
* set backupStartPoint if we're starting archive recovery from a
* base backup
*/
if (haveBackupLabel)
ControlFile->backupStartPoint = checkPoint.redo;
ControlFile->time = (pg_time_t) time(NULL); ControlFile->time = (pg_time_t) time(NULL);
/* No need to hold ControlFileLock yet, we aren't up far enough */ /* No need to hold ControlFileLock yet, we aren't up far enough */
UpdateControlFile(); UpdateControlFile();
...@@ -5703,15 +5713,9 @@ StartupXLOG(void) ...@@ -5703,15 +5713,9 @@ StartupXLOG(void)
InRedo = true; InRedo = true;
if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
ereport(LOG, ereport(LOG,
(errmsg("redo starts at %X/%X", (errmsg("redo starts at %X/%X",
ReadRecPtr.xlogid, ReadRecPtr.xrecoff))); ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
else
ereport(LOG,
(errmsg("redo starts at %X/%X, consistency will be reached at %X/%X",
ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
/* /*
* Let postmaster know we've started redo now, so that it can * Let postmaster know we've started redo now, so that it can
...@@ -5771,7 +5775,8 @@ StartupXLOG(void) ...@@ -5771,7 +5775,8 @@ StartupXLOG(void)
* Have we passed our safe starting point? * Have we passed our safe starting point?
*/ */
if (!reachedMinRecoveryPoint && if (!reachedMinRecoveryPoint &&
XLByteLE(minRecoveryPoint, EndRecPtr)) XLByteLE(minRecoveryPoint, EndRecPtr) &&
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
{ {
reachedMinRecoveryPoint = true; reachedMinRecoveryPoint = true;
ereport(LOG, ereport(LOG,
...@@ -5877,7 +5882,9 @@ StartupXLOG(void) ...@@ -5877,7 +5882,9 @@ StartupXLOG(void)
* be further ahead --- ControlFile->minRecoveryPoint cannot have been * be further ahead --- ControlFile->minRecoveryPoint cannot have been
* advanced beyond the WAL we processed. * advanced beyond the WAL we processed.
*/ */
if (InRecovery && XLByteLT(EndOfLog, minRecoveryPoint)) if (InArchiveRecovery &&
(XLByteLT(EndOfLog, minRecoveryPoint) ||
!XLogRecPtrIsInvalid(ControlFile->backupStartPoint)))
{ {
if (reachedStopPoint) /* stopped because of stop request */ if (reachedStopPoint) /* stopped because of stop request */
ereport(FATAL, ereport(FATAL,
...@@ -7312,6 +7319,32 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -7312,6 +7319,32 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
{ {
/* nothing to do here */ /* nothing to do here */
} }
else if (info == XLOG_BACKUP_END)
{
XLogRecPtr startpoint;
memcpy(&startpoint, XLogRecGetData(record), sizeof(startpoint));
if (XLByteEQ(ControlFile->backupStartPoint, startpoint))
{
/*
* We have reached the end of base backup, the point where
* pg_stop_backup() was done. The data on disk is now consistent.
* Reset backupStartPoint, and update minRecoveryPoint to make
* sure we don't allow starting up at an earlier point even if
* recovery is stopped and restarted soon after this.
*/
elog(DEBUG1, "end of backup reached");
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
if (XLByteLT(ControlFile->minRecoveryPoint, lsn))
ControlFile->minRecoveryPoint = lsn;
MemSet(&ControlFile->backupStartPoint, 0, sizeof(XLogRecPtr));
UpdateControlFile();
LWLockRelease(ControlFileLock);
}
}
} }
void void
...@@ -7353,6 +7386,14 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec) ...@@ -7353,6 +7386,14 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
{ {
appendStringInfo(buf, "xlog switch"); appendStringInfo(buf, "xlog switch");
} }
else if (info == XLOG_BACKUP_END)
{
XLogRecPtr startpoint;
memcpy(&startpoint, rec, sizeof(XLogRecPtr));
appendStringInfo(buf, "backup end: %X/%X",
startpoint.xlogid, startpoint.xrecoff);
}
else else
appendStringInfo(buf, "UNKNOWN"); appendStringInfo(buf, "UNKNOWN");
} }
...@@ -7688,10 +7729,14 @@ pg_start_backup_callback(int code, Datum arg) ...@@ -7688,10 +7729,14 @@ pg_start_backup_callback(int code, Datum arg)
/* /*
* pg_stop_backup: finish taking an on-line backup dump * pg_stop_backup: finish taking an on-line backup dump
* *
* We remove the backup label file created by pg_start_backup, and instead * We write an end-of-backup WAL record, and remove the backup label file
* create a backup history file in pg_xlog (whence it will immediately be * created by pg_start_backup, creating a backup history file in pg_xlog
* archived). The backup history file contains the same info found in * instead (whence it will immediately be archived). The backup history file
* the label file, plus the backup-end time and WAL location. * contains the same info found in the label file, plus the backup-end time
* and WAL location. Before 8.5, the backup-end location was read from the backup
* history file at the beginning of archive recovery, but we now use the WAL
* record for that and the file is for informational and debug purposes only.
*
* Note: different from CancelBackup which just cancels online backup mode. * Note: different from CancelBackup which just cancels online backup mode.
*/ */
Datum Datum
...@@ -7699,6 +7744,7 @@ pg_stop_backup(PG_FUNCTION_ARGS) ...@@ -7699,6 +7744,7 @@ pg_stop_backup(PG_FUNCTION_ARGS)
{ {
XLogRecPtr startpoint; XLogRecPtr startpoint;
XLogRecPtr stoppoint; XLogRecPtr stoppoint;
XLogRecData rdata;
pg_time_t stamp_time; pg_time_t stamp_time;
char strfbuf[128]; char strfbuf[128];
char histfilepath[MAXPGPATH]; char histfilepath[MAXPGPATH];
...@@ -7739,22 +7785,6 @@ pg_stop_backup(PG_FUNCTION_ARGS) ...@@ -7739,22 +7785,6 @@ pg_stop_backup(PG_FUNCTION_ARGS)
XLogCtl->Insert.forcePageWrites = false; XLogCtl->Insert.forcePageWrites = false;
LWLockRelease(WALInsertLock); LWLockRelease(WALInsertLock);
/*
* Force a switch to a new xlog segment file, so that the backup is valid
* as soon as archiver moves out the current segment file. We'll report
* the end address of the XLOG SWITCH record as the backup stopping point.
*/
stoppoint = RequestXLogSwitch();
XLByteToSeg(stoppoint, _logId, _logSeg);
XLogFileName(stopxlogfilename, ThisTimeLineID, _logId, _logSeg);
/* Use the log timezone here, not the session timezone */
stamp_time = (pg_time_t) time(NULL);
pg_strftime(strfbuf, sizeof(strfbuf),
"%Y-%m-%d %H:%M:%S %Z",
pg_localtime(&stamp_time, log_timezone));
/* /*
* Open the existing label file * Open the existing label file
*/ */
...@@ -7782,6 +7812,30 @@ pg_stop_backup(PG_FUNCTION_ARGS) ...@@ -7782,6 +7812,30 @@ pg_stop_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
/*
* Write the backup-end xlog record
*/
rdata.data = (char *) (&startpoint);
rdata.len = sizeof(startpoint);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
/*
* Force a switch to a new xlog segment file, so that the backup is valid
* as soon as archiver moves out the current segment file.
*/
RequestXLogSwitch();
XLByteToSeg(stoppoint, _logId, _logSeg);
XLogFileName(stopxlogfilename, ThisTimeLineID, _logId, _logSeg);
/* Use the log timezone here, not the session timezone */
stamp_time = (pg_time_t) time(NULL);
pg_strftime(strfbuf, sizeof(strfbuf),
"%Y-%m-%d %H:%M:%S %Z",
pg_localtime(&stamp_time, log_timezone));
/* /*
* Write the backup history file * Write the backup history file
*/ */
...@@ -8088,33 +8142,18 @@ pg_xlogfile_name(PG_FUNCTION_ARGS) ...@@ -8088,33 +8142,18 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
* later than the start of the dump, and so if we rely on it as the start * later than the start of the dump, and so if we rely on it as the start
* point, we will fail to restore a consistent database state. * point, we will fail to restore a consistent database state.
* *
* We also attempt to retrieve the corresponding backup history file.
* If successful, set *minRecoveryLoc to constrain valid PITR stopping
* points.
*
* Returns TRUE if a backup_label was found (and fills the checkpoint * Returns TRUE if a backup_label was found (and fills the checkpoint
* location into *checkPointLoc); returns FALSE if not. * location into *checkPointLoc); returns FALSE if not.
*/ */
static bool static bool
read_backup_label(XLogRecPtr *checkPointLoc, XLogRecPtr *minRecoveryLoc) read_backup_label(XLogRecPtr *checkPointLoc)
{ {
XLogRecPtr startpoint; XLogRecPtr startpoint;
XLogRecPtr stoppoint;
char histfilename[MAXFNAMELEN];
char histfilepath[MAXPGPATH];
char startxlogfilename[MAXFNAMELEN]; char startxlogfilename[MAXFNAMELEN];
char stopxlogfilename[MAXFNAMELEN];
TimeLineID tli; TimeLineID tli;
uint32 _logId;
uint32 _logSeg;
FILE *lfp; FILE *lfp;
FILE *fp;
char ch; char ch;
/* Default is to not constrain recovery stop point */
minRecoveryLoc->xlogid = 0;
minRecoveryLoc->xrecoff = 0;
/* /*
* See if label file is present * See if label file is present
*/ */
...@@ -8152,45 +8191,6 @@ read_backup_label(XLogRecPtr *checkPointLoc, XLogRecPtr *minRecoveryLoc) ...@@ -8152,45 +8191,6 @@ read_backup_label(XLogRecPtr *checkPointLoc, XLogRecPtr *minRecoveryLoc)
errmsg("could not read file \"%s\": %m", errmsg("could not read file \"%s\": %m",
BACKUP_LABEL_FILE))); BACKUP_LABEL_FILE)));
/*
* Try to retrieve the backup history file (no error if we can't)
*/
XLByteToSeg(startpoint, _logId, _logSeg);
BackupHistoryFileName(histfilename, tli, _logId, _logSeg,
startpoint.xrecoff % XLogSegSize);
if (InArchiveRecovery)
RestoreArchivedFile(histfilepath, histfilename, "RECOVERYHISTORY", 0);
else
BackupHistoryFilePath(histfilepath, tli, _logId, _logSeg,
startpoint.xrecoff % XLogSegSize);
fp = AllocateFile(histfilepath, "r");
if (fp)
{
/*
* Parse history file to identify stop point.
*/
if (fscanf(fp, "START WAL LOCATION: %X/%X (file %24s)%c",
&startpoint.xlogid, &startpoint.xrecoff, startxlogfilename,
&ch) != 4 || ch != '\n')
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", histfilename)));
if (fscanf(fp, "STOP WAL LOCATION: %X/%X (file %24s)%c",
&stoppoint.xlogid, &stoppoint.xrecoff, stopxlogfilename,
&ch) != 4 || ch != '\n')
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", histfilename)));
*minRecoveryLoc = stoppoint;
if (ferror(fp) || FreeFile(fp))
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
histfilepath)));
}
return true; return true;
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001; * copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
* licence: BSD * licence: BSD
* *
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.45 2009/12/19 01:32:38 sriggs Exp $ * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.46 2010/01/04 12:50:49 heikki Exp $
*/ */
#include "postgres_fe.h" #include "postgres_fe.h"
...@@ -203,6 +203,9 @@ main(int argc, char *argv[]) ...@@ -203,6 +203,9 @@ main(int argc, char *argv[])
printf(_("Minimum recovery ending location: %X/%X\n"), printf(_("Minimum recovery ending location: %X/%X\n"),
ControlFile.minRecoveryPoint.xlogid, ControlFile.minRecoveryPoint.xlogid,
ControlFile.minRecoveryPoint.xrecoff); ControlFile.minRecoveryPoint.xrecoff);
printf(_("Backup start location: %X/%X\n"),
ControlFile.backupStartPoint.xlogid,
ControlFile.backupStartPoint.xrecoff);
printf(_("Maximum data alignment: %u\n"), printf(_("Maximum data alignment: %u\n"),
ControlFile.maxAlign); ControlFile.maxAlign);
/* we don't print floatFormat since can't say much useful about it */ /* we don't print floatFormat since can't say much useful about it */
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.76 2010/01/02 16:57:59 momjian Exp $ * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.77 2010/01/04 12:50:49 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -625,6 +625,8 @@ RewriteControlFile(void) ...@@ -625,6 +625,8 @@ RewriteControlFile(void)
ControlFile.prevCheckPoint.xrecoff = 0; ControlFile.prevCheckPoint.xrecoff = 0;
ControlFile.minRecoveryPoint.xlogid = 0; ControlFile.minRecoveryPoint.xlogid = 0;
ControlFile.minRecoveryPoint.xrecoff = 0; ControlFile.minRecoveryPoint.xrecoff = 0;
ControlFile.backupStartPoint.xlogid = 0;
ControlFile.backupStartPoint.xrecoff = 0;
/* Now we can force the recorded xlog seg size to the right thing. */ /* Now we can force the recorded xlog seg size to the right thing. */
ControlFile.xlog_seg_size = XLogSegSize; ControlFile.xlog_seg_size = XLogSegSize;
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.565 2010/01/02 16:58:01 momjian Exp $ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.566 2010/01/04 12:50:49 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201001011 #define CATALOG_VERSION_NO 201001041
#endif #endif
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.47 2010/01/02 16:58:01 momjian Exp $ * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.48 2010/01/04 12:50:50 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
/* Version identifier for this pg_control format */ /* Version identifier for this pg_control format */
#define PG_CONTROL_VERSION 852 #define PG_CONTROL_VERSION 853
/* /*
* Body of CheckPoint XLOG records. This is declared here because we keep * Body of CheckPoint XLOG records. This is declared here because we keep
...@@ -62,6 +62,7 @@ typedef struct CheckPoint ...@@ -62,6 +62,7 @@ typedef struct CheckPoint
#define XLOG_NOOP 0x20 #define XLOG_NOOP 0x20
#define XLOG_NEXTOID 0x30 #define XLOG_NEXTOID 0x30
#define XLOG_SWITCH 0x40 #define XLOG_SWITCH 0x40
#define XLOG_BACKUP_END 0x50
/* System status indicator */ /* System status indicator */
...@@ -117,7 +118,27 @@ typedef struct ControlFileData ...@@ -117,7 +118,27 @@ typedef struct ControlFileData
CheckPoint checkPointCopy; /* copy of last check point record */ CheckPoint checkPointCopy; /* copy of last check point record */
XLogRecPtr minRecoveryPoint; /* must replay xlog to here */ /*
* These two values determine the minimum point we must recover up to
* before starting up:
*
* minRecoveryPoint is updated to the latest replayed LSN whenever we
* flush a data change during archive recovery. That guards against
* starting archive recovery, aborting it, and restarting with an earlier
* stop location. If we've already flushed data changes from WAL record X
* to disk, we mustn't start up until we reach X again. Zero when not
* doing archive recovery.
*
* backupStartPoint is the redo pointer of the backup start checkpoint, if
* we are recovering from an online backup and haven't reached the end of
* backup yet. It is reset to zero when the end of backup is reached, and
* we mustn't start up before that. A boolean would suffice otherwise, but
* we use the redo pointer as a cross-check when we see an end-of-backup
* record, to make sure the end-of-backup record corresponds to the base
* backup we're recovering from.
*/
XLogRecPtr minRecoveryPoint;
XLogRecPtr backupStartPoint;
/* /*
* This data is used to check for hardware-architecture compatibility of * This data is used to check for hardware-architecture compatibility of
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment