Commit 41f9ffd9 authored by Heikki Linnakangas

If backup-end record is not seen, and we reach end of recovery from a
streamed backup, throw an error and refuse to start up. The restore has not
finished correctly in that case and the data directory is possibly corrupt.
We already errored out in case of archive recovery, but could not during
crash recovery because we couldn't distinguish between the case that
pg_start_backup() was called and the database then crashed (must not error,
data is OK), and the case that we're restoring from a backup and not all
the needed WAL was replayed (data can be corrupt).

To distinguish those cases, add a line to backup_label to indicate
whether the backup was taken with pg_start/stop_backup(), or by streaming
(ie. pg_basebackup).

This requires re-initdb, because of a new field added to the control file.
parent 9f17ffd8
...@@ -662,7 +662,8 @@ static bool CheckForStandbyTrigger(void); ...@@ -662,7 +662,8 @@ static bool CheckForStandbyTrigger(void);
static void xlog_outrec(StringInfo buf, XLogRecord *record); static void xlog_outrec(StringInfo buf, XLogRecord *record);
#endif #endif
static void pg_start_backup_callback(int code, Datum arg); static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc); static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired);
static void rm_redo_error_callback(void *arg); static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method); static int get_sync_bit(int method);
...@@ -6016,6 +6017,7 @@ StartupXLOG(void) ...@@ -6016,6 +6017,7 @@ StartupXLOG(void)
XLogRecord *record; XLogRecord *record;
uint32 freespace; uint32 freespace;
TransactionId oldestActiveXID; TransactionId oldestActiveXID;
bool backupEndRequired = false;
/* /*
* Read control file and check XLOG status looks valid. * Read control file and check XLOG status looks valid.
...@@ -6149,7 +6151,7 @@ StartupXLOG(void) ...@@ -6149,7 +6151,7 @@ StartupXLOG(void)
if (StandbyMode) if (StandbyMode)
OwnLatch(&XLogCtl->recoveryWakeupLatch); OwnLatch(&XLogCtl->recoveryWakeupLatch);
if (read_backup_label(&checkPointLoc)) if (read_backup_label(&checkPointLoc, &backupEndRequired))
{ {
/* /*
* When a backup_label file is present, we want to roll forward from * When a backup_label file is present, we want to roll forward from
...@@ -6328,7 +6330,10 @@ StartupXLOG(void) ...@@ -6328,7 +6330,10 @@ StartupXLOG(void)
* set backupStartPoint if we're starting recovery from a base backup * set backupStartPoint if we're starting recovery from a base backup
*/ */
if (haveBackupLabel) if (haveBackupLabel)
{
ControlFile->backupStartPoint = checkPoint.redo; ControlFile->backupStartPoint = checkPoint.redo;
ControlFile->backupEndRequired = backupEndRequired;
}
ControlFile->time = (pg_time_t) time(NULL); ControlFile->time = (pg_time_t) time(NULL);
/* No need to hold ControlFileLock yet, we aren't up far enough */ /* No need to hold ControlFileLock yet, we aren't up far enough */
UpdateControlFile(); UpdateControlFile();
...@@ -6698,9 +6703,13 @@ StartupXLOG(void) ...@@ -6698,9 +6703,13 @@ StartupXLOG(void)
* crashes while an online backup is in progress. We must not treat * crashes while an online backup is in progress. We must not treat
* that as an error, or the database will refuse to start up. * that as an error, or the database will refuse to start up.
*/ */
if (InArchiveRecovery) if (InArchiveRecovery || ControlFile->backupEndRequired)
{ {
if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint)) if (ControlFile->backupEndRequired)
ereport(FATAL,
(errmsg("WAL ends before end of online backup"),
errhint("All WAL generated while online backup was taken must be available at recovery.")));
else if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
ereport(FATAL, ereport(FATAL,
(errmsg("WAL ends before end of online backup"), (errmsg("WAL ends before end of online backup"),
errhint("Online backup started with pg_start_backup() must be ended with pg_stop_backup(), and all WAL up to that point must be available at recovery."))); errhint("Online backup started with pg_start_backup() must be ended with pg_stop_backup(), and all WAL up to that point must be available at recovery.")));
...@@ -8531,6 +8540,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -8531,6 +8540,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
if (XLByteLT(ControlFile->minRecoveryPoint, lsn)) if (XLByteLT(ControlFile->minRecoveryPoint, lsn))
ControlFile->minRecoveryPoint = lsn; ControlFile->minRecoveryPoint = lsn;
MemSet(&ControlFile->backupStartPoint, 0, sizeof(XLogRecPtr)); MemSet(&ControlFile->backupStartPoint, 0, sizeof(XLogRecPtr));
ControlFile->backupEndRequired = false;
UpdateControlFile(); UpdateControlFile();
LWLockRelease(ControlFileLock); LWLockRelease(ControlFileLock);
...@@ -9013,6 +9023,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) ...@@ -9013,6 +9023,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
startpoint.xlogid, startpoint.xrecoff, xlogfilename); startpoint.xlogid, startpoint.xrecoff, xlogfilename);
appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n", appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n",
checkpointloc.xlogid, checkpointloc.xrecoff); checkpointloc.xlogid, checkpointloc.xrecoff);
appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n",
exclusive ? "pg_start_backup" : "streamed");
appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf); appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf);
appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr); appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr);
...@@ -9768,15 +9780,19 @@ pg_xlogfile_name(PG_FUNCTION_ARGS) ...@@ -9768,15 +9780,19 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
* *
* Returns TRUE if a backup_label was found (and fills the checkpoint * Returns TRUE if a backup_label was found (and fills the checkpoint
* location and its REDO location into *checkPointLoc and RedoStartLSN, * location and its REDO location into *checkPointLoc and RedoStartLSN,
* respectively); returns FALSE if not. * respectively); returns FALSE if not. If this backup_label came from a
* streamed backup, *backupEndRequired is set to TRUE.
*/ */
static bool static bool
read_backup_label(XLogRecPtr *checkPointLoc) read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired)
{ {
char startxlogfilename[MAXFNAMELEN]; char startxlogfilename[MAXFNAMELEN];
TimeLineID tli; TimeLineID tli;
FILE *lfp; FILE *lfp;
char ch; char ch;
char backuptype[20];
*backupEndRequired = false;
/* /*
* See if label file is present * See if label file is present
...@@ -9809,6 +9825,17 @@ read_backup_label(XLogRecPtr *checkPointLoc) ...@@ -9809,6 +9825,17 @@ read_backup_label(XLogRecPtr *checkPointLoc)
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
/*
* BACKUP METHOD line is new in 9.0. Don't complain if it doesn't exist,
* in case you're restoring from a backup taken with a 9.0 beta version
* that didn't emit it.
*/
if (fscanf(lfp, "BACKUP METHOD: %19s", backuptype) == 1)
{
if (strcmp(backuptype, "streamed") == 0)
*backupEndRequired = true;
}
if (ferror(lfp) || FreeFile(lfp)) if (ferror(lfp) || FreeFile(lfp))
ereport(FATAL, ereport(FATAL,
(errcode_for_file_access(), (errcode_for_file_access(),
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
/* Version identifier for this pg_control format */ /* Version identifier for this pg_control format */
#define PG_CONTROL_VERSION 903 #define PG_CONTROL_VERSION 911
/* /*
* Body of CheckPoint XLOG records. This is declared here because we keep * Body of CheckPoint XLOG records. This is declared here because we keep
...@@ -137,9 +137,16 @@ typedef struct ControlFileData ...@@ -137,9 +137,16 @@ typedef struct ControlFileData
* we use the redo pointer as a cross-check when we see an end-of-backup * we use the redo pointer as a cross-check when we see an end-of-backup
* record, to make sure the end-of-backup record corresponds to the base * record, to make sure the end-of-backup record corresponds to the base
* backup we're recovering from. * backup we're recovering from.
*
* If backupEndRequired is true, we know for sure that we're restoring
* from a backup, and must see a backup-end record before we can safely
* start up. If it's false, but backupStartPoint is set, a backup_label
* file was found at startup but it may have been a leftover from a stray
* pg_start_backup() call, not accompanied by pg_stop_backup().
*/ */
XLogRecPtr minRecoveryPoint; XLogRecPtr minRecoveryPoint;
XLogRecPtr backupStartPoint; XLogRecPtr backupStartPoint;
bool backupEndRequired;
/* /*
* Parameter settings that determine if the WAL can be used for archival * Parameter settings that determine if the WAL can be used for archival
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment