Commit 974ece58 authored by Fujii Masao's avatar Fujii Masao

Fix an assertion failure related to an exclusive backup.

Previously multiple sessions could execute pg_start_backup() and
pg_stop_backup() to start and stop an exclusive backup at the same time.
This could trigger the assertion failure of
"FailedAssertion("!(XLogCtl->Insert.exclusiveBackup)".
This happend because, even while pg_start_backup() was starting
an exclusive backup, other session could run pg_stop_backup()
concurrently and mark the backup as not-in-progress unconditionally.

This patch introduces ExclusiveBackupState indicating the state of
an exclusive backup. This state is used to ensure that there is only
one session running pg_start_backup() or pg_stop_backup() at
the same time, to avoid the assertion failure.

Back-patch to all supported versions.

Author: Michael Paquier
Reviewed-By: Kyotaro Horiguchi and me
Reported-By: Andreas Seltenreich
Discussion: <87mvktojme.fsf@credativ.de>
parent d43a619c
...@@ -472,6 +472,29 @@ typedef union WALInsertLockPadded ...@@ -472,6 +472,29 @@ typedef union WALInsertLockPadded
char pad[PG_CACHE_LINE_SIZE]; char pad[PG_CACHE_LINE_SIZE];
} WALInsertLockPadded; } WALInsertLockPadded;
/*
* State of an exclusive backup, necessary to control concurrent activities
* across sessions when working on exclusive backups.
*
* EXCLUSIVE_BACKUP_NONE means that there is no exclusive backup actually
* running, to be more precise pg_start_backup() is not being executed for
* an exclusive backup and there is no exclusive backup in progress.
* EXCLUSIVE_BACKUP_STARTING means that pg_start_backup() is starting an
* exclusive backup.
* EXCLUSIVE_BACKUP_IN_PROGRESS means that pg_start_backup() has finished
* running and an exclusive backup is in progress. pg_stop_backup() is
* needed to finish it.
* EXCLUSIVE_BACKUP_STOPPING means that pg_stop_backup() is stopping an
* exclusive backup.
*/
typedef enum ExclusiveBackupState
{
EXCLUSIVE_BACKUP_NONE = 0,
EXCLUSIVE_BACKUP_STARTING,
EXCLUSIVE_BACKUP_IN_PROGRESS,
EXCLUSIVE_BACKUP_STOPPING
} ExclusiveBackupState;
/* /*
* Shared state data for WAL insertion. * Shared state data for WAL insertion.
*/ */
...@@ -513,13 +536,15 @@ typedef struct XLogCtlInsert ...@@ -513,13 +536,15 @@ typedef struct XLogCtlInsert
bool fullPageWrites; bool fullPageWrites;
/* /*
* exclusiveBackup is true if a backup started with pg_start_backup() is * exclusiveBackupState indicates the state of an exclusive backup
* in progress, and nonExclusiveBackups is a counter indicating the number * (see comments of ExclusiveBackupState for more details).
* of streaming base backups currently in progress. forcePageWrites is set * nonExclusiveBackups is a counter indicating the number of streaming
* to true when either of these is non-zero. lastBackupStart is the latest * base backups currently in progress. forcePageWrites is set to true
* checkpoint redo location used as a starting point for an online backup. * when either of these is non-zero. lastBackupStart is the latest
* checkpoint redo location used as a starting point for an online
* backup.
*/ */
bool exclusiveBackup; ExclusiveBackupState exclusiveBackupState;
int nonExclusiveBackups; int nonExclusiveBackups;
XLogRecPtr lastBackupStart; XLogRecPtr lastBackupStart;
...@@ -858,6 +883,7 @@ static void xlog_outrec(StringInfo buf, XLogReaderState *record); ...@@ -858,6 +883,7 @@ static void xlog_outrec(StringInfo buf, XLogReaderState *record);
#endif #endif
static void xlog_outdesc(StringInfo buf, XLogReaderState *record); static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg); static void pg_start_backup_callback(int code, Datum arg);
static void pg_stop_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc, static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby); bool *backupEndRequired, bool *backupFromStandby);
static bool read_tablespace_map(List **tablespaces); static bool read_tablespace_map(List **tablespaces);
...@@ -10016,7 +10042,12 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ...@@ -10016,7 +10042,12 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
WALInsertLockAcquireExclusive(); WALInsertLockAcquireExclusive();
if (exclusive) if (exclusive)
{ {
if (XLogCtl->Insert.exclusiveBackup) /*
* At first, mark that we're now starting an exclusive backup,
* to ensure that there are no other sessions currently running
* pg_start_backup() or pg_stop_backup().
*/
if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_NONE)
{ {
WALInsertLockRelease(); WALInsertLockRelease();
ereport(ERROR, ereport(ERROR,
...@@ -10024,7 +10055,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ...@@ -10024,7 +10055,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("a backup is already in progress"), errmsg("a backup is already in progress"),
errhint("Run pg_stop_backup() and try again."))); errhint("Run pg_stop_backup() and try again.")));
} }
XLogCtl->Insert.exclusiveBackup = true; XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STARTING;
} }
else else
XLogCtl->Insert.nonExclusiveBackups++; XLogCtl->Insert.nonExclusiveBackups++;
...@@ -10279,7 +10310,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ...@@ -10279,7 +10310,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
{ {
/* /*
* Check for existing backup label --- implies a backup is already * Check for existing backup label --- implies a backup is already
* running. (XXX given that we checked exclusiveBackup above, * running. (XXX given that we checked exclusiveBackupState above,
* maybe it would be OK to just unlink any such label file?) * maybe it would be OK to just unlink any such label file?)
*/ */
if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0) if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
...@@ -10360,6 +10391,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ...@@ -10360,6 +10391,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
} }
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
/*
* Mark that start phase has correctly finished for an exclusive backup.
*/
if (exclusive)
{
WALInsertLockAcquireExclusive();
XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
WALInsertLockRelease();
}
/* /*
* We're done. As a convenience, return the starting WAL location. * We're done. As a convenience, return the starting WAL location.
*/ */
...@@ -10378,8 +10419,8 @@ pg_start_backup_callback(int code, Datum arg) ...@@ -10378,8 +10419,8 @@ pg_start_backup_callback(int code, Datum arg)
WALInsertLockAcquireExclusive(); WALInsertLockAcquireExclusive();
if (exclusive) if (exclusive)
{ {
Assert(XLogCtl->Insert.exclusiveBackup); Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING);
XLogCtl->Insert.exclusiveBackup = false; XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
} }
else else
{ {
...@@ -10387,7 +10428,7 @@ pg_start_backup_callback(int code, Datum arg) ...@@ -10387,7 +10428,7 @@ pg_start_backup_callback(int code, Datum arg)
XLogCtl->Insert.nonExclusiveBackups--; XLogCtl->Insert.nonExclusiveBackups--;
} }
if (!XLogCtl->Insert.exclusiveBackup && if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
XLogCtl->Insert.nonExclusiveBackups == 0) XLogCtl->Insert.nonExclusiveBackups == 0)
{ {
XLogCtl->Insert.forcePageWrites = false; XLogCtl->Insert.forcePageWrites = false;
...@@ -10395,6 +10436,24 @@ pg_start_backup_callback(int code, Datum arg) ...@@ -10395,6 +10436,24 @@ pg_start_backup_callback(int code, Datum arg)
WALInsertLockRelease(); WALInsertLockRelease();
} }
/*
* Error cleanup callback for pg_stop_backup
*/
static void
pg_stop_backup_callback(int code, Datum arg)
{
bool exclusive = DatumGetBool(arg);
/* Update backup status on failure */
WALInsertLockAcquireExclusive();
if (exclusive)
{
Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STOPPING);
XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
}
WALInsertLockRelease();
}
/* /*
* do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup() * do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup()
* function. * function.
...@@ -10457,20 +10516,91 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ...@@ -10457,20 +10516,91 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
errmsg("WAL level not sufficient for making an online backup"), errmsg("WAL level not sufficient for making an online backup"),
errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
/*
* OK to update backup counters and forcePageWrites
*/
WALInsertLockAcquireExclusive();
if (exclusive) if (exclusive)
{ {
if (!XLogCtl->Insert.exclusiveBackup) /*
* At first, mark that we're now stopping an exclusive backup,
* to ensure that there are no other sessions currently running
* pg_start_backup() or pg_stop_backup().
*/
WALInsertLockAcquireExclusive();
if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_IN_PROGRESS)
{ {
WALInsertLockRelease(); WALInsertLockRelease();
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("exclusive backup not in progress"))); errmsg("exclusive backup not in progress")));
} }
XLogCtl->Insert.exclusiveBackup = false; XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STOPPING;
WALInsertLockRelease();
/*
* Remove backup_label. In case of failure, the state for an exclusive
* backup is switched back to in-progress.
*/
PG_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
{
/*
* Read the existing label file into memory.
*/
struct stat statbuf;
int r;
if (stat(BACKUP_LABEL_FILE, &statbuf))
{
/* should not happen per the upper checks */
if (errno != ENOENT)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
BACKUP_LABEL_FILE)));
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("a backup is not in progress")));
}
lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
if (!lfp)
{
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
BACKUP_LABEL_FILE)));
}
labelfile = palloc(statbuf.st_size + 1);
r = fread(labelfile, statbuf.st_size, 1, lfp);
labelfile[statbuf.st_size] = '\0';
/*
* Close and remove the backup label file
*/
if (r != 1 || ferror(lfp) || FreeFile(lfp))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
BACKUP_LABEL_FILE)));
if (unlink(BACKUP_LABEL_FILE) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
/*
* Remove tablespace_map file if present, it is created only if there
* are tablespaces.
*/
unlink(TABLESPACE_MAP);
}
PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
}
/*
* OK to update backup counters and forcePageWrites
*/
WALInsertLockAcquireExclusive();
if (exclusive)
{
XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
} }
else else
{ {
...@@ -10484,66 +10614,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ...@@ -10484,66 +10614,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
XLogCtl->Insert.nonExclusiveBackups--; XLogCtl->Insert.nonExclusiveBackups--;
} }
if (!XLogCtl->Insert.exclusiveBackup && if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
XLogCtl->Insert.nonExclusiveBackups == 0) XLogCtl->Insert.nonExclusiveBackups == 0)
{ {
XLogCtl->Insert.forcePageWrites = false; XLogCtl->Insert.forcePageWrites = false;
} }
WALInsertLockRelease(); WALInsertLockRelease();
if (exclusive)
{
/*
* Read the existing label file into memory.
*/
struct stat statbuf;
int r;
if (stat(BACKUP_LABEL_FILE, &statbuf))
{
if (errno != ENOENT)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
BACKUP_LABEL_FILE)));
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("a backup is not in progress")));
}
lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
if (!lfp)
{
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
BACKUP_LABEL_FILE)));
}
labelfile = palloc(statbuf.st_size + 1);
r = fread(labelfile, statbuf.st_size, 1, lfp);
labelfile[statbuf.st_size] = '\0';
/*
* Close and remove the backup label file
*/
if (r != 1 || ferror(lfp) || FreeFile(lfp))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
BACKUP_LABEL_FILE)));
if (unlink(BACKUP_LABEL_FILE) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
/*
* Remove tablespace_map file if present, it is created only if there
* are tablespaces.
*/
unlink(TABLESPACE_MAP);
}
/* /*
* Read and parse the START WAL LOCATION line (this code is pretty crude, * Read and parse the START WAL LOCATION line (this code is pretty crude,
* but we are not expecting any variability in the file format). * but we are not expecting any variability in the file format).
...@@ -10780,7 +10857,7 @@ do_pg_abort_backup(void) ...@@ -10780,7 +10857,7 @@ do_pg_abort_backup(void)
Assert(XLogCtl->Insert.nonExclusiveBackups > 0); Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
XLogCtl->Insert.nonExclusiveBackups--; XLogCtl->Insert.nonExclusiveBackups--;
if (!XLogCtl->Insert.exclusiveBackup && if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
XLogCtl->Insert.nonExclusiveBackups == 0) XLogCtl->Insert.nonExclusiveBackups == 0)
{ {
XLogCtl->Insert.forcePageWrites = false; XLogCtl->Insert.forcePageWrites = false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment