Commit 56a95ee5 authored by Fujii Masao's avatar Fujii Masao

Fix bug in cancellation of non-exclusive backup to avoid assertion failure.

Previously an assertion failure occurred when pg_stop_backup() for
non-exclusive backup was aborted while it's waiting for WAL files to
be archived. This assertion failure happened in do_pg_abort_backup()
which was called when a non-exclusive backup was canceled.
do_pg_abort_backup() assumes that there is at least one non-exclusive
backup running when it's called. But pg_stop_backup() can be canceled
even after it marks the end of non-exclusive backup (e.g.,
during waiting for WAL archiving). This broke the assumption that
do_pg_abort_backup() relies on, and which caused an assertion failure.

This commit changes do_pg_abort_backup() so that it does nothing
when non-exclusive backup has been already marked as completed.
That is, the asssumption is also changed, and do_pg_abort_backup()
now can handle even the case where it's called when there is
no running backup.

Backpatch to 9.6 where SQL-callable non-exclusive backup was added.

Author: Masahiko Sawada and Michael Paquier
Reviewed-By: Robert Haas and Fujii Masao
Discussion: https://www.postgresql.org/message-id/CAD21AoD2L1Fu2c==gnVASMyFAAaq3y-AQ2uEVj-zTCGFFjvmDg@mail.gmail.com
parent fd7c0fa7
...@@ -10628,13 +10628,20 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ...@@ -10628,13 +10628,20 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
/* /*
* Mark that start phase has correctly finished for an exclusive backup. * Mark that start phase has correctly finished for an exclusive backup.
* Session-level locks are updated as well to reflect that state. * Session-level locks are updated as well to reflect that state.
*
* Note that CHECK_FOR_INTERRUPTS() must not occur while updating
* backup counters and session-level lock. Otherwise they can be
* updated inconsistently, and which might cause do_pg_abort_backup()
* to fail.
*/ */
if (exclusive) if (exclusive)
{ {
WALInsertLockAcquireExclusive(); WALInsertLockAcquireExclusive();
XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS; XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
WALInsertLockRelease();
/* Set session-level lock */
sessionBackupState = SESSION_BACKUP_EXCLUSIVE; sessionBackupState = SESSION_BACKUP_EXCLUSIVE;
WALInsertLockRelease();
} }
else else
sessionBackupState = SESSION_BACKUP_NON_EXCLUSIVE; sessionBackupState = SESSION_BACKUP_NON_EXCLUSIVE;
...@@ -10838,7 +10845,11 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ...@@ -10838,7 +10845,11 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
} }
/* /*
* OK to update backup counters and forcePageWrites * OK to update backup counters, forcePageWrites and session-level lock.
*
* Note that CHECK_FOR_INTERRUPTS() must not occur while updating them.
* Otherwise they can be updated inconsistently, and which might cause
* do_pg_abort_backup() to fail.
*/ */
WALInsertLockAcquireExclusive(); WALInsertLockAcquireExclusive();
if (exclusive) if (exclusive)
...@@ -10862,11 +10873,20 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ...@@ -10862,11 +10873,20 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
{ {
XLogCtl->Insert.forcePageWrites = false; XLogCtl->Insert.forcePageWrites = false;
} }
WALInsertLockRelease();
/* Clean up session-level lock */ /*
* Clean up session-level lock.
*
* You might think that WALInsertLockRelease() can be called
* before cleaning up session-level lock because session-level
* lock doesn't need to be protected with WAL insertion lock.
* But since CHECK_FOR_INTERRUPTS() can occur in it,
* session-level lock must be cleaned up before it.
*/
sessionBackupState = SESSION_BACKUP_NONE; sessionBackupState = SESSION_BACKUP_NONE;
WALInsertLockRelease();
/* /*
* Read and parse the START WAL LOCATION line (this code is pretty crude, * Read and parse the START WAL LOCATION line (this code is pretty crude,
* but we are not expecting any variability in the file format). * but we are not expecting any variability in the file format).
...@@ -11104,8 +11124,16 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ...@@ -11104,8 +11124,16 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
void void
do_pg_abort_backup(void) do_pg_abort_backup(void)
{ {
/*
* Quick exit if session is not keeping around a non-exclusive backup
* already started.
*/
if (sessionBackupState == SESSION_BACKUP_NONE)
return;
WALInsertLockAcquireExclusive(); WALInsertLockAcquireExclusive();
Assert(XLogCtl->Insert.nonExclusiveBackups > 0); Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
Assert(sessionBackupState == SESSION_BACKUP_NON_EXCLUSIVE);
XLogCtl->Insert.nonExclusiveBackups--; XLogCtl->Insert.nonExclusiveBackups--;
if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE && if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
......
...@@ -215,7 +215,7 @@ perform_base_backup(basebackup_options *opt) ...@@ -215,7 +215,7 @@ perform_base_backup(basebackup_options *opt)
* Once do_pg_start_backup has been called, ensure that any failure causes * Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this * us to abort the backup so we don't "leak" a backup counter. For this
* reason, *all* functionality between do_pg_start_backup() and * reason, *all* functionality between do_pg_start_backup() and
* do_pg_stop_backup() should be inside the error cleanup block! * the end of do_pg_stop_backup() should be inside the error cleanup block!
*/ */
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
...@@ -324,10 +324,11 @@ perform_base_backup(basebackup_options *opt) ...@@ -324,10 +324,11 @@ perform_base_backup(basebackup_options *opt)
else else
pq_putemptymessage('c'); /* CopyDone */ pq_putemptymessage('c'); /* CopyDone */
} }
endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
} }
PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
if (opt->includewal) if (opt->includewal)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment