Commit 4ae08cd5 authored by Alvaro Herrera

Persist slot invalidation correctly

We failed to save the slot to disk after invalidating it, so the state
was lost in case of a server restart or crash.  Fix by marking it dirty
and flushing.

Also, if the slot is known to be invalidated, we don't need to reason
about the LSN at all.  Only test the LSN if the slot is not known to be
invalidated.

Author: Fujii Masao <masao.fujii@oss.nttdata.com>
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/17a69cfe-f1c1-a416-ee25-ae15427c69eb@oss.nttdata.com
parent eca08f58
...@@ -1157,6 +1157,7 @@ restart: ...@@ -1157,6 +1157,7 @@ restart:
if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn >= oldestLSN) if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn >= oldestLSN)
continue; continue;
LWLockRelease(ReplicationSlotControlLock); LWLockRelease(ReplicationSlotControlLock);
CHECK_FOR_INTERRUPTS();
/* Get ready to sleep on the slot in case it is active */ /* Get ready to sleep on the slot in case it is active */
ConditionVariablePrepareToSleep(&s->active_cv); ConditionVariablePrepareToSleep(&s->active_cv);
...@@ -1214,10 +1215,7 @@ restart: ...@@ -1214,10 +1215,7 @@ restart:
* already been dropped. * already been dropped.
*/ */
if (wspid == -1) if (wspid == -1)
{
CHECK_FOR_INTERRUPTS();
goto restart; goto restart;
}
ereport(LOG, ereport(LOG,
(errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size", (errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size",
...@@ -1229,10 +1227,13 @@ restart: ...@@ -1229,10 +1227,13 @@ restart:
s->data.invalidated_at = s->data.restart_lsn; s->data.invalidated_at = s->data.restart_lsn;
s->data.restart_lsn = InvalidXLogRecPtr; s->data.restart_lsn = InvalidXLogRecPtr;
SpinLockRelease(&s->mutex); SpinLockRelease(&s->mutex);
/* Make sure the invalidated state persists across server restart */
ReplicationSlotMarkDirty();
ReplicationSlotSave();
ReplicationSlotRelease(); ReplicationSlotRelease();
/* if we did anything, start from scratch */ /* if we did anything, start from scratch */
CHECK_FOR_INTERRUPTS();
goto restart; goto restart;
} }
LWLockRelease(ReplicationSlotControlLock); LWLockRelease(ReplicationSlotControlLock);
......
...@@ -283,7 +283,6 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) ...@@ -283,7 +283,6 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
bool nulls[PG_GET_REPLICATION_SLOTS_COLS]; bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
WALAvailability walstate; WALAvailability walstate;
XLogSegNo last_removed_seg; XLogSegNo last_removed_seg;
XLogRecPtr targetLSN;
int i; int i;
if (!slot->in_use) if (!slot->in_use)
...@@ -344,14 +343,15 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) ...@@ -344,14 +343,15 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
nulls[i++] = true; nulls[i++] = true;
/* /*
* Report availability from invalidated_at when the slot has been * If invalidated_at is valid and restart_lsn is invalid, we know for
* invalidated; otherwise slots would appear as invalid without any * certain that the slot has been invalidated. Otherwise, test
* more clues as to what happened. * availability from restart_lsn.
*/ */
targetLSN = XLogRecPtrIsInvalid(slot_contents.data.restart_lsn) ? if (XLogRecPtrIsInvalid(slot_contents.data.restart_lsn) &&
slot_contents.data.invalidated_at : !XLogRecPtrIsInvalid(slot_contents.data.invalidated_at))
slot_contents.data.restart_lsn; walstate = WALAVAIL_REMOVED;
walstate = GetWALAvailability(targetLSN); else
walstate = GetWALAvailability(slot_contents.data.restart_lsn);
switch (walstate) switch (walstate)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment