Rework wait for AccessExclusiveLocks on Hot Standby

Earlier version committed in 9.0 caused spurious waits in some cases. New infrastructure for lock waits in 9.3 used to correct and improve this. Jeff Janes based upon a proposal by Simon Riggs, who also reviewed Additional review comments from Amit Kapila

Rework wait for AccessExclusiveLocks on Hot Standby
Earlier version committed in 9.0 caused spurious waits in some cases. New infrastructure for lock waits in 9.3 used to correct and improve this. Jeff Janes based upon a proposal by Simon Riggs, who also reviewed Additional review comments from Amit Kapila
37c54863 · Simon Riggs · 53be0b1a · 37c54863 · 37c54863 · 37c54863
Commit 37c54863 authored Mar 10, 2016 by Simon Riggs
5 changed files
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -203,6 +203,7 @@ StartupProcessMain(void)
 	 */
 	RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
 	RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
+	RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)

--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -41,7 +41,6 @@ static List *RecoveryLockList;
 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 									   ProcSignalReason reason);
-static void ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid);
 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
 static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
@@ -339,39 +338,65 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
 	}
 }
-static void
+/*
-ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
+ * ResolveRecoveryConflictWithLock is called from ProcSleep()
+ * to resolve conflicts with other backends holding relation locks.
+ *
+ * The WaitLatch sleep normally done in ProcSleep()
+ * (when not InHotStandby) is performed here, for code clarity.
+ *
+ * We either resolve conflicts immediately or set a timeout to wake us at
+ * the limit of our patience.
+ *
+ * Resolve conflicts by cancelling to all backends holding a conflicting
+ * lock.  As we are already queued to be granted the lock, no new lock
+ * requests conflicting with ours will be granted in the meantime.
+ *
+ * Deadlocks involving the Startup process and an ordinary backend process
+ * will be detected by the deadlock detector within the ordinary backend.
+ */
+void
+ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 {
-	VirtualTransactionId *backends;
+	TimestampTz ltime;
-	bool		lock_acquired = false;
-	int			num_attempts = 0;
+	Assert(InHotStandby);
-	LOCKTAG		locktag;
-	SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
+	ltime = GetStandbyLimitTime();
+	if (GetCurrentTimestamp() >= ltime)
+	{
 		/*
-	 * If blowing away everybody with conflicting locks doesn't work, after
+		 * We're already behind, so clear a path as quickly as possible.
-	 * the first two attempts then we just start blowing everybody away until
-	 * it does work. We do this because its likely that we either have too
-	 * many locks and we just can't get one at all, or that there are many
-	 * people crowding for the same table. Recovery must win; the end
-	 * justifies the means.
 		 */
-	while (!lock_acquired)
+		VirtualTransactionId *backends;
-	{
-		if (++num_attempts < 3)
 		backends = GetLockConflicts(&locktag, AccessExclusiveLock);
-		else
-			backends = GetConflictingVirtualXIDs(InvalidTransactionId,
-												 InvalidOid);
 		ResolveRecoveryConflictWithVirtualXIDs(backends,
 											 PROCSIG_RECOVERY_CONFLICT_LOCK);
+	}
+	else
+	{
+		/*
+		 * Wait (or wait again) until ltime
+		 */
+		EnableTimeoutParams timeouts[1];
-		if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
+		timeouts[0].id = STANDBY_LOCK_TIMEOUT;
-			!= LOCKACQUIRE_NOT_AVAIL)
+		timeouts[0].type = TMPARAM_AT;
-			lock_acquired = true;
+		timeouts[0].fin_time = ltime;
+		enable_timeouts(timeouts, 1);
 	}
+	/* Wait to be signaled by the release of the Relation Lock */
+	ProcWaitForSignal();
+	/*
+	 * Clear any timeout requests established above.  We assume here that the
+	 * Startup process doesn't have any other outstanding timeouts than those
+	 * used by this function. If that stops being true, we could cancel the
+	 * timeouts individually, but that'd be slower.
+	 */
+	disable_all_timeouts(false);
 }
 /*
@@ -534,6 +559,14 @@ StandbyTimeoutHandler(void)
 	SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 }
+/*
+ * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
+ * This doesn't need to do anything, simply waking up is enough.
+ */
+void
+StandbyLockTimeoutHandler(void)
+{
+}
 /*
 * -----------------------------------------------------
@@ -547,7 +580,7 @@ StandbyTimeoutHandler(void)
 * process is the proxy by which the original locks are implemented.
 *
 * We only keep track of AccessExclusiveLocks, which are only ever held by
- * one transaction on one relation, and don't worry about lock queuing.
+ * one transaction on one relation.
 *
 * We keep a single dynamically expandible list of locks in local memory,
 * RelationLockList, so we can keep track of the various entries made by
@@ -589,14 +622,9 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
 	newlock->relOid = relOid;
 	RecoveryLockList = lappend(RecoveryLockList, newlock);
-	/*
-	 * Attempt to acquire the lock as requested, if not resolve conflict
-	 */
 	SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
-	if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
+	LockAcquireExtended(&locktag, AccessExclusiveLock, true, false, false);
-		== LOCKACQUIRE_NOT_AVAIL)
-		ResolveRecoveryConflictWithLock(newlock->dbOid, newlock->relOid);
 }
 static void

--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -42,6 +42,7 @@
 #include "postmaster/autovacuum.h"
 #include "replication/slot.h"
 #include "replication/syncrep.h"
+#include "storage/standby.h"
 #include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/pmsignal.h"
@@ -1169,7 +1170,12 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 *
 	 * If LockTimeout is set, also enable the timeout for that.  We can save a
 	 * few cycles by enabling both timeout sources in one call.
+	 *
+	 * If InHotStandby we set lock waits slightly later for clarity with other
+	 * code.
 	 */
+	if (!InHotStandby)
+	{
 		if (LockTimeout > 0)
 		{
 			EnableTimeoutParams timeouts[2];
@@ -1184,6 +1190,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 		}
 		else
 			enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
+	}
 	/*
 	 * If somebody wakes us between LWLockRelease and WaitLatch, the latch
@@ -1200,6 +1207,13 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 * error, LockErrorCleanup will fix that up.
 	 */
 	do
+	{
+		if (InHotStandby)
+		{
+			/* Set a timer and wait for that or for the Lock to be granted */
+			ResolveRecoveryConflictWithLock(locallock->tag.lock);
+		}
+		else
 		{
 			WaitLatch(MyLatch, WL_LATCH_SET, 0);
 			ResetLatch(MyLatch);
@@ -1210,6 +1224,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 				got_deadlock_timeout = false;
 			}
 			CHECK_FOR_INTERRUPTS();
+		}
 		/*
 		 * waitStatus could change from STATUS_WAITING to something else
@@ -1447,6 +1462,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 * already caused QueryCancelPending to become set, we want the cancel to
 	 * be reported as a lock timeout, not a user cancel.
 	 */
+	if (!InHotStandby)
+	{
 		if (LockTimeout > 0)
 		{
 			DisableTimeoutParams timeouts[2];
@@ -1459,6 +1476,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 		}
 		else
 			disable_timeout(DEADLOCK_TIMEOUT, false);
+	}
 	/*
 	 * Re-acquire the lock table's partition lock.  We have to do this to hold

--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -15,6 +15,7 @@
 #define STANDBY_H
 #include "storage/standbydefs.h"
+#include "storage/lock.h"
 #include "storage/procsignal.h"
 #include "storage/relfilenode.h"
@@ -31,10 +32,12 @@ extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
 extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
 extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
+extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag);
 extern void ResolveRecoveryConflictWithBufferPin(void);
 extern void CheckRecoveryConflictDeadlock(void);
 extern void StandbyDeadLockHandler(void);
 extern void StandbyTimeoutHandler(void);
+extern void StandbyLockTimeoutHandler(void);
 /*
 * Standby Rmgr (RM_STANDBY_ID)

--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -29,6 +29,7 @@ typedef enum TimeoutId
 	STATEMENT_TIMEOUT,
 	STANDBY_DEADLOCK_TIMEOUT,
 	STANDBY_TIMEOUT,
+	STANDBY_LOCK_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
 	/* Maximum number of timeout reasons */