Commit 37c54863 authored by Simon Riggs's avatar Simon Riggs

Rework wait for AccessExclusiveLocks on Hot Standby

Earlier version committed in 9.0 caused spurious waits in some cases.
New infrastructure for lock waits in 9.3 used to correct and improve this.

Jeff Janes based upon a proposal by Simon Riggs, who also reviewed
Additional review comments from Amit Kapila
parent 53be0b1a
...@@ -203,6 +203,7 @@ StartupProcessMain(void) ...@@ -203,6 +203,7 @@ StartupProcessMain(void)
*/ */
RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler); RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler); RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
/* /*
* Unblock signals (they were blocked when the postmaster forked us) * Unblock signals (they were blocked when the postmaster forked us)
......
...@@ -41,7 +41,6 @@ static List *RecoveryLockList; ...@@ -41,7 +41,6 @@ static List *RecoveryLockList;
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
ProcSignalReason reason); ProcSignalReason reason);
static void ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid);
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason); static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts); static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks); static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
...@@ -339,39 +338,65 @@ ResolveRecoveryConflictWithDatabase(Oid dbid) ...@@ -339,39 +338,65 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
} }
} }
static void /*
ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid) * ResolveRecoveryConflictWithLock is called from ProcSleep()
* to resolve conflicts with other backends holding relation locks.
*
* The WaitLatch sleep normally done in ProcSleep()
* (when not InHotStandby) is performed here, for code clarity.
*
* We either resolve conflicts immediately or set a timeout to wake us at
* the limit of our patience.
*
* Resolve conflicts by cancelling to all backends holding a conflicting
* lock. As we are already queued to be granted the lock, no new lock
* requests conflicting with ours will be granted in the meantime.
*
* Deadlocks involving the Startup process and an ordinary backend process
* will be detected by the deadlock detector within the ordinary backend.
*/
void
ResolveRecoveryConflictWithLock(LOCKTAG locktag)
{ {
VirtualTransactionId *backends; TimestampTz ltime;
bool lock_acquired = false;
int num_attempts = 0;
LOCKTAG locktag;
SET_LOCKTAG_RELATION(locktag, dbOid, relOid); Assert(InHotStandby);
/* ltime = GetStandbyLimitTime();
* If blowing away everybody with conflicting locks doesn't work, after
* the first two attempts then we just start blowing everybody away until
* it does work. We do this because its likely that we either have too
* many locks and we just can't get one at all, or that there are many
* people crowding for the same table. Recovery must win; the end
* justifies the means.
*/
while (!lock_acquired)
{
if (++num_attempts < 3)
backends = GetLockConflicts(&locktag, AccessExclusiveLock);
else
backends = GetConflictingVirtualXIDs(InvalidTransactionId,
InvalidOid);
if (GetCurrentTimestamp() >= ltime)
{
/*
* We're already behind, so clear a path as quickly as possible.
*/
VirtualTransactionId *backends;
backends = GetLockConflicts(&locktag, AccessExclusiveLock);
ResolveRecoveryConflictWithVirtualXIDs(backends, ResolveRecoveryConflictWithVirtualXIDs(backends,
PROCSIG_RECOVERY_CONFLICT_LOCK); PROCSIG_RECOVERY_CONFLICT_LOCK);
}
else
{
/*
* Wait (or wait again) until ltime
*/
EnableTimeoutParams timeouts[1];
if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false) timeouts[0].id = STANDBY_LOCK_TIMEOUT;
!= LOCKACQUIRE_NOT_AVAIL) timeouts[0].type = TMPARAM_AT;
lock_acquired = true; timeouts[0].fin_time = ltime;
enable_timeouts(timeouts, 1);
} }
/* Wait to be signaled by the release of the Relation Lock */
ProcWaitForSignal();
/*
* Clear any timeout requests established above. We assume here that the
* Startup process doesn't have any other outstanding timeouts than those
* used by this function. If that stops being true, we could cancel the
* timeouts individually, but that'd be slower.
*/
disable_all_timeouts(false);
} }
/* /*
...@@ -534,6 +559,14 @@ StandbyTimeoutHandler(void) ...@@ -534,6 +559,14 @@ StandbyTimeoutHandler(void)
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
} }
/*
* StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
* This doesn't need to do anything, simply waking up is enough.
*/
void
StandbyLockTimeoutHandler(void)
{
}
/* /*
* ----------------------------------------------------- * -----------------------------------------------------
...@@ -547,7 +580,7 @@ StandbyTimeoutHandler(void) ...@@ -547,7 +580,7 @@ StandbyTimeoutHandler(void)
* process is the proxy by which the original locks are implemented. * process is the proxy by which the original locks are implemented.
* *
* We only keep track of AccessExclusiveLocks, which are only ever held by * We only keep track of AccessExclusiveLocks, which are only ever held by
* one transaction on one relation, and don't worry about lock queuing. * one transaction on one relation.
* *
* We keep a single dynamically expandible list of locks in local memory, * We keep a single dynamically expandible list of locks in local memory,
* RelationLockList, so we can keep track of the various entries made by * RelationLockList, so we can keep track of the various entries made by
...@@ -589,14 +622,9 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) ...@@ -589,14 +622,9 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
newlock->relOid = relOid; newlock->relOid = relOid;
RecoveryLockList = lappend(RecoveryLockList, newlock); RecoveryLockList = lappend(RecoveryLockList, newlock);
/*
* Attempt to acquire the lock as requested, if not resolve conflict
*/
SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid); SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false) LockAcquireExtended(&locktag, AccessExclusiveLock, true, false, false);
== LOCKACQUIRE_NOT_AVAIL)
ResolveRecoveryConflictWithLock(newlock->dbOid, newlock->relOid);
} }
static void static void
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "postmaster/autovacuum.h" #include "postmaster/autovacuum.h"
#include "replication/slot.h" #include "replication/slot.h"
#include "replication/syncrep.h" #include "replication/syncrep.h"
#include "storage/standby.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/pmsignal.h" #include "storage/pmsignal.h"
...@@ -1169,21 +1170,27 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -1169,21 +1170,27 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
* *
* If LockTimeout is set, also enable the timeout for that. We can save a * If LockTimeout is set, also enable the timeout for that. We can save a
* few cycles by enabling both timeout sources in one call. * few cycles by enabling both timeout sources in one call.
*
* If InHotStandby we set lock waits slightly later for clarity with other
* code.
*/ */
if (LockTimeout > 0) if (!InHotStandby)
{ {
EnableTimeoutParams timeouts[2]; if (LockTimeout > 0)
{
timeouts[0].id = DEADLOCK_TIMEOUT; EnableTimeoutParams timeouts[2];
timeouts[0].type = TMPARAM_AFTER;
timeouts[0].delay_ms = DeadlockTimeout; timeouts[0].id = DEADLOCK_TIMEOUT;
timeouts[1].id = LOCK_TIMEOUT; timeouts[0].type = TMPARAM_AFTER;
timeouts[1].type = TMPARAM_AFTER; timeouts[0].delay_ms = DeadlockTimeout;
timeouts[1].delay_ms = LockTimeout; timeouts[1].id = LOCK_TIMEOUT;
enable_timeouts(timeouts, 2); timeouts[1].type = TMPARAM_AFTER;
timeouts[1].delay_ms = LockTimeout;
enable_timeouts(timeouts, 2);
}
else
enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
} }
else
enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
/* /*
* If somebody wakes us between LWLockRelease and WaitLatch, the latch * If somebody wakes us between LWLockRelease and WaitLatch, the latch
...@@ -1201,15 +1208,23 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -1201,15 +1208,23 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
*/ */
do do
{ {
WaitLatch(MyLatch, WL_LATCH_SET, 0); if (InHotStandby)
ResetLatch(MyLatch); {
/* check for deadlocks first, as that's probably log-worthy */ /* Set a timer and wait for that or for the Lock to be granted */
if (got_deadlock_timeout) ResolveRecoveryConflictWithLock(locallock->tag.lock);
}
else
{ {
CheckDeadLock(); WaitLatch(MyLatch, WL_LATCH_SET, 0);
got_deadlock_timeout = false; ResetLatch(MyLatch);
/* check for deadlocks first, as that's probably log-worthy */
if (got_deadlock_timeout)
{
CheckDeadLock();
got_deadlock_timeout = false;
}
CHECK_FOR_INTERRUPTS();
} }
CHECK_FOR_INTERRUPTS();
/* /*
* waitStatus could change from STATUS_WAITING to something else * waitStatus could change from STATUS_WAITING to something else
...@@ -1447,18 +1462,21 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -1447,18 +1462,21 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
* already caused QueryCancelPending to become set, we want the cancel to * already caused QueryCancelPending to become set, we want the cancel to
* be reported as a lock timeout, not a user cancel. * be reported as a lock timeout, not a user cancel.
*/ */
if (LockTimeout > 0) if (!InHotStandby)
{ {
DisableTimeoutParams timeouts[2]; if (LockTimeout > 0)
{
DisableTimeoutParams timeouts[2];
timeouts[0].id = DEADLOCK_TIMEOUT; timeouts[0].id = DEADLOCK_TIMEOUT;
timeouts[0].keep_indicator = false; timeouts[0].keep_indicator = false;
timeouts[1].id = LOCK_TIMEOUT; timeouts[1].id = LOCK_TIMEOUT;
timeouts[1].keep_indicator = true; timeouts[1].keep_indicator = true;
disable_timeouts(timeouts, 2); disable_timeouts(timeouts, 2);
}
else
disable_timeout(DEADLOCK_TIMEOUT, false);
} }
else
disable_timeout(DEADLOCK_TIMEOUT, false);
/* /*
* Re-acquire the lock table's partition lock. We have to do this to hold * Re-acquire the lock table's partition lock. We have to do this to hold
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#define STANDBY_H #define STANDBY_H
#include "storage/standbydefs.h" #include "storage/standbydefs.h"
#include "storage/lock.h"
#include "storage/procsignal.h" #include "storage/procsignal.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
...@@ -31,10 +32,12 @@ extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, ...@@ -31,10 +32,12 @@ extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
extern void ResolveRecoveryConflictWithTablespace(Oid tsid); extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
extern void ResolveRecoveryConflictWithDatabase(Oid dbid); extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag);
extern void ResolveRecoveryConflictWithBufferPin(void); extern void ResolveRecoveryConflictWithBufferPin(void);
extern void CheckRecoveryConflictDeadlock(void); extern void CheckRecoveryConflictDeadlock(void);
extern void StandbyDeadLockHandler(void); extern void StandbyDeadLockHandler(void);
extern void StandbyTimeoutHandler(void); extern void StandbyTimeoutHandler(void);
extern void StandbyLockTimeoutHandler(void);
/* /*
* Standby Rmgr (RM_STANDBY_ID) * Standby Rmgr (RM_STANDBY_ID)
......
...@@ -29,6 +29,7 @@ typedef enum TimeoutId ...@@ -29,6 +29,7 @@ typedef enum TimeoutId
STATEMENT_TIMEOUT, STATEMENT_TIMEOUT,
STANDBY_DEADLOCK_TIMEOUT, STANDBY_DEADLOCK_TIMEOUT,
STANDBY_TIMEOUT, STANDBY_TIMEOUT,
STANDBY_LOCK_TIMEOUT,
/* First user-definable timeout reason */ /* First user-definable timeout reason */
USER_TIMEOUT, USER_TIMEOUT,
/* Maximum number of timeout reasons */ /* Maximum number of timeout reasons */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment