Commit b19e4250 authored by Kevin Grittner's avatar Kevin Grittner

Fix performance problems with autovacuum truncation in busy workloads.

In situations where there are over 8MB of empty pages at the end of
a table, the truncation work for trailing empty pages takes longer
than deadlock_timeout, and there is frequent access to the table by
processes other than autovacuum, there was a problem with the
autovacuum worker process being canceled by the deadlock checking
code. The truncation work done by autovacuum up that point was
lost, and the attempt tried again by a later autovacuum worker. The
attempts could continue indefinitely without making progress,
consuming resources and blocking other processes for up to
deadlock_timeout each time.

This patch has the autovacuum worker checking whether it is
blocking any other thread at 20ms intervals. If such a condition
develops, the autovacuum worker will persist the work it has done
so far, release its lock on the table, and sleep in 50ms intervals
for up to 5 seconds, hoping to be able to re-acquire the lock and
try again. If it is unable to get the lock in that time, it moves
on and a worker will try to continue later from the point this one
left off.

While this patch doesn't change the rules about when and what to
truncate, it does cause the truncation to occur sooner, with less
blocking, and with the consumption of fewer resources when there is
contention for the table's lock.

The only user-visible change other than improved performance is
that the table size during truncation may change incrementally
instead of just once.

This problem exists in all supported versions but is infrequently
reported, although some reports of performance problems when
autovacuum runs might be caused by this. Initial commit is just the
master branch, but this should probably be backpatched once the
build farm and general developer usage confirm that there are no
surprising effects.

Jan Wieck
parent e95c4bd1
This diff is collapsed.
...@@ -232,6 +232,24 @@ UnlockRelation(Relation relation, LOCKMODE lockmode) ...@@ -232,6 +232,24 @@ UnlockRelation(Relation relation, LOCKMODE lockmode)
LockRelease(&tag, lockmode, false); LockRelease(&tag, lockmode, false);
} }
/*
* LockHasWaitersRelation
*
* This is a functiion to check if someone else is waiting on a
* lock, we are currently holding.
*/
bool
LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
{
LOCKTAG tag;
SET_LOCKTAG_RELATION(tag,
relation->rd_lockInfo.lockRelId.dbId,
relation->rd_lockInfo.lockRelId.relId);
return LockHasWaiters(&tag, lockmode, false);
}
/* /*
* LockRelationIdForSession * LockRelationIdForSession
* *
......
...@@ -538,6 +538,98 @@ ProcLockHashCode(const PROCLOCKTAG *proclocktag, uint32 hashcode) ...@@ -538,6 +538,98 @@ ProcLockHashCode(const PROCLOCKTAG *proclocktag, uint32 hashcode)
return lockhash; return lockhash;
} }
/*
* LockHasWaiters -- look up 'locktag' and check if releasing this
* lock would wake up other processes waiting for it.
*/
bool
LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
{
LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
LockMethod lockMethodTable;
LOCALLOCKTAG localtag;
LOCALLOCK *locallock;
LOCK *lock;
PROCLOCK *proclock;
LWLockId partitionLock;
bool hasWaiters = false;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
lockMethodTable = LockMethods[lockmethodid];
if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes)
elog(ERROR, "unrecognized lock mode: %d", lockmode);
#ifdef LOCK_DEBUG
if (LOCK_DEBUG_ENABLED(locktag))
elog(LOG, "LockHasWaiters: lock [%u,%u] %s",
locktag->locktag_field1, locktag->locktag_field2,
lockMethodTable->lockModeNames[lockmode]);
#endif
/*
* Find the LOCALLOCK entry for this lock and lockmode
*/
MemSet(&localtag, 0, sizeof(localtag)); /* must clear padding */
localtag.lock = *locktag;
localtag.mode = lockmode;
locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
(void *) &localtag,
HASH_FIND, NULL);
/*
* let the caller print its own error message, too. Do not ereport(ERROR).
*/
if (!locallock || locallock->nLocks <= 0)
{
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
return false;
}
/*
* Check the shared lock table.
*/
partitionLock = LockHashPartitionLock(locallock->hashcode);
LWLockAcquire(partitionLock, LW_SHARED);
/*
* We don't need to re-find the lock or proclock, since we kept their
* addresses in the locallock table, and they couldn't have been removed
* while we were holding a lock on them.
*/
lock = locallock->lock;
LOCK_PRINT("LockHasWaiters: found", lock, lockmode);
proclock = locallock->proclock;
PROCLOCK_PRINT("LockHasWaiters: found", proclock);
/*
* Double-check that we are actually holding a lock of the type we want to
* release.
*/
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
{
PROCLOCK_PRINT("LockHasWaiters: WRONGTYPE", proclock);
LWLockRelease(partitionLock);
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
RemoveLocalLock(locallock);
return false;
}
/*
* Do the checking.
*/
if ((lockMethodTable->conflictTab[lockmode] & lock->waitMask) != 0)
hasWaiters = true;
LWLockRelease(partitionLock);
return hasWaiters;
}
/* /*
* LockAcquire -- Check for lock conflicts, sleep if conflict found, * LockAcquire -- Check for lock conflicts, sleep if conflict found,
......
...@@ -31,6 +31,7 @@ extern void UnlockRelationOid(Oid relid, LOCKMODE lockmode); ...@@ -31,6 +31,7 @@ extern void UnlockRelationOid(Oid relid, LOCKMODE lockmode);
extern void LockRelation(Relation relation, LOCKMODE lockmode); extern void LockRelation(Relation relation, LOCKMODE lockmode);
extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode); extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode);
extern void UnlockRelation(Relation relation, LOCKMODE lockmode); extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
......
...@@ -494,6 +494,8 @@ extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks); ...@@ -494,6 +494,8 @@ extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
extern void LockReleaseSession(LOCKMETHODID lockmethodid); extern void LockReleaseSession(LOCKMETHODID lockmethodid);
extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks); extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks); extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);
extern bool LockHasWaiters(const LOCKTAG *locktag,
LOCKMODE lockmode, bool sessionLock);
extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag, extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
LOCKMODE lockmode); LOCKMODE lockmode);
extern void AtPrepare_Locks(void); extern void AtPrepare_Locks(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment