Commit a794fb06 authored by Tom Lane

Convert the lock manager to use the new dynahash.c support for partitioned
hash tables, instead of the previous kluge involving multiple hash tables.
This partially undoes my patch of last December.
parent b25dc481
$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.19 2005/12/11 21:02:18 tgl Exp $
$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.20 2006/07/23 23:08:46 tgl Exp $
LOCKING OVERVIEW
......@@ -148,13 +148,21 @@ The lock manager's PROCLOCK objects contain:
tag -
The key fields that are used for hashing entries in the shared memory
PROCLOCK hash table. This is declared as a separate struct to ensure that
we always zero out the correct number of bytes.
we always zero out the correct number of bytes. It is critical that any
alignment-padding bytes the compiler might insert in the struct be zeroed
out, else the hash computation will be random. (Currently, we are careful
to define struct PROCLOCKTAG so that there are no padding bytes.)
tag.lock
SHMEM offset of the LOCK object this PROCLOCK is for.
tag.myLock
Pointer to the shared LOCK object this PROCLOCK is for.
tag.proc
SHMEM offset of PGPROC of backend process that owns this PROCLOCK.
tag.myProc
Pointer to the PGPROC of backend process that owns this PROCLOCK.
Note: it's OK to use pointers here because a PROCLOCK never outlives
either its lock or its proc. The tag is therefore unique for as long
as it needs to be, even though the same tag values might mean something
else at other times.
holdMask -
A bitmask for the lock modes successfully acquired by this PROCLOCK.
......@@ -191,12 +199,18 @@ Most operations only need to lock the single partition they are working in.
Here are the details:
* Each possible lock is assigned to one partition according to a hash of
its LOCKTAG value (see LockTagToPartition()). The partition's LWLock is
considered to protect all the LOCK objects of that partition as well as
their subsidiary PROCLOCKs. The shared-memory hash tables for LOCKs and
PROCLOCKs are divided into separate hash tables for each partition, and
operations on each hash table are likewise protected by the partition
lock.
its LOCKTAG value. The partition's LWLock is considered to protect all the
LOCK objects of that partition as well as their subsidiary PROCLOCKs.
* The shared-memory hash tables for LOCKs and PROCLOCKs are organized
so that different partitions use different hash chains, and thus there
is no conflict in working with objects in different partitions. This
is supported directly by dynahash.c's "partitioned table" mechanism
for the LOCK table: we need only ensure that the partition number is
taken from the low-order bits of the dynahash hash value for the LOCKTAG.
To make it work for PROCLOCKs, we have to ensure that a PROCLOCK's hash
value has the same low-order bits as its associated LOCK. This requires
a specialized hash function (see proclock_hash).
* Formerly, each PGPROC had a single list of PROCLOCKs belonging to it.
This has now been split into per-partition lists, so that access to a
......@@ -226,9 +240,10 @@ deadlock checking should not occur often enough to be performance-critical,
trying to make this work does not seem a productive use of effort.
A backend's internal LOCALLOCK hash table is not partitioned. We do store
the partition number in LOCALLOCK table entries, but this is a straight
speed-for-space tradeoff: we could instead recalculate the partition
number from the LOCKTAG when needed.
a copy of the locktag hash code in LOCALLOCK table entries, from which the
partition number can be computed, but this is a straight speed-for-space
tradeoff: we could instead recalculate the hash code (and from it the
partition number) from the LOCKTAG when needed.
THE DEADLOCK DETECTION ALGORITHM
......
......@@ -12,7 +12,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.40 2006/07/14 14:52:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.41 2006/07/23 23:08:46 tgl Exp $
*
* Interface:
*
......@@ -480,7 +480,7 @@ FindLockCycleRecurse(PGPROC *checkProc,
while (proclock)
{
proc = (PGPROC *) MAKE_PTR(proclock->tag.proc);
proc = proclock->tag.myProc;
/* A proc never blocks itself */
if (proc != checkProc)
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.167 2006/07/22 23:04:39 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.168 2006/07/23 23:08:46 tgl Exp $
*
* NOTES
* A lock table is a shared memory hash table. When
......@@ -32,6 +32,7 @@
#include <signal.h>
#include <unistd.h>
#include "access/transam.h"
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
#include "miscadmin.h"
......@@ -166,8 +167,8 @@ typedef struct TwoPhaseLockRecord
* The LockMethodLockHash and LockMethodProcLockHash hash tables are in
* shared memory; LockMethodLocalHash is local to each backend.
*/
static HTAB *LockMethodLockHash[NUM_LOCK_PARTITIONS];
static HTAB *LockMethodProcLockHash[NUM_LOCK_PARTITIONS];
static HTAB *LockMethodLockHash;
static HTAB *LockMethodProcLockHash;
static HTAB *LockMethodLocalHash;
......@@ -218,10 +219,10 @@ LOCK_PRINT(const char *where, const LOCK *lock, LOCKMODE type)
{
if (LOCK_DEBUG_ENABLED(&lock->tag))
elog(LOG,
"%s: lock(%lx) id(%u,%u,%u,%u,%u,%u) grantMask(%x) "
"%s: lock(%p) id(%u,%u,%u,%u,%u,%u) grantMask(%x) "
"req(%d,%d,%d,%d,%d,%d,%d)=%d "
"grant(%d,%d,%d,%d,%d,%d,%d)=%d wait(%d) type(%s)",
where, MAKE_OFFSET(lock),
where, lock,
lock->tag.locktag_field1, lock->tag.locktag_field2,
lock->tag.locktag_field3, lock->tag.locktag_field4,
lock->tag.locktag_type, lock->tag.locktag_lockmethodid,
......@@ -240,12 +241,12 @@ LOCK_PRINT(const char *where, const LOCK *lock, LOCKMODE type)
inline static void
PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP)
{
if (LOCK_DEBUG_ENABLED(&((LOCK *) MAKE_PTR(proclockP->tag.lock))->tag))
if (LOCK_DEBUG_ENABLED(&proclockP->tag.myLock->tag))
elog(LOG,
"%s: proclock(%lx) lock(%lx) method(%u) proc(%lx) hold(%x)",
where, MAKE_OFFSET(proclockP), proclockP->tag.lock,
"%s: proclock(%p) lock(%p) method(%u) proc(%p) hold(%x)",
where, proclockP, proclockP->tag.myLock,
PROCLOCK_LOCKMETHOD(*(proclockP)),
proclockP->tag.proc, (int) proclockP->holdMask);
proclockP->tag.myProc, (int) proclockP->holdMask);
}
#else /* not LOCK_DEBUG */
......@@ -254,13 +255,14 @@ PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP)
#endif /* not LOCK_DEBUG */
static uint32 proclock_hash(const void *key, Size keysize);
static void RemoveLocalLock(LOCALLOCK *locallock);
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
PROCLOCK *proclock, LockMethod lockMethodTable);
static void CleanUpLock(LOCK *lock, PROCLOCK *proclock,
LockMethod lockMethodTable, int partition,
LockMethod lockMethodTable, uint32 hashcode,
bool wakeupNeeded);
......@@ -279,71 +281,62 @@ static void CleanUpLock(LOCK *lock, PROCLOCK *proclock,
void
InitLocks(void)
{
char shmemName[64];
HASHCTL info;
int hash_flags;
long init_table_size,
max_table_size;
int i;
/*
* Compute init/max size to request for lock hashtables. Note these
* calculations must agree with LockShmemSize!
*/
max_table_size = NLOCKENTS();
max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
init_table_size = max_table_size / 2;
/*
* Allocate hash tables for LOCK structs. These are used to store
* Allocate hash table for LOCK structs. This stores
* per-locked-object information.
*/
MemSet(&info, 0, sizeof(info));
info.keysize = sizeof(LOCKTAG);
info.entrysize = sizeof(LOCK);
info.hash = tag_hash;
hash_flags = (HASH_ELEM | HASH_FUNCTION);
info.num_partitions = NUM_LOCK_PARTITIONS;
hash_flags = (HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
sprintf(shmemName, "LOCK hash %d", i);
LockMethodLockHash[i] = ShmemInitHash(shmemName,
init_table_size,
max_table_size,
&info,
hash_flags);
if (!LockMethodLockHash[i])
elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
}
LockMethodLockHash = ShmemInitHash("LOCK hash",
init_table_size,
max_table_size,
&info,
hash_flags);
if (!LockMethodLockHash)
elog(FATAL, "could not initialize lock hash table");
/* Assume an average of 2 holders per lock */
max_table_size *= 2;
init_table_size *= 2;
/*
* Allocate hash tables for PROCLOCK structs. These are used to store
* Allocate hash table for PROCLOCK structs. This stores
* per-lock-per-holder information.
*/
info.keysize = sizeof(PROCLOCKTAG);
info.entrysize = sizeof(PROCLOCK);
info.hash = tag_hash;
hash_flags = (HASH_ELEM | HASH_FUNCTION);
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
sprintf(shmemName, "PROCLOCK hash %d", i);
LockMethodProcLockHash[i] = ShmemInitHash(shmemName,
init_table_size,
max_table_size,
&info,
hash_flags);
if (!LockMethodProcLockHash[i])
elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
}
info.hash = proclock_hash;
info.num_partitions = NUM_LOCK_PARTITIONS;
hash_flags = (HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
LockMethodProcLockHash = ShmemInitHash("PROCLOCK hash",
init_table_size,
max_table_size,
&info,
hash_flags);
if (!LockMethodProcLockHash)
elog(FATAL, "could not initialize proclock hash table");
/*
* Allocate one non-shared hash table for LOCALLOCK structs. This is used
* to store lock counts and resource owner information.
* Allocate non-shared hash table for LOCALLOCK structs. This stores
* lock counts and resource owner information.
*
* The non-shared table could already exist in this process (this occurs
* when the postmaster is recreating shared memory after a backend crash).
......@@ -379,35 +372,74 @@ GetLocksMethodTable(const LOCK *lock)
/*
* Given a LOCKTAG, determine which partition the lock belongs in.
* Compute the hash code associated with a LOCKTAG.
*
* Basically what we want to do here is hash the locktag. However, it
* seems unwise to use hash_any() because that is the same function that
* will be used to distribute the locks within each partition's hash table;
* if we use it, we run a big risk of having uneven distribution of hash
* codes within each hash table. Instead, we use a simple linear XOR of the
* bits of the locktag.
* To avoid unnecessary recomputations of the hash code, we try to do this
* just once per function, and then pass it around as needed. Aside from
* passing the hashcode to hash_search_with_hash_value(), we can extract
* the lock partition number from the hashcode.
*/
int
LockTagToPartition(const LOCKTAG *locktag)
uint32
LockTagHashCode(const LOCKTAG *locktag)
{
const uint8 *ptr = (const uint8 *) locktag;
int result = 0;
int i;
return get_hash_value(LockMethodLockHash, (const void *) locktag);
}
for (i = 0; i < sizeof(LOCKTAG); i++)
result ^= *ptr++;
#if NUM_LOCK_PARTITIONS == 16
result ^= result >> 4;
result &= 0x0F;
#elif NUM_LOCK_PARTITIONS == 4
result ^= result >> 4;
result ^= result >> 2;
result &= 0x03;
#else
#error unsupported NUM_LOCK_PARTITIONS
#endif
return result;
/*
* Compute the hash code associated with a PROCLOCKTAG.
*
* Because we want to use just one set of partition locks for both the
* LOCK and PROCLOCK hash tables, we have to make sure that PROCLOCKs
* fall into the same partition number as their associated LOCKs.
* dynahash.c expects the partition number to be the low-order bits of
* the hash code, and therefore a PROCLOCKTAG's hash code must have the
* same low-order bits as the associated LOCKTAG's hash code. We achieve
* this with this specialized hash function.
*/
static uint32
proclock_hash(const void *key, Size keysize)
{
/* The key arrives as an untyped pointer; it is really a PROCLOCKTAG */
const PROCLOCKTAG *proclocktag = (const PROCLOCKTAG *) key;
uint32 lockhash;
Datum procptr;
/* Sanity check: this function is only meaningful for PROCLOCKTAG keys */
Assert(keysize == sizeof(PROCLOCKTAG));
/* Look into the associated LOCK object, and compute its hash code */
lockhash = LockTagHashCode(&proclocktag->myLock->tag);
/*
* To make the hash code also depend on the PGPROC, we xor the proc
* struct's address into the hash code, left-shifted so that the
* partition-number bits don't change. Since this is only a hash,
* we don't care if we lose high-order bits of the address; use
* an intermediate variable to suppress cast-pointer-to-int warnings.
*
* ProcLockHashCode() repeats this step and must be kept in sync with it.
*/
procptr = PointerGetDatum(proclocktag->myProc);
lockhash ^= ((uint32) procptr) << LOG2_NUM_LOCK_PARTITIONS;
/* The low-order LOG2_NUM_LOCK_PARTITIONS bits are still those of the LOCK's hash */
return lockhash;
}
/*
* Compute the hash code associated with a PROCLOCKTAG, given the hashcode
* for its underlying LOCK.
*
* We use this just to avoid redundant calls of LockTagHashCode().
*/
static inline uint32
ProcLockHashCode(const PROCLOCKTAG *proclocktag, uint32 hashcode)
{
/* Start from the caller-supplied hash code of the underlying LOCK */
uint32 lockhash = hashcode;
Datum procptr;
/*
* This must match proclock_hash()!
*
* Only proclocktag->myProc is examined here; the passed-in hashcode
* takes the place of hashing myLock's tag again.
*/
procptr = PointerGetDatum(proclocktag->myProc);
/* The shift leaves the low-order LOG2_NUM_LOCK_PARTITIONS bits of hashcode untouched */
lockhash ^= ((uint32) procptr) << LOG2_NUM_LOCK_PARTITIONS;
return lockhash;
}
......@@ -453,6 +485,8 @@ LockAcquire(const LOCKTAG *locktag,
PROCLOCKTAG proclocktag;
bool found;
ResourceOwner owner;
uint32 hashcode;
uint32 proclock_hashcode;
int partition;
LWLockId partitionLock;
int status;
......@@ -495,7 +529,7 @@ LockAcquire(const LOCKTAG *locktag,
locallock->lock = NULL;
locallock->proclock = NULL;
locallock->isTempObject = isTempObject;
locallock->partition = LockTagToPartition(&(localtag.lock));
locallock->hashcode = LockTagHashCode(&(localtag.lock));
locallock->nLocks = 0;
locallock->numLockOwners = 0;
locallock->maxLockOwners = 8;
......@@ -532,8 +566,9 @@ LockAcquire(const LOCKTAG *locktag,
/*
* Otherwise we've got to mess with the shared lock table.
*/
partition = locallock->partition;
partitionLock = FirstLockMgrLock + partition;
hashcode = locallock->hashcode;
partition = LockHashPartition(hashcode);
partitionLock = LockHashPartitionLock(hashcode);
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
......@@ -545,9 +580,11 @@ LockAcquire(const LOCKTAG *locktag,
* pointer is valid, since a lock object with no locks can go away
* anytime.
*/
lock = (LOCK *) hash_search(LockMethodLockHash[partition],
(void *) locktag,
HASH_ENTER_NULL, &found);
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
(void *) locktag,
hashcode,
HASH_ENTER_NULL,
&found);
if (!lock)
{
LWLockRelease(partitionLock);
......@@ -584,16 +621,19 @@ LockAcquire(const LOCKTAG *locktag,
/*
* Create the hash key for the proclock table.
*/
MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */
proclocktag.lock = MAKE_OFFSET(lock);
proclocktag.proc = MAKE_OFFSET(MyProc);
proclocktag.myLock = lock;
proclocktag.myProc = MyProc;
proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode);
/*
* Find or create a proclock entry with this tag
*/
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
(void *) &proclocktag,
HASH_ENTER_NULL, &found);
proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash,
(void *) &proclocktag,
proclock_hashcode,
HASH_ENTER_NULL,
&found);
if (!proclock)
{
/* Ooops, not enough shmem for the proclock */
......@@ -606,9 +646,11 @@ LockAcquire(const LOCKTAG *locktag,
* anyone to release the lock object later.
*/
Assert(SHMQueueEmpty(&(lock->procLocks)));
if (!hash_search(LockMethodLockHash[partition],
(void *) &(lock->tag),
HASH_REMOVE, NULL))
if (!hash_search_with_hash_value(LockMethodLockHash,
(void *) &(lock->tag),
hashcode,
HASH_REMOVE,
NULL))
elog(PANIC, "lock table corrupted");
}
LWLockRelease(partitionLock);
......@@ -726,9 +768,11 @@ LockAcquire(const LOCKTAG *locktag,
{
SHMQueueDelete(&proclock->lockLink);
SHMQueueDelete(&proclock->procLink);
if (!hash_search(LockMethodProcLockHash[partition],
(void *) &(proclock->tag),
HASH_REMOVE, NULL))
if (!hash_search_with_hash_value(LockMethodProcLockHash,
(void *) &(proclock->tag),
proclock_hashcode,
HASH_REMOVE,
NULL))
elog(PANIC, "proclock table corrupted");
}
else
......@@ -954,12 +998,12 @@ UnGrantLock(LOCK *lock, LOCKMODE lockmode,
* should be called after UnGrantLock, and wakeupNeeded is the result from
* UnGrantLock.)
*
* The lock table's partition lock must be held at entry, and will be
* The appropriate partition lock must be held at entry, and will be
* held at exit.
*/
static void
CleanUpLock(LOCK *lock, PROCLOCK *proclock,
LockMethod lockMethodTable, int partition,
LockMethod lockMethodTable, uint32 hashcode,
bool wakeupNeeded)
{
/*
......@@ -968,12 +1012,17 @@ CleanUpLock(LOCK *lock, PROCLOCK *proclock,
*/
if (proclock->holdMask == 0)
{
uint32 proclock_hashcode;
PROCLOCK_PRINT("CleanUpLock: deleting", proclock);
SHMQueueDelete(&proclock->lockLink);
SHMQueueDelete(&proclock->procLink);
if (!hash_search(LockMethodProcLockHash[partition],
(void *) &(proclock->tag),
HASH_REMOVE, NULL))
proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode);
if (!hash_search_with_hash_value(LockMethodProcLockHash,
(void *) &(proclock->tag),
proclock_hashcode,
HASH_REMOVE,
NULL))
elog(PANIC, "proclock table corrupted");
}
......@@ -985,9 +1034,11 @@ CleanUpLock(LOCK *lock, PROCLOCK *proclock,
*/
LOCK_PRINT("CleanUpLock: deleting", lock, 0);
Assert(SHMQueueEmpty(&(lock->procLocks)));
if (!hash_search(LockMethodLockHash[partition],
(void *) &(lock->tag),
HASH_REMOVE, NULL))
if (!hash_search_with_hash_value(LockMethodLockHash,
(void *) &(lock->tag),
hashcode,
HASH_REMOVE,
NULL))
elog(PANIC, "lock table corrupted");
}
else if (wakeupNeeded)
......@@ -1097,7 +1148,7 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
awaitedLock = NULL;
LOCK_PRINT("WaitOnLock: aborting on lock",
locallock->lock, locallock->tag.mode);
LWLockRelease(FirstLockMgrLock + locallock->partition);
LWLockRelease(LockHashPartitionLock(locallock->hashcode));
/*
* Now that we aren't holding the partition lock, we can give an error
......@@ -1130,7 +1181,7 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
* NB: this does not clean up any locallock object that may exist for the lock.
*/
void
RemoveFromWaitQueue(PGPROC *proc, int partition)
RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode)
{
LOCK *waitLock = proc->waitLock;
PROCLOCK *proclock = proc->waitProcLock;
......@@ -1171,7 +1222,7 @@ RemoveFromWaitQueue(PGPROC *proc, int partition)
* any other waiters for the lock can be woken up now.
*/
CleanUpLock(waitLock, proclock,
LockMethods[lockmethodid], partition,
LockMethods[lockmethodid], hashcode,
true);
}
......@@ -1195,7 +1246,6 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
LOCALLOCK *locallock;
LOCK *lock;
PROCLOCK *proclock;
int partition;
LWLockId partitionLock;
bool wakeupNeeded;
......@@ -1283,8 +1333,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
/*
* Otherwise we've got to mess with the shared lock table.
*/
partition = locallock->partition;
partitionLock = FirstLockMgrLock + partition;
partitionLock = LockHashPartitionLock(locallock->hashcode);
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
......@@ -1318,7 +1367,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
CleanUpLock(lock, proclock,
lockMethodTable, partition,
lockMethodTable, locallock->hashcode,
wakeupNeeded);
LWLockRelease(partitionLock);
......@@ -1449,9 +1498,9 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
SHMQueueNext(procLocks, &proclock->procLink,
offsetof(PROCLOCK, procLink));
Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
Assert(proclock->tag.myProc == MyProc);
lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
lock = proclock->tag.myLock;
/* Ignore items that are not of the lockmethod to be removed */
if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
......@@ -1497,7 +1546,8 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
/* CleanUpLock will wake up waiters if needed. */
CleanUpLock(lock, proclock,
lockMethodTable, partition,
lockMethodTable,
LockTagHashCode(&lock->tag),
wakeupNeeded);
next_item:
......@@ -1789,9 +1839,9 @@ PostPrepare_Locks(TransactionId xid)
SHMQueueNext(procLocks, &proclock->procLink,
offsetof(PROCLOCK, procLink));
Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
Assert(proclock->tag.myProc == MyProc);
lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
lock = proclock->tag.myLock;
/* Ignore nontransactional locks */
if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional)
......@@ -1814,7 +1864,7 @@ PostPrepare_Locks(TransactionId xid)
holdMask = proclock->holdMask;
/*
* We cannot simply modify proclock->tag.proc to reassign
* We cannot simply modify proclock->tag.myProc to reassign
* ownership of the lock, because that's part of the hash key and
* the proclock would then be in the wrong hash chain. So, unlink
* and delete the old proclock; create a new one with the right
......@@ -1825,7 +1875,7 @@ PostPrepare_Locks(TransactionId xid)
*/
SHMQueueDelete(&proclock->lockLink);
SHMQueueDelete(&proclock->procLink);
if (!hash_search(LockMethodProcLockHash[partition],
if (!hash_search(LockMethodProcLockHash,
(void *) &(proclock->tag),
HASH_REMOVE, NULL))
elog(PANIC, "proclock table corrupted");
......@@ -1833,11 +1883,10 @@ PostPrepare_Locks(TransactionId xid)
/*
* Create the hash key for the new proclock table.
*/
MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));
proclocktag.lock = MAKE_OFFSET(lock);
proclocktag.proc = MAKE_OFFSET(newproc);
proclocktag.myLock = lock;
proclocktag.myProc = newproc;
newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
(void *) &proclocktag,
HASH_ENTER_NULL, &found);
if (!newproclock)
......@@ -1889,23 +1938,18 @@ Size
LockShmemSize(void)
{
Size size = 0;
Size tabsize;
long max_table_size;
/* lock hash tables */
/* lock hash table */
max_table_size = NLOCKENTS();
max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
tabsize = hash_estimate_size(max_table_size, sizeof(LOCK));
size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK)));
/* proclock hash tables */
/* proclock hash table */
max_table_size *= 2;
tabsize = hash_estimate_size(max_table_size, sizeof(PROCLOCK));
size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
size = add_size(size, hash_estimate_size(max_table_size, sizeof(PROCLOCK)));
/*
* Since there is likely to be some space wastage due to uneven use
* of the partitions, add 10% safety margin.
* Since NLOCKENTS is only an estimate, add 10% safety margin.
*/
size = add_size(size, size / 10);
......@@ -1930,7 +1974,6 @@ LockData *
GetLockStatusData(void)
{
LockData *data;
HTAB *proclockTable;
PROCLOCK *proclock;
HASH_SEQ_STATUS seqstat;
int els;
......@@ -1940,7 +1983,7 @@ GetLockStatusData(void)
data = (LockData *) palloc(sizeof(LockData));
/*
* Acquire lock on the entire shared lock data structures. We can't
* Acquire lock on the entire shared lock data structure. We can't
* operate one partition at a time if we want to deliver a self-consistent
* view of the state.
*
......@@ -1950,43 +1993,32 @@ GetLockStatusData(void)
* It will at least allow two backends to do GetLockStatusData in parallel.
*
* Must grab LWLocks in partition-number order to avoid LWLock deadlock.
*
* Use same loop to count up the total number of PROCLOCK objects.
*/
els = 0;
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
proclockTable = LockMethodProcLockHash[i];
els += hash_get_num_entries(proclockTable);
}
/* Now we can safely count the number of proclocks */
els = hash_get_num_entries(LockMethodProcLockHash);
data->nelements = els;
data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * els);
data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * els);
data->procs = (PGPROC *) palloc(sizeof(PGPROC) * els);
data->locks = (LOCK *) palloc(sizeof(LOCK) * els);
el = 0;
/* Now scan the tables to copy the data */
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
proclockTable = LockMethodProcLockHash[i];
hash_seq_init(&seqstat, proclockTable);
hash_seq_init(&seqstat, LockMethodProcLockHash);
while ((proclock = hash_seq_search(&seqstat)))
{
PGPROC *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc);
LOCK *lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
el = 0;
while ((proclock = (PROCLOCK *) hash_seq_search(&seqstat)))
{
PGPROC *proc = proclock->tag.myProc;
LOCK *lock = proclock->tag.myLock;
data->proclockaddrs[el] = MAKE_OFFSET(proclock);
memcpy(&(data->proclocks[el]), proclock, sizeof(PROCLOCK));
memcpy(&(data->procs[el]), proc, sizeof(PGPROC));
memcpy(&(data->locks[el]), lock, sizeof(LOCK));
memcpy(&(data->proclocks[el]), proclock, sizeof(PROCLOCK));
memcpy(&(data->procs[el]), proc, sizeof(PGPROC));
memcpy(&(data->locks[el]), lock, sizeof(LOCK));
el++;
}
el++;
}
/* And release locks */
......@@ -2036,9 +2068,9 @@ DumpLocks(PGPROC *proc)
while (proclock)
{
Assert(proclock->tag.proc == MAKE_OFFSET(proc));
Assert(proclock->tag.myProc == proc);
lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
lock = proclock->tag.myLock;
PROCLOCK_PRINT("DumpLocks", proclock);
LOCK_PRINT("DumpLocks", lock, 0);
......@@ -2061,32 +2093,24 @@ DumpAllLocks(void)
PGPROC *proc;
PROCLOCK *proclock;
LOCK *lock;
HTAB *proclockTable;
HASH_SEQ_STATUS status;
int i;
proc = MyProc;
if (proc && proc->waitLock)
LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0);
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
proclockTable = LockMethodProcLockHash[i];
hash_seq_init(&status, proclockTable);
hash_seq_init(&status, LockMethodProcLockHash);
while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
{
PROCLOCK_PRINT("DumpAllLocks", proclock);
while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
{
PROCLOCK_PRINT("DumpAllLocks", proclock);
if (proclock->tag.lock)
{
lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
LOCK_PRINT("DumpAllLocks", lock, 0);
}
else
elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL");
}
lock = proclock->tag.myLock;
if (lock)
LOCK_PRINT("DumpAllLocks", lock, 0);
else
elog(LOG, "DumpAllLocks: proclock->tag.myLock = NULL");
}
}
#endif /* LOCK_DEBUG */
......@@ -2115,6 +2139,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
PROCLOCK *proclock;
PROCLOCKTAG proclocktag;
bool found;
uint32 hashcode;
uint32 proclock_hashcode;
int partition;
LWLockId partitionLock;
LockMethod lockMethodTable;
......@@ -2128,17 +2154,20 @@ lock_twophase_recover(TransactionId xid, uint16 info,
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
lockMethodTable = LockMethods[lockmethodid];
partition = LockTagToPartition(locktag);
partitionLock = FirstLockMgrLock + partition;
hashcode = LockTagHashCode(locktag);
partition = LockHashPartition(hashcode);
partitionLock = LockHashPartitionLock(hashcode);
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* Find or create a lock with this tag.
*/
lock = (LOCK *) hash_search(LockMethodLockHash[partition],
(void *) locktag,
HASH_ENTER_NULL, &found);
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
(void *) locktag,
hashcode,
HASH_ENTER_NULL,
&found);
if (!lock)
{
LWLockRelease(partitionLock);
......@@ -2174,16 +2203,19 @@ lock_twophase_recover(TransactionId xid, uint16 info,
/*
* Create the hash key for the proclock table.
*/
MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */
proclocktag.lock = MAKE_OFFSET(lock);
proclocktag.proc = MAKE_OFFSET(proc);
proclocktag.myLock = lock;
proclocktag.myProc = proc;
proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode);
/*
* Find or create a proclock entry with this tag
*/
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
(void *) &proclocktag,
HASH_ENTER_NULL, &found);
proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash,
(void *) &proclocktag,
proclock_hashcode,
HASH_ENTER_NULL,
&found);
if (!proclock)
{
/* Ooops, not enough shmem for the proclock */
......@@ -2196,9 +2228,11 @@ lock_twophase_recover(TransactionId xid, uint16 info,
* anyone to release the lock object later.
*/
Assert(SHMQueueEmpty(&(lock->procLocks)));
if (!hash_search(LockMethodLockHash[partition],
(void *) &(lock->tag),
HASH_REMOVE, NULL))
if (!hash_search_with_hash_value(LockMethodLockHash,
(void *) &(lock->tag),
hashcode,
HASH_REMOVE,
NULL))
elog(PANIC, "lock table corrupted");
}
LWLockRelease(partitionLock);
......@@ -2269,7 +2303,8 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
LOCK *lock;
PROCLOCK *proclock;
PROCLOCKTAG proclocktag;
int partition;
uint32 hashcode;
uint32 proclock_hashcode;
LWLockId partitionLock;
LockMethod lockMethodTable;
bool wakeupNeeded;
......@@ -2283,29 +2318,35 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
lockMethodTable = LockMethods[lockmethodid];
partition = LockTagToPartition(locktag);
partitionLock = FirstLockMgrLock + partition;
hashcode = LockTagHashCode(locktag);
partitionLock = LockHashPartitionLock(hashcode);
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* Re-find the lock object (it had better be there).
*/
lock = (LOCK *) hash_search(LockMethodLockHash[partition],
(void *) locktag,
HASH_FIND, NULL);
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
(void *) locktag,
hashcode,
HASH_FIND,
NULL);
if (!lock)
elog(PANIC, "failed to re-find shared lock object");
/*
* Re-find the proclock object (ditto).
*/
MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */
proclocktag.lock = MAKE_OFFSET(lock);
proclocktag.proc = MAKE_OFFSET(proc);
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
(void *) &proclocktag,
HASH_FIND, NULL);
proclocktag.myLock = lock;
proclocktag.myProc = proc;
proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode);
proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash,
(void *) &proclocktag,
proclock_hashcode,
HASH_FIND,
NULL);
if (!proclock)
elog(PANIC, "failed to re-find shared proclock object");
......@@ -2328,7 +2369,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
CleanUpLock(lock, proclock,
lockMethodTable, partition,
lockMethodTable, hashcode,
wakeupNeeded);
LWLockRelease(partitionLock);
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.177 2006/07/14 14:52:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.178 2006/07/23 23:08:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -461,13 +461,13 @@ LockWaitCancel(void)
disable_sig_alarm(false);
/* Unlink myself from the wait queue, if on it (might not be anymore!) */
partitionLock = FirstLockMgrLock + lockAwaited->partition;
partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
if (MyProc->links.next != INVALID_OFFSET)
{
/* We could not have been granted the lock yet */
RemoveFromWaitQueue(MyProc, lockAwaited->partition);
RemoveFromWaitQueue(MyProc, lockAwaited->hashcode);
}
else
{
......@@ -673,8 +673,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
LOCKMODE lockmode = locallock->tag.mode;
LOCK *lock = locallock->lock;
PROCLOCK *proclock = locallock->proclock;
int partition = locallock->partition;
LWLockId partitionLock = FirstLockMgrLock + partition;
uint32 hashcode = locallock->hashcode;
LWLockId partitionLock = LockHashPartitionLock(hashcode);
PROC_QUEUE *waitQueue = &(lock->waitProcs);
LOCKMASK myHeldLocks = MyProc->heldLocks;
bool early_deadlock = false;
......@@ -776,7 +776,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
*/
if (early_deadlock)
{
RemoveFromWaitQueue(MyProc, partition);
RemoveFromWaitQueue(MyProc, hashcode);
return STATUS_ERROR;
}
......@@ -1025,7 +1025,7 @@ CheckDeadLock(void)
* ProcSleep will report an error after we return from the signal handler.
*/
Assert(MyProc->waitLock != NULL);
RemoveFromWaitQueue(MyProc, LockTagToPartition(&(MyProc->waitLock->tag)));
RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
/*
* Unlock my semaphore so that the interrupted ProcSleep() call can
......
......@@ -6,7 +6,7 @@
* Copyright (c) 2002-2006, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/lockfuncs.c,v 1.23 2006/07/14 14:52:24 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/lockfuncs.c,v 1.24 2006/07/23 23:08:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -152,7 +152,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
*/
if (!granted)
{
if (proc->waitLock == (LOCK *) MAKE_PTR(proclock->tag.lock))
if (proc->waitLock == proclock->tag.myLock)
{
/* Yes, so report it with proper mode */
mode = proc->waitLockMode;
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.95 2006/07/23 03:07:58 tgl Exp $
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.96 2006/07/23 23:08:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -266,7 +266,9 @@ typedef struct LOCK
*
* PROCLOCKTAG is the key information needed to look up a PROCLOCK item in the
* proclock hashtable. A PROCLOCKTAG value uniquely identifies the combination
* of a lockable object and a holder/waiter for that object.
* of a lockable object and a holder/waiter for that object. (We can use
* pointers here because the PROCLOCKTAG need only be unique for the lifespan
* of the PROCLOCK, and it will never outlive the lock or the proc.)
*
* Internally to a backend, it is possible for the same lock to be held
* for different purposes: the backend tracks transaction locks separately
......@@ -292,8 +294,9 @@ typedef struct LOCK
*/
typedef struct PROCLOCKTAG
{
SHMEM_OFFSET lock; /* link to per-lockable-object information */
SHMEM_OFFSET proc; /* link to PGPROC of owning backend */
/* NB: we assume this struct contains no padding! */
LOCK *myLock; /* link to per-lockable-object information */
PGPROC *myProc; /* link to PGPROC of owning backend */
} PROCLOCKTAG;
typedef struct PROCLOCK
......@@ -309,7 +312,7 @@ typedef struct PROCLOCK
} PROCLOCK;
#define PROCLOCK_LOCKMETHOD(proclock) \
LOCK_LOCKMETHOD(*((LOCK *) MAKE_PTR((proclock).tag.lock)))
LOCK_LOCKMETHOD(*((proclock).tag.myLock))
/*
* Each backend also maintains a local hash table with information about each
......@@ -347,7 +350,7 @@ typedef struct LOCALLOCK
LOCK *lock; /* associated LOCK object in shared mem */
PROCLOCK *proclock; /* associated PROCLOCK object in shmem */
bool isTempObject; /* true if lock is on a temporary object */
int partition; /* ID of partition containing this lock */
uint32 hashcode; /* copy of LOCKTAG's hash value */
int nLocks; /* total number of times lock is held */
int numLockOwners; /* # of relevant ResourceOwners */
int maxLockOwners; /* allocated size of array */
......@@ -360,15 +363,14 @@ typedef struct LOCALLOCK
/*
* This struct holds information passed from lmgr internals to the lock
* listing user-level functions (lockfuncs.c). For each PROCLOCK in the
* system, the SHMEM_OFFSET, PROCLOCK itself, and associated PGPROC and
* LOCK objects are stored. (Note there will often be multiple copies
* of the same PGPROC or LOCK.) We do not store the SHMEM_OFFSET of the
* PGPROC or LOCK separately, since they're in the PROCLOCK's tag fields.
* system, copies of the PROCLOCK object and associated PGPROC and
* LOCK objects are stored. Note there will often be multiple copies
* of the same PGPROC or LOCK --- to detect whether two are the same,
* compare the PROCLOCK tag fields.
*/
typedef struct
typedef struct LockData
{
int nelements; /* The length of each of the arrays */
SHMEM_OFFSET *proclockaddrs;
PROCLOCK *proclocks;
PGPROC *procs;
LOCK *locks;
......@@ -384,12 +386,24 @@ typedef enum
} LockAcquireResult;
/*
* The lockmgr's shared hash tables are partitioned to reduce contention.
* To determine which partition a given locktag belongs to, compute the tag's
* hash code with LockTagHashCode(), then apply one of these macros.
* NB: NUM_LOCK_PARTITIONS must be a power of 2!
*/
/*
 * LockHashPartition: reduce a lock hash code to a partition number by
 * taking it modulo NUM_LOCK_PARTITIONS.  The argument is evaluated once.
 */
#define LockHashPartition(hashcode) \
((hashcode) % NUM_LOCK_PARTITIONS)
/*
 * LockHashPartitionLock: yield the LWLockId for a hash code's partition,
 * computed as FirstLockMgrLock plus the partition number obtained from
 * LockHashPartition().
 */
#define LockHashPartitionLock(hashcode) \
((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode)))
/*
* function prototypes
*/
extern void InitLocks(void);
extern LockMethod GetLocksMethodTable(const LOCK *lock);
extern int LockTagToPartition(const LOCKTAG *locktag);
extern uint32 LockTagHashCode(const LOCKTAG *locktag);
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
bool isTempObject,
LOCKMODE lockmode,
......@@ -407,7 +421,7 @@ extern int LockCheckConflicts(LockMethod lockMethodTable,
LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
extern void GrantAwaitedLock(void);
extern void RemoveFromWaitQueue(PGPROC *proc, int partition);
extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode);
extern Size LockShmemSize(void);
extern bool DeadLockCheck(PGPROC *proc);
extern void DeadLockReport(void);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.29 2006/07/23 03:07:58 tgl Exp $
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.30 2006/07/23 23:08:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -24,7 +24,8 @@
#define NUM_BUFFER_PARTITIONS 16
/* Number of partitions the shared lock tables are divided into */
#define NUM_LOCK_PARTITIONS 16
/*
 * The partition count is derived from its base-2 logarithm, so it is a
 * power of 2 by construction; change LOG2_NUM_LOCK_PARTITIONS to resize.
 */
#define LOG2_NUM_LOCK_PARTITIONS 4
#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS)
/*
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment