Revise lock manager to support "session level" locks as well as "transaction level" locks.

A session lock is not released at transaction commit (but it is released on transaction abort, to ensure recovery after an elog(ERROR)). In VACUUM, use a session lock to protect the master table while vacuuming a TOAST table, so that the TOAST table can be done in an independent transaction. I also took this opportunity to do some cleanup and renaming in the lock code. The previously noted bug in ProcLockWakeup, that it couldn't wake up any waiters beyond the first non-wakeable waiter, is now fixed. Also found a previously unknown bug of the same kind (failure to scan all members of a lock queue in some cases) in DeadLockCheck. This might have led to failure to detect a deadlock condition, resulting in indefinite waits, but it's difficult to characterize the conditions required to trigger a failure.

Revise lock manager to support "session level" locks as well as "transaction
level" locks. A session lock is not released at transaction commit (but it is released on transaction abort, to ensure recovery after an elog(ERROR)). In VACUUM, use a session lock to protect the master table while vacuuming a TOAST table, so that the TOAST table can be done in an independent transaction. I also took this opportunity to do some cleanup and renaming in the lock code. The previously noted bug in ProcLockWakeup, that it couldn't wake up any waiters beyond the first non-wakeable waiter, is now fixed. Also found a previously unknown bug of the same kind (failure to scan all members of a lock queue in some cases) in DeadLockCheck. This might have led to failure to detect a deadlock condition, resulting in indefinite waits, but it's difficult to characterize the conditions required to trigger a failure.
6cc842ab · Tom Lane · b2145e93 · 6cc842ab · 6cc842ab · 6cc842ab
Commit 6cc842ab authored Dec 22, 2000 by Tom Lane
11 changed files
--- a/contrib/userlock/user_locks.c
+++ b/contrib/userlock/user_locks.c
@@ -33,7 +33,7 @@ user_lock(uint32 id1, uint32 id2, LOCKMODE lockmode)
 	tag.objId.blkno = (BlockNumber) id2;
 	tag.offnum = (OffsetNumber) (id1 & 0xffff);
-	return LockAcquire(USER_LOCKMETHOD, &tag, lockmode);
+	return LockAcquire(USER_LOCKMETHOD, &tag, InvalidTransactionId, lockmode);
 }
 int
@@ -47,7 +47,7 @@ user_unlock(uint32 id1, uint32 id2, LOCKMODE lockmode)
 	tag.objId.blkno = (BlockNumber) id2;
 	tag.offnum = (OffsetNumber) (id1 & 0xffff);
-	return LockRelease(USER_LOCKMETHOD, &tag, lockmode);
+	return LockRelease(USER_LOCKMETHOD, &tag, InvalidTransactionId, lockmode);
 }
 int
@@ -89,7 +89,7 @@ user_unlock_all()
 	}
 	proc = (PROC *) MAKE_PTR(location);
-	return LockReleaseAll(USER_LOCKMETHOD, &proc->lockQueue);
+	return LockReleaseAll(USER_LOCKMETHOD, proc, false, InvalidTransactionId);
 }
 /* end of file */

--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.89 2000/12/18 00:44:45 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.90 2000/12/22 00:51:53 tgl Exp $
 *
 * NOTES
 *		Transaction aborts can now occur two ways:
@@ -741,7 +741,7 @@ AtCommit_Locks(void)
 	 *	Then you're up a creek! -mer 5/24/92
 	 * ----------------
 	 */
-	ProcReleaseLocks();
+	ProcReleaseLocks(true);
 }
 /* --------------------------------
@@ -828,7 +828,7 @@ AtAbort_Locks(void)
 	 *	Then you're up a creek without a paddle! -mer
 	 * ----------------
 	 */
-	ProcReleaseLocks();
+	ProcReleaseLocks(false);
 }

--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.177 2000/12/08 06:43:44 inoue Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.178 2000/12/22 00:51:53 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -61,7 +61,7 @@ static void vacuum_init(void);
 static void vacuum_shutdown(void);
 static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
 static VRelList getrels(NameData *VacRelP);
-static void vacuum_rel(Oid relid, bool is_toastrel);
+static void vacuum_rel(Oid relid);
 static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
 static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
 static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
@@ -239,7 +239,7 @@ vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
 	/* vacuum each heap relation */
 	for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
 	{
-		vacuum_rel(cur->vrl_relid, false);
+		vacuum_rel(cur->vrl_relid);
 		/* analyze separately so locking is minimized */
 		if (analyze)
 			analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
@@ -308,7 +308,7 @@ getrels(NameData *VacRelP)
 		if (rkind != RELKIND_RELATION)
 		{
-			elog(NOTICE, "Vacuum: can not process indecies, views and certain system tables");
+			elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
 			continue;
 		}
@@ -342,23 +342,25 @@ getrels(NameData *VacRelP)
 *	vacuum_rel() -- vacuum one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indices, and
- *		updates its statistics num_pages and num_tuples statistics.
+ *		updates its num_pages and num_tuples statistics.
 *
 *		Doing one heap at a time incurs extra overhead, since we need to
 *		check that the heap exists again just before we vacuum it.	The
 *		reason that we do this is so that vacuuming can be spread across
 *		many small transactions.  Otherwise, two-phase locking would require
 *		us to lock the entire database during one pass of the vacuum cleaner.
+ *
+ *		At entry and exit, we are not inside a transaction.
 */
 static void
-vacuum_rel(Oid relid, bool is_toastrel)
+vacuum_rel(Oid relid)
 {
 	Relation	onerel;
+	LockRelId	onerelid;
 	VacPageListData vacuum_pages; /* List of pages to vacuum and/or clean
-								 * indices */
+								   * indices */
 	VacPageListData fraged_pages; /* List of pages with space enough for
-								 * re-using */
+								   * re-using */
-	VacPage    *vacpage;
 	Relation   *Irel;
 	int32		nindices,
 				i;
@@ -366,8 +368,8 @@ vacuum_rel(Oid relid, bool is_toastrel)
 	bool		reindex = false;
 	Oid			toast_relid;
-	if (!is_toastrel)
+	/* Begin a transaction for vacuuming this relation */
-		StartTransactionCommand();
+	StartTransactionCommand();
 	/*
 	 * Check for user-requested abort.	Note we want this to be inside a
@@ -384,8 +386,7 @@ vacuum_rel(Oid relid, bool is_toastrel)
 							  ObjectIdGetDatum(relid),
 							  0, 0, 0))
 	{
-		if (!is_toastrel)
+		CommitTransactionCommand();
-			CommitTransactionCommand();
 		return;
 	}
@@ -403,13 +404,25 @@ vacuum_rel(Oid relid, bool is_toastrel)
 		elog(NOTICE, "Skipping \"%s\" --- only table owner can VACUUM it",
 			 RelationGetRelationName(onerel));
 		heap_close(onerel, AccessExclusiveLock);
-		if (!is_toastrel)
+		CommitTransactionCommand();
-			CommitTransactionCommand();
 		return;
 	}
 	/*
-	 * Remember the relation'ss TOAST relation for later
+	 * Get a session-level exclusive lock too.  This will protect our
+	 * exclusive access to the relation across multiple transactions,
+	 * so that we can vacuum the relation's TOAST table (if any) secure
+	 * in the knowledge that no one is diddling the parent relation.
+	 *
+	 * NOTE: this cannot block, even if someone else is waiting for access,
+	 * because the lock manager knows that both lock requests are from the
+	 * same process.
+	 */
+	onerelid = onerel->rd_lockInfo.lockRelId;
+	LockRelationForSession(&onerelid, AccessExclusiveLock);
+	/*
+	 * Remember the relation's TOAST relation for later
 	 */
 	toast_relid = onerel->rd_rel->reltoastrelid;
@@ -500,21 +513,6 @@ vacuum_rel(Oid relid, bool is_toastrel)
 	if (reindex)
 		activate_indexes_of_a_table(relid, true);
-	/*
-	 * ok - free vacuum_pages list of reaped pages
-	 *
-	 * Isn't this a waste of code?  Upcoming commit should free memory, no?
-	 */
-	if (vacuum_pages.num_pages > 0)
-	{
-		vacpage = vacuum_pages.pagedesc;
-		for (i = 0; i < vacuum_pages.num_pages; i++, vacpage++)
-			pfree(*vacpage);
-		pfree(vacuum_pages.pagedesc);
-		if (fraged_pages.num_pages > 0)
-			pfree(fraged_pages.pagedesc);
-	}
 	/* all done with this class, but hold lock until commit */
 	heap_close(onerel, NoLock);
@@ -523,19 +521,25 @@ vacuum_rel(Oid relid, bool is_toastrel)
 					vacrelstats->num_tuples, vacrelstats->hasindex,
 					vacrelstats);
+	/*
+	 * Complete the transaction and free all temporary memory used.
+	 */
+	CommitTransactionCommand();
 	/*
 	 * If the relation has a secondary toast one, vacuum that too
-	 * while we still hold the lock on the master table. We don't
+	 * while we still hold the session lock on the master table.
-	 * need to propagate "analyze" to it, because the toaster
+	 * We don't need to propagate "analyze" to it, because the toaster
 	 * always uses hardcoded index access and statistics are
 	 * totally unimportant for toast relations
 	 */
 	if (toast_relid != InvalidOid)
-		vacuum_rel(toast_relid, true);
+		vacuum_rel(toast_relid);
-	/* next command frees attribute stats */
+	/*
-	if (!is_toastrel)
+	 * Now release the session-level lock on the master table.
-		CommitTransactionCommand();
+	 */
+	UnlockRelationForSession(&onerelid, AccessExclusiveLock);
 }
 /*
@@ -1786,9 +1790,13 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
 	if (num_moved > 0)
 	{
 		/*
-		 * We have to commit our tuple' movings before we'll truncate
+		 * We have to commit our tuple movings before we truncate the
-		 * relation, but we shouldn't lose our locks. And so - quick hack:
+		 * relation.  Ideally we should do Commit/StartTransactionCommand
-		 * record status of current transaction as committed, and continue.
+		 * here, relying on the session-level table lock to protect our
+		 * exclusive access to the relation.  However, that would require
+		 * a lot of extra code to close and re-open the relation, indices,
+		 * etc.  For now, a quick hack: record status of current transaction
+		 * as committed, and continue.
 		 */
 		RecordTransactionCommit();
 	}
@@ -1852,7 +1860,7 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
 	/* 
 	 * Reflect the motion of system tuples to catalog cache here.
 	 */
-        CommandCounterIncrement();
+	CommandCounterIncrement();
 	if (Nvacpagelist.num_pages > 0)
 	{

--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.37 2000/12/03 17:18:10 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.38 2000/12/22 00:51:54 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -84,7 +84,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
 	 * Set up lock manager
 	 */
 	InitLocks();
-	if (InitLockTable() == INVALID_TABLEID)
+	if (InitLockTable(maxBackends) == INVALID_TABLEID)
 		elog(FATAL, "Couldn't create the lock table");
 	/*

--- a/src/backend/storage/lmgr/README
+++ b/src/backend/storage/lmgr/README
-$Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.3 1998/07/06 18:16:07 momjian Exp $
+$Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.4 2000/12/22 00:51:54 tgl Exp $
-There are two fundemental lock structures.  Lock methods describe the
+There are two fundamental lock structures: the per-lockable-object LOCK
-locking behavior.  We currently only support multi-level locking.  Lock
+struct, and the per-lock-holder HOLDER struct.  A LOCK object exists
-modes describe the mode of the lock(read/write or shared/exclusive). 
+for each lockable object that currently has locks held or requested on it.
+A HOLDER struct exists for each transaction that is holding or requesting
+lock(s) on each LOCK object.
+Lock methods describe the overall locking behavior.  Currently there are
+two lock methods: DEFAULT and USER.  (USER locks are non-blocking.)
+Lock modes describe the type of the lock (read/write or shared/exclusive). 
 See src/tools/backend/index.html and src/include/storage/lock.h for more
 details.
@@ -12,10 +19,10 @@ The lock manager's LOCK:
 tag -
    The key fields that are used for hashing locks in the shared memory
-    lock hash table.  This is kept as a separate struct to ensure that we
+    lock hash table.  This is declared as a separate struct to ensure that
-    always zero out the correct number of bytes.  This is a problem as
+    we always zero out the correct number of bytes.  It is critical that
-    part of the tag is an itempointer which is 6 bytes and causes 2
+    any alignment-padding bytes the compiler might insert in the struct
-    additional bytes to be added as padding.
+    be zeroed out, else the hash computation will be random.
    tag.relId -
 	Uniquely identifies the relation that the lock corresponds to.
@@ -30,7 +37,7 @@ tag -
 	tuple within the block.  If we are setting a table level lock
 	both the blockId and tupleId (in an item pointer this is called
 	the position) are set to invalid, if it is a page level lock the
-	blockId is valid, while the tuleId is still invalid.  Finally if
+	blockId is valid, while the tupleId is still invalid.  Finally if
 	this is a tuple level lock (we currently never do this) then both
 	the blockId and tupleId are set to valid specifications.  This is
 	how we get the appearance of a multi-level lock table while using
@@ -38,9 +45,9 @@ tag -
 	you are puzzled about how multi-level lock tables work).
 mask -
-    This field indicates what types of locks are currently held in the
+    This field indicates what types of locks are currently held on the
-    given lock.  It is used (against the lock table's conflict table)
+    given lockable object.  It is used (against the lock table's conflict
-    to determine if the new lock request will conflict with existing
+    table) to determine if the new lock request will conflict with existing
    lock types held.  Conficts are determined by bitwise AND operations
    between the mask and the conflict table entry for the given lock type
    to be set.  The current representation is that each bit (1 through 5)
@@ -73,7 +80,7 @@ holders -
 nActive -
    Keeps a count of how many times this lock has been succesfully acquired.
-    This count does not include attempts that were rejected due to conflicts,
+    This count does not include attempts that are waiting due to conflicts,
    but can count the same backend twice (e.g. a read then a write -- since
    its the same transaction this won't cause a conflict)
@@ -85,3 +92,39 @@ activeHolders -
 ---------------------------------------------------------------------------
+The lock manager's HOLDER:
+tag -
+    The key fields that are used for hashing entries in the shared memory
+    holder hash table.  This is declared as a separate struct to ensure that
+    we always zero out the correct number of bytes.
+    tag.lock
+        SHMEM offset of the LOCK object this holder is for.
+    tag.pid
+        PID of backend process that owns this holder.
+    tag.xid
+        XID of transaction this holder is for, or InvalidTransactionId
+        if the holder is for session-level locking.
+    Note that this structure will support multiple transactions running
+    concurrently in one backend, which may be handy if we someday decide
+    to support nested transactions.  Currently, the XID field is only needed
+    to distinguish per-transaction locks from session locks.  User locks
+    are always session locks, and we also use session locks for multi-
+    transaction operations like VACUUM.
+holders -
+    The number of successfully acquired locks of each type for this holder.
+    (CAUTION: the semantics are not the same as the LOCK's holder[], which
+    counts both acquired and pending requests.  Probably a different name
+    should be used...)
+nHolding -
+    Sum of the holders[] array.
+queue -
+    List link for shared memory queue of all the HOLDER objects for the
+    same backend.
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lmgr.c,v 1.42 2000/11/30 01:39:08 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lmgr.c,v 1.43 2000/12/22 00:51:54 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -16,6 +16,7 @@
 #include "postgres.h"
 #include "access/transam.h"
+#include "access/xact.h"
 #include "catalog/catalog.h"
 #include "miscadmin.h"
 #include "storage/lmgr.h"
@@ -72,16 +73,17 @@ LOCKMETHOD	LongTermTableId = (LOCKMETHOD) NULL;
 * Create the lock table described by LockConflicts and LockPrios.
 */
 LOCKMETHOD
-InitLockTable()
+InitLockTable(int maxBackends)
 {
 	int			lockmethod;
 	lockmethod = LockMethodTableInit("LockTable",
-							LockConflicts, LockPrios, MAX_LOCKMODES - 1);
+									 LockConflicts, LockPrios,
+									 MAX_LOCKMODES - 1, maxBackends);
 	LockTableId = lockmethod;
 	if (!(LockTableId))
-		elog(ERROR, "InitLockTable: couldnt initialize lock table");
+		elog(ERROR, "InitLockTable: couldn't initialize lock table");
 #ifdef USER_LOCKS
@@ -90,10 +92,7 @@ InitLockTable()
 	 */
 	LongTermTableId = LockMethodTableRename(LockTableId);
 	if (!(LongTermTableId))
-	{
+		elog(ERROR, "InitLockTable: couldn't rename long-term lock table");
-		elog(ERROR,
-			 "InitLockTable: couldn't rename long-term lock table");
-	}
 #endif
 	return LockTableId;
@@ -139,7 +138,7 @@ LockRelation(Relation relation, LOCKMODE lockmode)
 	tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
 	tag.objId.blkno = InvalidBlockNumber;
-	if (!LockAcquire(LockTableId, &tag, lockmode))
+	if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), lockmode))
 		elog(ERROR, "LockRelation: LockAcquire failed");
 	/*
@@ -169,7 +168,55 @@ UnlockRelation(Relation relation, LOCKMODE lockmode)
 	tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
 	tag.objId.blkno = InvalidBlockNumber;
-	LockRelease(LockTableId, &tag, lockmode);
+	LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode);
+}
+/*
+ *		LockRelationForSession
+ *
+ * This routine grabs a session-level lock on the target relation.  The
+ * session lock persists across transaction boundaries.  It will be removed
+ * when UnlockRelationForSession() is called, or if an elog(ERROR) occurs,
+ * or if the backend exits.
+ *
+ * Note that one should also grab a transaction-level lock on the rel
+ * in any transaction that actually uses the rel, to ensure that the
+ * relcache entry is up to date.
+ */
+void
+LockRelationForSession(LockRelId *relid, LOCKMODE lockmode)
+{
+	LOCKTAG		tag;
+	if (LockingDisabled())
+		return;
+	MemSet(&tag, 0, sizeof(tag));
+	tag.relId = relid->relId;
+	tag.dbId = relid->dbId;
+	tag.objId.blkno = InvalidBlockNumber;
+	if (!LockAcquire(LockTableId, &tag, InvalidTransactionId, lockmode))
+		elog(ERROR, "LockRelationForSession: LockAcquire failed");
+}
+/*
+ *		UnlockRelationForSession
+ */
+void
+UnlockRelationForSession(LockRelId *relid, LOCKMODE lockmode)
+{
+	LOCKTAG		tag;
+	if (LockingDisabled())
+		return;
+	MemSet(&tag, 0, sizeof(tag));
+	tag.relId = relid->relId;
+	tag.dbId = relid->dbId;
+	tag.objId.blkno = InvalidBlockNumber;
+	LockRelease(LockTableId, &tag, InvalidTransactionId, lockmode);
 }
 /*
@@ -188,7 +235,7 @@ LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
 	tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
 	tag.objId.blkno = blkno;
-	if (!LockAcquire(LockTableId, &tag, lockmode))
+	if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), lockmode))
 		elog(ERROR, "LockPage: LockAcquire failed");
 }
@@ -208,7 +255,7 @@ UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
 	tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
 	tag.objId.blkno = blkno;
-	LockRelease(LockTableId, &tag, lockmode);
+	LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode);
 }
 void
@@ -221,10 +268,10 @@ XactLockTableInsert(TransactionId xid)
 	MemSet(&tag, 0, sizeof(tag));
 	tag.relId = XactLockTableId;
-	tag.dbId = InvalidOid;
+	tag.dbId = InvalidOid;		/* xids are globally unique */
 	tag.objId.xid = xid;
-	if (!LockAcquire(LockTableId, &tag, ExclusiveLock))
+	if (!LockAcquire(LockTableId, &tag, xid, ExclusiveLock))
 		elog(ERROR, "XactLockTableInsert: LockAcquire failed");
 }
@@ -242,7 +289,7 @@ XactLockTableDelete(TransactionId xid)
 	tag.dbId = InvalidOid;
 	tag.objId.xid = xid;
-	LockRelease(LockTableId, &tag, ExclusiveLock);
+	LockRelease(LockTableId, &tag, xid, ExclusiveLock);
 }
 #endif
@@ -259,10 +306,10 @@ XactLockTableWait(TransactionId xid)
 	tag.dbId = InvalidOid;
 	tag.objId.xid = xid;
-	if (!LockAcquire(LockTableId, &tag, ShareLock))
+	if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), ShareLock))
 		elog(ERROR, "XactLockTableWait: LockAcquire failed");
-	LockRelease(LockTableId, &tag, ShareLock);
+	LockRelease(LockTableId, &tag, GetCurrentTransactionId(), ShareLock);
 	/*
 	 * Transaction was committed/aborted/crashed - we have to update

--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: lmgr.h,v 1.26 2000/11/28 23:27:57 tgl Exp $
+ * $Id: lmgr.h,v 1.27 2000/12/22 00:51:54 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -33,17 +33,21 @@
 extern LOCKMETHOD LockTableId;
-extern LOCKMETHOD InitLockTable(void);
+extern LOCKMETHOD InitLockTable(int maxBackends);
 extern void RelationInitLockInfo(Relation relation);
+/* Lock a relation */
 extern void LockRelation(Relation relation, LOCKMODE lockmode);
 extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
-/* this is for indices */
+extern void LockRelationForSession(LockRelId *relid, LOCKMODE lockmode);
+extern void UnlockRelationForSession(LockRelId *relid, LOCKMODE lockmode);
+/* Lock a page (mainly used for indices) */
 extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
 extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
-/* and this is for transactions */
+/* Lock an XID (used to wait for a transaction to finish) */
 extern void XactLockTableInsert(TransactionId xid);
 extern void XactLockTableWait(TransactionId xid);

--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
 /*-------------------------------------------------------------------------
 *
 * lock.h
- *
+ *	  POSTGRES low-level lock mechanism
 *
 *
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: lock.h,v 1.39 2000/07/17 03:05:30 tgl Exp $
+ * $Id: lock.h,v 1.40 2000/12/22 00:51:54 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -18,13 +18,20 @@
 #include "storage/itemptr.h"
 #include "storage/shmem.h"
-extern SPINLOCK LockMgrLock;
-typedef int LOCKMASK;
-#define INIT_TABLE_SIZE			100
+/* originally in procq.h */
-#define MAX_TABLE_SIZE			1000
+typedef struct PROC_QUEUE
+{
+	SHM_QUEUE	links;
+	int			size;
+} PROC_QUEUE;
+/* struct proc is declared in storage/proc.h, but must forward-reference it */
+typedef struct proc PROC;
+extern SPINLOCK LockMgrLock;
 #ifdef LOCK_DEBUG
 extern int  Trace_lock_oidmin;
 extern bool Trace_locks;
@@ -39,14 +46,16 @@ extern bool Debug_deadlocks;
 * memory the lock manager is going to require.
 * See LockShmemSize() in lock.c.
 *
- * NLOCKS_PER_XACT - The number of unique locks acquired in a transaction
+ * NLOCKS_PER_XACT - The number of unique objects locked in a transaction
- *					 (should be configurable!)
+ *					 (this should be configurable!)
 * NLOCKENTS - The maximum number of lock entries in the lock table.
 * ----------------------
 */
 #define NLOCKS_PER_XACT			64
 #define NLOCKENTS(maxBackends)	(NLOCKS_PER_XACT*(maxBackends))
+typedef int LOCKMASK;
 typedef int LOCKMODE;
 typedef int LOCKMETHOD;
@@ -68,29 +77,8 @@ typedef int LOCKMETHOD;
 #define MIN_LOCKMETHOD		DEFAULT_LOCKMETHOD
-typedef struct LTAG
+/*
-{
+ * This is the control structure for a lock table.	It
-	Oid			relId;
-	Oid			dbId;
-	union
-	{
-		BlockNumber blkno;
-		TransactionId xid;
-	}			objId;
-	/*
-	 * offnum should be part of objId.tupleId above, but would increase
-	 * sizeof(LOCKTAG) and so moved here; currently used by userlocks
-	 * only.
-	 */
-	OffsetNumber offnum;
-	uint16		lockmethod;		/* needed by userlocks */
-} LOCKTAG;
-#define TAGSIZE (sizeof(LOCKTAG))
-#define LOCKTAG_LOCKMETHOD(locktag) ((locktag).lockmethod)
-/* This is the control structure for a lock table.	It
 * lives in shared memory:
 *
 * lockmethod -- the handle used by the lock table's clients to
@@ -108,7 +96,6 @@ typedef struct LTAG
 *		starvation).
 *
 * masterlock -- synchronizes access to the table
- *
 */
 typedef struct LOCKMETHODCTL
 {
@@ -120,91 +107,47 @@ typedef struct LOCKMETHODCTL
 } LOCKMETHODCTL;
 /*
- * lockHash -- hash table on lock Ids,
+ * Non-shared header for a lock table.
- * xidHash -- hash on xid and lockId in case
+ *
- *		multiple processes are holding the lock
+ * lockHash -- hash table holding per-locked-object lock information
- * ctl - control structure described above.
+ * holderHash -- hash table holding per-lock-holder lock information
+ * ctl - shared control structure described above.
 */
 typedef struct LOCKMETHODTABLE
 {
 	HTAB	   *lockHash;
-	HTAB	   *xidHash;
+	HTAB	   *holderHash;
 	LOCKMETHODCTL *ctl;
 } LOCKMETHODTABLE;
-/* -----------------------
- * A transaction never conflicts with its own locks.  Hence, if
- * multiple transactions hold non-conflicting locks on the same
- * data, private per-transaction information must be stored in the
- * XID table.  The tag is XID + shared memory lock address so that
- * all locks can use the same XID table.  The private information
- * we store is the number of locks of each type (holders) and the
- * total number of locks (nHolding) held by the transaction.
- *
- * NOTE:
- * There were some problems with the fact that currently TransactionIdData
- * is a 5 byte entity and compilers long word aligning of structure fields.
- * If the 3 byte padding is put in front of the actual xid data then the
- * hash function (which uses XID_TAGSIZE when deciding how many bytes of a
- * struct to look at for the key) might only see the last two bytes of the xid.
- *
- * Clearly this is not good since its likely that these bytes will be the
- * same for many transactions and hence they will share the same entry in
- * hash table causing the entry to be corrupted.  For this long-winded
- * reason I have put the tag in a struct of its own to ensure that the
- * XID_TAGSIZE is computed correctly.  It used to be sizeof (SHMEM_OFFSET) +
- * sizeof(TransactionIdData) which != sizeof(XIDTAG).
- *
- * Finally since the hash function will now look at all 12 bytes of the tag
- * the padding bytes MUST be zero'd before use in hash_search() as they
- * will have random values otherwise.  Jeff 22 July 1991.
- * -----------------------
- */
-typedef struct XIDTAG
-{
-	SHMEM_OFFSET lock;
-	int			pid;
-	TransactionId xid;
-#ifdef USE_XIDTAG_LOCKMETHOD
-	uint16		lockmethod;		/* for debug or consistency checking */
-#endif
-} XIDTAG;
-#ifdef USE_XIDTAG_LOCKMETHOD
-#define XIDTAG_LOCKMETHOD(xidtag) ((xidtag).lockmethod)
-#else
-#define XIDTAG_LOCKMETHOD(xidtag) \
-		(((LOCK*) MAKE_PTR((xidtag).lock))->tag.lockmethod)
-#endif
-typedef struct XIDLookupEnt
+/*
+ * LOCKTAG is the key information needed to look up a LOCK item in the
+ * lock hashtable.  A LOCKTAG value uniquely identifies a lockable object.
+ */
+typedef struct LOCKTAG
 {
-	/* tag */
+	Oid			relId;
-	XIDTAG		tag;
+	Oid			dbId;
+	union
-	/* data */
+	{
-	int			holders[MAX_LOCKMODES];
+		BlockNumber blkno;
-	int			nHolding;
+		TransactionId xid;
-	SHM_QUEUE	queue;
+	}			objId;
-} XIDLookupEnt;
-#define SHMEM_XIDTAB_KEYSIZE  sizeof(XIDTAG)
-#define SHMEM_XIDTAB_DATASIZE (sizeof(XIDLookupEnt) - SHMEM_XIDTAB_KEYSIZE)
-#define XID_TAGSIZE (sizeof(XIDTAG))
+	/*
-#define XIDENT_LOCKMETHOD(xident) (XIDTAG_LOCKMETHOD((xident).tag))
+	 * offnum should be part of objId.tupleId above, but would increase
+	 * sizeof(LOCKTAG) and so moved here; currently used by userlocks
+	 * only.
+	 */
+	OffsetNumber offnum;
-/* originally in procq.h */
+	uint16		lockmethod;		/* needed by userlocks */
-typedef struct PROC_QUEUE
+} LOCKTAG;
-{
-	SHM_QUEUE	links;
-	int			size;
-} PROC_QUEUE;
 /*
- * lock information:
+ * Per-locked-object lock information:
 *
 * tag -- uniquely identifies the object being locked
 * mask -- union of the conflict masks of all lock types
@@ -232,40 +175,76 @@ typedef struct LOCK
 #define SHMEM_LOCKTAB_KEYSIZE  sizeof(LOCKTAG)
 #define SHMEM_LOCKTAB_DATASIZE (sizeof(LOCK) - SHMEM_LOCKTAB_KEYSIZE)
-#define LOCK_LOCKMETHOD(lock) (LOCKTAG_LOCKMETHOD((lock).tag))
+#define LOCK_LOCKMETHOD(lock) ((lock).tag.lockmethod)
-#define LockGetLock_nHolders(l) l->nHolders
-#ifdef NOT_USED
+/*
-#define LockDecrWaitHolders(lock, lockmode) \
+ * We may have several different transactions holding or awaiting locks
-( \
+ * on the same lockable object.  We need to store some per-holder information
-  lock->nHolding--, \
+ * for each such holder (or would-be holder).
-  lock->holders[lockmode]-- \
+ *
-)
+ * HOLDERTAG is the key information needed to look up a HOLDER item in the
-#endif
+ * holder hashtable.  A HOLDERTAG value uniquely identifies a lock holder.
-#define LockLockTable() SpinAcquire(LockMgrLock);
+ *
-#define UnlockLockTable() SpinRelease(LockMgrLock);
+ * There are two possible kinds of holder tags: a transaction (identified
+ * both by the PID of the backend running it, and the xact's own ID) and
+ * a session (identified by backend PID, with xid = InvalidTransactionId).
+ *
+ * Currently, session holders are used for user locks and for cross-xact
+ * locks obtained for VACUUM.  We assume that a session lock never conflicts
+ * with per-transaction locks obtained by the same backend.
+ */
+typedef struct HOLDERTAG
+{
+	SHMEM_OFFSET lock;			/* link to per-lockable-object information */
+	int			pid;			/* PID of backend */
+	TransactionId xid;			/* xact ID, or InvalidTransactionId */
+} HOLDERTAG;
+typedef struct HOLDER
+{
+	/* tag */
+	HOLDERTAG	tag;
+	/* data */
+	int			holders[MAX_LOCKMODES];
+	int			nHolding;
+	SHM_QUEUE	queue;
+} HOLDER;
+#define SHMEM_HOLDERTAB_KEYSIZE  sizeof(HOLDERTAG)
+#define SHMEM_HOLDERTAB_DATASIZE (sizeof(HOLDER) - SHMEM_HOLDERTAB_KEYSIZE)
+#define HOLDER_LOCKMETHOD(holder) \
+		(((LOCK *) MAKE_PTR((holder).tag.lock))->tag.lockmethod)
+#define LockLockTable() SpinAcquire(LockMgrLock)
+#define UnlockLockTable() SpinRelease(LockMgrLock)
 /*
 * function prototypes
 */
 extern void InitLocks(void);
-extern void LockDisable(int status);
+extern void LockDisable(bool status);
+extern bool LockingDisabled(void);
 extern LOCKMETHOD LockMethodTableInit(char *tabName, LOCKMASK *conflictsP,
-					int *prioP, int numModes);
+					int *prioP, int numModes, int maxBackends);
 extern LOCKMETHOD LockMethodTableRename(LOCKMETHOD lockmethod);
 extern bool LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
-			LOCKMODE lockmode);
+						TransactionId xid, LOCKMODE lockmode);
-extern int LockResolveConflicts(LOCKMETHOD lockmethod, LOCK *lock,
-					 LOCKMODE lockmode, TransactionId xid,
-					 XIDLookupEnt *xidentP);
 extern bool LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
-			LOCKMODE lockmode);
+						TransactionId xid, LOCKMODE lockmode);
-extern void GrantLock(LOCK *lock, LOCKMODE lockmode);
+extern bool LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
-extern bool LockReleaseAll(LOCKMETHOD lockmethod, SHM_QUEUE *lockQueue);
+						   bool allxids, TransactionId xid);
+extern int LockResolveConflicts(LOCKMETHOD lockmethod, LOCKMODE lockmode,
+								LOCK *lock, HOLDER *holder, PROC *proc,
+								int *myHolders);
+extern void GrantLock(LOCK *lock, HOLDER *holder, LOCKMODE lockmode);
 extern int	LockShmemSize(int maxBackends);
-extern bool LockingDisabled(void);
+extern bool DeadLockCheck(PROC *thisProc, LOCK *findlock);
-extern bool DeadLockCheck(void *proc, LOCK *findlock);
 #ifdef LOCK_DEBUG
 extern void DumpLocks(void);

--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: proc.h,v 1.32 2000/11/28 23:27:57 tgl Exp $
+ * $Id: proc.h,v 1.33 2000/12/22 00:51:54 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -30,7 +30,7 @@ typedef struct
 /*
 * Each backend has:
 */
-typedef struct proc
+struct proc
 {
 	/* proc->links MUST BE THE FIRST ELEMENT OF STRUCT (see ProcWakeup()) */
@@ -50,16 +50,25 @@ typedef struct proc
 	TransactionId xmin;			/* minimal running XID as it was when we
 								 * were starting our xact: vacuum must not
 								 * remove tuples deleted by xid >= xmin ! */
 	XLogRecPtr	logRec;
-	LOCK	   *waitLock;		/* Lock we're sleeping on ... */
-	int			token;			/* type of lock we sleeping for */
+	/* Info about lock the process is currently waiting for, if any */
-	int			holdLock;		/* while holding these locks */
+	LOCK	   *waitLock;		/* Lock object we're sleeping on ... */
+	HOLDER	   *waitHolder;		/* Per-holder info for our lock */
+	LOCKMODE	waitLockMode;	/* type of lock we're waiting for */
+	LOCKMASK	holdLock;		/* bitmask for lock types already held */
 	int			pid;			/* This backend's process id */
 	Oid			databaseId;		/* OID of database this backend is using */
 	short		sLocks[MAX_SPINS];		/* Spin lock stats */
 	SHM_QUEUE	lockQueue;		/* locks associated with current
 								 * transaction */
-} PROC;
+};
+/* NOTE: "typedef struct proc PROC" appears in storage/lock.h. */
 extern PROC *MyProc;
@@ -122,15 +131,14 @@ typedef struct procglobal
 */
 extern void InitProcGlobal(int maxBackends);
 extern void InitProcess(void);
-extern void ProcReleaseLocks(void);
+extern void ProcReleaseLocks(bool isCommit);
 extern bool ProcRemove(int pid);
 extern void ProcQueueInit(PROC_QUEUE *queue);
-extern int ProcSleep(PROC_QUEUE *queue, LOCKMETHODCTL *lockctl, int token,
+extern int ProcSleep(LOCKMETHODCTL *lockctl, LOCKMODE lockmode,
-		  LOCK *lock);
+					 LOCK *lock, HOLDER *holder);
 extern PROC *ProcWakeup(PROC *proc, int errType);
-extern int ProcLockWakeup(PROC_QUEUE *queue, LOCKMETHOD lockmethod,
+extern int ProcLockWakeup(LOCKMETHOD lockmethod, LOCK *lock);
-			   LOCK *lock);
 extern void ProcAddLock(SHM_QUEUE *elem);
 extern void ProcReleaseSpins(PROC *proc);
 extern void LockWaitCancel(void);