/*-------------------------------------------------------------------------
 *
 * multi.c--
 *	  multi level lock table manager
 *
 *	  Standard multi-level lock manager as per the Gray paper
 *	  (at least, that is what it is supposed to be).  We implement
 *	  three levels -- RELN, PAGE, TUPLE.  Tuple is actually TID,
 *	  a physical record pointer.  It isn't an object id.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.16 1998/06/26 19:57:49 momjian Exp $
 *
 * NOTES:
 *	 (1) The lock.c module assumes that the caller here is doing
 *		 two phase locking.
 *
 *-------------------------------------------------------------------------
 */
#include <stdio.h>
#include <string.h>
Marc G. Fournier's avatar
Marc G. Fournier committed
25
#include "postgres.h"
26 27 28 29
#include "storage/lmgr.h"
#include "storage/multilev.h"

#include "utils/rel.h"
30
#include "miscadmin.h"			/* MyDatabaseId */
31

32
static bool
33
MultiAcquire(LockTableId tableId, LOCKTAG *tag, LOCKT lockt,
34
			 PG_LOCK_LEVEL level);
35
static bool
36
MultiRelease(LockTableId tableId, LOCKTAG *tag, LOCKT lockt,
37
			 PG_LOCK_LEVEL level);
38 39 40

/*
 * INTENT indicates to higher level that a lower level lock has been
41 42
 * set.  For example, a write lock on a tuple conflicts with a write
 * lock on a relation.	This conflict is detected as a WRITE_INTENT/
43 44 45
 * WRITE conflict between the tuple's intent lock and the relation's
 * write lock.
 */
46
static int	MultiConflicts[] = {
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
	(int) NULL,
	/* All reads and writes at any level conflict with a write lock */
	(1 << WRITE_LOCK) | (1 << WRITE_INTENT) | (1 << READ_LOCK) | (1 << READ_INTENT),
	/* read locks conflict with write locks at curr and lower levels */
	(1 << WRITE_LOCK) | (1 << WRITE_INTENT),
	/* write intent locks */
	(1 << READ_LOCK) | (1 << WRITE_LOCK),
	/* read intent locks */
	(1 << WRITE_LOCK),

	/*
	 * extend locks for archive storage manager conflict only w/extend
	 * locks
	 */
	(1 << EXTEND_LOCK)
62 63 64 65 66 67
};

/*
 * Priority table for the multi-level lock table, passed to
 * LockTableInit() alongside MultiConflicts.
 *
 * write locks have higher priority than read locks and extend locks.  May
 * want to treat INTENT locks differently.
 * NOTE(review): entries are presumably in the same order as
 * MultiConflicts (placeholder, write, read, write intent, read intent,
 * extend) -- confirm against the lock type defines.
 */
static int	MultiPrios[] = {
	0,							/* placeholder slot */
	2,
	1,
	2,
	1,
	1
};

77
/*
78 79 80
 * Lock table identifier for this lock table.  The multi-level
 * lock table is ONE lock table, not three.
 */
81 82
LockTableId MultiTableId = (LockTableId) NULL;
LockTableId ShortTermTableId = (LockTableId) NULL;
83 84 85 86 87

/*
 * Create the lock table described by MultiConflicts and Multiprio.
 */
LockTableId
Bruce Momjian's avatar
Bruce Momjian committed
88
InitMultiLevelLocks()
89
{
90
	int			tableId;
91

92 93 94 95 96 97 98
	/* -----------------------
	 * If we're already initialized just return the table id.
	 * -----------------------
	 */
	if (MultiTableId)
		return MultiTableId;

99
	tableId = LockTableInit("LockTable", MultiConflicts, MultiPrios, 5);
100 101
	MultiTableId = tableId;
	if (!(MultiTableId))
Bruce Momjian's avatar
Bruce Momjian committed
102
		elog(ERROR, "InitMultiLocks: couldnt initialize lock table");
103 104 105
	/* -----------------------
	 * No short term lock table for now.  -Jeff 15 July 1991
	 *
106
	 * ShortTermTableId = LockTableRename(tableId);
107
	 * if (! (ShortTermTableId)) {
Bruce Momjian's avatar
Bruce Momjian committed
108
	 *	 elog(ERROR,"InitMultiLocks: couldnt rename lock table");
109 110 111
	 * }
	 * -----------------------
	 */
112 113 114 115 116 117 118 119 120 121 122
	return MultiTableId;
}

/*
 * MultiLockReln -- lock a relation
 *
 * Builds a LOCKTAG that carries only the relation id and database id taken
 * from linfo and passes it to MultiAcquire() at RELN_LEVEL.
 *
 * Returns: the result of MultiAcquire() -- TRUE if the lock can be set,
 * FALSE otherwise.
 */
bool
MultiLockReln(LockInfo linfo, LOCKT lockt)
{
	LOCKTAG		relTag;
	bool		acquired;

	/*
	 * Clear the whole tag before filling it in: LOCKTAG has two bytes of
	 * padding and the hash function will return miss if they aren't zero.
	 */
	MemSet(&relTag, 0, sizeof(relTag));
	relTag.dbId = linfo->lRelId.dbId;
	relTag.relId = linfo->lRelId.relId;

	acquired = MultiAcquire(MultiTableId, &relTag, lockt, RELN_LEVEL);
	return acquired;
}

/*
 * MultiLockTuple -- Lock the TID associated with a tuple
 *
 * Builds a LOCKTAG from the relation id and database id in linfo plus the
 * complete item pointer *tidPtr, then calls MultiAcquire() at TUPLE_LEVEL.
 *
 * Returns: the result of MultiAcquire() -- TRUE if lock is set, FALSE
 * otherwise.
 *
 * Side Effects: causes intention level locks to be set
 *		at the page and relation level.
 */
bool
MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
	LOCKTAG		tupTag;
	bool		acquired;

	/*
	 * Clear the whole tag before filling it in: LOCKTAG has two bytes of
	 * padding and the hash function will return miss if they aren't zero.
	 */
	MemSet(&tupTag, 0, sizeof(tupTag));

	tupTag.dbId = linfo->lRelId.dbId;
	tupTag.relId = linfo->lRelId.relId;

	/* the whole TID (block id and offset) goes into the tag */
	tupTag.tupleId = *tidPtr;

	acquired = MultiAcquire(MultiTableId, &tupTag, lockt, TUPLE_LEVEL);
	return acquired;
}

/*
 * MultiLockPage -- lock the page (block) that a TID points into
 *
 * Same as MultiLockTuple() but at page level: only the block id of
 * *tidPtr is copied into the tag, and MultiAcquire() is called at
 * PAGE_LEVEL.
 *
 * Returns: the result of MultiAcquire().
 */
bool
MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
	LOCKTAG		pageTag;
	bool		acquired;

	/*
	 * Clear the whole tag before filling it in: LOCKTAG has two bytes of
	 * padding and the hash function will return miss if they aren't zero.
	 */
	MemSet(&pageTag, 0, sizeof(pageTag));

	/* ----------------------------
	 * Lock the block only.  The offset in this tag is left at the zero
	 * written by MemSet above; MultiAcquire() builds its own tag and sets
	 * the offset to InvalidOffsetNumber for the page-level lock.
	 *
	 * There is some confusion here as to what a page is.  In Postgres a
	 * page is an 8k block, however this block may be partitioned into
	 * many subpages which are sometimes also called pages.  The term is
	 * overloaded, so don't be fooled: when we say lock the page we mean
	 * the 8k block. -Jeff 16 July 1991
	 * ----------------------------
	 */
	pageTag.dbId = linfo->lRelId.dbId;
	pageTag.relId = linfo->lRelId.relId;
	BlockIdCopy(&(pageTag.tupleId.ip_blkid), &(tidPtr->ip_blkid));

	acquired = MultiAcquire(MultiTableId, &pageTag, lockt, PAGE_LEVEL);
	return acquired;
}

/*
 * MultiAcquire -- acquire multi level lock at requested level
 *
 * Returns: TRUE if lock is set, FALSE if not
 * Side Effects:
 */
198
static bool
199
MultiAcquire(LockTableId tableId,
200
			 LOCKTAG *tag,
201
			 LOCKT lockt,
202
			 PG_LOCK_LEVEL level)
203
{
204 205 206 207 208 209
	LOCKT		locks[N_LEVELS];
	int			i,
				status;
	LOCKTAG		xxTag,
			   *tmpTag = &xxTag;
	int			retStatus = TRUE;
210 211 212 213 214 215 216 217 218 219 220 221

	/*
	 * Three levels implemented.  If we set a low level (e.g. Tuple) lock,
	 * we must set INTENT locks on the higher levels.  The intent lock
	 * detects conflicts between the low level lock and an existing high
	 * level lock.	For example, setting a write lock on a tuple in a
	 * relation is disallowed if there is an existing read lock on the
	 * entire relation.  The write lock would set a WRITE + INTENT lock on
	 * the relation and that lock would conflict with the read.
	 */
	switch (level)
	{
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
		case RELN_LEVEL:
			locks[0] = lockt;
			locks[1] = NO_LOCK;
			locks[2] = NO_LOCK;
			break;
		case PAGE_LEVEL:
			locks[0] = lockt + INTENT;
			locks[1] = lockt;
			locks[2] = NO_LOCK;
			break;
		case TUPLE_LEVEL:
			locks[0] = lockt + INTENT;
			locks[1] = lockt + INTENT;
			locks[2] = lockt;
			break;
		default:
238
			elog(ERROR, "MultiAcquire: bad lock level");
239
			return (FALSE);
240
	}
241 242 243 244 245 246 247

	/*
	 * construct a new tag as we go. Always loop through all levels, but
	 * if we arent' seting a low level lock, locks[i] is set to NO_LOCK
	 * for the lower levels.  Always start from the highest level and go
	 * to the lowest level.
	 */
Bruce Momjian's avatar
Bruce Momjian committed
248
	MemSet(tmpTag, 0, sizeof(*tmpTag));
249 250 251 252 253 254 255 256 257
	tmpTag->relId = tag->relId;
	tmpTag->dbId = tag->dbId;

	for (i = 0; i < N_LEVELS; i++)
	{
		if (locks[i] != NO_LOCK)
		{
			switch (i)
			{
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
				case RELN_LEVEL:
					/* -------------
					 * Set the block # and offset to invalid
					 * -------------
					 */
					BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
					tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
					break;
				case PAGE_LEVEL:
					/* -------------
					 * Copy the block #, set the offset to invalid
					 * -------------
					 */
					BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
								&(tag->tupleId.ip_blkid));
					tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
					break;
				case TUPLE_LEVEL:
					/* --------------
					 * Copy the entire tuple id.
					 * --------------
					 */
					ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
					break;
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
			}

			status = LockAcquire(tableId, tmpTag, locks[i]);
			if (!status)
			{

				/*
				 * failed for some reason. Before returning we have to
				 * release all of the locks we just acquired.
				 * MultiRelease(xx,xx,xx, i) means release starting from
				 * the last level lock we successfully acquired
				 */
				retStatus = FALSE;
				MultiRelease(tableId, tag, lockt, i);
				/* now leave the loop.	Don't try for any more locks */
				break;
			}
		}
	}
	return (retStatus);
302 303 304 305 306 307
}

/* ------------------
 * MultiReleasePage -- release a page in the multi-level lock table
 *
 * Only compiled when NOT_USED is defined.  Builds a tag from the relation
 * and database ids in linfo plus the block id of *tidPtr and passes it to
 * MultiRelease() at PAGE_LEVEL.
 *
 * Returns: the result of MultiRelease().
 * ------------------
 */
#ifdef NOT_USED
bool
MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
	LOCKTAG		pageTag;
	bool		released;

	/*
	 * Clear the whole tag before filling it in: LOCKTAG has two bytes of
	 * padding and the hash function will return miss if they aren't zero.
	 */
	MemSet(&pageTag, 0, sizeof(LOCKTAG));

	pageTag.dbId = linfo->lRelId.dbId;
	pageTag.relId = linfo->lRelId.relId;
	BlockIdCopy(&(pageTag.tupleId.ip_blkid), &(tidPtr->ip_blkid));

	released = MultiRelease(MultiTableId, &pageTag, lockt, PAGE_LEVEL);
	return released;
}

#endif
330 331 332 333 334 335

/* ------------------
 * Release a relation in the multi-level lock table
 * ------------------
 */
bool
336
MultiReleaseReln(LockInfo linfo, LOCKT lockt)
337
{
338
	LOCKTAG		tag;
339 340 341 342 343 344 345

	/* ------------------
	 * LOCKTAG has two bytes of padding, unfortunately.  The
	 * hash function will return miss if the padding bytes aren't
	 * zero'd.
	 * ------------------
	 */
Bruce Momjian's avatar
Bruce Momjian committed
346
	MemSet(&tag, 0, sizeof(LOCKTAG));
347 348 349 350
	tag.relId = linfo->lRelId.relId;
	tag.dbId = linfo->lRelId.dbId;

	return (MultiRelease(MultiTableId, &tag, lockt, RELN_LEVEL));
351 352 353 354 355 356 357
}

/*
 * MultiRelease -- release a multi-level lock
 *
 * Returns: TRUE if successful, FALSE otherwise.
 */
358
static bool
359
MultiRelease(LockTableId tableId,
360
			 LOCKTAG *tag,
361
			 LOCKT lockt,
362
			 PG_LOCK_LEVEL level)
363
{
364 365 366 367 368
	LOCKT		locks[N_LEVELS];
	int			i,
				status;
	LOCKTAG		xxTag,
			   *tmpTag = &xxTag;
369 370 371 372 373 374

	/*
	 * same level scheme as MultiAcquire().
	 */
	switch (level)
	{
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
		case RELN_LEVEL:
			locks[0] = lockt;
			locks[1] = NO_LOCK;
			locks[2] = NO_LOCK;
			break;
		case PAGE_LEVEL:
			locks[0] = lockt + INTENT;
			locks[1] = lockt;
			locks[2] = NO_LOCK;
			break;
		case TUPLE_LEVEL:
			locks[0] = lockt + INTENT;
			locks[1] = lockt + INTENT;
			locks[2] = lockt;
			break;
		default:
391
			elog(ERROR, "MultiRelease: bad lockt");
392 393 394 395 396 397 398 399 400 401
	}

	/*
	 * again, construct the tag on the fly.  This time, however, we
	 * release the locks in the REVERSE order -- from lowest level to
	 * highest level.
	 *
	 * Must zero out the tag to set padding byes to zero and ensure hashing
	 * consistency.
	 */
Bruce Momjian's avatar
Bruce Momjian committed
402
	MemSet(tmpTag, 0, sizeof(*tmpTag));
403 404 405 406 407 408 409 410 411
	tmpTag->relId = tag->relId;
	tmpTag->dbId = tag->dbId;

	for (i = (N_LEVELS - 1); i >= 0; i--)
	{
		if (locks[i] != NO_LOCK)
		{
			switch (i)
			{
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
				case RELN_LEVEL:
					/* -------------
					 * Set the block # and offset to invalid
					 * -------------
					 */
					BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
					tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
					break;
				case PAGE_LEVEL:
					/* -------------
					 * Copy the block #, set the offset to invalid
					 * -------------
					 */
					BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
								&(tag->tupleId.ip_blkid));
					tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
					break;
				case TUPLE_LEVEL:
					ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
					break;
432 433 434
			}
			status = LockRelease(tableId, tmpTag, locks[i]);
			if (!status)
435
				elog(ERROR, "MultiRelease: couldn't release after error");
436
		}
437
	}
438 439
	/* shouldn't reach here */
	return false;
440
}