Commit 499abb0c authored by Tom Lane

Implement new 'lightweight lock manager' that's intermediate between
existing lock manager and spinlocks: it understands exclusive vs shared
lock but has few other fancy features.  Replace most uses of spinlocks
with lightweight locks.  All remaining uses of spinlocks have very short
lock hold times (a few dozen instructions), so tweak spinlock backoff
code to work efficiently given this assumption.  All per my proposal on
pghackers 26-Sep-01.
parent 818fb55a
<!-- $Header: /cvsroot/pgsql/doc/src/sgml/wal.sgml,v 1.10 2001/09/22 03:59:17 momjian Exp $ --> <!-- $Header: /cvsroot/pgsql/doc/src/sgml/wal.sgml,v 1.11 2001/09/29 04:02:19 tgl Exp $ -->
<chapter id="wal"> <chapter id="wal">
<title>Write-Ahead Logging (<acronym>WAL</acronym>)</title> <title>Write-Ahead Logging (<acronym>WAL</acronym>)</title>
...@@ -146,7 +146,7 @@ ...@@ -146,7 +146,7 @@
<para> <para>
The <acronym>WAL</acronym> buffers and control structure are in The <acronym>WAL</acronym> buffers and control structure are in
shared memory, and are handled by the backends; they are protected shared memory, and are handled by the backends; they are protected
by spinlocks. The demand on shared memory is dependent on the by lightweight locks. The demand on shared memory is dependent on the
number of buffers; the default size of the <acronym>WAL</acronym> number of buffers; the default size of the <acronym>WAL</acronym>
buffers is 64 kB. buffers is 64 kB.
</para> </para>
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.3 2001/08/26 16:55:59 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.4 2001/09/29 04:02:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#include <unistd.h> #include <unistd.h>
#include "access/clog.h" #include "access/clog.h"
#include "storage/s_lock.h" #include "storage/lwlock.h"
#include "miscadmin.h" #include "miscadmin.h"
...@@ -74,8 +74,8 @@ ...@@ -74,8 +74,8 @@
* The management algorithm is straight LRU except that we will never swap * The management algorithm is straight LRU except that we will never swap
* out the latest page (since we know it's going to be hit again eventually). * out the latest page (since we know it's going to be hit again eventually).
* *
* We use an overall spinlock to protect the shared data structures, plus * We use an overall LWLock to protect the shared data structures, plus
* per-buffer spinlocks that synchronize I/O for each buffer. A process * per-buffer LWLocks that synchronize I/O for each buffer. A process
* that is reading in or writing out a page buffer does not hold the control * that is reading in or writing out a page buffer does not hold the control
* lock, only the per-buffer lock for the buffer it is working on. * lock, only the per-buffer lock for the buffer it is working on.
* *
...@@ -105,10 +105,6 @@ ...@@ -105,10 +105,6 @@
* by setting the page's state from WRITE_IN_PROGRESS to DIRTY. The writing * by setting the page's state from WRITE_IN_PROGRESS to DIRTY. The writing
* process must notice this and not mark the page CLEAN when it's done. * process must notice this and not mark the page CLEAN when it's done.
* *
* XXX it's probably okay to use a spinlock for the control lock, since
* that lock is only held for very short operations. It'd be nice to use
* some other form of lock for the per-buffer I/O locks, however.
*
* XLOG interactions: this module generates an XLOG record whenever a new * XLOG interactions: this module generates an XLOG record whenever a new
* CLOG page is initialized to zeroes. Other writes of CLOG come from * CLOG page is initialized to zeroes. Other writes of CLOG come from
* recording of transaction commit or abort in xact.c, which generates its * recording of transaction commit or abort in xact.c, which generates its
...@@ -121,7 +117,6 @@ ...@@ -121,7 +117,6 @@
* synchronization already. * synchronization already.
*---------- *----------
*/ */
#define NUM_CLOG_BUFFERS 8
typedef enum typedef enum
{ {
...@@ -153,13 +148,17 @@ typedef struct ClogCtlData ...@@ -153,13 +148,17 @@ typedef struct ClogCtlData
* swapping out the latest page. * swapping out the latest page.
*/ */
int latest_page_number; int latest_page_number;
slock_t control_lck; /* Lock for ClogCtlData itself */
slock_t buffer_lck[NUM_CLOG_BUFFERS]; /* Per-buffer I/O locks */
} ClogCtlData; } ClogCtlData;
static ClogCtlData *ClogCtl = NULL; static ClogCtlData *ClogCtl = NULL;
/*
* ClogBufferLocks is set during CLOGShmemInit and does not change thereafter.
* The value is automatically inherited by backends via fork, and
* doesn't need to be in shared memory.
*/
static LWLockId ClogBufferLocks[NUM_CLOG_BUFFERS]; /* Per-buffer I/O locks */
/* /*
* ClogDir is set during CLOGShmemInit and does not change thereafter. * ClogDir is set during CLOGShmemInit and does not change thereafter.
* The value is automatically inherited by backends via fork, and * The value is automatically inherited by backends via fork, and
...@@ -211,7 +210,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) ...@@ -211,7 +210,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
Assert(status == TRANSACTION_STATUS_COMMITTED || Assert(status == TRANSACTION_STATUS_COMMITTED ||
status == TRANSACTION_STATUS_ABORTED); status == TRANSACTION_STATUS_ABORTED);
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
slotno = ReadCLOGPage(pageno); slotno = ReadCLOGPage(pageno);
byteptr = ClogCtl->page_buffer[slotno] + byteno; byteptr = ClogCtl->page_buffer[slotno] + byteno;
...@@ -224,7 +223,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) ...@@ -224,7 +223,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
ClogCtl->page_status[slotno] = CLOG_PAGE_DIRTY; ClogCtl->page_status[slotno] = CLOG_PAGE_DIRTY;
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
} }
/* /*
...@@ -243,14 +242,14 @@ TransactionIdGetStatus(TransactionId xid) ...@@ -243,14 +242,14 @@ TransactionIdGetStatus(TransactionId xid)
char *byteptr; char *byteptr;
XidStatus status; XidStatus status;
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
slotno = ReadCLOGPage(pageno); slotno = ReadCLOGPage(pageno);
byteptr = ClogCtl->page_buffer[slotno] + byteno; byteptr = ClogCtl->page_buffer[slotno] + byteno;
status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
return status; return status;
} }
...@@ -283,15 +282,13 @@ CLOGShmemInit(void) ...@@ -283,15 +282,13 @@ CLOGShmemInit(void)
memset(ClogCtl, 0, sizeof(ClogCtlData)); memset(ClogCtl, 0, sizeof(ClogCtlData));
S_INIT_LOCK(&(ClogCtl->control_lck));
bufptr = ((char *) ClogCtl) + sizeof(ClogCtlData); bufptr = ((char *) ClogCtl) + sizeof(ClogCtlData);
for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
{ {
ClogCtl->page_buffer[slotno] = bufptr; ClogCtl->page_buffer[slotno] = bufptr;
ClogCtl->page_status[slotno] = CLOG_PAGE_EMPTY; ClogCtl->page_status[slotno] = CLOG_PAGE_EMPTY;
S_INIT_LOCK(&(ClogCtl->buffer_lck[slotno])); ClogBufferLocks[slotno] = LWLockAssign();
bufptr += CLOG_BLCKSZ; bufptr += CLOG_BLCKSZ;
} }
...@@ -312,7 +309,7 @@ BootStrapCLOG(void) ...@@ -312,7 +309,7 @@ BootStrapCLOG(void)
{ {
int slotno; int slotno;
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
/* Create and zero the first page of the commit log */ /* Create and zero the first page of the commit log */
slotno = ZeroCLOGPage(0, false); slotno = ZeroCLOGPage(0, false);
...@@ -321,7 +318,7 @@ BootStrapCLOG(void) ...@@ -321,7 +318,7 @@ BootStrapCLOG(void)
WriteCLOGPage(slotno); WriteCLOGPage(slotno);
Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN); Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN);
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
} }
/* /*
...@@ -411,8 +408,8 @@ ReadCLOGPage(int pageno) ...@@ -411,8 +408,8 @@ ReadCLOGPage(int pageno)
ClogCtl->page_lru_count[slotno] = 0; ClogCtl->page_lru_count[slotno] = 0;
/* Release shared lock, grab per-buffer lock instead */ /* Release shared lock, grab per-buffer lock instead */
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
S_LOCK(&(ClogCtl->buffer_lck[slotno])); LWLockAcquire(ClogBufferLocks[slotno], LW_EXCLUSIVE);
/* /*
* Check to see if someone else already did the read, or took the * Check to see if someone else already did the read, or took the
...@@ -421,8 +418,8 @@ ReadCLOGPage(int pageno) ...@@ -421,8 +418,8 @@ ReadCLOGPage(int pageno)
if (ClogCtl->page_number[slotno] != pageno || if (ClogCtl->page_number[slotno] != pageno ||
ClogCtl->page_status[slotno] != CLOG_PAGE_READ_IN_PROGRESS) ClogCtl->page_status[slotno] != CLOG_PAGE_READ_IN_PROGRESS)
{ {
S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); LWLockRelease(ClogBufferLocks[slotno]);
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
continue; continue;
} }
...@@ -430,14 +427,14 @@ ReadCLOGPage(int pageno) ...@@ -430,14 +427,14 @@ ReadCLOGPage(int pageno)
CLOGPhysicalReadPage(pageno, slotno); CLOGPhysicalReadPage(pageno, slotno);
/* Re-acquire shared control lock and update page state */ /* Re-acquire shared control lock and update page state */
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
Assert(ClogCtl->page_number[slotno] == pageno && Assert(ClogCtl->page_number[slotno] == pageno &&
ClogCtl->page_status[slotno] == CLOG_PAGE_READ_IN_PROGRESS); ClogCtl->page_status[slotno] == CLOG_PAGE_READ_IN_PROGRESS);
ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN; ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN;
S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); LWLockRelease(ClogBufferLocks[slotno]);
ClogRecentlyUsed(slotno); ClogRecentlyUsed(slotno);
return slotno; return slotno;
...@@ -468,8 +465,8 @@ WriteCLOGPage(int slotno) ...@@ -468,8 +465,8 @@ WriteCLOGPage(int slotno)
pageno = ClogCtl->page_number[slotno]; pageno = ClogCtl->page_number[slotno];
/* Release shared lock, grab per-buffer lock instead */ /* Release shared lock, grab per-buffer lock instead */
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
S_LOCK(&(ClogCtl->buffer_lck[slotno])); LWLockAcquire(ClogBufferLocks[slotno], LW_EXCLUSIVE);
/* /*
* Check to see if someone else already did the write, or took the * Check to see if someone else already did the write, or took the
...@@ -482,8 +479,8 @@ WriteCLOGPage(int slotno) ...@@ -482,8 +479,8 @@ WriteCLOGPage(int slotno)
(ClogCtl->page_status[slotno] != CLOG_PAGE_DIRTY && (ClogCtl->page_status[slotno] != CLOG_PAGE_DIRTY &&
ClogCtl->page_status[slotno] != CLOG_PAGE_WRITE_IN_PROGRESS)) ClogCtl->page_status[slotno] != CLOG_PAGE_WRITE_IN_PROGRESS))
{ {
S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); LWLockRelease(ClogBufferLocks[slotno]);
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
return; return;
} }
...@@ -504,7 +501,7 @@ WriteCLOGPage(int slotno) ...@@ -504,7 +501,7 @@ WriteCLOGPage(int slotno)
CLOGPhysicalWritePage(pageno, slotno); CLOGPhysicalWritePage(pageno, slotno);
/* Re-acquire shared control lock and update page state */ /* Re-acquire shared control lock and update page state */
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
Assert(ClogCtl->page_number[slotno] == pageno && Assert(ClogCtl->page_number[slotno] == pageno &&
(ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS || (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS ||
...@@ -514,7 +511,7 @@ WriteCLOGPage(int slotno) ...@@ -514,7 +511,7 @@ WriteCLOGPage(int slotno)
if (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS) if (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS)
ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN; ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN;
S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); LWLockRelease(ClogBufferLocks[slotno]);
} }
/* /*
...@@ -714,7 +711,7 @@ ShutdownCLOG(void) ...@@ -714,7 +711,7 @@ ShutdownCLOG(void)
{ {
int slotno; int slotno;
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
{ {
...@@ -723,7 +720,7 @@ ShutdownCLOG(void) ...@@ -723,7 +720,7 @@ ShutdownCLOG(void)
ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN); ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN);
} }
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
} }
/* /*
...@@ -734,7 +731,7 @@ CheckPointCLOG(void) ...@@ -734,7 +731,7 @@ CheckPointCLOG(void)
{ {
int slotno; int slotno;
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
{ {
...@@ -745,7 +742,7 @@ CheckPointCLOG(void) ...@@ -745,7 +742,7 @@ CheckPointCLOG(void)
*/ */
} }
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
} }
...@@ -772,12 +769,12 @@ ExtendCLOG(TransactionId newestXact) ...@@ -772,12 +769,12 @@ ExtendCLOG(TransactionId newestXact)
pageno = TransactionIdToPage(newestXact); pageno = TransactionIdToPage(newestXact);
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
/* Zero the page and make an XLOG entry about it */ /* Zero the page and make an XLOG entry about it */
ZeroCLOGPage(pageno, true); ZeroCLOGPage(pageno, true);
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
} }
...@@ -819,7 +816,7 @@ TruncateCLOG(TransactionId oldestXact) ...@@ -819,7 +816,7 @@ TruncateCLOG(TransactionId oldestXact)
* should have been flushed already during the checkpoint, we're * should have been flushed already during the checkpoint, we're
* just being extra careful here.) * just being extra careful here.)
*/ */
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
restart:; restart:;
/* /*
...@@ -830,7 +827,7 @@ restart:; ...@@ -830,7 +827,7 @@ restart:;
*/ */
if (CLOGPagePrecedes(ClogCtl->latest_page_number, cutoffPage)) if (CLOGPagePrecedes(ClogCtl->latest_page_number, cutoffPage))
{ {
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
elog(LOG, "unable to truncate commit log: apparent wraparound"); elog(LOG, "unable to truncate commit log: apparent wraparound");
return; return;
} }
...@@ -861,7 +858,7 @@ restart:; ...@@ -861,7 +858,7 @@ restart:;
goto restart; goto restart;
} }
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
/* Now we can remove the old CLOG segment(s) */ /* Now we can remove the old CLOG segment(s) */
(void) ScanCLOGDirectory(cutoffPage, true); (void) ScanCLOGDirectory(cutoffPage, true);
...@@ -974,13 +971,13 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -974,13 +971,13 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record)
memcpy(&pageno, XLogRecGetData(record), sizeof(int)); memcpy(&pageno, XLogRecGetData(record), sizeof(int));
S_LOCK(&(ClogCtl->control_lck)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
slotno = ZeroCLOGPage(pageno, false); slotno = ZeroCLOGPage(pageno, false);
WriteCLOGPage(slotno); WriteCLOGPage(slotno);
Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN); Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN);
S_UNLOCK(&(ClogCtl->control_lck)); LWLockRelease(CLogControlLock);
} }
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Copyright (c) 2000, PostgreSQL Global Development Group * Copyright (c) 2000, PostgreSQL Global Development Group
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.45 2001/08/25 18:52:41 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.46 2001/09/29 04:02:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -15,16 +15,13 @@ ...@@ -15,16 +15,13 @@
#include "access/clog.h" #include "access/clog.h"
#include "access/transam.h" #include "access/transam.h"
#include "storage/ipc.h"
#include "storage/proc.h" #include "storage/proc.h"
/* Number of OIDs to prefetch (preallocate) per XLOG write */ /* Number of OIDs to prefetch (preallocate) per XLOG write */
#define VAR_OID_PREFETCH 8192 #define VAR_OID_PREFETCH 8192
/* Spinlocks for serializing generation of XIDs and OIDs, respectively */
SPINLOCK XidGenLockId;
SPINLOCK OidGenLockId;
/* pointer to "variable cache" in shared memory (set up by shmem.c) */ /* pointer to "variable cache" in shared memory (set up by shmem.c) */
VariableCache ShmemVariableCache = NULL; VariableCache ShmemVariableCache = NULL;
...@@ -44,7 +41,7 @@ GetNewTransactionId(void) ...@@ -44,7 +41,7 @@ GetNewTransactionId(void)
if (AMI_OVERRIDE) if (AMI_OVERRIDE)
return BootstrapTransactionId; return BootstrapTransactionId;
SpinAcquire(XidGenLockId); LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
xid = ShmemVariableCache->nextXid; xid = ShmemVariableCache->nextXid;
...@@ -83,7 +80,7 @@ GetNewTransactionId(void) ...@@ -83,7 +80,7 @@ GetNewTransactionId(void)
if (MyProc != (PROC *) NULL) if (MyProc != (PROC *) NULL)
MyProc->xid = xid; MyProc->xid = xid;
SpinRelease(XidGenLockId); LWLockRelease(XidGenLock);
return xid; return xid;
} }
...@@ -103,9 +100,9 @@ ReadNewTransactionId(void) ...@@ -103,9 +100,9 @@ ReadNewTransactionId(void)
if (AMI_OVERRIDE) if (AMI_OVERRIDE)
return BootstrapTransactionId; return BootstrapTransactionId;
SpinAcquire(XidGenLockId); LWLockAcquire(XidGenLock, LW_SHARED);
xid = ShmemVariableCache->nextXid; xid = ShmemVariableCache->nextXid;
SpinRelease(XidGenLockId); LWLockRelease(XidGenLock);
return xid; return xid;
} }
...@@ -122,7 +119,7 @@ GetNewObjectId(void) ...@@ -122,7 +119,7 @@ GetNewObjectId(void)
{ {
Oid result; Oid result;
SpinAcquire(OidGenLockId); LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
/* /*
* Check for wraparound of the OID counter. We *must* not return 0 * Check for wraparound of the OID counter. We *must* not return 0
...@@ -149,7 +146,7 @@ GetNewObjectId(void) ...@@ -149,7 +146,7 @@ GetNewObjectId(void)
(ShmemVariableCache->nextOid)++; (ShmemVariableCache->nextOid)++;
(ShmemVariableCache->oidCount)--; (ShmemVariableCache->oidCount)--;
SpinRelease(OidGenLockId); LWLockRelease(OidGenLock);
lastSeenOid = result; lastSeenOid = result;
...@@ -162,12 +159,12 @@ CheckMaxObjectId(Oid assigned_oid) ...@@ -162,12 +159,12 @@ CheckMaxObjectId(Oid assigned_oid)
if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid) if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid)
return; return;
SpinAcquire(OidGenLockId); LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
if (assigned_oid < ShmemVariableCache->nextOid) if (assigned_oid < ShmemVariableCache->nextOid)
{ {
lastSeenOid = ShmemVariableCache->nextOid - 1; lastSeenOid = ShmemVariableCache->nextOid - 1;
SpinRelease(OidGenLockId); LWLockRelease(OidGenLock);
return; return;
} }
...@@ -178,7 +175,7 @@ CheckMaxObjectId(Oid assigned_oid) ...@@ -178,7 +175,7 @@ CheckMaxObjectId(Oid assigned_oid)
ShmemVariableCache->oidCount -= ShmemVariableCache->oidCount -=
assigned_oid - ShmemVariableCache->nextOid + 1; assigned_oid - ShmemVariableCache->nextOid + 1;
ShmemVariableCache->nextOid = assigned_oid + 1; ShmemVariableCache->nextOid = assigned_oid + 1;
SpinRelease(OidGenLockId); LWLockRelease(OidGenLock);
return; return;
} }
...@@ -192,5 +189,5 @@ CheckMaxObjectId(Oid assigned_oid) ...@@ -192,5 +189,5 @@ CheckMaxObjectId(Oid assigned_oid)
ShmemVariableCache->nextOid = assigned_oid + 1; ShmemVariableCache->nextOid = assigned_oid + 1;
ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1; ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1;
SpinRelease(OidGenLockId); LWLockRelease(OidGenLock);
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.110 2001/09/28 08:08:57 thomas Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.111 2001/09/29 04:02:21 tgl Exp $
* *
* NOTES * NOTES
* Transaction aborts can now occur two ways: * Transaction aborts can now occur two ways:
...@@ -965,7 +965,7 @@ CommitTransaction(void) ...@@ -965,7 +965,7 @@ CommitTransaction(void)
* this must be done _before_ releasing locks we hold and _after_ * this must be done _before_ releasing locks we hold and _after_
* RecordTransactionCommit. * RecordTransactionCommit.
* *
* SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked * LWLockAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked
* by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot - * by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot -
* if xid 2' GetSnapshotData sees xid 1 as running then it must see * if xid 2' GetSnapshotData sees xid 1 as running then it must see
* xid 0 as running as well or it will see two tuple versions - one * xid 0 as running as well or it will see two tuple versions - one
...@@ -975,10 +975,10 @@ CommitTransaction(void) ...@@ -975,10 +975,10 @@ CommitTransaction(void)
if (MyProc != (PROC *) NULL) if (MyProc != (PROC *) NULL)
{ {
/* Lock SInvalLock because that's what GetSnapshotData uses. */ /* Lock SInvalLock because that's what GetSnapshotData uses. */
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
MyProc->xid = InvalidTransactionId; MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId; MyProc->xmin = InvalidTransactionId;
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
} }
/* /*
...@@ -1030,12 +1030,15 @@ AbortTransaction(void) ...@@ -1030,12 +1030,15 @@ AbortTransaction(void)
HOLD_INTERRUPTS(); HOLD_INTERRUPTS();
/* /*
* Release any spinlocks or buffer context locks we might be holding * Release any LW locks we might be holding as quickly as possible.
* as quickly as possible. (Real locks, however, must be held till we * (Regular locks, however, must be held till we finish aborting.)
* finish aborting.) Releasing spinlocks is critical since we might * Releasing LW locks is critical since we might try to grab them again
* try to grab them again while cleaning up! * while cleaning up!
*/ */
ProcReleaseSpins(NULL); LWLockReleaseAll();
/* Clean up buffer I/O and buffer context locks, too */
AbortBufferIO();
UnlockBuffers(); UnlockBuffers();
/* /*
...@@ -1081,10 +1084,10 @@ AbortTransaction(void) ...@@ -1081,10 +1084,10 @@ AbortTransaction(void)
if (MyProc != (PROC *) NULL) if (MyProc != (PROC *) NULL)
{ {
/* Lock SInvalLock because that's what GetSnapshotData uses. */ /* Lock SInvalLock because that's what GetSnapshotData uses. */
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
MyProc->xid = InvalidTransactionId; MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId; MyProc->xmin = InvalidTransactionId;
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
} }
RelationPurgeLocalRelation(false); RelationPurgeLocalRelation(false);
......
This diff is collapsed.
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.38 2001/08/21 16:36:00 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.39 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
#include "storage/itemptr.h" #include "storage/itemptr.h"
#include "storage/off.h" #include "storage/off.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "storage/spin.h"
#include "tcop/dest.h" #include "tcop/dest.h"
#include "utils/nabstime.h" #include "utils/nabstime.h"
#include "utils/rel.h" #include "utils/rel.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.116 2001/09/27 16:29:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.117 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "libpq/pqsignal.h" #include "libpq/pqsignal.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h" #include "tcop/tcopprot.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/exc.h" #include "utils/exc.h"
...@@ -360,29 +361,39 @@ BootstrapMain(int argc, char *argv[]) ...@@ -360,29 +361,39 @@ BootstrapMain(int argc, char *argv[])
* XLOG operations * XLOG operations
*/ */
SetProcessingMode(NormalProcessing); SetProcessingMode(NormalProcessing);
if (xlogop == BS_XLOG_NOP)
StartupXLOG(); switch (xlogop)
else if (xlogop == BS_XLOG_BOOTSTRAP)
{
BootStrapXLOG();
StartupXLOG();
}
else
{ {
if (xlogop == BS_XLOG_CHECKPOINT) case BS_XLOG_NOP:
{ StartupXLOG();
break;
case BS_XLOG_BOOTSTRAP:
BootStrapXLOG();
StartupXLOG();
break;
case BS_XLOG_CHECKPOINT:
if (IsUnderPostmaster)
InitDummyProcess(); /* needed to get LWLocks */
CreateDummyCaches(); CreateDummyCaches();
CreateCheckPoint(false); CreateCheckPoint(false);
SetRedoRecPtr(); SetRedoRecPtr();
} proc_exit(0); /* done */
else if (xlogop == BS_XLOG_STARTUP)
case BS_XLOG_STARTUP:
StartupXLOG(); StartupXLOG();
else if (xlogop == BS_XLOG_SHUTDOWN) proc_exit(0); /* done */
case BS_XLOG_SHUTDOWN:
ShutdownXLOG(); ShutdownXLOG();
else proc_exit(0); /* done */
default:
elog(STOP, "Unsupported XLOG op %d", xlogop); elog(STOP, "Unsupported XLOG op %d", xlogop);
proc_exit(0); proc_exit(0);
} }
SetProcessingMode(BootstrapProcessing); SetProcessingMode(BootstrapProcessing);
/* /*
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.7 2001/09/21 03:32:35 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.8 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
* A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten * A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten
* immediately, and not even passed to the free space map. Removing the * immediately, and not even passed to the free space map. Removing the
* uselessly small entries early saves cycles, and in particular reduces * uselessly small entries early saves cycles, and in particular reduces
* the amount of time we spend holding the FSM spinlock when we finally call * the amount of time we spend holding the FSM lock when we finally call
* MultiRecordFreeSpace. Since the FSM will ignore pages below its own * MultiRecordFreeSpace. Since the FSM will ignore pages below its own
* runtime threshold anyway, there's no point in making this really small. * runtime threshold anyway, there's no point in making this really small.
* XXX Is it worth trying to measure average tuple size, and using that to * XXX Is it worth trying to measure average tuple size, and using that to
......
$Header: /cvsroot/pgsql/src/backend/storage/buffer/README,v 1.2 2001/08/25 18:52:42 tgl Exp $ $Header: /cvsroot/pgsql/src/backend/storage/buffer/README,v 1.3 2001/09/29 04:02:22 tgl Exp $
Notes about shared buffer access rules Notes about shared buffer access rules
-------------------------------------- --------------------------------------
...@@ -30,12 +30,10 @@ Buffer locks: there are two kinds of buffer locks, shared and exclusive, ...@@ -30,12 +30,10 @@ Buffer locks: there are two kinds of buffer locks, shared and exclusive,
which act just as you'd expect: multiple backends can hold shared locks on which act just as you'd expect: multiple backends can hold shared locks on
the same buffer, but an exclusive lock prevents anyone else from holding the same buffer, but an exclusive lock prevents anyone else from holding
either shared or exclusive lock. (These can alternatively be called READ either shared or exclusive lock. (These can alternatively be called READ
and WRITE locks.) These locks are short-term: they should not be held for and WRITE locks.) These locks are intended to be short-term: they should not
long. They are implemented as per-buffer spinlocks, so another backend be held for long. Buffer locks are acquired and released by LockBuffer().
trying to acquire a competing lock will spin as long as you hold yours! It will *not* work for a single backend to try to acquire multiple locks on
Buffer locks are acquired and released by LockBuffer(). It will *not* work the same buffer. One must pin a buffer before trying to lock it.
for a single backend to try to acquire multiple locks on the same buffer.
One must pin a buffer before trying to lock it.
Buffer access rules: Buffer access rules:
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.43 2001/07/06 21:04:25 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.44 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -28,10 +28,9 @@ ...@@ -28,10 +28,9 @@
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/s_lock.h"
#include "storage/shmem.h" #include "storage/shmem.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "storage/spin.h" #include "storage/lwlock.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -117,8 +116,6 @@ bool *BufferDirtiedByMe; /* T if buf has been dirtied in cur xact */ ...@@ -117,8 +116,6 @@ bool *BufferDirtiedByMe; /* T if buf has been dirtied in cur xact */
* *
*/ */
SPINLOCK BufMgrLock;
long int ReadBufferCount; long int ReadBufferCount;
long int ReadLocalBufferCount; long int ReadLocalBufferCount;
long int BufferHitCount; long int BufferHitCount;
...@@ -151,7 +148,7 @@ InitBufferPool(void) ...@@ -151,7 +148,7 @@ InitBufferPool(void)
* anyone else attached to the shmem at this point, we've got * anyone else attached to the shmem at this point, we've got
* problems. * problems.
*/ */
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
#ifdef BMTRACE #ifdef BMTRACE
CurTraceBuf = (long *) ShmemInitStruct("Buffer trace", CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
...@@ -186,8 +183,8 @@ InitBufferPool(void) ...@@ -186,8 +183,8 @@ InitBufferPool(void)
/* /*
* link the buffers into a circular, doubly-linked list to * link the buffers into a circular, doubly-linked list to
* initialize free list. Still don't know anything about * initialize free list, and initialize the buffer headers.
* replacement strategy in this file. * Still don't know anything about replacement strategy in this file.
*/ */
for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++) for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
{ {
...@@ -197,12 +194,15 @@ InitBufferPool(void) ...@@ -197,12 +194,15 @@ InitBufferPool(void)
buf->freePrev = i - 1; buf->freePrev = i - 1;
CLEAR_BUFFERTAG(&(buf->tag)); CLEAR_BUFFERTAG(&(buf->tag));
buf->buf_id = i;
buf->data = MAKE_OFFSET(block); buf->data = MAKE_OFFSET(block);
buf->flags = (BM_DELETED | BM_FREE | BM_VALID); buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
buf->refcount = 0; buf->refcount = 0;
buf->buf_id = i; buf->io_in_progress_lock = LWLockAssign();
S_INIT_LOCK(&(buf->io_in_progress_lock)); buf->cntx_lock = LWLockAssign();
S_INIT_LOCK(&(buf->cntx_lock)); buf->cntxDirty = false;
buf->wait_backend_id = 0;
} }
/* close the circular queue */ /* close the circular queue */
...@@ -214,7 +214,7 @@ InitBufferPool(void) ...@@ -214,7 +214,7 @@ InitBufferPool(void)
InitBufTable(); InitBufTable();
InitFreeList(!foundDescs); InitFreeList(!foundDescs);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.21 2001/03/22 03:59:44 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.22 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,8 +23,7 @@ ...@@ -23,8 +23,7 @@
* *
* Synchronization: * Synchronization:
* *
* All routines in this file assume buffer manager spinlock is * All routines in this file assume BufMgrLock is held by their caller.
* held by their caller.
*/ */
#include "postgres.h" #include "postgres.h"
......
This diff is collapsed.
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.24 2001/07/06 21:04:26 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.25 2001/09/29 04:02:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "storage/buf_internals.h" #include "storage/buf_internals.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h" #include "storage/proc.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.4 2001/07/19 21:25:37 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.5 2001/09/29 04:02:23 tgl Exp $
* *
* *
* NOTES: * NOTES:
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/itemid.h" #include "storage/itemid.h"
#include "storage/lwlock.h"
#include "storage/shmem.h" #include "storage/shmem.h"
...@@ -122,9 +123,6 @@ struct FSMChunk ...@@ -122,9 +123,6 @@ struct FSMChunk
}; };
SPINLOCK FreeSpaceLock; /* in Shmem or created in
* CreateSpinlocks() */
int MaxFSMRelations; /* these are set by guc.c */ int MaxFSMRelations; /* these are set by guc.c */
int MaxFSMPages; int MaxFSMPages;
...@@ -256,7 +254,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) ...@@ -256,7 +254,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
FSMRelation *fsmrel; FSMRelation *fsmrel;
BlockNumber freepage; BlockNumber freepage;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* /*
* We always add a rel to the hashtable when it is inquired about. * We always add a rel to the hashtable when it is inquired about.
*/ */
...@@ -279,7 +277,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) ...@@ -279,7 +277,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
fsmrel->threshold = (Size) cur_avg; fsmrel->threshold = (Size) cur_avg;
} }
freepage = find_free_space(fsmrel, spaceNeeded); freepage = find_free_space(fsmrel, spaceNeeded);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
return freepage; return freepage;
} }
...@@ -299,7 +297,7 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail) ...@@ -299,7 +297,7 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
/* Sanity check: ensure spaceAvail will fit into ItemLength */ /* Sanity check: ensure spaceAvail will fit into ItemLength */
AssertArg(spaceAvail < BLCKSZ); AssertArg(spaceAvail < BLCKSZ);
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* /*
* We choose not to add rels to the hashtable unless they've been * We choose not to add rels to the hashtable unless they've been
* inquired about with GetPageWithFreeSpace. Also, a Record operation * inquired about with GetPageWithFreeSpace. Also, a Record operation
...@@ -308,11 +306,11 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail) ...@@ -308,11 +306,11 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
fsmrel = lookup_fsm_rel(rel); fsmrel = lookup_fsm_rel(rel);
if (fsmrel) if (fsmrel)
fsm_record_free_space(fsmrel, page, spaceAvail); fsm_record_free_space(fsmrel, page, spaceAvail);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
/* /*
* RecordAndGetPageWithFreeSpace - combo form to save one spinlock and * RecordAndGetPageWithFreeSpace - combo form to save one lock and
* hash table lookup cycle. * hash table lookup cycle.
*/ */
BlockNumber BlockNumber
...@@ -327,7 +325,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, ...@@ -327,7 +325,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel,
/* Sanity check: ensure spaceAvail will fit into ItemLength */ /* Sanity check: ensure spaceAvail will fit into ItemLength */
AssertArg(oldSpaceAvail < BLCKSZ); AssertArg(oldSpaceAvail < BLCKSZ);
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* /*
* We always add a rel to the hashtable when it is inquired about. * We always add a rel to the hashtable when it is inquired about.
*/ */
...@@ -351,7 +349,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, ...@@ -351,7 +349,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel,
fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail); fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
/* Do the Get */ /* Do the Get */
freepage = find_free_space(fsmrel, spaceNeeded); freepage = find_free_space(fsmrel, spaceNeeded);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
return freepage; return freepage;
} }
...@@ -378,7 +376,7 @@ MultiRecordFreeSpace(RelFileNode *rel, ...@@ -378,7 +376,7 @@ MultiRecordFreeSpace(RelFileNode *rel,
FSMRelation *fsmrel; FSMRelation *fsmrel;
int i; int i;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
fsmrel = lookup_fsm_rel(rel); fsmrel = lookup_fsm_rel(rel);
if (fsmrel) if (fsmrel)
{ {
...@@ -437,7 +435,7 @@ MultiRecordFreeSpace(RelFileNode *rel, ...@@ -437,7 +435,7 @@ MultiRecordFreeSpace(RelFileNode *rel,
fsm_record_free_space(fsmrel, page, avail); fsm_record_free_space(fsmrel, page, avail);
} }
} }
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
/* /*
...@@ -452,11 +450,11 @@ FreeSpaceMapForgetRel(RelFileNode *rel) ...@@ -452,11 +450,11 @@ FreeSpaceMapForgetRel(RelFileNode *rel)
{ {
FSMRelation *fsmrel; FSMRelation *fsmrel;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
fsmrel = lookup_fsm_rel(rel); fsmrel = lookup_fsm_rel(rel);
if (fsmrel) if (fsmrel)
delete_fsm_rel(fsmrel); delete_fsm_rel(fsmrel);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
/* /*
...@@ -474,14 +472,14 @@ FreeSpaceMapForgetDatabase(Oid dbid) ...@@ -474,14 +472,14 @@ FreeSpaceMapForgetDatabase(Oid dbid)
FSMRelation *fsmrel, FSMRelation *fsmrel,
*nextrel; *nextrel;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel) for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel)
{ {
nextrel = fsmrel->nextRel; /* in case we delete it */ nextrel = fsmrel->nextRel; /* in case we delete it */
if (fsmrel->key.tblNode == dbid) if (fsmrel->key.tblNode == dbid)
delete_fsm_rel(fsmrel); delete_fsm_rel(fsmrel);
} }
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.68 2001/09/04 00:22:34 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.69 2001/09/29 04:02:23 tgl Exp $
* *
* NOTES * NOTES
* *
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include <unistd.h> #include <unistd.h>
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/s_lock.h"
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */ /* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
#ifdef HAVE_SYS_SEM_H #ifdef HAVE_SYS_SEM_H
#include <sys/sem.h> #include <sys/sem.h>
...@@ -306,7 +305,7 @@ InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, ...@@ -306,7 +305,7 @@ InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
if (errno == ENOSPC) if (errno == ENOSPC)
fprintf(stderr, fprintf(stderr,
"\nThis error does *not* mean that you have run out of disk space.\n\n" "\nThis error does *not* mean that you have run out of disk space.\n\n"
"It occurs either because system limit for the maximum number of\n" "It occurs because either the system limit for the maximum number of\n"
"semaphore sets (SEMMNI), or the system wide maximum number of\n" "semaphore sets (SEMMNI), or the system wide maximum number of\n"
"semaphores (SEMMNS), would be exceeded. You need to raise the\n" "semaphores (SEMMNS), would be exceeded. You need to raise the\n"
"respective kernel parameter. Look into the PostgreSQL documentation\n" "respective kernel parameter. Look into the PostgreSQL documentation\n"
...@@ -416,8 +415,8 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK) ...@@ -416,8 +415,8 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK)
* record acquiring the lock. (This is currently true for lockmanager * record acquiring the lock. (This is currently true for lockmanager
* locks, since the process that granted us the lock did all the * locks, since the process that granted us the lock did all the
* necessary state updates. It's not true for SysV semaphores used to * necessary state updates. It's not true for SysV semaphores used to
* emulate spinlocks --- but our performance on such platforms is so * implement LW locks or emulate spinlocks --- but the wait time for
* horrible anyway that I'm not going to worry too much about it.) * such locks should not be very long, anyway.)
*/ */
do do
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.42 2001/08/25 18:52:42 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.43 2001/09/29 04:02:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/lwlock.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/sinval.h" #include "storage/sinval.h"
#include "storage/spin.h" #include "storage/spin.h"
...@@ -53,7 +54,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) ...@@ -53,7 +54,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
size += LockShmemSize(maxBackends); size += LockShmemSize(maxBackends);
size += XLOGShmemSize(); size += XLOGShmemSize();
size += CLOGShmemSize(); size += CLOGShmemSize();
size += SLockShmemSize(); size += LWLockShmemSize();
size += SInvalShmemSize(maxBackends); size += SInvalShmemSize(maxBackends);
size += FreeSpaceShmemSize(); size += FreeSpaceShmemSize();
#ifdef STABLE_MEMORY_STORAGE #ifdef STABLE_MEMORY_STORAGE
...@@ -74,13 +75,24 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) ...@@ -74,13 +75,24 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
/* /*
* First initialize spinlocks --- needed by InitShmemAllocation() * First initialize spinlocks --- needed by InitShmemAllocation()
*/ */
CreateSpinlocks(seghdr); CreateSpinlocks();
/* /*
* Set up shmem.c hashtable * Set up shared memory allocation mechanism
*/ */
InitShmemAllocation(seghdr); InitShmemAllocation(seghdr);
/*
* Now initialize LWLocks, which do shared memory allocation and
* are needed for InitShmemIndex.
*/
CreateLWLocks();
/*
* Set up shmem.c index hashtable
*/
InitShmemIndex();
/* /*
* Set up xlog, clog, and buffers * Set up xlog, clog, and buffers
*/ */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.58 2001/09/07 00:27:29 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.59 2001/09/29 04:02:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -61,8 +61,10 @@ ...@@ -61,8 +61,10 @@
#include "postgres.h" #include "postgres.h"
#include "access/transam.h" #include "access/transam.h"
#include "storage/spin.h"
#include "utils/tqual.h" #include "utils/tqual.h"
/* shared memory global variables */ /* shared memory global variables */
static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */ static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
...@@ -71,9 +73,7 @@ SHMEM_OFFSET ShmemBase; /* start address of shared memory */ ...@@ -71,9 +73,7 @@ SHMEM_OFFSET ShmemBase; /* start address of shared memory */
static SHMEM_OFFSET ShmemEnd; /* end+1 address of shared memory */ static SHMEM_OFFSET ShmemEnd; /* end+1 address of shared memory */
SPINLOCK ShmemLock; /* lock for shared memory allocation */ static slock_t *ShmemLock; /* spinlock for shared memory allocation */
SPINLOCK ShmemIndexLock; /* lock for shmem index access */
static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */ static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
...@@ -81,63 +81,33 @@ static bool ShmemBootstrap = false; /* bootstrapping shmem index? */ ...@@ -81,63 +81,33 @@ static bool ShmemBootstrap = false; /* bootstrapping shmem index? */
/* /*
* InitShmemAllocation() --- set up shared-memory allocation and index table. * InitShmemAllocation() --- set up shared-memory allocation.
*
* Note: the argument should be declared "PGShmemHeader *seghdr",
* but we use "void *" to avoid having to include ipc.h in shmem.h.
*/ */
void void
InitShmemAllocation(PGShmemHeader *seghdr) InitShmemAllocation(void *seghdr)
{ {
HASHCTL info; PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
int hash_flags;
ShmemIndexEnt *result,
item;
bool found;
/* Set up basic pointers to shared memory */ /* Set up basic pointers to shared memory */
ShmemSegHdr = seghdr; ShmemSegHdr = shmhdr;
ShmemBase = (SHMEM_OFFSET) seghdr; ShmemBase = (SHMEM_OFFSET) shmhdr;
ShmemEnd = ShmemBase + seghdr->totalsize; ShmemEnd = ShmemBase + shmhdr->totalsize;
/*
* Since ShmemInitHash calls ShmemInitStruct, which expects the
* ShmemIndex hashtable to exist already, we have a bit of a
* circularity problem in initializing the ShmemIndex itself. We set
* ShmemBootstrap to tell ShmemInitStruct to fake it.
*/
ShmemIndex = (HTAB *) NULL;
ShmemBootstrap = true;
/* create the shared memory shmem index */
info.keysize = SHMEM_INDEX_KEYSIZE;
info.datasize = SHMEM_INDEX_DATASIZE;
hash_flags = HASH_ELEM;
/* This will acquire the shmem index lock, but not release it. */
ShmemIndex = ShmemInitHash("ShmemIndex",
SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
&info, hash_flags);
if (!ShmemIndex)
elog(FATAL, "InitShmemAllocation: couldn't initialize Shmem Index");
/* /*
* Now, create an entry in the hashtable for the index itself. * Initialize the spinlock used by ShmemAlloc. We have to do the
* space allocation the hard way, since ShmemAlloc can't be called yet.
*/ */
MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE); ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE); shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
Assert(shmhdr->freeoffset <= shmhdr->totalsize);
result = (ShmemIndexEnt *) SpinLockInit(ShmemLock);
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
if (!result)
elog(FATAL, "InitShmemAllocation: corrupted shmem index");
Assert(ShmemBootstrap && !found); /* ShmemIndex can't be set up yet (need LWLocks first) */
ShmemIndex = (HTAB *) NULL;
result->location = MAKE_OFFSET(ShmemIndex->hctl);
result->size = SHMEM_INDEX_SIZE;
ShmemBootstrap = false;
/* now release the lock acquired in ShmemInitStruct */
SpinRelease(ShmemIndexLock);
/* /*
* Initialize ShmemVariableCache for transaction manager. * Initialize ShmemVariableCache for transaction manager.
...@@ -167,9 +137,9 @@ ShmemAlloc(Size size) ...@@ -167,9 +137,9 @@ ShmemAlloc(Size size)
*/ */
size = MAXALIGN(size); size = MAXALIGN(size);
Assert(ShmemSegHdr); Assert(ShmemSegHdr != NULL);
SpinAcquire(ShmemLock); SpinLockAcquire(ShmemLock);
newFree = ShmemSegHdr->freeoffset + size; newFree = ShmemSegHdr->freeoffset + size;
if (newFree <= ShmemSegHdr->totalsize) if (newFree <= ShmemSegHdr->totalsize)
...@@ -180,7 +150,7 @@ ShmemAlloc(Size size) ...@@ -180,7 +150,7 @@ ShmemAlloc(Size size)
else else
newSpace = NULL; newSpace = NULL;
SpinRelease(ShmemLock); SpinLockRelease(ShmemLock);
if (!newSpace) if (!newSpace)
elog(NOTICE, "ShmemAlloc: out of memory"); elog(NOTICE, "ShmemAlloc: out of memory");
...@@ -199,6 +169,60 @@ ShmemIsValid(unsigned long addr) ...@@ -199,6 +169,60 @@ ShmemIsValid(unsigned long addr)
return (addr < ShmemEnd) && (addr >= ShmemBase); return (addr < ShmemEnd) && (addr >= ShmemBase);
} }
/*
* InitShmemIndex() --- set up shmem index table.
*
* Creates the "ShmemIndex" hash table via ShmemInitHash and then enters
* the table itself into that index. Takes no arguments and returns
* nothing; both failure cases are reported with elog(FATAL, ...).
*
* The index lock (ShmemIndexLock) is acquired inside ShmemInitStruct
* during the ShmemInitHash call and is released at the end of this
* routine, so the lock mechanism must already be usable when this runs.
*/
void
InitShmemIndex(void)
{
HASHCTL info;
int hash_flags;
ShmemIndexEnt *result,
item;
bool found;
/*
* Since ShmemInitHash calls ShmemInitStruct, which expects the
* ShmemIndex hashtable to exist already, we have a bit of a
* circularity problem in initializing the ShmemIndex itself. We set
* ShmemBootstrap to tell ShmemInitStruct to fake it.
*/
ShmemBootstrap = true;
/* create the shared memory shmem index */
info.keysize = SHMEM_INDEX_KEYSIZE;
info.datasize = SHMEM_INDEX_DATASIZE;
hash_flags = HASH_ELEM;
/* This will acquire the shmem index lock, but not release it. */
ShmemIndex = ShmemInitHash("ShmemIndex",
SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
&info, hash_flags);
if (!ShmemIndex)
elog(FATAL, "InitShmemIndex: couldn't initialize Shmem Index");
/*
* Now, create an entry in the hashtable for the index itself.
* The key buffer is zeroed and then the table name is copied into it.
*/
MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
if (!result)
elog(FATAL, "InitShmemIndex: corrupted shmem index");
/* we must still be bootstrapping, and the entry must be brand new */
Assert(ShmemBootstrap && !found);
/* record the offset of the table's hctl field and the index size */
result->location = MAKE_OFFSET(ShmemIndex->hctl);
result->size = SHMEM_INDEX_SIZE;
/* bootstrapping is finished; clear the flag set above */
ShmemBootstrap = false;
/* now release the lock acquired in ShmemInitStruct */
LWLockRelease(ShmemIndexLock);
}
/* /*
* ShmemInitHash -- Create/Attach to and initialize * ShmemInitHash -- Create/Attach to and initialize
* shared memory hash table. * shared memory hash table.
...@@ -207,8 +231,7 @@ ShmemIsValid(unsigned long addr) ...@@ -207,8 +231,7 @@ ShmemIsValid(unsigned long addr)
* *
* assume caller is doing some kind of synchronization * assume caller is doing some kind of synchronization
* so that two people don't try to create/initialize the * so that two people don't try to create/initialize the
* table at once. Use SpinAlloc() to create a spinlock * table at once.
* for the structure before creating the structure itself.
*/ */
HTAB * HTAB *
ShmemInitHash(char *name, /* table string name for shmem index */ ShmemInitHash(char *name, /* table string name for shmem index */
...@@ -283,7 +306,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -283,7 +306,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
strncpy(item.key, name, SHMEM_INDEX_KEYSIZE); strncpy(item.key, name, SHMEM_INDEX_KEYSIZE);
item.location = BAD_LOCATION; item.location = BAD_LOCATION;
SpinAcquire(ShmemIndexLock); LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
if (!ShmemIndex) if (!ShmemIndex)
{ {
...@@ -306,7 +329,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -306,7 +329,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
if (!result) if (!result)
{ {
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
elog(ERROR, "ShmemInitStruct: Shmem Index corrupted"); elog(ERROR, "ShmemInitStruct: Shmem Index corrupted");
return NULL; return NULL;
} }
...@@ -320,7 +343,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -320,7 +343,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
*/ */
if (result->size != size) if (result->size != size)
{ {
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
elog(NOTICE, "ShmemInitStruct: ShmemIndex entry size is wrong"); elog(NOTICE, "ShmemInitStruct: ShmemIndex entry size is wrong");
/* let caller print its message too */ /* let caller print its message too */
...@@ -337,7 +360,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -337,7 +360,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
/* out of memory */ /* out of memory */
Assert(ShmemIndex); Assert(ShmemIndex);
hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, foundPtr); hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, foundPtr);
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
*foundPtr = FALSE; *foundPtr = FALSE;
elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'", elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'",
...@@ -349,6 +372,6 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -349,6 +372,6 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
} }
Assert(ShmemIsValid((unsigned long) structPtr)); Assert(ShmemIsValid((unsigned long) structPtr));
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
return structPtr; return structPtr;
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.40 2001/08/26 16:56:00 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.41 2001/09/29 04:02:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,8 +23,6 @@ ...@@ -23,8 +23,6 @@
#include "miscadmin.h" #include "miscadmin.h"
SPINLOCK SInvalLock = (SPINLOCK) NULL;
/****************************************************************************/ /****************************************************************************/
/* CreateSharedInvalidationState() Initialize SI buffer */ /* CreateSharedInvalidationState() Initialize SI buffer */
/* */ /* */
...@@ -33,7 +31,7 @@ SPINLOCK SInvalLock = (SPINLOCK) NULL; ...@@ -33,7 +31,7 @@ SPINLOCK SInvalLock = (SPINLOCK) NULL;
void void
CreateSharedInvalidationState(int maxBackends) CreateSharedInvalidationState(int maxBackends)
{ {
/* SInvalLock must be initialized already, during spinlock init */ /* SInvalLock must be initialized already, during LWLock init */
SIBufferInit(maxBackends); SIBufferInit(maxBackends);
} }
...@@ -46,9 +44,9 @@ InitBackendSharedInvalidationState(void) ...@@ -46,9 +44,9 @@ InitBackendSharedInvalidationState(void)
{ {
int flag; int flag;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
flag = SIBackendInit(shmInvalBuffer); flag = SIBackendInit(shmInvalBuffer);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
if (flag < 0) /* unexpected problem */ if (flag < 0) /* unexpected problem */
elog(FATAL, "Backend cache invalidation initialization failed"); elog(FATAL, "Backend cache invalidation initialization failed");
if (flag == 0) /* expected problem: MaxBackends exceeded */ if (flag == 0) /* expected problem: MaxBackends exceeded */
...@@ -64,9 +62,9 @@ SendSharedInvalidMessage(SharedInvalidationMessage *msg) ...@@ -64,9 +62,9 @@ SendSharedInvalidMessage(SharedInvalidationMessage *msg)
{ {
bool insertOK; bool insertOK;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
insertOK = SIInsertDataEntry(shmInvalBuffer, msg); insertOK = SIInsertDataEntry(shmInvalBuffer, msg);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
if (!insertOK) if (!insertOK)
elog(DEBUG, "SendSharedInvalidMessage: SI buffer overflow"); elog(DEBUG, "SendSharedInvalidMessage: SI buffer overflow");
} }
...@@ -86,9 +84,25 @@ ReceiveSharedInvalidMessages( ...@@ -86,9 +84,25 @@ ReceiveSharedInvalidMessages(
for (;;) for (;;)
{ {
SpinAcquire(SInvalLock); /*
* We can run SIGetDataEntry in parallel with other backends running
* SIGetDataEntry for themselves, since each instance will modify
* only fields of its own backend's ProcState, and no instance will
* look at fields of other backends' ProcStates. We express this
* by grabbing SInvalLock in shared mode. Note that this is not
* exactly the normal (read-only) interpretation of a shared lock!
* Look closely at the interactions before allowing SInvalLock to
* be grabbed in shared mode for any other reason!
*
* The routines later in this file that use shared mode are okay
* with this, because they aren't looking at the ProcState fields
* associated with SI message transfer; they only use the ProcState
* array as an easy way to find all the PROC structures.
*/
LWLockAcquire(SInvalLock, LW_SHARED);
getResult = SIGetDataEntry(shmInvalBuffer, MyBackendId, &data); getResult = SIGetDataEntry(shmInvalBuffer, MyBackendId, &data);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
if (getResult == 0) if (getResult == 0)
break; /* nothing more to do */ break; /* nothing more to do */
if (getResult < 0) if (getResult < 0)
...@@ -108,9 +122,9 @@ ReceiveSharedInvalidMessages( ...@@ -108,9 +122,9 @@ ReceiveSharedInvalidMessages(
/* If we got any messages, try to release dead messages */ /* If we got any messages, try to release dead messages */
if (gotMessage) if (gotMessage)
{ {
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
SIDelExpiredDataEntries(shmInvalBuffer); SIDelExpiredDataEntries(shmInvalBuffer);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
} }
} }
...@@ -149,7 +163,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself) ...@@ -149,7 +163,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
ProcState *stateP = segP->procState; ProcState *stateP = segP->procState;
int index; int index;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -170,7 +184,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself) ...@@ -170,7 +184,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return result; return result;
} }
...@@ -186,7 +200,7 @@ TransactionIdIsInProgress(TransactionId xid) ...@@ -186,7 +200,7 @@ TransactionIdIsInProgress(TransactionId xid)
ProcState *stateP = segP->procState; ProcState *stateP = segP->procState;
int index; int index;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -206,7 +220,7 @@ TransactionIdIsInProgress(TransactionId xid) ...@@ -206,7 +220,7 @@ TransactionIdIsInProgress(TransactionId xid)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return result; return result;
} }
...@@ -237,7 +251,7 @@ GetOldestXmin(bool allDbs) ...@@ -237,7 +251,7 @@ GetOldestXmin(bool allDbs)
result = GetCurrentTransactionId(); result = GetCurrentTransactionId();
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -265,7 +279,7 @@ GetOldestXmin(bool allDbs) ...@@ -265,7 +279,7 @@ GetOldestXmin(bool allDbs)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return result; return result;
} }
...@@ -298,7 +312,7 @@ GetSnapshotData(bool serializable) ...@@ -298,7 +312,7 @@ GetSnapshotData(bool serializable)
snapshot->xmin = GetCurrentTransactionId(); snapshot->xmin = GetCurrentTransactionId();
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
/* /*
* There can be no more than lastBackend active transactions, so this * There can be no more than lastBackend active transactions, so this
...@@ -307,15 +321,12 @@ GetSnapshotData(bool serializable) ...@@ -307,15 +321,12 @@ GetSnapshotData(bool serializable)
snapshot->xip = (TransactionId *) snapshot->xip = (TransactionId *)
malloc(segP->lastBackend * sizeof(TransactionId)); malloc(segP->lastBackend * sizeof(TransactionId));
if (snapshot->xip == NULL) if (snapshot->xip == NULL)
{
SpinRelease(SInvalLock);
elog(ERROR, "Memory exhausted in GetSnapshotData"); elog(ERROR, "Memory exhausted in GetSnapshotData");
}
/*-------------------- /*--------------------
* Unfortunately, we have to call ReadNewTransactionId() after acquiring * Unfortunately, we have to call ReadNewTransactionId() after acquiring
* SInvalLock above. It's not good because ReadNewTransactionId() does * SInvalLock above. It's not good because ReadNewTransactionId() does
* SpinAcquire(XidGenLockId), but *necessary*. We need to be sure that * LWLockAcquire(XidGenLock), but *necessary*. We need to be sure that
* no transactions exit the set of currently-running transactions * no transactions exit the set of currently-running transactions
* between the time we fetch xmax and the time we finish building our * between the time we fetch xmax and the time we finish building our
* snapshot. Otherwise we could have a situation like this: * snapshot. Otherwise we could have a situation like this:
...@@ -373,7 +384,7 @@ GetSnapshotData(bool serializable) ...@@ -373,7 +384,7 @@ GetSnapshotData(bool serializable)
if (serializable) if (serializable)
MyProc->xmin = snapshot->xmin; MyProc->xmin = snapshot->xmin;
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
/* Serializable snapshot must be computed before any other... */ /* Serializable snapshot must be computed before any other... */
Assert(TransactionIdIsValid(MyProc->xmin)); Assert(TransactionIdIsValid(MyProc->xmin));
...@@ -439,7 +450,7 @@ GetUndoRecPtr(void) ...@@ -439,7 +450,7 @@ GetUndoRecPtr(void)
XLogRecPtr tempr; XLogRecPtr tempr;
int index; int index;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -458,7 +469,7 @@ GetUndoRecPtr(void) ...@@ -458,7 +469,7 @@ GetUndoRecPtr(void)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return (urec); return (urec);
} }
...@@ -470,7 +481,7 @@ GetUndoRecPtr(void) ...@@ -470,7 +481,7 @@ GetUndoRecPtr(void)
* knows that the backend isn't going to go away, so we do not bother with * knows that the backend isn't going to go away, so we do not bother with
* locking. * locking.
*/ */
struct proc * struct PROC *
BackendIdGetProc(BackendId procId) BackendIdGetProc(BackendId procId)
{ {
SISeg *segP = shmInvalBuffer; SISeg *segP = shmInvalBuffer;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.40 2001/06/19 19:42:15 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.41 2001/09/29 04:02:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -83,7 +83,7 @@ SIBufferInit(int maxBackends) ...@@ -83,7 +83,7 @@ SIBufferInit(int maxBackends)
* <0 Some other failure (not currently used) * <0 Some other failure (not currently used)
* *
* NB: this routine, and all following ones, must be executed with the * NB: this routine, and all following ones, must be executed with the
* SInvalLock spinlock held, since there may be multiple backends trying * SInvalLock lock held, since there may be multiple backends trying
* to access the buffer. * to access the buffer.
*/ */
int int
...@@ -152,7 +152,7 @@ CleanupInvalidationState(int status, Datum arg) ...@@ -152,7 +152,7 @@ CleanupInvalidationState(int status, Datum arg)
Assert(PointerIsValid(segP)); Assert(PointerIsValid(segP));
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
/* Mark myself inactive */ /* Mark myself inactive */
segP->procState[MyBackendId - 1].nextMsgNum = -1; segP->procState[MyBackendId - 1].nextMsgNum = -1;
...@@ -167,7 +167,7 @@ CleanupInvalidationState(int status, Datum arg) ...@@ -167,7 +167,7 @@ CleanupInvalidationState(int status, Datum arg)
} }
segP->lastBackend = i; segP->lastBackend = i;
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
} }
/* /*
...@@ -267,6 +267,10 @@ SISetProcStateInvalid(SISeg *segP) ...@@ -267,6 +267,10 @@ SISetProcStateInvalid(SISeg *segP)
* 1: next SI message has been extracted into *data * 1: next SI message has been extracted into *data
* (there may be more messages available after this one!) * (there may be more messages available after this one!)
* -1: SI reset message extracted * -1: SI reset message extracted
*
* NB: this can run in parallel with other instances of SIGetDataEntry
* executing on behalf of other backends. See comments in sinval.c in
* ReceiveSharedInvalidMessages().
*/ */
int int
SIGetDataEntry(SISeg *segP, int backendId, SIGetDataEntry(SISeg *segP, int backendId,
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# Makefile for storage/lmgr # Makefile for storage/lmgr
# #
# IDENTIFICATION # IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Makefile,v 1.16 2001/09/27 19:10:02 tgl Exp $ # $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Makefile,v 1.17 2001/09/29 04:02:24 tgl Exp $
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
...@@ -12,7 +12,7 @@ subdir = src/backend/storage/lmgr ...@@ -12,7 +12,7 @@ subdir = src/backend/storage/lmgr
top_builddir = ../../../.. top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
OBJS = lmgr.o lock.o proc.o deadlock.o spin.o s_lock.o OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o
all: SUBSYS.o all: SUBSYS.o
......
$Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.8 2001/01/26 18:23:12 tgl Exp $ $Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.9 2001/09/29 04:02:24 tgl Exp $
LOCKING OVERVIEW
Postgres uses three types of interprocess locks:
* Spinlocks. These are intended for *very* short-term locks. If a lock
is to be held more than a few dozen instructions, or across any sort of
kernel call (or even a call to a nontrivial subroutine), don't use a spinlock.
Spinlocks are primarily used as infrastructure for lightweight locks.
They are implemented using a hardware atomic-test-and-set instruction,
if available. Waiting processes busy-loop until they can get the lock.
There is no provision for deadlock detection, automatic release on error,
or any other nicety. There is a timeout if the lock cannot be gotten after
a minute or so (which is approximately forever in comparison to the intended
lock hold time, so this is certainly an error condition).
* Lightweight locks (LWLocks). These locks are typically used to interlock
access to data structures in shared memory. LWLocks support both exclusive
and shared lock modes (for read/write and read-only access to a shared object).
There is no provision for deadlock detection, but the LWLock manager will
automatically release held LWLocks during elog() recovery, so it is safe to
raise an error while holding LWLocks. Obtaining or releasing an LWLock is
quite fast (a few dozen instructions) when there is no contention for the
lock. When a process has to wait for an LWLock, it blocks on a SysV semaphore
so as to not consume CPU time. Waiting processes will be granted the lock
in arrival order. There is no timeout.
* Regular locks (a/k/a heavyweight locks). The regular lock manager supports
a variety of lock modes with table-driven semantics, and it has full deadlock
detection and automatic release at transaction end. Regular locks should be
used for all user-driven lock requests.
Acquisition of either a spinlock or a lightweight lock causes query cancel
and die() interrupts to be held off until all such locks are released.
No such restriction exists for regular locks, however. Also note that we
can accept query cancel and die() interrupts while waiting for a regular
lock, but we will not accept them while waiting for spinlocks or LWLocks.
It is therefore not a good idea to use LWLocks when the wait time might
exceed a few seconds.
The rest of this README file discusses the regular lock manager in detail.
LOCK DATA STRUCTURES
There are two fundamental lock structures: the per-lockable-object LOCK There are two fundamental lock structures: the per-lockable-object LOCK
struct, and the per-lock-holder HOLDER struct. A LOCK object exists struct, and the per-lock-holder HOLDER struct. A LOCK object exists
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.3 2001/03/22 03:59:46 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.4 2001/09/29 04:02:24 tgl Exp $
* *
* Interface: * Interface:
* *
...@@ -172,8 +172,8 @@ InitDeadLockChecking(void) ...@@ -172,8 +172,8 @@ InitDeadLockChecking(void)
* *
* We must have already locked the master lock before being called. * We must have already locked the master lock before being called.
* NOTE: although the lockctl structure appears to allow each lock * NOTE: although the lockctl structure appears to allow each lock
* table to have a different spinlock, all locks that can block had * table to have a different LWLock, all locks that can block had
* better use the same spinlock, else this code will not be adequately * better use the same LWLock, else this code will not be adequately
* interlocked! * interlocked!
*/ */
bool bool
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.95 2001/09/27 16:29:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.96 2001/09/29 04:02:24 tgl Exp $
* *
* NOTES * NOTES
* Outside modules can create a lock table and acquire/release * Outside modules can create a lock table and acquire/release
...@@ -78,8 +78,8 @@ static char *lock_mode_names[] = ...@@ -78,8 +78,8 @@ static char *lock_mode_names[] =
* TRACE_LOCK_TABLE -- trace locks on this table (oid) unconditionally * TRACE_LOCK_TABLE -- trace locks on this table (oid) unconditionally
* DEBUG_DEADLOCKS -- currently dumps locks at untimely occasions ;) * DEBUG_DEADLOCKS -- currently dumps locks at untimely occasions ;)
* *
* Furthermore, but in storage/ipc/spin.c: * Furthermore, but in storage/lmgr/lwlock.c:
* TRACE_SPINLOCKS -- trace spinlocks (pretty useless) * TRACE_LWLOCKS -- trace lightweight locks (pretty useless)
* *
* Define LOCK_DEBUG at compile time to get all these enabled. * Define LOCK_DEBUG at compile time to get all these enabled.
* -------- * --------
...@@ -151,10 +151,6 @@ HOLDER_PRINT(const char *where, const HOLDER *holderP) ...@@ -151,10 +151,6 @@ HOLDER_PRINT(const char *where, const HOLDER *holderP)
#endif /* not LOCK_DEBUG */ #endif /* not LOCK_DEBUG */
SPINLOCK LockMgrLock; /* in Shmem or created in
* CreateSpinlocks() */
/* /*
* These are to simplify/speed up some bit arithmetic. * These are to simplify/speed up some bit arithmetic.
* *
...@@ -230,12 +226,6 @@ LockMethodInit(LOCKMETHODTABLE *lockMethodTable, ...@@ -230,12 +226,6 @@ LockMethodInit(LOCKMETHODTABLE *lockMethodTable,
/* /*
* LockMethodTableInit -- initialize a lock table structure * LockMethodTableInit -- initialize a lock table structure
* *
* Notes:
* (a) a lock table has four separate entries in the shmem index
* table. This is because every shared hash table and spinlock
* has its name stored in the shmem index at its creation. It
* is wasteful, in this case, but not much space is involved.
*
* NOTE: data structures allocated here are allocated permanently, using * NOTE: data structures allocated here are allocated permanently, using
* TopMemoryContext and shared memory. We don't ever release them anyway, * TopMemoryContext and shared memory. We don't ever release them anyway,
* and in normal multi-backend operation the lock table structures set up * and in normal multi-backend operation the lock table structures set up
...@@ -277,9 +267,9 @@ LockMethodTableInit(char *tabName, ...@@ -277,9 +267,9 @@ LockMethodTableInit(char *tabName,
MemoryContextAlloc(TopMemoryContext, sizeof(LOCKMETHODTABLE)); MemoryContextAlloc(TopMemoryContext, sizeof(LOCKMETHODTABLE));
/* /*
* find/acquire the spinlock for the table * Lock the LWLock for the table (probably not necessary here)
*/ */
SpinAcquire(LockMgrLock); LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
/* /*
* allocate a control structure from shared memory or attach to it if * allocate a control structure from shared memory or attach to it if
...@@ -356,7 +346,7 @@ LockMethodTableInit(char *tabName, ...@@ -356,7 +346,7 @@ LockMethodTableInit(char *tabName,
/* init ctl data structures */ /* init ctl data structures */
LockMethodInit(lockMethodTable, conflictsP, prioP, numModes); LockMethodInit(lockMethodTable, conflictsP, prioP, numModes);
SpinRelease(LockMgrLock); LWLockRelease(LockMgrLock);
pfree(shmemName); pfree(shmemName);
...@@ -464,7 +454,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -464,7 +454,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HTAB *holderTable; HTAB *holderTable;
bool found; bool found;
LOCK *lock; LOCK *lock;
SPINLOCK masterLock; LWLockId masterLock;
LOCKMETHODTABLE *lockMethodTable; LOCKMETHODTABLE *lockMethodTable;
int status; int status;
int myHolding[MAX_LOCKMODES]; int myHolding[MAX_LOCKMODES];
...@@ -489,7 +479,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -489,7 +479,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
masterLock = lockMethodTable->ctl->masterLock; masterLock = lockMethodTable->ctl->masterLock;
SpinAcquire(masterLock); LWLockAcquire(masterLock, LW_EXCLUSIVE);
/* /*
* Find or create a lock with this tag * Find or create a lock with this tag
...@@ -499,7 +489,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -499,7 +489,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_ENTER, &found); HASH_ENTER, &found);
if (!lock) if (!lock)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(FATAL, "LockAcquire: lock table %d is corrupted", lockmethod); elog(FATAL, "LockAcquire: lock table %d is corrupted", lockmethod);
return FALSE; return FALSE;
} }
...@@ -544,7 +534,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -544,7 +534,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_ENTER, &found); HASH_ENTER, &found);
if (!holder) if (!holder)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(FATAL, "LockAcquire: holder table corrupted"); elog(FATAL, "LockAcquire: holder table corrupted");
return FALSE; return FALSE;
} }
...@@ -617,7 +607,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -617,7 +607,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
{ {
GrantLock(lock, holder, lockmode); GrantLock(lock, holder, lockmode);
HOLDER_PRINT("LockAcquire: owning", holder); HOLDER_PRINT("LockAcquire: owning", holder);
SpinRelease(masterLock); LWLockRelease(masterLock);
return TRUE; return TRUE;
} }
...@@ -630,7 +620,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -630,7 +620,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
{ {
GrantLock(lock, holder, lockmode); GrantLock(lock, holder, lockmode);
HOLDER_PRINT("LockAcquire: my other XID owning", holder); HOLDER_PRINT("LockAcquire: my other XID owning", holder);
SpinRelease(masterLock); LWLockRelease(masterLock);
return TRUE; return TRUE;
} }
...@@ -677,7 +667,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -677,7 +667,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
Assert(lock->nGranted <= lock->nRequested); Assert(lock->nGranted <= lock->nRequested);
SpinRelease(masterLock); LWLockRelease(masterLock);
return FALSE; return FALSE;
} }
...@@ -719,14 +709,14 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -719,14 +709,14 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HOLDER_PRINT("LockAcquire: INCONSISTENT", holder); HOLDER_PRINT("LockAcquire: INCONSISTENT", holder);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
/* Should we retry ? */ /* Should we retry ? */
SpinRelease(masterLock); LWLockRelease(masterLock);
return FALSE; return FALSE;
} }
HOLDER_PRINT("LockAcquire: granted", holder); HOLDER_PRINT("LockAcquire: granted", holder);
LOCK_PRINT("LockAcquire: granted", lock, lockmode); LOCK_PRINT("LockAcquire: granted", lock, lockmode);
} }
SpinRelease(masterLock); LWLockRelease(masterLock);
return status == STATUS_OK; return status == STATUS_OK;
} }
...@@ -879,7 +869,7 @@ GrantLock(LOCK *lock, HOLDER *holder, LOCKMODE lockmode) ...@@ -879,7 +869,7 @@ GrantLock(LOCK *lock, HOLDER *holder, LOCKMODE lockmode)
* Caller must have set MyProc->heldLocks to reflect locks already held * Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process (under all XIDs). * on the lockable object by this process (under all XIDs).
* *
* The locktable spinlock must be held at entry. * The locktable's masterLock must be held at entry.
*/ */
static int static int
WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode, WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode,
...@@ -925,7 +915,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode, ...@@ -925,7 +915,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode,
* needed, will happen in xact cleanup (see above for motivation). * needed, will happen in xact cleanup (see above for motivation).
*/ */
LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode); LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode);
SpinRelease(lockMethodTable->ctl->masterLock); LWLockRelease(lockMethodTable->ctl->masterLock);
elog(ERROR, "deadlock detected"); elog(ERROR, "deadlock detected");
/* not reached */ /* not reached */
} }
...@@ -998,7 +988,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -998,7 +988,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
TransactionId xid, LOCKMODE lockmode) TransactionId xid, LOCKMODE lockmode)
{ {
LOCK *lock; LOCK *lock;
SPINLOCK masterLock; LWLockId masterLock;
bool found; bool found;
LOCKMETHODTABLE *lockMethodTable; LOCKMETHODTABLE *lockMethodTable;
HOLDER *holder; HOLDER *holder;
...@@ -1023,7 +1013,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1023,7 +1013,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
} }
masterLock = lockMethodTable->ctl->masterLock; masterLock = lockMethodTable->ctl->masterLock;
SpinAcquire(masterLock); LWLockAcquire(masterLock, LW_EXCLUSIVE);
/* /*
* Find a lock with this tag * Find a lock with this tag
...@@ -1038,14 +1028,14 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1038,14 +1028,14 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
*/ */
if (!lock) if (!lock)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: locktable corrupted"); elog(NOTICE, "LockRelease: locktable corrupted");
return FALSE; return FALSE;
} }
if (!found) if (!found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: no such lock"); elog(NOTICE, "LockRelease: no such lock");
return FALSE; return FALSE;
} }
...@@ -1065,7 +1055,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1065,7 +1055,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_FIND_SAVE, &found); HASH_FIND_SAVE, &found);
if (!holder || !found) if (!holder || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
#ifdef USER_LOCKS #ifdef USER_LOCKS
if (!found && lockmethod == USER_LOCKMETHOD) if (!found && lockmethod == USER_LOCKMETHOD)
elog(NOTICE, "LockRelease: no lock with this tag"); elog(NOTICE, "LockRelease: no lock with this tag");
...@@ -1084,7 +1074,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1084,7 +1074,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
{ {
HOLDER_PRINT("LockRelease: WRONGTYPE", holder); HOLDER_PRINT("LockRelease: WRONGTYPE", holder);
Assert(holder->holding[lockmode] >= 0); Assert(holder->holding[lockmode] >= 0);
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: you don't own a lock of type %s", elog(NOTICE, "LockRelease: you don't own a lock of type %s",
lock_mode_names[lockmode]); lock_mode_names[lockmode]);
return FALSE; return FALSE;
...@@ -1139,7 +1129,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1139,7 +1129,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
&found); &found);
if (!lock || !found) if (!lock || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: remove lock, table corrupted"); elog(NOTICE, "LockRelease: remove lock, table corrupted");
return FALSE; return FALSE;
} }
...@@ -1167,7 +1157,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1167,7 +1157,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_REMOVE_SAVED, &found); HASH_REMOVE_SAVED, &found);
if (!holder || !found) if (!holder || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: remove holder, table corrupted"); elog(NOTICE, "LockRelease: remove holder, table corrupted");
return FALSE; return FALSE;
} }
...@@ -1179,7 +1169,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1179,7 +1169,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
if (wakeupNeeded) if (wakeupNeeded)
ProcLockWakeup(lockMethodTable, lock); ProcLockWakeup(lockMethodTable, lock);
SpinRelease(masterLock); LWLockRelease(masterLock);
return TRUE; return TRUE;
} }
...@@ -1201,7 +1191,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1201,7 +1191,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
SHM_QUEUE *procHolders = &(proc->procHolders); SHM_QUEUE *procHolders = &(proc->procHolders);
HOLDER *holder; HOLDER *holder;
HOLDER *nextHolder; HOLDER *nextHolder;
SPINLOCK masterLock; LWLockId masterLock;
LOCKMETHODTABLE *lockMethodTable; LOCKMETHODTABLE *lockMethodTable;
int i, int i,
numLockModes; numLockModes;
...@@ -1225,7 +1215,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1225,7 +1215,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
numLockModes = lockMethodTable->ctl->numLockModes; numLockModes = lockMethodTable->ctl->numLockModes;
masterLock = lockMethodTable->ctl->masterLock; masterLock = lockMethodTable->ctl->masterLock;
SpinAcquire(masterLock); LWLockAcquire(masterLock, LW_EXCLUSIVE);
holder = (HOLDER *) SHMQueueNext(procHolders, procHolders, holder = (HOLDER *) SHMQueueNext(procHolders, procHolders,
offsetof(HOLDER, procLink)); offsetof(HOLDER, procLink));
...@@ -1321,7 +1311,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1321,7 +1311,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
&found); &found);
if (!holder || !found) if (!holder || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockReleaseAll: holder table corrupted"); elog(NOTICE, "LockReleaseAll: holder table corrupted");
return FALSE; return FALSE;
} }
...@@ -1340,7 +1330,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1340,7 +1330,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
HASH_REMOVE, &found); HASH_REMOVE, &found);
if (!lock || !found) if (!lock || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockReleaseAll: cannot remove lock from HTAB"); elog(NOTICE, "LockReleaseAll: cannot remove lock from HTAB");
return FALSE; return FALSE;
} }
...@@ -1352,7 +1342,7 @@ next_item: ...@@ -1352,7 +1342,7 @@ next_item:
holder = nextHolder; holder = nextHolder;
} }
SpinRelease(masterLock); LWLockRelease(masterLock);
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
if (lockmethod == USER_LOCKMETHOD ? Trace_userlocks : Trace_locks) if (lockmethod == USER_LOCKMETHOD ? Trace_userlocks : Trace_locks)
......
This diff is collapsed.
This diff is collapsed.
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* s_lock.c * s_lock.c
* Spinlock support routines * Hardware-dependent implementation of spinlocks.
*
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.1 2001/09/27 19:10:02 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.2 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,49 +18,14 @@ ...@@ -17,49 +18,14 @@
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include "miscadmin.h"
#include "storage/s_lock.h" #include "storage/s_lock.h"
/*----------
* Each time we busy spin we select the next element of this array as the
* number of microseconds to wait. This accomplishes pseudo random back-off.
*
* Note that on most platforms, specified values will be rounded up to the
* next multiple of a clock tick, which is often ten milliseconds (10000).
* So, we are being way overoptimistic to assume that these different values
* are really different, other than the last. But there are a few platforms
* with better-than-usual timekeeping, and on these we will get pretty good
* pseudo-random behavior.
*
* Total time to cycle through all 20 entries will be at least 100 msec,
* more commonly (10 msec resolution) 220 msec, and on some platforms
* as much as 420 msec (when the remainder of the current tick cycle is
* ignored in deciding when to time out, as on FreeBSD and older Linuxen).
* We use the 100msec figure to figure max_spins, so actual timeouts may
* be as much as four times the nominal value, but will never be less.
*----------
*/
#define S_NSPINCYCLE 20
int s_spincycle[S_NSPINCYCLE] =
{1, 10, 100, 1000,
10000, 1000, 1000, 1000,
10000, 1000, 1000, 10000,
1000, 1000, 10000, 1000,
10000, 1000, 10000, 30000
};
#define AVG_SPINCYCLE 5000 /* average entry in microsec: 100ms / 20 */
#define DEFAULT_TIMEOUT (100*1000000) /* default timeout: 100 sec */
/* /*
* s_lock_stuck() - complain about a stuck spinlock * s_lock_stuck() - complain about a stuck spinlock
*/ */
static void static void
s_lock_stuck(volatile slock_t *lock, const char *file, const int line) s_lock_stuck(volatile slock_t *lock, const char *file, int line)
{ {
fprintf(stderr, fprintf(stderr,
"\nFATAL: s_lock(%p) at %s:%d, stuck spinlock. Aborting.\n", "\nFATAL: s_lock(%p) at %s:%d, stuck spinlock. Aborting.\n",
...@@ -72,69 +38,41 @@ s_lock_stuck(volatile slock_t *lock, const char *file, const int line) ...@@ -72,69 +38,41 @@ s_lock_stuck(volatile slock_t *lock, const char *file, const int line)
/* /*
* s_lock_sleep() - sleep a pseudo-random amount of time, check for timeout * s_lock(lock) - platform-independent portion of waiting for a spinlock.
*
* The 'timeout' is given in microsec, or may be 0 for "infinity". Note that
* this will be a lower bound (a fairly loose lower bound, on most platforms).
*
* 'microsec' is the number of microsec to delay per loop. Normally
* 'microsec' is 0, specifying to use the next s_spincycle[] value.
* Some callers may pass a nonzero interval, specifying to use exactly that
* delay value rather than a pseudo-random delay.
*/ */
void void
s_lock_sleep(unsigned spins, int timeout, int microsec, s_lock(volatile slock_t *lock, const char *file, int line)
volatile slock_t *lock,
const char *file, const int line)
{
struct timeval delay;
if (microsec > 0)
{
delay.tv_sec = microsec / 1000000;
delay.tv_usec = microsec % 1000000;
}
else
{
delay.tv_sec = 0;
delay.tv_usec = s_spincycle[spins % S_NSPINCYCLE];
microsec = AVG_SPINCYCLE; /* use average to figure timeout */
}
if (timeout > 0)
{
unsigned max_spins = timeout / microsec;
if (spins > max_spins)
s_lock_stuck(lock, file, line);
}
(void) select(0, NULL, NULL, NULL, &delay);
}
/*
* s_lock(lock) - take a spinlock with backoff
*/
void
s_lock(volatile slock_t *lock, const char *file, const int line)
{ {
unsigned spins = 0; unsigned spins = 0;
unsigned delays = 0;
struct timeval delay;
/* /*
* If you are thinking of changing this code, be careful. This same * We loop tightly for awhile, then delay using select() and try again.
* loop logic is used in other places that call TAS() directly. * Preferably, "awhile" should be a small multiple of the maximum time
* we expect a spinlock to be held. 100 iterations seems about right.
* *
* While waiting for a lock, we check for cancel/die interrupts (which is * We use a 10 millisec select delay because that is the lower limit on
* a no-op if we are inside a critical section). The interrupt check * many platforms. The timeout is figured on this delay only, and so the
* can be omitted in places that know they are inside a critical * nominal 1 minute is a lower bound.
* section. Note that an interrupt must NOT be accepted after
* acquiring the lock.
*/ */
#define SPINS_PER_DELAY 100
#define DELAY_MSEC 10
#define TIMEOUT_MSEC (60 * 1000)
while (TAS(lock)) while (TAS(lock))
{ {
s_lock_sleep(spins++, DEFAULT_TIMEOUT, 0, lock, file, line); if (++spins > SPINS_PER_DELAY)
CHECK_FOR_INTERRUPTS(); {
if (++delays > (TIMEOUT_MSEC / DELAY_MSEC))
s_lock_stuck(lock, file, line);
delay.tv_sec = 0;
delay.tv_usec = DELAY_MSEC * 1000;
(void) select(0, NULL, NULL, NULL, &delay);
spins = 0;
}
} }
} }
......
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* spin.c * spin.c
* routines for managing spin locks * Hardware-independent implementation of spinlocks.
*
*
* For machines that have test-and-set (TAS) instructions, s_lock.h/.c
* define the spinlock implementation. This file contains only a stub
* implementation for spinlocks using SysV semaphores. The semaphore method
* is too slow to be very useful :-(
* *
* POSTGRES has two kinds of locks: semaphores (which put the
* process to sleep) and spinlocks (which are supposed to be
* short term locks). Spinlocks are implemented via test-and-set (TAS)
* instructions if possible, else via semaphores. The semaphore method
* is too slow to be useful :-(
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.1 2001/09/27 19:10:02 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.2 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include <errno.h> #include <errno.h>
#if !defined(HAS_TEST_AND_SET) && defined(HAVE_SYS_SEM_H) #ifdef HAVE_SYS_SEM_H
#include <sys/sem.h> #include <sys/sem.h>
#endif #endif
#include "miscadmin.h" #include "storage/lwlock.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/s_lock.h" #include "storage/spin.h"
/* Probably should move these to an appropriate header file */
extern SPINLOCK BufMgrLock;
extern SPINLOCK OidGenLockId;
extern SPINLOCK XidGenLockId;
extern SPINLOCK ControlFileLockId;
extern SPINLOCK ShmemLock;
extern SPINLOCK ShmemIndexLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK FreeSpaceLock;
#ifdef STABLE_MEMORY_STORAGE
extern SPINLOCK MMCacheLock;
#endif
/*
* Initialize identifiers for permanent spinlocks during startup
*
* The same identifiers are used for both TAS and semaphore implementations,
* although in one case they are indexes into a shmem array and in the other
* they are semaphore numbers.
*/
static void
InitSpinLockIDs(void)
{
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
XidGenLockId = (SPINLOCK) XIDGENLOCKID;
ControlFileLockId = (SPINLOCK) CNTLFILELOCKID;
ShmemLock = (SPINLOCK) SHMEMLOCKID;
ShmemIndexLock = (SPINLOCK) SHMEMINDEXLOCKID;
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
SInvalLock = (SPINLOCK) SINVALLOCKID;
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
FreeSpaceLock = (SPINLOCK) FREESPACELOCKID;
#ifdef STABLE_MEMORY_STORAGE
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
#endif
}
#ifdef HAS_TEST_AND_SET #ifdef HAS_TEST_AND_SET
/* real spin lock implementation */
typedef struct slock
{
slock_t shlock;
} SLock;
#ifdef LOCK_DEBUG
bool Trace_spinlocks = false;
inline static void
PRINT_SLDEBUG(const char *where, SPINLOCK lockid, const SLock *lock)
{
if (Trace_spinlocks)
elog(DEBUG, "%s: id=%d", where, lockid);
}
#else /* not LOCK_DEBUG */
#define PRINT_SLDEBUG(a,b,c)
#endif /* not LOCK_DEBUG */
/* Base of the spinlock array; NULL until CreateSpinlocks points it into the shmem segment */
static SLock *SLockArray = NULL;
/* Size in bytes (before MAXALIGN rounding) of an array of MAX_SPINS SLock entries */
#define SLOCKMEMORYSIZE ((int) MAX_SPINS * sizeof(SLock))
/*
 * SLockShmemSize --- report the shared-memory space the spinlock array needs
 *
 * The result is SLOCKMEMORYSIZE rounded up by MAXALIGN.
 */
int
SLockShmemSize(void)
{
	int			nbytes = MAXALIGN(SLOCKMEMORYSIZE);

	return nbytes;
}
/* /*
* CreateSpinlocks --- create and initialize spinlocks during startup * CreateSpinlocks --- create and initialize spinlocks during startup
*/ */
void void
CreateSpinlocks(PGShmemHeader *seghdr) CreateSpinlocks(void)
{
int id;
/*
* We must allocate the space "by hand" because shmem.c isn't up yet
*/
SLockArray = (SLock *) (((char *) seghdr) + seghdr->freeoffset);
seghdr->freeoffset += MAXALIGN(SLOCKMEMORYSIZE);
Assert(seghdr->freeoffset <= seghdr->totalsize);
/*
* Initialize all spinlocks to "unlocked" state
*/
for (id = 0; id < (int) MAX_SPINS; id++)
{
SLock *slckP = &(SLockArray[id]);
S_INIT_LOCK(&(slckP->shlock));
}
/*
* Assign indexes for fixed spinlocks
*/
InitSpinLockIDs();
}
/*
 * SpinAcquire --- grab a fixed spinlock (TAS implementation)
 *
 * 'lockid' indexes the SLockArray entry to lock.
 */
void
SpinAcquire(SPINLOCK lockid)
{
	SLock	   *lock = SLockArray + lockid;

	PRINT_SLDEBUG("SpinAcquire", lockid, lock);

	/*
	 * Take the lock first and only then note our ownership of it, so that
	 * recovery after elog(ERROR) knows what we hold.  S_LOCK is assumed not
	 * to accept cancel/die interrupts once the lock has been obtained;
	 * while still waiting, interrupts should be accepted provided
	 * InterruptHoldoffCount is zero.
	 */
	S_LOCK(&lock->shlock);
	PROC_INCR_SLOCK(lockid);

	/*
	 * From here until the spinlock-protected section is exited, cancel/die
	 * interrupts stay locked out, so they cannot disturb manipulation of
	 * shared-memory data structures.
	 */
	HOLD_INTERRUPTS();

	PRINT_SLDEBUG("SpinAcquire/done", lockid, lock);
}
void
SpinRelease(SPINLOCK lockid)
{ {
SLock *slckP = &(SLockArray[lockid]); /* no-op when we have TAS spinlocks */
PRINT_SLDEBUG("SpinRelease", lockid, slckP);
/*
* Check that we are actually holding the lock we are releasing. This
* can be done only after MyProc has been initialized.
*/
Assert(!MyProc || MyProc->sLocks[lockid] > 0);
/*
* Record that we no longer hold the spinlock, and release it.
*/
PROC_DECR_SLOCK(lockid);
S_UNLOCK(&(slckP->shlock));
/*
* Exit the interrupt holdoff entered in SpinAcquire().
*/
RESUME_INTERRUPTS();
PRINT_SLDEBUG("SpinRelease/done", lockid, slckP);
} }
#else /* !HAS_TEST_AND_SET */ #else /* !HAS_TEST_AND_SET */
...@@ -199,11 +47,7 @@ SpinRelease(SPINLOCK lockid) ...@@ -199,11 +47,7 @@ SpinRelease(SPINLOCK lockid)
/* /*
* No TAS, so spinlocks are implemented using SysV semaphores. * No TAS, so spinlocks are implemented using SysV semaphores.
* *
* We support two slightly different APIs here: SpinAcquire/SpinRelease * Typedef slock_t stores the semId and sem number of the sema to use.
* work with SPINLOCK integer indexes for the permanent spinlocks, which
* are all assumed to live in the first spinlock semaphore set. There
* is also an emulation of the s_lock.h TAS-spinlock macros; for that case,
* typedef slock_t stores the semId and sem number of the sema to use.
* The semas needed are created by CreateSpinlocks and doled out by * The semas needed are created by CreateSpinlocks and doled out by
* s_init_lock_sema. * s_init_lock_sema.
* *
...@@ -228,35 +72,26 @@ static int nextSpinLock = 0; /* next free spinlock index */ ...@@ -228,35 +72,26 @@ static int nextSpinLock = 0; /* next free spinlock index */
static void SpinFreeAllSemaphores(void); static void SpinFreeAllSemaphores(void);
/*
 * SLockShmemSize --- return shared-memory space needed
 *
 * This variant implements spinlocks with SysV semaphores rather than a
 * shmem array, so it asks for no shared memory at all.
 */
int
SLockShmemSize(void)
{
	const int	shmemBytes = 0;

	return shmemBytes;
}
/* /*
* CreateSpinlocks --- create and initialize spinlocks during startup * CreateSpinlocks --- create and initialize spinlocks during startup
*/ */
void void
CreateSpinlocks(PGShmemHeader *seghdr) CreateSpinlocks(void)
{ {
int i; int i;
if (SpinLockIds == NULL) if (SpinLockIds == NULL)
{ {
/* /*
* Compute number of spinlocks needed. If this logic gets any * Compute number of spinlocks needed. It would be cleaner to
* more complicated, it should be distributed into the affected * distribute this logic into the affected modules,
* modules, similar to the way shmem space estimation is handled. * similar to the way shmem space estimation is handled.
* *
* For now, though, we just need the fixed spinlocks (MAX_SPINS), two * For now, though, we just need a few spinlocks (10 should be
* spinlocks per shared disk buffer, and four spinlocks for XLOG. * plenty) plus one for each LWLock.
*/ */
numSpinLocks = (int) MAX_SPINS + 2 * NBuffers + 4; numSpinLocks = NumLWLocks() + 10;
/* might as well round up to a multiple of SPINLOCKS_PER_SET */ /* might as well round up to a multiple of SPINLOCKS_PER_SET */
numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1; numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1;
...@@ -288,14 +123,8 @@ CreateSpinlocks(PGShmemHeader *seghdr) ...@@ -288,14 +123,8 @@ CreateSpinlocks(PGShmemHeader *seghdr)
false); false);
} }
/*
* Assign indexes for fixed spinlocks
*/
Assert(MAX_SPINS <= SPINLOCKS_PER_SET);
InitSpinLockIDs();
/* Init counter for allocating dynamic spinlocks */ /* Init counter for allocating dynamic spinlocks */
nextSpinLock = MAX_SPINS; nextSpinLock = 0;
} }
/* /*
...@@ -318,49 +147,6 @@ SpinFreeAllSemaphores(void) ...@@ -318,49 +147,6 @@ SpinFreeAllSemaphores(void)
SpinLockIds = NULL; SpinLockIds = NULL;
} }
/*
 * SpinAcquire -- grab a fixed spinlock
 *
 * Semaphore-based variant: 'lock' is used as the semaphore number within
 * the first spinlock semaphore set (SpinLockIds[0]).
 *
 * FAILS if the semaphore is corrupted.
 */
void
SpinAcquire(SPINLOCK lock)
{
/*
 * See the TAS() version of this routine for primary commentary.
 *
 * NOTE we must pass interruptOK = false to IpcSemaphoreLock, to ensure
 * that a cancel/die interrupt cannot prevent us from recording
 * ownership of a lock we have just acquired.
 */
IpcSemaphoreLock(SpinLockIds[0], lock, false);
/* Record that we now hold this lock */
PROC_INCR_SLOCK(lock);
/* Hold off cancel/die interrupts; SpinRelease resumes them */
HOLD_INTERRUPTS();
}
/*
 * SpinRelease -- release a fixed spin lock
 *
 * Semaphore-based variant: 'lock' is the semaphore number of the spinlock
 * within the first spinlock semaphore set (SpinLockIds[0]).  The caller is
 * expected to hold the lock.
 *
 * FAILS if the semaphore is corrupted
 */
void
SpinRelease(SPINLOCK lock)
{
	/* See the TAS() version of this routine for commentary */
#ifdef USE_ASSERT_CHECKING
	/* Check it's locked */
	int			semval;

	semval = IpcSemaphoreGetValue(SpinLockIds[0], lock);
	Assert(semval < 1);
#endif

	/*
	 * Check that we are actually holding the lock we are releasing.  This
	 * can be done only after MyProc has been initialized.  The index must
	 * be this function's own parameter, 'lock'.
	 */
	Assert(!MyProc || MyProc->sLocks[lock] > 0);

	/* Record that we no longer hold the spinlock, and release it */
	PROC_DECR_SLOCK(lock);
	IpcSemaphoreUnlock(SpinLockIds[0], lock);

	/* Exit the interrupt holdoff entered in SpinAcquire() */
	RESUME_INTERRUPTS();
}
/* /*
* s_lock.h hardware-spinlock emulation * s_lock.h hardware-spinlock emulation
*/ */
......
...@@ -11,17 +11,19 @@ ...@@ -11,17 +11,19 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.24 2001/06/27 23:31:39 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.25 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include "miscadmin.h"
#ifdef STABLE_MEMORY_STORAGE
#include <math.h> #include <math.h>
#include "storage/smgr.h"
#include "miscadmin.h"
#ifdef STABLE_MEMORY_STORAGE
/* /*
* MMCacheTag -- Unique triplet for blocks stored by the main memory * MMCacheTag -- Unique triplet for blocks stored by the main memory
...@@ -71,8 +73,6 @@ typedef struct MMRelHashEntry ...@@ -71,8 +73,6 @@ typedef struct MMRelHashEntry
#define MMNBUFFERS 10 #define MMNBUFFERS 10
#define MMNRELATIONS 2 #define MMNRELATIONS 2
SPINLOCK MMCacheLock;
static int *MMCurTop; static int *MMCurTop;
static int *MMCurRelno; static int *MMCurRelno;
static MMCacheTag *MMBlockTags; static MMCacheTag *MMBlockTags;
...@@ -88,7 +88,7 @@ mminit() ...@@ -88,7 +88,7 @@ mminit()
bool found; bool found;
HASHCTL info; HASHCTL info;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS); mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS);
mmsize += MAXALIGN(sizeof(*MMCurTop)); mmsize += MAXALIGN(sizeof(*MMCurTop));
...@@ -98,7 +98,7 @@ mminit() ...@@ -98,7 +98,7 @@ mminit()
if (mmcacheblk == (char *) NULL) if (mmcacheblk == (char *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
...@@ -112,7 +112,7 @@ mminit() ...@@ -112,7 +112,7 @@ mminit()
if (MMCacheHT == (HTAB *) NULL) if (MMCacheHT == (HTAB *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
...@@ -126,18 +126,18 @@ mminit() ...@@ -126,18 +126,18 @@ mminit()
if (MMRelCacheHT == (HTAB *) NULL) if (MMRelCacheHT == (HTAB *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
if (IsUnderPostmaster) /* was IsPostmaster bjm */ if (IsUnderPostmaster) /* was IsPostmaster bjm */
{ {
MemSet(mmcacheblk, 0, mmsize); MemSet(mmcacheblk, 0, mmsize);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
MMCurTop = (int *) mmcacheblk; MMCurTop = (int *) mmcacheblk;
mmcacheblk += sizeof(int); mmcacheblk += sizeof(int);
...@@ -163,11 +163,11 @@ mmcreate(Relation reln) ...@@ -163,11 +163,11 @@ mmcreate(Relation reln)
bool found; bool found;
MMRelTag tag; MMRelTag tag;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
if (*MMCurRelno == MMNRELATIONS) if (*MMCurRelno == MMNRELATIONS)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
...@@ -184,20 +184,20 @@ mmcreate(Relation reln) ...@@ -184,20 +184,20 @@ mmcreate(Relation reln)
if (entry == (MMRelHashEntry *) NULL) if (entry == (MMRelHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "main memory storage mgr rel cache hash table corrupt"); elog(FATAL, "main memory storage mgr rel cache hash table corrupt");
} }
if (found) if (found)
{ {
/* already exists */ /* already exists */
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
entry->mmrhe_nblocks = 0; entry->mmrhe_nblocks = 0;
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -211,30 +211,24 @@ int ...@@ -211,30 +211,24 @@ int
mmunlink(RelFileNode rnode) mmunlink(RelFileNode rnode)
{ {
int i; int i;
Oid reldbid;
MMHashEntry *entry; MMHashEntry *entry;
MMRelHashEntry *rentry; MMRelHashEntry *rentry;
bool found; bool found;
MMRelTag rtag; MMRelTag rtag;
if (reln->rd_rel->relisshared) LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
reldbid = (Oid) 0;
else
reldbid = MyDatabaseId;
SpinAcquire(MMCacheLock);
for (i = 0; i < MMNBUFFERS; i++) for (i = 0; i < MMNBUFFERS; i++)
{ {
if (MMBlockTags[i].mmct_dbid == reldbid if (MMBlockTags[i].mmct_dbid == rnode.tblNode
&& MMBlockTags[i].mmct_relid == RelationGetRelid(reln)) && MMBlockTags[i].mmct_relid == rnode.relNode)
{ {
entry = (MMHashEntry *) hash_search(MMCacheHT, entry = (MMHashEntry *) hash_search(MMCacheHT,
(char *) &MMBlockTags[i], (char *) &MMBlockTags[i],
HASH_REMOVE, &found); HASH_REMOVE, &found);
if (entry == (MMHashEntry *) NULL || !found) if (entry == (MMHashEntry *) NULL || !found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmunlink: cache hash table corrupted"); elog(FATAL, "mmunlink: cache hash table corrupted");
} }
MMBlockTags[i].mmct_dbid = (Oid) 0; MMBlockTags[i].mmct_dbid = (Oid) 0;
...@@ -242,21 +236,21 @@ mmunlink(RelFileNode rnode) ...@@ -242,21 +236,21 @@ mmunlink(RelFileNode rnode)
MMBlockTags[i].mmct_blkno = (BlockNumber) 0; MMBlockTags[i].mmct_blkno = (BlockNumber) 0;
} }
} }
rtag.mmrt_dbid = reldbid; rtag.mmrt_dbid = rnode.tblNode;
rtag.mmrt_relid = RelationGetRelid(reln); rtag.mmrt_relid = rnode.relNode;
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_REMOVE, &found); HASH_REMOVE, &found);
if (rentry == (MMRelHashEntry *) NULL || !found) if (rentry == (MMRelHashEntry *) NULL || !found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmunlink: rel cache hash table corrupted"); elog(FATAL, "mmunlink: rel cache hash table corrupted");
} }
(*MMCurRelno)--; (*MMCurRelno)--;
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return 1; return 1;
} }
...@@ -286,7 +280,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -286,7 +280,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
tag.mmct_dbid = rtag.mmrt_dbid = reldbid; tag.mmct_dbid = rtag.mmrt_dbid = reldbid;
tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln); tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln);
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
if (*MMCurTop == MMNBUFFERS) if (*MMCurTop == MMNBUFFERS)
{ {
...@@ -298,7 +292,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -298,7 +292,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
} }
if (i == MMNBUFFERS) if (i == MMNBUFFERS)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
} }
...@@ -312,7 +306,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -312,7 +306,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
HASH_FIND, &found); HASH_FIND, &found);
if (rentry == (MMRelHashEntry *) NULL || !found) if (rentry == (MMRelHashEntry *) NULL || !found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmextend: rel cache hash table corrupt"); elog(FATAL, "mmextend: rel cache hash table corrupt");
} }
...@@ -322,7 +316,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -322,7 +316,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
HASH_ENTER, &found); HASH_ENTER, &found);
if (entry == (MMHashEntry *) NULL || found) if (entry == (MMHashEntry *) NULL || found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmextend: cache hash table corrupt"); elog(FATAL, "mmextend: cache hash table corrupt");
} }
...@@ -338,7 +332,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -338,7 +332,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
offset = (i * BLCKSZ); offset = (i * BLCKSZ);
memmove(&(MMBlockCache[offset]), buffer, BLCKSZ); memmove(&(MMBlockCache[offset]), buffer, BLCKSZ);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -386,20 +380,20 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -386,20 +380,20 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer)
tag.mmct_relid = RelationGetRelid(reln); tag.mmct_relid = RelationGetRelid(reln);
tag.mmct_blkno = blocknum; tag.mmct_blkno = blocknum;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found); HASH_FIND, &found);
if (entry == (MMHashEntry *) NULL) if (entry == (MMHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt"); elog(FATAL, "mmread: hash table corrupt");
} }
if (!found) if (!found)
{ {
/* reading nonexistent pages is defined to fill them with zeroes */ /* reading nonexistent pages is defined to fill them with zeroes */
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
MemSet(buffer, 0, BLCKSZ); MemSet(buffer, 0, BLCKSZ);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -407,7 +401,7 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -407,7 +401,7 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer)
offset = (entry->mmhe_bufno * BLCKSZ); offset = (entry->mmhe_bufno * BLCKSZ);
memmove(buffer, &MMBlockCache[offset], BLCKSZ); memmove(buffer, &MMBlockCache[offset], BLCKSZ);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -433,26 +427,26 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -433,26 +427,26 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
tag.mmct_relid = RelationGetRelid(reln); tag.mmct_relid = RelationGetRelid(reln);
tag.mmct_blkno = blocknum; tag.mmct_blkno = blocknum;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found); HASH_FIND, &found);
if (entry == (MMHashEntry *) NULL) if (entry == (MMHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt"); elog(FATAL, "mmread: hash table corrupt");
} }
if (!found) if (!found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmwrite: hash table missing requested page"); elog(FATAL, "mmwrite: hash table missing requested page");
} }
offset = (entry->mmhe_bufno * BLCKSZ); offset = (entry->mmhe_bufno * BLCKSZ);
memmove(&MMBlockCache[offset], buffer, BLCKSZ); memmove(&MMBlockCache[offset], buffer, BLCKSZ);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -506,14 +500,14 @@ mmnblocks(Relation reln) ...@@ -506,14 +500,14 @@ mmnblocks(Relation reln)
rtag.mmrt_relid = RelationGetRelid(reln); rtag.mmrt_relid = RelationGetRelid(reln);
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_FIND, &found); HASH_FIND, &found);
if (rentry == (MMRelHashEntry *) NULL) if (rentry == (MMRelHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmnblocks: rel cache hash table corrupt"); elog(FATAL, "mmnblocks: rel cache hash table corrupt");
} }
...@@ -522,7 +516,7 @@ mmnblocks(Relation reln) ...@@ -522,7 +516,7 @@ mmnblocks(Relation reln)
else else
nblocks = InvalidBlockNumber; nblocks = InvalidBlockNumber;
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return nblocks; return nblocks;
} }
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.52 2001/07/02 20:50:46 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.53 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/memutils.h" #include "utils/memutils.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.92 2001/09/27 16:29:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.93 2001/09/29 04:02:25 tgl Exp $
* *
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
...@@ -401,11 +401,12 @@ ShutdownPostgres(void) ...@@ -401,11 +401,12 @@ ShutdownPostgres(void)
* since that just raises the odds of failure --- but there's some * since that just raises the odds of failure --- but there's some
* stuff we need to do. * stuff we need to do.
* *
* Release any spinlocks or buffer context locks we might be holding. * Release any LW locks and buffer context locks we might be holding.
* This is a kluge to improve the odds that we won't get into a * This is a kluge to improve the odds that we won't get into a
* self-made stuck-spinlock scenario while trying to shut down. * self-made stuck-lock scenario while trying to shut down.
*/ */
ProcReleaseSpins(NULL); LWLockReleaseAll();
AbortBufferIO();
UnlockBuffers(); UnlockBuffers();
/* /*
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* Support for grand unified configuration scheme, including SET * Support for grand unified configuration scheme, including SET
* command, configuration file, and command line options. * command, configuration file, and command line options.
* *
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.52 2001/09/23 21:52:36 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.53 2001/09/29 04:02:25 tgl Exp $
* *
* Copyright 2000 by PostgreSQL Global Development Group * Copyright 2000 by PostgreSQL Global Development Group
* Written by Peter Eisentraut <peter_e@gmx.net>. * Written by Peter Eisentraut <peter_e@gmx.net>.
...@@ -240,7 +240,7 @@ static struct config_bool ...@@ -240,7 +240,7 @@ static struct config_bool
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
{"trace_locks", PGC_SUSET, &Trace_locks, false, NULL}, {"trace_locks", PGC_SUSET, &Trace_locks, false, NULL},
{"trace_userlocks", PGC_SUSET, &Trace_userlocks, false, NULL}, {"trace_userlocks", PGC_SUSET, &Trace_userlocks, false, NULL},
{"trace_spinlocks", PGC_SUSET, &Trace_spinlocks, false, NULL}, {"trace_lwlocks", PGC_SUSET, &Trace_lwlocks, false, NULL},
{"debug_deadlocks", PGC_SUSET, &Debug_deadlocks, false, NULL}, {"debug_deadlocks", PGC_SUSET, &Debug_deadlocks, false, NULL},
#endif #endif
......
...@@ -164,7 +164,7 @@ ...@@ -164,7 +164,7 @@
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
#trace_locks = false #trace_locks = false
#trace_userlocks = false #trace_userlocks = false
#trace_spinlocks = false #trace_lwlocks = false
#debug_deadlocks = false #debug_deadlocks = false
#trace_lock_oidmin = 16384 #trace_lock_oidmin = 16384
#trace_lock_table = 0 #trace_lock_table = 0
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment