Commit 499abb0c authored by Tom Lane

Implement new 'lightweight lock manager' that's intermediate between existing
lock manager and spinlocks: it understands exclusive vs shared lock but has
few other fancy features.  Replace most uses of spinlocks with lightweight
locks.  All remaining uses of spinlocks have very short lock hold times (a few
dozen instructions), so tweak spinlock backoff code to work efficiently given
this assumption.  All per my proposal on pghackers 26-Sep-01.
parent 818fb55a
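
For orientation before the diff: a minimal sketch of the new API as these hunks use it. The calls and lock names below are all taken from the hunks on this page; the full interface lives in the new storage/lwlock.h, which this page does not show.

    #include "storage/lwlock.h"

    /*
     * Usage sketch of the lightweight-lock API as exercised by this commit.
     */
    static void
    LWLockUsageSketch(void)
    {
        /* Shared mode: any number of readers may hold the lock at once */
        LWLockAcquire(XidGenLock, LW_SHARED);
        /* ... read ShmemVariableCache->nextXid ... */
        LWLockRelease(XidGenLock);

        /* Exclusive mode: one holder; blocks shared holders too */
        LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
        /* ... insert a WAL record ... */
        LWLockRelease(WALInsertLock);

        /* Conditional acquire: returns false instead of sleeping */
        if (LWLockConditionalAcquire(WALWriteLock, LW_EXCLUSIVE))
        {
            /* ... opportunistically write out WAL buffers ... */
            LWLockRelease(WALWriteLock);
        }

        /* Error cleanup: the abort path drops everything still held */
        LWLockReleaseAll();
    }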
-<!-- $Header: /cvsroot/pgsql/doc/src/sgml/wal.sgml,v 1.10 2001/09/22 03:59:17 momjian Exp $ -->
+<!-- $Header: /cvsroot/pgsql/doc/src/sgml/wal.sgml,v 1.11 2001/09/29 04:02:19 tgl Exp $ -->
 <chapter id="wal">
 <title>Write-Ahead Logging (<acronym>WAL</acronym>)</title>
@@ -146,7 +146,7 @@
 <para>
  The <acronym>WAL</acronym> buffers and control structure are in
  shared memory, and are handled by the backends; they are protected
- by spinlocks.  The demand on shared memory is dependent on the
+ by lightweight locks.  The demand on shared memory is dependent on the
  number of buffers; the default size of the <acronym>WAL</acronym>
  buffers is 64 kB.
 </para>
......
@@ -13,7 +13,7 @@
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.3 2001/08/26 16:55:59 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.4 2001/09/29 04:02:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -27,7 +27,7 @@
 #include <unistd.h>
 #include "access/clog.h"
-#include "storage/s_lock.h"
+#include "storage/lwlock.h"
 #include "miscadmin.h"
@@ -74,8 +74,8 @@
 * The management algorithm is straight LRU except that we will never swap
 * out the latest page (since we know it's going to be hit again eventually).
 *
- * We use an overall spinlock to protect the shared data structures, plus
- * per-buffer spinlocks that synchronize I/O for each buffer.  A process
+ * We use an overall LWLock to protect the shared data structures, plus
+ * per-buffer LWLocks that synchronize I/O for each buffer.  A process
 * that is reading in or writing out a page buffer does not hold the control
 * lock, only the per-buffer lock for the buffer it is working on.
 *
@@ -105,10 +105,6 @@
 * by setting the page's state from WRITE_IN_PROGRESS to DIRTY.  The writing
 * process must notice this and not mark the page CLEAN when it's done.
 *
- * XXX it's probably okay to use a spinlock for the control lock, since
- * that lock is only held for very short operations.  It'd be nice to use
- * some other form of lock for the per-buffer I/O locks, however.
- *
 * XLOG interactions: this module generates an XLOG record whenever a new
 * CLOG page is initialized to zeroes.  Other writes of CLOG come from
 * recording of transaction commit or abort in xact.c, which generates its
@@ -121,7 +117,6 @@
 * synchronization already.
 *----------
 */
-#define NUM_CLOG_BUFFERS	8
 typedef enum
 {
@@ -153,13 +148,17 @@ typedef struct ClogCtlData
     * swapping out the latest page.
     */
    int         latest_page_number;
-   slock_t     control_lck;    /* Lock for ClogCtlData itself */
-   slock_t     buffer_lck[NUM_CLOG_BUFFERS];   /* Per-buffer I/O locks */
 } ClogCtlData;
 static ClogCtlData *ClogCtl = NULL;
+/*
+ * ClogBufferLocks is set during CLOGShmemInit and does not change thereafter.
+ * The value is automatically inherited by backends via fork, and
+ * doesn't need to be in shared memory.
+ */
+static LWLockId ClogBufferLocks[NUM_CLOG_BUFFERS];  /* Per-buffer I/O locks */
 /*
 * ClogDir is set during CLOGShmemInit and does not change thereafter.
 * The value is automatically inherited by backends via fork, and
@@ -211,7 +210,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
    Assert(status == TRANSACTION_STATUS_COMMITTED ||
           status == TRANSACTION_STATUS_ABORTED);
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
    slotno = ReadCLOGPage(pageno);
    byteptr = ClogCtl->page_buffer[slotno] + byteno;
@@ -224,7 +223,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
    ClogCtl->page_status[slotno] = CLOG_PAGE_DIRTY;
-   S_UNLOCK(&(ClogCtl->control_lck));
+   LWLockRelease(CLogControlLock);
 }
 /*
@@ -243,14 +242,14 @@ TransactionIdGetStatus(TransactionId xid)
    char       *byteptr;
    XidStatus   status;
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
    slotno = ReadCLOGPage(pageno);
    byteptr = ClogCtl->page_buffer[slotno] + byteno;
    status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
-   S_UNLOCK(&(ClogCtl->control_lck));
+   LWLockRelease(CLogControlLock);
    return status;
 }
@@ -283,15 +282,13 @@ CLOGShmemInit(void)
    memset(ClogCtl, 0, sizeof(ClogCtlData));
-   S_INIT_LOCK(&(ClogCtl->control_lck));
    bufptr = ((char *) ClogCtl) + sizeof(ClogCtlData);
    for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
    {
        ClogCtl->page_buffer[slotno] = bufptr;
        ClogCtl->page_status[slotno] = CLOG_PAGE_EMPTY;
-       S_INIT_LOCK(&(ClogCtl->buffer_lck[slotno]));
+       ClogBufferLocks[slotno] = LWLockAssign();
        bufptr += CLOG_BLCKSZ;
    }
@@ -312,7 +309,7 @@ BootStrapCLOG(void)
 {
    int         slotno;
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
    /* Create and zero the first page of the commit log */
    slotno = ZeroCLOGPage(0, false);
@@ -321,7 +318,7 @@ BootStrapCLOG(void)
    WriteCLOGPage(slotno);
    Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN);
-   S_UNLOCK(&(ClogCtl->control_lck));
+   LWLockRelease(CLogControlLock);
 }
 /*
@@ -411,8 +408,8 @@ ReadCLOGPage(int pageno)
        ClogCtl->page_lru_count[slotno] = 0;
        /* Release shared lock, grab per-buffer lock instead */
-       S_UNLOCK(&(ClogCtl->control_lck));
-       S_LOCK(&(ClogCtl->buffer_lck[slotno]));
+       LWLockRelease(CLogControlLock);
+       LWLockAcquire(ClogBufferLocks[slotno], LW_EXCLUSIVE);
        /*
         * Check to see if someone else already did the read, or took the
@@ -421,8 +418,8 @@ ReadCLOGPage(int pageno)
        if (ClogCtl->page_number[slotno] != pageno ||
            ClogCtl->page_status[slotno] != CLOG_PAGE_READ_IN_PROGRESS)
        {
-           S_UNLOCK(&(ClogCtl->buffer_lck[slotno]));
-           S_LOCK(&(ClogCtl->control_lck));
+           LWLockRelease(ClogBufferLocks[slotno]);
+           LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
            continue;
        }
@@ -430,14 +427,14 @@ ReadCLOGPage(int pageno)
        CLOGPhysicalReadPage(pageno, slotno);
        /* Re-acquire shared control lock and update page state */
-       S_LOCK(&(ClogCtl->control_lck));
+       LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
        Assert(ClogCtl->page_number[slotno] == pageno &&
               ClogCtl->page_status[slotno] == CLOG_PAGE_READ_IN_PROGRESS);
        ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN;
-       S_UNLOCK(&(ClogCtl->buffer_lck[slotno]));
+       LWLockRelease(ClogBufferLocks[slotno]);
        ClogRecentlyUsed(slotno);
        return slotno;
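
Distilled from the ReadCLOGPage hunks above, the handoff between the control lock and a per-buffer lock follows this shape. A sketch only: the slot-selection and state-check helpers are hypothetical stand-ins for logic the real function inlines, and the LRU bookkeeping is omitted.

    /* Hypothetical helpers standing in for logic that ReadCLOGPage inlines */
    extern int  FindOrAllocateSlot(int pageno);     /* may mark READ_IN_PROGRESS */
    extern bool SlotHoldsValidPage(int slotno, int pageno);
    extern bool SlotStateChanged(int slotno, int pageno);
    extern void MarkSlotClean(int slotno);

    static int
    ReadPageSketch(int pageno)
    {
        /* caller enters holding CLogControlLock, as in the real code */
        for (;;)
        {
            int     slotno = FindOrAllocateSlot(pageno);

            if (SlotHoldsValidPage(slotno, pageno))
                return slotno;      /* fast path, control lock still held */

            /* Drop the control lock so other slots stay usable during I/O */
            LWLockRelease(CLogControlLock);
            LWLockAcquire(ClogBufferLocks[slotno], LW_EXCLUSIVE);

            /* Another backend may have done the read, or recycled the slot */
            if (SlotStateChanged(slotno, pageno))
            {
                LWLockRelease(ClogBufferLocks[slotno]);
                LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
                continue;           /* retry from the top */
            }

            /* We own the I/O for this slot; read without the control lock */
            CLOGPhysicalReadPage(pageno, slotno);

            /* Retake the control lock, then publish the state change */
            LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
            MarkSlotClean(slotno);
            LWLockRelease(ClogBufferLocks[slotno]);
            return slotno;
        }
    }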
@@ -468,8 +465,8 @@ WriteCLOGPage(int slotno)
    pageno = ClogCtl->page_number[slotno];
    /* Release shared lock, grab per-buffer lock instead */
-   S_UNLOCK(&(ClogCtl->control_lck));
-   S_LOCK(&(ClogCtl->buffer_lck[slotno]));
+   LWLockRelease(CLogControlLock);
+   LWLockAcquire(ClogBufferLocks[slotno], LW_EXCLUSIVE);
    /*
     * Check to see if someone else already did the write, or took the
@@ -482,8 +479,8 @@ WriteCLOGPage(int slotno)
        (ClogCtl->page_status[slotno] != CLOG_PAGE_DIRTY &&
         ClogCtl->page_status[slotno] != CLOG_PAGE_WRITE_IN_PROGRESS))
    {
-       S_UNLOCK(&(ClogCtl->buffer_lck[slotno]));
-       S_LOCK(&(ClogCtl->control_lck));
+       LWLockRelease(ClogBufferLocks[slotno]);
+       LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
        return;
    }
@@ -504,7 +501,7 @@ WriteCLOGPage(int slotno)
    CLOGPhysicalWritePage(pageno, slotno);
    /* Re-acquire shared control lock and update page state */
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
    Assert(ClogCtl->page_number[slotno] == pageno &&
           (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS ||
@@ -514,7 +511,7 @@ WriteCLOGPage(int slotno)
    if (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS)
        ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN;
-   S_UNLOCK(&(ClogCtl->buffer_lck[slotno]));
+   LWLockRelease(ClogBufferLocks[slotno]);
 }
 /*
@@ -714,7 +711,7 @@ ShutdownCLOG(void)
 {
    int         slotno;
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
    for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
    {
@@ -723,7 +720,7 @@ ShutdownCLOG(void)
               ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN);
    }
-   S_UNLOCK(&(ClogCtl->control_lck));
+   LWLockRelease(CLogControlLock);
 }
 /*
@@ -734,7 +731,7 @@ CheckPointCLOG(void)
 {
    int         slotno;
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
    for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
    {
@@ -745,7 +742,7 @@ CheckPointCLOG(void)
         */
    }
-   S_UNLOCK(&(ClogCtl->control_lck));
+   LWLockRelease(CLogControlLock);
 }
@@ -772,12 +769,12 @@ ExtendCLOG(TransactionId newestXact)
    pageno = TransactionIdToPage(newestXact);
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
    /* Zero the page and make an XLOG entry about it */
    ZeroCLOGPage(pageno, true);
-   S_UNLOCK(&(ClogCtl->control_lck));
+   LWLockRelease(CLogControlLock);
 }
@@ -819,7 +816,7 @@ TruncateCLOG(TransactionId oldestXact)
     * should have been flushed already during the checkpoint, we're
     * just being extra careful here.)
     */
-   S_LOCK(&(ClogCtl->control_lck));
+   LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
 restart:;
    /*
@@ -830,7 +827,7 @@ restart:;
     */
    if (CLOGPagePrecedes(ClogCtl->latest_page_number, cutoffPage))
    {
-       S_UNLOCK(&(ClogCtl->control_lck));
+       LWLockRelease(CLogControlLock);
        elog(LOG, "unable to truncate commit log: apparent wraparound");
        return;
    }
@@ -861,7 +858,7 @@ restart:;
        goto restart;
    }
-   S_UNLOCK(&(ClogCtl->control_lck));
+   LWLockRelease(CLogControlLock);
    /* Now we can remove the old CLOG segment(s) */
    (void) ScanCLOGDirectory(cutoffPage, true);
@@ -974,13 +971,13 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record)
        memcpy(&pageno, XLogRecGetData(record), sizeof(int));
-       S_LOCK(&(ClogCtl->control_lck));
+       LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
        slotno = ZeroCLOGPage(pageno, false);
        WriteCLOGPage(slotno);
        Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN);
-       S_UNLOCK(&(ClogCtl->control_lck));
+       LWLockRelease(CLogControlLock);
    }
 }
......
@@ -6,7 +6,7 @@
 * Copyright (c) 2000, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
- *     $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.45 2001/08/25 18:52:41 tgl Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.46 2001/09/29 04:02:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -15,16 +15,13 @@
 #include "access/clog.h"
 #include "access/transam.h"
-#include "storage/ipc.h"
 #include "storage/proc.h"
 /* Number of OIDs to prefetch (preallocate) per XLOG write */
 #define VAR_OID_PREFETCH        8192
-/* Spinlocks for serializing generation of XIDs and OIDs, respectively */
-SPINLOCK   XidGenLockId;
-SPINLOCK   OidGenLockId;
 /* pointer to "variable cache" in shared memory (set up by shmem.c) */
 VariableCache ShmemVariableCache = NULL;
@@ -44,7 +41,7 @@ GetNewTransactionId(void)
    if (AMI_OVERRIDE)
        return BootstrapTransactionId;
-   SpinAcquire(XidGenLockId);
+   LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
    xid = ShmemVariableCache->nextXid;
@@ -83,7 +80,7 @@ GetNewTransactionId(void)
    if (MyProc != (PROC *) NULL)
        MyProc->xid = xid;
-   SpinRelease(XidGenLockId);
+   LWLockRelease(XidGenLock);
    return xid;
 }
@@ -103,9 +100,9 @@ ReadNewTransactionId(void)
    if (AMI_OVERRIDE)
        return BootstrapTransactionId;
-   SpinAcquire(XidGenLockId);
+   LWLockAcquire(XidGenLock, LW_SHARED);
    xid = ShmemVariableCache->nextXid;
-   SpinRelease(XidGenLockId);
+   LWLockRelease(XidGenLock);
    return xid;
 }
@@ -122,7 +119,7 @@ GetNewObjectId(void)
 {
    Oid         result;
-   SpinAcquire(OidGenLockId);
+   LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
    /*
     * Check for wraparound of the OID counter.  We *must* not return 0
@@ -149,7 +146,7 @@ GetNewObjectId(void)
    (ShmemVariableCache->nextOid)++;
    (ShmemVariableCache->oidCount)--;
-   SpinRelease(OidGenLockId);
+   LWLockRelease(OidGenLock);
    lastSeenOid = result;
@@ -162,12 +159,12 @@ CheckMaxObjectId(Oid assigned_oid)
    if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid)
        return;
-   SpinAcquire(OidGenLockId);
+   LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
    if (assigned_oid < ShmemVariableCache->nextOid)
    {
        lastSeenOid = ShmemVariableCache->nextOid - 1;
-       SpinRelease(OidGenLockId);
+       LWLockRelease(OidGenLock);
        return;
    }
@@ -178,7 +175,7 @@ CheckMaxObjectId(Oid assigned_oid)
    ShmemVariableCache->oidCount -=
        assigned_oid - ShmemVariableCache->nextOid + 1;
    ShmemVariableCache->nextOid = assigned_oid + 1;
-   SpinRelease(OidGenLockId);
+   LWLockRelease(OidGenLock);
    return;
 }
@@ -192,5 +189,5 @@ CheckMaxObjectId(Oid assigned_oid)
    ShmemVariableCache->nextOid = assigned_oid + 1;
    ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1;
-   SpinRelease(OidGenLockId);
+   LWLockRelease(OidGenLock);
 }
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *     $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.110 2001/09/28 08:08:57 thomas Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.111 2001/09/29 04:02:21 tgl Exp $
 *
 * NOTES
 *     Transaction aborts can now occur two ways:
@@ -965,7 +965,7 @@ CommitTransaction(void)
     * this must be done _before_ releasing locks we hold and _after_
     * RecordTransactionCommit.
     *
-    * SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked
+    * LWLockAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked
     * by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot -
     * if xid 2' GetSnapshotData sees xid 1 as running then it must see
     * xid 0 as running as well or it will see two tuple versions - one
@@ -975,10 +975,10 @@ CommitTransaction(void)
    if (MyProc != (PROC *) NULL)
    {
        /* Lock SInvalLock because that's what GetSnapshotData uses. */
-       SpinAcquire(SInvalLock);
+       LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
        MyProc->xid = InvalidTransactionId;
        MyProc->xmin = InvalidTransactionId;
-       SpinRelease(SInvalLock);
+       LWLockRelease(SInvalLock);
    }
    /*
@@ -1030,12 +1030,15 @@ AbortTransaction(void)
    HOLD_INTERRUPTS();
    /*
-    * Release any spinlocks or buffer context locks we might be holding
-    * as quickly as possible.  (Real locks, however, must be held till we
-    * finish aborting.)  Releasing spinlocks is critical since we might
-    * try to grab them again while cleaning up!
+    * Release any LW locks we might be holding as quickly as possible.
+    * (Regular locks, however, must be held till we finish aborting.)
+    * Releasing LW locks is critical since we might try to grab them again
+    * while cleaning up!
     */
-   ProcReleaseSpins(NULL);
+   LWLockReleaseAll();
+
+   /* Clean up buffer I/O and buffer context locks, too */
+   AbortBufferIO();
    UnlockBuffers();
    /*
@@ -1081,10 +1084,10 @@ AbortTransaction(void)
    if (MyProc != (PROC *) NULL)
    {
        /* Lock SInvalLock because that's what GetSnapshotData uses. */
-       SpinAcquire(SInvalLock);
+       LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
        MyProc->xid = InvalidTransactionId;
        MyProc->xmin = InvalidTransactionId;
-       SpinRelease(SInvalLock);
+       LWLockRelease(SInvalLock);
    }
    RelationPurgeLocalRelation(false);
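
The ordering in the first hunk matters: LWLockReleaseAll() must run before the rest of the cleanup because the cleanup steps themselves acquire LWLocks. An illustrative (not from the commit) self-deadlock that releasing up front prevents:

    /*
     * Hypothetical trace, assuming elog(ERROR) longjmps into the abort path
     * while an LWLock is held:
     *
     *   LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
     *   elog(ERROR, "...");                  -- longjmp, lock still held
     *   AbortTransaction();
     *     RecordTransactionAbort();          -- writes an abort WAL record
     *       XLogInsert(...);
     *         LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);  -- self-deadlock
     *
     * LWLockReleaseAll() at the top of AbortTransaction drops every LWLock
     * the backend still holds, breaking the cycle before cleanup begins.
     */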
......
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.77 2001/09/26 20:24:02 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.78 2001/09/29 04:02:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -33,11 +33,11 @@
 #include "access/xlogutils.h"
 #include "catalog/catversion.h"
 #include "catalog/pg_control.h"
-#include "storage/sinval.h"
+#include "storage/bufpage.h"
+#include "storage/lwlock.h"
 #include "storage/proc.h"
+#include "storage/sinval.h"
 #include "storage/spin.h"
-#include "storage/s_lock.h"
-#include "storage/bufpage.h"
 #include "utils/builtins.h"
 #include "utils/relcache.h"
 #include "utils/selfuncs.h"
@@ -86,11 +86,6 @@
 #endif
-/* Max time to wait to acquire XLog activity locks */
-#define XLOG_LOCK_TIMEOUT          (5*60*1000000)       /* 5 minutes */
-/* Max time to wait to acquire checkpoint lock */
-#define CHECKPOINT_LOCK_TIMEOUT    (20*60*1000000)      /* 20 minutes */
 /* User-settable parameters */
 int            CheckPointSegments = 3;
 int            XLOGbuffers = 8;
@@ -155,13 +150,10 @@ static XLogRecPtr ProcLastRecPtr = {0, 0};
 * (which is almost but not quite the same as a pointer to the most recent
 * CHECKPOINT record).  We update this from the shared-memory copy,
 * XLogCtl->Insert.RedoRecPtr, whenever we can safely do so (ie, when we
- * hold the Insert spinlock).  See XLogInsert for details.
+ * hold the Insert lock).  See XLogInsert for details.
 */
 static XLogRecPtr RedoRecPtr;
-/* This lock must be held to read/update control file or create new log file */
-SPINLOCK   ControlFileLockId;
 /*----------
 * Shared-memory data structures for XLOG control
 *
@@ -171,24 +163,24 @@
 * These structs are identical but are declared separately to indicate their
 * slightly different functions.
 *
- * We do a lot of pushups to minimize the amount of access to spinlocked
+ * We do a lot of pushups to minimize the amount of access to lockable
 * shared memory values.  There are actually three shared-memory copies of
 * LogwrtResult, plus one unshared copy in each backend.  Here's how it works:
 *     XLogCtl->LogwrtResult is protected by info_lck
- *     XLogCtl->Write.LogwrtResult is protected by logwrt_lck
- *     XLogCtl->Insert.LogwrtResult is protected by insert_lck
- * One must hold the associated spinlock to read or write any of these, but
- * of course no spinlock is needed to read/write the unshared LogwrtResult.
+ *     XLogCtl->Write.LogwrtResult is protected by WALWriteLock
+ *     XLogCtl->Insert.LogwrtResult is protected by WALInsertLock
+ * One must hold the associated lock to read or write any of these, but
+ * of course no lock is needed to read/write the unshared LogwrtResult.
 *
 * XLogCtl->LogwrtResult and XLogCtl->Write.LogwrtResult are both "always
 * right", since both are updated by a write or flush operation before
- * it releases logwrt_lck.  The point of keeping XLogCtl->Write.LogwrtResult
- * is that it can be examined/modified by code that already holds logwrt_lck
+ * it releases WALWriteLock.  The point of keeping XLogCtl->Write.LogwrtResult
+ * is that it can be examined/modified by code that already holds WALWriteLock
 * without needing to grab info_lck as well.
 *
 * XLogCtl->Insert.LogwrtResult may lag behind the reality of the other two,
 * but is updated when convenient.  Again, it exists for the convenience of
- * code that is already holding insert_lck but not the other locks.
+ * code that is already holding WALInsertLock but not the other locks.
 *
 * The unshared LogwrtResult may lag behind any or all of these, and again
 * is updated when convenient.
@@ -199,6 +191,24 @@
 * Note that this all works because the request and result positions can only
 * advance forward, never back up, and so we can easily determine which of two
 * values is "more up to date".
+ *
+ * info_lck is only held long enough to read/update the protected variables,
+ * so it's a plain spinlock.  The other locks are held longer (potentially
+ * over I/O operations), so we use LWLocks for them.  These locks are:
+ *
+ * WALInsertLock: must be held to insert a record into the WAL buffers.
+ *
+ * WALWriteLock: must be held to write WAL buffers to disk (XLogWrite or
+ * XLogFlush).
+ *
+ * ControlFileLock: must be held to read/update control file or create
+ * new log file.
+ *
+ * CheckpointLock: must be held to do a checkpoint (ensures only one
+ * checkpointer at a time; even though the postmaster won't launch
+ * parallel checkpoint processes, we need this because manual checkpoints
+ * could be launched simultaneously).
+ *
 *----------
 */
 typedef struct XLogwrtRqst
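
The division of labor described above shows up as two distinct call patterns throughout the rewritten file. A sketch, extracted and simplified from the hunks below (it assumes XLogCtl, LogwrtResult, and a WriteRqst are in scope, as they are inside the real functions):

    /*
     * Pattern 1: spinlock -- a few instructions, never sleeps, no I/O.
     */
    SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck);
    LogwrtResult = XLogCtl->LogwrtResult;   /* copy a couple of words */
    SpinLockRelease_NoHoldoff(&XLogCtl->info_lck);

    /*
     * Pattern 2: LWLock -- may be held across write()/fsync() in XLogWrite.
     */
    LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
    XLogWrite(WriteRqst);
    LWLockRelease(WALWriteLock);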
@@ -240,18 +250,18 @@ typedef struct XLogCtlWrite
 */
 typedef struct XLogCtlData
 {
-   /* Protected by insert_lck: */
+   /* Protected by WALInsertLock: */
    XLogCtlInsert Insert;
    /* Protected by info_lck: */
    XLogwrtRqst LogwrtRqst;
    XLogwrtResult LogwrtResult;
-   /* Protected by logwrt_lck: */
+   /* Protected by WALWriteLock: */
    XLogCtlWrite Write;
    /*
     * These values do not change after startup, although the pointed-to
-    * pages and xlblocks values certainly do.  Permission to read/write
-    * the pages and xlblocks values depends on insert_lck and logwrt_lck.
+    * pages and xlblocks values certainly do.  Permission to read/write the
+    * pages and xlblocks values depends on WALInsertLock and WALWriteLock.
     */
    char       *pages;          /* buffers for unwritten XLOG pages */
    XLogRecPtr *xlblocks;       /* 1st byte ptr-s + BLCKSZ */
@@ -259,13 +269,10 @@ typedef struct XLogCtlData
    uint32      XLogCacheBlck;  /* highest allocated xlog buffer index */
    StartUpID   ThisStartUpID;
-   /* This value is not protected by *any* spinlock... */
+   /* This value is not protected by *any* lock... */
    XLogRecPtr  RedoRecPtr;     /* see SetRedoRecPtr/GetRedoRecPtr */
-   slock_t     insert_lck;     /* XLogInsert lock */
    slock_t     info_lck;       /* locks shared LogwrtRqst/LogwrtResult */
-   slock_t     logwrt_lck;     /* XLogWrite/XLogFlush lock */
-   slock_t     chkp_lck;       /* checkpoint lock */
 } XLogCtlData;
 static XLogCtlData *XLogCtl = NULL;
@@ -473,7 +480,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
    uint32      len,
                write_len;
    unsigned    i;
-   bool        do_logwrt;
+   XLogwrtRqst LogwrtRqst;
    bool        updrqst;
    bool        no_tran = (rmid == RM_XLOG_ID) ? true : false;
@@ -505,7 +512,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
     *
     * We may have to loop back to here if a race condition is detected
     * below.  We could prevent the race by doing all this work while
-    * holding the insert spinlock, but it seems better to avoid doing CRC
+    * holding the insert lock, but it seems better to avoid doing CRC
     * calculations while holding the lock.  This means we have to be
     * careful about modifying the rdata list until we know we aren't
     * going to loop back again.  The only change we allow ourselves to
@@ -607,48 +614,33 @@ begin:;
    START_CRIT_SECTION();
-   /* wait to obtain xlog insert lock */
-   do_logwrt = true;
-   for (i = 0;;)
-   {
-       /* try to update LogwrtResult while waiting for insert lock */
-       if (!TAS(&(XLogCtl->info_lck)))
-       {
-           XLogwrtRqst LogwrtRqst;
-           LogwrtRqst = XLogCtl->LogwrtRqst;
-           LogwrtResult = XLogCtl->LogwrtResult;
-           S_UNLOCK(&(XLogCtl->info_lck));
-           /*
-            * If cache is half filled then try to acquire logwrt lock and
-            * do LOGWRT work, but only once per XLogInsert call.  Ignore
-            * any fractional blocks in performing this check.
-            */
-           LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % BLCKSZ;
-           if (do_logwrt &&
-               (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
-                (LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
-                 XLogCtl->XLogCacheByte / 2)))
-           {
-               if (!TAS(&(XLogCtl->logwrt_lck)))
-               {
-                   LogwrtResult = XLogCtl->Write.LogwrtResult;
-                   if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write))
-                   {
-                       XLogWrite(LogwrtRqst);
-                       do_logwrt = false;
-                   }
-                   S_UNLOCK(&(XLogCtl->logwrt_lck));
-               }
-           }
-       }
-       if (!TAS(&(XLogCtl->insert_lck)))
-           break;
-       S_LOCK_SLEEP(&(XLogCtl->insert_lck), i++, XLOG_LOCK_TIMEOUT);
-   }
+   /* update LogwrtResult before doing cache fill check */
+   SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck);
+   LogwrtRqst = XLogCtl->LogwrtRqst;
+   LogwrtResult = XLogCtl->LogwrtResult;
+   SpinLockRelease_NoHoldoff(&XLogCtl->info_lck);
+   /*
+    * If cache is half filled then try to acquire write lock and
+    * do XLogWrite.  Ignore any fractional blocks in performing this check.
+    */
+   LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % BLCKSZ;
+   if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
+       (LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
+        XLogCtl->XLogCacheByte / 2))
+   {
+       if (LWLockConditionalAcquire(WALWriteLock, LW_EXCLUSIVE))
+       {
+           LogwrtResult = XLogCtl->Write.LogwrtResult;
+           if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write))
+               XLogWrite(LogwrtRqst);
+           LWLockRelease(WALWriteLock);
+       }
+   }
+   /* Now wait to get insert lock */
+   LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
    /*
     * Check to see if my RedoRecPtr is out of date.  If so, may have to
     * go back and recompute everything.  This can only happen just after
@@ -667,12 +659,11 @@ begin:;
        if (dtbuf_bkp[i] == false &&
            XLByteLE(dtbuf_lsn[i], RedoRecPtr))
        {
            /*
             * Oops, this buffer now needs to be backed up, but we
             * didn't think so above.  Start over.
             */
-           S_UNLOCK(&(XLogCtl->insert_lck));
+           LWLockRelease(WALInsertLock);
            END_CRIT_SECTION();
            goto begin;
        }
@@ -751,9 +742,9 @@ begin:;
    /* If first XLOG record of transaction, save it in PROC array */
    if (MyLastRecPtr.xrecoff == 0 && !no_tran)
    {
-       SpinAcquire(SInvalLock);
+       LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
        MyProc->logRec = RecPtr;
-       SpinRelease(SInvalLock);
+       LWLockRelease(SInvalLock);
    }
    if (XLOG_DEBUG)
@@ -837,17 +828,17 @@ begin:;
        curridx = PrevBufIdx(curridx);
    WriteRqst = XLogCtl->xlblocks[curridx];
-   S_UNLOCK(&(XLogCtl->insert_lck));
+   LWLockRelease(WALInsertLock);
    if (updrqst)
    {
-       S_LOCK(&(XLogCtl->info_lck));
+       SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck);
        /* advance global request to include new block(s) */
        if (XLByteLT(XLogCtl->LogwrtRqst.Write, WriteRqst))
            XLogCtl->LogwrtRqst.Write = WriteRqst;
        /* update local result copy while I have the chance */
        LogwrtResult = XLogCtl->LogwrtResult;
-       S_UNLOCK(&(XLogCtl->info_lck));
+       SpinLockRelease_NoHoldoff(&XLogCtl->info_lck);
    }
    END_CRIT_SECTION();
@@ -859,11 +850,11 @@ begin:;
 * buffer if it still contains unwritten data.
 *
 * The global LogwrtRqst.Write pointer needs to be advanced to include the
- * just-filled page.  If we can do this for free (without an extra spinlock),
+ * just-filled page.  If we can do this for free (without an extra lock),
 * we do so here.  Otherwise the caller must do it.  We return TRUE if the
 * request update still needs to be done, FALSE if we did it internally.
 *
- * Must be called with insert_lck held.
+ * Must be called with WALInsertLock held.
 */
 static bool
 AdvanceXLInsertBuffer(void)
@@ -890,45 +881,37 @@ AdvanceXLInsertBuffer(void)
    if (!XLByteLE(OldPageRqstPtr, LogwrtResult.Write))
    {
        /* nope, got work to do... */
-       unsigned    spins = 0;
        XLogRecPtr  FinishedPageRqstPtr;
        FinishedPageRqstPtr = XLogCtl->xlblocks[Insert->curridx];
-       for (;;)
-       {
-           /* While waiting, try to get info_lck and update LogwrtResult */
-           if (!TAS(&(XLogCtl->info_lck)))
-           {
-               if (XLByteLT(XLogCtl->LogwrtRqst.Write, FinishedPageRqstPtr))
-                   XLogCtl->LogwrtRqst.Write = FinishedPageRqstPtr;
-               update_needed = false;  /* Did the shared-request update */
-               LogwrtResult = XLogCtl->LogwrtResult;
-               S_UNLOCK(&(XLogCtl->info_lck));
-               if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write))
-               {
-                   /* OK, someone wrote it already */
-                   Insert->LogwrtResult = LogwrtResult;
-                   break;
-               }
-           }
-           /*
-            * LogwrtResult lock is busy or we know the page is still
-            * dirty.  Try to acquire logwrt lock and write full blocks.
-            */
-           if (!TAS(&(XLogCtl->logwrt_lck)))
-           {
-               LogwrtResult = Write->LogwrtResult;
-               if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write))
-               {
-                   S_UNLOCK(&(XLogCtl->logwrt_lck));
-                   /* OK, someone wrote it already */
-                   Insert->LogwrtResult = LogwrtResult;
-                   break;
-               }
+       /* Before waiting, get info_lck and update LogwrtResult */
+       SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck);
+       if (XLByteLT(XLogCtl->LogwrtRqst.Write, FinishedPageRqstPtr))
+           XLogCtl->LogwrtRqst.Write = FinishedPageRqstPtr;
+       LogwrtResult = XLogCtl->LogwrtResult;
+       SpinLockRelease_NoHoldoff(&XLogCtl->info_lck);
+       update_needed = false;  /* Did the shared-request update */
+       if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write))
+       {
+           /* OK, someone wrote it already */
+           Insert->LogwrtResult = LogwrtResult;
+       }
+       else
+       {
+           /* Must acquire write lock */
+           LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+           LogwrtResult = Write->LogwrtResult;
+           if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write))
+           {
+               /* OK, someone wrote it already */
+               LWLockRelease(WALWriteLock);
+               Insert->LogwrtResult = LogwrtResult;
+           }
+           else
+           {
                /*
                 * Have to write buffers while holding insert lock.  This
                 * is not good, so only write as much as we absolutely
@@ -938,11 +921,9 @@ AdvanceXLInsertBuffer(void)
                WriteRqst.Flush.xlogid = 0;
                WriteRqst.Flush.xrecoff = 0;
                XLogWrite(WriteRqst);
-               S_UNLOCK(&(XLogCtl->logwrt_lck));
+               LWLockRelease(WALWriteLock);
                Insert->LogwrtResult = LogwrtResult;
-               break;
            }
-           S_LOCK_SLEEP(&(XLogCtl->logwrt_lck), spins++, XLOG_LOCK_TIMEOUT);
        }
    }
@@ -986,7 +967,7 @@
 /*
 * Write and/or fsync the log at least as far as WriteRqst indicates.
 *
- * Must be called with logwrt_lck held.
+ * Must be called with WALWriteLock held.
 */
 static void
 XLogWrite(XLogwrtRqst WriteRqst)
@@ -1047,7 +1028,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
                 "consider increasing WAL_FILES");
            /* update pg_control, unless someone else already did */
-           SpinAcquire(ControlFileLockId);
+           LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
            if (ControlFile->logId < openLogId ||
                (ControlFile->logId == openLogId &&
                 ControlFile->logSeg < openLogSeg + 1))
@@ -1073,7 +1054,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
                    kill(getppid(), SIGUSR1);
                }
            }
-           SpinRelease(ControlFileLockId);
+           LWLockRelease(ControlFileLock);
        }
        if (openLogFile < 0)
@@ -1167,13 +1148,13 @@ XLogWrite(XLogwrtRqst WriteRqst)
     * 'result' values.  This is not absolutely essential, but it saves
     * some code in a couple of places.
     */
-   S_LOCK(&(XLogCtl->info_lck));
+   SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck);
    XLogCtl->LogwrtResult = LogwrtResult;
    if (XLByteLT(XLogCtl->LogwrtRqst.Write, LogwrtResult.Write))
        XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
    if (XLByteLT(XLogCtl->LogwrtRqst.Flush, LogwrtResult.Flush))
        XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
-   S_UNLOCK(&(XLogCtl->info_lck));
+   SpinLockRelease_NoHoldoff(&XLogCtl->info_lck);
    Write->LogwrtResult = LogwrtResult;
 }
@@ -1181,7 +1162,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
 /*
 * Ensure that all XLOG data through the given position is flushed to disk.
 *
- * NOTE: this differs from XLogWrite mainly in that the logwrt_lck is not
+ * NOTE: this differs from XLogWrite mainly in that the WALWriteLock is not
 * already held, and we try to avoid acquiring it if possible.
 */
 void
@@ -1189,7 +1170,6 @@ XLogFlush(XLogRecPtr record)
 {
    XLogRecPtr  WriteRqstPtr;
    XLogwrtRqst WriteRqst;
-   unsigned    spins = 0;
    if (XLOG_DEBUG)
    {
/* initialize to given target; may increase below */ /* initialize to given target; may increase below */
WriteRqstPtr = record; WriteRqstPtr = record;
for (;;) /* read LogwrtResult and update local state */
SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck);
if (XLByteLT(WriteRqstPtr, XLogCtl->LogwrtRqst.Write))
WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
LogwrtResult = XLogCtl->LogwrtResult;
SpinLockRelease_NoHoldoff(&XLogCtl->info_lck);
/* done already? */
if (!XLByteLE(record, LogwrtResult.Flush))
{ {
/* try to read LogwrtResult and update local state */
if (!TAS(&(XLogCtl->info_lck)))
{
if (XLByteLT(WriteRqstPtr, XLogCtl->LogwrtRqst.Write))
WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
LogwrtResult = XLogCtl->LogwrtResult;
S_UNLOCK(&(XLogCtl->info_lck));
if (XLByteLE(record, LogwrtResult.Flush))
{
/* Done already */
break;
}
}
/* if something was added to log cache then try to flush this too */ /* if something was added to log cache then try to flush this too */
if (!TAS(&(XLogCtl->insert_lck))) if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE))
{ {
XLogCtlInsert *Insert = &XLogCtl->Insert; XLogCtlInsert *Insert = &XLogCtl->Insert;
uint32 freespace = INSERT_FREESPACE(Insert); uint32 freespace = INSERT_FREESPACE(Insert);
...@@ -1252,29 +1227,22 @@ XLogFlush(XLogRecPtr record) ...@@ -1252,29 +1227,22 @@ XLogFlush(XLogRecPtr record)
WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx]; WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
WriteRqstPtr.xrecoff -= freespace; WriteRqstPtr.xrecoff -= freespace;
} }
S_UNLOCK(&(XLogCtl->insert_lck)); LWLockRelease(WALInsertLock);
} }
/* now try to get the logwrt lock */ /* now wait for the write lock */
if (!TAS(&(XLogCtl->logwrt_lck))) LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
LogwrtResult = XLogCtl->Write.LogwrtResult;
if (!XLByteLE(record, LogwrtResult.Flush))
{ {
LogwrtResult = XLogCtl->Write.LogwrtResult;
if (XLByteLE(record, LogwrtResult.Flush))
{
/* Done already */
S_UNLOCK(&(XLogCtl->logwrt_lck));
break;
}
WriteRqst.Write = WriteRqstPtr; WriteRqst.Write = WriteRqstPtr;
WriteRqst.Flush = record; WriteRqst.Flush = record;
XLogWrite(WriteRqst); XLogWrite(WriteRqst);
S_UNLOCK(&(XLogCtl->logwrt_lck));
if (XLByteLT(LogwrtResult.Flush, record)) if (XLByteLT(LogwrtResult.Flush, record))
elog(STOP, "XLogFlush: request %X/%X is not satisfied --- flushed only to %X/%X", elog(STOP, "XLogFlush: request %X/%X is not satisfied --- flushed only to %X/%X",
record.xlogid, record.xrecoff, record.xlogid, record.xrecoff,
LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff); LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
break;
} }
S_LOCK_SLEEP(&(XLogCtl->logwrt_lck), spins++, XLOG_LOCK_TIMEOUT); LWLockRelease(WALWriteLock);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
@@ -1289,9 +1257,9 @@ XLogFlush(XLogRecPtr record)
 * pre-existing file will be deleted).  On return, TRUE if a pre-existing
 * file was used.
 *
- * use_lock: if TRUE, acquire ControlFileLock spinlock while moving file into
+ * use_lock: if TRUE, acquire ControlFileLock while moving file into
 * place.  This should be TRUE except during bootstrap log creation.  The
- * caller must *not* hold the spinlock at call.
+ * caller must *not* hold the lock at call.
 *
 * Returns FD of opened file.
 */
@@ -1329,7 +1297,7 @@ XLogFileInit(uint32 log, uint32 seg,
     * Initialize an empty (all zeroes) segment.  NOTE: it is possible
     * that another process is doing the same thing.  If so, we will end
     * up pre-creating an extra log segment.  That seems OK, and better
-    * than holding the spinlock throughout this lengthy process.
+    * than holding the lock throughout this lengthy process.
     */
    snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d",
             XLogDir, (int) getpid());
@@ -1423,9 +1391,9 @@ XLogFileInit(uint32 log, uint32 seg,
 * point.  Fail if no free slot is found in this range.  (Irrelevant if
 * find_free is FALSE.)
 *
- * use_lock: if TRUE, acquire ControlFileLock spinlock while moving file into
+ * use_lock: if TRUE, acquire ControlFileLock while moving file into
 * place.  This should be TRUE except during bootstrap log creation.  The
- * caller must *not* hold the spinlock at call.
+ * caller must *not* hold the lock at call.
 *
 * Returns TRUE if file installed, FALSE if not installed because of
 * exceeding max_advance limit.  (Any other kind of failure causes elog().)
@@ -1444,7 +1412,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
     * We want to be sure that only one process does this at a time.
     */
    if (use_lock)
-       SpinAcquire(ControlFileLockId);
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
    if (!find_free)
    {
@@ -1462,7 +1430,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
        {
            /* Failed to find a free slot within specified range */
            if (use_lock)
-               SpinRelease(ControlFileLockId);
+               LWLockRelease(ControlFileLock);
            return false;
        }
        NextLogSeg(log, seg);
@@ -1487,7 +1455,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
 #endif
    if (use_lock)
-       SpinRelease(ControlFileLockId);
+       LWLockRelease(ControlFileLock);
    return true;
 }
@@ -2319,10 +2287,7 @@ XLOGShmemInit(void)
    XLogCtl->XLogCacheByte = BLCKSZ * XLOGbuffers;
    XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
    XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
-   S_INIT_LOCK(&(XLogCtl->insert_lck));
-   S_INIT_LOCK(&(XLogCtl->info_lck));
-   S_INIT_LOCK(&(XLogCtl->logwrt_lck));
-   S_INIT_LOCK(&(XLogCtl->chkp_lck));
+   SpinLockInit(&XLogCtl->info_lck);
    /*
     * If we are not in bootstrap mode, pg_control should already exist.
@@ -2821,12 +2786,12 @@ SetThisStartUpID(void)
 * in shmem (using SetRedoRecPtr).  When checkpointer completes, postmaster
 * calls GetRedoRecPtr to update its own copy of RedoRecPtr, so that
 * subsequently-spawned backends will start out with a reasonably up-to-date
- * local RedoRecPtr.  Since these operations are not protected by any spinlock
+ * local RedoRecPtr.  Since these operations are not protected by any lock
 * and copying an XLogRecPtr isn't atomic, it's unsafe to use either of these
 * routines at other times!
 *
 * Note: once spawned, a backend must update its local RedoRecPtr from
- * XLogCtl->Insert.RedoRecPtr while holding the insert spinlock.  This is
+ * XLogCtl->Insert.RedoRecPtr while holding the insert lock.  This is
 * done in XLogInsert().
 */
 void
...@@ -2874,20 +2839,26 @@ CreateCheckPoint(bool shutdown) ...@@ -2874,20 +2839,26 @@ CreateCheckPoint(bool shutdown)
uint32 freespace; uint32 freespace;
uint32 _logId; uint32 _logId;
uint32 _logSeg; uint32 _logSeg;
unsigned spins = 0;
if (MyLastRecPtr.xrecoff != 0) if (MyLastRecPtr.xrecoff != 0)
elog(ERROR, "CreateCheckPoint: cannot be called inside transaction block"); elog(ERROR, "CreateCheckPoint: cannot be called inside transaction block");
START_CRIT_SECTION(); /*
* The CheckpointLock can be held for quite a while, which is not good
/* Grab lock, using larger than normal sleep between tries (1 sec) */ * because we won't respond to a cancel/die request while waiting for an
while (TAS(&(XLogCtl->chkp_lck))) * LWLock. (But the alternative of using a regular lock won't work for
* background checkpoint processes, which are not regular backends.)
* So, rather than use a plain LWLockAcquire, use this kluge to allow
* an interrupt to be accepted while we are waiting:
*/
while (!LWLockConditionalAcquire(CheckpointLock, LW_EXCLUSIVE))
{ {
S_LOCK_SLEEP_INTERVAL(&(XLogCtl->chkp_lck), spins++, CHECK_FOR_INTERRUPTS();
CHECKPOINT_LOCK_TIMEOUT, 1000000); sleep(1);
} }
START_CRIT_SECTION();
if (shutdown) if (shutdown)
{ {
ControlFile->state = DB_SHUTDOWNING; ControlFile->state = DB_SHUTDOWNING;
...@@ -2899,7 +2870,7 @@ CreateCheckPoint(bool shutdown) ...@@ -2899,7 +2870,7 @@ CreateCheckPoint(bool shutdown)
checkPoint.ThisStartUpID = ThisStartUpID; checkPoint.ThisStartUpID = ThisStartUpID;
checkPoint.time = time(NULL); checkPoint.time = time(NULL);
S_LOCK(&(XLogCtl->insert_lck)); LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
/* /*
* If this isn't a shutdown, and we have not inserted any XLOG records * If this isn't a shutdown, and we have not inserted any XLOG records
...@@ -2929,8 +2900,8 @@ CreateCheckPoint(bool shutdown) ...@@ -2929,8 +2900,8 @@ CreateCheckPoint(bool shutdown)
ControlFile->checkPoint.xrecoff == ControlFile->checkPoint.xrecoff ==
ControlFile->checkPointCopy.redo.xrecoff) ControlFile->checkPointCopy.redo.xrecoff)
{ {
S_UNLOCK(&(XLogCtl->insert_lck)); LWLockRelease(WALInsertLock);
S_UNLOCK(&(XLogCtl->chkp_lck)); LWLockRelease(CheckpointLock);
END_CRIT_SECTION(); END_CRIT_SECTION();
return; return;
} }
...@@ -2974,17 +2945,17 @@ CreateCheckPoint(bool shutdown) ...@@ -2974,17 +2945,17 @@ CreateCheckPoint(bool shutdown)
* Now we can release insert lock, allowing other xacts to proceed * Now we can release insert lock, allowing other xacts to proceed
* even while we are flushing disk buffers. * even while we are flushing disk buffers.
*/ */
S_UNLOCK(&(XLogCtl->insert_lck)); LWLockRelease(WALInsertLock);
SpinAcquire(XidGenLockId); LWLockAcquire(XidGenLock, LW_SHARED);
checkPoint.nextXid = ShmemVariableCache->nextXid; checkPoint.nextXid = ShmemVariableCache->nextXid;
SpinRelease(XidGenLockId); LWLockRelease(XidGenLock);
SpinAcquire(OidGenLockId); LWLockAcquire(OidGenLock, LW_SHARED);
checkPoint.nextOid = ShmemVariableCache->nextOid; checkPoint.nextOid = ShmemVariableCache->nextOid;
if (!shutdown) if (!shutdown)
checkPoint.nextOid += ShmemVariableCache->oidCount; checkPoint.nextOid += ShmemVariableCache->oidCount;
SpinRelease(OidGenLockId); LWLockRelease(OidGenLock);
/* /*
* Having constructed the checkpoint record, ensure all shmem disk * Having constructed the checkpoint record, ensure all shmem disk
...@@ -3039,7 +3010,7 @@ CreateCheckPoint(bool shutdown) ...@@ -3039,7 +3010,7 @@ CreateCheckPoint(bool shutdown)
/* /*
* Update the control file. * Update the control file.
*/ */
SpinAcquire(ControlFileLockId); LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
if (shutdown) if (shutdown)
ControlFile->state = DB_SHUTDOWNED; ControlFile->state = DB_SHUTDOWNED;
ControlFile->prevCheckPoint = ControlFile->checkPoint; ControlFile->prevCheckPoint = ControlFile->checkPoint;
...@@ -3047,7 +3018,7 @@ CreateCheckPoint(bool shutdown) ...@@ -3047,7 +3018,7 @@ CreateCheckPoint(bool shutdown)
ControlFile->checkPointCopy = checkPoint; ControlFile->checkPointCopy = checkPoint;
ControlFile->time = time(NULL); ControlFile->time = time(NULL);
UpdateControlFile(); UpdateControlFile();
SpinRelease(ControlFileLockId); LWLockRelease(ControlFileLock);
/* /*
* Delete offline log files (those no longer needed even for previous * Delete offline log files (those no longer needed even for previous
...@@ -3067,7 +3038,7 @@ CreateCheckPoint(bool shutdown) ...@@ -3067,7 +3038,7 @@ CreateCheckPoint(bool shutdown)
if (!shutdown) if (!shutdown)
PreallocXlogFiles(recptr); PreallocXlogFiles(recptr);
S_UNLOCK(&(XLogCtl->chkp_lck)); LWLockRelease(CheckpointLock);
END_CRIT_SECTION(); END_CRIT_SECTION();
} }
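Worth noting in the hunk just above: the reads of ShmemVariableCache->nextXid and nextOid now take their locks in LW_SHARED mode, a capability the old SpinAcquire() interface could not express, so concurrent readers of these counters no longer serialize against each other. A condensed sketch (lock and field names as in the diff):

	/* Any number of backends may hold the lock in this mode at once;
	 * only updaters of the counter need LW_EXCLUSIVE. */
	LWLockAcquire(XidGenLock, LW_SHARED);
	checkPoint.nextXid = ShmemVariableCache->nextXid;
	LWLockRelease(XidGenLock);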
......
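Isolated from the top of the CreateCheckPoint() hunk above, the interrupt-friendly acquisition idiom looks like the sketch below. The wrapper name is hypothetical; LWLockConditionalAcquire(), LW_EXCLUSIVE, and CHECK_FOR_INTERRUPTS() are the symbols actually used in the hunk.

	#include <unistd.h>				/* sleep */
	#include "storage/lwlock.h"
	#include "miscadmin.h"			/* CHECK_FOR_INTERRUPTS */

	/* Hypothetical helper: acquire an LWLock without sleeping inside
	 * LWLockAcquire(), so a pending cancel/die interrupt is still
	 * honored while we wait.  Suitable only for locks with long hold
	 * times and infrequent acquisition, such as CheckpointLock. */
	static void
	AcquireLWLockInterruptibly(LWLockId lockid)
	{
		while (!LWLockConditionalAcquire(lockid, LW_EXCLUSIVE))
		{
			CHECK_FOR_INTERRUPTS();	/* may elog(ERROR) and not return */
			sleep(1);				/* coarse retry is fine here */
		}
	}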
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.38 2001/08/21 16:36:00 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.39 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
#include "storage/itemptr.h" #include "storage/itemptr.h"
#include "storage/off.h" #include "storage/off.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "storage/spin.h"
#include "tcop/dest.h" #include "tcop/dest.h"
#include "utils/nabstime.h" #include "utils/nabstime.h"
#include "utils/rel.h" #include "utils/rel.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.116 2001/09/27 16:29:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.117 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "libpq/pqsignal.h" #include "libpq/pqsignal.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h" #include "tcop/tcopprot.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/exc.h" #include "utils/exc.h"
...@@ -360,29 +361,39 @@ BootstrapMain(int argc, char *argv[]) ...@@ -360,29 +361,39 @@ BootstrapMain(int argc, char *argv[])
* XLOG operations * XLOG operations
*/ */
SetProcessingMode(NormalProcessing); SetProcessingMode(NormalProcessing);
if (xlogop == BS_XLOG_NOP)
StartupXLOG(); switch (xlogop)
else if (xlogop == BS_XLOG_BOOTSTRAP)
{
BootStrapXLOG();
StartupXLOG();
}
else
{ {
if (xlogop == BS_XLOG_CHECKPOINT) case BS_XLOG_NOP:
{ StartupXLOG();
break;
case BS_XLOG_BOOTSTRAP:
BootStrapXLOG();
StartupXLOG();
break;
case BS_XLOG_CHECKPOINT:
if (IsUnderPostmaster)
InitDummyProcess(); /* needed to get LWLocks */
CreateDummyCaches(); CreateDummyCaches();
CreateCheckPoint(false); CreateCheckPoint(false);
SetRedoRecPtr(); SetRedoRecPtr();
} proc_exit(0); /* done */
else if (xlogop == BS_XLOG_STARTUP)
case BS_XLOG_STARTUP:
StartupXLOG(); StartupXLOG();
else if (xlogop == BS_XLOG_SHUTDOWN) proc_exit(0); /* done */
case BS_XLOG_SHUTDOWN:
ShutdownXLOG(); ShutdownXLOG();
else proc_exit(0); /* done */
default:
elog(STOP, "Unsupported XLOG op %d", xlogop); elog(STOP, "Unsupported XLOG op %d", xlogop);
proc_exit(0); proc_exit(0);
} }
SetProcessingMode(BootstrapProcessing); SetProcessingMode(BootstrapProcessing);
/* /*
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.7 2001/09/21 03:32:35 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.8 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
* A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten * A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten
* immediately, and not even passed to the free space map. Removing the * immediately, and not even passed to the free space map. Removing the
* uselessly small entries early saves cycles, and in particular reduces * uselessly small entries early saves cycles, and in particular reduces
* the amount of time we spend holding the FSM spinlock when we finally call * the amount of time we spend holding the FSM lock when we finally call
* MultiRecordFreeSpace. Since the FSM will ignore pages below its own * MultiRecordFreeSpace. Since the FSM will ignore pages below its own
* runtime threshold anyway, there's no point in making this really small. * runtime threshold anyway, there's no point in making this really small.
* XXX Is it worth trying to measure average tuple size, and using that to * XXX Is it worth trying to measure average tuple size, and using that to
......
$Header: /cvsroot/pgsql/src/backend/storage/buffer/README,v 1.2 2001/08/25 18:52:42 tgl Exp $ $Header: /cvsroot/pgsql/src/backend/storage/buffer/README,v 1.3 2001/09/29 04:02:22 tgl Exp $
Notes about shared buffer access rules Notes about shared buffer access rules
-------------------------------------- --------------------------------------
...@@ -30,12 +30,10 @@ Buffer locks: there are two kinds of buffer locks, shared and exclusive, ...@@ -30,12 +30,10 @@ Buffer locks: there are two kinds of buffer locks, shared and exclusive,
which act just as you'd expect: multiple backends can hold shared locks on which act just as you'd expect: multiple backends can hold shared locks on
the same buffer, but an exclusive lock prevents anyone else from holding the same buffer, but an exclusive lock prevents anyone else from holding
either shared or exclusive lock. (These can alternatively be called READ either shared or exclusive lock. (These can alternatively be called READ
and WRITE locks.) These locks are short-term: they should not be held for and WRITE locks.) These locks are intended to be short-term: they should not
long. They are implemented as per-buffer spinlocks, so another backend be held for long. Buffer locks are acquired and released by LockBuffer().
trying to acquire a competing lock will spin as long as you hold yours! It will *not* work for a single backend to try to acquire multiple locks on
Buffer locks are acquired and released by LockBuffer(). It will *not* work the same buffer. One must pin a buffer before trying to lock it.
for a single backend to try to acquire multiple locks on the same buffer.
One must pin a buffer before trying to lock it.
Buffer access rules: Buffer access rules:
......
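A usage sketch of the rule just stated: pin first via ReadBuffer(), then take the short-term content lock with LockBuffer(). The wrapper function is illustrative only; the buffer-manager calls are the ones named in this commit.

	#include "storage/bufmgr.h"
	#include "utils/rel.h"

	/* Illustrative only: read-only access to a shared buffer under the
	 * pin-then-lock discipline described above. */
	static void
	inspect_page(Relation rel, BlockNumber blkno)
	{
		Buffer		buf = ReadBuffer(rel, blkno);	/* pin */

		LockBuffer(buf, BUFFER_LOCK_SHARE);		/* shared (READ) content lock */
		/* ... examine the page contents here ... */
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);	/* drop content lock */
		ReleaseBuffer(buf);						/* drop pin */
	}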
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.43 2001/07/06 21:04:25 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.44 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -28,10 +28,9 @@ ...@@ -28,10 +28,9 @@
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/s_lock.h"
#include "storage/shmem.h" #include "storage/shmem.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "storage/spin.h" #include "storage/lwlock.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -117,8 +116,6 @@ bool *BufferDirtiedByMe; /* T if buf has been dirtied in cur xact */ ...@@ -117,8 +116,6 @@ bool *BufferDirtiedByMe; /* T if buf has been dirtied in cur xact */
* *
*/ */
SPINLOCK BufMgrLock;
long int ReadBufferCount; long int ReadBufferCount;
long int ReadLocalBufferCount; long int ReadLocalBufferCount;
long int BufferHitCount; long int BufferHitCount;
...@@ -151,7 +148,7 @@ InitBufferPool(void) ...@@ -151,7 +148,7 @@ InitBufferPool(void)
* anyone else attached to the shmem at this point, we've got * anyone else attached to the shmem at this point, we've got
* problems. * problems.
*/ */
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
#ifdef BMTRACE #ifdef BMTRACE
CurTraceBuf = (long *) ShmemInitStruct("Buffer trace", CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
...@@ -186,8 +183,8 @@ InitBufferPool(void) ...@@ -186,8 +183,8 @@ InitBufferPool(void)
/* /*
* link the buffers into a circular, doubly-linked list to * link the buffers into a circular, doubly-linked list to
* initialize free list. Still don't know anything about * initialize free list, and initialize the buffer headers.
* replacement strategy in this file. * Still don't know anything about replacement strategy in this file.
*/ */
for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++) for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
{ {
...@@ -197,12 +194,15 @@ InitBufferPool(void) ...@@ -197,12 +194,15 @@ InitBufferPool(void)
buf->freePrev = i - 1; buf->freePrev = i - 1;
CLEAR_BUFFERTAG(&(buf->tag)); CLEAR_BUFFERTAG(&(buf->tag));
buf->buf_id = i;
buf->data = MAKE_OFFSET(block); buf->data = MAKE_OFFSET(block);
buf->flags = (BM_DELETED | BM_FREE | BM_VALID); buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
buf->refcount = 0; buf->refcount = 0;
buf->buf_id = i; buf->io_in_progress_lock = LWLockAssign();
S_INIT_LOCK(&(buf->io_in_progress_lock)); buf->cntx_lock = LWLockAssign();
S_INIT_LOCK(&(buf->cntx_lock)); buf->cntxDirty = false;
buf->wait_backend_id = 0;
} }
/* close the circular queue */ /* close the circular queue */
...@@ -214,7 +214,7 @@ InitBufferPool(void) ...@@ -214,7 +214,7 @@ InitBufferPool(void)
InitBufTable(); InitBufTable();
InitFreeList(!foundDescs); InitFreeList(!foundDescs);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.21 2001/03/22 03:59:44 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.22 2001/09/29 04:02:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,8 +23,7 @@ ...@@ -23,8 +23,7 @@
* *
* Synchronization: * Synchronization:
* *
* All routines in this file assume buffer manager spinlock is * All routines in this file assume BufMgrLock is held by their caller.
* held by their caller.
*/ */
#include "postgres.h" #include "postgres.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.116 2001/07/06 21:04:25 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.117 2001/09/29 04:02:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -59,7 +59,6 @@ ...@@ -59,7 +59,6 @@
(*((XLogRecPtr*)MAKE_PTR((bufHdr)->data))) (*((XLogRecPtr*)MAKE_PTR((bufHdr)->data)))
extern SPINLOCK BufMgrLock;
extern long int ReadBufferCount; extern long int ReadBufferCount;
extern long int ReadLocalBufferCount; extern long int ReadLocalBufferCount;
extern long int BufferHitCount; extern long int BufferHitCount;
...@@ -76,7 +75,7 @@ extern long int LocalBufferFlushCount; ...@@ -76,7 +75,7 @@ extern long int LocalBufferFlushCount;
*/ */
bool SharedBufferChanged = false; bool SharedBufferChanged = false;
static void WaitIO(BufferDesc *buf, SPINLOCK spinlock); static void WaitIO(BufferDesc *buf);
static void StartBufferIO(BufferDesc *buf, bool forInput); static void StartBufferIO(BufferDesc *buf, bool forInput);
static void TerminateBufferIO(BufferDesc *buf); static void TerminateBufferIO(BufferDesc *buf);
static void ContinueBufferIO(BufferDesc *buf, bool forInput); static void ContinueBufferIO(BufferDesc *buf, bool forInput);
...@@ -130,7 +129,7 @@ ReadBuffer(Relation reln, BlockNumber blockNum) ...@@ -130,7 +129,7 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
/* /*
* ReadBufferInternal -- internal version of ReadBuffer with more options * ReadBufferInternal -- internal version of ReadBuffer with more options
* *
* bufferLockHeld: if true, caller already acquired the bufmgr spinlock. * bufferLockHeld: if true, caller already acquired the bufmgr lock.
* (This is assumed never to be true if dealing with a local buffer!) * (This is assumed never to be true if dealing with a local buffer!)
*/ */
static Buffer static Buffer
...@@ -179,7 +178,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, ...@@ -179,7 +178,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
* block is not currently in memory. * block is not currently in memory.
*/ */
if (!bufferLockHeld) if (!bufferLockHeld)
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
bufHdr = BufferAlloc(reln, blockNum, &found); bufHdr = BufferAlloc(reln, blockNum, &found);
if (found) if (found)
{ {
...@@ -188,7 +187,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, ...@@ -188,7 +187,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
} }
} }
/* At this point we do NOT hold the bufmgr spinlock. */ /* At this point we do NOT hold the bufmgr lock. */
if (!bufHdr) if (!bufHdr)
return InvalidBuffer; return InvalidBuffer;
...@@ -208,9 +207,9 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, ...@@ -208,9 +207,9 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
*/ */
if (!isLocalBuf) if (!isLocalBuf)
{ {
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
StartBufferIO(bufHdr, false); StartBufferIO(bufHdr, false);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
} }
...@@ -243,7 +242,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, ...@@ -243,7 +242,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
} }
/* lock buffer manager again to update IO IN PROGRESS */ /* lock buffer manager again to update IO IN PROGRESS */
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
if (status == SM_FAIL) if (status == SM_FAIL)
{ {
...@@ -251,7 +250,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, ...@@ -251,7 +250,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
if (!BufTableDelete(bufHdr)) if (!BufTableDelete(bufHdr))
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
elog(FATAL, "BufRead: buffer table broken after IO error"); elog(FATAL, "BufRead: buffer table broken after IO error");
} }
/* remember that BufferAlloc() pinned the buffer */ /* remember that BufferAlloc() pinned the buffer */
...@@ -274,7 +273,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, ...@@ -274,7 +273,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
/* If anyone was waiting for IO to complete, wake them up now */ /* If anyone was waiting for IO to complete, wake them up now */
TerminateBufferIO(bufHdr); TerminateBufferIO(bufHdr);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
if (status == SM_FAIL) if (status == SM_FAIL)
return InvalidBuffer; return InvalidBuffer;
...@@ -322,7 +321,7 @@ BufferAlloc(Relation reln, ...@@ -322,7 +321,7 @@ BufferAlloc(Relation reln,
*foundPtr = TRUE; *foundPtr = TRUE;
if (inProgress) /* confirm end of IO */ if (inProgress) /* confirm end of IO */
{ {
WaitIO(buf, BufMgrLock); WaitIO(buf);
inProgress = (buf->flags & BM_IO_IN_PROGRESS); inProgress = (buf->flags & BM_IO_IN_PROGRESS);
} }
if (BUFFER_IS_BROKEN(buf)) if (BUFFER_IS_BROKEN(buf))
...@@ -354,7 +353,7 @@ BufferAlloc(Relation reln, ...@@ -354,7 +353,7 @@ BufferAlloc(Relation reln,
if (!(*foundPtr)) if (!(*foundPtr))
StartBufferIO(buf, true); StartBufferIO(buf, true);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
return buf; return buf;
} }
...@@ -364,7 +363,7 @@ BufferAlloc(Relation reln, ...@@ -364,7 +363,7 @@ BufferAlloc(Relation reln,
/* /*
* Didn't find it in the buffer pool. We'll have to initialize a new * Didn't find it in the buffer pool. We'll have to initialize a new
* buffer. First, grab one from the free list. If it's dirty, flush * buffer. First, grab one from the free list. If it's dirty, flush
* it to disk. Remember to unlock BufMgr spinlock while doing the IOs. * it to disk. Remember to unlock BufMgrLock while doing the IOs.
*/ */
inProgress = FALSE; inProgress = FALSE;
for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
...@@ -502,7 +501,7 @@ BufferAlloc(Relation reln, ...@@ -502,7 +501,7 @@ BufferAlloc(Relation reln,
*foundPtr = TRUE; *foundPtr = TRUE;
if (inProgress) if (inProgress)
{ {
WaitIO(buf2, BufMgrLock); WaitIO(buf2);
inProgress = (buf2->flags & BM_IO_IN_PROGRESS); inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
} }
if (BUFFER_IS_BROKEN(buf2)) if (BUFFER_IS_BROKEN(buf2))
...@@ -510,7 +509,7 @@ BufferAlloc(Relation reln, ...@@ -510,7 +509,7 @@ BufferAlloc(Relation reln,
if (!(*foundPtr)) if (!(*foundPtr))
StartBufferIO(buf2, true); StartBufferIO(buf2, true);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
return buf2; return buf2;
} }
...@@ -534,7 +533,7 @@ BufferAlloc(Relation reln, ...@@ -534,7 +533,7 @@ BufferAlloc(Relation reln,
if (!BufTableDelete(buf)) if (!BufTableDelete(buf))
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
elog(FATAL, "buffer wasn't in the buffer table"); elog(FATAL, "buffer wasn't in the buffer table");
} }
...@@ -542,7 +541,7 @@ BufferAlloc(Relation reln, ...@@ -542,7 +541,7 @@ BufferAlloc(Relation reln,
if (!BufTableInsert(buf)) if (!BufTableInsert(buf))
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
elog(FATAL, "Buffer in lookup table twice"); elog(FATAL, "Buffer in lookup table twice");
} }
...@@ -561,7 +560,7 @@ BufferAlloc(Relation reln, ...@@ -561,7 +560,7 @@ BufferAlloc(Relation reln,
_bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND);
#endif /* BMTRACE */ #endif /* BMTRACE */
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
return buf; return buf;
} }
...@@ -595,13 +594,13 @@ WriteBuffer(Buffer buffer) ...@@ -595,13 +594,13 @@ WriteBuffer(Buffer buffer)
SharedBufferChanged = true; SharedBufferChanged = true;
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(bufHdr->refcount > 0); Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnpinBuffer(bufHdr); UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
return TRUE; return TRUE;
} }
...@@ -625,12 +624,12 @@ WriteNoReleaseBuffer(Buffer buffer) ...@@ -625,12 +624,12 @@ WriteNoReleaseBuffer(Buffer buffer)
SharedBufferChanged = true; SharedBufferChanged = true;
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(bufHdr->refcount > 0); Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
return STATUS_OK; return STATUS_OK;
} }
...@@ -639,10 +638,10 @@ WriteNoReleaseBuffer(Buffer buffer) ...@@ -639,10 +638,10 @@ WriteNoReleaseBuffer(Buffer buffer)
#undef ReleaseAndReadBuffer #undef ReleaseAndReadBuffer
/* /*
* ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer() * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
* to save a spinlock release/acquire. * to save a lock release/acquire.
* *
* Also, if the passed buffer is valid and already contains the desired block * Also, if the passed buffer is valid and already contains the desired block
* number, we simply return it without ever acquiring the spinlock at all. * number, we simply return it without ever acquiring the lock at all.
* Since the passed buffer must be pinned, it's OK to examine its block * Since the passed buffer must be pinned, it's OK to examine its block
* number without getting the lock first. * number without getting the lock first.
* *
...@@ -652,7 +651,7 @@ WriteNoReleaseBuffer(Buffer buffer) ...@@ -652,7 +651,7 @@ WriteNoReleaseBuffer(Buffer buffer)
* *
* Also note: while it will work to call this routine with blockNum == P_NEW, * Also note: while it will work to call this routine with blockNum == P_NEW,
* it's best to avoid doing so, since that would result in calling * it's best to avoid doing so, since that would result in calling
* smgrnblocks() while holding the bufmgr spinlock, hence some loss of * smgrnblocks() while holding the bufmgr lock, hence some loss of
* concurrency. * concurrency.
*/ */
Buffer Buffer
...@@ -684,7 +683,7 @@ ReleaseAndReadBuffer(Buffer buffer, ...@@ -684,7 +683,7 @@ ReleaseAndReadBuffer(Buffer buffer,
PrivateRefCount[buffer - 1]--; PrivateRefCount[buffer - 1]--;
else else
{ {
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
UnpinBuffer(bufHdr); UnpinBuffer(bufHdr);
return ReadBufferInternal(relation, blockNum, true); return ReadBufferInternal(relation, blockNum, true);
} }
...@@ -712,12 +711,11 @@ BufferSync() ...@@ -712,12 +711,11 @@ BufferSync()
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
{ {
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
SpinAcquire(BufMgrLock);
if (!(bufHdr->flags & BM_VALID)) if (!(bufHdr->flags & BM_VALID))
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
continue; continue;
} }
...@@ -731,7 +729,7 @@ BufferSync() ...@@ -731,7 +729,7 @@ BufferSync()
*/ */
if (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty)) if (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty))
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
continue; continue;
} }
...@@ -741,11 +739,11 @@ BufferSync() ...@@ -741,11 +739,11 @@ BufferSync()
*/ */
if (bufHdr->flags & BM_IO_IN_PROGRESS) if (bufHdr->flags & BM_IO_IN_PROGRESS)
{ {
WaitIO(bufHdr, BufMgrLock); WaitIO(bufHdr);
if (!(bufHdr->flags & BM_VALID) || if (!(bufHdr->flags & BM_VALID) ||
(!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty))) (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty)))
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
continue; continue;
} }
} }
...@@ -761,7 +759,7 @@ BufferSync() ...@@ -761,7 +759,7 @@ BufferSync()
buffer = BufferDescriptorGetBuffer(bufHdr); buffer = BufferDescriptorGetBuffer(bufHdr);
rnode = bufHdr->tag.rnode; rnode = bufHdr->tag.rnode;
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
/* /*
* Try to find relation for buffer * Try to find relation for buffer
...@@ -784,10 +782,10 @@ BufferSync() ...@@ -784,10 +782,10 @@ BufferSync()
* should not be able to write it while we were busy with locking * should not be able to write it while we were busy with locking
* and log flushing because we set the IO flag. * and log flushing because we set the IO flag.
*/ */
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty); Assert(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty);
bufHdr->flags &= ~BM_JUST_DIRTIED; bufHdr->flags &= ~BM_JUST_DIRTIED;
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
if (reln == (Relation) NULL) if (reln == (Relation) NULL)
{ {
...@@ -822,7 +820,7 @@ BufferSync() ...@@ -822,7 +820,7 @@ BufferSync()
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
BufferFlushCount++; BufferFlushCount++;
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
TerminateBufferIO(bufHdr); /* Sync IO finished */ TerminateBufferIO(bufHdr); /* Sync IO finished */
...@@ -834,7 +832,7 @@ BufferSync() ...@@ -834,7 +832,7 @@ BufferSync()
if (!(bufHdr->flags & BM_JUST_DIRTIED)) if (!(bufHdr->flags & BM_JUST_DIRTIED))
bufHdr->flags &= ~BM_DIRTY; bufHdr->flags &= ~BM_DIRTY;
UnpinBuffer(bufHdr); UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
/* drop refcnt obtained by RelationNodeCacheGetRelation */ /* drop refcnt obtained by RelationNodeCacheGetRelation */
if (reln != (Relation) NULL) if (reln != (Relation) NULL)
...@@ -846,24 +844,25 @@ BufferSync() ...@@ -846,24 +844,25 @@ BufferSync()
/* /*
* WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared. * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared.
* *
* Should be entered with buffer manager spinlock held; releases it before * Should be entered with buffer manager lock held; releases it before
* waiting and re-acquires it afterwards. * waiting and re-acquires it afterwards.
*/ */
static void static void
WaitIO(BufferDesc *buf, SPINLOCK spinlock) WaitIO(BufferDesc *buf)
{ {
/* /*
* Changed to wait until there's no IO - Inoue 01/13/2000 * Changed to wait until there's no IO - Inoue 01/13/2000
*
* Note this is *necessary* because an error abort in the process
* doing I/O could release the io_in_progress_lock prematurely.
* See AbortBufferIO.
*/ */
while ((buf->flags & BM_IO_IN_PROGRESS) != 0) while ((buf->flags & BM_IO_IN_PROGRESS) != 0)
{ {
SpinRelease(spinlock); LWLockRelease(BufMgrLock);
HOLD_INTERRUPTS(); /* don't want to die() holding the lock... */ LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
S_LOCK(&(buf->io_in_progress_lock)); LWLockRelease(buf->io_in_progress_lock);
S_UNLOCK(&(buf->io_in_progress_lock)); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
RESUME_INTERRUPTS();
SpinAcquire(spinlock);
} }
} }
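The rewritten WaitIO() works because the backend performing the I/O holds the buffer's io_in_progress_lock in exclusive mode for the whole I/O (see StartBufferIO/TerminateBufferIO below). Condensed to the two sides of the protocol:

	/* Performer side: the exclusive hold brackets the physical I/O. */
	LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
	/* ... smgrread() or smgrwrite() the page ... */
	LWLockRelease(buf->io_in_progress_lock);

	/* Waiter side: a shared request cannot be granted while an
	 * exclusive holder exists, so acquire-then-release blocks for
	 * exactly as long as the I/O is in progress, with no busy spin. */
	LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
	LWLockRelease(buf->io_in_progress_lock);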
...@@ -932,9 +931,9 @@ ResetBufferPool(bool isCommit) ...@@ -932,9 +931,9 @@ ResetBufferPool(bool isCommit)
BufferDesc *buf = &BufferDescriptors[i]; BufferDesc *buf = &BufferDescriptors[i];
PrivateRefCount[i] = 1; /* make sure we release shared pin */ PrivateRefCount[i] = 1; /* make sure we release shared pin */
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
UnpinBuffer(buf); UnpinBuffer(buf);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
Assert(PrivateRefCount[i] == 0); Assert(PrivateRefCount[i] == 0);
} }
} }
...@@ -1039,7 +1038,7 @@ BufferReplace(BufferDesc *bufHdr) ...@@ -1039,7 +1038,7 @@ BufferReplace(BufferDesc *bufHdr)
/* To check if block content changed while flushing. - vadim 01/17/97 */ /* To check if block content changed while flushing. - vadim 01/17/97 */
bufHdr->flags &= ~BM_JUST_DIRTIED; bufHdr->flags &= ~BM_JUST_DIRTIED;
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
/* /*
* No need to lock buffer context - no one should be able to end * No need to lock buffer context - no one should be able to end
...@@ -1067,7 +1066,7 @@ BufferReplace(BufferDesc *bufHdr) ...@@ -1067,7 +1066,7 @@ BufferReplace(BufferDesc *bufHdr)
if (reln != (Relation) NULL) if (reln != (Relation) NULL)
RelationDecrementReferenceCount(reln); RelationDecrementReferenceCount(reln);
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
if (status == SM_FAIL) if (status == SM_FAIL)
return FALSE; return FALSE;
...@@ -1140,7 +1139,8 @@ DropRelationBuffers(Relation rel) ...@@ -1140,7 +1139,8 @@ DropRelationBuffers(Relation rel)
return; return;
} }
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
for (i = 1; i <= NBuffers; i++) for (i = 1; i <= NBuffers; i++)
{ {
bufHdr = &BufferDescriptors[i - 1]; bufHdr = &BufferDescriptors[i - 1];
...@@ -1155,7 +1155,7 @@ recheck: ...@@ -1155,7 +1155,7 @@ recheck:
*/ */
if (bufHdr->flags & BM_IO_IN_PROGRESS) if (bufHdr->flags & BM_IO_IN_PROGRESS)
{ {
WaitIO(bufHdr, BufMgrLock); WaitIO(bufHdr);
/* /*
* By now, the buffer very possibly belongs to some other * By now, the buffer very possibly belongs to some other
...@@ -1189,7 +1189,7 @@ recheck: ...@@ -1189,7 +1189,7 @@ recheck:
} }
} }
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
/* --------------------------------------------------------------------- /* ---------------------------------------------------------------------
...@@ -1223,7 +1223,8 @@ DropRelFileNodeBuffers(RelFileNode rnode) ...@@ -1223,7 +1223,8 @@ DropRelFileNodeBuffers(RelFileNode rnode)
} }
} }
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
for (i = 1; i <= NBuffers; i++) for (i = 1; i <= NBuffers; i++)
{ {
bufHdr = &BufferDescriptors[i - 1]; bufHdr = &BufferDescriptors[i - 1];
...@@ -1238,7 +1239,7 @@ recheck: ...@@ -1238,7 +1239,7 @@ recheck:
*/ */
if (bufHdr->flags & BM_IO_IN_PROGRESS) if (bufHdr->flags & BM_IO_IN_PROGRESS)
{ {
WaitIO(bufHdr, BufMgrLock); WaitIO(bufHdr);
/* /*
* By now, the buffer very possibly belongs to some other * By now, the buffer very possibly belongs to some other
...@@ -1272,7 +1273,7 @@ recheck: ...@@ -1272,7 +1273,7 @@ recheck:
} }
} }
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
/* --------------------------------------------------------------------- /* ---------------------------------------------------------------------
...@@ -1292,7 +1293,8 @@ DropBuffers(Oid dbid) ...@@ -1292,7 +1293,8 @@ DropBuffers(Oid dbid)
int i; int i;
BufferDesc *bufHdr; BufferDesc *bufHdr;
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
for (i = 1; i <= NBuffers; i++) for (i = 1; i <= NBuffers; i++)
{ {
bufHdr = &BufferDescriptors[i - 1]; bufHdr = &BufferDescriptors[i - 1];
...@@ -1313,7 +1315,7 @@ recheck: ...@@ -1313,7 +1315,7 @@ recheck:
*/ */
if (bufHdr->flags & BM_IO_IN_PROGRESS) if (bufHdr->flags & BM_IO_IN_PROGRESS)
{ {
WaitIO(bufHdr, BufMgrLock); WaitIO(bufHdr);
/* /*
* By now, the buffer very possibly belongs to some other * By now, the buffer very possibly belongs to some other
...@@ -1337,7 +1339,8 @@ recheck: ...@@ -1337,7 +1339,8 @@ recheck:
BufTableDelete(bufHdr); BufTableDelete(bufHdr);
} }
} }
SpinRelease(BufMgrLock);
LWLockRelease(BufMgrLock);
} }
/* ----------------------------------------------------------------- /* -----------------------------------------------------------------
...@@ -1355,7 +1358,7 @@ PrintBufferDescs() ...@@ -1355,7 +1358,7 @@ PrintBufferDescs()
if (IsUnderPostmaster) if (IsUnderPostmaster)
{ {
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
for (i = 0; i < NBuffers; ++i, ++buf) for (i = 0; i < NBuffers; ++i, ++buf)
{ {
elog(DEBUG, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \ elog(DEBUG, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \
...@@ -1365,7 +1368,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)", ...@@ -1365,7 +1368,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)",
buf->tag.blockNum, buf->flags, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i]); buf->refcount, PrivateRefCount[i]);
} }
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
else else
{ {
...@@ -1386,7 +1389,7 @@ PrintPinnedBufs() ...@@ -1386,7 +1389,7 @@ PrintPinnedBufs()
int i; int i;
BufferDesc *buf = BufferDescriptors; BufferDesc *buf = BufferDescriptors;
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
for (i = 0; i < NBuffers; ++i, ++buf) for (i = 0; i < NBuffers; ++i, ++buf)
{ {
if (PrivateRefCount[i] > 0) if (PrivateRefCount[i] > 0)
...@@ -1397,7 +1400,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)", ...@@ -1397,7 +1400,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)",
buf->tag.blockNum, buf->flags, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i]); buf->refcount, PrivateRefCount[i]);
} }
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
/* /*
...@@ -1514,7 +1517,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) ...@@ -1514,7 +1517,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
return 0; return 0;
} }
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
for (i = 0; i < NBuffers; i++) for (i = 0; i < NBuffers; i++)
{ {
bufHdr = &BufferDescriptors[i]; bufHdr = &BufferDescriptors[i];
...@@ -1524,8 +1528,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) ...@@ -1524,8 +1528,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
{ {
PinBuffer(bufHdr); PinBuffer(bufHdr);
if (bufHdr->flags & BM_IO_IN_PROGRESS) if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock); WaitIO(bufHdr);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
/* /*
* Force XLOG flush for buffer' LSN * Force XLOG flush for buffer' LSN
...@@ -1537,16 +1541,16 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) ...@@ -1537,16 +1541,16 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
* Now it's safe to write buffer to disk * Now it's safe to write buffer to disk
*/ */
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
if (bufHdr->flags & BM_IO_IN_PROGRESS) if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock); WaitIO(bufHdr);
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{ {
bufHdr->flags &= ~BM_JUST_DIRTIED; bufHdr->flags &= ~BM_JUST_DIRTIED;
StartBufferIO(bufHdr, false); /* output IO start */ StartBufferIO(bufHdr, false); /* output IO start */
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
status = smgrwrite(DEFAULT_SMGR, rel, status = smgrwrite(DEFAULT_SMGR, rel,
bufHdr->tag.blockNum, bufHdr->tag.blockNum,
...@@ -1560,7 +1564,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) ...@@ -1560,7 +1564,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
BufferFlushCount++; BufferFlushCount++;
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
bufHdr->flags &= ~BM_IO_IN_PROGRESS; bufHdr->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(bufHdr); TerminateBufferIO(bufHdr);
Assert(!(bufHdr->flags & BM_JUST_DIRTIED)); Assert(!(bufHdr->flags & BM_JUST_DIRTIED));
...@@ -1578,7 +1582,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) ...@@ -1578,7 +1582,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
} }
if (!(bufHdr->flags & BM_FREE)) if (!(bufHdr->flags & BM_FREE))
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)", elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)",
RelationGetRelationName(rel), firstDelBlock, RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum, bufHdr->tag.blockNum,
...@@ -1589,7 +1593,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) ...@@ -1589,7 +1593,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
BufTableDelete(bufHdr); BufTableDelete(bufHdr);
} }
} }
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
return 0; return 0;
} }
...@@ -1621,9 +1625,9 @@ ReleaseBuffer(Buffer buffer) ...@@ -1621,9 +1625,9 @@ ReleaseBuffer(Buffer buffer)
PrivateRefCount[buffer - 1]--; PrivateRefCount[buffer - 1]--;
else else
{ {
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
UnpinBuffer(bufHdr); UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
return STATUS_OK; return STATUS_OK;
...@@ -1919,13 +1923,18 @@ SetBufferCommitInfoNeedsSave(Buffer buffer) ...@@ -1919,13 +1923,18 @@ SetBufferCommitInfoNeedsSave(Buffer buffer)
if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) != if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED)) (BM_DIRTY | BM_JUST_DIRTIED))
{ {
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(bufHdr->refcount > 0); Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
} }
/*
* Release buffer context locks for shared buffers.
*
* Used to clean up after errors.
*/
void void
UnlockBuffers(void) UnlockBuffers(void)
{ {
...@@ -1942,36 +1951,15 @@ UnlockBuffers(void) ...@@ -1942,36 +1951,15 @@ UnlockBuffers(void)
Assert(BufferIsValid(i + 1)); Assert(BufferIsValid(i + 1));
buf = &(BufferDescriptors[i]); buf = &(BufferDescriptors[i]);
HOLD_INTERRUPTS(); /* don't want to die() holding the lock... */ HOLD_INTERRUPTS(); /* don't want to die() partway through... */
S_LOCK(&(buf->cntx_lock));
if (buflocks & BL_R_LOCK)
{
Assert(buf->r_locks > 0);
(buf->r_locks)--;
}
if (buflocks & BL_RI_LOCK)
{
/*
* Someone else could remove our RI lock when acquiring W
* lock. This is possible if we came here from elog(ERROR)
* from IpcSemaphore{Lock|Unlock}(WaitCLSemId). And so we
* don't do Assert(buf->ri_lock) here.
*/
buf->ri_lock = false;
}
if (buflocks & BL_W_LOCK)
{
Assert(buf->w_lock);
buf->w_lock = false;
}
S_UNLOCK(&(buf->cntx_lock)); /*
* The buffer's cntx_lock has already been released by lwlock.c.
*/
if (buflocks & BL_PIN_COUNT_LOCK) if (buflocks & BL_PIN_COUNT_LOCK)
{ {
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
/* /*
* Don't complain if flag bit not set; it could have been reset * Don't complain if flag bit not set; it could have been reset
* but we got a cancel/die interrupt before getting the signal. * but we got a cancel/die interrupt before getting the signal.
...@@ -1979,7 +1967,7 @@ UnlockBuffers(void) ...@@ -1979,7 +1967,7 @@ UnlockBuffers(void)
if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 && if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
buf->wait_backend_id == MyBackendId) buf->wait_backend_id == MyBackendId)
buf->flags &= ~BM_PIN_COUNT_WAITER; buf->flags &= ~BM_PIN_COUNT_WAITER;
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
ProcCancelWaitForSignal(); ProcCancelWaitForSignal();
} }
...@@ -1989,94 +1977,31 @@ UnlockBuffers(void) ...@@ -1989,94 +1977,31 @@ UnlockBuffers(void)
} }
} }
/* Max time to wait to acquire a buffer read or write lock */ /*
#define BUFFER_LOCK_TIMEOUT (10*60*1000000) /* 10 minutes */ * Acquire or release the cntx_lock for the buffer.
*/
void void
LockBuffer(Buffer buffer, int mode) LockBuffer(Buffer buffer, int mode)
{ {
BufferDesc *buf; BufferDesc *buf;
bits8 *buflock;
Assert(BufferIsValid(buffer)); Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer)) if (BufferIsLocal(buffer))
return; return;
buf = &(BufferDescriptors[buffer - 1]); buf = &(BufferDescriptors[buffer - 1]);
buflock = &(BufferLocks[buffer - 1]);
HOLD_INTERRUPTS(); /* don't want to die() holding the lock... */
S_LOCK(&(buf->cntx_lock));
if (mode == BUFFER_LOCK_UNLOCK) if (mode == BUFFER_LOCK_UNLOCK)
{ {
if (*buflock & BL_R_LOCK) LWLockRelease(buf->cntx_lock);
{
Assert(buf->r_locks > 0);
Assert(!(buf->w_lock));
Assert(!(*buflock & (BL_W_LOCK | BL_RI_LOCK)));
(buf->r_locks)--;
*buflock &= ~BL_R_LOCK;
}
else if (*buflock & BL_W_LOCK)
{
Assert(buf->w_lock);
Assert(buf->r_locks == 0);
Assert(!(*buflock & (BL_R_LOCK | BL_RI_LOCK)));
buf->w_lock = false;
*buflock &= ~BL_W_LOCK;
}
else
{
S_UNLOCK(&(buf->cntx_lock));
RESUME_INTERRUPTS();
elog(ERROR, "UNLockBuffer: buffer %d is not locked", buffer);
}
} }
else if (mode == BUFFER_LOCK_SHARE) else if (mode == BUFFER_LOCK_SHARE)
{ {
unsigned i = 0; LWLockAcquire(buf->cntx_lock, LW_SHARED);
Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
while (buf->ri_lock || buf->w_lock)
{
S_UNLOCK(&(buf->cntx_lock));
RESUME_INTERRUPTS();
S_LOCK_SLEEP(&(buf->cntx_lock), i++, BUFFER_LOCK_TIMEOUT);
HOLD_INTERRUPTS();
S_LOCK(&(buf->cntx_lock));
}
(buf->r_locks)++;
*buflock |= BL_R_LOCK;
} }
else if (mode == BUFFER_LOCK_EXCLUSIVE) else if (mode == BUFFER_LOCK_EXCLUSIVE)
{ {
unsigned i = 0; LWLockAcquire(buf->cntx_lock, LW_EXCLUSIVE);
Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
while (buf->r_locks > 0 || buf->w_lock)
{
if (buf->r_locks > 3 || (*buflock & BL_RI_LOCK))
{
/*
* Our RI lock might be removed by concurrent W lock
* acquiring (see what we do with RI locks below when our
* own W acquiring succeeded) and so we set RI lock again
* if we already did this.
*/
*buflock |= BL_RI_LOCK;
buf->ri_lock = true;
}
S_UNLOCK(&(buf->cntx_lock));
RESUME_INTERRUPTS();
S_LOCK_SLEEP(&(buf->cntx_lock), i++, BUFFER_LOCK_TIMEOUT);
HOLD_INTERRUPTS();
S_LOCK(&(buf->cntx_lock));
}
buf->w_lock = true;
*buflock |= BL_W_LOCK;
/* /*
* This is not the best place to set cntxDirty flag (eg indices do * This is not the best place to set cntxDirty flag (eg indices do
...@@ -2085,27 +2010,11 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2085,27 +2010,11 @@ LockBuffer(Buffer buffer, int mode)
* changes with XLogInsert() - see comments in BufferSync(). * changes with XLogInsert() - see comments in BufferSync().
*/ */
buf->cntxDirty = true; buf->cntxDirty = true;
if (*buflock & BL_RI_LOCK)
{
/*
* It's possible to remove RI locks acquired by another W
* lockers here, but they'll take care about it.
*/
buf->ri_lock = false;
*buflock &= ~BL_RI_LOCK;
}
} }
else else
{ {
S_UNLOCK(&(buf->cntx_lock));
RESUME_INTERRUPTS();
elog(ERROR, "LockBuffer: unknown lock mode %d", mode); elog(ERROR, "LockBuffer: unknown lock mode %d", mode);
} }
S_UNLOCK(&(buf->cntx_lock));
RESUME_INTERRUPTS();
} }
/* /*
...@@ -2152,25 +2061,25 @@ LockBufferForCleanup(Buffer buffer) ...@@ -2152,25 +2061,25 @@ LockBufferForCleanup(Buffer buffer)
{ {
/* Try to acquire lock */ /* Try to acquire lock */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
SpinAcquire(BufMgrLock); LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(bufHdr->refcount > 0); Assert(bufHdr->refcount > 0);
if (bufHdr->refcount == 1) if (bufHdr->refcount == 1)
{ {
/* Successfully acquired exclusive lock with pincount 1 */ /* Successfully acquired exclusive lock with pincount 1 */
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
return; return;
} }
/* Failed, so mark myself as waiting for pincount 1 */ /* Failed, so mark myself as waiting for pincount 1 */
if (bufHdr->flags & BM_PIN_COUNT_WAITER) if (bufHdr->flags & BM_PIN_COUNT_WAITER)
{ {
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
elog(ERROR, "Multiple backends attempting to wait for pincount 1"); elog(ERROR, "Multiple backends attempting to wait for pincount 1");
} }
bufHdr->wait_backend_id = MyBackendId; bufHdr->wait_backend_id = MyBackendId;
bufHdr->flags |= BM_PIN_COUNT_WAITER; bufHdr->flags |= BM_PIN_COUNT_WAITER;
*buflock |= BL_PIN_COUNT_LOCK; *buflock |= BL_PIN_COUNT_LOCK;
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/* Wait to be signaled by UnpinBuffer() */ /* Wait to be signaled by UnpinBuffer() */
ProcWaitForSignal(); ProcWaitForSignal();
...@@ -2183,8 +2092,7 @@ LockBufferForCleanup(Buffer buffer) ...@@ -2183,8 +2092,7 @@ LockBufferForCleanup(Buffer buffer)
* Functions for IO error handling * Functions for IO error handling
* *
* Note: We assume that nested buffer IO never occurs. * Note: We assume that nested buffer IO never occurs.
* i.e. at most one io_in_progress spinlock is held * i.e., at most one io_in_progress lock is held per proc.
* per proc.
*/ */
static BufferDesc *InProgressBuf = (BufferDesc *) NULL; static BufferDesc *InProgressBuf = (BufferDesc *) NULL;
static bool IsForInput; static bool IsForInput;
...@@ -2207,18 +2115,7 @@ StartBufferIO(BufferDesc *buf, bool forInput) ...@@ -2207,18 +2115,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
Assert(!(buf->flags & BM_IO_IN_PROGRESS)); Assert(!(buf->flags & BM_IO_IN_PROGRESS));
buf->flags |= BM_IO_IN_PROGRESS; buf->flags |= BM_IO_IN_PROGRESS;
/* LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
* There used to be
*
* Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)));
*
* here, but that's wrong because of the way WaitIO works: someone else
* waiting for the I/O to complete will succeed in grabbing the lock
* for a few instructions, and if we context-swap back to here the
* Assert could fail. Tiny window for failure, but I've seen it
* happen -- tgl
*/
S_LOCK(&(buf->io_in_progress_lock));
InProgressBuf = buf; InProgressBuf = buf;
IsForInput = forInput; IsForInput = forInput;
...@@ -2238,7 +2135,7 @@ static void ...@@ -2238,7 +2135,7 @@ static void
TerminateBufferIO(BufferDesc *buf) TerminateBufferIO(BufferDesc *buf)
{ {
Assert(buf == InProgressBuf); Assert(buf == InProgressBuf);
S_UNLOCK(&(buf->io_in_progress_lock)); LWLockRelease(buf->io_in_progress_lock);
InProgressBuf = (BufferDesc *) 0; InProgressBuf = (BufferDesc *) 0;
} }
...@@ -2271,7 +2168,6 @@ InitBufferIO(void) ...@@ -2271,7 +2168,6 @@ InitBufferIO(void)
/* /*
* Clean up any active buffer I/O after an error. * Clean up any active buffer I/O after an error.
* This function is called from ProcReleaseSpins().
* BufMgrLock isn't held when this function is called. * BufMgrLock isn't held when this function is called.
* *
* If I/O was in progress, we always set BM_IO_ERROR. * If I/O was in progress, we always set BM_IO_ERROR.
...@@ -2283,7 +2179,16 @@ AbortBufferIO(void) ...@@ -2283,7 +2179,16 @@ AbortBufferIO(void)
if (buf) if (buf)
{ {
SpinAcquire(BufMgrLock); /*
* Since LWLockReleaseAll has already been called,
* we're not holding the buffer's io_in_progress_lock.
* We have to re-acquire it so that we can use TerminateBufferIO.
* Anyone who's executing WaitIO on the buffer will be in a busy spin
* until we succeed in doing this.
*/
LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(buf->flags & BM_IO_IN_PROGRESS); Assert(buf->flags & BM_IO_IN_PROGRESS);
if (IsForInput) if (IsForInput)
Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty)); Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
...@@ -2302,7 +2207,7 @@ AbortBufferIO(void) ...@@ -2302,7 +2207,7 @@ AbortBufferIO(void)
buf->flags |= BM_IO_ERROR; buf->flags |= BM_IO_ERROR;
buf->flags &= ~BM_IO_IN_PROGRESS; buf->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(buf); TerminateBufferIO(buf);
SpinRelease(BufMgrLock); LWLockRelease(BufMgrLock);
} }
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.24 2001/07/06 21:04:26 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.25 2001/09/29 04:02:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "storage/buf_internals.h" #include "storage/buf_internals.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h" #include "storage/proc.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.4 2001/07/19 21:25:37 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.5 2001/09/29 04:02:23 tgl Exp $
* *
* *
* NOTES: * NOTES:
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/itemid.h" #include "storage/itemid.h"
#include "storage/lwlock.h"
#include "storage/shmem.h" #include "storage/shmem.h"
...@@ -122,9 +123,6 @@ struct FSMChunk ...@@ -122,9 +123,6 @@ struct FSMChunk
}; };
SPINLOCK FreeSpaceLock; /* in Shmem or created in
* CreateSpinlocks() */
int MaxFSMRelations; /* these are set by guc.c */ int MaxFSMRelations; /* these are set by guc.c */
int MaxFSMPages; int MaxFSMPages;
...@@ -256,7 +254,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) ...@@ -256,7 +254,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
FSMRelation *fsmrel; FSMRelation *fsmrel;
BlockNumber freepage; BlockNumber freepage;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* /*
* We always add a rel to the hashtable when it is inquired about. * We always add a rel to the hashtable when it is inquired about.
*/ */
...@@ -279,7 +277,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) ...@@ -279,7 +277,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
fsmrel->threshold = (Size) cur_avg; fsmrel->threshold = (Size) cur_avg;
} }
freepage = find_free_space(fsmrel, spaceNeeded); freepage = find_free_space(fsmrel, spaceNeeded);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
return freepage; return freepage;
} }
...@@ -299,7 +297,7 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail) ...@@ -299,7 +297,7 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
/* Sanity check: ensure spaceAvail will fit into ItemLength */ /* Sanity check: ensure spaceAvail will fit into ItemLength */
AssertArg(spaceAvail < BLCKSZ); AssertArg(spaceAvail < BLCKSZ);
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* /*
* We choose not to add rels to the hashtable unless they've been * We choose not to add rels to the hashtable unless they've been
* inquired about with GetPageWithFreeSpace. Also, a Record operation * inquired about with GetPageWithFreeSpace. Also, a Record operation
...@@ -308,11 +306,11 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail) ...@@ -308,11 +306,11 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
fsmrel = lookup_fsm_rel(rel); fsmrel = lookup_fsm_rel(rel);
if (fsmrel) if (fsmrel)
fsm_record_free_space(fsmrel, page, spaceAvail); fsm_record_free_space(fsmrel, page, spaceAvail);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
/* /*
* RecordAndGetPageWithFreeSpace - combo form to save one spinlock and * RecordAndGetPageWithFreeSpace - combo form to save one lock and
* hash table lookup cycle. * hash table lookup cycle.
*/ */
BlockNumber BlockNumber
...@@ -327,7 +325,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, ...@@ -327,7 +325,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel,
/* Sanity check: ensure spaceAvail will fit into ItemLength */ /* Sanity check: ensure spaceAvail will fit into ItemLength */
AssertArg(oldSpaceAvail < BLCKSZ); AssertArg(oldSpaceAvail < BLCKSZ);
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* /*
* We always add a rel to the hashtable when it is inquired about. * We always add a rel to the hashtable when it is inquired about.
*/ */
...@@ -351,7 +349,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, ...@@ -351,7 +349,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel,
fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail); fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
/* Do the Get */ /* Do the Get */
freepage = find_free_space(fsmrel, spaceNeeded); freepage = find_free_space(fsmrel, spaceNeeded);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
return freepage; return freepage;
} }
...@@ -378,7 +376,7 @@ MultiRecordFreeSpace(RelFileNode *rel, ...@@ -378,7 +376,7 @@ MultiRecordFreeSpace(RelFileNode *rel,
FSMRelation *fsmrel; FSMRelation *fsmrel;
int i; int i;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
fsmrel = lookup_fsm_rel(rel); fsmrel = lookup_fsm_rel(rel);
if (fsmrel) if (fsmrel)
{ {
...@@ -437,7 +435,7 @@ MultiRecordFreeSpace(RelFileNode *rel, ...@@ -437,7 +435,7 @@ MultiRecordFreeSpace(RelFileNode *rel,
fsm_record_free_space(fsmrel, page, avail); fsm_record_free_space(fsmrel, page, avail);
} }
} }
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
/* /*
...@@ -452,11 +450,11 @@ FreeSpaceMapForgetRel(RelFileNode *rel) ...@@ -452,11 +450,11 @@ FreeSpaceMapForgetRel(RelFileNode *rel)
{ {
FSMRelation *fsmrel; FSMRelation *fsmrel;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
fsmrel = lookup_fsm_rel(rel); fsmrel = lookup_fsm_rel(rel);
if (fsmrel) if (fsmrel)
delete_fsm_rel(fsmrel); delete_fsm_rel(fsmrel);
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
/* /*
...@@ -474,14 +472,14 @@ FreeSpaceMapForgetDatabase(Oid dbid) ...@@ -474,14 +472,14 @@ FreeSpaceMapForgetDatabase(Oid dbid)
FSMRelation *fsmrel, FSMRelation *fsmrel,
*nextrel; *nextrel;
SpinAcquire(FreeSpaceLock); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel) for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel)
{ {
nextrel = fsmrel->nextRel; /* in case we delete it */ nextrel = fsmrel->nextRel; /* in case we delete it */
if (fsmrel->key.tblNode == dbid) if (fsmrel->key.tblNode == dbid)
delete_fsm_rel(fsmrel); delete_fsm_rel(fsmrel);
} }
SpinRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
......
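As its comment says, the combo form exists to amortize locking: one FreeSpaceLock acquire/release and one hash-table lookup instead of two of each. A hedged call sketch (variable names are illustrative; the signature follows the hunk above):

	/* Record the now-known free space on the page we just filled, and
	 * in the same locked section find a page for the next tuple. */
	BlockNumber targetBlock = RecordAndGetPageWithFreeSpace(&rnode,
								oldPage, oldSpaceAvail, spaceNeeded);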
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.68 2001/09/04 00:22:34 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.69 2001/09/29 04:02:23 tgl Exp $
* *
* NOTES * NOTES
* *
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include <unistd.h> #include <unistd.h>
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/s_lock.h"
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */ /* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
#ifdef HAVE_SYS_SEM_H #ifdef HAVE_SYS_SEM_H
#include <sys/sem.h> #include <sys/sem.h>
...@@ -306,7 +305,7 @@ InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, ...@@ -306,7 +305,7 @@ InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
if (errno == ENOSPC) if (errno == ENOSPC)
fprintf(stderr, fprintf(stderr,
"\nThis error does *not* mean that you have run out of disk space.\n\n" "\nThis error does *not* mean that you have run out of disk space.\n\n"
"It occurs either because system limit for the maximum number of\n" "It occurs because either the system limit for the maximum number of\n"
"semaphore sets (SEMMNI), or the system wide maximum number of\n" "semaphore sets (SEMMNI), or the system wide maximum number of\n"
"semaphores (SEMMNS), would be exceeded. You need to raise the\n" "semaphores (SEMMNS), would be exceeded. You need to raise the\n"
"respective kernel parameter. Look into the PostgreSQL documentation\n" "respective kernel parameter. Look into the PostgreSQL documentation\n"
...@@ -416,8 +415,8 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK) ...@@ -416,8 +415,8 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK)
* record acquiring the lock. (This is currently true for lockmanager * record acquiring the lock. (This is currently true for lockmanager
* locks, since the process that granted us the lock did all the * locks, since the process that granted us the lock did all the
* necessary state updates. It's not true for SysV semaphores used to * necessary state updates. It's not true for SysV semaphores used to
* emulate spinlocks --- but our performance on such platforms is so * implement LW locks or emulate spinlocks --- but the wait time for
* horrible anyway that I'm not going to worry too much about it.) * such locks should not be very long, anyway.)
*/ */
do do
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.42 2001/08/25 18:52:42 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.43 2001/09/29 04:02:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/lwlock.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/sinval.h" #include "storage/sinval.h"
#include "storage/spin.h" #include "storage/spin.h"
...@@ -53,7 +54,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) ...@@ -53,7 +54,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
size += LockShmemSize(maxBackends); size += LockShmemSize(maxBackends);
size += XLOGShmemSize(); size += XLOGShmemSize();
size += CLOGShmemSize(); size += CLOGShmemSize();
size += SLockShmemSize(); size += LWLockShmemSize();
size += SInvalShmemSize(maxBackends); size += SInvalShmemSize(maxBackends);
size += FreeSpaceShmemSize(); size += FreeSpaceShmemSize();
#ifdef STABLE_MEMORY_STORAGE #ifdef STABLE_MEMORY_STORAGE
...@@ -74,13 +75,24 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) ...@@ -74,13 +75,24 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
/* /*
* First initialize spinlocks --- needed by InitShmemAllocation() * First initialize spinlocks --- needed by InitShmemAllocation()
*/ */
CreateSpinlocks(seghdr); CreateSpinlocks();
/* /*
* Set up shmem.c hashtable * Set up shared memory allocation mechanism
*/ */
InitShmemAllocation(seghdr); InitShmemAllocation(seghdr);
/*
* Now initialize LWLocks, which do shared memory allocation and
* are needed for InitShmemIndex.
*/
CreateLWLocks();
/*
* Set up shmem.c index hashtable
*/
InitShmemIndex();
/* /*
* Set up xlog, clog, and buffers * Set up xlog, clog, and buffers
*/ */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.58 2001/09/07 00:27:29 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.59 2001/09/29 04:02:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -61,8 +61,10 @@ ...@@ -61,8 +61,10 @@
#include "postgres.h" #include "postgres.h"
#include "access/transam.h" #include "access/transam.h"
#include "storage/spin.h"
#include "utils/tqual.h" #include "utils/tqual.h"
/* shared memory global variables */ /* shared memory global variables */
static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */ static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
...@@ -71,9 +73,7 @@ SHMEM_OFFSET ShmemBase; /* start address of shared memory */ ...@@ -71,9 +73,7 @@ SHMEM_OFFSET ShmemBase; /* start address of shared memory */
static SHMEM_OFFSET ShmemEnd; /* end+1 address of shared memory */ static SHMEM_OFFSET ShmemEnd; /* end+1 address of shared memory */
SPINLOCK ShmemLock; /* lock for shared memory allocation */ static slock_t *ShmemLock; /* spinlock for shared memory allocation */
SPINLOCK ShmemIndexLock; /* lock for shmem index access */
static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */ static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
...@@ -81,63 +81,33 @@ static bool ShmemBootstrap = false; /* bootstrapping shmem index? */ ...@@ -81,63 +81,33 @@ static bool ShmemBootstrap = false; /* bootstrapping shmem index? */
/* /*
* InitShmemAllocation() --- set up shared-memory allocation and index table. * InitShmemAllocation() --- set up shared-memory allocation.
*
* Note: the argument should be declared "PGShmemHeader *seghdr",
* but we use void to avoid having to include ipc.h in shmem.h.
*/ */
void void
InitShmemAllocation(PGShmemHeader *seghdr) InitShmemAllocation(void *seghdr)
{ {
HASHCTL info; PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
int hash_flags;
ShmemIndexEnt *result,
item;
bool found;
/* Set up basic pointers to shared memory */ /* Set up basic pointers to shared memory */
ShmemSegHdr = seghdr; ShmemSegHdr = shmhdr;
ShmemBase = (SHMEM_OFFSET) seghdr; ShmemBase = (SHMEM_OFFSET) shmhdr;
ShmemEnd = ShmemBase + seghdr->totalsize; ShmemEnd = ShmemBase + shmhdr->totalsize;
/*
* Since ShmemInitHash calls ShmemInitStruct, which expects the
* ShmemIndex hashtable to exist already, we have a bit of a
* circularity problem in initializing the ShmemIndex itself. We set
* ShmemBootstrap to tell ShmemInitStruct to fake it.
*/
ShmemIndex = (HTAB *) NULL;
ShmemBootstrap = true;
/* create the shared memory shmem index */
info.keysize = SHMEM_INDEX_KEYSIZE;
info.datasize = SHMEM_INDEX_DATASIZE;
hash_flags = HASH_ELEM;
/* This will acquire the shmem index lock, but not release it. */
ShmemIndex = ShmemInitHash("ShmemIndex",
SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
&info, hash_flags);
if (!ShmemIndex)
elog(FATAL, "InitShmemAllocation: couldn't initialize Shmem Index");
/* /*
* Now, create an entry in the hashtable for the index itself. * Initialize the spinlock used by ShmemAlloc. We have to do the
* space allocation the hard way, since ShmemAlloc can't be called yet.
*/ */
MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE); ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE); shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
Assert(shmhdr->freeoffset <= shmhdr->totalsize);
result = (ShmemIndexEnt *) SpinLockInit(ShmemLock);
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
if (!result)
elog(FATAL, "InitShmemAllocation: corrupted shmem index");
Assert(ShmemBootstrap && !found); /* ShmemIndex can't be set up yet (need LWLocks first) */
ShmemIndex = (HTAB *) NULL;
result->location = MAKE_OFFSET(ShmemIndex->hctl);
result->size = SHMEM_INDEX_SIZE;
ShmemBootstrap = false;
/* now release the lock acquired in ShmemInitStruct */
SpinRelease(ShmemIndexLock);
/* /*
* Initialize ShmemVariableCache for transaction manager. * Initialize ShmemVariableCache for transaction manager.
...@@ -167,9 +137,9 @@ ShmemAlloc(Size size) ...@@ -167,9 +137,9 @@ ShmemAlloc(Size size)
*/ */
size = MAXALIGN(size); size = MAXALIGN(size);
Assert(ShmemSegHdr); Assert(ShmemSegHdr != NULL);
SpinAcquire(ShmemLock); SpinLockAcquire(ShmemLock);
newFree = ShmemSegHdr->freeoffset + size; newFree = ShmemSegHdr->freeoffset + size;
if (newFree <= ShmemSegHdr->totalsize) if (newFree <= ShmemSegHdr->totalsize)
...@@ -180,7 +150,7 @@ ShmemAlloc(Size size) ...@@ -180,7 +150,7 @@ ShmemAlloc(Size size)
else else
newSpace = NULL; newSpace = NULL;
SpinRelease(ShmemLock); SpinLockRelease(ShmemLock);
if (!newSpace) if (!newSpace)
elog(NOTICE, "ShmemAlloc: out of memory"); elog(NOTICE, "ShmemAlloc: out of memory");
...@@ -199,6 +169,60 @@ ShmemIsValid(unsigned long addr) ...@@ -199,6 +169,60 @@ ShmemIsValid(unsigned long addr)
return (addr < ShmemEnd) && (addr >= ShmemBase); return (addr < ShmemEnd) && (addr >= ShmemBase);
} }
/*
* InitShmemIndex() --- set up shmem index table.
*/
void
InitShmemIndex(void)
{
HASHCTL info;
int hash_flags;
ShmemIndexEnt *result,
item;
bool found;
/*
* Since ShmemInitHash calls ShmemInitStruct, which expects the
* ShmemIndex hashtable to exist already, we have a bit of a
* circularity problem in initializing the ShmemIndex itself. We set
* ShmemBootstrap to tell ShmemInitStruct to fake it.
*/
ShmemBootstrap = true;
/* create the shared memory shmem index */
info.keysize = SHMEM_INDEX_KEYSIZE;
info.datasize = SHMEM_INDEX_DATASIZE;
hash_flags = HASH_ELEM;
/* This will acquire the shmem index lock, but not release it. */
ShmemIndex = ShmemInitHash("ShmemIndex",
SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
&info, hash_flags);
if (!ShmemIndex)
elog(FATAL, "InitShmemIndex: couldn't initialize Shmem Index");
/*
* Now, create an entry in the hashtable for the index itself.
*/
MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
if (!result)
elog(FATAL, "InitShmemIndex: corrupted shmem index");
Assert(ShmemBootstrap && !found);
result->location = MAKE_OFFSET(ShmemIndex->hctl);
result->size = SHMEM_INDEX_SIZE;
ShmemBootstrap = false;
/* now release the lock acquired in ShmemInitStruct */
LWLockRelease(ShmemIndexLock);
}
/* /*
* ShmemInitHash -- Create/Attach to and initialize * ShmemInitHash -- Create/Attach to and initialize
* shared memory hash table. * shared memory hash table.
...@@ -207,8 +231,7 @@ ShmemIsValid(unsigned long addr) ...@@ -207,8 +231,7 @@ ShmemIsValid(unsigned long addr)
* *
* assume caller is doing some kind of synchronization * assume caller is doing some kind of synchronization
* so that two people don't try to create/initialize the * so that two people don't try to create/initialize the
* table at once. Use SpinAlloc() to create a spinlock * table at once.
* for the structure before creating the structure itself.
*/ */
HTAB * HTAB *
ShmemInitHash(char *name, /* table string name for shmem index */ ShmemInitHash(char *name, /* table string name for shmem index */
...@@ -283,7 +306,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -283,7 +306,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
strncpy(item.key, name, SHMEM_INDEX_KEYSIZE); strncpy(item.key, name, SHMEM_INDEX_KEYSIZE);
item.location = BAD_LOCATION; item.location = BAD_LOCATION;
SpinAcquire(ShmemIndexLock); LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
if (!ShmemIndex) if (!ShmemIndex)
{ {
...@@ -306,7 +329,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -306,7 +329,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
if (!result) if (!result)
{ {
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
elog(ERROR, "ShmemInitStruct: Shmem Index corrupted"); elog(ERROR, "ShmemInitStruct: Shmem Index corrupted");
return NULL; return NULL;
} }
...@@ -320,7 +343,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -320,7 +343,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
*/ */
if (result->size != size) if (result->size != size)
{ {
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
elog(NOTICE, "ShmemInitStruct: ShmemIndex entry size is wrong"); elog(NOTICE, "ShmemInitStruct: ShmemIndex entry size is wrong");
/* let caller print its message too */ /* let caller print its message too */
...@@ -337,7 +360,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -337,7 +360,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
/* out of memory */ /* out of memory */
Assert(ShmemIndex); Assert(ShmemIndex);
hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, foundPtr); hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, foundPtr);
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
*foundPtr = FALSE; *foundPtr = FALSE;
elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'", elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'",
...@@ -349,6 +372,6 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -349,6 +372,6 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
} }
Assert(ShmemIsValid((unsigned long) structPtr)); Assert(ShmemIsValid((unsigned long) structPtr));
SpinRelease(ShmemIndexLock); LWLockRelease(ShmemIndexLock);
return structPtr; return structPtr;
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.40 2001/08/26 16:56:00 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.41 2001/09/29 04:02:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,8 +23,6 @@ ...@@ -23,8 +23,6 @@
#include "miscadmin.h" #include "miscadmin.h"
SPINLOCK SInvalLock = (SPINLOCK) NULL;
/****************************************************************************/ /****************************************************************************/
/* CreateSharedInvalidationState() Initialize SI buffer */ /* CreateSharedInvalidationState() Initialize SI buffer */
/* */ /* */
...@@ -33,7 +31,7 @@ SPINLOCK SInvalLock = (SPINLOCK) NULL; ...@@ -33,7 +31,7 @@ SPINLOCK SInvalLock = (SPINLOCK) NULL;
void void
CreateSharedInvalidationState(int maxBackends) CreateSharedInvalidationState(int maxBackends)
{ {
/* SInvalLock must be initialized already, during spinlock init */ /* SInvalLock must be initialized already, during LWLock init */
SIBufferInit(maxBackends); SIBufferInit(maxBackends);
} }
...@@ -46,9 +44,9 @@ InitBackendSharedInvalidationState(void) ...@@ -46,9 +44,9 @@ InitBackendSharedInvalidationState(void)
{ {
int flag; int flag;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
flag = SIBackendInit(shmInvalBuffer); flag = SIBackendInit(shmInvalBuffer);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
if (flag < 0) /* unexpected problem */ if (flag < 0) /* unexpected problem */
elog(FATAL, "Backend cache invalidation initialization failed"); elog(FATAL, "Backend cache invalidation initialization failed");
if (flag == 0) /* expected problem: MaxBackends exceeded */ if (flag == 0) /* expected problem: MaxBackends exceeded */
...@@ -64,9 +62,9 @@ SendSharedInvalidMessage(SharedInvalidationMessage *msg) ...@@ -64,9 +62,9 @@ SendSharedInvalidMessage(SharedInvalidationMessage *msg)
{ {
bool insertOK; bool insertOK;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
insertOK = SIInsertDataEntry(shmInvalBuffer, msg); insertOK = SIInsertDataEntry(shmInvalBuffer, msg);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
if (!insertOK) if (!insertOK)
elog(DEBUG, "SendSharedInvalidMessage: SI buffer overflow"); elog(DEBUG, "SendSharedInvalidMessage: SI buffer overflow");
} }
...@@ -86,9 +84,25 @@ ReceiveSharedInvalidMessages( ...@@ -86,9 +84,25 @@ ReceiveSharedInvalidMessages(
for (;;) for (;;)
{ {
SpinAcquire(SInvalLock); /*
* We can run SIGetDataEntry in parallel with other backends running
* SIGetDataEntry for themselves, since each instance will modify
* only fields of its own backend's ProcState, and no instance will
* look at fields of other backends' ProcStates. We express this
* by grabbing SInvalLock in shared mode. Note that this is not
* exactly the normal (read-only) interpretation of a shared lock!
* Look closely at the interactions before allowing SInvalLock to
* be grabbed in shared mode for any other reason!
*
* The routines later in this file that use shared mode are okay
* with this, because they aren't looking at the ProcState fields
* associated with SI message transfer; they only use the ProcState
* array as an easy way to find all the PROC structures.
*/
LWLockAcquire(SInvalLock, LW_SHARED);
getResult = SIGetDataEntry(shmInvalBuffer, MyBackendId, &data); getResult = SIGetDataEntry(shmInvalBuffer, MyBackendId, &data);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
if (getResult == 0) if (getResult == 0)
break; /* nothing more to do */ break; /* nothing more to do */
if (getResult < 0) if (getResult < 0)
...@@ -108,9 +122,9 @@ ReceiveSharedInvalidMessages( ...@@ -108,9 +122,9 @@ ReceiveSharedInvalidMessages(
/* If we got any messages, try to release dead messages */ /* If we got any messages, try to release dead messages */
if (gotMessage) if (gotMessage)
{ {
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
SIDelExpiredDataEntries(shmInvalBuffer); SIDelExpiredDataEntries(shmInvalBuffer);
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
} }
} }
...@@ -149,7 +163,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself) ...@@ -149,7 +163,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
ProcState *stateP = segP->procState; ProcState *stateP = segP->procState;
int index; int index;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -170,7 +184,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself) ...@@ -170,7 +184,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return result; return result;
} }
...@@ -186,7 +200,7 @@ TransactionIdIsInProgress(TransactionId xid) ...@@ -186,7 +200,7 @@ TransactionIdIsInProgress(TransactionId xid)
ProcState *stateP = segP->procState; ProcState *stateP = segP->procState;
int index; int index;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -206,7 +220,7 @@ TransactionIdIsInProgress(TransactionId xid) ...@@ -206,7 +220,7 @@ TransactionIdIsInProgress(TransactionId xid)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return result; return result;
} }
...@@ -237,7 +251,7 @@ GetOldestXmin(bool allDbs) ...@@ -237,7 +251,7 @@ GetOldestXmin(bool allDbs)
result = GetCurrentTransactionId(); result = GetCurrentTransactionId();
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -265,7 +279,7 @@ GetOldestXmin(bool allDbs) ...@@ -265,7 +279,7 @@ GetOldestXmin(bool allDbs)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return result; return result;
} }
...@@ -298,7 +312,7 @@ GetSnapshotData(bool serializable) ...@@ -298,7 +312,7 @@ GetSnapshotData(bool serializable)
snapshot->xmin = GetCurrentTransactionId(); snapshot->xmin = GetCurrentTransactionId();
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
/* /*
* There can be no more than lastBackend active transactions, so this * There can be no more than lastBackend active transactions, so this
...@@ -307,15 +321,12 @@ GetSnapshotData(bool serializable) ...@@ -307,15 +321,12 @@ GetSnapshotData(bool serializable)
snapshot->xip = (TransactionId *) snapshot->xip = (TransactionId *)
malloc(segP->lastBackend * sizeof(TransactionId)); malloc(segP->lastBackend * sizeof(TransactionId));
if (snapshot->xip == NULL) if (snapshot->xip == NULL)
{
SpinRelease(SInvalLock);
elog(ERROR, "Memory exhausted in GetSnapshotData"); elog(ERROR, "Memory exhausted in GetSnapshotData");
}
/*-------------------- /*--------------------
* Unfortunately, we have to call ReadNewTransactionId() after acquiring * Unfortunately, we have to call ReadNewTransactionId() after acquiring
* SInvalLock above. It's not good because ReadNewTransactionId() does * SInvalLock above. It's not good because ReadNewTransactionId() does
* SpinAcquire(XidGenLockId), but *necessary*. We need to be sure that * LWLockAcquire(XidGenLock), but *necessary*. We need to be sure that
* no transactions exit the set of currently-running transactions * no transactions exit the set of currently-running transactions
* between the time we fetch xmax and the time we finish building our * between the time we fetch xmax and the time we finish building our
* snapshot. Otherwise we could have a situation like this: * snapshot. Otherwise we could have a situation like this:
...@@ -373,7 +384,7 @@ GetSnapshotData(bool serializable) ...@@ -373,7 +384,7 @@ GetSnapshotData(bool serializable)
if (serializable) if (serializable)
MyProc->xmin = snapshot->xmin; MyProc->xmin = snapshot->xmin;
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
/* Serializable snapshot must be computed before any other... */ /* Serializable snapshot must be computed before any other... */
Assert(TransactionIdIsValid(MyProc->xmin)); Assert(TransactionIdIsValid(MyProc->xmin));
...@@ -439,7 +450,7 @@ GetUndoRecPtr(void) ...@@ -439,7 +450,7 @@ GetUndoRecPtr(void)
XLogRecPtr tempr; XLogRecPtr tempr;
int index; int index;
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_SHARED);
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
{ {
...@@ -458,7 +469,7 @@ GetUndoRecPtr(void) ...@@ -458,7 +469,7 @@ GetUndoRecPtr(void)
} }
} }
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
return (urec); return (urec);
} }
...@@ -470,7 +481,7 @@ GetUndoRecPtr(void) ...@@ -470,7 +481,7 @@ GetUndoRecPtr(void)
* knows that the backend isn't going to go away, so we do not bother with * knows that the backend isn't going to go away, so we do not bother with
* locking. * locking.
*/ */
struct proc * struct PROC *
BackendIdGetProc(BackendId procId) BackendIdGetProc(BackendId procId)
{ {
SISeg *segP = shmInvalBuffer; SISeg *segP = shmInvalBuffer;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.40 2001/06/19 19:42:15 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.41 2001/09/29 04:02:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -83,7 +83,7 @@ SIBufferInit(int maxBackends) ...@@ -83,7 +83,7 @@ SIBufferInit(int maxBackends)
* <0 Some other failure (not currently used) * <0 Some other failure (not currently used)
* *
* NB: this routine, and all following ones, must be executed with the * NB: this routine, and all following ones, must be executed with the
* SInvalLock spinlock held, since there may be multiple backends trying * SInvalLock lock held, since there may be multiple backends trying
* to access the buffer. * to access the buffer.
*/ */
int int
...@@ -152,7 +152,7 @@ CleanupInvalidationState(int status, Datum arg) ...@@ -152,7 +152,7 @@ CleanupInvalidationState(int status, Datum arg)
Assert(PointerIsValid(segP)); Assert(PointerIsValid(segP));
SpinAcquire(SInvalLock); LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
/* Mark myself inactive */ /* Mark myself inactive */
segP->procState[MyBackendId - 1].nextMsgNum = -1; segP->procState[MyBackendId - 1].nextMsgNum = -1;
...@@ -167,7 +167,7 @@ CleanupInvalidationState(int status, Datum arg) ...@@ -167,7 +167,7 @@ CleanupInvalidationState(int status, Datum arg)
} }
segP->lastBackend = i; segP->lastBackend = i;
SpinRelease(SInvalLock); LWLockRelease(SInvalLock);
} }
/* /*
...@@ -267,6 +267,10 @@ SISetProcStateInvalid(SISeg *segP) ...@@ -267,6 +267,10 @@ SISetProcStateInvalid(SISeg *segP)
* 1: next SI message has been extracted into *data * 1: next SI message has been extracted into *data
* (there may be more messages available after this one!) * (there may be more messages available after this one!)
* -1: SI reset message extracted * -1: SI reset message extracted
*
* NB: this can run in parallel with other instances of SIGetDataEntry
* executing on behalf of other backends. See comments in sinval.c in
* ReceiveSharedInvalidMessages().
*/ */
int int
SIGetDataEntry(SISeg *segP, int backendId, SIGetDataEntry(SISeg *segP, int backendId,
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# Makefile for storage/lmgr # Makefile for storage/lmgr
# #
# IDENTIFICATION # IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Makefile,v 1.16 2001/09/27 19:10:02 tgl Exp $ # $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Makefile,v 1.17 2001/09/29 04:02:24 tgl Exp $
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
...@@ -12,7 +12,7 @@ subdir = src/backend/storage/lmgr ...@@ -12,7 +12,7 @@ subdir = src/backend/storage/lmgr
top_builddir = ../../../.. top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
OBJS = lmgr.o lock.o proc.o deadlock.o spin.o s_lock.o OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o
all: SUBSYS.o all: SUBSYS.o
......
$Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.8 2001/01/26 18:23:12 tgl Exp $ $Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.9 2001/09/29 04:02:24 tgl Exp $
LOCKING OVERVIEW
Postgres uses three types of interprocess locks:
* Spinlocks. These are intended for *very* short-term locks. If a lock
is to be held more than a few dozen instructions, or across any sort of
kernel call (or even a call to a nontrivial subroutine), don't use a spinlock.
Spinlocks are primarily used as infrastructure for lightweight locks.
They are implemented using a hardware atomic-test-and-set instruction,
if available. Waiting processes busy-loop until they can get the lock.
There is no provision for deadlock detection, automatic release on error,
or any other nicety. There is a timeout if the lock cannot be gotten after
a minute or so (which is approximately forever in comparison to the intended
lock hold time, so this is certainly an error condition).
* Lightweight locks (LWLocks). These locks are typically used to interlock
access to datastructures in shared memory. LWLocks support both exclusive
and shared lock modes (for read/write and read-only access to a shared object).
There is no provision for deadlock detection, but the LWLock manager will
automatically release held LWLocks during elog() recovery, so it is safe to
raise an error while holding LWLocks. Obtaining or releasing an LWLock is
quite fast (a few dozen instructions) when there is no contention for the
lock. When a process has to wait for an LWLock, it blocks on a SysV semaphore
so as to not consume CPU time. Waiting processes will be granted the lock
in arrival order. There is no timeout.
* Regular locks (a/k/a heavyweight locks). The regular lock manager supports
a variety of lock modes with table-driven semantics, and it has full deadlock
detection and automatic release at transaction end. Regular locks should be
used for all user-driven lock requests.
Acquisition of either a spinlock or a lightweight lock causes query cancel
and die() interrupts to be held off until all such locks are released.
No such restriction exists for regular locks, however. Also note that we
can accept query cancel and die() interrupts while waiting for a regular
lock, but we will not accept them while waiting for spinlocks or LW locks.
It is therefore not a good idea to use LW locks when the wait time might
exceed a few seconds.
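To make the usage contrast concrete, here is a minimal sketch built on the
spinlock and LWLock interfaces used elsewhere in this patch (SpinLockAcquire,
SpinLockRelease, LWLockAcquire, LWLockConditionalAcquire, LWLockRelease).
The structure, lock, and function names are hypothetical, and the LWLockId is
presumed to have been assigned during shared-memory initialization:

	#include "storage/lwlock.h"
	#include "storage/spin.h"

	typedef struct
	{
		slock_t		mutex;			/* spinlock protecting stat_count */
		int			stat_count;
		int			counter;
	} MySharedStruct;				/* hypothetical shared-memory struct */

	static LWLockId MySharedLock;	/* hypothetical; from LWLockAssign() */

	/* Spinlock style: a handful of instructions, no kernel calls,
	 * and no elog() while the lock is held */
	void
	BumpStatCount(MySharedStruct *shared)
	{
		SpinLockAcquire(&shared->mutex);
		shared->stat_count++;
		SpinLockRelease(&shared->mutex);
	}

	/* LWLock style: may do nontrivial work while holding the lock;
	 * an elog(ERROR) here is safe, since held LWLocks are released
	 * automatically during error recovery */
	void
	UpdateCounter(MySharedStruct *shared)
	{
		LWLockAcquire(MySharedLock, LW_EXCLUSIVE);
		shared->counter++;
		LWLockRelease(MySharedLock);
	}

	/* Shared mode permits any number of concurrent readers */
	bool
	TryReadCounter(MySharedStruct *shared, int *value)
	{
		if (!LWLockConditionalAcquire(MySharedLock, LW_SHARED))
			return false;		/* not available; no side effects */
		*value = shared->counter;
		LWLockRelease(MySharedLock);
		return true;
	}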
The rest of this README file discusses the regular lock manager in detail.
LOCK DATA STRUCTURES
There are two fundamental lock structures: the per-lockable-object LOCK There are two fundamental lock structures: the per-lockable-object LOCK
struct, and the per-lock-holder HOLDER struct. A LOCK object exists struct, and the per-lock-holder HOLDER struct. A LOCK object exists
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.3 2001/03/22 03:59:46 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.4 2001/09/29 04:02:24 tgl Exp $
* *
* Interface: * Interface:
* *
...@@ -172,8 +172,8 @@ InitDeadLockChecking(void) ...@@ -172,8 +172,8 @@ InitDeadLockChecking(void)
* *
* We must have already locked the master lock before being called. * We must have already locked the master lock before being called.
* NOTE: although the lockctl structure appears to allow each lock * NOTE: although the lockctl structure appears to allow each lock
* table to have a different spinlock, all locks that can block had * table to have a different LWLock, all locks that can block had
* better use the same spinlock, else this code will not be adequately * better use the same LWLock, else this code will not be adequately
* interlocked! * interlocked!
*/ */
bool bool
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.95 2001/09/27 16:29:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.96 2001/09/29 04:02:24 tgl Exp $
* *
* NOTES * NOTES
* Outside modules can create a lock table and acquire/release * Outside modules can create a lock table and acquire/release
...@@ -78,8 +78,8 @@ static char *lock_mode_names[] = ...@@ -78,8 +78,8 @@ static char *lock_mode_names[] =
* TRACE_LOCK_TABLE -- trace locks on this table (oid) unconditionally * TRACE_LOCK_TABLE -- trace locks on this table (oid) unconditionally
* DEBUG_DEADLOCKS -- currently dumps locks at untimely occasions ;) * DEBUG_DEADLOCKS -- currently dumps locks at untimely occasions ;)
* *
* Furthermore, but in storage/ipc/spin.c: * Furthermore, but in storage/lmgr/lwlock.c:
* TRACE_SPINLOCKS -- trace spinlocks (pretty useless) * TRACE_LWLOCKS -- trace lightweight locks (pretty useless)
* *
* Define LOCK_DEBUG at compile time to get all these enabled. * Define LOCK_DEBUG at compile time to get all these enabled.
* -------- * --------
...@@ -151,10 +151,6 @@ HOLDER_PRINT(const char *where, const HOLDER *holderP) ...@@ -151,10 +151,6 @@ HOLDER_PRINT(const char *where, const HOLDER *holderP)
#endif /* not LOCK_DEBUG */ #endif /* not LOCK_DEBUG */
SPINLOCK LockMgrLock; /* in Shmem or created in
* CreateSpinlocks() */
/* /*
* These are to simplify/speed up some bit arithmetic. * These are to simplify/speed up some bit arithmetic.
* *
...@@ -230,12 +226,6 @@ LockMethodInit(LOCKMETHODTABLE *lockMethodTable, ...@@ -230,12 +226,6 @@ LockMethodInit(LOCKMETHODTABLE *lockMethodTable,
/* /*
* LockMethodTableInit -- initialize a lock table structure * LockMethodTableInit -- initialize a lock table structure
* *
* Notes:
* (a) a lock table has four separate entries in the shmem index
* table. This is because every shared hash table and spinlock
* has its name stored in the shmem index at its creation. It
* is wasteful, in this case, but not much space is involved.
*
* NOTE: data structures allocated here are allocated permanently, using * NOTE: data structures allocated here are allocated permanently, using
* TopMemoryContext and shared memory. We don't ever release them anyway, * TopMemoryContext and shared memory. We don't ever release them anyway,
* and in normal multi-backend operation the lock table structures set up * and in normal multi-backend operation the lock table structures set up
...@@ -277,9 +267,9 @@ LockMethodTableInit(char *tabName, ...@@ -277,9 +267,9 @@ LockMethodTableInit(char *tabName,
MemoryContextAlloc(TopMemoryContext, sizeof(LOCKMETHODTABLE)); MemoryContextAlloc(TopMemoryContext, sizeof(LOCKMETHODTABLE));
/* /*
* find/acquire the spinlock for the table * Lock the LWLock for the table (probably not necessary here)
*/ */
SpinAcquire(LockMgrLock); LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
/* /*
* allocate a control structure from shared memory or attach to it if * allocate a control structure from shared memory or attach to it if
...@@ -356,7 +346,7 @@ LockMethodTableInit(char *tabName, ...@@ -356,7 +346,7 @@ LockMethodTableInit(char *tabName,
/* init ctl data structures */ /* init ctl data structures */
LockMethodInit(lockMethodTable, conflictsP, prioP, numModes); LockMethodInit(lockMethodTable, conflictsP, prioP, numModes);
SpinRelease(LockMgrLock); LWLockRelease(LockMgrLock);
pfree(shmemName); pfree(shmemName);
...@@ -464,7 +454,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -464,7 +454,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HTAB *holderTable; HTAB *holderTable;
bool found; bool found;
LOCK *lock; LOCK *lock;
SPINLOCK masterLock; LWLockId masterLock;
LOCKMETHODTABLE *lockMethodTable; LOCKMETHODTABLE *lockMethodTable;
int status; int status;
int myHolding[MAX_LOCKMODES]; int myHolding[MAX_LOCKMODES];
...@@ -489,7 +479,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -489,7 +479,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
masterLock = lockMethodTable->ctl->masterLock; masterLock = lockMethodTable->ctl->masterLock;
SpinAcquire(masterLock); LWLockAcquire(masterLock, LW_EXCLUSIVE);
/* /*
* Find or create a lock with this tag * Find or create a lock with this tag
...@@ -499,7 +489,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -499,7 +489,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_ENTER, &found); HASH_ENTER, &found);
if (!lock) if (!lock)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(FATAL, "LockAcquire: lock table %d is corrupted", lockmethod); elog(FATAL, "LockAcquire: lock table %d is corrupted", lockmethod);
return FALSE; return FALSE;
} }
...@@ -544,7 +534,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -544,7 +534,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_ENTER, &found); HASH_ENTER, &found);
if (!holder) if (!holder)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(FATAL, "LockAcquire: holder table corrupted"); elog(FATAL, "LockAcquire: holder table corrupted");
return FALSE; return FALSE;
} }
...@@ -617,7 +607,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -617,7 +607,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
{ {
GrantLock(lock, holder, lockmode); GrantLock(lock, holder, lockmode);
HOLDER_PRINT("LockAcquire: owning", holder); HOLDER_PRINT("LockAcquire: owning", holder);
SpinRelease(masterLock); LWLockRelease(masterLock);
return TRUE; return TRUE;
} }
...@@ -630,7 +620,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -630,7 +620,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
{ {
GrantLock(lock, holder, lockmode); GrantLock(lock, holder, lockmode);
HOLDER_PRINT("LockAcquire: my other XID owning", holder); HOLDER_PRINT("LockAcquire: my other XID owning", holder);
SpinRelease(masterLock); LWLockRelease(masterLock);
return TRUE; return TRUE;
} }
...@@ -677,7 +667,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -677,7 +667,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
Assert(lock->nGranted <= lock->nRequested); Assert(lock->nGranted <= lock->nRequested);
SpinRelease(masterLock); LWLockRelease(masterLock);
return FALSE; return FALSE;
} }
...@@ -719,14 +709,14 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -719,14 +709,14 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HOLDER_PRINT("LockAcquire: INCONSISTENT", holder); HOLDER_PRINT("LockAcquire: INCONSISTENT", holder);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
/* Should we retry ? */ /* Should we retry ? */
SpinRelease(masterLock); LWLockRelease(masterLock);
return FALSE; return FALSE;
} }
HOLDER_PRINT("LockAcquire: granted", holder); HOLDER_PRINT("LockAcquire: granted", holder);
LOCK_PRINT("LockAcquire: granted", lock, lockmode); LOCK_PRINT("LockAcquire: granted", lock, lockmode);
} }
SpinRelease(masterLock); LWLockRelease(masterLock);
return status == STATUS_OK; return status == STATUS_OK;
} }
...@@ -879,7 +869,7 @@ GrantLock(LOCK *lock, HOLDER *holder, LOCKMODE lockmode) ...@@ -879,7 +869,7 @@ GrantLock(LOCK *lock, HOLDER *holder, LOCKMODE lockmode)
* Caller must have set MyProc->heldLocks to reflect locks already held * Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process (under all XIDs). * on the lockable object by this process (under all XIDs).
* *
* The locktable spinlock must be held at entry. * The locktable's masterLock must be held at entry.
*/ */
static int static int
WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode, WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode,
...@@ -925,7 +915,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode, ...@@ -925,7 +915,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode,
* needed, will happen in xact cleanup (see above for motivation). * needed, will happen in xact cleanup (see above for motivation).
*/ */
LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode); LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode);
SpinRelease(lockMethodTable->ctl->masterLock); LWLockRelease(lockMethodTable->ctl->masterLock);
elog(ERROR, "deadlock detected"); elog(ERROR, "deadlock detected");
/* not reached */ /* not reached */
} }
...@@ -998,7 +988,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -998,7 +988,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
TransactionId xid, LOCKMODE lockmode) TransactionId xid, LOCKMODE lockmode)
{ {
LOCK *lock; LOCK *lock;
SPINLOCK masterLock; LWLockId masterLock;
bool found; bool found;
LOCKMETHODTABLE *lockMethodTable; LOCKMETHODTABLE *lockMethodTable;
HOLDER *holder; HOLDER *holder;
...@@ -1023,7 +1013,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1023,7 +1013,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
} }
masterLock = lockMethodTable->ctl->masterLock; masterLock = lockMethodTable->ctl->masterLock;
SpinAcquire(masterLock); LWLockAcquire(masterLock, LW_EXCLUSIVE);
/* /*
* Find a lock with this tag * Find a lock with this tag
...@@ -1038,14 +1028,14 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1038,14 +1028,14 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
*/ */
if (!lock) if (!lock)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: locktable corrupted"); elog(NOTICE, "LockRelease: locktable corrupted");
return FALSE; return FALSE;
} }
if (!found) if (!found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: no such lock"); elog(NOTICE, "LockRelease: no such lock");
return FALSE; return FALSE;
} }
...@@ -1065,7 +1055,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1065,7 +1055,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_FIND_SAVE, &found); HASH_FIND_SAVE, &found);
if (!holder || !found) if (!holder || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
#ifdef USER_LOCKS #ifdef USER_LOCKS
if (!found && lockmethod == USER_LOCKMETHOD) if (!found && lockmethod == USER_LOCKMETHOD)
elog(NOTICE, "LockRelease: no lock with this tag"); elog(NOTICE, "LockRelease: no lock with this tag");
...@@ -1084,7 +1074,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1084,7 +1074,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
{ {
HOLDER_PRINT("LockRelease: WRONGTYPE", holder); HOLDER_PRINT("LockRelease: WRONGTYPE", holder);
Assert(holder->holding[lockmode] >= 0); Assert(holder->holding[lockmode] >= 0);
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: you don't own a lock of type %s", elog(NOTICE, "LockRelease: you don't own a lock of type %s",
lock_mode_names[lockmode]); lock_mode_names[lockmode]);
return FALSE; return FALSE;
...@@ -1139,7 +1129,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1139,7 +1129,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
&found); &found);
if (!lock || !found) if (!lock || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: remove lock, table corrupted"); elog(NOTICE, "LockRelease: remove lock, table corrupted");
return FALSE; return FALSE;
} }
...@@ -1167,7 +1157,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1167,7 +1157,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
HASH_REMOVE_SAVED, &found); HASH_REMOVE_SAVED, &found);
if (!holder || !found) if (!holder || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockRelease: remove holder, table corrupted"); elog(NOTICE, "LockRelease: remove holder, table corrupted");
return FALSE; return FALSE;
} }
...@@ -1179,7 +1169,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, ...@@ -1179,7 +1169,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag,
if (wakeupNeeded) if (wakeupNeeded)
ProcLockWakeup(lockMethodTable, lock); ProcLockWakeup(lockMethodTable, lock);
SpinRelease(masterLock); LWLockRelease(masterLock);
return TRUE; return TRUE;
} }
...@@ -1201,7 +1191,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1201,7 +1191,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
SHM_QUEUE *procHolders = &(proc->procHolders); SHM_QUEUE *procHolders = &(proc->procHolders);
HOLDER *holder; HOLDER *holder;
HOLDER *nextHolder; HOLDER *nextHolder;
SPINLOCK masterLock; LWLockId masterLock;
LOCKMETHODTABLE *lockMethodTable; LOCKMETHODTABLE *lockMethodTable;
int i, int i,
numLockModes; numLockModes;
...@@ -1225,7 +1215,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1225,7 +1215,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
numLockModes = lockMethodTable->ctl->numLockModes; numLockModes = lockMethodTable->ctl->numLockModes;
masterLock = lockMethodTable->ctl->masterLock; masterLock = lockMethodTable->ctl->masterLock;
SpinAcquire(masterLock); LWLockAcquire(masterLock, LW_EXCLUSIVE);
holder = (HOLDER *) SHMQueueNext(procHolders, procHolders, holder = (HOLDER *) SHMQueueNext(procHolders, procHolders,
offsetof(HOLDER, procLink)); offsetof(HOLDER, procLink));
...@@ -1321,7 +1311,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1321,7 +1311,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
&found); &found);
if (!holder || !found) if (!holder || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockReleaseAll: holder table corrupted"); elog(NOTICE, "LockReleaseAll: holder table corrupted");
return FALSE; return FALSE;
} }
...@@ -1340,7 +1330,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, ...@@ -1340,7 +1330,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc,
HASH_REMOVE, &found); HASH_REMOVE, &found);
if (!lock || !found) if (!lock || !found)
{ {
SpinRelease(masterLock); LWLockRelease(masterLock);
elog(NOTICE, "LockReleaseAll: cannot remove lock from HTAB"); elog(NOTICE, "LockReleaseAll: cannot remove lock from HTAB");
return FALSE; return FALSE;
} }
...@@ -1352,7 +1342,7 @@ next_item: ...@@ -1352,7 +1342,7 @@ next_item:
holder = nextHolder; holder = nextHolder;
} }
SpinRelease(masterLock); LWLockRelease(masterLock);
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
if (lockmethod == USER_LOCKMETHOD ? Trace_userlocks : Trace_locks) if (lockmethod == USER_LOCKMETHOD ? Trace_userlocks : Trace_locks)
......
/*-------------------------------------------------------------------------
*
* lwlock.c
* Lightweight lock manager
*
* Lightweight locks are intended primarily to provide mutual exclusion of
* access to shared-memory data structures. Therefore, they offer both
* exclusive and shared lock modes (to support read/write and read-only
* access to a shared object). There are few other frammishes. User-level
* locking should be done with the full lock manager --- which depends on
* an LWLock to protect its shared state.
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.1 2001/09/29 04:02:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/clog.h"
#include "storage/lwlock.h"
#include "storage/proc.h"
#include "storage/spin.h"
typedef struct LWLock
{
slock_t mutex; /* Protects LWLock and queue of PROCs */
char exclusive; /* # of exclusive holders (0 or 1) */
int shared; /* # of shared holders (0..MaxBackends) */
PROC *head; /* head of list of waiting PROCs */
PROC *tail; /* tail of list of waiting PROCs */
/* tail is undefined when head is NULL */
} LWLock;
/*
* This points to the array of LWLocks in shared memory. Backends inherit
* the pointer by fork from the postmaster. LWLockIds are indexes into
* the array.
*/
static LWLock *LWLockArray = NULL;
/* shared counter for dynamic allocation of LWLockIds */
static int *LWLockCounter;
/*
* We use this structure to keep track of locked LWLocks for release
* during error recovery. The maximum size could be determined at runtime
* if necessary, but it seems unlikely that more than a few locks could
* ever be held simultaneously.
*/
#define MAX_SIMUL_LWLOCKS 100
static int num_held_lwlocks = 0;
static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS];
#ifdef LOCK_DEBUG
bool Trace_lwlocks = false;
inline static void
PRINT_LWDEBUG(const char *where, LWLockId lockid, const LWLock *lock)
{
if (Trace_lwlocks)
elog(DEBUG, "%s(%d): excl %d shared %d head %p",
where, (int) lockid,
(int) lock->exclusive, lock->shared, lock->head);
}
#else /* not LOCK_DEBUG */
#define PRINT_LWDEBUG(a,b,c)
#endif /* LOCK_DEBUG */
/*
* Compute number of LWLocks to allocate.
*/
int
NumLWLocks(void)
{
int numLocks;
/*
* Possibly this logic should be spread out among the affected modules,
* the same way that shmem space estimation is done. But for now,
* there are few enough users of LWLocks that we can get away with
* just keeping the knowledge here.
*/
/* Predefined LWLocks */
numLocks = (int) NumFixedLWLocks;
/* bufmgr.c needs two for each shared buffer */
numLocks += 2 * NBuffers;
/* clog.c needs one per CLOG buffer */
numLocks += NUM_CLOG_BUFFERS;
/* Perhaps create a few more for use by user-defined modules? */
return numLocks;
}
/*
* Compute shmem space needed for LWLocks.
*/
int
LWLockShmemSize(void)
{
int numLocks = NumLWLocks();
uint32 spaceLocks;
/* Allocate the LWLocks plus space for shared allocation counter. */
spaceLocks = numLocks * sizeof(LWLock) + 2 * sizeof(int);
spaceLocks = MAXALIGN(spaceLocks);
return (int) spaceLocks;
}
/*
* Allocate shmem space for LWLocks and initialize the locks.
*/
void
CreateLWLocks(void)
{
int numLocks = NumLWLocks();
uint32 spaceLocks = LWLockShmemSize();
LWLock *lock;
int id;
/* Allocate space */
LWLockArray = (LWLock *) ShmemAlloc(spaceLocks);
/*
* Initialize all LWLocks to "unlocked" state
*/
for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++)
{
SpinLockInit(&lock->mutex);
lock->exclusive = 0;
lock->shared = 0;
lock->head = NULL;
lock->tail = NULL;
}
/*
* Initialize the dynamic-allocation counter at the end of the array
*/
LWLockCounter = (int *) lock;
LWLockCounter[0] = (int) NumFixedLWLocks;
LWLockCounter[1] = numLocks;
}
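/*
 * The resulting layout of the allocated space is (sketch):
 *
 *		LWLockArray[0 .. numLocks-1]	the LWLock structs themselves
 *		LWLockCounter[0]				next LWLockId to assign dynamically
 *		LWLockCounter[1]				total number of LWLockIds available
 */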
/*
* LWLockAssign - assign a dynamically-allocated LWLock number
*
* NB: we do not currently try to interlock this. Could perhaps use
* ShmemLock spinlock if there were any need to assign LWLockIds after
* shmem setup.
*/
LWLockId
LWLockAssign(void)
{
if (LWLockCounter[0] >= LWLockCounter[1])
elog(FATAL, "No more LWLockIds available");
return (LWLockId) (LWLockCounter[0]++);
}
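/*
 * Illustrative sketch (not part of this file): a hypothetical module
 * that wanted its own dynamically-assigned lock would reserve the
 * LWLockId once during shared-memory setup --- assuming NumLWLocks()
 * has been taught to leave a slot free for it --- and then use the
 * ordinary acquire/release calls:
 *
 *		static LWLockId MyModuleLock;
 *
 *		void
 *		MyModuleShmemInit(void)
 *		{
 *			MyModuleLock = LWLockAssign();
 *		}
 *
 *		void
 *		MyModuleUpdate(void)
 *		{
 *			LWLockAcquire(MyModuleLock, LW_EXCLUSIVE);
 *			... update the module's shared state ...
 *			LWLockRelease(MyModuleLock);
 *		}
 */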
/*
* LWLockAcquire - acquire a lightweight lock in the specified mode
*
* If the lock is not available, sleep until it is.
*
* Side effect: cancel/die interrupts are held off until lock release.
*/
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
LWLock *lock = LWLockArray + lockid;
bool mustwait;
PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
/*
* Lock out cancel/die interrupts until we exit the code section
* protected by the LWLock. This ensures that interrupts will not
* interfere with manipulations of data structures in shared memory.
*/
HOLD_INTERRUPTS();
/* Acquire mutex. Time spent holding mutex should be short! */
SpinLockAcquire_NoHoldoff(&lock->mutex);
/* If I can get the lock, do so quickly. */
if (mode == LW_EXCLUSIVE)
{
if (lock->exclusive == 0 && lock->shared == 0)
{
lock->exclusive++;
mustwait = false;
}
else
mustwait = true;
}
else
{
/*
* If there is someone waiting (presumably for exclusive access),
* queue up behind him even though I could get the lock. This
* prevents a stream of read locks from starving a writer.
*/
if (lock->exclusive == 0 && lock->head == NULL)
{
lock->shared++;
mustwait = false;
}
else
mustwait = true;
}
if (mustwait)
{
/* Add myself to wait queue */
PROC *proc = MyProc;
int extraWaits = 0;
/*
* If we don't have a PROC structure, there's no way to wait.
* This should never occur, since MyProc should only be null
* during shared memory initialization.
*/
if (proc == NULL)
elog(FATAL, "LWLockAcquire: can't wait without a PROC structure");
proc->lwWaiting = true;
proc->lwExclusive = (mode == LW_EXCLUSIVE);
proc->lwWaitLink = NULL;
if (lock->head == NULL)
lock->head = proc;
else
lock->tail->lwWaitLink = proc;
lock->tail = proc;
/* Can release the mutex now */
SpinLockRelease_NoHoldoff(&lock->mutex);
/*
* Wait until awakened.
*
* Since we share the process wait semaphore with the regular lock
* manager and ProcWaitForSignal, and we may need to acquire an LWLock
* while one of those is pending, it is possible that we get awakened
* for a reason other than being granted the LWLock. If so, loop back
* and wait again. Once we've gotten the lock, re-increment the sema
* by the number of additional signals received, so that the lock
* manager or signal manager will see the received signal when it
* next waits.
*/
for (;;)
{
/* "false" means cannot accept cancel/die interrupt here. */
IpcSemaphoreLock(proc->sem.semId, proc->sem.semNum, false);
if (!proc->lwWaiting)
break;
extraWaits++;
}
/*
* The awakener already updated the lock struct's state, so we
* don't need to do anything more to it. Just need to fix the
* semaphore count.
*/
while (extraWaits-- > 0)
IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
}
else
{
/* Got the lock without waiting */
SpinLockRelease_NoHoldoff(&lock->mutex);
}
/* Add lock to list of locks held by this backend */
Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
held_lwlocks[num_held_lwlocks++] = lockid;
}
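/*
 * Illustrative timeline of the extraWaits mechanism above (sketch):
 *
 *	1. Backend B queues itself for an LWLock and P's its semaphore.
 *	2. Some other signal source (say, ProcLockWakeup) V's B's semaphore
 *	   for an unrelated reason; B wakes, sees lwWaiting still true,
 *	   counts one extraWait, and P's again.
 *	3. The LWLock releaser clears B's lwWaiting and V's the semaphore;
 *	   B wakes and exits the loop holding the lock.
 *	4. B re-V's the semaphore once per extraWait, so the unrelated
 *	   signal is re-posted and will be seen by the next wait.
 */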
/*
* LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
*
* If the lock is not available, return FALSE with no side-effects.
*
* If successful, cancel/die interrupts are held off until lock release.
*/
bool
LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
{
LWLock *lock = LWLockArray + lockid;
bool mustwait;
PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
/*
* Lock out cancel/die interrupts until we exit the code section
* protected by the LWLock. This ensures that interrupts will not
* interfere with manipulations of data structures in shared memory.
*/
HOLD_INTERRUPTS();
/* Acquire mutex. Time spent holding mutex should be short! */
SpinLockAcquire_NoHoldoff(&lock->mutex);
/* If I can get the lock, do so quickly. */
if (mode == LW_EXCLUSIVE)
{
if (lock->exclusive == 0 && lock->shared == 0)
{
lock->exclusive++;
mustwait = false;
}
else
mustwait = true;
}
else
{
/*
* If there is someone waiting (presumably for exclusive access),
* queue up behind him even though I could get the lock. This
* prevents a stream of read locks from starving a writer.
*/
if (lock->exclusive == 0 && lock->head == NULL)
{
lock->shared++;
mustwait = false;
}
else
mustwait = true;
}
/* We are done updating shared state of the lock itself. */
SpinLockRelease_NoHoldoff(&lock->mutex);
if (mustwait)
{
/* Failed to get lock, so release interrupt holdoff */
RESUME_INTERRUPTS();
}
else
{
/* Add lock to list of locks held by this backend */
Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
held_lwlocks[num_held_lwlocks++] = lockid;
}
return !mustwait;
}
/*
* LWLockRelease - release a previously acquired lock
*/
void
LWLockRelease(LWLockId lockid)
{
LWLock *lock = LWLockArray + lockid;
PROC *head;
PROC *proc;
int i;
PRINT_LWDEBUG("LWLockRelease", lockid, lock);
/*
* Remove lock from list of locks held. Usually, but not always,
* it will be the latest-acquired lock; so search array backwards.
*/
for (i = num_held_lwlocks; --i >= 0; )
{
if (lockid == held_lwlocks[i])
break;
}
if (i < 0)
elog(ERROR, "LWLockRelease: lock %d is not held", (int) lockid);
num_held_lwlocks--;
for (; i < num_held_lwlocks; i++)
held_lwlocks[i] = held_lwlocks[i+1];
/* Acquire mutex. Time spent holding mutex should be short! */
SpinLockAcquire_NoHoldoff(&lock->mutex);
/* Release my hold on lock */
if (lock->exclusive > 0)
lock->exclusive--;
else
{
Assert(lock->shared > 0);
lock->shared--;
}
/*
* See if I need to awaken any waiters. If I released a non-last shared
* hold, there cannot be anything to do.
*/
head = lock->head;
if (head != NULL)
{
if (lock->exclusive == 0 && lock->shared == 0)
{
/*
* Remove the to-be-awakened PROCs from the queue, and update the
* lock state to show them as holding the lock.
*/
proc = head;
if (proc->lwExclusive)
{
lock->exclusive++;
}
else
{
lock->shared++;
while (proc->lwWaitLink != NULL &&
!proc->lwWaitLink->lwExclusive)
{
proc = proc->lwWaitLink;
lock->shared++;
}
}
/* proc is now the last PROC to be released */
lock->head = proc->lwWaitLink;
proc->lwWaitLink = NULL;
}
else
{
/* lock is still held, can't awaken anything */
head = NULL;
}
}
/* We are done updating shared state of the lock itself. */
SpinLockRelease_NoHoldoff(&lock->mutex);
/*
* Awaken any waiters I removed from the queue.
*/
while (head != NULL)
{
proc = head;
head = proc->lwWaitLink;
proc->lwWaitLink = NULL;
proc->lwWaiting = false;
IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
}
/*
* Now okay to allow cancel/die interrupts.
*/
RESUME_INTERRUPTS();
}
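/*
 * Wakeup behavior sketch: with waiters queued [S1, S2, X, S3] and the
 * last hold just released, S1 and S2 are granted the lock together and
 * awakened; X becomes the new queue head, will be granted alone by a
 * later release, and S3 continues to wait behind it.
 */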
/*
* LWLockReleaseAll - release all currently-held locks
*
* Used to clean up after elog(ERROR). An important difference between this
* function and retail LWLockRelease calls is that InterruptHoldoffCount is
* unchanged by this operation. This is necessary since InterruptHoldoffCount
* has been set to an appropriate level earlier in error recovery. We could
* decrement it below zero if we allow it to drop for each released lock!
*/
void
LWLockReleaseAll(void)
{
while (num_held_lwlocks > 0)
{
HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
LWLockRelease(held_lwlocks[num_held_lwlocks-1]);
}
}
...@@ -8,15 +8,11 @@ ...@@ -8,15 +8,11 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.108 2001/09/21 17:06:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.109 2001/09/29 04:02:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
/* /*
* Each postgres backend gets one of these. We'll use it to
* clean up after the process should the process suddenly die.
*
*
* Interface (a): * Interface (a):
* ProcSleep(), ProcWakeup(), * ProcSleep(), ProcWakeup(),
* ProcQueueAlloc() -- create a shm queue for sleeping processes * ProcQueueAlloc() -- create a shm queue for sleeping processes
...@@ -75,27 +71,31 @@ ...@@ -75,27 +71,31 @@
#include "access/xact.h" #include "access/xact.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/sinval.h" #include "storage/sinval.h"
#include "storage/spin.h"
int DeadlockTimeout = 1000; int DeadlockTimeout = 1000;
/* -------------------- PROC *MyProc = NULL;
* Spin lock for manipulating the shared process data structure:
* ProcGlobal.... Adding an extra spin lock seemed like the smallest /*
* hack to get around reading and updating this structure in shared * This spinlock protects the freelist of recycled PROC structures and the
* memory. -mer 17 July 1991 * bitmap of free semaphores. We cannot use an LWLock because the LWLock
* -------------------- * manager depends on already having a PROC and a wait semaphore! But these
* structures are touched relatively infrequently (only at backend startup
* or shutdown) and not for very long, so a spinlock is okay.
*/ */
SPINLOCK ProcStructLock; static slock_t *ProcStructLock = NULL;
static PROC_HDR *ProcGlobal = NULL; static PROC_HDR *ProcGlobal = NULL;
PROC *MyProc = NULL; static PROC *DummyProc = NULL;
static bool waitingForLock = false; static bool waitingForLock = false;
static bool waitingForSignal = false; static bool waitingForSignal = false;
static void ProcKill(void); static void ProcKill(void);
static void DummyProcKill(void);
static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum); static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum);
static void ProcFreeSem(IpcSemaphoreId semId, int semNum); static void ProcFreeSem(IpcSemaphoreId semId, int semNum);
static void ZeroProcSemaphore(PROC *proc); static void ZeroProcSemaphore(PROC *proc);
...@@ -128,9 +128,12 @@ InitProcGlobal(int maxBackends) ...@@ -128,9 +128,12 @@ InitProcGlobal(int maxBackends)
Size procGlobalSize; Size procGlobalSize;
bool found = false; bool found = false;
/* Compute size for ProcGlobal structure */ /*
* Compute size for ProcGlobal structure. Note we need one more sema
* besides those used for regular backends.
*/
Assert(maxBackends > 0); Assert(maxBackends > 0);
semMapEntries = PROC_SEM_MAP_ENTRIES(maxBackends); semMapEntries = PROC_SEM_MAP_ENTRIES(maxBackends+1);
procGlobalSize = sizeof(PROC_HDR) + (semMapEntries-1) * sizeof(SEM_MAP_ENTRY); procGlobalSize = sizeof(PROC_HDR) + (semMapEntries-1) * sizeof(SEM_MAP_ENTRY);
/* Create or attach to the ProcGlobal shared structure */ /* Create or attach to the ProcGlobal shared structure */
...@@ -178,13 +181,26 @@ InitProcGlobal(int maxBackends) ...@@ -178,13 +181,26 @@ InitProcGlobal(int maxBackends)
false); false);
ProcGlobal->procSemMap[i].procSemId = semId; ProcGlobal->procSemMap[i].procSemId = semId;
} }
/*
* Pre-allocate a PROC structure for dummy (checkpoint) processes,
* and reserve the last sema of the precreated semas for it.
*/
DummyProc = (PROC *) ShmemAlloc(sizeof(PROC));
DummyProc->pid = 0; /* marks DummyProc as not in use */
i = semMapEntries-1;
ProcGlobal->procSemMap[i].freeSemMap |= 1 << (PROC_NSEMS_PER_SET-1);
DummyProc->sem.semId = ProcGlobal->procSemMap[i].procSemId;
DummyProc->sem.semNum = PROC_NSEMS_PER_SET-1;
/* Create ProcStructLock spinlock, too */
ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
SpinLockInit(ProcStructLock);
} }
} }
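The bitmap bookkeeping used just above for DummyProc's reserved slot, and again in ProcGetNewSemIdAndNum below, reduces to "set a bit to claim a semaphore, scan for a zero bit to allocate one". A hedged standalone sketch follows; the two-set map, SEMS_PER_SET, NUM_SETS, and the function names are inventions for the demo, not the real PROC_NSEMS_PER_SET/PROC_SEM_MAP_ENTRIES layout.

#include <stdint.h>
#include <stdio.h>

#define SEMS_PER_SET 16                 /* stands in for PROC_NSEMS_PER_SET */
#define NUM_SETS     2                  /* stands in for the sem map length */

static uint32_t freeSemMap[NUM_SETS];   /* a 1 bit means "in use" */

/* reserve one specific slot, as InitProcGlobal does for DummyProc */
static void
reserve_slot(int set, int num)
{
    freeSemMap[set] |= (uint32_t) 1 << num;
}

/* scan for any free slot, as ProcGetNewSemIdAndNum does */
static int
alloc_slot(int *set, int *num)
{
    uint32_t fullmask = ((uint32_t) 1 << SEMS_PER_SET) - 1;
    int i, j;

    for (i = 0; i < NUM_SETS; i++)
    {
        if (freeSemMap[i] == fullmask)
            continue;                   /* this set is exhausted */
        for (j = 0; j < SEMS_PER_SET; j++)
        {
            uint32_t mask = (uint32_t) 1 << j;

            if ((freeSemMap[i] & mask) == 0)
            {
                freeSemMap[i] |= mask;  /* claim it */
                *set = i;
                *num = j;
                return 1;
            }
        }
    }
    return 0;                           /* all in use: "too many backends" */
}

int
main(void)
{
    int set, num, nalloced = 0;

    reserve_slot(NUM_SETS - 1, SEMS_PER_SET - 1);   /* DummyProc's slot */
    while (alloc_slot(&set, &num))
        nalloced++;
    printf("allocated %d slots; last map word = 0x%x\n",
           nalloced, (unsigned) freeSemMap[NUM_SETS - 1]);
    return 0;
}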
/* ------------------------
 * InitProc -- create a per-process data structure for this process
 * used by the lock manager on semaphore queues.
 * ------------------------
 */
/*
 * InitProcess -- create a per-process data structure for this backend
 */
void void
InitProcess(void) InitProcess(void)
...@@ -202,39 +218,27 @@ InitProcess(void) ...@@ -202,39 +218,27 @@ InitProcess(void)
elog(ERROR, "InitProcess: you already exist"); elog(ERROR, "InitProcess: you already exist");
/*
 * ProcStructLock protects the freelist of PROC entries and the map
 * of free semaphores. Note that when we acquire it here, we do not
 * have a PROC entry and so the ownership of the spinlock is not
 * recorded anywhere; even if it was, until we register ProcKill as
 * an on_shmem_exit callback, there is no exit hook that will cause
 * owned spinlocks to be released. Upshot: during the first part of
 * this routine, be careful to release the lock manually before any
 * elog(), else you'll have a stuck spinlock to add to your woes.
 */
/*
 * try to get a proc struct from the free list first
 */
SpinAcquire(ProcStructLock); SpinLockAcquire(ProcStructLock);
/* try to get a proc struct from the free list first */
myOffset = ProcGlobal->freeProcs; myOffset = ProcGlobal->freeProcs;
if (myOffset != INVALID_OFFSET) if (myOffset != INVALID_OFFSET)
{ {
MyProc = (PROC *) MAKE_PTR(myOffset); MyProc = (PROC *) MAKE_PTR(myOffset);
ProcGlobal->freeProcs = MyProc->links.next; ProcGlobal->freeProcs = MyProc->links.next;
SpinLockRelease(ProcStructLock);
} }
else else
{ {
/*
 * have to allocate one. We can't use the normal shmem index
 * table mechanism because the proc structure is stored by PID
 * instead of by a global name (need to look it up by PID when we
 * clean up dead processes).
 */
/*
 * have to allocate a new one.
 */
SpinLockRelease(ProcStructLock);
MyProc = (PROC *) ShmemAlloc(sizeof(PROC)); MyProc = (PROC *) ShmemAlloc(sizeof(PROC));
if (!MyProc) if (!MyProc)
{
SpinRelease(ProcStructLock);
elog(FATAL, "cannot create new proc: out of memory"); elog(FATAL, "cannot create new proc: out of memory");
}
} }
/* /*
...@@ -246,39 +250,30 @@ InitProcess(void) ...@@ -246,39 +250,30 @@ InitProcess(void)
MyProc->errType = STATUS_OK; MyProc->errType = STATUS_OK;
MyProc->xid = InvalidTransactionId; MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId; MyProc->xmin = InvalidTransactionId;
MyProc->pid = MyProcPid;
MyProc->databaseId = MyDatabaseId;
MyProc->logRec.xrecoff = 0; MyProc->logRec.xrecoff = 0;
MyProc->lwWaiting = false;
MyProc->lwExclusive = false;
MyProc->lwWaitLink = NULL;
MyProc->waitLock = NULL; MyProc->waitLock = NULL;
MyProc->waitHolder = NULL; MyProc->waitHolder = NULL;
MyProc->pid = MyProcPid;
MyProc->databaseId = MyDatabaseId;
SHMQueueInit(&(MyProc->procHolders)); SHMQueueInit(&(MyProc->procHolders));
/*
* Zero out the spin lock counts and set the sLocks field for
* ProcStructLock to 1 as we have acquired this spinlock above but
* didn't record it since we didn't have MyProc until now.
*/
MemSet(MyProc->sLocks, 0, sizeof(MyProc->sLocks));
MyProc->sLocks[ProcStructLock] = 1;
/*
 * Arrange to clean up at backend exit. Once we do this, owned
 * spinlocks will be released on exit, and so we can be a lot less
 * tense about errors.
 */
/*
 * Arrange to clean up at backend exit.
 */
on_shmem_exit(ProcKill, 0); on_shmem_exit(ProcKill, 0);
/* /*
* Set up a wait-semaphore for the proc. (We rely on ProcKill to clean * Set up a wait-semaphore for the proc. (We rely on ProcKill to clean
* up if this fails.) * up MyProc if this fails.)
*/ */
if (IsUnderPostmaster) if (IsUnderPostmaster)
ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum); ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum);
/* Done with freelist and sem map */
SpinRelease(ProcStructLock);
/* /*
* We might be reusing a semaphore that belongs to a dead backend. * We might be reusing a semaphore that belonged to a failed process.
* So be careful and reinitialize its value here. * So be careful and reinitialize its value here.
*/ */
if (MyProc->sem.semId >= 0) if (MyProc->sem.semId >= 0)
...@@ -291,6 +286,65 @@ InitProcess(void) ...@@ -291,6 +286,65 @@ InitProcess(void)
InitDeadLockChecking(); InitDeadLockChecking();
} }
/*
* InitDummyProcess -- create a dummy per-process data structure
*
* This is called by checkpoint processes so that they will have a MyProc
* value that's real enough to let them wait for LWLocks. The PROC and
* sema that are assigned are the extra ones created during InitProcGlobal.
*/
void
InitDummyProcess(void)
{
/*
* ProcGlobal should be set by a previous call to InitProcGlobal
* (we inherit this by fork() from the postmaster).
*/
if (ProcGlobal == NULL || DummyProc == NULL)
elog(STOP, "InitDummyProcess: Proc Header uninitialized");
if (MyProc != NULL)
elog(ERROR, "InitDummyProcess: you already exist");
/*
* DummyProc should not presently be in use by anyone else
*/
if (DummyProc->pid != 0)
elog(FATAL, "InitDummyProcess: DummyProc is in use by PID %d",
DummyProc->pid);
MyProc = DummyProc;
/*
* Initialize all fields of MyProc, except MyProc->sem which was
* set up by InitProcGlobal.
*/
MyProc->pid = MyProcPid; /* marks DummyProc as in use by me */
SHMQueueElemInit(&(MyProc->links));
MyProc->errType = STATUS_OK;
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
MyProc->databaseId = MyDatabaseId;
MyProc->logRec.xrecoff = 0;
MyProc->lwWaiting = false;
MyProc->lwExclusive = false;
MyProc->lwWaitLink = NULL;
MyProc->waitLock = NULL;
MyProc->waitHolder = NULL;
SHMQueueInit(&(MyProc->procHolders));
/*
* Arrange to clean up at process exit.
*/
on_shmem_exit(DummyProcKill, 0);
/*
* We might be reusing a semaphore that belonged to a failed process.
* So be careful and reinitialize its value here.
*/
if (MyProc->sem.semId >= 0)
ZeroProcSemaphore(MyProc);
}
/* /*
* Initialize the proc's wait-semaphore to count zero. * Initialize the proc's wait-semaphore to count zero.
*/ */
...@@ -330,10 +384,10 @@ LockWaitCancel(void) ...@@ -330,10 +384,10 @@ LockWaitCancel(void)
disable_sigalrm_interrupt(); disable_sigalrm_interrupt();
/* Unlink myself from the wait queue, if on it (might not be anymore!) */ /* Unlink myself from the wait queue, if on it (might not be anymore!) */
LockLockTable(); LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
if (MyProc->links.next != INVALID_OFFSET) if (MyProc->links.next != INVALID_OFFSET)
RemoveFromWaitQueue(MyProc); RemoveFromWaitQueue(MyProc);
UnlockLockTable(); LWLockRelease(LockMgrLock);
/* /*
* Reset the proc wait semaphore to zero. This is necessary in the * Reset the proc wait semaphore to zero. This is necessary in the
...@@ -381,15 +435,18 @@ ProcReleaseLocks(bool isCommit) ...@@ -381,15 +435,18 @@ ProcReleaseLocks(bool isCommit)
/* /*
* ProcKill() -- Destroy the per-proc data structure for * ProcKill() -- Destroy the per-proc data structure for
* this process. Release any of its held spin locks. * this process. Release any of its held LW locks.
*/ */
static void static void
ProcKill(void) ProcKill(void)
{ {
Assert(MyProc != NULL); Assert(MyProc != NULL);
/* Release any spinlocks I am holding */ /* Release any LW locks I am holding */
ProcReleaseSpins(MyProc); LWLockReleaseAll();
/* Abort any buffer I/O in progress */
AbortBufferIO();
/* Get off any wait queue I might be on */ /* Get off any wait queue I might be on */
LockWaitCancel(); LockWaitCancel();
...@@ -402,7 +459,7 @@ ProcKill(void) ...@@ -402,7 +459,7 @@ ProcKill(void)
LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId); LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId);
#endif #endif
SpinAcquire(ProcStructLock); SpinLockAcquire(ProcStructLock);
/* Free up my wait semaphore, if I got one */ /* Free up my wait semaphore, if I got one */
if (MyProc->sem.semId >= 0) if (MyProc->sem.semId >= 0)
...@@ -412,10 +469,35 @@ ProcKill(void) ...@@ -412,10 +469,35 @@ ProcKill(void)
MyProc->links.next = ProcGlobal->freeProcs; MyProc->links.next = ProcGlobal->freeProcs;
ProcGlobal->freeProcs = MAKE_OFFSET(MyProc); ProcGlobal->freeProcs = MAKE_OFFSET(MyProc);
/* PROC struct isn't mine anymore; stop tracking spinlocks with it! */ /* PROC struct isn't mine anymore */
MyProc = NULL; MyProc = NULL;
SpinRelease(ProcStructLock); SpinLockRelease(ProcStructLock);
}
/*
* DummyProcKill() -- Cut-down version of ProcKill for dummy (checkpoint)
* processes. The PROC and sema are not released, only marked
* as not-in-use.
*/
static void
DummyProcKill(void)
{
Assert(MyProc != NULL && MyProc == DummyProc);
/* Release any LW locks I am holding */
LWLockReleaseAll();
/* Abort any buffer I/O in progress */
AbortBufferIO();
/* I can't be on regular lock queues, so needn't check */
/* Mark DummyProc no longer in use */
MyProc->pid = 0;
/* PROC struct isn't mine anymore */
MyProc = NULL;
} }
...@@ -464,13 +546,13 @@ ProcQueueInit(PROC_QUEUE *queue) ...@@ -464,13 +546,13 @@ ProcQueueInit(PROC_QUEUE *queue)
* Caller must have set MyProc->heldLocks to reflect locks already held * Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process (under all XIDs). * on the lockable object by this process (under all XIDs).
* *
* Locktable's spinlock must be held at entry, and will be held * Locktable's masterLock must be held at entry, and will be held
* at exit. * at exit.
* *
* Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
* *
* ASSUME: that no one will fiddle with the queue until after * ASSUME: that no one will fiddle with the queue until after
* we release the spin lock. * we release the masterLock.
* *
* NOTES: The process queue is now a priority queue for locking. * NOTES: The process queue is now a priority queue for locking.
* *
...@@ -484,7 +566,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, ...@@ -484,7 +566,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
HOLDER *holder) HOLDER *holder)
{ {
LOCKMETHODCTL *lockctl = lockMethodTable->ctl; LOCKMETHODCTL *lockctl = lockMethodTable->ctl;
SPINLOCK spinlock = lockctl->masterLock; LWLockId masterLock = lockctl->masterLock;
PROC_QUEUE *waitQueue = &(lock->waitProcs); PROC_QUEUE *waitQueue = &(lock->waitProcs);
int myHeldLocks = MyProc->heldLocks; int myHeldLocks = MyProc->heldLocks;
bool early_deadlock = false; bool early_deadlock = false;
...@@ -595,14 +677,14 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, ...@@ -595,14 +677,14 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
waitingForLock = true; waitingForLock = true;
/* /*
* Release the locktable's spin lock. * Release the locktable's masterLock.
* *
* NOTE: this may also cause us to exit critical-section state, possibly * NOTE: this may also cause us to exit critical-section state, possibly
* allowing a cancel/die interrupt to be accepted. This is OK because * allowing a cancel/die interrupt to be accepted. This is OK because
* we have recorded the fact that we are waiting for a lock, and so * we have recorded the fact that we are waiting for a lock, and so
* LockWaitCancel will clean up if cancel/die happens. * LockWaitCancel will clean up if cancel/die happens.
*/ */
SpinRelease(spinlock); LWLockRelease(masterLock);
/* /*
* Set timer so we can wake up after awhile and check for a deadlock. * Set timer so we can wake up after awhile and check for a deadlock.
...@@ -617,7 +699,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, ...@@ -617,7 +699,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
elog(FATAL, "ProcSleep: Unable to set timer for process wakeup"); elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
/* /*
* If someone wakes us between SpinRelease and IpcSemaphoreLock, * If someone wakes us between LWLockRelease and IpcSemaphoreLock,
* IpcSemaphoreLock will not block. The wakeup is "saved" by the * IpcSemaphoreLock will not block. The wakeup is "saved" by the
* semaphore implementation. Note also that if HandleDeadLock is * semaphore implementation. Note also that if HandleDeadLock is
* invoked but does not detect a deadlock, IpcSemaphoreLock() will * invoked but does not detect a deadlock, IpcSemaphoreLock() will
...@@ -644,12 +726,9 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, ...@@ -644,12 +726,9 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
waitingForLock = false; waitingForLock = false;
/* /*
* Re-acquire the locktable's spin lock. * Re-acquire the locktable's masterLock.
*
* We could accept a cancel/die interrupt here. That's OK because the
* lock is now registered as being held by this process.
*/ */
SpinAcquire(spinlock); LWLockAcquire(masterLock, LW_EXCLUSIVE);
/* /*
* We don't have to do anything else, because the awaker did all the * We don't have to do anything else, because the awaker did all the
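The "saved wakeup" property that the comments above rely on, and that the semaphore-zeroing in LockWaitCancel exists to undo, is just the counting behavior of any semaphore: a post that arrives before the wait leaves the count at one, so the wait returns at once. A standalone sketch (POSIX semaphores for brevity, link with -pthread if required; PostgreSQL itself goes through IpcSemaphoreLock/IpcSemaphoreUnlock on SysV semaphores):

#include <semaphore.h>
#include <stdio.h>

int
main(void)
{
    sem_t sem;

    sem_init(&sem, 0, 0);   /* count starts at zero, like a proc's wait sema */

    sem_post(&sem);         /* the "awaker" runs first: wakeup saved as count 1 */

    sem_wait(&sem);         /* the sleeper arrives later and does not block */
    printf("wakeup was saved; sem_wait returned immediately\n");

    sem_destroy(&sem);
    return 0;
}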
...@@ -674,7 +753,7 @@ ProcWakeup(PROC *proc, int errType) ...@@ -674,7 +753,7 @@ ProcWakeup(PROC *proc, int errType)
{ {
PROC *retProc; PROC *retProc;
/* assume that spinlock has been acquired */ /* assume that masterLock has been acquired */
/* Proc should be sleeping ... */ /* Proc should be sleeping ... */
if (proc->links.prev == INVALID_OFFSET || if (proc->links.prev == INVALID_OFFSET ||
...@@ -777,11 +856,11 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -777,11 +856,11 @@ HandleDeadLock(SIGNAL_ARGS)
/* /*
* Acquire locktable lock. Note that the SIGALRM interrupt had better * Acquire locktable lock. Note that the SIGALRM interrupt had better
* not be enabled anywhere that this process itself holds the * not be enabled anywhere that this process itself holds the
* locktable lock, else this will wait forever. Also note that this * locktable lock, else this will wait forever. Also note that
* calls SpinAcquire which creates a critical section, so that this * LWLockAcquire creates a critical section, so that this
* routine cannot be interrupted by cancel/die interrupts. * routine cannot be interrupted by cancel/die interrupts.
*/ */
LockLockTable(); LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
/* /*
* Check to see if we've been awoken by anyone in the interim. * Check to see if we've been awoken by anyone in the interim.
...@@ -799,7 +878,7 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -799,7 +878,7 @@ HandleDeadLock(SIGNAL_ARGS)
if (MyProc->links.prev == INVALID_OFFSET || if (MyProc->links.prev == INVALID_OFFSET ||
MyProc->links.next == INVALID_OFFSET) MyProc->links.next == INVALID_OFFSET)
{ {
UnlockLockTable(); LWLockRelease(LockMgrLock);
errno = save_errno; errno = save_errno;
return; return;
} }
...@@ -812,7 +891,7 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -812,7 +891,7 @@ HandleDeadLock(SIGNAL_ARGS)
if (!DeadLockCheck(MyProc)) if (!DeadLockCheck(MyProc))
{ {
/* No deadlock, so keep waiting */ /* No deadlock, so keep waiting */
UnlockLockTable(); LWLockRelease(LockMgrLock);
errno = save_errno; errno = save_errno;
return; return;
} }
...@@ -846,30 +925,10 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -846,30 +925,10 @@ HandleDeadLock(SIGNAL_ARGS)
* wakable because we're not in front of them anymore. However, * wakable because we're not in front of them anymore. However,
* RemoveFromWaitQueue took care of waking up any such processes. * RemoveFromWaitQueue took care of waking up any such processes.
*/ */
UnlockLockTable(); LWLockRelease(LockMgrLock);
errno = save_errno; errno = save_errno;
} }
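One detail worth isolating from HandleDeadLock is the save_errno discipline: a signal handler that makes system calls must restore errno before returning, or it can corrupt the errno of whatever code it interrupted. A standalone sketch with a hypothetical handler body (the real handler runs the deadlock checker under LockMgrLock):

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t alarm_seen = 0;

static void
handle_alarm(int signo)
{
    int save_errno = errno;             /* first thing: save errno */

    (void) signo;
    alarm_seen = 1;
    /* work that may clobber errno, e.g. writing to a log fd */
    (void) write(STDERR_FILENO, "alarm\n", 6);

    errno = save_errno;                 /* last thing: restore it */
}

int
main(void)
{
    signal(SIGALRM, handle_alarm);
    alarm(1);
    pause();                            /* returns with EINTR once the handler runs */
    printf("alarm_seen = %d\n", (int) alarm_seen);
    return 0;
}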
void
ProcReleaseSpins(PROC *proc)
{
int i;
if (!proc)
proc = MyProc;
if (!proc)
return;
for (i = 0; i < (int) MAX_SPINS; i++)
{
if (proc->sLocks[i])
{
Assert(proc->sLocks[i] == 1);
SpinRelease(i);
}
}
AbortBufferIO();
}
/* /*
* ProcWaitForSignal - wait for a signal from another backend. * ProcWaitForSignal - wait for a signal from another backend.
...@@ -994,10 +1053,7 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum) ...@@ -994,10 +1053,7 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
SEM_MAP_ENTRY *procSemMap = ProcGlobal->procSemMap; SEM_MAP_ENTRY *procSemMap = ProcGlobal->procSemMap;
int32 fullmask = (1 << PROC_NSEMS_PER_SET) - 1; int32 fullmask = (1 << PROC_NSEMS_PER_SET) - 1;
/*
 * we hold ProcStructLock when entering this routine. We scan through
 * the bitmap to look for a free semaphore.
 */
SpinLockAcquire(ProcStructLock);
for (i = 0; i < semMapEntries; i++) for (i = 0; i < semMapEntries; i++)
{ {
...@@ -1018,12 +1074,17 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum) ...@@ -1018,12 +1074,17 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
*semId = procSemMap[i].procSemId; *semId = procSemMap[i].procSemId;
*semNum = j; *semNum = j;
SpinLockRelease(ProcStructLock);
return; return;
} }
mask <<= 1; mask <<= 1;
} }
} }
SpinLockRelease(ProcStructLock);
/* /*
* If we reach here, all the semaphores are in use. This is one of the * If we reach here, all the semaphores are in use. This is one of the
* possible places to detect "too many backends", so give the standard * possible places to detect "too many backends", so give the standard
...@@ -1036,6 +1097,8 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum) ...@@ -1036,6 +1097,8 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
/* /*
* ProcFreeSem - * ProcFreeSem -
* free up our semaphore in the semaphore set. * free up our semaphore in the semaphore set.
*
* Caller is assumed to hold ProcStructLock.
*/ */
static void static void
ProcFreeSem(IpcSemaphoreId semId, int semNum) ProcFreeSem(IpcSemaphoreId semId, int semNum)
...@@ -1054,6 +1117,7 @@ ProcFreeSem(IpcSemaphoreId semId, int semNum) ...@@ -1054,6 +1117,7 @@ ProcFreeSem(IpcSemaphoreId semId, int semNum)
return; return;
} }
} }
/* can't elog here!!! */
fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId); fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId);
} }
......
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* s_lock.c * s_lock.c
* Spinlock support routines * Hardware-dependent implementation of spinlocks.
*
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.1 2001/09/27 19:10:02 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.2 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,49 +18,14 @@ ...@@ -17,49 +18,14 @@
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include "miscadmin.h"
#include "storage/s_lock.h" #include "storage/s_lock.h"
/*----------
* Each time we busy spin we select the next element of this array as the
* number of microseconds to wait. This accomplishes pseudo random back-off.
*
* Note that on most platforms, specified values will be rounded up to the
* next multiple of a clock tick, which is often ten milliseconds (10000).
* So, we are being way overoptimistic to assume that these different values
* are really different, other than the last. But there are a few platforms
* with better-than-usual timekeeping, and on these we will get pretty good
* pseudo-random behavior.
*
* Total time to cycle through all 20 entries will be at least 100 msec,
* more commonly (10 msec resolution) 220 msec, and on some platforms
* as much as 420 msec (when the remainder of the current tick cycle is
* ignored in deciding when to time out, as on FreeBSD and older Linuxen).
* We use the 100msec figure to figure max_spins, so actual timeouts may
* be as much as four times the nominal value, but will never be less.
*----------
*/
#define S_NSPINCYCLE 20
int s_spincycle[S_NSPINCYCLE] =
{1, 10, 100, 1000,
10000, 1000, 1000, 1000,
10000, 1000, 1000, 10000,
1000, 1000, 10000, 1000,
10000, 1000, 10000, 30000
};
#define AVG_SPINCYCLE 5000 /* average entry in microsec: 100ms / 20 */
#define DEFAULT_TIMEOUT (100*1000000) /* default timeout: 100 sec */
/* /*
* s_lock_stuck() - complain about a stuck spinlock * s_lock_stuck() - complain about a stuck spinlock
*/ */
static void static void
s_lock_stuck(volatile slock_t *lock, const char *file, const int line) s_lock_stuck(volatile slock_t *lock, const char *file, int line)
{ {
fprintf(stderr, fprintf(stderr,
"\nFATAL: s_lock(%p) at %s:%d, stuck spinlock. Aborting.\n", "\nFATAL: s_lock(%p) at %s:%d, stuck spinlock. Aborting.\n",
...@@ -72,69 +38,41 @@ s_lock_stuck(volatile slock_t *lock, const char *file, const int line) ...@@ -72,69 +38,41 @@ s_lock_stuck(volatile slock_t *lock, const char *file, const int line)
/*
 * s_lock_sleep() - sleep a pseudo-random amount of time, check for timeout
 *
 * The 'timeout' is given in microsec, or may be 0 for "infinity". Note that
 * this will be a lower bound (a fairly loose lower bound, on most platforms).
 *
 * 'microsec' is the number of microsec to delay per loop. Normally
 * 'microsec' is 0, specifying to use the next s_spincycle[] value.
 * Some callers may pass a nonzero interval, specifying to use exactly that
 * delay value rather than a pseudo-random delay.
 */
/*
 * s_lock(lock) - platform-independent portion of waiting for a spinlock.
 */
void void
s_lock_sleep(unsigned spins, int timeout, int microsec, s_lock(volatile slock_t *lock, const char *file, int line)
volatile slock_t *lock,
const char *file, const int line)
{
struct timeval delay;
if (microsec > 0)
{
delay.tv_sec = microsec / 1000000;
delay.tv_usec = microsec % 1000000;
}
else
{
delay.tv_sec = 0;
delay.tv_usec = s_spincycle[spins % S_NSPINCYCLE];
microsec = AVG_SPINCYCLE; /* use average to figure timeout */
}
if (timeout > 0)
{
unsigned max_spins = timeout / microsec;
if (spins > max_spins)
s_lock_stuck(lock, file, line);
}
(void) select(0, NULL, NULL, NULL, &delay);
}
/*
* s_lock(lock) - take a spinlock with backoff
*/
void
s_lock(volatile slock_t *lock, const char *file, const int line)
{ {
unsigned spins = 0; unsigned spins = 0;
unsigned delays = 0;
struct timeval delay;
/*
 * If you are thinking of changing this code, be careful. This same
 * loop logic is used in other places that call TAS() directly.
 *
 * While waiting for a lock, we check for cancel/die interrupts (which is
 * a no-op if we are inside a critical section). The interrupt check
 * can be omitted in places that know they are inside a critical
 * section. Note that an interrupt must NOT be accepted after
 * acquiring the lock.
 */
/*
 * We loop tightly for awhile, then delay using select() and try again.
 * Preferably, "awhile" should be a small multiple of the maximum time
 * we expect a spinlock to be held. 100 iterations seems about right.
 *
 * We use a 10 millisec select delay because that is the lower limit on
 * many platforms. The timeout is figured on this delay only, and so the
 * nominal 1 minute is a lower bound.
 */
#define SPINS_PER_DELAY 100
#define DELAY_MSEC 10
#define TIMEOUT_MSEC (60 * 1000)
while (TAS(lock)) while (TAS(lock))
{ {
s_lock_sleep(spins++, DEFAULT_TIMEOUT, 0, lock, file, line);
CHECK_FOR_INTERRUPTS();
if (++spins > SPINS_PER_DELAY)
{
if (++delays > (TIMEOUT_MSEC / DELAY_MSEC))
s_lock_stuck(lock, file, line);
delay.tv_sec = 0;
delay.tv_usec = DELAY_MSEC * 1000;
(void) select(0, NULL, NULL, NULL, &delay);
spins = 0;
}
} }
} }
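For reference, the same two-level wait loop as a standalone compilable sketch, with C11 atomic_flag playing the role of the hardware-dependent slock_t and TAS() macro (the demo_ names are inventions, not PostgreSQL source). The timeout arithmetic is visible here too: TIMEOUT_MSEC / DELAY_MSEC = 6000 ten-millisecond sleeps, so the nominal one-minute limit is, as the comment says, only a lower bound.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/select.h>

#define SPINS_PER_DELAY 100
#define DELAY_MSEC      10
#define TIMEOUT_MSEC    (60 * 1000)

typedef atomic_flag demo_slock_t;   /* stands in for the hardware-dependent type */

static void
demo_s_lock(volatile demo_slock_t *lock, const char *file, int line)
{
    unsigned spins = 0;
    unsigned delays = 0;
    struct timeval delay;

    while (atomic_flag_test_and_set(lock))     /* plays the role of TAS() */
    {
        if (++spins > SPINS_PER_DELAY)
        {
            if (++delays > (TIMEOUT_MSEC / DELAY_MSEC))
            {
                fprintf(stderr, "stuck spinlock at %s:%d\n", file, line);
                abort();
            }
            delay.tv_sec = 0;
            delay.tv_usec = DELAY_MSEC * 1000;
            (void) select(0, NULL, NULL, NULL, &delay);
            spins = 0;
        }
    }
}

int
main(void)
{
    demo_slock_t lock = ATOMIC_FLAG_INIT;

    demo_s_lock(&lock, __FILE__, __LINE__);     /* free lock: acquired on first TAS */
    atomic_flag_clear(&lock);
    printf("acquired and released\n");
    return 0;
}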
......
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* spin.c * spin.c
 * routines for managing spin locks
 *
 * POSTGRES has two kinds of locks: semaphores (which put the
 * process to sleep) and spinlocks (which are supposed to be
 * short term locks). Spinlocks are implemented via test-and-set (TAS)
 * instructions if possible, else via semaphores. The semaphore method
 * is too slow to be useful :-(
 *
 * Hardware-independent implementation of spinlocks.
 *
 * For machines that have test-and-set (TAS) instructions, s_lock.h/.c
 * define the spinlock implementation. This file contains only a stub
 * implementation for spinlocks using SysV semaphores. The semaphore method
 * is too slow to be very useful :-(
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.1 2001/09/27 19:10:02 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.2 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include <errno.h> #include <errno.h>
#if !defined(HAS_TEST_AND_SET) && defined(HAVE_SYS_SEM_H) #ifdef HAVE_SYS_SEM_H
#include <sys/sem.h> #include <sys/sem.h>
#endif #endif
#include "miscadmin.h" #include "storage/lwlock.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/s_lock.h" #include "storage/spin.h"
/* Probably should move these to an appropriate header file */
extern SPINLOCK BufMgrLock;
extern SPINLOCK OidGenLockId;
extern SPINLOCK XidGenLockId;
extern SPINLOCK ControlFileLockId;
extern SPINLOCK ShmemLock;
extern SPINLOCK ShmemIndexLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK FreeSpaceLock;
#ifdef STABLE_MEMORY_STORAGE
extern SPINLOCK MMCacheLock;
#endif
/*
* Initialize identifiers for permanent spinlocks during startup
*
* The same identifiers are used for both TAS and semaphore implementations,
* although in one case they are indexes into a shmem array and in the other
* they are semaphore numbers.
*/
static void
InitSpinLockIDs(void)
{
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
XidGenLockId = (SPINLOCK) XIDGENLOCKID;
ControlFileLockId = (SPINLOCK) CNTLFILELOCKID;
ShmemLock = (SPINLOCK) SHMEMLOCKID;
ShmemIndexLock = (SPINLOCK) SHMEMINDEXLOCKID;
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
SInvalLock = (SPINLOCK) SINVALLOCKID;
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
FreeSpaceLock = (SPINLOCK) FREESPACELOCKID;
#ifdef STABLE_MEMORY_STORAGE
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
#endif
}
#ifdef HAS_TEST_AND_SET #ifdef HAS_TEST_AND_SET
/* real spin lock implementation */
typedef struct slock
{
slock_t shlock;
} SLock;
#ifdef LOCK_DEBUG
bool Trace_spinlocks = false;
inline static void
PRINT_SLDEBUG(const char *where, SPINLOCK lockid, const SLock *lock)
{
if (Trace_spinlocks)
elog(DEBUG, "%s: id=%d", where, lockid);
}
#else /* not LOCK_DEBUG */
#define PRINT_SLDEBUG(a,b,c)
#endif /* not LOCK_DEBUG */
static SLock *SLockArray = NULL;
#define SLOCKMEMORYSIZE ((int) MAX_SPINS * sizeof(SLock))
/*
* SLockShmemSize --- return shared-memory space needed
*/
int
SLockShmemSize(void)
{
return MAXALIGN(SLOCKMEMORYSIZE);
}
/* /*
* CreateSpinlocks --- create and initialize spinlocks during startup * CreateSpinlocks --- create and initialize spinlocks during startup
*/ */
void void
CreateSpinlocks(PGShmemHeader *seghdr) CreateSpinlocks(void)
{
int id;
/*
* We must allocate the space "by hand" because shmem.c isn't up yet
*/
SLockArray = (SLock *) (((char *) seghdr) + seghdr->freeoffset);
seghdr->freeoffset += MAXALIGN(SLOCKMEMORYSIZE);
Assert(seghdr->freeoffset <= seghdr->totalsize);
/*
* Initialize all spinlocks to "unlocked" state
*/
for (id = 0; id < (int) MAX_SPINS; id++)
{
SLock *slckP = &(SLockArray[id]);
S_INIT_LOCK(&(slckP->shlock));
}
/*
* Assign indexes for fixed spinlocks
*/
InitSpinLockIDs();
}
void
SpinAcquire(SPINLOCK lockid)
{
SLock *slckP = &(SLockArray[lockid]);
PRINT_SLDEBUG("SpinAcquire", lockid, slckP);
/*
* Acquire the lock, then record that we have done so (for recovery in
* case of elog(ERROR) while holding the lock). Note we assume here
* that S_LOCK will not accept cancel/die interrupts once it has
* acquired the lock. However, interrupts should be accepted while
* waiting, if InterruptHoldoffCount is zero.
*/
S_LOCK(&(slckP->shlock));
PROC_INCR_SLOCK(lockid);
/*
* Lock out cancel/die interrupts until we exit the code section
* protected by the spinlock. This ensures that interrupts will not
* interfere with manipulations of data structures in shared memory.
*/
HOLD_INTERRUPTS();
PRINT_SLDEBUG("SpinAcquire/done", lockid, slckP);
}
void
SpinRelease(SPINLOCK lockid)
{ {
SLock *slckP = &(SLockArray[lockid]);
/* no-op when we have TAS spinlocks */
PRINT_SLDEBUG("SpinRelease", lockid, slckP);
/*
* Check that we are actually holding the lock we are releasing. This
* can be done only after MyProc has been initialized.
*/
Assert(!MyProc || MyProc->sLocks[lockid] > 0);
/*
* Record that we no longer hold the spinlock, and release it.
*/
PROC_DECR_SLOCK(lockid);
S_UNLOCK(&(slckP->shlock));
/*
* Exit the interrupt holdoff entered in SpinAcquire().
*/
RESUME_INTERRUPTS();
PRINT_SLDEBUG("SpinRelease/done", lockid, slckP);
} }
#else /* !HAS_TEST_AND_SET */ #else /* !HAS_TEST_AND_SET */
...@@ -199,11 +47,7 @@ SpinRelease(SPINLOCK lockid) ...@@ -199,11 +47,7 @@ SpinRelease(SPINLOCK lockid)
/* /*
* No TAS, so spinlocks are implemented using SysV semaphores. * No TAS, so spinlocks are implemented using SysV semaphores.
* *
 * We support two slightly different APIs here: SpinAcquire/SpinRelease
 * work with SPINLOCK integer indexes for the permanent spinlocks, which
 * are all assumed to live in the first spinlock semaphore set. There
 * is also an emulation of the s_lock.h TAS-spinlock macros; for that case,
 * typedef slock_t stores the semId and sem number of the sema to use.
 * Typedef slock_t stores the semId and sem number of the sema to use.
* The semas needed are created by CreateSpinlocks and doled out by * The semas needed are created by CreateSpinlocks and doled out by
* s_init_lock_sema. * s_init_lock_sema.
* *
...@@ -228,35 +72,26 @@ static int nextSpinLock = 0; /* next free spinlock index */ ...@@ -228,35 +72,26 @@ static int nextSpinLock = 0; /* next free spinlock index */
static void SpinFreeAllSemaphores(void); static void SpinFreeAllSemaphores(void);
/*
* SLockShmemSize --- return shared-memory space needed
*/
int
SLockShmemSize(void)
{
return 0;
}
/* /*
* CreateSpinlocks --- create and initialize spinlocks during startup * CreateSpinlocks --- create and initialize spinlocks during startup
*/ */
void void
CreateSpinlocks(PGShmemHeader *seghdr) CreateSpinlocks(void)
{ {
int i; int i;
if (SpinLockIds == NULL) if (SpinLockIds == NULL)
{ {
/* /*
* Compute number of spinlocks needed. If this logic gets any * Compute number of spinlocks needed. It would be cleaner to
* more complicated, it should be distributed into the affected * distribute this logic into the affected modules,
* modules, similar to the way shmem space estimation is handled. * similar to the way shmem space estimation is handled.
* *
* For now, though, we just need the fixed spinlocks (MAX_SPINS), two * For now, though, we just need a few spinlocks (10 should be
* spinlocks per shared disk buffer, and four spinlocks for XLOG. * plenty) plus one for each LWLock.
*/ */
numSpinLocks = (int) MAX_SPINS + 2 * NBuffers + 4; numSpinLocks = NumLWLocks() + 10;
/* might as well round up to a multiple of SPINLOCKS_PER_SET */ /* might as well round up to a multiple of SPINLOCKS_PER_SET */
numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1; numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1;
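(Worked example with hypothetical numbers: if SPINLOCKS_PER_SET were 16 and NumLWLocks() + 10 came to 42, this gives numSpinSets = (42 - 1) / 16 + 1 = 3, i.e. 48 semaphores created in 3 sets.)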
...@@ -288,14 +123,8 @@ CreateSpinlocks(PGShmemHeader *seghdr) ...@@ -288,14 +123,8 @@ CreateSpinlocks(PGShmemHeader *seghdr)
false); false);
} }
/*
* Assign indexes for fixed spinlocks
*/
Assert(MAX_SPINS <= SPINLOCKS_PER_SET);
InitSpinLockIDs();
/* Init counter for allocating dynamic spinlocks */ /* Init counter for allocating dynamic spinlocks */
nextSpinLock = MAX_SPINS; nextSpinLock = 0;
} }
/* /*
...@@ -318,49 +147,6 @@ SpinFreeAllSemaphores(void) ...@@ -318,49 +147,6 @@ SpinFreeAllSemaphores(void)
SpinLockIds = NULL; SpinLockIds = NULL;
} }
/*
* SpinAcquire -- grab a fixed spinlock
*
* FAILS if the semaphore is corrupted.
*/
void
SpinAcquire(SPINLOCK lock)
{
/*
* See the TAS() version of this routine for primary commentary.
*
* NOTE we must pass interruptOK = false to IpcSemaphoreLock, to ensure
* that a cancel/die interrupt cannot prevent us from recording
* ownership of a lock we have just acquired.
*/
IpcSemaphoreLock(SpinLockIds[0], lock, false);
PROC_INCR_SLOCK(lock);
HOLD_INTERRUPTS();
}
/*
* SpinRelease -- release a fixed spin lock
*
* FAILS if the semaphore is corrupted
*/
void
SpinRelease(SPINLOCK lock)
{
/* See the TAS() version of this routine for commentary */
#ifdef USE_ASSERT_CHECKING
/* Check it's locked */
int semval;
semval = IpcSemaphoreGetValue(SpinLockIds[0], lock);
Assert(semval < 1);
#endif
Assert(!MyProc || MyProc->sLocks[lockid] > 0);
PROC_DECR_SLOCK(lock);
IpcSemaphoreUnlock(SpinLockIds[0], lock);
RESUME_INTERRUPTS();
}
/* /*
* s_lock.h hardware-spinlock emulation * s_lock.h hardware-spinlock emulation
*/ */
......
...@@ -11,17 +11,19 @@ ...@@ -11,17 +11,19 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.24 2001/06/27 23:31:39 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.25 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include "miscadmin.h"
#ifdef STABLE_MEMORY_STORAGE
#include <math.h> #include <math.h>
#include "storage/smgr.h"
#include "miscadmin.h"
#ifdef STABLE_MEMORY_STORAGE
/* /*
* MMCacheTag -- Unique triplet for blocks stored by the main memory * MMCacheTag -- Unique triplet for blocks stored by the main memory
...@@ -71,8 +73,6 @@ typedef struct MMRelHashEntry ...@@ -71,8 +73,6 @@ typedef struct MMRelHashEntry
#define MMNBUFFERS 10 #define MMNBUFFERS 10
#define MMNRELATIONS 2 #define MMNRELATIONS 2
SPINLOCK MMCacheLock;
static int *MMCurTop; static int *MMCurTop;
static int *MMCurRelno; static int *MMCurRelno;
static MMCacheTag *MMBlockTags; static MMCacheTag *MMBlockTags;
...@@ -88,7 +88,7 @@ mminit() ...@@ -88,7 +88,7 @@ mminit()
bool found; bool found;
HASHCTL info; HASHCTL info;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS); mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS);
mmsize += MAXALIGN(sizeof(*MMCurTop)); mmsize += MAXALIGN(sizeof(*MMCurTop));
...@@ -98,7 +98,7 @@ mminit() ...@@ -98,7 +98,7 @@ mminit()
if (mmcacheblk == (char *) NULL) if (mmcacheblk == (char *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
...@@ -112,7 +112,7 @@ mminit() ...@@ -112,7 +112,7 @@ mminit()
if (MMCacheHT == (HTAB *) NULL) if (MMCacheHT == (HTAB *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
...@@ -126,18 +126,18 @@ mminit() ...@@ -126,18 +126,18 @@ mminit()
if (MMRelCacheHT == (HTAB *) NULL) if (MMRelCacheHT == (HTAB *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
if (IsUnderPostmaster) /* was IsPostmaster bjm */ if (IsUnderPostmaster) /* was IsPostmaster bjm */
{ {
MemSet(mmcacheblk, 0, mmsize); MemSet(mmcacheblk, 0, mmsize);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
MMCurTop = (int *) mmcacheblk; MMCurTop = (int *) mmcacheblk;
mmcacheblk += sizeof(int); mmcacheblk += sizeof(int);
...@@ -163,11 +163,11 @@ mmcreate(Relation reln) ...@@ -163,11 +163,11 @@ mmcreate(Relation reln)
bool found; bool found;
MMRelTag tag; MMRelTag tag;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
if (*MMCurRelno == MMNRELATIONS) if (*MMCurRelno == MMNRELATIONS)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
...@@ -184,20 +184,20 @@ mmcreate(Relation reln) ...@@ -184,20 +184,20 @@ mmcreate(Relation reln)
if (entry == (MMRelHashEntry *) NULL) if (entry == (MMRelHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "main memory storage mgr rel cache hash table corrupt"); elog(FATAL, "main memory storage mgr rel cache hash table corrupt");
} }
if (found) if (found)
{ {
/* already exists */ /* already exists */
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
entry->mmrhe_nblocks = 0; entry->mmrhe_nblocks = 0;
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -211,30 +211,24 @@ int ...@@ -211,30 +211,24 @@ int
mmunlink(RelFileNode rnode) mmunlink(RelFileNode rnode)
{ {
int i; int i;
Oid reldbid;
MMHashEntry *entry; MMHashEntry *entry;
MMRelHashEntry *rentry; MMRelHashEntry *rentry;
bool found; bool found;
MMRelTag rtag; MMRelTag rtag;
if (reln->rd_rel->relisshared)
reldbid = (Oid) 0;
else
reldbid = MyDatabaseId;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
for (i = 0; i < MMNBUFFERS; i++) for (i = 0; i < MMNBUFFERS; i++)
{ {
if (MMBlockTags[i].mmct_dbid == reldbid if (MMBlockTags[i].mmct_dbid == rnode.tblNode
&& MMBlockTags[i].mmct_relid == RelationGetRelid(reln)) && MMBlockTags[i].mmct_relid == rnode.relNode)
{ {
entry = (MMHashEntry *) hash_search(MMCacheHT, entry = (MMHashEntry *) hash_search(MMCacheHT,
(char *) &MMBlockTags[i], (char *) &MMBlockTags[i],
HASH_REMOVE, &found); HASH_REMOVE, &found);
if (entry == (MMHashEntry *) NULL || !found) if (entry == (MMHashEntry *) NULL || !found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmunlink: cache hash table corrupted"); elog(FATAL, "mmunlink: cache hash table corrupted");
} }
MMBlockTags[i].mmct_dbid = (Oid) 0; MMBlockTags[i].mmct_dbid = (Oid) 0;
...@@ -242,21 +236,21 @@ mmunlink(RelFileNode rnode) ...@@ -242,21 +236,21 @@ mmunlink(RelFileNode rnode)
MMBlockTags[i].mmct_blkno = (BlockNumber) 0; MMBlockTags[i].mmct_blkno = (BlockNumber) 0;
} }
} }
rtag.mmrt_dbid = reldbid; rtag.mmrt_dbid = rnode.tblNode;
rtag.mmrt_relid = RelationGetRelid(reln); rtag.mmrt_relid = rnode.relNode;
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_REMOVE, &found); HASH_REMOVE, &found);
if (rentry == (MMRelHashEntry *) NULL || !found) if (rentry == (MMRelHashEntry *) NULL || !found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmunlink: rel cache hash table corrupted"); elog(FATAL, "mmunlink: rel cache hash table corrupted");
} }
(*MMCurRelno)--; (*MMCurRelno)--;
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return 1; return 1;
} }
...@@ -286,7 +280,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -286,7 +280,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
tag.mmct_dbid = rtag.mmrt_dbid = reldbid; tag.mmct_dbid = rtag.mmrt_dbid = reldbid;
tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln); tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln);
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
if (*MMCurTop == MMNBUFFERS) if (*MMCurTop == MMNBUFFERS)
{ {
...@@ -298,7 +292,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -298,7 +292,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
} }
if (i == MMNBUFFERS) if (i == MMNBUFFERS)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_FAIL; return SM_FAIL;
} }
} }
...@@ -312,7 +306,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -312,7 +306,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
HASH_FIND, &found); HASH_FIND, &found);
if (rentry == (MMRelHashEntry *) NULL || !found) if (rentry == (MMRelHashEntry *) NULL || !found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmextend: rel cache hash table corrupt"); elog(FATAL, "mmextend: rel cache hash table corrupt");
} }
...@@ -322,7 +316,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -322,7 +316,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
HASH_ENTER, &found); HASH_ENTER, &found);
if (entry == (MMHashEntry *) NULL || found) if (entry == (MMHashEntry *) NULL || found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmextend: cache hash table corrupt"); elog(FATAL, "mmextend: cache hash table corrupt");
} }
...@@ -338,7 +332,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -338,7 +332,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer)
offset = (i * BLCKSZ); offset = (i * BLCKSZ);
memmove(&(MMBlockCache[offset]), buffer, BLCKSZ); memmove(&(MMBlockCache[offset]), buffer, BLCKSZ);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -386,20 +380,20 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -386,20 +380,20 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer)
tag.mmct_relid = RelationGetRelid(reln); tag.mmct_relid = RelationGetRelid(reln);
tag.mmct_blkno = blocknum; tag.mmct_blkno = blocknum;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found); HASH_FIND, &found);
if (entry == (MMHashEntry *) NULL) if (entry == (MMHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt"); elog(FATAL, "mmread: hash table corrupt");
} }
if (!found) if (!found)
{ {
/* reading nonexistent pages is defined to fill them with zeroes */ /* reading nonexistent pages is defined to fill them with zeroes */
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
MemSet(buffer, 0, BLCKSZ); MemSet(buffer, 0, BLCKSZ);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -407,7 +401,7 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -407,7 +401,7 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer)
offset = (entry->mmhe_bufno * BLCKSZ); offset = (entry->mmhe_bufno * BLCKSZ);
memmove(buffer, &MMBlockCache[offset], BLCKSZ); memmove(buffer, &MMBlockCache[offset], BLCKSZ);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -433,26 +427,26 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer) ...@@ -433,26 +427,26 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
tag.mmct_relid = RelationGetRelid(reln); tag.mmct_relid = RelationGetRelid(reln);
tag.mmct_blkno = blocknum; tag.mmct_blkno = blocknum;
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found); HASH_FIND, &found);
if (entry == (MMHashEntry *) NULL) if (entry == (MMHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt"); elog(FATAL, "mmread: hash table corrupt");
} }
if (!found) if (!found)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmwrite: hash table missing requested page"); elog(FATAL, "mmwrite: hash table missing requested page");
} }
offset = (entry->mmhe_bufno * BLCKSZ); offset = (entry->mmhe_bufno * BLCKSZ);
memmove(&MMBlockCache[offset], buffer, BLCKSZ); memmove(&MMBlockCache[offset], buffer, BLCKSZ);
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return SM_SUCCESS; return SM_SUCCESS;
} }
...@@ -506,14 +500,14 @@ mmnblocks(Relation reln) ...@@ -506,14 +500,14 @@ mmnblocks(Relation reln)
rtag.mmrt_relid = RelationGetRelid(reln); rtag.mmrt_relid = RelationGetRelid(reln);
SpinAcquire(MMCacheLock); LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_FIND, &found); HASH_FIND, &found);
if (rentry == (MMRelHashEntry *) NULL) if (rentry == (MMRelHashEntry *) NULL)
{ {
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
elog(FATAL, "mmnblocks: rel cache hash table corrupt"); elog(FATAL, "mmnblocks: rel cache hash table corrupt");
} }
...@@ -522,7 +516,7 @@ mmnblocks(Relation reln) ...@@ -522,7 +516,7 @@ mmnblocks(Relation reln)
else else
nblocks = InvalidBlockNumber; nblocks = InvalidBlockNumber;
SpinRelease(MMCacheLock); LWLockRelease(MMCacheLock);
return nblocks; return nblocks;
} }
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.52 2001/07/02 20:50:46 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.53 2001/09/29 04:02:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/memutils.h" #include "utils/memutils.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.92 2001/09/27 16:29:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.93 2001/09/29 04:02:25 tgl Exp $
* *
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
...@@ -401,11 +401,12 @@ ShutdownPostgres(void) ...@@ -401,11 +401,12 @@ ShutdownPostgres(void)
* since that just raises the odds of failure --- but there's some * since that just raises the odds of failure --- but there's some
* stuff we need to do. * stuff we need to do.
* *
* Release any spinlocks or buffer context locks we might be holding. * Release any LW locks and buffer context locks we might be holding.
* This is a kluge to improve the odds that we won't get into a * This is a kluge to improve the odds that we won't get into a
* self-made stuck-spinlock scenario while trying to shut down. * self-made stuck-lock scenario while trying to shut down.
*/ */
ProcReleaseSpins(NULL); LWLockReleaseAll();
AbortBufferIO();
UnlockBuffers(); UnlockBuffers();
/* /*
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* Support for grand unified configuration scheme, including SET * Support for grand unified configuration scheme, including SET
* command, configuration file, and command line options. * command, configuration file, and command line options.
* *
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.52 2001/09/23 21:52:36 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.53 2001/09/29 04:02:25 tgl Exp $
* *
* Copyright 2000 by PostgreSQL Global Development Group * Copyright 2000 by PostgreSQL Global Development Group
* Written by Peter Eisentraut <peter_e@gmx.net>. * Written by Peter Eisentraut <peter_e@gmx.net>.
...@@ -240,7 +240,7 @@ static struct config_bool ...@@ -240,7 +240,7 @@ static struct config_bool
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
{"trace_locks", PGC_SUSET, &Trace_locks, false, NULL}, {"trace_locks", PGC_SUSET, &Trace_locks, false, NULL},
{"trace_userlocks", PGC_SUSET, &Trace_userlocks, false, NULL}, {"trace_userlocks", PGC_SUSET, &Trace_userlocks, false, NULL},
{"trace_spinlocks", PGC_SUSET, &Trace_spinlocks, false, NULL}, {"trace_lwlocks", PGC_SUSET, &Trace_lwlocks, false, NULL},
{"debug_deadlocks", PGC_SUSET, &Debug_deadlocks, false, NULL}, {"debug_deadlocks", PGC_SUSET, &Debug_deadlocks, false, NULL},
#endif #endif
......
...@@ -164,7 +164,7 @@ ...@@ -164,7 +164,7 @@
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
#trace_locks = false #trace_locks = false
#trace_userlocks = false #trace_userlocks = false
#trace_spinlocks = false #trace_lwlocks = false
#debug_deadlocks = false #debug_deadlocks = false
#trace_lock_oidmin = 16384 #trace_lock_oidmin = 16384
#trace_lock_table = 0 #trace_lock_table = 0
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: clog.h,v 1.1 2001/08/25 18:52:42 tgl Exp $ * $Id: clog.h,v 1.2 2001/09/29 04:02:26 tgl Exp $
*/ */
#ifndef CLOG_H #ifndef CLOG_H
#define CLOG_H #define CLOG_H
...@@ -24,6 +24,9 @@ typedef int XidStatus; ...@@ -24,6 +24,9 @@ typedef int XidStatus;
#define TRANSACTION_STATUS_ABORTED 0x02 #define TRANSACTION_STATUS_ABORTED 0x02
/* 0x03 is available without changing commit log space allocation */ /* 0x03 is available without changing commit log space allocation */
/* exported because lwlock.c needs it */
#define NUM_CLOG_BUFFERS 8
extern void TransactionIdSetStatus(TransactionId xid, XidStatus status); extern void TransactionIdSetStatus(TransactionId xid, XidStatus status);
extern XidStatus TransactionIdGetStatus(TransactionId xid); extern XidStatus TransactionIdGetStatus(TransactionId xid);
......
...@@ -7,15 +7,13 @@ ...@@ -7,15 +7,13 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: transam.h,v 1.40 2001/08/26 16:56:00 tgl Exp $ * $Id: transam.h,v 1.41 2001/09/29 04:02:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef TRANSAM_H #ifndef TRANSAM_H
#define TRANSAM_H #define TRANSAM_H
#include "storage/spin.h"
/* ---------------- /* ----------------
* Special transaction ID values * Special transaction ID values
...@@ -122,8 +120,6 @@ extern void CheckMaxObjectId(Oid assigned_oid); ...@@ -122,8 +120,6 @@ extern void CheckMaxObjectId(Oid assigned_oid);
extern bool AMI_OVERRIDE; extern bool AMI_OVERRIDE;
/* in varsup.c */ /* in varsup.c */
extern SPINLOCK OidGenLockId;
extern SPINLOCK XidGenLockId;
extern VariableCache ShmemVariableCache; extern VariableCache ShmemVariableCache;
#endif /* TRANSAM_H */ #endif /* TRANSAM_H */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: miscadmin.h,v 1.92 2001/09/27 16:29:13 tgl Exp $ * $Id: miscadmin.h,v 1.93 2001/09/29 04:02:26 tgl Exp $
* *
* NOTES * NOTES
* some of the information in this file should be moved to * some of the information in this file should be moved to
...@@ -46,8 +46,8 @@ ...@@ -46,8 +46,8 @@
* will be held off until the last matching RESUME_INTERRUPTS() occurs. * will be held off until the last matching RESUME_INTERRUPTS() occurs.
* *
* Special mechanisms are used to let an interrupt be accepted when we are * Special mechanisms are used to let an interrupt be accepted when we are
* waiting for a lock or spinlock, and when we are waiting for command input * waiting for a lock or when we are waiting for command input (but, of
* (but, of course, only if the interrupt holdoff counter is zero). See the * course, only if the interrupt holdoff counter is zero). See the
* related code for details. * related code for details.
* *
* A related, but conceptually distinct, mechanism is the "critical section" * A related, but conceptually distinct, mechanism is the "critical section"
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: buf_internals.h,v 1.49 2001/07/06 21:04:26 tgl Exp $ * $Id: buf_internals.h,v 1.50 2001/09/29 04:02:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include "storage/backendid.h" #include "storage/backendid.h"
#include "storage/buf.h" #include "storage/buf.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/s_lock.h" #include "storage/lwlock.h"
/* Buf Mgr constants */ /* Buf Mgr constants */
...@@ -89,12 +89,8 @@ typedef struct sbufdesc ...@@ -89,12 +89,8 @@ typedef struct sbufdesc
BufFlags flags; /* see bit definitions above */ BufFlags flags; /* see bit definitions above */
unsigned refcount; /* # of backends holding pins on buffer */ unsigned refcount; /* # of backends holding pins on buffer */
slock_t io_in_progress_lock; /* to wait for I/O to complete */ LWLockId io_in_progress_lock; /* to wait for I/O to complete */
slock_t cntx_lock; /* to lock access to page context */ LWLockId cntx_lock; /* to lock access to page context */
unsigned r_locks; /* # of shared locks */
bool ri_lock; /* read-intent lock */
bool w_lock; /* context exclusively locked */
bool cntxDirty; /* new way to mark block as dirty */ bool cntxDirty; /* new way to mark block as dirty */
...@@ -117,10 +113,7 @@ typedef struct sbufdesc ...@@ -117,10 +113,7 @@ typedef struct sbufdesc
* We have to free these locks in elog(ERROR)... * We have to free these locks in elog(ERROR)...
*/ */
#define BL_IO_IN_PROGRESS (1 << 0) /* unimplemented */ #define BL_IO_IN_PROGRESS (1 << 0) /* unimplemented */
#define BL_R_LOCK (1 << 1) #define BL_PIN_COUNT_LOCK (1 << 1)
#define BL_RI_LOCK (1 << 2)
#define BL_W_LOCK (1 << 3)
#define BL_PIN_COUNT_LOCK (1 << 4)
/* /*
* mao tracing buffer allocation * mao tracing buffer allocation
...@@ -173,7 +166,6 @@ extern bits8 *BufferLocks; ...@@ -173,7 +166,6 @@ extern bits8 *BufferLocks;
extern BufferTag *BufferTagLastDirtied; extern BufferTag *BufferTagLastDirtied;
extern LockRelId *BufferRelidLastDirtied; extern LockRelId *BufferRelidLastDirtied;
extern bool *BufferDirtiedByMe; extern bool *BufferDirtiedByMe;
extern SPINLOCK BufMgrLock;
/* localbuf.c */ /* localbuf.c */
extern BufferDesc *LocalBufferDescriptors; extern BufferDesc *LocalBufferDescriptors;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: freespace.h,v 1.2 2001/07/02 20:50:46 tgl Exp $ * $Id: freespace.h,v 1.3 2001/09/29 04:02:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -16,11 +16,8 @@ ...@@ -16,11 +16,8 @@
#include "storage/block.h" #include "storage/block.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
#include "storage/spin.h"
extern SPINLOCK FreeSpaceLock;
extern int MaxFSMRelations; extern int MaxFSMRelations;
extern int MaxFSMPages; extern int MaxFSMPages;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: ipc.h,v 1.50 2001/06/27 23:31:39 tgl Exp $ * $Id: ipc.h,v 1.51 2001/09/29 04:02:26 tgl Exp $
* *
* Some files that would normally need to include only sys/ipc.h must * Some files that would normally need to include only sys/ipc.h must
* instead include this file because on Ultrix, sys/ipc.h is not designed * instead include this file because on Ultrix, sys/ipc.h is not designed
...@@ -30,9 +30,9 @@ union semun ...@@ -30,9 +30,9 @@ union semun
struct semid_ds *buf; struct semid_ds *buf;
unsigned short *array; unsigned short *array;
}; };
#endif #endif
/* generic IPC definitions */ /* generic IPC definitions */
#define IPCProtection (0600) /* access/modify by user only */ #define IPCProtection (0600) /* access/modify by user only */
...@@ -51,7 +51,7 @@ typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */ ...@@ -51,7 +51,7 @@ typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */ typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */
typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
typedef struct /* standard header for all Postgres shmem */ typedef struct PGShmemHeader /* standard header for all Postgres shmem */
{ {
int32 magic; /* magic # to identify Postgres segments */ int32 magic; /* magic # to identify Postgres segments */
#define PGShmemMagic 679834892 #define PGShmemMagic 679834892
...@@ -61,29 +61,6 @@ typedef struct /* standard header for all Postgres shmem */ ...@@ -61,29 +61,6 @@ typedef struct /* standard header for all Postgres shmem */
} PGShmemHeader; } PGShmemHeader;
/* spinlock definitions */
typedef enum _LockId_
{
BUFMGRLOCKID,
OIDGENLOCKID,
XIDGENLOCKID,
CNTLFILELOCKID,
SHMEMLOCKID,
SHMEMINDEXLOCKID,
LOCKMGRLOCKID,
SINVALLOCKID,
PROCSTRUCTLOCKID,
FREESPACELOCKID,
#ifdef STABLE_MEMORY_STORAGE
MMCACHELOCKID,
#endif
MAX_SPINS /* must be last item! */
} _LockId_;
/* ipc.c */ /* ipc.c */
extern bool proc_exit_inprogress; extern bool proc_exit_inprogress;
......
...@@ -7,15 +7,15 @@ ...@@ -7,15 +7,15 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: lock.h,v 1.52 2001/09/27 16:29:13 tgl Exp $ * $Id: lock.h,v 1.53 2001/09/29 04:02:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef LOCK_H_ #ifndef LOCK_H_
#define LOCK_H_ #define LOCK_H_
#include "storage/ipc.h"
#include "storage/itemptr.h" #include "storage/itemptr.h"
#include "storage/lwlock.h"
#include "storage/shmem.h" #include "storage/shmem.h"
...@@ -26,12 +26,10 @@ typedef struct PROC_QUEUE ...@@ -26,12 +26,10 @@ typedef struct PROC_QUEUE
int size; /* number of entries in list */ int size; /* number of entries in list */
} PROC_QUEUE; } PROC_QUEUE;
/* struct proc is declared in storage/proc.h, but we must forward-reference it */ /* struct PROC is declared in storage/proc.h, but we must forward-reference it */
typedef struct proc PROC; typedef struct PROC PROC;
extern SPINLOCK LockMgrLock;
extern int max_locks_per_xact; extern int max_locks_per_xact;
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
...@@ -51,11 +49,7 @@ typedef int LOCKMETHOD; ...@@ -51,11 +49,7 @@ typedef int LOCKMETHOD;
/* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */ /* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */
#define MAX_LOCKMODES 10 #define MAX_LOCKMODES 10
/* /* MAX_LOCK_METHODS is the number of distinct lock control tables allowed */
* MAX_LOCK_METHODS corresponds to the number of spin locks allocated in
* CreateSpinLocks() or the number of shared memory locations allocated
* for lock table spin locks in the case of machines with TAS instructions.
*/
#define MAX_LOCK_METHODS 3 #define MAX_LOCK_METHODS 3
#define INVALID_TABLEID 0 #define INVALID_TABLEID 0
...@@ -69,7 +63,7 @@ typedef int LOCKMETHOD; ...@@ -69,7 +63,7 @@ typedef int LOCKMETHOD;
* If user locks are enabled, an additional lock method is present. * If user locks are enabled, an additional lock method is present.
* *
* LOCKMETHODCTL and LOCKMETHODTABLE are split because the first lives * LOCKMETHODCTL and LOCKMETHODTABLE are split because the first lives
* in shared memory. This is because it contains a spinlock. * in shared memory. (There isn't any really good reason for the split.)
* LOCKMETHODTABLE exists in private memory. Both are created by the * LOCKMETHODTABLE exists in private memory. Both are created by the
* postmaster and should be the same in all backends. * postmaster and should be the same in all backends.
*/ */
...@@ -93,7 +87,7 @@ typedef int LOCKMETHOD; ...@@ -93,7 +87,7 @@ typedef int LOCKMETHOD;
* writers can be given priority over readers (to avoid * writers can be given priority over readers (to avoid
* starvation). XXX this field is not actually used at present! * starvation). XXX this field is not actually used at present!
* *
* masterlock -- synchronizes access to the table * masterLock -- synchronizes access to the table
*/ */
typedef struct LOCKMETHODCTL typedef struct LOCKMETHODCTL
{ {
...@@ -101,7 +95,7 @@ typedef struct LOCKMETHODCTL ...@@ -101,7 +95,7 @@ typedef struct LOCKMETHODCTL
int numLockModes; int numLockModes;
int conflictTab[MAX_LOCKMODES]; int conflictTab[MAX_LOCKMODES];
int prio[MAX_LOCKMODES]; int prio[MAX_LOCKMODES];
SPINLOCK masterLock; LWLockId masterLock;
} LOCKMETHODCTL; } LOCKMETHODCTL;
/* /*
...@@ -235,11 +229,6 @@ typedef struct HOLDER ...@@ -235,11 +229,6 @@ typedef struct HOLDER
(((LOCK *) MAKE_PTR((holder).tag.lock))->tag.lockmethod) (((LOCK *) MAKE_PTR((holder).tag.lock))->tag.lockmethod)
#define LockLockTable() SpinAcquire(LockMgrLock)
#define UnlockLockTable() SpinRelease(LockMgrLock)
/* /*
* function prototypes * function prototypes
*/ */
...@@ -267,7 +256,6 @@ extern void InitDeadLockChecking(void); ...@@ -267,7 +256,6 @@ extern void InitDeadLockChecking(void);
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
extern void DumpLocks(void); extern void DumpLocks(void);
extern void DumpAllLocks(void); extern void DumpAllLocks(void);
#endif #endif
#endif /* LOCK_H */ #endif /* LOCK_H */
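With the LockLockTable()/UnlockLockTable() macros gone, call sites in lock.c presumably go through the LWLock API against the method table's masterLock instead. A hedged sketch of such a call site (not the committed code; the lockMethodTable->ctl path is an assumption):

/* Hypothetical lock.c fragment after this change. */
LWLockAcquire(lockMethodTable->ctl->masterLock, LW_EXCLUSIVE);
/* ... inspect or update the shared lock table ... */
LWLockRelease(lockMethodTable->ctl->masterLock);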
/*-------------------------------------------------------------------------
*
* lwlock.h
* Lightweight lock manager
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: lwlock.h,v 1.1 2001/09/29 04:02:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef LWLOCK_H
#define LWLOCK_H
/*
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
* dynamically assigned (for shared buffers). The LWLock structures live
* in shared memory (since they contain shared data) and are identified by
* values of this enumerated type. We abuse the notion of an enum somewhat
* by allowing values not listed in the enum declaration to be assigned.
* The extra value MaxDynamicLWLock is there to keep the compiler from
* deciding that the enum can be represented as char or short ...
*/
typedef enum LWLockId
{
BufMgrLock,
LockMgrLock,
OidGenLock,
XidGenLock,
ShmemIndexLock,
SInvalLock,
FreeSpaceLock,
MMCacheLock,
WALInsertLock,
WALWriteLock,
ControlFileLock,
CheckpointLock,
CLogControlLock,
NumFixedLWLocks, /* must be last except for MaxDynamicLWLock */
MaxDynamicLWLock = 1000000000
} LWLockId;
typedef enum LWLockMode
{
LW_EXCLUSIVE,
LW_SHARED
} LWLockMode;
#ifdef LOCK_DEBUG
extern bool Trace_lwlocks;
#endif
extern LWLockId LWLockAssign(void);
extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
extern void LWLockRelease(LWLockId lockid);
extern void LWLockReleaseAll(void);
extern int NumLWLocks(void);
extern int LWLockShmemSize(void);
extern void CreateLWLocks(void);
#endif /* LWLOCK_H */
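Taken together, the API above covers both the fixed locks in the enum and dynamically assigned ones. A usage sketch; only the LWLock calls and SInvalLock come from the header, the function names are illustrative:

#include "storage/lwlock.h"

/* Shared access: many backends may hold SInvalLock at once. */
void
read_shared_state(void)
{
	LWLockAcquire(SInvalLock, LW_SHARED);
	/* ... read shared-invalidation state ... */
	LWLockRelease(SInvalLock);
}

/* Non-blocking exclusive access: return instead of sleeping. */
bool
try_update_shared_state(void)
{
	if (!LWLockConditionalAcquire(SInvalLock, LW_EXCLUSIVE))
		return false;
	/* ... modify shared-invalidation state ... */
	LWLockRelease(SInvalLock);
	return true;
}

Per-buffer locks would presumably be handed out at shared-memory setup time with LWLockAssign(), which is why the enum tolerates values beyond NumFixedLWLocks.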
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: proc.h,v 1.47 2001/09/21 17:06:12 tgl Exp $ * $Id: proc.h,v 1.48 2001/09/29 04:02:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -16,10 +16,9 @@ ...@@ -16,10 +16,9 @@
#include "access/xlog.h" #include "access/xlog.h"
#include "storage/backendid.h" #include "storage/backendid.h"
#include "storage/ipc.h"
#include "storage/lock.h" #include "storage/lock.h"
/* configurable option */
extern int DeadlockTimeout;
typedef struct typedef struct
{ {
...@@ -35,10 +34,9 @@ typedef struct ...@@ -35,10 +34,9 @@ typedef struct
* the PROC is linked into that lock's waitProcs queue. A recycled PROC * the PROC is linked into that lock's waitProcs queue. A recycled PROC
* is linked into ProcGlobal's freeProcs list. * is linked into ProcGlobal's freeProcs list.
*/ */
struct proc struct PROC
{ {
/* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */ /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
SHM_QUEUE links; /* list link if process is in a list */ SHM_QUEUE links; /* list link if process is in a list */
SEMA sem; /* ONE semaphore to sleep on */ SEMA sem; /* ONE semaphore to sleep on */
...@@ -51,6 +49,9 @@ struct proc ...@@ -51,6 +49,9 @@ struct proc
* were starting our xact: vacuum must not * were starting our xact: vacuum must not
* remove tuples deleted by xid >= xmin ! */ * remove tuples deleted by xid >= xmin ! */
int pid; /* This backend's process id */
Oid databaseId; /* OID of database this backend is using */
/* /*
* XLOG location of first XLOG record written by this backend's * XLOG location of first XLOG record written by this backend's
* current transaction. If backend is not in a transaction or hasn't * current transaction. If backend is not in a transaction or hasn't
...@@ -58,6 +59,11 @@ struct proc ...@@ -58,6 +59,11 @@ struct proc
*/ */
XLogRecPtr logRec; XLogRecPtr logRec;
/* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */
bool lwExclusive; /* true if waiting for exclusive access */
struct PROC *lwWaitLink; /* next waiter for same LW lock */
/* Info about lock the process is currently waiting for, if any. */ /* Info about lock the process is currently waiting for, if any. */
/* waitLock and waitHolder are NULL if not currently waiting. */ /* waitLock and waitHolder are NULL if not currently waiting. */
LOCK *waitLock; /* Lock object we're sleeping on ... */ LOCK *waitLock; /* Lock object we're sleeping on ... */
...@@ -66,32 +72,15 @@ struct proc ...@@ -66,32 +72,15 @@ struct proc
LOCKMASK heldLocks; /* bitmask for lock types already held on LOCKMASK heldLocks; /* bitmask for lock types already held on
* this lock object by this backend */ * this lock object by this backend */
int pid; /* This backend's process id */
Oid databaseId; /* OID of database this backend is using */
short sLocks[MAX_SPINS]; /* Spin lock stats */
SHM_QUEUE procHolders; /* list of HOLDER objects for locks held SHM_QUEUE procHolders; /* list of HOLDER objects for locks held
* or awaited by this backend */ * or awaited by this backend */
}; };
/* NOTE: "typedef struct proc PROC" appears in storage/lock.h. */ /* NOTE: "typedef struct PROC PROC" appears in storage/lock.h. */
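The new lwWaiting/lwExclusive/lwWaitLink trio turns each PROC into a node of an intrusive wait list, so an LWLock needs only list-head bookkeeping rather than a separate queue structure. A rough sketch of walking such a list at release time; the real wakeup logic lives in lwlock.c and must also distinguish shared from exclusive waiters:

/* Sketch only: wake every PROC chained on a lock's wait list. */
PROC	   *proc = waitListHead;	/* hypothetical list head */

while (proc != NULL)
{
	PROC	   *next = proc->lwWaitLink;	/* save link before clearing */

	proc->lwWaiting = false;
	proc->lwWaitLink = NULL;
	/* ... signal proc->sem so the sleeping backend resumes ... */
	proc = next;
}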
extern PROC *MyProc; extern PROC *MyProc;
extern SPINLOCK ProcStructLock;
#define PROC_INCR_SLOCK(lock) \
do { \
if (MyProc) (MyProc->sLocks[(lock)])++; \
} while (0)
#define PROC_DECR_SLOCK(lock) \
do { \
if (MyProc) (MyProc->sLocks[(lock)])--; \
} while (0)
/* /*
* There is one ProcGlobal struct for the whole installation. * There is one ProcGlobal struct for the whole installation.
...@@ -120,7 +109,7 @@ typedef struct ...@@ -120,7 +109,7 @@ typedef struct
*/ */
} SEM_MAP_ENTRY; } SEM_MAP_ENTRY;
typedef struct procglobal typedef struct PROC_HDR
{ {
/* Head of list of free PROC structures */ /* Head of list of free PROC structures */
SHMEM_OFFSET freeProcs; SHMEM_OFFSET freeProcs;
...@@ -134,11 +123,17 @@ typedef struct procglobal ...@@ -134,11 +123,17 @@ typedef struct procglobal
SEM_MAP_ENTRY procSemMap[1]; SEM_MAP_ENTRY procSemMap[1];
} PROC_HDR; } PROC_HDR;
/* configurable option */
extern int DeadlockTimeout;
/* /*
* Function Prototypes * Function Prototypes
*/ */
extern void InitProcGlobal(int maxBackends); extern void InitProcGlobal(int maxBackends);
extern void InitProcess(void); extern void InitProcess(void);
extern void InitDummyProcess(void);
extern void ProcReleaseLocks(bool isCommit); extern void ProcReleaseLocks(bool isCommit);
extern void ProcQueueInit(PROC_QUEUE *queue); extern void ProcQueueInit(PROC_QUEUE *queue);
...@@ -146,7 +141,6 @@ extern int ProcSleep(LOCKMETHODTABLE *lockMethodTable, LOCKMODE lockmode, ...@@ -146,7 +141,6 @@ extern int ProcSleep(LOCKMETHODTABLE *lockMethodTable, LOCKMODE lockmode,
LOCK *lock, HOLDER *holder); LOCK *lock, HOLDER *holder);
extern PROC *ProcWakeup(PROC *proc, int errType); extern PROC *ProcWakeup(PROC *proc, int errType);
extern void ProcLockWakeup(LOCKMETHODTABLE *lockMethodTable, LOCK *lock); extern void ProcLockWakeup(LOCKMETHODTABLE *lockMethodTable, LOCK *lock);
extern void ProcReleaseSpins(PROC *proc);
extern bool LockWaitCancel(void); extern bool LockWaitCancel(void);
extern void HandleDeadLock(SIGNAL_ARGS); extern void HandleDeadLock(SIGNAL_ARGS);
......
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* s_lock.h * s_lock.h
* This file contains the in-line portion of the implementation * Hardware-dependent implementation of spinlocks.
* of spinlocks.
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * NOTE: none of the macros in this file are intended to be called directly.
* Portions Copyright (c) 1994, Regents of the University of California * Call them through the hardware-independent macros in spin.h.
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/include/storage/s_lock.h,v 1.94 2001/09/24 20:10:44 petere Exp $
* *
*------------------------------------------------------------------------- * The following hardware-dependent macros must be provided for each
*/ * supported platform:
/*----------
* DESCRIPTION
* The public macros that must be provided are:
* *
* void S_INIT_LOCK(slock_t *lock) * void S_INIT_LOCK(slock_t *lock)
* Initialize a spinlock (to the unlocked state). * Initialize a spinlock (to the unlocked state).
...@@ -33,51 +24,43 @@ ...@@ -33,51 +24,43 @@
* Tests if the lock is free. Returns TRUE if free, FALSE if locked. * Tests if the lock is free. Returns TRUE if free, FALSE if locked.
* This does *not* change the state of the lock. * This does *not* change the state of the lock.
* *
* Note to implementors: there are default implementations for all these
* macros at the bottom of the file. Check if your platform can use
* these or needs to override them.
*
* Usually, S_LOCK() is implemented in terms of an even lower-level macro
* TAS():
*
* int TAS(slock_t *lock) * int TAS(slock_t *lock)
* Atomic test-and-set instruction. Attempt to acquire the lock, * Atomic test-and-set instruction. Attempt to acquire the lock,
* but do *not* wait. Returns 0 if successful, nonzero if unable * but do *not* wait. Returns 0 if successful, nonzero if unable
* to acquire the lock. * to acquire the lock.
* *
* TAS() is a lower-level part of the API, but is used directly in a * TAS() is NOT part of the API, and should never be called directly.
* few places that want to do other things while waiting for a lock.
* The S_LOCK() macro is equivalent to
*
* void
* S_LOCK(slock_t *lock)
* {
* unsigned spins = 0;
*
* while (TAS(lock))
* S_LOCK_SLEEP(lock, spins++, timeout);
* }
* *
* where S_LOCK_SLEEP() checks for timeout and sleeps for a short * CAUTION: on some platforms TAS() may sometimes report failure to acquire
* interval. (The timeout is expressed in microseconds, or can be 0 for * a lock even when the lock is not locked. For example, on Alpha TAS()
* "infinity".) Callers that want to perform useful work while waiting * will "fail" if interrupted. Therefore TAS() should always be invoked
* can write out this entire loop and insert the "useful work" inside * in a retry loop, even if you are certain the lock is free.
* the loop.
*
* CAUTION to TAS() callers: on some platforms TAS() may sometimes
* report failure to acquire a lock even when the lock is not locked.
* For example, on Alpha TAS() will "fail" if interrupted. Therefore
* TAS() must *always* be invoked in a retry loop as depicted, even when
* you are certain the lock is free.
* *
* On most supported platforms, TAS() uses a tas() function written * On most supported platforms, TAS() uses a tas() function written
* in assembly language to execute a hardware atomic-test-and-set * in assembly language to execute a hardware atomic-test-and-set
* instruction. Equivalent OS-supplied mutex routines could be used too. * instruction. Equivalent OS-supplied mutex routines could be used too.
* *
* If no system-specific TAS() is available (ie, HAS_TEST_AND_SET is not * If no system-specific TAS() is available (ie, HAS_TEST_AND_SET is not
* defined), then we fall back on an emulation that uses SysV semaphores. * defined), then we fall back on an emulation that uses SysV semaphores
* This emulation will be MUCH MUCH MUCH slower than a proper TAS() * (see spin.c). This emulation will be MUCH MUCH slower than a proper TAS()
* implementation, because of the cost of a kernel call per lock or unlock. * implementation, because of the cost of a kernel call per lock or unlock.
* An old report is that Postgres spends around 40% of its time in semop(2) * An old report is that Postgres spends around 40% of its time in semop(2)
* when using the SysV semaphore code. * when using the SysV semaphore code.
* *
* Note to implementors: there are default implementations for all these *
* macros at the bottom of the file. Check if your platform can use * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* these or needs to override them. * Portions Copyright (c) 1994, Regents of the University of California
*---------- *
* $Id: s_lock.h,v 1.95 2001/09/29 04:02:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/ */
#ifndef S_LOCK_H #ifndef S_LOCK_H
#define S_LOCK_H #define S_LOCK_H
...@@ -476,7 +459,7 @@ extern slock_t wc_tas(volatile slock_t *lock); ...@@ -476,7 +459,7 @@ extern slock_t wc_tas(volatile slock_t *lock);
/* /*
* Fake spinlock implementation using SysV semaphores --- slow and prone * Fake spinlock implementation using SysV semaphores --- slow and prone
* to fall foul of kernel limits on number of semaphores, so don't use this * to fall foul of kernel limits on number of semaphores, so don't use this
* unless you must! * unless you must! The subroutines appear in spin.c.
*/ */
typedef struct typedef struct
...@@ -500,7 +483,7 @@ extern int tas_sema(volatile slock_t *lock); ...@@ -500,7 +483,7 @@ extern int tas_sema(volatile slock_t *lock);
/**************************************************************************** /*
* Default Definitions - override these above as needed. * Default Definitions - override these above as needed.
*/ */
...@@ -512,16 +495,6 @@ extern int tas_sema(volatile slock_t *lock); ...@@ -512,16 +495,6 @@ extern int tas_sema(volatile slock_t *lock);
} while (0) } while (0)
#endif /* S_LOCK */ #endif /* S_LOCK */
#if !defined(S_LOCK_SLEEP)
#define S_LOCK_SLEEP(lock,spins,timeout) \
s_lock_sleep((spins), (timeout), 0, (lock), __FILE__, __LINE__)
#endif /* S_LOCK_SLEEP */
#if !defined(S_LOCK_SLEEP_INTERVAL)
#define S_LOCK_SLEEP_INTERVAL(lock,spins,timeout,microsec) \
s_lock_sleep((spins), (timeout), (microsec), (lock), __FILE__, __LINE__)
#endif /* S_LOCK_SLEEP_INTERVAL */
#if !defined(S_LOCK_FREE) #if !defined(S_LOCK_FREE)
#define S_LOCK_FREE(lock) (*(lock) == 0) #define S_LOCK_FREE(lock) (*(lock) == 0)
#endif /* S_LOCK_FREE */ #endif /* S_LOCK_FREE */
...@@ -542,13 +515,9 @@ extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or ...@@ -542,13 +515,9 @@ extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or
#endif /* TAS */ #endif /* TAS */
/**************************************************************************** /*
* Platform-independent out-of-line support routines * Platform-independent out-of-line support routines
*/ */
extern void s_lock(volatile slock_t *lock, extern void s_lock(volatile slock_t *lock, const char *file, int line);
const char *file, const int line);
extern void s_lock_sleep(unsigned spins, int timeout, int microsec,
volatile slock_t *lock,
const char *file, const int line);
#endif /* S_LOCK_H */ #endif /* S_LOCK_H */
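The retry-loop rule above is exactly what the default S_LOCK definition (partially visible in this hunk) enforces: try TAS() once inline, and punt to the out-of-line s_lock() routine, which handles spinning and backoff, when the fast path fails. A sketch consistent with that description; the committed default may differ in detail:

#if !defined(S_LOCK)
#define S_LOCK(lock) \
	do { \
		if (TAS(lock)) \
			s_lock((lock), __FILE__, __LINE__); \
	} while (0)
#endif	 /* S_LOCK */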
...@@ -7,14 +7,13 @@ ...@@ -7,14 +7,13 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: shmem.h,v 1.30 2001/09/07 00:27:30 tgl Exp $ * $Id: shmem.h,v 1.31 2001/09/29 04:02:27 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef SHMEM_H #ifndef SHMEM_H
#define SHMEM_H #define SHMEM_H
#include "storage/spin.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
...@@ -55,9 +54,6 @@ extern DLLIMPORT SHMEM_OFFSET ShmemBase; ...@@ -55,9 +54,6 @@ extern DLLIMPORT SHMEM_OFFSET ShmemBase;
(((xx_offs) != 0) && ((xx_offs) != INVALID_OFFSET)) (((xx_offs) != 0) && ((xx_offs) != INVALID_OFFSET))
extern SPINLOCK ShmemLock;
extern SPINLOCK ShmemIndexLock;
/* shmemqueue.c */ /* shmemqueue.c */
typedef struct SHM_QUEUE typedef struct SHM_QUEUE
{ {
...@@ -66,16 +62,15 @@ typedef struct SHM_QUEUE ...@@ -66,16 +62,15 @@ typedef struct SHM_QUEUE
} SHM_QUEUE; } SHM_QUEUE;
/* shmem.c */ /* shmem.c */
extern void InitShmemAllocation(PGShmemHeader *seghdr); extern void InitShmemAllocation(void *seghdr);
extern void *ShmemAlloc(Size size); extern void *ShmemAlloc(Size size);
extern bool ShmemIsValid(unsigned long addr); extern bool ShmemIsValid(unsigned long addr);
extern void InitShmemIndex(void);
extern HTAB *ShmemInitHash(char *name, long init_size, long max_size, extern HTAB *ShmemInitHash(char *name, long init_size, long max_size,
HASHCTL *infoP, int hash_flags); HASHCTL *infoP, int hash_flags);
extern void *ShmemInitStruct(char *name, Size size, bool *foundPtr); extern void *ShmemInitStruct(char *name, Size size, bool *foundPtr);
typedef int TableID;
/* size constants for the shmem index table */ /* size constants for the shmem index table */
/* max size of data structure string name */ /* max size of data structure string name */
#define SHMEM_INDEX_KEYSIZE (50) #define SHMEM_INDEX_KEYSIZE (50)
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: sinval.h,v 1.21 2001/08/26 16:56:02 tgl Exp $ * $Id: sinval.h,v 1.22 2001/09/29 04:02:27 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include "storage/backendid.h" #include "storage/backendid.h"
#include "storage/itemptr.h" #include "storage/itemptr.h"
#include "storage/spin.h"
/* /*
...@@ -64,8 +63,6 @@ typedef union ...@@ -64,8 +63,6 @@ typedef union
} SharedInvalidationMessage; } SharedInvalidationMessage;
extern SPINLOCK SInvalLock;
extern int SInvalShmemSize(int maxBackends); extern int SInvalShmemSize(int maxBackends);
extern void CreateSharedInvalidationState(int maxBackends); extern void CreateSharedInvalidationState(int maxBackends);
extern void InitBackendSharedInvalidationState(void); extern void InitBackendSharedInvalidationState(void);
...@@ -78,7 +75,7 @@ extern bool DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself); ...@@ -78,7 +75,7 @@ extern bool DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself);
extern bool TransactionIdIsInProgress(TransactionId xid); extern bool TransactionIdIsInProgress(TransactionId xid);
extern TransactionId GetOldestXmin(bool allDbs); extern TransactionId GetOldestXmin(bool allDbs);
extern int CountActiveBackends(void); extern int CountActiveBackends(void);
/* Use "struct proc", not PROC, to avoid including proc.h here */ /* Use "struct PROC", not PROC, to avoid including proc.h here */
extern struct proc *BackendIdGetProc(BackendId procId); extern struct PROC *BackendIdGetProc(BackendId procId);
#endif /* SINVAL_H */ #endif /* SINVAL_H */
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: smgr.h,v 1.30 2001/06/27 23:31:39 tgl Exp $ * $Id: smgr.h,v 1.31 2001/09/29 04:02:27 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,9 +17,9 @@ ...@@ -17,9 +17,9 @@
#include "access/xlog.h" #include "access/xlog.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
#include "storage/block.h" #include "storage/block.h"
#include "storage/spin.h"
#include "utils/rel.h" #include "utils/rel.h"
#define SM_FAIL 0 #define SM_FAIL 0
#define SM_SUCCESS 1 #define SM_SUCCESS 1
...@@ -79,8 +79,6 @@ extern int mdabort(void); ...@@ -79,8 +79,6 @@ extern int mdabort(void);
extern int mdsync(void); extern int mdsync(void);
/* mm.c */ /* mm.c */
extern SPINLOCK MMCacheLock;
extern int mminit(void); extern int mminit(void);
extern int mmcreate(Relation reln); extern int mmcreate(Relation reln);
extern int mmunlink(RelFileNode rnode); extern int mmunlink(RelFileNode rnode);
......
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* spin.h * spin.h
* synchronization routines * Hardware-independent implementation of spinlocks.
*
*
* The hardware-independent interface to spinlocks is defined by the
* typedef "slock_t" and these macros:
*
* void SpinLockInit(slock_t *lock)
* Initialize a spinlock (to the unlocked state).
*
* void SpinLockAcquire(slock_t *lock)
* Acquire a spinlock, waiting if necessary.
* Time out and abort() if unable to acquire the lock in a
* "reasonable" amount of time --- typically ~ 1 minute.
* Cancel/die interrupts are held off until the lock is released.
*
* void SpinLockRelease(slock_t *lock)
* Unlock a previously acquired lock.
* Release the cancel/die interrupt holdoff.
*
* void SpinLockAcquire_NoHoldoff(slock_t *lock)
* void SpinLockRelease_NoHoldoff(slock_t *lock)
* Same as above, except no interrupt holdoff processing is done.
* This pair of macros may be used when there is a surrounding
* interrupt holdoff.
*
* bool SpinLockFree(slock_t *lock)
* Tests if the lock is free. Returns TRUE if free, FALSE if locked.
* This does *not* change the state of the lock.
*
* Callers must beware that the macro argument may be evaluated multiple
* times!
*
* The macros are implemented in terms of hardware-dependent macros
* supplied by s_lock.h.
* *
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: spin.h,v 1.15 2001/03/22 04:01:09 momjian Exp $ * $Id: spin.h,v 1.16 2001/09/29 04:02:27 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef SPIN_H #ifndef SPIN_H
#define SPIN_H #define SPIN_H
#include "storage/ipc.h" #include "storage/s_lock.h"
#include "miscadmin.h"
/*
* two implementations of spin locks
*
* Where TAS instruction is available: real spin locks.
* See src/storage/ipc/s_lock.c for details.
*
* Otherwise: fake spin locks using semaphores. see spin.c
*/
typedef int SPINLOCK;
#ifdef LOCK_DEBUG
extern bool Trace_spinlocks;
#endif
#define SpinLockInit(lock) S_INIT_LOCK(lock)
#define SpinLockAcquire(lock) \
do { \
HOLD_INTERRUPTS(); \
S_LOCK(lock); \
} while (0)
#define SpinLockAcquire_NoHoldoff(lock) S_LOCK(lock)
#define SpinLockRelease(lock) \
do { \
S_UNLOCK(lock); \
RESUME_INTERRUPTS(); \
} while (0)
#define SpinLockRelease_NoHoldoff(lock) S_UNLOCK(lock)
#define SpinLockFree(lock) S_LOCK_FREE(lock)
extern int SLockShmemSize(void);
extern void CreateSpinlocks(PGShmemHeader *seghdr);
extern void SpinAcquire(SPINLOCK lockid); extern void CreateSpinlocks(void);
extern void SpinRelease(SPINLOCK lockid);
#endif /* SPIN_H */ #endif /* SPIN_H */
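A usage sketch for the macros above; the struct is hypothetical, and the point is that with LWLocks now covering the longer critical sections, anything still under a spinlock must stay down to a few dozen instructions:

#include "storage/spin.h"

typedef struct
{
	slock_t		mutex;			/* protects counter */
	int			counter;
} SharedCounter;

void
bump_counter(SharedCounter *sc)
{
	SpinLockAcquire(&sc->mutex);	/* also holds off cancel/die interrupts */
	sc->counter++;					/* keep the critical section very short */
	SpinLockRelease(&sc->mutex);	/* releases the interrupt holdoff */
}

Inside code that already sits under HOLD_INTERRUPTS(), the _NoHoldoff variants skip the redundant holdoff bookkeeping.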