Commit c715fdea authored by Tom Lane's avatar Tom Lane

Significant cleanups in SysV IPC handling (shared mem and semaphores).

IPC key assignment will now work correctly even when multiple postmasters
are using the same logical port number (which is possible given the -k switch).
There is only one shared-mem segment per postmaster now, not 3.
Rip out broken code for non-TAS case in bufmgr and xlog, substitute a
complete S_LOCK emulation using semaphores in spin.c.  TAS and non-TAS
logic is now exactly the same.
When a deadlock is detected, "Deadlock detected" is now the elog(ERROR)
message, rather than a NOTICE that comes out before an unhelpful ERROR.
parent 91482271
<!-- <!--
$Header: /cvsroot/pgsql/doc/src/sgml/ref/postmaster.sgml,v 1.16 2000/11/22 01:41:13 momjian Exp $ $Header: /cvsroot/pgsql/doc/src/sgml/ref/postmaster.sgml,v 1.17 2000/11/28 23:27:54 tgl Exp $
Postgres documentation Postgres documentation
--> -->
...@@ -400,32 +400,6 @@ $ ps -e | grep postmast ...@@ -400,32 +400,6 @@ $ ps -e | grep postmast
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><computeroutput>
IpcMemoryAttach: shmat() failed: Permission denied
</computeroutput></term>
<listitem>
<para>
A likely explanation is that another user attempted to start a
<application>postmaster</application>
process on the same port which acquired shared resources and then
died. Since Postgres shared memory keys are based on the port number
assigned to the
<application>postmaster</application>,
such conflicts are likely if there is more than one installation on
a single host. If there are no other
<application>postmaster</application>
processes currently running (see above), run
<application>ipcclean</application>
and try again. If other <application>postmaster</application>
images
are running, you will have to find the owners of those processes to
coordinate the assignment of port numbers and/or removal of unused
shared memory segments.
</para>
</listitem>
</varlistentry>
</variablelist> </variablelist>
</para> </para>
</refsect2> </refsect2>
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.35 2000/11/27 05:36:12 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.36 2000/11/28 23:27:54 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -85,12 +85,6 @@ typedef struct XLogCtlWrite ...@@ -85,12 +85,6 @@ typedef struct XLogCtlWrite
} XLogCtlWrite; } XLogCtlWrite;
#ifndef HAS_TEST_AND_SET
#define TAS(lck) 0
#define S_UNLOCK(lck)
#define S_INIT_LOCK(lck)
#endif
typedef struct XLogCtlData typedef struct XLogCtlData
{ {
XLogCtlInsert Insert; XLogCtlInsert Insert;
...@@ -102,12 +96,10 @@ typedef struct XLogCtlData ...@@ -102,12 +96,10 @@ typedef struct XLogCtlData
uint32 XLogCacheByte; uint32 XLogCacheByte;
uint32 XLogCacheBlck; uint32 XLogCacheBlck;
StartUpID ThisStartUpID; StartUpID ThisStartUpID;
#ifdef HAS_TEST_AND_SET
slock_t insert_lck; slock_t insert_lck;
slock_t info_lck; slock_t info_lck;
slock_t lgwr_lck; slock_t lgwr_lck;
slock_t chkp_lck; /* checkpoint lock */ slock_t chkp_lck; /* checkpoint lock */
#endif
} XLogCtlData; } XLogCtlData;
static XLogCtlData *XLogCtl = NULL; static XLogCtlData *XLogCtl = NULL;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/async.c,v 1.72 2000/11/21 21:15:59 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/async.c,v 1.73 2000/11/28 23:27:54 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -119,8 +119,8 @@ static Dllist *pendingNotifies = NULL; ...@@ -119,8 +119,8 @@ static Dllist *pendingNotifies = NULL;
static volatile int notifyInterruptEnabled = 0; static volatile int notifyInterruptEnabled = 0;
static volatile int notifyInterruptOccurred = 0; static volatile int notifyInterruptOccurred = 0;
/* True if we've registered an on_shmem_exit cleanup (or at least tried to). */ /* True if we've registered an on_shmem_exit cleanup */
static int unlistenExitRegistered = 0; static bool unlistenExitRegistered = false;
static void Async_UnlistenAll(void); static void Async_UnlistenAll(void);
...@@ -253,9 +253,8 @@ Async_Listen(char *relname, int pid) ...@@ -253,9 +253,8 @@ Async_Listen(char *relname, int pid)
*/ */
if (!unlistenExitRegistered) if (!unlistenExitRegistered)
{ {
if (on_shmem_exit(Async_UnlistenOnExit, 0) < 0) on_shmem_exit(Async_UnlistenOnExit, 0);
elog(NOTICE, "Async_Listen: out of shmem_exit slots"); unlistenExitRegistered = true;
unlistenExitRegistered = 1;
} }
} }
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.193 2000/11/27 04:03:20 inoue Exp $ * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.194 2000/11/28 23:27:55 tgl Exp $
* *
* NOTES * NOTES
* *
...@@ -117,26 +117,6 @@ int PostPortNumber; ...@@ -117,26 +117,6 @@ int PostPortNumber;
char * UnixSocketDir; char * UnixSocketDir;
char * Virtual_host; char * Virtual_host;
/*
* This is a sequence number that indicates how many times we've had to
* throw away the shared memory and start over because we doubted its
* integrity. It starts off at zero and is incremented every time we
* start over. We use this to ensure that we use a new IPC shared memory
* key for the new shared memory segment in case the old segment isn't
* entirely gone yet.
*
* The sequence actually cycles back to 0 after 9, so pathologically there
* could be an IPC failure if 10 sets of backends are all stuck and won't
* release IPC resources.
*/
static short shmem_seq = 0;
/*
* This is the base IPC shared memory key. Other keys are generated by
* adding to this.
*/
static IpcMemoryKey ipc_key;
/* /*
* MaxBackends is the actual limit on the number of backends we will * MaxBackends is the actual limit on the number of backends we will
* start. The default is established by configure, but it can be * start. The default is established by configure, but it can be
...@@ -1292,39 +1272,6 @@ ConnFree(Port *conn) ...@@ -1292,39 +1272,6 @@ ConnFree(Port *conn)
free(conn); free(conn);
} }
/*
* get_host_port -- return a pseudo port number (16 bits)
* derived from the primary IP address of Virtual_host.
*/
static unsigned short
get_host_port(void)
{
static unsigned short hostPort = 0;
if (hostPort == 0)
{
SockAddr saddr;
struct hostent *hp;
hp = gethostbyname(Virtual_host);
if ((hp == NULL) || (hp->h_addrtype != AF_INET))
{
char msg[1024];
snprintf(msg, sizeof(msg),
"FATAL: get_host_port: gethostbyname(%s) failed\n",
Virtual_host);
fputs(msg, stderr);
pqdebug("%s", msg);
exit(1);
}
memmove((char *) &(saddr.in.sin_addr),
(char *) hp->h_addr,
hp->h_length);
hostPort = ntohl(saddr.in.sin_addr.s_addr) & 0xFFFF;
}
return hostPort;
}
/* /*
* reset_shared -- reset shared memory and semaphores * reset_shared -- reset shared memory and semaphores
...@@ -1333,40 +1280,16 @@ static void ...@@ -1333,40 +1280,16 @@ static void
reset_shared(unsigned short port) reset_shared(unsigned short port)
{ {
/* /*
* A typical ipc_key is 5432001, which is port 5432, sequence * Reset assignment of shared mem and semaphore IPC keys.
* number 0, and 01 as the index in IPCKeyGetBufferMemoryKey(). * Doing this means that in normal cases we'll assign the same keys
* The 32-bit INT_MAX is 2147483 6 47. * on each "cycle of life", and thereby avoid leaving dead IPC objects
* * floating around if the postmaster crashes and is restarted.
* The default algorithm for calculating the IPC keys assumes that all
* instances of postmaster on a given host are listening on different
* ports. In order to work (prevent shared memory collisions) if you
* run multiple PostgreSQL instances on the same port and different IP
* addresses on a host, we change the algorithm if you give postmaster
* the -h option, or set PGHOST, to a value other than the internal
* default.
*
* If Virtual_host is set, then we generate the IPC keys using the
* last two octets of the IP address instead of the port number.
* This algorithm assumes that no one will run multiple PostgreSQL
* instances on one host using two IP addresses that have the same two
* last octets in different class C networks. If anyone does, it
* would be rare.
*
* So, if you use -h or PGHOST, don't try to run two instances of
* PostgreSQL on the same IP address but different ports. If you
* don't use them, then you must use different ports (via -p or
* PGPORT). And, of course, don't try to use both approaches on one
* host.
*/ */
IpcInitKeyAssignment(port);
if (Virtual_host[0] != '\0') /*
port = get_host_port(); * Create or re-create shared memory and semaphores.
*/
ipc_key = port * 1000 + shmem_seq * 100; CreateSharedMemoryAndSemaphores(false, MaxBackends);
CreateSharedMemoryAndSemaphores(ipc_key, MaxBackends);
shmem_seq += 1;
if (shmem_seq >= 10)
shmem_seq -= 10;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.37 2000/10/23 04:10:06 vadim Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.38 2000/11/28 23:27:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -56,13 +56,6 @@ int Num_Descriptors; ...@@ -56,13 +56,6 @@ int Num_Descriptors;
BufferDesc *BufferDescriptors; BufferDesc *BufferDescriptors;
BufferBlock BufferBlocks; BufferBlock BufferBlocks;
#ifndef HAS_TEST_AND_SET
long *NWaitIOBackendP;
#endif
extern IpcSemaphoreId WaitIOSemId;
long *PrivateRefCount; /* also used in freelist.c */ long *PrivateRefCount; /* also used in freelist.c */
bits8 *BufferLocks; /* flag bits showing locks I have set */ bits8 *BufferLocks; /* flag bits showing locks I have set */
BufferTag *BufferTagLastDirtied; /* tag buffer had when last BufferTag *BufferTagLastDirtied; /* tag buffer had when last
...@@ -139,7 +132,7 @@ long int LocalBufferFlushCount; ...@@ -139,7 +132,7 @@ long int LocalBufferFlushCount;
* amount of available memory. * amount of available memory.
*/ */
void void
InitBufferPool(IPCKey key) InitBufferPool(void)
{ {
bool foundBufs, bool foundBufs,
foundDescs; foundDescs;
...@@ -170,18 +163,6 @@ InitBufferPool(IPCKey key) ...@@ -170,18 +163,6 @@ InitBufferPool(IPCKey key)
ShmemInitStruct("Buffer Blocks", ShmemInitStruct("Buffer Blocks",
NBuffers * BLCKSZ, &foundBufs); NBuffers * BLCKSZ, &foundBufs);
#ifndef HAS_TEST_AND_SET
{
bool foundNWaitIO;
NWaitIOBackendP = (long *) ShmemInitStruct("#Backends Waiting IO",
sizeof(long),
&foundNWaitIO);
if (!foundNWaitIO)
*NWaitIOBackendP = 0;
}
#endif
if (foundDescs || foundBufs) if (foundDescs || foundBufs)
{ {
...@@ -214,10 +195,8 @@ InitBufferPool(IPCKey key) ...@@ -214,10 +195,8 @@ InitBufferPool(IPCKey key)
buf->flags = (BM_DELETED | BM_FREE | BM_VALID); buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
buf->refcount = 0; buf->refcount = 0;
buf->buf_id = i; buf->buf_id = i;
#ifdef HAS_TEST_AND_SET
S_INIT_LOCK(&(buf->io_in_progress_lock)); S_INIT_LOCK(&(buf->io_in_progress_lock));
S_INIT_LOCK(&(buf->cntx_lock)); S_INIT_LOCK(&(buf->cntx_lock));
#endif
} }
/* close the circular queue */ /* close the circular queue */
...@@ -231,22 +210,6 @@ InitBufferPool(IPCKey key) ...@@ -231,22 +210,6 @@ InitBufferPool(IPCKey key)
SpinRelease(BufMgrLock); SpinRelease(BufMgrLock);
#ifndef HAS_TEST_AND_SET
{
extern IpcSemaphoreId WaitIOSemId;
extern IpcSemaphoreId WaitCLSemId;
WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
1, IPCProtection, 0, 1);
if (WaitIOSemId < 0)
elog(FATAL, "InitBufferPool: IpcSemaphoreCreate(WaitIOSemId) failed");
WaitCLSemId = IpcSemaphoreCreate(IPCKeyGetWaitCLSemaphoreKey(key),
1, IPCProtection,
IpcSemaphoreDefaultStartValue, 1);
if (WaitCLSemId < 0)
elog(FATAL, "InitBufferPool: IpcSemaphoreCreate(WaitCLSemId) failed");
}
#endif
PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8)); BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8));
BufferTagLastDirtied = (BufferTag *) calloc(NBuffers, sizeof(BufferTag)); BufferTagLastDirtied = (BufferTag *) calloc(NBuffers, sizeof(BufferTag));
...@@ -262,7 +225,7 @@ InitBufferPool(IPCKey key) ...@@ -262,7 +225,7 @@ InitBufferPool(IPCKey key)
* ---------------------------------------------------- * ----------------------------------------------------
*/ */
int int
BufferShmemSize() BufferShmemSize(void)
{ {
int size = 0; int size = 0;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.94 2000/11/20 16:47:31 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.95 2000/11/28 23:27:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -93,12 +93,6 @@ extern void AbortBufferIO(void); ...@@ -93,12 +93,6 @@ extern void AbortBufferIO(void);
*/ */
#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY)) #define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY))
#ifndef HAS_TEST_AND_SET
static void SignalIO(BufferDesc *buf);
extern long *NWaitIOBackendP; /* defined in buf_init.c */
#endif /* HAS_TEST_AND_SET */
static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
bool bufferLockHeld); bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
...@@ -1187,27 +1181,7 @@ BufferSync() ...@@ -1187,27 +1181,7 @@ BufferSync()
* *
* Should be entered with buffer manager spinlock held; releases it before * Should be entered with buffer manager spinlock held; releases it before
* waiting and re-acquires it afterwards. * waiting and re-acquires it afterwards.
*
* OLD NOTES:
* Because IO_IN_PROGRESS conflicts are
* expected to be rare, there is only one BufferIO
* lock in the entire system. All processes block
* on this semaphore when they try to use a buffer
* that someone else is faulting in. Whenever a
* process finishes an IO and someone is waiting for
* the buffer, BufferIO is signaled (SignalIO). All
* waiting processes then wake up and check to see
* if their buffer is now ready. This implementation
* is simple, but efficient enough if WaitIO is
* rarely called by multiple processes simultaneously.
*
* NEW NOTES:
* The above is true only on machines without test-and-set
* semaphores (which we hope are few, these days). On better
* hardware, each buffer has a spinlock that we can wait on.
*/ */
#ifdef HAS_TEST_AND_SET
static void static void
WaitIO(BufferDesc *buf, SPINLOCK spinlock) WaitIO(BufferDesc *buf, SPINLOCK spinlock)
{ {
...@@ -1224,43 +1198,6 @@ WaitIO(BufferDesc *buf, SPINLOCK spinlock) ...@@ -1224,43 +1198,6 @@ WaitIO(BufferDesc *buf, SPINLOCK spinlock)
} }
} }
#else /* !HAS_TEST_AND_SET */
IpcSemaphoreId WaitIOSemId;
IpcSemaphoreId WaitCLSemId;
static void
WaitIO(BufferDesc *buf, SPINLOCK spinlock)
{
bool inProgress;
for (;;)
{
/* wait until someone releases IO lock */
(*NWaitIOBackendP)++;
SpinRelease(spinlock);
IpcSemaphoreLock(WaitIOSemId, 0, 1);
SpinAcquire(spinlock);
inProgress = (buf->flags & BM_IO_IN_PROGRESS);
if (!inProgress)
break;
}
}
/*
* SignalIO
*/
static void
SignalIO(BufferDesc *buf)
{
/* somebody better be waiting. */
Assert(buf->refcount > 1);
IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP);
*NWaitIOBackendP = 0;
}
#endif /* HAS_TEST_AND_SET */
long NDirectFileRead; /* some I/O's are direct file access. long NDirectFileRead; /* some I/O's are direct file access.
* bypass bufmgr */ * bypass bufmgr */
...@@ -2297,11 +2234,7 @@ UnlockBuffers() ...@@ -2297,11 +2234,7 @@ UnlockBuffers()
Assert(BufferIsValid(i + 1)); Assert(BufferIsValid(i + 1));
buf = &(BufferDescriptors[i]); buf = &(BufferDescriptors[i]);
#ifdef HAS_TEST_AND_SET
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
if (BufferLocks[i] & BL_R_LOCK) if (BufferLocks[i] & BL_R_LOCK)
{ {
...@@ -2324,11 +2257,9 @@ UnlockBuffers() ...@@ -2324,11 +2257,9 @@ UnlockBuffers()
Assert(buf->w_lock); Assert(buf->w_lock);
buf->w_lock = false; buf->w_lock = false;
} }
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
BufferLocks[i] = 0; BufferLocks[i] = 0;
} }
} }
...@@ -2346,11 +2277,7 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2346,11 +2277,7 @@ LockBuffer(Buffer buffer, int mode)
buf = &(BufferDescriptors[buffer - 1]); buf = &(BufferDescriptors[buffer - 1]);
buflock = &(BufferLocks[buffer - 1]); buflock = &(BufferLocks[buffer - 1]);
#ifdef HAS_TEST_AND_SET
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
if (mode == BUFFER_LOCK_UNLOCK) if (mode == BUFFER_LOCK_UNLOCK)
{ {
...@@ -2380,15 +2307,9 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2380,15 +2307,9 @@ LockBuffer(Buffer buffer, int mode)
Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK))); Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
while (buf->ri_lock || buf->w_lock) while (buf->ri_lock || buf->w_lock)
{ {
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++); s_lock_sleep(i++);
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
s_lock_sleep(i++);
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
} }
(buf->r_locks)++; (buf->r_locks)++;
*buflock |= BL_R_LOCK; *buflock |= BL_R_LOCK;
...@@ -2412,15 +2333,9 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2412,15 +2333,9 @@ LockBuffer(Buffer buffer, int mode)
*buflock |= BL_RI_LOCK; *buflock |= BL_RI_LOCK;
buf->ri_lock = true; buf->ri_lock = true;
} }
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++); s_lock_sleep(i++);
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
s_lock_sleep(i++);
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
} }
buf->w_lock = true; buf->w_lock = true;
*buflock |= BL_W_LOCK; *buflock |= BL_W_LOCK;
...@@ -2438,12 +2353,7 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2438,12 +2353,7 @@ LockBuffer(Buffer buffer, int mode)
else else
elog(ERROR, "LockBuffer: unknown lock mode %d", mode); elog(ERROR, "LockBuffer: unknown lock mode %d", mode);
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
} }
/* /*
...@@ -2471,7 +2381,6 @@ StartBufferIO(BufferDesc *buf, bool forInput) ...@@ -2471,7 +2381,6 @@ StartBufferIO(BufferDesc *buf, bool forInput)
Assert(!InProgressBuf); Assert(!InProgressBuf);
Assert(!(buf->flags & BM_IO_IN_PROGRESS)); Assert(!(buf->flags & BM_IO_IN_PROGRESS));
buf->flags |= BM_IO_IN_PROGRESS; buf->flags |= BM_IO_IN_PROGRESS;
#ifdef HAS_TEST_AND_SET
/* /*
* There used to be * There used to be
...@@ -2485,7 +2394,7 @@ StartBufferIO(BufferDesc *buf, bool forInput) ...@@ -2485,7 +2394,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
* happen -- tgl * happen -- tgl
*/ */
S_LOCK(&(buf->io_in_progress_lock)); S_LOCK(&(buf->io_in_progress_lock));
#endif /* HAS_TEST_AND_SET */
InProgressBuf = buf; InProgressBuf = buf;
IsForInput = forInput; IsForInput = forInput;
} }
...@@ -2502,12 +2411,7 @@ static void ...@@ -2502,12 +2411,7 @@ static void
TerminateBufferIO(BufferDesc *buf) TerminateBufferIO(BufferDesc *buf)
{ {
Assert(buf == InProgressBuf); Assert(buf == InProgressBuf);
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->io_in_progress_lock)); S_UNLOCK(&(buf->io_in_progress_lock));
#else
if (buf->refcount > 1)
SignalIO(buf);
#endif /* HAS_TEST_AND_SET */
InProgressBuf = (BufferDesc *) 0; InProgressBuf = (BufferDesc *) 0;
} }
......
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* s_lock.c * s_lock.c
* buffer manager interface routines * Spinlock support routines
* *
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/s_lock.c,v 1.25 2000/11/16 05:51:01 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/s_lock.c,v 1.26 2000/11/28 23:27:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h"
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include "postgres.h"
#include "storage/s_lock.h" #include "storage/s_lock.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_bufmgr.c,v 1.4 2000/11/22 02:19:14 inoue Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_bufmgr.c,v 1.5 2000/11/28 23:27:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -88,12 +88,6 @@ extern void AbortBufferIO(void); ...@@ -88,12 +88,6 @@ extern void AbortBufferIO(void);
*/ */
#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY)) #define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY))
#ifndef HAS_TEST_AND_SET
static void SignalIO(BufferDesc *buf);
extern long *NWaitIOBackendP; /* defined in buf_init.c */
#endif /* HAS_TEST_AND_SET */
static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
bool bufferLockHeld); bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
...@@ -853,27 +847,7 @@ BufferSync() ...@@ -853,27 +847,7 @@ BufferSync()
* *
* Should be entered with buffer manager spinlock held; releases it before * Should be entered with buffer manager spinlock held; releases it before
* waiting and re-acquires it afterwards. * waiting and re-acquires it afterwards.
*
* OLD NOTES:
* Because IO_IN_PROGRESS conflicts are
* expected to be rare, there is only one BufferIO
* lock in the entire system. All processes block
* on this semaphore when they try to use a buffer
* that someone else is faulting in. Whenever a
* process finishes an IO and someone is waiting for
* the buffer, BufferIO is signaled (SignalIO). All
* waiting processes then wake up and check to see
* if their buffer is now ready. This implementation
* is simple, but efficient enough if WaitIO is
* rarely called by multiple processes simultaneously.
*
* NEW NOTES:
* The above is true only on machines without test-and-set
* semaphores (which we hope are few, these days). On better
* hardware, each buffer has a spinlock that we can wait on.
*/ */
#ifdef HAS_TEST_AND_SET
static void static void
WaitIO(BufferDesc *buf, SPINLOCK spinlock) WaitIO(BufferDesc *buf, SPINLOCK spinlock)
{ {
...@@ -890,43 +864,6 @@ WaitIO(BufferDesc *buf, SPINLOCK spinlock) ...@@ -890,43 +864,6 @@ WaitIO(BufferDesc *buf, SPINLOCK spinlock)
} }
} }
#else /* !HAS_TEST_AND_SET */
IpcSemaphoreId WaitIOSemId;
IpcSemaphoreId WaitCLSemId;
static void
WaitIO(BufferDesc *buf, SPINLOCK spinlock)
{
bool inProgress;
for (;;)
{
/* wait until someone releases IO lock */
(*NWaitIOBackendP)++;
SpinRelease(spinlock);
IpcSemaphoreLock(WaitIOSemId, 0, 1);
SpinAcquire(spinlock);
inProgress = (buf->flags & BM_IO_IN_PROGRESS);
if (!inProgress)
break;
}
}
/*
* SignalIO
*/
static void
SignalIO(BufferDesc *buf)
{
/* somebody better be waiting. */
Assert(buf->refcount > 1);
IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP);
*NWaitIOBackendP = 0;
}
#endif /* HAS_TEST_AND_SET */
long NDirectFileRead; /* some I/O's are direct file access. long NDirectFileRead; /* some I/O's are direct file access.
* bypass bufmgr */ * bypass bufmgr */
...@@ -1965,11 +1902,7 @@ UnlockBuffers() ...@@ -1965,11 +1902,7 @@ UnlockBuffers()
Assert(BufferIsValid(i + 1)); Assert(BufferIsValid(i + 1));
buf = &(BufferDescriptors[i]); buf = &(BufferDescriptors[i]);
#ifdef HAS_TEST_AND_SET
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
if (BufferLocks[i] & BL_R_LOCK) if (BufferLocks[i] & BL_R_LOCK)
{ {
...@@ -1992,11 +1925,9 @@ UnlockBuffers() ...@@ -1992,11 +1925,9 @@ UnlockBuffers()
Assert(buf->w_lock); Assert(buf->w_lock);
buf->w_lock = false; buf->w_lock = false;
} }
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
BufferLocks[i] = 0; BufferLocks[i] = 0;
} }
} }
...@@ -2014,11 +1945,7 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2014,11 +1945,7 @@ LockBuffer(Buffer buffer, int mode)
buf = &(BufferDescriptors[buffer - 1]); buf = &(BufferDescriptors[buffer - 1]);
buflock = &(BufferLocks[buffer - 1]); buflock = &(BufferLocks[buffer - 1]);
#ifdef HAS_TEST_AND_SET
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
if (mode == BUFFER_LOCK_UNLOCK) if (mode == BUFFER_LOCK_UNLOCK)
{ {
...@@ -2048,15 +1975,9 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2048,15 +1975,9 @@ LockBuffer(Buffer buffer, int mode)
Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK))); Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
while (buf->ri_lock || buf->w_lock) while (buf->ri_lock || buf->w_lock)
{ {
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++); s_lock_sleep(i++);
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
s_lock_sleep(i++);
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
} }
(buf->r_locks)++; (buf->r_locks)++;
*buflock |= BL_R_LOCK; *buflock |= BL_R_LOCK;
...@@ -2080,15 +2001,9 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2080,15 +2001,9 @@ LockBuffer(Buffer buffer, int mode)
*buflock |= BL_RI_LOCK; *buflock |= BL_RI_LOCK;
buf->ri_lock = true; buf->ri_lock = true;
} }
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++); s_lock_sleep(i++);
S_LOCK(&(buf->cntx_lock)); S_LOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
s_lock_sleep(i++);
IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
} }
buf->w_lock = true; buf->w_lock = true;
*buflock |= BL_W_LOCK; *buflock |= BL_W_LOCK;
...@@ -2109,12 +2024,7 @@ LockBuffer(Buffer buffer, int mode) ...@@ -2109,12 +2024,7 @@ LockBuffer(Buffer buffer, int mode)
else else
elog(ERROR, "LockBuffer: unknown lock mode %d", mode); elog(ERROR, "LockBuffer: unknown lock mode %d", mode);
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->cntx_lock)); S_UNLOCK(&(buf->cntx_lock));
#else
IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
} }
/* /*
...@@ -2142,7 +2052,6 @@ StartBufferIO(BufferDesc *buf, bool forInput) ...@@ -2142,7 +2052,6 @@ StartBufferIO(BufferDesc *buf, bool forInput)
Assert(!InProgressBuf); Assert(!InProgressBuf);
Assert(!(buf->flags & BM_IO_IN_PROGRESS)); Assert(!(buf->flags & BM_IO_IN_PROGRESS));
buf->flags |= BM_IO_IN_PROGRESS; buf->flags |= BM_IO_IN_PROGRESS;
#ifdef HAS_TEST_AND_SET
/* /*
* There used to be * There used to be
...@@ -2156,7 +2065,7 @@ StartBufferIO(BufferDesc *buf, bool forInput) ...@@ -2156,7 +2065,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
* happen -- tgl * happen -- tgl
*/ */
S_LOCK(&(buf->io_in_progress_lock)); S_LOCK(&(buf->io_in_progress_lock));
#endif /* HAS_TEST_AND_SET */
InProgressBuf = buf; InProgressBuf = buf;
IsForInput = forInput; IsForInput = forInput;
} }
...@@ -2173,12 +2082,7 @@ static void ...@@ -2173,12 +2082,7 @@ static void
TerminateBufferIO(BufferDesc *buf) TerminateBufferIO(BufferDesc *buf)
{ {
Assert(buf == InProgressBuf); Assert(buf == InProgressBuf);
#ifdef HAS_TEST_AND_SET
S_UNLOCK(&(buf->io_in_progress_lock)); S_UNLOCK(&(buf->io_in_progress_lock));
#else
if (buf->refcount > 1)
SignalIO(buf);
#endif /* HAS_TEST_AND_SET */
InProgressBuf = (BufferDesc *) 0; InProgressBuf = (BufferDesc *) 0;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.53 2000/11/21 21:16:01 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.54 2000/11/28 23:27:56 tgl Exp $
* *
* NOTES * NOTES
* *
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/file.h> #include <sys/file.h>
#include <errno.h> #include <errno.h>
#include <unistd.h>
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/s_lock.h" #include "storage/s_lock.h"
...@@ -51,6 +52,7 @@ ...@@ -51,6 +52,7 @@
#include <sys/ipc.h> #include <sys/ipc.h>
#endif #endif
/* /*
* This flag is set during proc_exit() to change elog()'s behavior, * This flag is set during proc_exit() to change elog()'s behavior,
* so that an elog() from an on_proc_exit routine cannot get us out * so that an elog() from an on_proc_exit routine cannot get us out
...@@ -58,12 +60,31 @@ ...@@ -58,12 +60,31 @@
*/ */
bool proc_exit_inprogress = false; bool proc_exit_inprogress = false;
static int UsePrivateMemory = 0; static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
int numSems, int permission,
int semStartValue, bool removeOnExit);
static void CallbackSemaphoreKill(int status, Datum semId);
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size,
int permission);
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum shmId);
static void *PrivateMemoryCreate(uint32 size);
static void PrivateMemoryDelete(int status, Datum memaddr);
static void IpcMemoryDetach(int status, char *shmaddr);
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* exit() handling stuff * exit() handling stuff
*
* These functions are generally in the same spirit as atexit(3),
* but provide some additional features we need --- in particular,
* we want to register callbacks to invoke when we are disconnecting
* from a broken shared-memory context but not exiting the postmaster.
*
* Callback functions can take zero, one, or two args: the first passed
* arg is the integer exitcode, the second is the Datum supplied when
* the callback was registered.
*
* XXX these functions probably ought to live in some other module.
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
...@@ -73,43 +94,12 @@ static struct ONEXIT ...@@ -73,43 +94,12 @@ static struct ONEXIT
{ {
void (*function) (); void (*function) ();
Datum arg; Datum arg;
} on_proc_exit_list[MAX_ON_EXITS], on_shmem_exit_list[MAX_ON_EXITS]; } on_proc_exit_list[MAX_ON_EXITS],
on_shmem_exit_list[MAX_ON_EXITS];
static int on_proc_exit_index, static int on_proc_exit_index,
on_shmem_exit_index; on_shmem_exit_index;
typedef struct _PrivateMemStruct
{
int id;
char *memptr;
} PrivateMem;
static PrivateMem IpcPrivateMem[16];
static int
PrivateMemoryCreate(IpcMemoryKey memKey,
uint32 size)
{
static int memid = 0;
UsePrivateMemory = 1;
IpcPrivateMem[memid].id = memid;
IpcPrivateMem[memid].memptr = malloc(size);
if (IpcPrivateMem[memid].memptr == NULL)
elog(ERROR, "PrivateMemoryCreate: not enough memory to malloc");
MemSet(IpcPrivateMem[memid].memptr, 0, size); /* XXX PURIFY */
return memid++;
}
static char *
PrivateMemoryAttach(IpcMemoryId memid)
{
return IpcPrivateMem[memid].memptr;
}
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* proc_exit * proc_exit
...@@ -156,9 +146,9 @@ proc_exit(int code) ...@@ -156,9 +146,9 @@ proc_exit(int code)
} }
/* ------------------ /* ------------------
* Run all of the on_shmem_exit routines but don't exit in the end. * Run all of the on_shmem_exit routines --- but don't actually exit.
* This is used by the postmaster to re-initialize shared memory and * This is used by the postmaster to re-initialize shared memory and
* semaphores after a backend dies horribly * semaphores after a backend dies horribly.
* ------------------ * ------------------
*/ */
void void
...@@ -188,18 +178,16 @@ shmem_exit(int code) ...@@ -188,18 +178,16 @@ shmem_exit(int code)
* functions invoked by proc_exit(). -cim 2/6/90 * functions invoked by proc_exit(). -cim 2/6/90
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
int void
on_proc_exit(void (*function) (), Datum arg) on_proc_exit(void (*function) (), Datum arg)
{ {
if (on_proc_exit_index >= MAX_ON_EXITS) if (on_proc_exit_index >= MAX_ON_EXITS)
return -1; elog(FATAL, "Out of on_proc_exit slots");
on_proc_exit_list[on_proc_exit_index].function = function; on_proc_exit_list[on_proc_exit_index].function = function;
on_proc_exit_list[on_proc_exit_index].arg = arg; on_proc_exit_list[on_proc_exit_index].arg = arg;
++on_proc_exit_index; ++on_proc_exit_index;
return 0;
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
...@@ -209,24 +197,25 @@ on_proc_exit(void (*function) (), Datum arg) ...@@ -209,24 +197,25 @@ on_proc_exit(void (*function) (), Datum arg)
* functions invoked by shmem_exit(). -cim 2/6/90 * functions invoked by shmem_exit(). -cim 2/6/90
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
int void
on_shmem_exit(void (*function) (), Datum arg) on_shmem_exit(void (*function) (), Datum arg)
{ {
if (on_shmem_exit_index >= MAX_ON_EXITS) if (on_shmem_exit_index >= MAX_ON_EXITS)
return -1; elog(FATAL, "Out of on_shmem_exit slots");
on_shmem_exit_list[on_shmem_exit_index].function = function; on_shmem_exit_list[on_shmem_exit_index].function = function;
on_shmem_exit_list[on_shmem_exit_index].arg = arg; on_shmem_exit_list[on_shmem_exit_index].arg = arg;
++on_shmem_exit_index; ++on_shmem_exit_index;
return 0;
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* on_exit_reset * on_exit_reset
* *
* this function clears all proc_exit() registered functions. * this function clears all on_proc_exit() and on_shmem_exit()
* registered functions. This is used just after forking a backend,
* so that the backend doesn't believe it should call the postmaster's
* on-exit routines when it exits...
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
void void
...@@ -236,190 +225,135 @@ on_exit_reset(void) ...@@ -236,190 +225,135 @@ on_exit_reset(void)
on_proc_exit_index = 0; on_proc_exit_index = 0;
} }
/****************************************************************************/
/* IPCPrivateSemaphoreKill(status, semId) */
/* */
/****************************************************************************/
static void
IPCPrivateSemaphoreKill(int status, int semId)
{
union semun semun;
semun.val = 0; /* unused */
if (semctl(semId, 0, IPC_RMID, semun) == -1)
elog(NOTICE, "IPCPrivateSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s",
semId, strerror(errno));
}
/****************************************************************************/
/* IPCPrivateMemoryKill(status, shmId) */
/* */
/****************************************************************************/
static void
IPCPrivateMemoryKill(int status, int shmId)
{
if (UsePrivateMemory)
{
/* free ( IpcPrivateMem[shmId].memptr ); */
}
else
{
if (shmctl(shmId, IPC_RMID, (struct shmid_ds *) NULL) < 0)
{
elog(NOTICE, "IPCPrivateMemoryKill: shmctl(%d, %d, 0) failed: %m",
shmId, IPC_RMID);
}
}
}
/* /* ----------------------------------------------------------------
* Note: * Semaphore support
* XXX This should be split into two different calls. One should
* XXX be used to create a semaphore set. The other to "attach" a
* XXX existing set. It should be an error for the semaphore set
* XXX to already exist or for it not to, respectively.
* *
* Currently, the semaphore sets are "attached" and an error * These routines represent a fairly thin layer on top of SysV semaphore
* is detected only when a later shared memory attach fails. * functionality.
* ----------------------------------------------------------------
*/ */
IpcSemaphoreId /* ----------------------------------------------------------------
IpcSemaphoreCreate(IpcSemaphoreKey semKey, * InternalIpcSemaphoreCreate(semKey, numSems, permission,
int semNum, * semStartValue, removeOnExit)
int permission, *
int semStartValue, * Attempt to create a new semaphore set with the specified key.
int removeOnExit) * Will fail (return -1) if such a set already exists.
* On success, a callback is optionally registered with on_shmem_exit
* to delete the semaphore set when on_shmem_exit is called.
*
* If we fail with a failure code other than collision-with-existing-set,
* print out an error and abort. Other types of errors are not recoverable.
* ----------------------------------------------------------------
*/
static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
int numSems, int permission,
int semStartValue, bool removeOnExit)
{ {
int semId; int semId;
int i; int i;
int errStatus;
u_short array[IPC_NMAXSEM]; u_short array[IPC_NMAXSEM];
union semun semun; union semun semun;
/* check arguments */ Assert(numSems > 0 && numSems <= IPC_NMAXSEM);
if (semNum > IPC_NMAXSEM || semNum <= 0)
return (-1);
semId = semget(semKey, 0, 0); semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | permission);
if (semId == -1) if (semId < 0)
{ {
#ifdef DEBUG_IPC /*
fprintf(stderr, "calling semget(%d, %d, 0%o)\n", * Fail quietly if error indicates a collision with existing set.
semKey, semNum, (unsigned)(IPC_CREAT|permission)); * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
#endif * we could get a permission violation instead?
*/
if (errno == EEXIST || errno == EACCES)
return -1;
/*
* Else complain and abort
*/
fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
(int) semKey, numSems, (IPC_CREAT|IPC_EXCL|permission),
strerror(errno));
semId = semget(semKey, semNum, IPC_CREAT | permission); if (errno == ENOSPC)
fprintf(stderr,
"\nThis error does *not* mean that you have run out of disk space.\n\n"
"It occurs either because system limit for the maximum number of\n"
"semaphore sets (SEMMNI), or the system wide maximum number of\n"
"semaphores (SEMMNS), would be exceeded. You need to raise the\n"
"respective kernel parameter. Look into the PostgreSQL documentation\n"
"for details.\n\n");
if (semId < 0) proc_exit(1);
{ }
fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
semKey, semNum, (unsigned)(permission|IPC_CREAT),
strerror(errno));
if (errno == ENOSPC)
fprintf(stderr,
"\nThis error does *not* mean that you have run out of disk space.\n\n"
"It occurs either because system limit for the maximum number of\n"
"semaphore sets (SEMMNI), or the system wide maximum number of\n"
"semaphores (SEMMNS), would be exceeded. You need to raise the\n"
"respective kernel parameter. Look into the PostgreSQL documentation\n"
"for details.\n\n");
return (-1);
}
for (i = 0; i < semNum; i++)
array[i] = semStartValue;
semun.array = array;
errStatus = semctl(semId, 0, SETALL, semun);
if (errStatus == -1)
{
fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, 0, SETALL, ...) failed: %s\n",
semId, strerror(errno));
if (errno == ERANGE) /* Initialize new semas to specified start value */
fprintf(stderr, for (i = 0; i < numSems; i++)
"You possibly need to raise your kernel's SEMVMX value to be at least\n" array[i] = semStartValue;
"%d. Look into the PostgreSQL documentation for details.\n", semun.array = array;
semStartValue); if (semctl(semId, 0, SETALL, semun) < 0)
{
fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, 0, SETALL, ...) failed: %s\n",
semId, strerror(errno));
semctl(semId, 0, IPC_RMID, semun); if (errno == ERANGE)
return (-1); fprintf(stderr,
} "You possibly need to raise your kernel's SEMVMX value to be at least\n"
"%d. Look into the PostgreSQL documentation for details.\n",
semStartValue);
if (removeOnExit) IpcSemaphoreKill(semId);
on_shmem_exit(IPCPrivateSemaphoreKill, (Datum) semId); proc_exit(1);
} }
/* Register on-exit routine to delete the new set */
#ifdef DEBUG_IPC if (removeOnExit)
fprintf(stderr, "IpcSemaphoreCreate returns %d\n", semId); on_shmem_exit(CallbackSemaphoreKill, Int32GetDatum(semId));
fflush(stdout);
fflush(stderr);
#endif
return semId; return semId;
} }
/****************************************************************************/ /****************************************************************************/
/* IpcSemaphoreSet() - sets the initial value of the semaphore */ /* IpcSemaphoreKill(semId) - removes a semaphore set */
/* */ /* */
/* note: the xxx_return variables are only used for debugging. */
/****************************************************************************/ /****************************************************************************/
#ifdef NOT_USED
static int IpcSemaphoreSet_return;
void void
IpcSemaphoreSet(int semId, int semno, int value) IpcSemaphoreKill(IpcSemaphoreId semId)
{ {
int errStatus;
union semun semun; union semun semun;
semun.val = value; semun.val = 0; /* unused, but keep compiler quiet */
errStatus = semctl(semId, semno, SETVAL, semun);
IpcSemaphoreSet_return = errStatus;
if (errStatus == -1) if (semctl(semId, 0, IPC_RMID, semun) < 0)
fprintf(stderr, "IpcSemaphoreSet: semctl(id=%d) failed: %s\n", fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
semId, strerror(errno)); semId, strerror(errno));
/* We used to report a failure via elog(NOTICE), but that's pretty
* pointless considering any client has long since disconnected ...
*/
} }
#endif /* NOT_USED */
/****************************************************************************/ /****************************************************************************/
/* IpcSemaphoreKill(key) - removes a semaphore */ /* CallbackSemaphoreKill(status, semId) */
/* */ /* (called as an on_shmem_exit callback, hence funny argument list) */
/****************************************************************************/ /****************************************************************************/
void static void
IpcSemaphoreKill(IpcSemaphoreKey key) CallbackSemaphoreKill(int status, Datum semId)
{ {
int semId; IpcSemaphoreKill(DatumGetInt32(semId));
union semun semun;
semun.val = 0; /* unused */
/* kill semaphore if existent */
semId = semget(key, 0, 0);
if (semId != -1)
semctl(semId, 0, IPC_RMID, semun);
} }
/****************************************************************************/ /****************************************************************************/
/* IpcSemaphoreLock(semId, sem, lock) - locks a semaphore */ /* IpcSemaphoreLock(semId, sem) - locks a semaphore */
/* */
/* note: the xxx_return variables are only used for debugging. */
/****************************************************************************/ /****************************************************************************/
static int IpcSemaphoreLock_return;
void void
IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock) IpcSemaphoreLock(IpcSemaphoreId semId, int sem)
{ {
int errStatus; int errStatus;
struct sembuf sops; struct sembuf sops;
sops.sem_op = lock; sops.sem_op = -1; /* decrement */
sops.sem_flg = 0; sops.sem_flg = 0;
sops.sem_num = sem; sops.sem_num = sem;
...@@ -427,11 +361,6 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock) ...@@ -427,11 +361,6 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock)
* Note: if errStatus is -1 and errno == EINTR then it means we * Note: if errStatus is -1 and errno == EINTR then it means we
* returned from the operation prematurely because we were * returned from the operation prematurely because we were
* sent a signal. So we try and lock the semaphore again. * sent a signal. So we try and lock the semaphore again.
* I am not certain this is correct, but the semantics aren't
* clear it fixes problems with parallel abort synchronization,
* namely that after processing an abort signal, the semaphore
* call returns with -1 (and errno == EINTR) before it should.
* -cim 3/28/90
* ---------------- * ----------------
*/ */
do do
...@@ -439,8 +368,6 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock) ...@@ -439,8 +368,6 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock)
errStatus = semop(semId, &sops, 1); errStatus = semop(semId, &sops, 1);
} while (errStatus == -1 && errno == EINTR); } while (errStatus == -1 && errno == EINTR);
IpcSemaphoreLock_return = errStatus;
if (errStatus == -1) if (errStatus == -1)
{ {
fprintf(stderr, "IpcSemaphoreLock: semop(id=%d) failed: %s\n", fprintf(stderr, "IpcSemaphoreLock: semop(id=%d) failed: %s\n",
...@@ -450,19 +377,15 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock) ...@@ -450,19 +377,15 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock)
} }
/****************************************************************************/ /****************************************************************************/
/* IpcSemaphoreUnlock(semId, sem, lock) - unlocks a semaphore */ /* IpcSemaphoreUnlock(semId, sem) - unlocks a semaphore */
/* */
/* note: the xxx_return variables are only used for debugging. */
/****************************************************************************/ /****************************************************************************/
static int IpcSemaphoreUnlock_return;
void void
IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock) IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem)
{ {
int errStatus; int errStatus;
struct sembuf sops; struct sembuf sops;
sops.sem_op = -lock; sops.sem_op = 1; /* increment */
sops.sem_flg = 0; sops.sem_flg = 0;
sops.sem_num = sem; sops.sem_num = sem;
...@@ -470,12 +393,8 @@ IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock) ...@@ -470,12 +393,8 @@ IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock)
/* ---------------- /* ----------------
* Note: if errStatus is -1 and errno == EINTR then it means we * Note: if errStatus is -1 and errno == EINTR then it means we
* returned from the operation prematurely because we were * returned from the operation prematurely because we were
* sent a signal. So we try and lock the semaphore again. * sent a signal. So we try and unlock the semaphore again.
* I am not certain this is correct, but the semantics aren't * Not clear this can really happen, but might as well cope.
* clear it fixes problems with parallel abort synchronization,
* namely that after processing an abort signal, the semaphore
* call returns with -1 (and errno == EINTR) before it should.
* -cim 3/28/90
* ---------------- * ----------------
*/ */
do do
...@@ -483,8 +402,6 @@ IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock) ...@@ -483,8 +402,6 @@ IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock)
errStatus = semop(semId, &sops, 1); errStatus = semop(semId, &sops, 1);
} while (errStatus == -1 && errno == EINTR); } while (errStatus == -1 && errno == EINTR);
IpcSemaphoreUnlock_return = errStatus;
if (errStatus == -1) if (errStatus == -1)
{ {
fprintf(stderr, "IpcSemaphoreUnlock: semop(id=%d) failed: %s\n", fprintf(stderr, "IpcSemaphoreUnlock: semop(id=%d) failed: %s\n",
...@@ -493,53 +410,115 @@ IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock) ...@@ -493,53 +410,115 @@ IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock)
} }
} }
/****************************************************************************/
/*	 IpcSemaphoreTryLock(semId, sem) - non-blocking attempt to lock a sema	*/
/*		Returns true if the semaphore was decremented, false if semop()	*/
/*		reported EAGAIN/EWOULDBLOCK.  Any other semop() failure is		*/
/*		reported on stderr and followed by proc_exit(255).				*/
/****************************************************************************/
bool
IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem)
{
	struct sembuf op;
	int			rc;

	op.sem_num = sem;
	op.sem_op = -1;				/* decrement */
	op.sem_flg = IPC_NOWAIT;	/* but don't block */

	/*
	 * A signal can make semop() return early with errno == EINTR;
	 * in that case simply issue the same request again.
	 */
	for (;;)
	{
		rc = semop(semId, &op, 1);
		if (rc != -1 || errno != EINTR)
			break;
	}

	/* Anything other than -1 means the decrement went through */
	if (rc != -1)
		return true;

	/*
	 * "Would have blocked" is the expected failure.  It is spelled
	 * EAGAIN or EWOULDBLOCK depending on the platform, so check
	 * whichever of them exist (and only once if they are the same).
	 */
#ifdef EAGAIN
	if (errno == EAGAIN)
		return false;			/* failed to lock it */
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
	if (errno == EWOULDBLOCK)
		return false;			/* failed to lock it */
#endif

	/* Otherwise we got trouble */
	fprintf(stderr, "IpcSemaphoreTryLock: semop(id=%d) failed: %s\n",
			semId, strerror(errno));
	proc_exit(255);

	return true;
}
/* Get the current value (semval) of the semaphore */
int int
IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem) IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem)
{ {
int semncnt;
union semun dummy; /* for Solaris */ union semun dummy; /* for Solaris */
dummy.val = 0; /* unused */ dummy.val = 0; /* unused */
semncnt = semctl(semId, sem, GETNCNT, dummy); return semctl(semId, sem, GETVAL, dummy);
return semncnt;
} }
int /* Get the PID of the last process to do semop() on the semaphore */
IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem) static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int sem)
{ {
int semval;
union semun dummy; /* for Solaris */ union semun dummy; /* for Solaris */
dummy.val = 0; /* unused */ dummy.val = 0; /* unused */
semval = semctl(semId, sem, GETVAL, dummy); return semctl(semId, sem, GETPID, dummy);
return semval;
} }
/****************************************************************************/
/* IpcMemoryCreate(memKey) */
/* */
/* - returns the memory identifier, if creation succeeds */
/* returns IpcMemCreationFailed, if failure */
/****************************************************************************/
IpcMemoryId /* ----------------------------------------------------------------
IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission) * Shared memory support
*
* These routines represent a fairly thin layer on top of SysV shared
* memory functionality.
* ----------------------------------------------------------------
*/
/* ----------------------------------------------------------------
* InternalIpcMemoryCreate(memKey, size, permission)
*
* Attempt to create a new shared memory segment with the specified key.
* Will fail (return NULL) if such a segment already exists. If successful,
* attach the segment to the current process and return its attached address.
* On success, callbacks are registered with on_shmem_exit to detach and
* delete the segment when on_shmem_exit is called.
*
* If we fail with a failure code other than collision-with-existing-segment,
* print out an error and abort. Other types of errors are not recoverable.
* ----------------------------------------------------------------
*/
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
{ {
IpcMemoryId shmid; IpcMemoryId shmid;
void *memAddress;
if (memKey == PrivateIPCKey) shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | permission);
{
/* private */
shmid = PrivateMemoryCreate(memKey, size);
}
else
shmid = shmget(memKey, size, IPC_CREAT | permission);
if (shmid < 0) if (shmid < 0)
{ {
fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%d, 0%o) failed: %s\n", /*
(int)memKey, size, (unsigned)(IPC_CREAT|permission), * Fail quietly if error indicates a collision with existing segment.
* One would expect EEXIST, given that we said IPC_EXCL, but perhaps
* we could get a permission violation instead?
*/
if (errno == EEXIST || errno == EACCES)
return NULL;
/*
* Else complain and abort
*/
fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n",
(int) memKey, size, (IPC_CREAT | IPC_EXCL | permission),
strerror(errno)); strerror(errno));
if (errno == EINVAL) if (errno == EINVAL)
...@@ -547,7 +526,7 @@ IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission) ...@@ -547,7 +526,7 @@ IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
"\nThis error can be caused by one of three things:\n\n" "\nThis error can be caused by one of three things:\n\n"
"1. The maximum size for shared memory segments on your system was\n" "1. The maximum size for shared memory segments on your system was\n"
" exceeded. You need to raise the SHMMAX parameter in your kernel\n" " exceeded. You need to raise the SHMMAX parameter in your kernel\n"
" to be at least %d bytes.\n\n" " to be at least %u bytes.\n\n"
"2. The requested shared memory segment was too small for your system.\n" "2. The requested shared memory segment was too small for your system.\n"
" You need to lower the SHMMIN parameter in your kernel.\n\n" " You need to lower the SHMMIN parameter in your kernel.\n\n"
"3. The requested shared memory segment already exists but is of the\n" "3. The requested shared memory segment already exists but is of the\n"
...@@ -567,179 +546,302 @@ IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission) ...@@ -567,179 +546,302 @@ IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
"reached. The PostgreSQL Administrator's Guide contains more\n" "reached. The PostgreSQL Administrator's Guide contains more\n"
"information about shared memory configuration.\n\n"); "information about shared memory configuration.\n\n");
return IpcMemCreationFailed; proc_exit(1);
} }
/* Register on-exit routine to delete the new segment */
on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));
/* if (memKey == PrivateIPCKey) */ /* OK, should be able to attach to the segment */
on_shmem_exit(IPCPrivateMemoryKill, (Datum) shmid); memAddress = shmat(shmid, 0, 0);
return shmid; if (memAddress == (void *) -1)
}
/****************************************************************************/
/* IpcMemoryIdGet(memKey, size) returns the shared memory Id */
/* or IpcMemIdGetFailed */
/****************************************************************************/
IpcMemoryId
IpcMemoryIdGet(IpcMemoryKey memKey, uint32 size)
{
IpcMemoryId shmid;
shmid = shmget(memKey, size, 0);
if (shmid < 0)
{ {
fprintf(stderr, "IpcMemoryIdGet: shmget(key=%d, size=%d, 0) failed: %s\n", fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n",
memKey, size, strerror(errno)); shmid, strerror(errno));
return IpcMemIdGetFailed; proc_exit(1);
} }
return shmid; /* Register on-exit routine to detach new segment before deleting */
on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
return memAddress;
} }
/****************************************************************************/ /****************************************************************************/
/* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */ /* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
/* from a backend address space */ /* from process' address space */
/* (only called by backends running under the postmaster) */ /* (called as an on_shmem_exit callback, hence funny argument list) */
/****************************************************************************/ /****************************************************************************/
static void static void
IpcMemoryDetach(int status, char *shmaddr) IpcMemoryDetach(int status, Datum shmaddr)
{ {
if (shmdt(shmaddr) < 0) if (shmdt(DatumGetPointer(shmaddr)) < 0)
elog(NOTICE, "IpcMemoryDetach: shmdt(0x%p) failed: %m", shmaddr); fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n",
DatumGetPointer(shmaddr), strerror(errno));
/* We used to report a failure via elog(NOTICE), but that's pretty
* pointless considering any client has long since disconnected ...
*/
} }
/****************************************************************************/ /****************************************************************************/
/* IpcMemoryAttach(memId) returns the address of shared memory */ /* IpcMemoryDelete(status, shmId) deletes a shared memory segment */
/* or IpcMemAttachFailed */ /* (called as an on_shmem_exit callback, hence funny argument list) */
/* */
/* CALL IT: addr = (struct <MemoryStructure> *) IpcMemoryAttach(memId); */
/* */
/****************************************************************************/ /****************************************************************************/
char * static void
IpcMemoryAttach(IpcMemoryId memId) IpcMemoryDelete(int status, Datum shmId)
{ {
char *memAddress; if (shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL) < 0)
fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n",
DatumGetInt32(shmId), IPC_RMID, strerror(errno));
/* We used to report a failure via elog(NOTICE), but that's pretty
* pointless considering any client has long since disconnected ...
*/
}
if (UsePrivateMemory) /* ----------------------------------------------------------------
memAddress = (char *) PrivateMemoryAttach(memId); * private memory support
else *
memAddress = (char *) shmat(memId, 0, 0); * Rather than allocating shmem segments with IPC_PRIVATE key, we
* just malloc() the requested amount of space. This code emulates
* the needed shmem functions.
* ----------------------------------------------------------------
*/
/* if ( *memAddress == -1) { XXX ??? */ static void *
if (memAddress == (char *) -1) PrivateMemoryCreate(uint32 size)
{
void *memAddress;
memAddress = malloc(size);
if (!memAddress)
{ {
fprintf(stderr, "IpcMemoryAttach: shmat(id=%d) failed: %s\n", fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size);
memId, strerror(errno)); proc_exit(1);
return IpcMemAttachFailed;
} }
MemSet(memAddress, 0, size); /* keep Purify quiet */
if (!UsePrivateMemory) /* Register on-exit routine to release storage */
on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress)); on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(memAddress));
return (char *) memAddress; return memAddress;
} }
static void
/****************************************************************************/ PrivateMemoryDelete(int status, Datum memaddr)
/* IpcMemoryKill(memKey) removes a shared memory segment */
/* (only called by the postmaster and standalone backends) */
/****************************************************************************/
void
IpcMemoryKill(IpcMemoryKey memKey)
{ {
IpcMemoryId shmid; free(DatumGetPointer(memaddr));
if (!UsePrivateMemory && (shmid = shmget(memKey, 0, 0)) >= 0)
{
if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
{
elog(NOTICE, "IpcMemoryKill: shmctl(%d, %d, 0) failed: %m",
shmid, IPC_RMID);
}
}
} }
#ifdef HAS_TEST_AND_SET
/* ------------------ /* ------------------
* use hardware locks to replace semaphores for sequent machines * Routines to assign keys for new IPC objects
* to avoid costs of swapping processes and to provide unlimited *
* supply of locks. * The idea here is to detect and re-use keys that may have been assigned
* by a crashed postmaster or backend.
* ------------------ * ------------------
*/ */
/* used in spin.c */ static IpcMemoryKey NextShmemSegID = 0;
SLock *SLockArray = NULL; static IpcSemaphoreKey NextSemaID = 0;
static SLock **FreeSLockPP; /*
static int *UnusedSLockIP; * (Re) initialize key assignment at startup of postmaster or standalone
static slock_t *SLockMemoryLock; * backend, also at postmaster reset.
static IpcMemoryId SLockMemoryId = -1; */
void
IpcInitKeyAssignment(int port)
{
	/* Both key counters restart from the same port-derived base value */
	int			keyBase = port * 1000;

	NextShmemSegID = keyBase;
	NextSemaID = keyBase;
}
struct ipcdummy /*
{ /* to get alignment/size right */ * Create a shared memory segment of the given size and initialize its
SLock *free; * standard header. Dead Postgres segments are recycled if found,
int unused; * but we do not fail upon collision with non-Postgres shmem segments.
slock_t memlock; */
SLock slocks[MAX_SPINS + 1]; PGShmemHeader *
}; IpcMemoryCreate(uint32 size, bool private, int permission)
{
void *memAddress;
PGShmemHeader *hdr;
#define SLOCKMEMORYSIZE sizeof(struct ipcdummy) /* Room for a header? */
Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
void /* Loop till we find a free IPC key */
CreateAndInitSLockMemory(IPCKey key) for (NextShmemSegID++ ; ; NextShmemSegID++)
{
int id;
SLock *slckP;
SLockMemoryId = IpcMemoryCreate(key,
SLOCKMEMORYSIZE,
0700);
AttachSLockMemory(key);
*FreeSLockPP = NULL;
*UnusedSLockIP = (int) FIRSTFREELOCKID;
for (id = 0; id < (int) FIRSTFREELOCKID; id++)
{ {
slckP = &(SLockArray[id]); IpcMemoryId shmid;
S_INIT_LOCK(&(slckP->locklock));
slckP->flag = NOLOCK; /* Special case if creating a private segment --- just malloc() it */
slckP->nshlocks = 0; if (private)
S_INIT_LOCK(&(slckP->shlock)); {
S_INIT_LOCK(&(slckP->exlock)); memAddress = PrivateMemoryCreate(size);
S_INIT_LOCK(&(slckP->comlock)); break;
slckP->next = NULL; }
/* Try to create new segment */
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
if (memAddress)
break; /* successful create and attach */
/* See if it looks to be leftover from a dead Postgres process */
shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
if (shmid < 0)
continue; /* failed: must be some other app's */
memAddress = shmat(shmid, 0, 0);
if (memAddress == (void *) -1)
continue; /* failed: must be some other app's */
hdr = (PGShmemHeader *) memAddress;
if (hdr->magic != PGShmemMagic)
{
shmdt(memAddress);
continue; /* segment belongs to a non-Postgres app */
}
/*
* If the creator PID is my own PID or does not belong to any
* extant process, it's safe to zap it.
*/
if (hdr->creatorPID != getpid())
{
if (kill(hdr->creatorPID, 0) == 0 ||
errno != ESRCH)
{
shmdt(memAddress);
continue; /* segment belongs to a live process */
}
}
/*
* The segment appears to be from a dead Postgres process, or
* from a previous cycle of life in this same process. Zap it,
* if possible. This probably shouldn't fail, but if it does,
* assume the segment belongs to someone else after all,
* and continue quietly.
*/
shmdt(memAddress);
if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
continue;
/*
* Now try again to create the segment.
*/
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
if (memAddress)
break; /* successful create and attach */
/*
* Can only get here if some other process managed to create the
* same shmem key before we did. Let him have that one,
* loop around to try next key.
*/
} }
return; /*
} * OK, we created a new segment. Mark it as created by this process.
* The order of assignments here is critical so that another Postgres
* process can't see the header as valid but belonging to an invalid
* PID!
*/
hdr = (PGShmemHeader *) memAddress;
hdr->creatorPID = getpid();
hdr->magic = PGShmemMagic;
/*
* Initialize space allocation status for segment.
*/
hdr->totalsize = size;
hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
void return hdr;
AttachSLockMemory(IPCKey key)
{
struct ipcdummy *slockM;
if (SLockMemoryId == -1)
SLockMemoryId = IpcMemoryIdGet(key, SLOCKMEMORYSIZE);
if (SLockMemoryId == -1)
elog(FATAL, "SLockMemory not in shared memory");
slockM = (struct ipcdummy *) IpcMemoryAttach(SLockMemoryId);
if (slockM == IpcMemAttachFailed)
elog(FATAL, "AttachSLockMemory: could not attach segment");
FreeSLockPP = (SLock **) &(slockM->free);
UnusedSLockIP = (int *) &(slockM->unused);
SLockMemoryLock = (slock_t *) &(slockM->memlock);
S_INIT_LOCK(SLockMemoryLock);
SLockArray = (SLock *) &(slockM->slocks[0]);
return;
} }
#ifdef NOT_USED /*
bool * Create a semaphore set with the given number of useful semaphores
LockIsFree(int lockid) * (an additional sema is actually allocated to serve as identifier).
* Dead Postgres sema sets are recycled if found, but we do not fail
* upon collision with non-Postgres sema sets.
*/
IpcSemaphoreId
IpcSemaphoreCreate(int numSems, int permission,
int semStartValue, bool removeOnExit)
{ {
return SLockArray[lockid].flag == NOLOCK; IpcSemaphoreId semId;
} union semun semun;
#endif /* Loop till we find a free IPC key */
for (NextSemaID++ ; ; NextSemaID++)
{
pid_t creatorPID;
/* Try to create new semaphore set */
semId = InternalIpcSemaphoreCreate(NextSemaID, numSems+1,
permission, semStartValue,
removeOnExit);
if (semId >= 0)
break; /* successful create */
#endif /* HAS_TEST_AND_SET */ /* See if it looks to be leftover from a dead Postgres process */
semId = semget(NextSemaID, numSems+1, 0);
if (semId < 0)
continue; /* failed: must be some other app's */
if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
continue; /* sema belongs to a non-Postgres app */
/*
* If the creator PID is my own PID or does not belong to any
* extant process, it's safe to zap it.
*/
creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
if (creatorPID <= 0)
continue; /* oops, GETPID failed */
if (creatorPID != getpid())
{
if (kill(creatorPID, 0) == 0 ||
errno != ESRCH)
continue; /* sema belongs to a live process */
}
/*
* The sema set appears to be from a dead Postgres process, or
* from a previous cycle of life in this same process. Zap it,
* if possible. This probably shouldn't fail, but if it does,
* assume the sema set belongs to someone else after all,
* and continue quietly.
*/
semun.val = 0; /* unused, but keep compiler quiet */
if (semctl(semId, 0, IPC_RMID, semun) < 0)
continue;
/*
* Now try again to create the sema set.
*/
semId = InternalIpcSemaphoreCreate(NextSemaID, numSems+1,
permission, semStartValue,
removeOnExit);
if (semId >= 0)
break; /* successful create */
/*
* Can only get here if some other process managed to create the
* same sema key before we did. Let him have that one,
* loop around to try next key.
*/
}
/*
* OK, we created a new sema set. Mark it as created by this process.
* We do this by setting the spare semaphore to PGSemaMagic-1 and then
* incrementing it with semop(). That leaves it with value PGSemaMagic
* and sempid referencing this process.
*/
semun.val = PGSemaMagic-1;
if (semctl(semId, numSems, SETVAL, semun) < 0)
{
fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
semId, numSems, PGSemaMagic-1, strerror(errno));
if (errno == ERANGE)
fprintf(stderr,
"You possibly need to raise your kernel's SEMVMX value to be at least\n"
"%d. Look into the PostgreSQL documentation for details.\n",
PGSemaMagic);
proc_exit(1);
}
IpcSemaphoreUnlock(semId, numSems);
return semId;
}
...@@ -8,148 +8,91 @@ ...@@ -8,148 +8,91 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.34 2000/11/21 21:16:01 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.35 2000/11/28 23:27:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include <sys/types.h>
#include "postgres.h" #include "postgres.h"
#include <sys/types.h>
#include "miscadmin.h" #include "miscadmin.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/proc.h"
#include "storage/sinval.h" #include "storage/sinval.h"
#include "storage/spin.h"
/*
 * SystemPortAddressCreateIPCKey
 *		Convert a port address into its IPC key.
 *
 * The address is asserted to be less than 32768 before the conversion
 * is delegated to SystemPortAddressGetIPCKey.
 */
IPCKey
SystemPortAddressCreateIPCKey(SystemPortAddress address)
{
	IPCKey		portKey;

	Assert(address < 32768);	/* XXX */

	portKey = SystemPortAddressGetIPCKey(address);
	return portKey;
}
/* /*
* CreateSharedMemoryAndSemaphores * CreateSharedMemoryAndSemaphores
* Creates and initializes shared memory and semaphores. * Creates and initializes shared memory and semaphores.
*
* This is called by the postmaster or by a standalone backend.
* It is NEVER called by a backend forked from the postmaster;
* for such a backend, the shared memory is already ready-to-go.
*
* If "private" is true then we only need private memory, not shared
* memory. This is true for a standalone backend, false for a postmaster.
*/ */
/**************************************************
CreateSharedMemoryAndSemaphores
is called exactly *ONCE* by the postmaster.
It is *NEVER* called by the postgres backend,
except in the case of a standalone backend.
0) destroy any existing semaphores for both buffer
and lock managers.
1) create the appropriate *SHARED* memory segments
for the two resource managers.
2) create shared semaphores as needed.
**************************************************/
void void
CreateSharedMemoryAndSemaphores(IPCKey key, int maxBackends) CreateSharedMemoryAndSemaphores(bool private, int maxBackends)
{ {
int size; int size;
PGShmemHeader *seghdr;
#ifdef HAS_TEST_AND_SET
/*
* Create shared memory for slocks
*/
CreateAndInitSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
#endif
/*
* Kill and create the buffer manager buffer pool (and semaphore)
*/
CreateSpinlocks(IPCKeyGetSpinLockSemaphoreKey(key));
/* /*
* Size of the primary shared-memory block is estimated via * Size of the Postgres shared-memory block is estimated via
* moderately-accurate estimates for the big hogs, plus 100K for the * moderately-accurate estimates for the big hogs, plus 100K for the
* stuff that's too small to bother with estimating. * stuff that's too small to bother with estimating.
*/ */
size = BufferShmemSize() + LockShmemSize(maxBackends) + XLOGShmemSize(); size = BufferShmemSize() + LockShmemSize(maxBackends) +
XLOGShmemSize() + SLockShmemSize() + SInvalShmemSize(maxBackends);
#ifdef STABLE_MEMORY_STORAGE #ifdef STABLE_MEMORY_STORAGE
size += MMShmemSize(); size += MMShmemSize();
#endif #endif
size += 100000; size += 100000;
/* might as well round it off to a multiple of a K or so... */ /* might as well round it off to a multiple of a typical page size */
size += 1024 - (size % 1024); size += 8192 - (size % 8192);
if (DebugLvl > 1) if (DebugLvl > 1)
{ fprintf(stderr, "invoking IpcMemoryCreate(size=%d)\n", size);
fprintf(stderr, "binding ShmemCreate(key=%x, size=%d)\n",
IPCKeyGetBufferMemoryKey(key), size);
}
ShmemCreate(IPCKeyGetBufferMemoryKey(key), size);
ShmemIndexReset();
InitShmem(key, size);
XLOGShmemInit();
InitBufferPool(key);
/* ---------------- /*
* do the lock table stuff * Create the shmem segment
* ----------------
*/ */
InitLocks(); seghdr = IpcMemoryCreate(size, private, IPCProtection);
if (InitLockTable() == INVALID_TABLEID)
elog(FATAL, "Couldn't create the lock table");
/* ---------------- /*
* do process table stuff * First initialize spinlocks --- needed by InitShmemAllocation()
* ----------------
*/ */
InitProcGlobal(key, maxBackends); CreateSpinlocks(seghdr);
CreateSharedInvalidationState(key, maxBackends);
}
/* /*
* AttachSharedMemoryAndSemaphores * Set up shmem.c hashtable
* Attaches to existing shared memory and semaphores.
*/
void
AttachSharedMemoryAndSemaphores(IPCKey key)
{
/* ----------------
* create rather than attach if using private key
* ----------------
*/ */
if (key == PrivateIPCKey) InitShmemAllocation(seghdr);
{
CreateSharedMemoryAndSemaphores(key, 16);
return;
}
#ifdef HAS_TEST_AND_SET /*
/* ---------------- * Set up xlog and buffers
* attach the slock shared memory
* ----------------
*/
AttachSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
#endif
/* ----------------
* attach the buffer manager buffer pool (and semaphore)
* ----------------
*/ */
InitShmem(key, 0); XLOGShmemInit();
InitBufferPool(key); InitBufferPool();
/* ---------------- /*
* initialize lock table stuff * Set up lock manager
* ----------------
*/ */
InitLocks(); InitLocks();
if (InitLockTable() == INVALID_TABLEID) if (InitLockTable() == INVALID_TABLEID)
elog(FATAL, "Couldn't attach to the lock table"); elog(FATAL, "Couldn't create the lock table");
/*
* Set up process table
*/
InitProcGlobal(maxBackends);
AttachSharedInvalidationState(key); /*
* Set up shared-inval messaging
*/
CreateSharedInvalidationState(maxBackends);
} }
...@@ -8,14 +8,14 @@ ...@@ -8,14 +8,14 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.54 2000/11/21 21:16:01 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.55 2000/11/28 23:27:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
/* /*
* POSTGRES processes share one or more regions of shared memory. * POSTGRES processes share one or more regions of shared memory.
* The shared memory is created by a postmaster and is inherited * The shared memory is created by a postmaster and is inherited
* by each backends via fork(). The routines in this file are used for * by each backend via fork(). The routines in this file are used for
* allocating and binding to shared memory data structures. * allocating and binding to shared memory data structures.
* *
* NOTES: * NOTES:
...@@ -56,153 +56,57 @@ ...@@ -56,153 +56,57 @@
* *
* See InitSem() in sem.c for an example of how to use the * See InitSem() in sem.c for an example of how to use the
* shmem index. * shmem index.
*
*/ */
#include "postgres.h" #include "postgres.h"
#include "access/transam.h" #include "access/transam.h"
#include "utils/tqual.h" #include "utils/tqual.h"
/* shared memory global variables */ /* shared memory global variables */
unsigned long ShmemBase = 0; /* start and end address of shared memory */ static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
static unsigned long ShmemEnd = 0;
static unsigned long ShmemSize = 0; /* current size (and default) */ SHMEM_OFFSET ShmemBase; /* start address of shared memory */
static SHMEM_OFFSET ShmemEnd; /* end+1 address of shared memory */
SPINLOCK ShmemLock; /* lock for shared memory allocation */ SPINLOCK ShmemLock; /* lock for shared memory allocation */
SPINLOCK ShmemIndexLock; /* lock for shmem index access */ SPINLOCK ShmemIndexLock; /* lock for shmem index access */
static unsigned long *ShmemFreeStart = NULL; /* pointer to the OFFSET static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
* of first free shared
* memory */
static unsigned long *ShmemIndexOffset = NULL; /* start of the shmem
* index table (for
* bootstrap) */
static int ShmemBootstrap = FALSE; /* flag becomes true when shared
* mem is created by POSTMASTER */
static HTAB *ShmemIndex = NULL;
/* ---------------------
* ShmemIndexReset() - Resets the shmem index to NULL....
* useful when the postmaster destroys existing shared memory
* and creates all new segments after a backend crash.
* ----------------------
*/
void
ShmemIndexReset(void)
{
ShmemIndex = (HTAB *) NULL;
}
/* static bool ShmemBootstrap = false; /* bootstrapping shmem index? */
* CreateSharedRegion()
*
* This routine is called once by the postmaster to
* initialize the shared buffer pool. Assume there is
* only one postmaster so no synchronization is necessary
* until after this routine completes successfully.
*
* key is a unique identifier for the shmem region.
* size is the size of the region.
*/
static IpcMemoryId ShmemId;
/*
 * ShmemCreate
 *		Create the shared memory region identified by "key".
 *
 * A nonzero "size" replaces the remembered ShmemSize; zero keeps whatever
 * value ShmemSize already holds.  The resulting segment id is stored in
 * ShmemId.  Failure to create the region is reported with elog(FATAL).
 */
void
ShmemCreate(unsigned int key, unsigned int size)
{
	if (size != 0)
		ShmemSize = size;

	/* create shared mem region */
	ShmemId = IpcMemoryCreate(key, ShmemSize, IPCProtection);
	if (ShmemId == IpcMemCreationFailed)
	{
		elog(FATAL, "ShmemCreate: cannot create region");
		exit(1);
	}

	/*
	 * The region now exists but has not been initialized yet; flag that
	 * state for the code that sets up the shmem index.  Only the
	 * postmaster/creator-of-all-things should have this flag set.
	 */
	ShmemBootstrap = TRUE;
}
/* /*
* InitShmem() -- map region into process address space * InitShmemAllocation() --- set up shared-memory allocation and index table.
* and initialize shared data structures.
*
*/ */
int void
InitShmem(unsigned int key, unsigned int size) InitShmemAllocation(PGShmemHeader *seghdr)
{ {
Pointer sharedRegion;
unsigned long currFreeSpace;
HASHCTL info; HASHCTL info;
int hash_flags; int hash_flags;
ShmemIndexEnt *result, ShmemIndexEnt *result,
item; item;
bool found; bool found;
IpcMemoryId shmid;
/* if zero key, use default memory size */
if (size)
ShmemSize = size;
/* default key is 0 */
/* attach to shared memory region (SysV or BSD OS specific) */
if (ShmemBootstrap && key == PrivateIPCKey)
/* if we are running backend alone */
shmid = ShmemId;
else
shmid = IpcMemoryIdGet(IPCKeyGetBufferMemoryKey(key), ShmemSize);
sharedRegion = IpcMemoryAttach(shmid);
if (sharedRegion == NULL)
{
elog(FATAL, "AttachSharedRegion: couldn't attach to shmem\n");
return FALSE;
}
/* get pointers to the dimensions of shared memory */
ShmemBase = (unsigned long) sharedRegion;
ShmemEnd = (unsigned long) sharedRegion + ShmemSize;
/* First long in shared memory is the available-space pointer */
ShmemFreeStart = (unsigned long *) ShmemBase;
/* next is a shmem pointer to the shmem index */
ShmemIndexOffset = ShmemFreeStart + 1;
/* next is ShmemVariableCache */
ShmemVariableCache = (VariableCache) (ShmemIndexOffset + 1);
/* here is where to start dynamic allocation */
currFreeSpace = MAXALIGN(sizeof(*ShmemFreeStart) +
sizeof(*ShmemIndexOffset) +
sizeof(*ShmemVariableCache));
/* /* Set up basic pointers to shared memory */
* bootstrap initialize spin locks so we can start to use the ShmemSegHdr = seghdr;
* allocator and shmem index. ShmemBase = (SHMEM_OFFSET) seghdr;
*/ ShmemEnd = ShmemBase + seghdr->totalsize;
InitSpinLocks();
/* /*
* We have just allocated additional space for two spinlocks. Now * Since ShmemInitHash calls ShmemInitStruct, which expects the
* setup the global free space count * ShmemIndex hashtable to exist already, we have a bit of a circularity
* problem in initializing the ShmemIndex itself. We set ShmemBootstrap
* to tell ShmemInitStruct to fake it.
*/ */
if (ShmemBootstrap) ShmemIndex = (HTAB *) NULL;
{ ShmemBootstrap = true;
*ShmemFreeStart = currFreeSpace;
memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache));
}
/* if ShmemFreeStart is NULL, then the allocator won't work */
Assert(*ShmemFreeStart);
/* create OR attach to the shared memory shmem index */ /* create the shared memory shmem index */
info.keysize = SHMEM_INDEX_KEYSIZE; info.keysize = SHMEM_INDEX_KEYSIZE;
info.datasize = SHMEM_INDEX_DATASIZE; info.datasize = SHMEM_INDEX_DATASIZE;
hash_flags = HASH_ELEM; hash_flags = HASH_ELEM;
...@@ -211,60 +115,43 @@ InitShmem(unsigned int key, unsigned int size) ...@@ -211,60 +115,43 @@ InitShmem(unsigned int key, unsigned int size)
ShmemIndex = ShmemInitHash("ShmemIndex", ShmemIndex = ShmemInitHash("ShmemIndex",
SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
&info, hash_flags); &info, hash_flags);
if (!ShmemIndex) if (!ShmemIndex)
{ elog(FATAL, "InitShmemAllocation: couldn't initialize Shmem Index");
elog(FATAL, "InitShmem: couldn't initialize Shmem Index");
return FALSE;
}
/* /*
* Now, check the shmem index for an entry to the shmem index. If * Now, create an entry in the hashtable for the index itself.
* there is an entry there, someone else created the table. Otherwise,
* we did and we have to initialize it.
*/ */
MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE); MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE); strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
result = (ShmemIndexEnt *) result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found); hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
if (!result) if (!result)
{ elog(FATAL, "InitShmemAllocation: corrupted shmem index");
elog(FATAL, "InitShmem: corrupted shmem index");
return FALSE;
}
if (!found)
{
/* Assert(ShmemBootstrap && !found);
* bootstrapping shmem: we have to initialize the shmem index now.
*/
Assert(ShmemBootstrap); result->location = MAKE_OFFSET(ShmemIndex->hctl);
result->location = MAKE_OFFSET(ShmemIndex->hctl); result->size = SHMEM_INDEX_SIZE;
*ShmemIndexOffset = result->location;
result->size = SHMEM_INDEX_SIZE;
ShmemBootstrap = FALSE; ShmemBootstrap = false;
} /* now release the lock acquired in ShmemInitStruct */
else
Assert(!ShmemBootstrap);
/* now release the lock acquired in ShmemHashInit */
SpinRelease(ShmemIndexLock); SpinRelease(ShmemIndexLock);
Assert(result->location == MAKE_OFFSET(ShmemIndex->hctl)); /*
* Initialize ShmemVariableCache for transaction manager.
return TRUE; */
ShmemVariableCache = (VariableCache)
ShmemAlloc(sizeof(*ShmemVariableCache));
memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache));
} }
/* /*
* ShmemAlloc -- allocate max-aligned byte string from shared memory * ShmemAlloc -- allocate max-aligned chunk from shared memory
*
* Assumes ShmemLock and ShmemSegHdr are initialized.
* *
* Assumes ShmemLock and ShmemFreeStart are initialized.
* Returns: real pointer to memory or NULL if we are out * Returns: real pointer to memory or NULL if we are out
* of space. Has to return a real pointer in order * of space. Has to return a real pointer in order
* to be compatible with malloc(). * to be compatible with malloc().
...@@ -272,7 +159,7 @@ InitShmem(unsigned int key, unsigned int size) ...@@ -272,7 +159,7 @@ InitShmem(unsigned int key, unsigned int size)
void * void *
ShmemAlloc(Size size) ShmemAlloc(Size size)
{ {
unsigned long tmpFree; uint32 newFree;
void *newSpace; void *newSpace;
/* /*
...@@ -280,15 +167,15 @@ ShmemAlloc(Size size) ...@@ -280,15 +167,15 @@ ShmemAlloc(Size size)
*/ */
size = MAXALIGN(size); size = MAXALIGN(size);
Assert(*ShmemFreeStart); Assert(ShmemSegHdr);
SpinAcquire(ShmemLock); SpinAcquire(ShmemLock);
tmpFree = *ShmemFreeStart + size; newFree = ShmemSegHdr->freeoffset + size;
if (tmpFree <= ShmemSize) if (newFree <= ShmemSegHdr->totalsize)
{ {
newSpace = (void *) MAKE_PTR(*ShmemFreeStart); newSpace = (void *) MAKE_PTR(ShmemSegHdr->freeoffset);
*ShmemFreeStart += size; ShmemSegHdr->freeoffset = newFree;
} }
else else
newSpace = NULL; newSpace = NULL;
...@@ -306,7 +193,7 @@ ShmemAlloc(Size size) ...@@ -306,7 +193,7 @@ ShmemAlloc(Size size)
* *
* Returns TRUE if the pointer is valid. * Returns TRUE if the pointer is valid.
*/ */
int bool
ShmemIsValid(unsigned long addr) ShmemIsValid(unsigned long addr)
{ {
return (addr < ShmemEnd) && (addr >= ShmemBase); return (addr < ShmemEnd) && (addr >= ShmemBase);
...@@ -394,16 +281,15 @@ ShmemPIDLookup(int pid, SHMEM_OFFSET *locationPtr) ...@@ -394,16 +281,15 @@ ShmemPIDLookup(int pid, SHMEM_OFFSET *locationPtr)
sprintf(item.key, "PID %d", pid); sprintf(item.key, "PID %d", pid);
SpinAcquire(ShmemIndexLock); SpinAcquire(ShmemIndexLock);
result = (ShmemIndexEnt *) result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found); hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
if (!result) if (!result)
{ {
SpinRelease(ShmemIndexLock); SpinRelease(ShmemIndexLock);
elog(ERROR, "ShmemInitPID: ShmemIndex corrupted"); elog(ERROR, "ShmemInitPID: ShmemIndex corrupted");
return FALSE; return FALSE;
} }
if (found) if (found)
...@@ -438,19 +324,19 @@ ShmemPIDDestroy(int pid) ...@@ -438,19 +324,19 @@ ShmemPIDDestroy(int pid)
sprintf(item.key, "PID %d", pid); sprintf(item.key, "PID %d", pid);
SpinAcquire(ShmemIndexLock); SpinAcquire(ShmemIndexLock);
result = (ShmemIndexEnt *) result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, &found); hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, &found);
if (found) if (found)
location = result->location; location = result->location;
SpinRelease(ShmemIndexLock); SpinRelease(ShmemIndexLock);
if (!result) if (!result)
{ {
elog(ERROR, "ShmemPIDDestroy: PID table corrupted"); elog(ERROR, "ShmemPIDDestroy: PID table corrupted");
return INVALID_OFFSET; return INVALID_OFFSET;
} }
if (found) if (found)
...@@ -487,53 +373,31 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) ...@@ -487,53 +373,31 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr)
if (!ShmemIndex) if (!ShmemIndex)
{ {
#ifdef USE_ASSERT_CHECKING
char *strname = "ShmemIndex";
#endif
/* /*
* If the shmem index doesn't exist, we fake it. * If the shmem index doesn't exist, we are bootstrapping: we must
* be trying to init the shmem index itself.
* *
* If we are creating the first shmem index, then let shmemalloc() * Notice that the ShmemIndexLock is held until the shmem index has
* allocate the space for a new HTAB. Otherwise, find the old one * been completely initialized.
* and return that. Notice that the ShmemIndexLock is held until
* the shmem index has been completely initialized.
*/ */
Assert(strcmp(name, strname) == 0); Assert(strcmp(name, "ShmemIndex") == 0);
if (ShmemBootstrap) Assert(ShmemBootstrap);
{ *foundPtr = FALSE;
/* in POSTMASTER/Single process */ return ShmemAlloc(size);
*foundPtr = FALSE;
return ShmemAlloc(size);
}
else
{
Assert(*ShmemIndexOffset);
*foundPtr = TRUE;
return (void *) MAKE_PTR(*ShmemIndexOffset);
}
}
else
{
/* look it up in the shmem index */
result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, foundPtr);
} }
/* look it up in the shmem index */
result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (char *) &item, HASH_ENTER, foundPtr);
if (!result) if (!result)
{ {
SpinRelease(ShmemIndexLock); SpinRelease(ShmemIndexLock);
elog(ERROR, "ShmemInitStruct: Shmem Index corrupted"); elog(ERROR, "ShmemInitStruct: Shmem Index corrupted");
return NULL; return NULL;
} }
else if (*foundPtr)
if (*foundPtr)
{ {
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.23 2000/11/12 20:51:51 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.24 2000/11/28 23:27:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -27,52 +27,23 @@ ...@@ -27,52 +27,23 @@
SPINLOCK SInvalLock = (SPINLOCK) NULL; SPINLOCK SInvalLock = (SPINLOCK) NULL;
/****************************************************************************/ /****************************************************************************/
/* CreateSharedInvalidationState() Create a buffer segment */ /* CreateSharedInvalidationState() Initialize SI buffer */
/* */ /* */
/* should be called only by the POSTMASTER */ /* should be called only by the POSTMASTER */
/****************************************************************************/ /****************************************************************************/
void void
CreateSharedInvalidationState(IPCKey key, int maxBackends) CreateSharedInvalidationState(int maxBackends)
{ {
int status; /* SInvalLock must be initialized already, during spinlock init */
SIBufferInit(maxBackends);
/* SInvalLock gets set in spin.c, during spinlock init */
status = SISegmentInit(true, IPCKeyGetSIBufferMemoryBlock(key),
maxBackends);
if (status == -1)
elog(FATAL, "CreateSharedInvalidationState: failed segment init");
}
/****************************************************************************/
/* AttachSharedInvalidationState(key) Attach to existing buffer segment */
/* */
/* should be called by each backend during startup */
/****************************************************************************/
void
AttachSharedInvalidationState(IPCKey key)
{
int status;
/*
 * With PrivateIPCKey the SI state is created (maxBackends = 16) rather
 * than attached, and we are done.
 */
if (key == PrivateIPCKey)
{
CreateSharedInvalidationState(key, 16);
return;
}
/* SInvalLock gets set in spin.c, during spinlock init */
/*
 * createNewSegment = false: look up and attach to the existing segment.
 * The maxBackends argument (0) is only meaningful when creating.
 */
status = SISegmentInit(false, IPCKeyGetSIBufferMemoryBlock(key), 0);
/* SISegmentInit reports failure as -1 */
if (status == -1)
elog(FATAL, "AttachSharedInvalidationState: failed segment init");
} }
/* /*
* InitSharedInvalidationState * InitBackendSharedInvalidationState
* Initialize new backend's state info in buffer segment. * Initialize new backend's state info in buffer segment.
* Must be called after AttachSharedInvalidationState().
*/ */
void void
InitSharedInvalidationState(void) InitBackendSharedInvalidationState(void)
{ {
SpinAcquire(SInvalLock); SpinAcquire(SInvalLock);
if (!SIBackendInit(shmInvalBuffer)) if (!SIBackendInit(shmInvalBuffer))
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.35 2000/11/12 20:51:51 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.36 2000/11/28 23:27:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -25,95 +25,38 @@ ...@@ -25,95 +25,38 @@
SISeg *shmInvalBuffer; SISeg *shmInvalBuffer;
static void SISegmentAttach(IpcMemoryId shmid);
static void SISegInit(SISeg *segP, int maxBackends);
static void CleanupInvalidationState(int status, Datum arg); static void CleanupInvalidationState(int status, Datum arg);
static void SISetProcStateInvalid(SISeg *segP); static void SISetProcStateInvalid(SISeg *segP);
/*
 * SISegmentInit
 *		Obtain the SI shared memory segment and attach to it.
 *
 * When createNewSegment is true (postmaster or standalone backend), any
 * existing segment for "key" is killed, a fresh one is created, and its
 * contents are initialized.  When it is false (backends started by the
 * postmaster), the already-existing segment is looked up instead.
 *
 * maxBackends is only consulted when createNewSegment is true.
 *
 * Returns 1 on success, or -1 after perror() if the segment could not be
 * created or found.
 */
int
SISegmentInit(bool createNewSegment, IPCKey key, int maxBackends)
{
	IpcMemoryId segId;
	int			bytesNeeded;

	if (createNewSegment)
	{
		/* Kill existing segment, if any */
		IpcMemoryKill(key);

		/* sizeof(SISeg) already includes the first ProcState entry */
		bytesNeeded = sizeof(SISeg) + sizeof(ProcState) * (maxBackends - 1);

		segId = IpcMemoryCreate(key, bytesNeeded, IPCProtection);
	}
	else
		segId = IpcMemoryIdGet(key, 0);

	if (segId < 0)
	{
		perror(createNewSegment
			   ? "SISegmentInit: segment create failed"
			   : "SISegmentInit: segment get failed");
		return -1;				/* an error */
	}

	/* Attaching sets the global variable shmInvalBuffer */
	SISegmentAttach(segId);

	/* A brand-new segment also needs its contents initialized */
	if (createNewSegment)
		SISegInit(shmInvalBuffer, maxBackends);

	return 1;
}
/* /*
* SISegmentAttach * SInvalShmemSize --- return shared-memory space needed
* Attach to specified shared memory segment
*/ */
static void int
SISegmentAttach(IpcMemoryId shmid) SInvalShmemSize(int maxBackends)
{ {
shmInvalBuffer = (SISeg *) IpcMemoryAttach(shmid); /*
* Figure space needed. Note sizeof(SISeg) includes the first
if (shmInvalBuffer == IpcMemAttachFailed) * ProcState entry.
{ */
/* XXX use validity function */ return sizeof(SISeg) + sizeof(ProcState) * (maxBackends - 1);
elog(FATAL, "SISegmentAttach: Could not attach segment: %m");
}
} }
/* /*
* SISegInit * SIBufferInit
* Initialize contents of a new shared memory sinval segment * Create and initialize a new SI message buffer
*/ */
static void void
SISegInit(SISeg *segP, int maxBackends) SIBufferInit(int maxBackends)
{ {
int segSize;
SISeg *segP;
int i; int i;
/* Allocate space in shared memory */
segSize = SInvalShmemSize(maxBackends);
shmInvalBuffer = segP = (SISeg *) ShmemAlloc(segSize);
/* Clear message counters, save size of procState array */ /* Clear message counters, save size of procState array */
segP->minMsgNum = 0; segP->minMsgNum = 0;
segP->maxMsgNum = 0; segP->maxMsgNum = 0;
......
...@@ -3,31 +3,24 @@ ...@@ -3,31 +3,24 @@
* spin.c * spin.c
* routines for managing spin locks * routines for managing spin locks
* *
* POSTGRES has two kinds of locks: semaphores (which put the
* process to sleep) and spinlocks (which are supposed to be
* short term locks). Spinlocks are implemented via test-and-set (TAS)
* instructions if possible, else via semaphores. The semaphore method
* is too slow to be useful :-(
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.25 2000/05/31 00:28:29 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.26 2000/11/28 23:27:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
/*
* POSTGRES has two kinds of locks: semaphores (which put the
* process to sleep) and spinlocks (which are supposed to be
* short term locks). Currently both are implemented as SysV
* semaphores, but presumably this can change if we move to
* a machine with a test-and-set (TAS) instruction. It's probably
* a good idea to think about (and allocate) short term and long
* term semaphores separately anyway.
*
* NOTE: These routines are not supposed to be widely used in Postgres.
* They are preserved solely for the purpose of porting Mark Sullivan's
* buffer manager to Postgres.
*/
#include <errno.h>
#include "postgres.h" #include "postgres.h"
#include <errno.h>
#ifndef HAS_TEST_AND_SET #ifndef HAS_TEST_AND_SET
#include <sys/sem.h> #include <sys/sem.h>
#endif #endif
...@@ -35,39 +28,33 @@ ...@@ -35,39 +28,33 @@
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/s_lock.h" #include "storage/s_lock.h"
/* Probably should move these to an appropriate header file */
/* globals used in this file */ extern SPINLOCK ShmemLock;
IpcSemaphoreId SpinLockId; extern SPINLOCK ShmemIndexLock;
extern SPINLOCK BufMgrLock;
#ifdef HAS_TEST_AND_SET extern SPINLOCK LockMgrLock;
/* real spin lock implementations */ extern SPINLOCK ProcStructLock;
extern SPINLOCK SInvalLock;
void extern SPINLOCK OidGenLockId;
CreateSpinlocks(IPCKey key) extern SPINLOCK XidGenLockId;
{ extern SPINLOCK ControlFileLockId;
/* the spin lock shared memory must have been created by now */
return;
}
void
InitSpinLocks(void)
{
extern SPINLOCK ShmemLock;
extern SPINLOCK ShmemIndexLock;
extern SPINLOCK BufMgrLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK OidGenLockId;
extern SPINLOCK XidGenLockId;
extern SPINLOCK ControlFileLockId;
#ifdef STABLE_MEMORY_STORAGE #ifdef STABLE_MEMORY_STORAGE
extern SPINLOCK MMCacheLock; extern SPINLOCK MMCacheLock;
#endif #endif
/* These six spinlocks have fixed location in shmem */
/*
* Initialize identifiers for permanent spinlocks during startup
*
* The same identifiers are used for both TAS and semaphore implementations,
* although in one case they are indexes into a shmem array and in the other
* they are semaphore numbers.
*/
static void
InitSpinLockIDs(void)
{
ShmemLock = (SPINLOCK) SHMEMLOCKID; ShmemLock = (SPINLOCK) SHMEMLOCKID;
ShmemIndexLock = (SPINLOCK) SHMEMINDEXLOCKID; ShmemIndexLock = (SPINLOCK) SHMEMINDEXLOCKID;
BufMgrLock = (SPINLOCK) BUFMGRLOCKID; BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
...@@ -81,11 +68,18 @@ InitSpinLocks(void) ...@@ -81,11 +68,18 @@ InitSpinLocks(void)
#ifdef STABLE_MEMORY_STORAGE #ifdef STABLE_MEMORY_STORAGE
MMCacheLock = (SPINLOCK) MMCACHELOCKID; MMCacheLock = (SPINLOCK) MMCACHELOCKID;
#endif #endif
return;
} }
#ifdef HAS_TEST_AND_SET
/* real spin lock implementation */
typedef struct slock
{
slock_t shlock;
} SLock;
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
bool Trace_spinlocks = false; bool Trace_spinlocks = false;
...@@ -93,193 +87,268 @@ inline static void ...@@ -93,193 +87,268 @@ inline static void
PRINT_SLDEBUG(const char * where, SPINLOCK lockid, const SLock * lock) PRINT_SLDEBUG(const char * where, SPINLOCK lockid, const SLock * lock)
{ {
if (Trace_spinlocks) if (Trace_spinlocks)
elog(DEBUG, elog(DEBUG, "%s: id=%d", where, lockid);
"%s: id=%d (locklock=%d, flag=%d, nshlocks=%d, shlock=%d, exlock=%d)",
where, lockid,
lock->locklock, lock->flag, lock->nshlocks, lock->shlock, lock->exlock);
} }
#else /* not LOCK_DEBUG */ #else /* not LOCK_DEBUG */
#define PRINT_SLDEBUG(a,b,c) #define PRINT_SLDEBUG(a,b,c)
#endif /* not LOCK_DEBUG */ #endif /* not LOCK_DEBUG */
/* from ipc.c */ static SLock *SLockArray = NULL;
extern SLock *SLockArray;
#define SLOCKMEMORYSIZE ((int) MAX_SPINS * sizeof(SLock))
/*
* SLockShmemSize --- return shared-memory space needed
*/
int
SLockShmemSize(void)
{
return MAXALIGN(SLOCKMEMORYSIZE);
}
/*
* CreateSpinlocks --- create and initialize spinlocks during startup
*/
void
CreateSpinlocks(PGShmemHeader *seghdr)
{
int id;
/*
* We must allocate the space "by hand" because shmem.c isn't up yet
*/
SLockArray = (SLock *) (((char *) seghdr) + seghdr->freeoffset);
seghdr->freeoffset += MAXALIGN(SLOCKMEMORYSIZE);
Assert(seghdr->freeoffset <= seghdr->totalsize);
/*
* Initialize all spinlocks to "unlocked" state
*/
for (id = 0; id < (int) MAX_SPINS; id++)
{
SLock *slckP = &(SLockArray[id]);
S_INIT_LOCK(&(slckP->shlock));
}
/*
* Assign indexes for fixed spinlocks
*/
InitSpinLockIDs();
}
void void
SpinAcquire(SPINLOCK lockid) SpinAcquire(SPINLOCK lockid)
{ {
SLock *slckP; SLock *slckP = &(SLockArray[lockid]);
/* This used to be in ipc.c, but move here to reduce function calls */
slckP = &(SLockArray[lockid]);
PRINT_SLDEBUG("SpinAcquire", lockid, slckP); PRINT_SLDEBUG("SpinAcquire", lockid, slckP);
ex_try_again: S_LOCK(&(slckP->shlock));
S_LOCK(&(slckP->locklock));
switch (slckP->flag)
{
case NOLOCK:
slckP->flag = EXCLUSIVELOCK;
S_LOCK(&(slckP->exlock));
S_LOCK(&(slckP->shlock));
S_UNLOCK(&(slckP->locklock));
PRINT_SLDEBUG("OUT", lockid, slckP);
break;
case SHAREDLOCK:
case EXCLUSIVELOCK:
S_UNLOCK(&(slckP->locklock));
S_LOCK(&(slckP->exlock));
S_UNLOCK(&(slckP->exlock));
goto ex_try_again;
}
PROC_INCR_SLOCK(lockid); PROC_INCR_SLOCK(lockid);
PRINT_SLDEBUG("SpinAcquire/success", lockid, slckP); PRINT_SLDEBUG("SpinAcquire/done", lockid, slckP);
} }
void void
SpinRelease(SPINLOCK lockid) SpinRelease(SPINLOCK lockid)
{ {
SLock *slckP; SLock *slckP = &(SLockArray[lockid]);
/* This used to be in ipc.c, but move here to reduce function calls */
slckP = &(SLockArray[lockid]);
/* /*
* Check that we are actually holding the lock we are releasing. This * Check that we are actually holding the lock we are releasing. This
* can be done only after MyProc has been initialized. * can be done only after MyProc has been initialized.
*/ */
Assert(!MyProc || MyProc->sLocks[lockid] > 0); Assert(!MyProc || MyProc->sLocks[lockid] > 0);
Assert(slckP->flag != NOLOCK);
PROC_DECR_SLOCK(lockid); PROC_DECR_SLOCK(lockid);
PRINT_SLDEBUG("SpinRelease", lockid, slckP); PRINT_SLDEBUG("SpinRelease", lockid, slckP);
S_LOCK(&(slckP->locklock)); S_UNLOCK(&(slckP->shlock));
/* ------------- PRINT_SLDEBUG("SpinRelease/done", lockid, slckP);
* give favor to read processes }
* -------------
#else /* !HAS_TEST_AND_SET */
/*
* No TAS, so spinlocks are implemented using SysV semaphores.
*
* We support two slightly different APIs here: SpinAcquire/SpinRelease
* work with SPINLOCK integer indexes for the permanent spinlocks, which
* are all assumed to live in the first spinlock semaphore set. There
* is also an emulation of the s_lock.h TAS-spinlock macros; for that case,
* typedef slock_t stores the semId and sem number of the sema to use.
* The semas needed are created by CreateSpinlocks and doled out by
* s_init_lock_sema.
*
* Since many systems have a rather small SEMMSL limit on semas per set,
* we allocate the semaphores required in sets of SPINLOCKS_PER_SET semas.
* This value is deliberately made equal to PROC_NSEMS_PER_SET so that all
* sema sets allocated by Postgres will be the same size; that eases the
* semaphore-recycling logic in IpcSemaphoreCreate().
*
* Note that the SpinLockIds array is not in shared memory; it is filled
* by the postmaster and then inherited through fork() by backends. This
* is OK because its contents do not change after system startup.
*/
#define SPINLOCKS_PER_SET PROC_NSEMS_PER_SET
static IpcSemaphoreId *SpinLockIds = NULL;
static int numSpinSets = 0; /* number of sema sets used */
static int numSpinLocks = 0; /* total number of semas allocated */
static int nextSpinLock = 0; /* next free spinlock index */
static void SpinFreeAllSemaphores(void);
/*
* SLockShmemSize --- return shared-memory space needed
*/
int
SLockShmemSize(void)
{
return 0;
}
/*
* CreateSpinlocks --- create and initialize spinlocks during startup
*/
void
CreateSpinlocks(PGShmemHeader *seghdr)
{
int i;
if (SpinLockIds == NULL)
{
/*
* Compute number of spinlocks needed. If this logic gets any more
* complicated, it should be distributed into the affected modules,
* similar to the way shmem space estimation is handled.
*
* For now, though, we just need the fixed spinlocks (MAX_SPINS),
* two spinlocks per shared disk buffer, and four spinlocks for XLOG.
*/
numSpinLocks = (int) MAX_SPINS + 2 * NBuffers + 4;
/* might as well round up to a multiple of SPINLOCKS_PER_SET */
numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1;
numSpinLocks = numSpinSets * SPINLOCKS_PER_SET;
SpinLockIds = (IpcSemaphoreId *)
malloc(numSpinSets * sizeof(IpcSemaphoreId));
Assert(SpinLockIds != NULL);
}
for (i = 0; i < numSpinSets; i++)
SpinLockIds[i] = -1;
/*
* Arrange to delete semas on exit --- set this up now so that we
* will clean up if allocation fails. We use our own freeproc,
* rather than IpcSemaphoreCreate's removeOnExit option, because
* we don't want to fill up the on_shmem_exit list with a separate
* entry for each semaphore set.
*/ */
slckP->flag = NOLOCK; on_shmem_exit(SpinFreeAllSemaphores, 0);
if (slckP->nshlocks > 0)
/* Create sema sets and set all semas to count 1 */
for (i = 0; i < numSpinSets; i++)
{ {
while (slckP->nshlocks > 0) SpinLockIds[i] = IpcSemaphoreCreate(SPINLOCKS_PER_SET,
{ IPCProtection,
S_UNLOCK(&(slckP->shlock)); 1,
S_LOCK(&(slckP->comlock)); false);
}
S_UNLOCK(&(slckP->shlock));
} }
else
S_UNLOCK(&(slckP->shlock)); /*
S_UNLOCK(&(slckP->exlock)); * Assign indexes for fixed spinlocks
S_UNLOCK(&(slckP->locklock)); */
PRINT_SLDEBUG("SpinRelease/released", lockid, slckP); Assert(MAX_SPINS <= SPINLOCKS_PER_SET);
InitSpinLockIDs();
/* Init counter for allocating dynamic spinlocks */
nextSpinLock = MAX_SPINS;
} }
#else /* !HAS_TEST_AND_SET */ /*
/* Spinlocks are implemented using SysV semaphores */ * SpinFreeAllSemaphores -
* called at shmem_exit time, ie when exiting the postmaster or
* destroying shared state for a failed set of backends.
* Free up all the semaphores allocated for spinlocks.
*/
static void
SpinFreeAllSemaphores(void)
{
int i;
static bool AttachSpinLocks(IPCKey key); for (i = 0; i < numSpinSets; i++)
static bool SpinIsLocked(SPINLOCK lock); {
if (SpinLockIds[i] >= 0)
IpcSemaphoreKill(SpinLockIds[i]);
}
}
/* /*
* SpinAcquire -- try to grab a spinlock * SpinAcquire -- grab a fixed spinlock
* *
* FAILS if the semaphore is corrupted. * FAILS if the semaphore is corrupted.
*/ */
void void
SpinAcquire(SPINLOCK lock) SpinAcquire(SPINLOCK lock)
{ {
IpcSemaphoreLock(SpinLockId, lock, IpcExclusiveLock); IpcSemaphoreLock(SpinLockIds[0], lock);
PROC_INCR_SLOCK(lock); PROC_INCR_SLOCK(lock);
} }
/* /*
* SpinRelease -- release a spin lock * SpinRelease -- release a fixed spin lock
* *
* FAILS if the semaphore is corrupted * FAILS if the semaphore is corrupted
*/ */
void void
SpinRelease(SPINLOCK lock) SpinRelease(SPINLOCK lock)
{ {
Assert(SpinIsLocked(lock)); #ifdef USE_ASSERT_CHECKING
PROC_DECR_SLOCK(lock); /* Check it's locked */
IpcSemaphoreUnlock(SpinLockId, lock, IpcExclusiveLock);
}
static bool
SpinIsLocked(SPINLOCK lock)
{
int semval; int semval;
semval = IpcSemaphoreGetValue(SpinLockId, lock); semval = IpcSemaphoreGetValue(SpinLockIds[0], lock);
return semval < IpcSemaphoreDefaultStartValue; Assert(semval < 1);
#endif
PROC_DECR_SLOCK(lock);
IpcSemaphoreUnlock(SpinLockIds[0], lock);
} }
/* /*
* CreateSpinlocks -- Create a sysV semaphore array for * s_lock.h hardware-spinlock emulation
* the spinlocks
*
*/ */
void void
CreateSpinlocks(IPCKey key) s_init_lock_sema(volatile slock_t *lock)
{ {
if (nextSpinLock >= numSpinLocks)
SpinLockId = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection, elog(FATAL, "s_init_lock_sema: not enough semaphores");
IpcSemaphoreDefaultStartValue, 1); lock->semId = SpinLockIds[nextSpinLock / SPINLOCKS_PER_SET];
lock->sem = nextSpinLock % SPINLOCKS_PER_SET;
if (SpinLockId <= 0) nextSpinLock++;
elog(STOP, "CreateSpinlocks: cannot create spin locks");
return;
} }
/*
* InitSpinLocks -- Spinlock bootstrapping
*
* We need several spinlocks for bootstrapping:
* ShmemIndexLock (for the shmem index table) and
* ShmemLock (for the shmem allocator), BufMgrLock (for buffer
* pool exclusive access), LockMgrLock (for the lock table), and
* ProcStructLock (a spin lock for the shared process structure).
* If there's a Sony WORM drive attached, we also have a spinlock
* (SJCacheLock) for it. Same story for the main memory storage mgr.
*
*/
void void
InitSpinLocks(void) s_unlock_sema(volatile slock_t *lock)
{ {
extern SPINLOCK ShmemLock; IpcSemaphoreUnlock(lock->semId, lock->sem);
extern SPINLOCK ShmemIndexLock; }
extern SPINLOCK BufMgrLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK OidGenLockId;
extern SPINLOCK XidGenLockId;
extern SPINLOCK ControlFileLockId;
#ifdef STABLE_MEMORY_STORAGE
extern SPINLOCK MMCacheLock;
#endif
/* These five (or six) spinlocks have fixed location is shmem */
ShmemLock = (SPINLOCK) SHMEMLOCKID;
ShmemIndexLock = (SPINLOCK) SHMEMINDEXLOCKID;
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
SInvalLock = (SPINLOCK) SINVALLOCKID;
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
XidGenLockId = (SPINLOCK) XIDGENLOCKID;
ControlFileLockId = (SPINLOCK) CNTLFILELOCKID;
#ifdef STABLE_MEMORY_STORAGE bool
MMCacheLock = (SPINLOCK) MMCACHELOCKID; s_lock_free_sema(volatile slock_t *lock)
#endif {
return IpcSemaphoreGetValue(lock->semId, lock->sem) > 0;
}
return; int
tas_sema(volatile slock_t *lock)
{
/* Note that TAS macros return 0 if *success* */
return ! IpcSemaphoreTryLock(lock->semId, lock->sem);
} }
#endif /* !HAS_TEST_AND_SET */ #endif /* !HAS_TEST_AND_SET */
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.72 2000/11/08 22:10:00 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.73 2000/11/28 23:27:56 tgl Exp $
* *
* NOTES * NOTES
* Outside modules can create a lock table and acquire/release * Outside modules can create a lock table and acquire/release
...@@ -56,6 +56,7 @@ static char *lock_types[] = ...@@ -56,6 +56,7 @@ static char *lock_types[] =
"AccessExclusiveLock" "AccessExclusiveLock"
}; };
static char *DeadLockMessage = "Deadlock detected.\n\tSee the lock(l) manual page for a possible cause.";
#ifdef LOCK_DEBUG #ifdef LOCK_DEBUG
...@@ -943,8 +944,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCK *lock, LOCKMODE lockmode) ...@@ -943,8 +944,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCK *lock, LOCKMODE lockmode)
lock) != NO_ERROR) lock) != NO_ERROR)
{ {
/* ------------------- /* -------------------
* This could have happend as a result of a deadlock, * We failed as a result of a deadlock, see HandleDeadLock().
* see HandleDeadLock().
* Decrement the lock nHolding and holders fields as * Decrement the lock nHolding and holders fields as
* we are no longer waiting on this lock. * we are no longer waiting on this lock.
* ------------------- * -------------------
...@@ -957,8 +957,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCK *lock, LOCKMODE lockmode) ...@@ -957,8 +957,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCK *lock, LOCKMODE lockmode)
if (lock->activeHolders[lockmode] == lock->holders[lockmode]) if (lock->activeHolders[lockmode] == lock->holders[lockmode])
lock->waitMask &= BITS_OFF[lockmode]; lock->waitMask &= BITS_OFF[lockmode];
SpinRelease(lockMethodTable->ctl->masterLock); SpinRelease(lockMethodTable->ctl->masterLock);
elog(ERROR, "WaitOnLock: error on wakeup - Aborting this transaction"); elog(ERROR, DeadLockMessage);
/* not reached */ /* not reached */
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.83 2000/10/07 14:39:13 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.84 2000/11/28 23:27:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
* This is so that we can support more backends. (system-wide semaphore * This is so that we can support more backends. (system-wide semaphore
* sets run out pretty fast.) -ay 4/95 * sets run out pretty fast.) -ay 4/95
* *
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.83 2000/10/07 14:39:13 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.84 2000/11/28 23:27:56 tgl Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -91,10 +91,8 @@ static PROC_HDR *ProcGlobal = NULL; ...@@ -91,10 +91,8 @@ static PROC_HDR *ProcGlobal = NULL;
PROC *MyProc = NULL; PROC *MyProc = NULL;
static void ProcKill(int exitStatus, Datum pid); static void ProcKill(int exitStatus, Datum pid);
static void ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum); static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum);
static void ProcFreeSem(IpcSemaphoreKey semKey, int semNum); static void ProcFreeSem(IpcSemaphoreId semId, int semNum);
static char *DeadLockMessage = "Deadlock detected -- See the lock(l) manual page for a possible cause.";
/* /*
* InitProcGlobal - * InitProcGlobal -
...@@ -116,7 +114,7 @@ static char *DeadLockMessage = "Deadlock detected -- See the lock(l) manual page ...@@ -116,7 +114,7 @@ static char *DeadLockMessage = "Deadlock detected -- See the lock(l) manual page
* rather than later. * rather than later.
*/ */
void void
InitProcGlobal(IPCKey key, int maxBackends) InitProcGlobal(int maxBackends)
{ {
bool found = false; bool found = false;
...@@ -135,39 +133,35 @@ InitProcGlobal(IPCKey key, int maxBackends) ...@@ -135,39 +133,35 @@ InitProcGlobal(IPCKey key, int maxBackends)
int i; int i;
ProcGlobal->freeProcs = INVALID_OFFSET; ProcGlobal->freeProcs = INVALID_OFFSET;
ProcGlobal->currKey = IPCGetProcessSemaphoreInitKey(key); for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++) {
ProcGlobal->procSemIds[i] = -1;
ProcGlobal->freeSemMap[i] = 0; ProcGlobal->freeSemMap[i] = 0;
}
/* /*
* Arrange to delete semas on exit --- set this up now so that we * Arrange to delete semas on exit --- set this up now so that we
* will clean up if pre-allocation fails... * will clean up if pre-allocation fails. We use our own freeproc,
* rather than IpcSemaphoreCreate's removeOnExit option, because
* we don't want to fill up the on_shmem_exit list with a separate
* entry for each semaphore set.
*/ */
on_shmem_exit(ProcFreeAllSemaphores, 0); on_shmem_exit(ProcFreeAllSemaphores, 0);
/* /*
* Pre-create the semaphores for the first maxBackends processes, * Pre-create the semaphores for the first maxBackends processes.
* unless we are running as a standalone backend.
*/ */
if (key != PrivateIPCKey) Assert(maxBackends > 0 && maxBackends <= MAXBACKENDS);
for (i = 0; i < ((maxBackends-1)/PROC_NSEMS_PER_SET+1); i++)
{ {
for (i = 0; IpcSemaphoreId semId;
i < (maxBackends + PROC_NSEMS_PER_SET - 1) / PROC_NSEMS_PER_SET;
i++) semId = IpcSemaphoreCreate(PROC_NSEMS_PER_SET,
{ IPCProtection,
IPCKey semKey = ProcGlobal->currKey + i; 1,
int semId; false);
ProcGlobal->procSemIds[i] = semId;
semId = IpcSemaphoreCreate(semKey,
PROC_NSEMS_PER_SET,
IPCProtection,
IpcSemaphoreDefaultStartValue,
0);
if (semId < 0)
elog(FATAL, "InitProcGlobal: IpcSemaphoreCreate failed");
/* mark this sema set allocated */
ProcGlobal->freeSemMap[i] = (1 << PROC_NSEMS_PER_SET);
}
} }
} }
} }
...@@ -178,7 +172,7 @@ InitProcGlobal(IPCKey key, int maxBackends) ...@@ -178,7 +172,7 @@ InitProcGlobal(IPCKey key, int maxBackends)
* ------------------------ * ------------------------
*/ */
void void
InitProcess(IPCKey key) InitProcess(void)
{ {
bool found = false; bool found = false;
unsigned long location, unsigned long location,
...@@ -186,7 +180,7 @@ InitProcess(IPCKey key) ...@@ -186,7 +180,7 @@ InitProcess(IPCKey key)
SpinAcquire(ProcStructLock); SpinAcquire(ProcStructLock);
/* attach to the free list */ /* attach to the ProcGlobal structure */
ProcGlobal = (PROC_HDR *) ProcGlobal = (PROC_HDR *)
ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found); ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
if (!found) if (!found)
...@@ -199,10 +193,9 @@ InitProcess(IPCKey key) ...@@ -199,10 +193,9 @@ InitProcess(IPCKey key)
{ {
SpinRelease(ProcStructLock); SpinRelease(ProcStructLock);
elog(ERROR, "ProcInit: you already exist"); elog(ERROR, "ProcInit: you already exist");
return;
} }
/* try to get a proc from the free list first */ /* try to get a proc struct from the free list first */
myOffset = ProcGlobal->freeProcs; myOffset = ProcGlobal->freeProcs;
...@@ -243,36 +236,22 @@ InitProcess(IPCKey key) ...@@ -243,36 +236,22 @@ InitProcess(IPCKey key)
if (IsUnderPostmaster) if (IsUnderPostmaster)
{ {
IPCKey semKey; IpcSemaphoreId semId;
int semNum; int semNum;
int semId; union semun semun;
union semun semun;
ProcGetNewSemKeyAndNum(&semKey, &semNum);
/* ProcGetNewSemIdAndNum(&semId, &semNum);
* Note: because of the pre-allocation done in InitProcGlobal,
* this call should always attach to an existing semaphore. It
* will (try to) create a new group of semaphores only if the
* postmaster tries to start more backends than it said it would.
*/
semId = IpcSemaphoreCreate(semKey,
PROC_NSEMS_PER_SET,
IPCProtection,
IpcSemaphoreDefaultStartValue,
0);
/* /*
* we might be reusing a semaphore that belongs to a dead backend. * we might be reusing a semaphore that belongs to a dead backend.
* So be careful and reinitialize its value here. * So be careful and reinitialize its value here.
*/ */
semun.val = IpcSemaphoreDefaultStartValue; semun.val = 1;
semctl(semId, semNum, SETVAL, semun); semctl(semId, semNum, SETVAL, semun);
IpcSemaphoreLock(semId, semNum, IpcExclusiveLock); IpcSemaphoreLock(semId, semNum);
MyProc->sem.semId = semId; MyProc->sem.semId = semId;
MyProc->sem.semNum = semNum; MyProc->sem.semNum = semNum;
MyProc->sem.semKey = semKey;
} }
else else
MyProc->sem.semId = -1; MyProc->sem.semId = -1;
...@@ -304,7 +283,7 @@ InitProcess(IPCKey key) ...@@ -304,7 +283,7 @@ InitProcess(IPCKey key)
*/ */
location = MAKE_OFFSET(MyProc); location = MAKE_OFFSET(MyProc);
if ((!ShmemPIDLookup(MyProcPid, &location)) || (location != MAKE_OFFSET(MyProc))) if ((!ShmemPIDLookup(MyProcPid, &location)) || (location != MAKE_OFFSET(MyProc)))
elog(STOP, "InitProc: ShmemPID table broken"); elog(STOP, "InitProcess: ShmemPID table broken");
MyProc->errType = NO_ERROR; MyProc->errType = NO_ERROR;
SHMQueueElemInit(&(MyProc->links)); SHMQueueElemInit(&(MyProc->links));
...@@ -363,10 +342,7 @@ ProcReleaseLocks() ...@@ -363,10 +342,7 @@ ProcReleaseLocks()
/* /*
* ProcRemove - * ProcRemove -
* used by the postmaster to clean up the global tables. This also frees * used by the postmaster to clean up the global tables. This also frees
* up the semaphore used for the lmgr of the process. (We have to do * up the semaphore used for the lmgr of the process.
* this is the postmaster instead of doing a IpcSemaphoreKill on exiting
* the process because the semaphore set is shared among backends and
* we don't want to remove other's semaphores on exit.)
*/ */
bool bool
ProcRemove(int pid) ProcRemove(int pid)
...@@ -383,7 +359,7 @@ ProcRemove(int pid) ...@@ -383,7 +359,7 @@ ProcRemove(int pid)
SpinAcquire(ProcStructLock); SpinAcquire(ProcStructLock);
ProcFreeSem(proc->sem.semKey, proc->sem.semNum); ProcFreeSem(proc->sem.semId, proc->sem.semNum);
proc->links.next = ProcGlobal->freeProcs; proc->links.next = ProcGlobal->freeProcs;
ProcGlobal->freeProcs = MAKE_OFFSET(proc); ProcGlobal->freeProcs = MAKE_OFFSET(proc);
...@@ -490,6 +466,7 @@ ProcQueueInit(PROC_QUEUE *queue) ...@@ -490,6 +466,7 @@ ProcQueueInit(PROC_QUEUE *queue)
* *
*/ */
static bool lockWaiting = false; static bool lockWaiting = false;
void void
SetWaitingForLock(bool waiting) SetWaitingForLock(bool waiting)
{ {
...@@ -514,12 +491,12 @@ SetWaitingForLock(bool waiting) ...@@ -514,12 +491,12 @@ SetWaitingForLock(bool waiting)
} }
} }
} }
void void
LockWaitCancel(void) LockWaitCancel(void)
{ {
/* BeOS doesn't have setitimer, but has set_alarm */
#ifndef __BEOS__ #ifndef __BEOS__
struct itimerval timeval, struct itimerval timeval,
dummy; dummy;
if (!lockWaiting) if (!lockWaiting)
...@@ -529,6 +506,7 @@ struct itimerval timeval, ...@@ -529,6 +506,7 @@ struct itimerval timeval,
MemSet(&timeval, 0, sizeof(struct itimerval)); MemSet(&timeval, 0, sizeof(struct itimerval));
setitimer(ITIMER_REAL, &timeval, &dummy); setitimer(ITIMER_REAL, &timeval, &dummy);
#else #else
/* BeOS doesn't have setitimer, but has set_alarm */
if (!lockWaiting) if (!lockWaiting)
return; return;
lockWaiting = false; lockWaiting = false;
...@@ -547,6 +525,8 @@ struct itimerval timeval, ...@@ -547,6 +525,8 @@ struct itimerval timeval,
* semaphore is cleared by default, so the first time we try * semaphore is cleared by default, so the first time we try
* to acquire it, we sleep. * to acquire it, we sleep.
* *
* Result is NO_ERROR if we acquired the lock, STATUS_ERROR if not (deadlock).
*
* ASSUME: that no one will fiddle with the queue until after * ASSUME: that no one will fiddle with the queue until after
* we release the spin lock. * we release the spin lock.
* *
...@@ -566,7 +546,6 @@ ProcSleep(PROC_QUEUE *waitQueue,/* lock->waitProcs */ ...@@ -566,7 +546,6 @@ ProcSleep(PROC_QUEUE *waitQueue,/* lock->waitProcs */
int aheadHolders[MAX_LOCKMODES]; int aheadHolders[MAX_LOCKMODES];
bool selfConflict = (lockctl->conflictTab[token] & myMask), bool selfConflict = (lockctl->conflictTab[token] & myMask),
prevSame = false; prevSame = false;
bool deadlock_checked = false;
#ifndef __BEOS__ #ifndef __BEOS__
struct itimerval timeval, struct itimerval timeval,
dummy; dummy;
...@@ -595,8 +574,8 @@ ProcSleep(PROC_QUEUE *waitQueue,/* lock->waitProcs */ ...@@ -595,8 +574,8 @@ ProcSleep(PROC_QUEUE *waitQueue,/* lock->waitProcs */
/* is he waiting for me ? */ /* is he waiting for me ? */
if (lockctl->conflictTab[proc->token] & MyProc->holdLock) if (lockctl->conflictTab[proc->token] & MyProc->holdLock)
{ {
/* Yes, report deadlock failure */
MyProc->errType = STATUS_ERROR; MyProc->errType = STATUS_ERROR;
elog(NOTICE, DeadLockMessage);
goto rt; goto rt;
} }
/* being waiting for him - go past */ /* being waiting for him - go past */
...@@ -642,10 +621,16 @@ ins:; ...@@ -642,10 +621,16 @@ ins:;
lock->waitMask |= myMask; lock->waitMask |= myMask;
SpinRelease(spinlock); SpinRelease(spinlock);
MyProc->errType = NO_ERROR; /* initialize result for success */
/* -------------- /* --------------
* We set this so we can wake up periodically and check for a deadlock. * Set timer so we can wake up after awhile and check for a deadlock.
* If a deadlock is detected, the handler releases the processes * If a deadlock is detected, the handler releases the process's
* semaphore and aborts the current transaction. * semaphore and sets MyProc->errType = STATUS_ERROR, allowing us to
* know that we must report failure rather than success.
*
* By delaying the check until we've waited for a bit, we can avoid
* running the rather expensive deadlock-check code in most cases.
* *
* Need to zero out struct to set the interval and the micro seconds fields * Need to zero out struct to set the interval and the micro seconds fields
* to 0. * to 0.
...@@ -655,49 +640,42 @@ ins:; ...@@ -655,49 +640,42 @@ ins:;
MemSet(&timeval, 0, sizeof(struct itimerval)); MemSet(&timeval, 0, sizeof(struct itimerval));
timeval.it_value.tv_sec = DeadlockTimeout / 1000; timeval.it_value.tv_sec = DeadlockTimeout / 1000;
timeval.it_value.tv_usec = (DeadlockTimeout % 1000) * 1000; timeval.it_value.tv_usec = (DeadlockTimeout % 1000) * 1000;
if (setitimer(ITIMER_REAL, &timeval, &dummy))
elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
#else #else
/* usecs */ time_interval = DeadlockTimeout * 1000000; /* usecs */
time_interval = DeadlockTimeout * 1000000; if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
#endif #endif
SetWaitingForLock(true); SetWaitingForLock(true);
do
{
MyProc->errType = NO_ERROR; /* reset flag after deadlock check */
if (!deadlock_checked) /* --------------
#ifndef __BEOS__ * If someone wakes us between SpinRelease and IpcSemaphoreLock,
if (setitimer(ITIMER_REAL, &timeval, &dummy)) * IpcSemaphoreLock will not block. The wakeup is "saved" by
#else * the semaphore implementation. Note also that if HandleDeadLock
if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0) * is invoked but does not detect a deadlock, IpcSemaphoreLock()
#endif * will continue to wait. There used to be a loop here, but it
elog(FATAL, "ProcSleep: Unable to set timer for process wakeup"); * was useless code...
deadlock_checked = true; * --------------
*/
/* -------------- IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum);
* if someone wakes us between SpinRelease and IpcSemaphoreLock,
* IpcSemaphoreLock will not block. The wakeup is "saved" by
* the semaphore implementation.
* --------------
*/
IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum,
IpcExclusiveLock);
} while (MyProc->errType == STATUS_NOT_FOUND); /* sleep after deadlock
* check */
lockWaiting = false; lockWaiting = false;
/* --------------- /* ---------------
* We were awoken before a timeout - now disable the timer * Disable the timer, if it's still running
* --------------- * ---------------
*/ */
#ifndef __BEOS__ #ifndef __BEOS__
timeval.it_value.tv_sec = 0; timeval.it_value.tv_sec = 0;
timeval.it_value.tv_usec = 0; timeval.it_value.tv_usec = 0;
if (setitimer(ITIMER_REAL, &timeval, &dummy)) if (setitimer(ITIMER_REAL, &timeval, &dummy))
elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
#else #else
if (set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM) < 0) if (set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM) < 0)
elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
#endif #endif
elog(FATAL, "ProcSleep: Unable to diable timer for process wakeup");
/* ---------------- /* ----------------
* We were assumed to be in a critical section when we went * We were assumed to be in a critical section when we went
...@@ -742,7 +720,7 @@ ProcWakeup(PROC *proc, int errType) ...@@ -742,7 +720,7 @@ ProcWakeup(PROC *proc, int errType)
proc->errType = errType; proc->errType = errType;
IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum, IpcExclusiveLock); IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
return retProc; return retProc;
} }
...@@ -855,27 +833,11 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -855,27 +833,11 @@ HandleDeadLock(SIGNAL_ARGS)
* Before we are awoken the process releasing the lock grants it to * Before we are awoken the process releasing the lock grants it to
* us so we know that we don't have to wait anymore. * us so we know that we don't have to wait anymore.
* *
* Damn these names are LONG! -mer * We check by looking to see if we've been unlinked from the wait queue.
* This is quicker than checking our semaphore's state, since no kernel
* call is needed, and it is safe because we hold the locktable lock.
* --------------------- * ---------------------
*/ */
if (IpcSemaphoreGetCount(MyProc->sem.semId, MyProc->sem.semNum) ==
IpcSemaphoreDefaultStartValue)
{
UnlockLockTable();
return;
}
/*
* you would think this would be unnecessary, but...
*
* this also means we've been removed already. in some ports (e.g.,
* sparc and aix) the semop(2) implementation is such that we can
* actually end up in this handler after someone has removed us from
* the queue and bopped the semaphore *but the test above fails to
* detect the semaphore update* (presumably something weird having to
* do with the order in which the semaphore wakeup signal and SIGALRM
* get handled).
*/
if (MyProc->links.prev == INVALID_OFFSET || if (MyProc->links.prev == INVALID_OFFSET ||
MyProc->links.next == INVALID_OFFSET) MyProc->links.next == INVALID_OFFSET)
{ {
...@@ -888,19 +850,18 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -888,19 +850,18 @@ HandleDeadLock(SIGNAL_ARGS)
DumpAllLocks(); DumpAllLocks();
#endif #endif
MyProc->errType = STATUS_NOT_FOUND;
if (!DeadLockCheck(MyProc, MyProc->waitLock)) if (!DeadLockCheck(MyProc, MyProc->waitLock))
{ {
/* No deadlock, so keep waiting */
UnlockLockTable(); UnlockLockTable();
return; return;
} }
mywaitlock = MyProc->waitLock;
/* ------------------------ /* ------------------------
* Get this process off the lock's wait queue * Get this process off the lock's wait queue
* ------------------------ * ------------------------
*/ */
mywaitlock = MyProc->waitLock;
Assert(mywaitlock->waitProcs.size > 0); Assert(mywaitlock->waitProcs.size > 0);
lockWaiting = false; lockWaiting = false;
--mywaitlock->waitProcs.size; --mywaitlock->waitProcs.size;
...@@ -908,12 +869,10 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -908,12 +869,10 @@ HandleDeadLock(SIGNAL_ARGS)
SHMQueueElemInit(&(MyProc->links)); SHMQueueElemInit(&(MyProc->links));
/* ------------------ /* ------------------
* Unlock my semaphore so that the count is right for next time. * Unlock my semaphore so that the interrupted ProcSleep() call can finish.
* I was awoken by a signal, not by someone unlocking my semaphore.
* ------------------ * ------------------
*/ */
IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum, IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum);
IpcExclusiveLock);
/* ------------- /* -------------
* Set MyProc->errType to STATUS_ERROR so that we abort after * Set MyProc->errType to STATUS_ERROR so that we abort after
...@@ -928,9 +887,6 @@ HandleDeadLock(SIGNAL_ARGS) ...@@ -928,9 +887,6 @@ HandleDeadLock(SIGNAL_ARGS)
* conditions. i don't claim to understand this... * conditions. i don't claim to understand this...
*/ */
UnlockLockTable(); UnlockLockTable();
elog(NOTICE, DeadLockMessage);
return;
} }
void void
...@@ -959,31 +915,32 @@ ProcReleaseSpins(PROC *proc) ...@@ -959,31 +915,32 @@ ProcReleaseSpins(PROC *proc)
*****************************************************************************/ *****************************************************************************/
/* /*
* ProcGetNewSemKeyAndNum - * ProcGetNewSemIdAndNum -
* scan the free semaphore bitmap and allocate a single semaphore from * scan the free semaphore bitmap and allocate a single semaphore from
* a semaphore set. (If the semaphore set doesn't exist yet, * a semaphore set.
* IpcSemaphoreCreate will create it. Otherwise, we use the existing
* semaphore set.)
*/ */
static void static void
ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum) ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
{ {
int i; int i;
IpcSemaphoreId *procSemIds = ProcGlobal->procSemIds;
int32 *freeSemMap = ProcGlobal->freeSemMap; int32 *freeSemMap = ProcGlobal->freeSemMap;
int32 fullmask = (1 << (PROC_NSEMS_PER_SET + 1)) - 1; int32 fullmask = (1 << PROC_NSEMS_PER_SET) - 1;
/* /*
* we hold ProcStructLock when entering this routine. We scan through * we hold ProcStructLock when entering this routine. We scan through
* the bitmap to look for a free semaphore. * the bitmap to look for a free semaphore.
*/ */
for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++) for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
{ {
int mask = 1; int mask = 1;
int j; int j;
if (freeSemMap[i] == fullmask) if (freeSemMap[i] == fullmask)
continue; /* this set is fully allocated */ continue; /* this set is fully allocated */
if (procSemIds[i] < 0)
continue; /* this set hasn't been initialized */
for (j = 0; j < PROC_NSEMS_PER_SET; j++) for (j = 0; j < PROC_NSEMS_PER_SET; j++)
{ {
...@@ -991,12 +948,11 @@ ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum) ...@@ -991,12 +948,11 @@ ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum)
{ {
/* /*
* a free semaphore found. Mark it as allocated. Also set * a free semaphore found. Mark it as allocated.
* the bit indicating whole set is allocated.
*/ */
freeSemMap[i] |= mask + (1 << PROC_NSEMS_PER_SET); freeSemMap[i] |= mask;
*key = ProcGlobal->currKey + i; *semId = procSemIds[i];
*semNum = j; *semNum = j;
return; return;
} }
...@@ -1005,7 +961,7 @@ ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum) ...@@ -1005,7 +961,7 @@ ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum)
} }
/* if we reach here, all the semaphores are in use. */ /* if we reach here, all the semaphores are in use. */
elog(ERROR, "InitProc: cannot allocate a free semaphore"); elog(ERROR, "ProcGetNewSemIdAndNum: cannot allocate a free semaphore");
} }
/* /*
...@@ -1013,23 +969,22 @@ ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum) ...@@ -1013,23 +969,22 @@ ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum)
* free up our semaphore in the semaphore set. * free up our semaphore in the semaphore set.
*/ */
static void static void
ProcFreeSem(IpcSemaphoreKey semKey, int semNum) ProcFreeSem(IpcSemaphoreId semId, int semNum)
{ {
int mask; int32 mask;
int i; int i;
int32 *freeSemMap = ProcGlobal->freeSemMap;
i = semKey - ProcGlobal->currKey;
mask = ~(1 << semNum); mask = ~(1 << semNum);
freeSemMap[i] &= mask;
/* for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
* Formerly we'd release a semaphore set if it was now completely {
* unused, but now we keep the semaphores to ensure we won't run out if (ProcGlobal->procSemIds[i] == semId)
* when starting new backends --- cf. InitProcGlobal. Note that the {
* PROC_NSEMS_PER_SET+1'st bit of the freeSemMap entry remains set to ProcGlobal->freeSemMap[i] &= mask;
* indicate it is still allocated; ProcFreeAllSemaphores() needs that. return;
*/ }
}
fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId);
} }
/* /*
...@@ -1039,14 +994,13 @@ ProcFreeSem(IpcSemaphoreKey semKey, int semNum) ...@@ -1039,14 +994,13 @@ ProcFreeSem(IpcSemaphoreKey semKey, int semNum)
* Free up all the semaphores allocated to the lmgrs of the backends. * Free up all the semaphores allocated to the lmgrs of the backends.
*/ */
static void static void
ProcFreeAllSemaphores() ProcFreeAllSemaphores(void)
{ {
int i; int i;
int32 *freeSemMap = ProcGlobal->freeSemMap;
for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++) for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
{ {
if (freeSemMap[i] != 0) if (ProcGlobal->procSemIds[i] >= 0)
IpcSemaphoreKill(ProcGlobal->currKey + i); IpcSemaphoreKill(ProcGlobal->procSemIds[i]);
} }
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.72 2000/11/16 22:30:39 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.73 2000/11/28 23:27:57 tgl Exp $
* *
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
static void ReverifyMyDatabase(const char *name); static void ReverifyMyDatabase(const char *name);
static void InitCommunication(void); static void InitCommunication(void);
static IPCKey PostgresIpcKey;
/*** InitPostgres support ***/ /*** InitPostgres support ***/
...@@ -141,7 +140,7 @@ ReverifyMyDatabase(const char *name) ...@@ -141,7 +140,7 @@ ReverifyMyDatabase(const char *name)
* -------------------------------- * --------------------------------
*/ */
static void static void
InitCommunication() InitCommunication(void)
{ {
/* ---------------- /* ----------------
* initialize shared memory and semaphores appropriately. * initialize shared memory and semaphores appropriately.
...@@ -151,26 +150,11 @@ InitCommunication() ...@@ -151,26 +150,11 @@ InitCommunication()
{ {
/* ---------------- /* ----------------
* we're running a postgres backend by itself with * we're running a postgres backend by itself with
* no front end or postmaster. * no front end or postmaster. Create private "shmem"
* and semaphores. Setting MaxBackends = 16 is arbitrary.
* ---------------- * ----------------
*/ */
char *ipc_key; /* value of environment variable */ CreateSharedMemoryAndSemaphores(true, 16);
IPCKey key;
ipc_key = getenv("IPC_KEY");
if (!PointerIsValid(ipc_key))
{
/* Normal standalone backend */
key = PrivateIPCKey;
}
else
{
/* Allow standalone's IPC key to be set */
key = atoi(ipc_key);
}
PostgresIpcKey = key;
AttachSharedMemoryAndSemaphores(key);
} }
} }
...@@ -295,7 +279,7 @@ InitPostgres(const char *dbname, const char *username) ...@@ -295,7 +279,7 @@ InitPostgres(const char *dbname, const char *username)
/* /*
* Set up my per-backend PROC struct in shared memory. * Set up my per-backend PROC struct in shared memory.
*/ */
InitProcess(PostgresIpcKey); InitProcess();
/* /*
* Initialize my entry in the shared-invalidation manager's array of * Initialize my entry in the shared-invalidation manager's array of
...@@ -307,7 +291,7 @@ InitPostgres(const char *dbname, const char *username) ...@@ -307,7 +291,7 @@ InitPostgres(const char *dbname, const char *username)
*/ */
MyBackendId = InvalidBackendId; MyBackendId = InvalidBackendId;
InitSharedInvalidationState(); InitBackendSharedInvalidationState();
if (MyBackendId > MAXBACKENDS || MyBackendId <= 0) if (MyBackendId > MAXBACKENDS || MyBackendId <= 0)
elog(FATAL, "cinit2: bad backend id %d", MyBackendId); elog(FATAL, "cinit2: bad backend id %d", MyBackendId);
...@@ -365,11 +349,11 @@ BaseInit(void) ...@@ -365,11 +349,11 @@ BaseInit(void)
*/ */
InitCommunication(); InitCommunication();
DebugFileOpen(); DebugFileOpen();
smgrinit(); smgrinit();
EnablePortalManager(); /* memory for portal/transaction stuff */ EnablePortalManager(); /* memory for portal/transaction stuff */
/* initialize the local buffer manager */ /* initialize the local buffer manager */
InitLocalBuffer(); InitLocalBuffer();
} }
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: buf_internals.h,v 1.43 2000/11/08 22:10:02 tgl Exp $ * $Id: buf_internals.h,v 1.44 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "storage/buf.h" #include "storage/buf.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/s_lock.h"
/* Buf Mgr constants */ /* Buf Mgr constants */
/* in bufmgr.c */ /* in bufmgr.c */
...@@ -100,11 +101,9 @@ typedef struct sbufdesc ...@@ -100,11 +101,9 @@ typedef struct sbufdesc
BufFlags flags; /* see bit definitions above */ BufFlags flags; /* see bit definitions above */
unsigned refcount; /* # of times buffer is pinned */ unsigned refcount; /* # of times buffer is pinned */
#ifdef HAS_TEST_AND_SET slock_t io_in_progress_lock; /* to block for I/O to complete */
/* can afford a dedicated lock if test-and-set locks are available */
slock_t io_in_progress_lock;
slock_t cntx_lock; /* to lock access to page context */ slock_t cntx_lock; /* to lock access to page context */
#endif /* HAS_TEST_AND_SET */
unsigned r_locks; /* # of shared locks */ unsigned r_locks; /* # of shared locks */
bool ri_lock; /* read-intent lock */ bool ri_lock; /* read-intent lock */
bool w_lock; /* context exclusively locked */ bool w_lock; /* context exclusively locked */
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: bufmgr.h,v 1.43 2000/11/08 22:10:02 tgl Exp $ * $Id: bufmgr.h,v 1.44 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -154,7 +154,7 @@ extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, ...@@ -154,7 +154,7 @@ extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
BlockNumber blockNum); BlockNumber blockNum);
extern int FlushBuffer(Buffer buffer, bool sync, bool release); extern int FlushBuffer(Buffer buffer, bool sync, bool release);
extern void InitBufferPool(IPCKey key); extern void InitBufferPool(void);
extern void PrintBufferUsage(FILE *statfp); extern void PrintBufferUsage(FILE *statfp);
extern void ResetBufferUsage(void); extern void ResetBufferUsage(void);
extern void ResetBufferPool(bool isCommit); extern void ResetBufferPool(bool isCommit);
......
...@@ -7,14 +7,10 @@ ...@@ -7,14 +7,10 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: ipc.h,v 1.42 2000/10/07 14:39:17 momjian Exp $ * $Id: ipc.h,v 1.43 2000/11/28 23:27:57 tgl Exp $
*
* NOTES
* This file is very architecture-specific. This stuff should actually
* be factored into the port/ directories.
* *
* Some files that would normally need to include only sys/ipc.h must * Some files that would normally need to include only sys/ipc.h must
* instead included this file because on Ultrix, sys/ipc.h is not designed * instead include this file because on Ultrix, sys/ipc.h is not designed
* to be included multiple times. This file (by virtue of the ifndef IPC_H) * to be included multiple times. This file (by virtue of the ifndef IPC_H)
* is. * is.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
...@@ -26,11 +22,9 @@ ...@@ -26,11 +22,9 @@
#include <sys/types.h> #include <sys/types.h>
#ifdef HAVE_SYS_IPC_H #ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h> /* For IPC_PRIVATE */ #include <sys/ipc.h>
#endif /* HAVE_SYS_IPC_H */ #endif /* HAVE_SYS_IPC_H */
#include "config.h"
#ifndef HAVE_UNION_SEMUN #ifndef HAVE_UNION_SEMUN
union semun union semun
{ {
...@@ -38,79 +32,41 @@ union semun ...@@ -38,79 +32,41 @@ union semun
struct semid_ds *buf; struct semid_ds *buf;
unsigned short *array; unsigned short *array;
}; };
#endif #endif
typedef uint16 SystemPortAddress; /* generic IPC definitions */
/* semaphore definitions */
#define IPCProtection (0600) /* access/modify by user only */ #define IPCProtection (0600) /* access/modify by user only */
#define IPC_NMAXSEM 25 /* maximum number of semaphores */ /* semaphore definitions */
#define IpcSemaphoreDefaultStartValue 255
#define IpcSharedLock (-1)
#define IpcExclusiveLock (-255)
#define IpcUnknownStatus (-1)
#define IpcInvalidArgument (-2)
#define IpcSemIdExist (-3)
#define IpcSemIdNotExist (-4)
typedef uint32 IpcSemaphoreKey; /* semaphore key */
typedef int IpcSemaphoreId;
/* shared memory definitions */
#define IpcMemCreationFailed (-1)
#define IpcMemIdGetFailed (-2)
#define IpcMemAttachFailed 0
typedef uint32 IPCKey;
#define PrivateIPCKey IPC_PRIVATE
#define DefaultIPCKey 17317
typedef uint32 IpcMemoryKey; /* shared memory key */ typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */
typedef int IpcMemoryId; typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
#define IPC_NMAXSEM 32 /* maximum number of semaphores per semID */
/* ipc.c */ #define PGSemaMagic 537 /* must be less than SEMVMX */
extern bool proc_exit_inprogress;
extern void proc_exit(int code); /* shared memory definitions */
extern void shmem_exit(int code);
extern int on_shmem_exit(void (*function) (), Datum arg);
extern int on_proc_exit(void (*function) (), Datum arg);
extern void on_exit_reset(void);
extern IpcSemaphoreId IpcSemaphoreCreate(IpcSemaphoreKey semKey, typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */
int semNum, int permission, int semStartValue, typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
int removeOnExit);
extern void IpcSemaphoreKill(IpcSemaphoreKey key);
extern void IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock);
extern void IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock);
extern int IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem);
extern int IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem);
extern IpcMemoryId IpcMemoryCreate(IpcMemoryKey memKey, uint32 size,
int permission);
extern IpcMemoryId IpcMemoryIdGet(IpcMemoryKey memKey, uint32 size);
extern char *IpcMemoryAttach(IpcMemoryId memId);
extern void IpcMemoryKill(IpcMemoryKey memKey);
extern void CreateAndInitSLockMemory(IPCKey key);
extern void AttachSLockMemory(IPCKey key);
typedef struct /* standard header for all Postgres shmem */
{
int32 magic; /* magic # to identify Postgres segments */
#define PGShmemMagic 679834892
pid_t creatorPID; /* PID of creating process */
uint32 totalsize; /* total size of segment */
uint32 freeoffset; /* offset to first free space */
} PGShmemHeader;
#ifdef HAS_TEST_AND_SET
#define NOLOCK 0 /* spinlock definitions */
#define SHAREDLOCK 1
#define EXCLUSIVELOCK 2
typedef enum _LockId_ typedef enum _LockId_
{ {
BUFMGRLOCKID, BUFMGRLOCKID,
LOCKLOCKID,
OIDGENLOCKID, OIDGENLOCKID,
XIDGENLOCKID, XIDGENLOCKID,
CNTLFILELOCKID, CNTLFILELOCKID,
...@@ -118,100 +74,40 @@ typedef enum _LockId_ ...@@ -118,100 +74,40 @@ typedef enum _LockId_
SHMEMINDEXLOCKID, SHMEMINDEXLOCKID,
LOCKMGRLOCKID, LOCKMGRLOCKID,
SINVALLOCKID, SINVALLOCKID,
#ifdef STABLE_MEMORY_STORAGE
MMCACHELOCKID,
#endif
PROCSTRUCTLOCKID, PROCSTRUCTLOCKID,
FIRSTFREELOCKID
} _LockId_;
#define MAX_SPINS FIRSTFREELOCKID
typedef struct slock
{
slock_t locklock;
unsigned char flag;
short nshlocks;
slock_t shlock;
slock_t exlock;
slock_t comlock;
struct slock *next;
} SLock;
#else /* HAS_TEST_AND_SET */
typedef enum _LockId_
{
SHMEMLOCKID,
SHMEMINDEXLOCKID,
BUFMGRLOCKID,
LOCKMGRLOCKID,
SINVALLOCKID,
#ifdef STABLE_MEMORY_STORAGE #ifdef STABLE_MEMORY_STORAGE
MMCACHELOCKID, MMCACHELOCKID,
#endif #endif
PROCSTRUCTLOCKID, MAX_SPINS /* must be last item! */
OIDGENLOCKID,
XIDGENLOCKID,
CNTLFILELOCKID,
FIRSTFREELOCKID
} _LockId_; } _LockId_;
#define MAX_SPINS FIRSTFREELOCKID
#endif /* HAS_TEST_AND_SET */ /* ipc.c */
extern bool proc_exit_inprogress;
/* extern void proc_exit(int code);
* the following are originally in ipci.h but the prototypes have circular extern void shmem_exit(int code);
* dependencies and most files include both ipci.h and ipc.h anyway, hence extern void on_proc_exit(void (*function) (), Datum arg);
* combined. extern void on_shmem_exit(void (*function) (), Datum arg);
* extern void on_exit_reset(void);
*/
/* extern void IpcInitKeyAssignment(int port);
* Note:
* These must not hash to DefaultIPCKey or PrivateIPCKey.
*/
#define SystemPortAddressGetIPCKey(address) \
(28597 * (address) + 17491)
/* extern IpcSemaphoreId IpcSemaphoreCreate(int numSems, int permission,
* these keys are originally numbered from 1 to 12 consecutively but not int semStartValue,
* all are used. The unused ones are removed. - ay 4/95. bool removeOnExit);
*/ extern void IpcSemaphoreKill(IpcSemaphoreId semId);
#define IPCKeyGetBufferMemoryKey(key) \ extern void IpcSemaphoreLock(IpcSemaphoreId semId, int sem);
((key == PrivateIPCKey) ? key : 1 + (key)) extern void IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem);
extern bool IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem);
#define IPCKeyGetSIBufferMemoryBlock(key) \ extern int IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem);
((key == PrivateIPCKey) ? key : 7 + (key))
extern PGShmemHeader *IpcMemoryCreate(uint32 size, bool private,
#define IPCKeyGetSLockSharedMemoryKey(key) \ int permission);
((key == PrivateIPCKey) ? key : 10 + (key))
#define IPCKeyGetSpinLockSemaphoreKey(key) \
((key == PrivateIPCKey) ? key : 11 + (key))
#define IPCKeyGetWaitIOSemaphoreKey(key) \
((key == PrivateIPCKey) ? key : 12 + (key))
#define IPCKeyGetWaitCLSemaphoreKey(key) \
((key == PrivateIPCKey) ? key : 13 + (key))
/* --------------------------
* NOTE: This macro must always give the highest numbered key as every backend
* process forked off by the postmaster will be trying to acquire a semaphore
* with a unique key value starting at key+14 and incrementing up. Each
* backend uses the current key value then increments it by one.
* --------------------------
*/
#define IPCGetProcessSemaphoreInitKey(key) \
((key == PrivateIPCKey) ? key : 14 + (key))
/* ipci.c */ /* ipci.c */
extern IPCKey SystemPortAddressCreateIPCKey(SystemPortAddress address); extern void CreateSharedMemoryAndSemaphores(bool private, int maxBackends);
extern void CreateSharedMemoryAndSemaphores(IPCKey key, int maxBackends);
extern void AttachSharedMemoryAndSemaphores(IPCKey key);
#endif /* IPC_H */ #endif /* IPC_H */
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: lmgr.h,v 1.25 2000/06/08 22:37:54 momjian Exp $ * $Id: lmgr.h,v 1.26 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -47,7 +47,4 @@ extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); ...@@ -47,7 +47,4 @@ extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
extern void XactLockTableInsert(TransactionId xid); extern void XactLockTableInsert(TransactionId xid);
extern void XactLockTableWait(TransactionId xid); extern void XactLockTableWait(TransactionId xid);
/* proc.c */
extern void InitProcGlobal(IPCKey key, int maxBackends);
#endif /* LMGR_H */ #endif /* LMGR_H */
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* proc.h * proc.h
* * per-process shared memory data structures
* *
* *
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: proc.h,v 1.31 2000/05/31 00:28:38 petere Exp $ * $Id: proc.h,v 1.32 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,9 +23,8 @@ extern int DeadlockTimeout; ...@@ -23,9 +23,8 @@ extern int DeadlockTimeout;
typedef struct typedef struct
{ {
int sleeplock; int sleeplock;
int semNum;
IpcSemaphoreId semId; IpcSemaphoreId semId;
IpcSemaphoreKey semKey; int semNum;
} SEMA; } SEMA;
/* /*
...@@ -33,7 +32,6 @@ typedef struct ...@@ -33,7 +32,6 @@ typedef struct
*/ */
typedef struct proc typedef struct proc
{ {
/* proc->links MUST BE THE FIRST ELEMENT OF STRUCT (see ProcWakeup()) */ /* proc->links MUST BE THE FIRST ELEMENT OF STRUCT (see ProcWakeup()) */
SHM_QUEUE links; /* proc can be waiting for one event(lock) */ SHM_QUEUE links; /* proc can be waiting for one event(lock) */
...@@ -63,34 +61,6 @@ typedef struct proc ...@@ -63,34 +61,6 @@ typedef struct proc
* transaction */ * transaction */
} PROC; } PROC;
/*
* PROC_NSEMS_PER_SET is the number of semaphores in each sys-V semaphore set
* we allocate. It must be *less than* 32 (or however many bits in an int
* on your machine), or our free-semaphores bitmap won't work. You also must
* not set it higher than your kernel's SEMMSL (max semaphores per set)
* parameter, which is often around 25.
*
* MAX_PROC_SEMS is the maximum number of per-process semaphores (those used
* by the lock mgr) we can keep track of. It must be a multiple of
* PROC_NSEMS_PER_SET.
*/
#define PROC_NSEMS_PER_SET 16
#define MAX_PROC_SEMS (((MAXBACKENDS-1)/PROC_NSEMS_PER_SET+1)*PROC_NSEMS_PER_SET)
typedef struct procglobal
{
SHMEM_OFFSET freeProcs;
IPCKey currKey;
int32 freeSemMap[MAX_PROC_SEMS / PROC_NSEMS_PER_SET];
/*
* In each freeSemMap entry, the PROC_NSEMS_PER_SET least-significant
* bits flag whether individual semaphores are in use, and the next
* higher bit is set to show that the entire set is allocated.
*/
} PROC_HDR;
extern PROC *MyProc; extern PROC *MyProc;
#define PROC_INCR_SLOCK(lock) \ #define PROC_INCR_SLOCK(lock) \
...@@ -115,16 +85,46 @@ do { \ ...@@ -115,16 +85,46 @@ do { \
extern SPINLOCK ProcStructLock; extern SPINLOCK ProcStructLock;
/*
* There is one ProcGlobal struct for the whole installation.
*
* PROC_NSEMS_PER_SET is the number of semaphores in each sys-V semaphore set
* we allocate. It must be no more than 32 (or however many bits in an int
* on your machine), or our free-semaphores bitmap won't work. It also must
* be *less than* your kernel's SEMMSL (max semaphores per set) parameter,
* which is often around 25. (Less than, because we allocate one extra sema
* in each set for identification purposes.)
*
* PROC_SEM_MAP_ENTRIES is the number of semaphore sets we need to allocate
* to keep track of up to MAXBACKENDS backends.
*/
#define PROC_NSEMS_PER_SET 16
#define PROC_SEM_MAP_ENTRIES ((MAXBACKENDS-1)/PROC_NSEMS_PER_SET+1)
typedef struct procglobal
{
/* Head of list of free PROC structures */
SHMEM_OFFSET freeProcs;
/* Info about semaphore sets used for per-process semaphores */
IpcSemaphoreId procSemIds[PROC_SEM_MAP_ENTRIES];
int32 freeSemMap[PROC_SEM_MAP_ENTRIES];
/*
* In each freeSemMap entry, bit i is set if the i'th semaphore of the
* set is allocated to a process. (i counts from 0 at the LSB)
*/
} PROC_HDR;
/* /*
* Function Prototypes * Function Prototypes
*/ */
extern void InitProcess(IPCKey key); extern void InitProcGlobal(int maxBackends);
extern void InitProcess(void);
extern void ProcReleaseLocks(void); extern void ProcReleaseLocks(void);
extern bool ProcRemove(int pid); extern bool ProcRemove(int pid);
/* extern bool ProcKill(int exitStatus, int pid); */
/* make static in storage/lmgr/proc.c -- jolly */
extern void ProcQueueInit(PROC_QUEUE *queue); extern void ProcQueueInit(PROC_QUEUE *queue);
extern int ProcSleep(PROC_QUEUE *queue, LOCKMETHODCTL *lockctl, int token, extern int ProcSleep(PROC_QUEUE *queue, LOCKMETHODCTL *lockctl, int token,
LOCK *lock); LOCK *lock);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/include/storage/s_lock.h,v 1.73 2000/10/22 22:15:03 petere Exp $ * $Header: /cvsroot/pgsql/src/include/storage/s_lock.h,v 1.74 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
* void S_LOCK_FREE(slock_t *lock) * void S_LOCK_FREE(slock_t *lock)
* Tests if the lock is free. Returns non-zero if free, 0 if locked. * Tests if the lock is free. Returns non-zero if free, 0 if locked.
* *
* The S_LOCK() macro implements a primitive but still useful random * The S_LOCK() macro implements a primitive but still useful random
* backoff to avoid hordes of busywaiting lockers chewing CPU. * backoff to avoid hordes of busywaiting lockers chewing CPU.
* *
* Effectively: * Effectively:
...@@ -64,7 +64,7 @@ ...@@ -64,7 +64,7 @@
* manual for POWER in any case. * manual for POWER in any case.
* *
*/ */
#if !defined(S_LOCK_H) #ifndef S_LOCK_H
#define S_LOCK_H #define S_LOCK_H
#include "storage/ipc.h" #include "storage/ipc.h"
...@@ -403,8 +403,8 @@ extern void s_lock(volatile slock_t *lock, const char *file, const int line); ...@@ -403,8 +403,8 @@ extern void s_lock(volatile slock_t *lock, const char *file, const int line);
#define S_LOCK(lock) \ #define S_LOCK(lock) \
do { \ do { \
if (TAS((volatile slock_t *) lock)) \ if (TAS((volatile slock_t *) (lock))) \
s_lock((volatile slock_t *) lock, __FILE__, __LINE__); \ s_lock((volatile slock_t *) (lock), __FILE__, __LINE__); \
} while (0) } while (0)
#endif /* S_LOCK */ #endif /* S_LOCK */
...@@ -421,12 +421,46 @@ extern void s_lock(volatile slock_t *lock, const char *file, const int line); ...@@ -421,12 +421,46 @@ extern void s_lock(volatile slock_t *lock, const char *file, const int line);
#endif /* S_INIT_LOCK */ #endif /* S_INIT_LOCK */
#if !defined(TAS) #if !defined(TAS)
int tas(volatile slock_t *lock); /* port/.../tas.s, or extern int tas(volatile slock_t *lock); /* port/.../tas.s, or
* s_lock.c */ * s_lock.c */
#define TAS(lock) tas((volatile slock_t *) lock) #define TAS(lock) tas((volatile slock_t *) (lock))
#endif /* TAS */ #endif /* TAS */
#else /* !HAS_TEST_AND_SET */
/*
* Fake spinlock implementation using SysV semaphores --- slow and prone
* to fall foul of kernel limits on number of semaphores, so don't use this
* unless you must!
*/
typedef struct
{
/* reference to semaphore used to implement this spinlock */
IpcSemaphoreId semId;
int sem;
} slock_t;
extern bool s_lock_free_sema(volatile slock_t *lock);
extern void s_unlock_sema(volatile slock_t *lock);
extern void s_init_lock_sema(volatile slock_t *lock);
extern int tas_sema(volatile slock_t *lock);
extern void s_lock(volatile slock_t *lock, const char *file, const int line);
#define S_LOCK(lock) \
do { \
if (TAS((volatile slock_t *) (lock))) \
s_lock((volatile slock_t *) (lock), __FILE__, __LINE__); \
} while (0)
#define S_LOCK_FREE(lock) s_lock_free_sema(lock)
#define S_UNLOCK(lock) s_unlock_sema(lock)
#define S_INIT_LOCK(lock) s_init_lock_sema(lock)
#define TAS(lock) tas_sema(lock)
#endif /* HAS_TEST_AND_SET */ #endif /* HAS_TEST_AND_SET */
#endif /* S_LOCK_H */
#endif /* S_LOCK_H */
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: shmem.h,v 1.23 2000/06/28 03:33:27 tgl Exp $ * $Id: shmem.h,v 1.24 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -18,17 +18,23 @@ ...@@ -18,17 +18,23 @@
#include "utils/hsearch.h" #include "utils/hsearch.h"
/* The shared memory region can start at a different address /*
* The shared memory region can start at a different address
* in every process. Shared memory "pointers" are actually * in every process. Shared memory "pointers" are actually
* offsets relative to the start of the shared memory region(s). * offsets relative to the start of the shared memory region(s).
*
* In current usage, this is not actually a problem, but we keep
* the code that used to handle it...
*/ */
typedef unsigned long SHMEM_OFFSET; typedef unsigned long SHMEM_OFFSET;
#define INVALID_OFFSET (-1) #define INVALID_OFFSET (-1)
#define BAD_LOCATION (-1) #define BAD_LOCATION (-1)
/* start of the lowest shared memory region. For now, assume that /*
* there is only one shared memory region * Start of the primary shared memory region, in this process' address space.
* The macros in this header file can only cope with offsets into this
* shared memory region!
*/ */
extern SHMEM_OFFSET ShmemBase; extern SHMEM_OFFSET ShmemBase;
...@@ -39,14 +45,14 @@ extern SHMEM_OFFSET ShmemBase; ...@@ -39,14 +45,14 @@ extern SHMEM_OFFSET ShmemBase;
/* coerce a pointer into a shmem offset */ /* coerce a pointer into a shmem offset */
#define MAKE_OFFSET(xx_ptr)\ #define MAKE_OFFSET(xx_ptr)\
(SHMEM_OFFSET) (((unsigned long)(xx_ptr))-ShmemBase) ((SHMEM_OFFSET) (((unsigned long)(xx_ptr))-ShmemBase))
#define SHM_PTR_VALID(xx_ptr)\ #define SHM_PTR_VALID(xx_ptr)\
(((unsigned long)xx_ptr) > ShmemBase) (((unsigned long)(xx_ptr)) > ShmemBase)
/* cannot have an offset to ShmemFreeStart (offset 0) */ /* cannot have an offset to ShmemFreeStart (offset 0) */
#define SHM_OFFSET_VALID(xx_offs)\ #define SHM_OFFSET_VALID(xx_offs)\
((xx_offs != 0) && (xx_offs != INVALID_OFFSET)) (((xx_offs) != 0) && ((xx_offs) != INVALID_OFFSET))
extern SPINLOCK ShmemLock; extern SPINLOCK ShmemLock;
...@@ -60,11 +66,9 @@ typedef struct SHM_QUEUE ...@@ -60,11 +66,9 @@ typedef struct SHM_QUEUE
} SHM_QUEUE; } SHM_QUEUE;
/* shmem.c */ /* shmem.c */
extern void ShmemIndexReset(void); extern void InitShmemAllocation(PGShmemHeader *seghdr);
extern void ShmemCreate(unsigned int key, unsigned int size);
extern int InitShmem(unsigned int key, unsigned int size);
extern void *ShmemAlloc(Size size); extern void *ShmemAlloc(Size size);
extern int ShmemIsValid(unsigned long addr); extern bool ShmemIsValid(unsigned long addr);
extern HTAB *ShmemInitHash(char *name, long init_size, long max_size, extern HTAB *ShmemInitHash(char *name, long init_size, long max_size,
HASHCTL *infoP, int hash_flags); HASHCTL *infoP, int hash_flags);
extern bool ShmemPIDLookup(int pid, SHMEM_OFFSET *locationPtr); extern bool ShmemPIDLookup(int pid, SHMEM_OFFSET *locationPtr);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: sinval.h,v 1.15 2000/11/12 20:51:52 tgl Exp $ * $Id: sinval.h,v 1.16 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
extern SPINLOCK SInvalLock; extern SPINLOCK SInvalLock;
extern void CreateSharedInvalidationState(IPCKey key, int maxBackends); extern int SInvalShmemSize(int maxBackends);
extern void AttachSharedInvalidationState(IPCKey key); extern void CreateSharedInvalidationState(int maxBackends);
extern void InitSharedInvalidationState(void); extern void InitBackendSharedInvalidationState(void);
extern void RegisterSharedInvalid(int cacheId, Index hashIndex, extern void RegisterSharedInvalid(int cacheId, Index hashIndex,
ItemPointer pointer); ItemPointer pointer);
extern void InvalidateSharedInvalid(void (*invalFunction) (), extern void InvalidateSharedInvalid(void (*invalFunction) (),
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: sinvaladt.h,v 1.23 2000/11/12 20:51:52 tgl Exp $ * $Id: sinvaladt.h,v 1.24 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -107,15 +107,13 @@ typedef struct SISeg ...@@ -107,15 +107,13 @@ typedef struct SISeg
} SISeg; } SISeg;
extern SISeg *shmInvalBuffer; /* pointer to the shared buffer segment, extern SISeg *shmInvalBuffer; /* pointer to the shared inval buffer */
* set by SISegmentAttach() */
/* /*
* prototypes for functions in sinvaladt.c * prototypes for functions in sinvaladt.c
*/ */
extern int SISegmentInit(bool createNewSegment, IPCKey key, extern void SIBufferInit(int maxBackends);
int maxBackends);
extern int SIBackendInit(SISeg *segP); extern int SIBackendInit(SISeg *segP);
extern bool SIInsertDataEntry(SISeg *segP, SharedInvalidData *data); extern bool SIInsertDataEntry(SISeg *segP, SharedInvalidData *data);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: spin.h,v 1.12 2000/05/31 00:28:38 petere Exp $ * $Id: spin.h,v 1.13 2000/11/28 23:27:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,11 +19,10 @@ ...@@ -19,11 +19,10 @@
/* /*
* two implementations of spin locks * two implementations of spin locks
* *
* sequent, sparc, sun3: real spin locks. uses a TAS instruction; see * Where TAS instruction is available: real spin locks.
* src/storage/ipc/s_lock.c for details. * See src/storage/ipc/s_lock.c for details.
*
* default: fake spin locks using semaphores. see spin.c
* *
* Otherwise: fake spin locks using semaphores. see spin.c
*/ */
typedef int SPINLOCK; typedef int SPINLOCK;
...@@ -32,8 +31,10 @@ typedef int SPINLOCK; ...@@ -32,8 +31,10 @@ typedef int SPINLOCK;
extern bool Trace_spinlocks; extern bool Trace_spinlocks;
#endif #endif
extern void CreateSpinlocks(IPCKey key);
extern void InitSpinLocks(void); extern int SLockShmemSize(void);
extern void CreateSpinlocks(PGShmemHeader *seghdr);
extern void SpinAcquire(SPINLOCK lockid); extern void SpinAcquire(SPINLOCK lockid);
extern void SpinRelease(SPINLOCK lockid); extern void SpinRelease(SPINLOCK lockid);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment