Commit 7f60b81e authored by Tom Lane

Fix failure in CreateCheckPoint on some Alpha boxes --- it's not OK to
assume that TAS() will always succeed the first time, even if the lock
is known to be free.  Also, make sure that code will eventually time out
and report a stuck spinlock, rather than looping forever.  Small cleanups
in s_lock.h, too.
parent 7d363c4c
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.45 2000/12/28 13:00:08 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.46 2000/12/29 21:31:21 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -411,7 +411,7 @@ begin:;
}
}
}
s_lock_sleep(i++);
S_LOCK_SLEEP(&(XLogCtl->insert_lck), i++);
if (!TAS(&(XLogCtl->insert_lck)))
break;
}
......@@ -599,17 +599,10 @@ begin:;
if (updrqst)
{
for (;;)
{
if (!TAS(&(XLogCtl->info_lck)))
{
S_LOCK(&(XLogCtl->info_lck));
if (XLByteLT(XLogCtl->LgwrRqst.Write, LgwrRqst.Write))
XLogCtl->LgwrRqst.Write = LgwrRqst.Write;
S_UNLOCK(&(XLogCtl->info_lck));
break;
}
s_lock_sleep(i++);
}
}
END_CRIT_CODE;
......@@ -622,7 +615,7 @@ XLogFlush(XLogRecPtr record)
XLogRecPtr WriteRqst;
char buffer[BLCKSZ];
char *usebuf = NULL;
unsigned i = 0;
unsigned spins = 0;
bool force_lgwr = false;
if (XLOG_DEBUG)
......@@ -715,7 +708,7 @@ XLogFlush(XLogRecPtr record)
break;
}
}
s_lock_sleep(i++);
S_LOCK_SLEEP(&(XLogCtl->lgwr_lck), spins++);
}
if (logFile >= 0 && (LgwrResult.Write.xlogid != logId ||
......@@ -740,18 +733,12 @@ XLogFlush(XLogRecPtr record)
logId, logSeg);
LgwrResult.Flush = LgwrResult.Write;
for (i = 0;;)
{
if (!TAS(&(XLogCtl->info_lck)))
{
S_LOCK(&(XLogCtl->info_lck));
XLogCtl->LgwrResult = LgwrResult;
if (XLByteLT(XLogCtl->LgwrRqst.Write, LgwrResult.Write))
XLogCtl->LgwrRqst.Write = LgwrResult.Write;
S_UNLOCK(&(XLogCtl->info_lck));
break;
}
s_lock_sleep(i++);
}
XLogCtl->Write.LgwrResult = LgwrResult;
S_UNLOCK(&(XLogCtl->lgwr_lck));
......@@ -767,6 +754,7 @@ GetFreeXLBuffer()
XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogCtlWrite *Write = &XLogCtl->Write;
uint16 curridx = NextBufIdx(Insert->curridx);
unsigned spins = 0;
LgwrRqst.Write = XLogCtl->xlblocks[Insert->curridx];
for (;;)
......@@ -809,9 +797,8 @@ GetFreeXLBuffer()
InitXLBuffer(curridx);
return;
}
S_LOCK_SLEEP(&(XLogCtl->lgwr_lck), spins++);
}
return;
}
static void
......@@ -820,7 +807,6 @@ XLogWrite(char *buffer)
XLogCtlWrite *Write = &XLogCtl->Write;
char *from;
uint32 wcnt = 0;
int i = 0;
bool usexistent;
for (; XLByteLT(LgwrResult.Write, LgwrRqst.Write);)
......@@ -919,18 +905,12 @@ XLogWrite(char *buffer)
LgwrResult.Flush = LgwrResult.Write;
}
for (;;)
{
if (!TAS(&(XLogCtl->info_lck)))
{
S_LOCK(&(XLogCtl->info_lck));
XLogCtl->LgwrResult = LgwrResult;
if (XLByteLT(XLogCtl->LgwrRqst.Write, LgwrResult.Write))
XLogCtl->LgwrRqst.Write = LgwrResult.Write;
S_UNLOCK(&(XLogCtl->info_lck));
break;
}
s_lock_sleep(i++);
}
Write->LgwrResult = LgwrResult;
}
......@@ -2062,18 +2042,17 @@ CreateCheckPoint(bool shutdown)
uint32 _logId;
uint32 _logSeg;
char archdir[MAXPGPATH];
unsigned spins = 0;
if (MyLastRecPtr.xrecoff != 0)
elog(ERROR, "CreateCheckPoint: cannot be called inside transaction block");
START_CRIT_CODE;
/* Grab lock, using larger than normal sleep between tries (1 sec) */
while (TAS(&(XLogCtl->chkp_lck)))
{
struct timeval delay = {2, 0};
if (shutdown)
elog(STOP, "Checkpoint lock is busy while data base is shutting down");
(void) select(0, NULL, NULL, NULL, &delay);
S_LOCK_SLEEP_INTERVAL(&(XLogCtl->chkp_lck), spins++, 1000000);
}
memset(&checkPoint, 0, sizeof(checkPoint));
......@@ -2087,14 +2066,7 @@ CreateCheckPoint(bool shutdown)
checkPoint.Shutdown = shutdown;
/* Get REDO record ptr */
while (TAS(&(XLogCtl->insert_lck)))
{
struct timeval delay = {1, 0};
if (shutdown)
elog(STOP, "XLog insert lock is busy while data base is shutting down");
(void) select(0, NULL, NULL, NULL, &delay);
}
S_LOCK(&(XLogCtl->insert_lck));
freespace = ((char *) Insert->currpage) + BLCKSZ - Insert->currpos;
if (freespace < SizeOfXLogRecord)
{
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.100 2000/12/28 13:00:21 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.101 2000/12/29 21:31:21 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1987,7 +1987,7 @@ LockBuffer(Buffer buffer, int mode)
while (buf->ri_lock || buf->w_lock)
{
S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++);
S_LOCK_SLEEP(&(buf->cntx_lock), i++);
S_LOCK(&(buf->cntx_lock));
}
(buf->r_locks)++;
......@@ -2013,7 +2013,7 @@ LockBuffer(Buffer buffer, int mode)
buf->ri_lock = true;
}
S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++);
S_LOCK_SLEEP(&(buf->cntx_lock), i++);
S_LOCK(&(buf->cntx_lock));
}
buf->w_lock = true;
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/s_lock.c,v 1.27 2000/12/11 00:49:51 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/s_lock.c,v 1.28 2000/12/29 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -25,19 +25,21 @@
* number of microseconds to wait. This accomplishes pseudo random back-off.
* Values are not critical but 10 milliseconds is a common platform
* granularity.
* note: total time to cycle through all 16 entries might be about .07 sec.
*
* Total time to cycle through all 20 entries might be about .07 sec,
* so the given value of S_MAX_BUSY results in timeout after ~70 sec.
*/
/*
 * S_NSPINCYCLE is the number of entries in the s_spincycle[] delay table.
 * S_MAX_BUSY is the spin count beyond which a lock is reported as stuck
 * when the pseudo-random delays are in use.
 *
 * The expansion of S_MAX_BUSY is parenthesized so that it behaves as a
 * single value in any expression (e.g. "x / S_MAX_BUSY").
 */
#define S_NSPINCYCLE	20
#define S_MAX_BUSY		(1000 * S_NSPINCYCLE)
int s_spincycle[S_NSPINCYCLE] =
{0, 0, 0, 0, 10000, 0, 0, 0, 10000, 0,
{ 0, 0, 0, 0, 10000, 0, 0, 0, 10000, 0,
0, 10000, 0, 0, 10000, 0, 10000, 0, 10000, 10000
};
/*
* s_lock_stuck(lock) - complain about a stuck spinlock
* s_lock_stuck() - complain about a stuck spinlock
*/
static void
s_lock_stuck(volatile slock_t *lock, const char *file, const int line)
......@@ -52,13 +54,38 @@ s_lock_stuck(volatile slock_t *lock, const char *file, const int line)
}
/*
* s_lock_sleep() - sleep a pseudo-random amount of time, check for timeout
*
* Normally 'microsec' is 0, specifying to use the next s_spincycle[] value.
* Some callers may pass a nonzero interval, specifying to use exactly that
* delay value rather than a pseudo-random delay.
*/
void
s_lock_sleep(unsigned spin)
s_lock_sleep(unsigned spins, int microsec,
volatile slock_t *lock,
const char *file, const int line)
{
struct timeval delay;
unsigned max_spins;
if (microsec > 0)
{
delay.tv_sec = 0;
delay.tv_usec = s_spincycle[spin % S_NSPINCYCLE];
delay.tv_usec = microsec;
/* two-minute timeout in this case */
max_spins = 120000000 / microsec;
}
else
{
delay.tv_sec = 0;
delay.tv_usec = s_spincycle[spins % S_NSPINCYCLE];
max_spins = S_MAX_BUSY;
}
if (spins > max_spins)
s_lock_stuck(lock, file, line);
(void) select(0, NULL, NULL, NULL, &delay);
}
......@@ -71,14 +98,13 @@ s_lock(volatile slock_t *lock, const char *file, const int line)
{
unsigned spins = 0;
/*
* If you are thinking of changing this code, be careful. This same
* loop logic is used in other places that call TAS() directly.
*/
while (TAS(lock))
{
s_lock_sleep(spins);
if (++spins > S_MAX_BUSY)
{
/* It's been over a minute... */
s_lock_stuck(lock, file, line);
}
s_lock_sleep(spins++, 0, lock, file, line);
}
}
......
/*-------------------------------------------------------------------------
*
* s_lock.h
* This file contains the implementation (if any) for spinlocks.
* This file contains the in-line portion of the implementation
* of spinlocks.
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/include/storage/s_lock.h,v 1.75 2000/12/03 14:41:42 thomas Exp $
* $Header: /cvsroot/pgsql/src/include/storage/s_lock.h,v 1.76 2000/12/29 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
/*----------
* DESCRIPTION
* The public macros that must be provided are:
*
* void S_INIT_LOCK(slock_t *lock)
* Initialize a spinlock (to the unlocked state).
*
* void S_LOCK(slock_t *lock)
* Acquire a spinlock, waiting if necessary.
* Time out and abort() if unable to acquire the lock in a
* "reasonable" amount of time --- typically ~ 1 minute.
*
* void S_UNLOCK(slock_t *lock)
* Unlock a previously acquired lock.
*
* bool S_LOCK_FREE(slock_t *lock)
* Tests if the lock is free. Returns TRUE if free, FALSE if locked.
* This does *not* change the state of the lock.
*
* void S_LOCK_FREE(slock_t *lock)
* Tests if the lock is free. Returns non-zero if free, 0 if locked.
* int TAS(slock_t *lock)
* Atomic test-and-set instruction. Attempt to acquire the lock,
* but do *not* wait. Returns 0 if successful, nonzero if unable
* to acquire the lock.
*
* The S_LOCK() macro implements a primitive but still useful random
* backoff to avoid hordes of busywaiting lockers chewing CPU.
* TAS() is a lower-level part of the API, but is used directly in a
* few places that want to do other things while waiting for a lock.
* The S_LOCK() macro is equivalent to
*
* Effectively:
* void
* S_LOCK(slock_t *lock)
* {
* unsigned spins = 0;
*
* while (TAS(lock))
* {
* // back off the cpu for a semi-random short time
* S_LOCK_SLEEP(lock, spins++);
* }
* }
*
* This implementation takes advantage of a tas function written
* (in assembly language) on machines that have a native test-and-set
* instruction. Alternative mutex implementations may also be used.
* This function is hidden under the TAS macro to allow substitutions.
*
* #define TAS(lock) tas(lock)
* int tas(slock_t *lock) // True if lock already set
* where S_LOCK_SLEEP() checks for timeout and sleeps for a short
* interval. Callers that want to perform useful work while waiting
* can write out this entire loop and insert the "useful work" inside
* the loop.
*
* There are default implementations for all these macros at the bottom
* of this file. Check if your platform can use these or needs to
* override them.
* CAUTION to TAS() callers: on some platforms TAS() may sometimes
* report failure to acquire a lock even when the lock is not locked.
* For example, on Alpha TAS() will "fail" if interrupted. Therefore
* TAS() must *always* be invoked in a retry loop as depicted, even when
* you are certain the lock is free.
*
* NOTES
* If none of this can be done, POSTGRES will default to using
* System V semaphores (and take a large performance hit -- around 40%
* of its time on a DS5000/240 is spent in semop(3)...).
* On most supported platforms, TAS() uses a tas() function written
* in assembly language to execute a hardware atomic-test-and-set
* instruction. Equivalent OS-supplied mutex routines could be used too.
*
* AIX has a test-and-set but the recommended interface is the cs(3)
* system call. This provides an 8-instruction (plus system call
* overhead) uninterruptible compare-and-set operation. True
* spinlocks might be faster but using cs(3) still speeds up the
* regression test suite by about 25%. I don't have an assembler
* manual for POWER in any case.
* If no system-specific TAS() is available (ie, HAS_TEST_AND_SET is not
* defined), then we fall back on an emulation that uses SysV semaphores.
* This emulation will be MUCH MUCH MUCH slower than a proper TAS()
* implementation, because of the cost of a kernel call per lock or unlock.
* An old report is that Postgres spends around 40% of its time in semop(2)
* when using the SysV semaphore code.
*
* Note to implementors: there are default implementations for all these
* macros at the bottom of the file. Check if your platform can use
* these or needs to override them.
*----------
*/
#ifndef S_LOCK_H
#define S_LOCK_H
#include "storage/ipc.h"
extern void s_lock_sleep(unsigned spin);
/* Platform-independent out-of-line support routines */
extern void s_lock(volatile slock_t *lock,
const char *file, const int line);
extern void s_lock_sleep(unsigned spins, int microsec,
volatile slock_t *lock,
const char *file, const int line);
#if defined(HAS_TEST_AND_SET)
......@@ -216,7 +238,6 @@ tas(volatile slock_t *lock)
#endif /* NEED_VAX_TAS_ASM */
#if defined(NEED_NS32K_TAS_ASM)
#define TAS(lock) tas(lock)
......@@ -234,28 +255,13 @@ tas(volatile slock_t *lock)
#else /* __GNUC__ */
/***************************************************************************
* All non gcc
*/
#else /* !__GNUC__ */
#if defined(__QNX__)
/*
* QNX 4
*
* Note that slock_t under QNX is sem_t instead of char
/***************************************************************************
* All non-gcc inlines
*/
#define TAS(lock) (sem_trywait((lock)) < 0)
#define S_UNLOCK(lock) sem_post((lock))
#define S_INIT_LOCK(lock) sem_init((lock), 1, 1)
#define S_LOCK_FREE(lock) (lock)->value
#endif /* __QNX__ */
#if defined(NEED_I386_TAS_ASM)
/* non gcc i386 based things */
#if defined(USE_UNIVEL_CC)
#if defined(NEED_I386_TAS_ASM) && defined(USE_UNIVEL_CC)
#define TAS(lock) tas(lock)
asm int
......@@ -271,16 +277,15 @@ tas(volatile slock_t *s_lock)
popl %ebx
}
#endif /* USE_UNIVEL_CC */
#endif /* NEED_I386_TAS_ASM */
#endif /* defined(NEED_I386_TAS_ASM) && defined(USE_UNIVEL_CC) */
#endif /* defined(__GNUC__) */
/*************************************************************************
* These are the platforms that have common code for gcc and non-gcc
* These are the platforms that do not use inline assembler (and hence
* have common code for gcc and non-gcc compilers, if both are available).
*/
......@@ -342,7 +347,7 @@ __asm__(" ldq $0, %0 \n\
* (see include/port/hpux.h).
*
* a "set" slock_t has a single word cleared. a "clear" slock_t has
* all words set to non-zero. tas() in tas.s
* all words set to non-zero. tas() is in tas.s
*/
#define S_UNLOCK(lock) \
......@@ -356,6 +361,19 @@ do { \
#endif /* __hpux */
#if defined(__QNX__)
/*
* QNX 4
*
* Note that slock_t under QNX is sem_t instead of char
*/
#define TAS(lock) (sem_trywait((lock)) < 0)
#define S_UNLOCK(lock) sem_post((lock))
#define S_INIT_LOCK(lock) sem_init((lock), 1, 1)
#define S_LOCK_FREE(lock) ((lock)->value)
#endif /* __QNX__ */
#if defined(__sgi)
/*
* SGI IRIX 5
......@@ -416,21 +434,57 @@ do { \
#else /* !HAS_TEST_AND_SET */
/*
* Fake spinlock implementation using SysV semaphores --- slow and prone
* to fall foul of kernel limits on number of semaphores, so don't use this
* unless you must!
*/
typedef struct
{
/* reference to semaphore used to implement this spinlock */
IpcSemaphoreId semId;
int sem;
} slock_t;
extern bool s_lock_free_sema(volatile slock_t *lock);
extern void s_unlock_sema(volatile slock_t *lock);
extern void s_init_lock_sema(volatile slock_t *lock);
extern int tas_sema(volatile slock_t *lock);
#define S_LOCK_FREE(lock) s_lock_free_sema(lock)
#define S_UNLOCK(lock) s_unlock_sema(lock)
#define S_INIT_LOCK(lock) s_init_lock_sema(lock)
#define TAS(lock) tas_sema(lock)
#endif /* HAS_TEST_AND_SET */
/****************************************************************************
* Default Definitions - override these above as needed.
*/
#if !defined(S_LOCK)
extern void s_lock(volatile slock_t *lock, const char *file, const int line);
#define S_LOCK(lock) \
do { \
if (TAS((volatile slock_t *) (lock))) \
s_lock((volatile slock_t *) (lock), __FILE__, __LINE__); \
if (TAS(lock)) \
s_lock((lock), __FILE__, __LINE__); \
} while (0)
#endif /* S_LOCK */
/*
 * S_LOCK_SLEEP(lock, spins) - default back-off step for callers that write
 * out their own TAS() retry loop.
 *
 * Forwards to s_lock_sleep() with microsec = 0, passing the caller's spin
 * count, the lock, and the call site's __FILE__/__LINE__.  A platform may
 * override this by defining S_LOCK_SLEEP before this point.
 */
#if !defined(S_LOCK_SLEEP)
#define S_LOCK_SLEEP(lock,spins) \
s_lock_sleep((spins), 0, (lock), __FILE__, __LINE__)
#endif /* S_LOCK_SLEEP */
/*
 * S_LOCK_SLEEP_INTERVAL(lock, spins, microsec) - like S_LOCK_SLEEP, but
 * the caller supplies the microsec argument handed to s_lock_sleep()
 * instead of the fixed 0.
 *
 * The call site's __FILE__/__LINE__ are passed along with the lock.  A
 * platform may override this by defining S_LOCK_SLEEP_INTERVAL before this
 * point.
 */
#if !defined(S_LOCK_SLEEP_INTERVAL)
#define S_LOCK_SLEEP_INTERVAL(lock,spins,microsec) \
s_lock_sleep((spins), (microsec), (lock), __FILE__, __LINE__)
#endif /* S_LOCK_SLEEP_INTERVAL */
#if !defined(S_LOCK_FREE)
#define S_LOCK_FREE(lock) (*(lock) == 0)
#endif /* S_LOCK_FREE */
......@@ -444,46 +498,11 @@ extern void s_lock(volatile slock_t *lock, const char *file, const int line);
#endif /* S_INIT_LOCK */
#if !defined(TAS)
extern int tas(volatile slock_t *lock); /* port/.../tas.s, or
extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or
* s_lock.c */
#define TAS(lock) tas((volatile slock_t *) (lock))
#define TAS(lock) tas(lock)
#endif /* TAS */
#else /* !HAS_TEST_AND_SET */
/*
* Fake spinlock implementation using SysV semaphores --- slow and prone
* to fall foul of kernel limits on number of semaphores, so don't use this
* unless you must!
*/
typedef struct
{
/* reference to semaphore used to implement this spinlock */
IpcSemaphoreId semId;
int sem;
} slock_t;
extern bool s_lock_free_sema(volatile slock_t *lock);
extern void s_unlock_sema(volatile slock_t *lock);
extern void s_init_lock_sema(volatile slock_t *lock);
extern int tas_sema(volatile slock_t *lock);
extern void s_lock(volatile slock_t *lock, const char *file, const int line);
#define S_LOCK(lock) \
do { \
if (TAS((volatile slock_t *) (lock))) \
s_lock((volatile slock_t *) (lock), __FILE__, __LINE__); \
} while (0)
#define S_LOCK_FREE(lock) s_lock_free_sema(lock)
#define S_UNLOCK(lock) s_unlock_sema(lock)
#define S_INIT_LOCK(lock) s_init_lock_sema(lock)
#define TAS(lock) tas_sema(lock)
#endif /* HAS_TEST_AND_SET */
#endif /* S_LOCK_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment