Commit ab5194e6 authored by Andres Freund

Improve LWLock scalability.

The old LWLock implementation had the problem that concurrent lock
acquisitions required exclusively acquiring a spinlock. Often that
could lead to acquirers waiting behind the spinlock, even if the
actual LWLock was free.

The new implementation doesn't acquire the spinlock when acquiring the
lock itself. Instead, the new atomic operations are used to manipulate
the lock state atomically. Only the wait queue, used solely in the slow
path, is still protected by the spinlock. See the header comment in
lwlock.c for an explanation of the algorithm used.
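
The committed implementation lives in lwlock.c; the following is only a
minimal, self-contained sketch of the lock-free fast path described above,
written with C11 atomics instead of PostgreSQL's port/atomics.h wrappers.
The names (SketchLWLock, SKETCH_EXCLUSIVE, and so on) and the exact bit
layout of the state word are illustrative assumptions, not the encoding
actually used by lwlock.c.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative state-word layout: one exclusive bit plus a shared count. */
    #define SKETCH_EXCLUSIVE	((uint32_t) 1 << 31)
    #define SKETCH_SHARED_ONE	((uint32_t) 1)

    typedef struct SketchLWLock
    {
    	_Atomic uint32_t state;	/* lockers; updated without any spinlock */
    	/* wait queue and its protecting spinlock (slow path) would go here */
    } SketchLWLock;

    /*
     * Fast path for shared acquisition: no spinlock is taken.  If it fails,
     * the caller would fall back to the slow path, which enqueues the
     * process on the wait queue under the spinlock.
     */
    static bool
    sketch_lock_shared_fastpath(SketchLWLock *lock)
    {
    	uint32_t	old = atomic_load_explicit(&lock->state,
    										   memory_order_relaxed);

    	while ((old & SKETCH_EXCLUSIVE) == 0)
    	{
    		/* no exclusive holder: try to bump the shared-holder count */
    		if (atomic_compare_exchange_weak_explicit(&lock->state, &old,
    												  old + SKETCH_SHARED_ONE,
    												  memory_order_acquire,
    												  memory_order_relaxed))
    			return true;	/* lock acquired in shared mode */
    		/* CAS failed; 'old' now holds the refreshed state, re-check */
    	}
    	return false;			/* exclusively held: take the slow path */
    }

Because waiters enqueue themselves under the spinlock only after the atomic
attempt fails, uncontended and read-mostly acquisitions never touch the
spinlock at all, which is where the scalability win comes from.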

For some common workloads on larger machines this can yield
significant performance improvements, particularly in read-mostly
workloads.

Reviewed-By: Amit Kapila and Robert Haas
Author: Andres Freund

Discussion: 20130926225545.GB26663@awork2.anarazel.de
parent 7882c3b0
@@ -16,6 +16,7 @@
 
 #include "lib/ilist.h"
 #include "storage/s_lock.h"
+#include "port/atomics.h"
 
 struct PGPROC;
 
@@ -47,11 +48,16 @@ typedef struct LWLockTranche
 typedef struct LWLock
 {
 	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
-	bool		releaseOK;		/* T if ok to release waiters */
-	char		exclusive;		/* # of exclusive holders (0 or 1) */
-	int			shared;			/* # of shared holders (0..MaxBackends) */
-	int			tranche;		/* tranche ID */
+	uint16		tranche;		/* tranche ID */
+
+	pg_atomic_uint32 state;		/* state of exlusive/nonexclusive lockers */
+#ifdef LOCK_DEBUG
+	pg_atomic_uint32 nwaiters;	/* number of waiters */
+#endif
 	dlist_head	waiters;		/* list of waiting PGPROCs */
+#ifdef LOCK_DEBUG
+	struct PGPROC *owner;		/* last exlusive owner of the lock */
+#endif
 } LWLock;
 
 /*
@@ -66,11 +72,11 @@ typedef struct LWLock
  * (Of course, we have to also ensure that the array start address is suitably
  * aligned.)
  *
- * Even on a 32-bit platform, an lwlock will be more than 16 bytes, because
- * it contains 2 integers and 2 pointers, plus other stuff.  It should fit
- * into 32 bytes, though, unless slock_t is really big.  On a 64-bit platform,
- * it should fit into 32 bytes unless slock_t is larger than 4 bytes.  We
- * allow for that just in case.
+ * On a 32-bit platforms a LWLock will these days fit into 16 bytes, but since
+ * that didn't use to be the case and cramming more lwlocks into a cacheline
+ * might be detrimental performancewise we still use 32 byte alignment
+ * there. So, both on 32 and 64 bit platforms, it should fit into 32 bytes
+ * unless slock_t is really big. We allow for that just in case.
  */
 #define LWLOCK_PADDED_SIZE	(sizeof(LWLock) <= 32 ? 32 : 64)
 
...
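
As context for the LWLOCK_PADDED_SIZE comment above, here is a rough sketch
of the padding arrangement it describes: each lock is placed in a fixed-size
slot so that array elements stay cache-line friendly. The names below
(SketchLock, SketchLockPadded, the array length of 128) are illustrative
assumptions only; PostgreSQL's lwlock.h defines the analogous LWLockPadded
union for its main lock array.

    #include <stdint.h>

    /* Stand-in for the real LWLock contents; member sizes are illustrative. */
    typedef struct SketchLock
    {
    	uint32_t	state;
    	uint16_t	tranche;
    } SketchLock;

    /* Pad each lock up to 32 (or 64) bytes, mirroring LWLOCK_PADDED_SIZE. */
    #define SKETCH_PADDED_SIZE	(sizeof(SketchLock) <= 32 ? 32 : 64)

    typedef union SketchLockPadded
    {
    	SketchLock	lock;
    	char		pad[SKETCH_PADDED_SIZE];
    } SketchLockPadded;

    /*
     * The lock array is then an array of padded slots, so consecutive locks
     * keep 32-byte alignment regardless of sizeof(SketchLock).
     */
    static SketchLockPadded SketchMainLockArray[128];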