Commit 73487a60 authored by Andres Freund

snapshot scalability: Move subxact info to ProcGlobal, remove PGXACT.

Similar to the previous changes, this increases the chance that data
frequently needed by GetSnapshotData() stays in L2 cache. In many
workloads subtransactions are very rare, and this makes the check for
that considerably cheaper.

As this removes the last member of PGXACT, there is no need to keep it
around anymore.

On a larger 2-socket machine this and the two preceding commits result
in a ~1.07x performance increase in read-only pgbench. For read-heavy
mixed r/w workloads without row level contention, I see about 1.1x.

Author: Andres Freund <andres@anarazel.de>
Reviewed-By: Robert Haas <robertmhaas@gmail.com>
Reviewed-By: Thomas Munro <thomas.munro@gmail.com>
Reviewed-By: David Rowley <dgrowleyml@gmail.com>
Discussion: https://postgr.es/m/20200301083601.ews6hz5dduc3w2se@alap3.anarazel.de
parent 5788e258
...@@ -295,7 +295,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, ...@@ -295,7 +295,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
*/ */
if (all_xact_same_page && xid == MyProc->xid && if (all_xact_same_page && xid == MyProc->xid &&
nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT && nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT &&
nsubxids == MyPgXact->nxids && nsubxids == MyProc->subxidStatus.count &&
memcmp(subxids, MyProc->subxids.xids, memcmp(subxids, MyProc->subxids.xids,
nsubxids * sizeof(TransactionId)) == 0) nsubxids * sizeof(TransactionId)) == 0)
{ {
...@@ -510,16 +510,15 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, ...@@ -510,16 +510,15 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
while (nextidx != INVALID_PGPROCNO) while (nextidx != INVALID_PGPROCNO)
{ {
PGPROC *proc = &ProcGlobal->allProcs[nextidx]; PGPROC *proc = &ProcGlobal->allProcs[nextidx];
PGXACT *pgxact = &ProcGlobal->allPgXact[nextidx];
/* /*
* Transactions with more than THRESHOLD_SUBTRANS_CLOG_OPT sub-XIDs * Transactions with more than THRESHOLD_SUBTRANS_CLOG_OPT sub-XIDs
* should not use group XID status update mechanism. * should not use group XID status update mechanism.
*/ */
Assert(pgxact->nxids <= THRESHOLD_SUBTRANS_CLOG_OPT); Assert(proc->subxidStatus.count <= THRESHOLD_SUBTRANS_CLOG_OPT);
TransactionIdSetPageStatusInternal(proc->clogGroupMemberXid, TransactionIdSetPageStatusInternal(proc->clogGroupMemberXid,
pgxact->nxids, proc->subxidStatus.count,
proc->subxids.xids, proc->subxids.xids,
proc->clogGroupMemberXidStatus, proc->clogGroupMemberXidStatus,
proc->clogGroupMemberLsn, proc->clogGroupMemberLsn,
......
...@@ -21,9 +21,9 @@ ...@@ -21,9 +21,9 @@
* GIDs and aborts the transaction if there already is a global * GIDs and aborts the transaction if there already is a global
* transaction in prepared state with the same GID. * transaction in prepared state with the same GID.
* *
* A global transaction (gxact) also has dummy PGXACT and PGPROC; this is * A global transaction (gxact) also has dummy PGPROC; this is what keeps
* what keeps the XID considered running by TransactionIdIsInProgress. * the XID considered running by TransactionIdIsInProgress. It is also
* It is also convenient as a PGPROC to hook the gxact's locks to. * convenient as a PGPROC to hook the gxact's locks to.
* *
* Information to recover prepared transactions in case of crash is * Information to recover prepared transactions in case of crash is
* now stored in WAL for the common case. In some cases there will be * now stored in WAL for the common case. In some cases there will be
...@@ -447,14 +447,12 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, ...@@ -447,14 +447,12 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
TimestampTz prepared_at, Oid owner, Oid databaseid) TimestampTz prepared_at, Oid owner, Oid databaseid)
{ {
PGPROC *proc; PGPROC *proc;
PGXACT *pgxact;
int i; int i;
Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
Assert(gxact != NULL); Assert(gxact != NULL);
proc = &ProcGlobal->allProcs[gxact->pgprocno]; proc = &ProcGlobal->allProcs[gxact->pgprocno];
pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
/* Initialize the PGPROC entry */ /* Initialize the PGPROC entry */
MemSet(proc, 0, sizeof(PGPROC)); MemSet(proc, 0, sizeof(PGPROC));
...@@ -480,8 +478,8 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, ...@@ -480,8 +478,8 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
for (i = 0; i < NUM_LOCK_PARTITIONS; i++) for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
SHMQueueInit(&(proc->myProcLocks[i])); SHMQueueInit(&(proc->myProcLocks[i]));
/* subxid data must be filled later by GXactLoadSubxactData */ /* subxid data must be filled later by GXactLoadSubxactData */
pgxact->overflowed = false; proc->subxidStatus.overflowed = false;
pgxact->nxids = 0; proc->subxidStatus.count = 0;
gxact->prepared_at = prepared_at; gxact->prepared_at = prepared_at;
gxact->xid = xid; gxact->xid = xid;
...@@ -510,19 +508,18 @@ GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts, ...@@ -510,19 +508,18 @@ GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts,
TransactionId *children) TransactionId *children)
{ {
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
/* We need no extra lock since the GXACT isn't valid yet */ /* We need no extra lock since the GXACT isn't valid yet */
if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS) if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS)
{ {
pgxact->overflowed = true; proc->subxidStatus.overflowed = true;
nsubxacts = PGPROC_MAX_CACHED_SUBXIDS; nsubxacts = PGPROC_MAX_CACHED_SUBXIDS;
} }
if (nsubxacts > 0) if (nsubxacts > 0)
{ {
memcpy(proc->subxids.xids, children, memcpy(proc->subxids.xids, children,
nsubxacts * sizeof(TransactionId)); nsubxacts * sizeof(TransactionId));
pgxact->nxids = nsubxacts; proc->subxidStatus.count = nsubxacts;
} }
} }
......
...@@ -222,22 +222,31 @@ GetNewTransactionId(bool isSubXact) ...@@ -222,22 +222,31 @@ GetNewTransactionId(bool isSubXact)
*/ */
if (!isSubXact) if (!isSubXact)
{ {
Assert(ProcGlobal->subxidStates[MyProc->pgxactoff].count == 0);
Assert(!ProcGlobal->subxidStates[MyProc->pgxactoff].overflowed);
Assert(MyProc->subxidStatus.count == 0);
Assert(!MyProc->subxidStatus.overflowed);
/* LWLockRelease acts as barrier */ /* LWLockRelease acts as barrier */
MyProc->xid = xid; MyProc->xid = xid;
ProcGlobal->xids[MyProc->pgxactoff] = xid; ProcGlobal->xids[MyProc->pgxactoff] = xid;
} }
else else
{ {
int nxids = MyPgXact->nxids; XidCacheStatus *substat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
int nxids = MyProc->subxidStatus.count;
Assert(substat->count == MyProc->subxidStatus.count);
Assert(substat->overflowed == MyProc->subxidStatus.overflowed);
if (nxids < PGPROC_MAX_CACHED_SUBXIDS) if (nxids < PGPROC_MAX_CACHED_SUBXIDS)
{ {
MyProc->subxids.xids[nxids] = xid; MyProc->subxids.xids[nxids] = xid;
pg_write_barrier(); pg_write_barrier();
MyPgXact->nxids = nxids + 1; MyProc->subxidStatus.count = substat->count = nxids + 1;
} }
else else
MyPgXact->overflowed = true; MyProc->subxidStatus.overflowed = substat->overflowed = true;
} }
LWLockRelease(XidGenLock); LWLockRelease(XidGenLock);
......
This diff is collapsed.
...@@ -63,9 +63,8 @@ int LockTimeout = 0; ...@@ -63,9 +63,8 @@ int LockTimeout = 0;
int IdleInTransactionSessionTimeout = 0; int IdleInTransactionSessionTimeout = 0;
bool log_lock_waits = false; bool log_lock_waits = false;
/* Pointer to this process's PGPROC and PGXACT structs, if any */ /* Pointer to this process's PGPROC struct, if any */
PGPROC *MyProc = NULL; PGPROC *MyProc = NULL;
PGXACT *MyPgXact = NULL;
/* /*
* This spinlock protects the freelist of recycled PGPROC structures. * This spinlock protects the freelist of recycled PGPROC structures.
...@@ -110,10 +109,8 @@ ProcGlobalShmemSize(void) ...@@ -110,10 +109,8 @@ ProcGlobalShmemSize(void)
size = add_size(size, mul_size(TotalProcs, sizeof(PGPROC))); size = add_size(size, mul_size(TotalProcs, sizeof(PGPROC)));
size = add_size(size, sizeof(slock_t)); size = add_size(size, sizeof(slock_t));
size = add_size(size, mul_size(MaxBackends, sizeof(PGXACT)));
size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGXACT)));
size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGXACT)));
size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->xids))); size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->xids)));
size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->subxidStates)));
size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->vacuumFlags))); size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->vacuumFlags)));
return size; return size;
...@@ -161,7 +158,6 @@ void ...@@ -161,7 +158,6 @@ void
InitProcGlobal(void) InitProcGlobal(void)
{ {
PGPROC *procs; PGPROC *procs;
PGXACT *pgxacts;
int i, int i,
j; j;
bool found; bool found;
...@@ -202,18 +198,6 @@ InitProcGlobal(void) ...@@ -202,18 +198,6 @@ InitProcGlobal(void)
/* XXX allProcCount isn't really all of them; it excludes prepared xacts */ /* XXX allProcCount isn't really all of them; it excludes prepared xacts */
ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS; ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS;
/*
* Also allocate a separate array of PGXACT structures. This is separate
* from the main PGPROC array so that the most heavily accessed data is
* stored contiguously in memory in as few cache lines as possible. This
* provides significant performance benefits, especially on a
* multiprocessor system. There is one PGXACT structure for every PGPROC
* structure.
*/
pgxacts = (PGXACT *) ShmemAlloc(TotalProcs * sizeof(PGXACT));
MemSet(pgxacts, 0, TotalProcs * sizeof(PGXACT));
ProcGlobal->allPgXact = pgxacts;
/* /*
* Allocate arrays mirroring PGPROC fields in a dense manner. See * Allocate arrays mirroring PGPROC fields in a dense manner. See
* PROC_HDR. * PROC_HDR.
...@@ -224,6 +208,8 @@ InitProcGlobal(void) ...@@ -224,6 +208,8 @@ InitProcGlobal(void)
ProcGlobal->xids = ProcGlobal->xids =
(TransactionId *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids)); (TransactionId *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids));
MemSet(ProcGlobal->xids, 0, TotalProcs * sizeof(*ProcGlobal->xids)); MemSet(ProcGlobal->xids, 0, TotalProcs * sizeof(*ProcGlobal->xids));
ProcGlobal->subxidStates = (XidCacheStatus *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->subxidStates));
MemSet(ProcGlobal->subxidStates, 0, TotalProcs * sizeof(*ProcGlobal->subxidStates));
ProcGlobal->vacuumFlags = (uint8 *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->vacuumFlags)); ProcGlobal->vacuumFlags = (uint8 *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->vacuumFlags));
MemSet(ProcGlobal->vacuumFlags, 0, TotalProcs * sizeof(*ProcGlobal->vacuumFlags)); MemSet(ProcGlobal->vacuumFlags, 0, TotalProcs * sizeof(*ProcGlobal->vacuumFlags));
...@@ -372,7 +358,6 @@ InitProcess(void) ...@@ -372,7 +358,6 @@ InitProcess(void)
(errcode(ERRCODE_TOO_MANY_CONNECTIONS), (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
errmsg("sorry, too many clients already"))); errmsg("sorry, too many clients already")));
} }
MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno];
/* /*
* Cross-check that the PGPROC is of the type we expect; if this were not * Cross-check that the PGPROC is of the type we expect; if this were not
...@@ -569,7 +554,6 @@ InitAuxiliaryProcess(void) ...@@ -569,7 +554,6 @@ InitAuxiliaryProcess(void)
((volatile PGPROC *) auxproc)->pid = MyProcPid; ((volatile PGPROC *) auxproc)->pid = MyProcPid;
MyProc = auxproc; MyProc = auxproc;
MyPgXact = &ProcGlobal->allPgXact[auxproc->pgprocno];
SpinLockRelease(ProcStructLock); SpinLockRelease(ProcStructLock);
......
...@@ -35,6 +35,14 @@ ...@@ -35,6 +35,14 @@
*/ */
#define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */ #define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */
typedef struct XidCacheStatus
{
/* number of cached subxids, never more than PGPROC_MAX_CACHED_SUBXIDS */
uint8 count;
/* has PGPROC->subxids overflowed */
bool overflowed;
} XidCacheStatus;
struct XidCache struct XidCache
{ {
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS];
...@@ -187,6 +195,8 @@ struct PGPROC ...@@ -187,6 +195,8 @@ struct PGPROC
*/ */
SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS]; SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
XidCacheStatus subxidStatus; /* mirrored with
* ProcGlobal->subxidStates[i] */
struct XidCache subxids; /* cache for subtransaction XIDs */ struct XidCache subxids; /* cache for subtransaction XIDs */
/* Support for group XID clearing. */ /* Support for group XID clearing. */
...@@ -235,22 +245,6 @@ struct PGPROC ...@@ -235,22 +245,6 @@ struct PGPROC
extern PGDLLIMPORT PGPROC *MyProc; extern PGDLLIMPORT PGPROC *MyProc;
extern PGDLLIMPORT struct PGXACT *MyPgXact;
/*
* Prior to PostgreSQL 9.2, the fields below were stored as part of the
* PGPROC. However, benchmarking revealed that packing these particular
* members into a separate array as tightly as possible sped up GetSnapshotData
* considerably on systems with many CPU cores, by reducing the number of
* cache lines needing to be fetched. Thus, think very carefully before adding
* anything else here.
*/
typedef struct PGXACT
{
bool overflowed;
uint8 nxids;
} PGXACT;
/* /*
* There is one ProcGlobal struct for the whole database cluster. * There is one ProcGlobal struct for the whole database cluster.
...@@ -310,12 +304,16 @@ typedef struct PROC_HDR ...@@ -310,12 +304,16 @@ typedef struct PROC_HDR
{ {
/* Array of PGPROC structures (not including dummies for prepared txns) */ /* Array of PGPROC structures (not including dummies for prepared txns) */
PGPROC *allProcs; PGPROC *allProcs;
/* Array of PGXACT structures (not including dummies for prepared txns) */
PGXACT *allPgXact;
/* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */ /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
TransactionId *xids; TransactionId *xids;
/*
* Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
* procarray.
*/
XidCacheStatus *subxidStates;
/* /*
* Array mirroring PGPROC.vacuumFlags for each PGPROC currently in the * Array mirroring PGPROC.vacuumFlags for each PGPROC currently in the
* procarray. * procarray.
......
...@@ -1536,7 +1536,6 @@ PGSetenvStatusType ...@@ -1536,7 +1536,6 @@ PGSetenvStatusType
PGShmemHeader PGShmemHeader
PGTransactionStatusType PGTransactionStatusType
PGVerbosity PGVerbosity
PGXACT
PG_Locale_Strategy PG_Locale_Strategy
PG_Lock_Status PG_Lock_Status
PG_init_t PG_init_t
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment