Commit 1c6821be authored by Heikki Linnakangas

Fix and enhance the assertion of no palloc's in a critical section.

The assertion failed if WAL_DEBUG or LWLOCK_STATS was enabled; fix that by
using separate memory contexts for the allocations made within those code
blocks.

This patch introduces a mechanism for marking any memory context as allowed
in a critical section. Previously ErrorContext was exempt as a special case.

Instead of a blanket exemption for the checkpointer process, exempt only the
memory context used for the pending-ops hash table.
parent a749a23d
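
Editor's note: the snippet below is a minimal sketch of the pattern this commit enables, not code from the commit itself. The context name ("Scratch Context"), the function names, and the work done inside the critical section are invented for illustration; only the memory-context calls and the critical-section macros are real PostgreSQL APIs.

#include "postgres.h"

#include "miscadmin.h"			/* START_CRIT_SECTION / END_CRIT_SECTION */
#include "utils/memutils.h"		/* memory context API */

static MemoryContext ScratchCxt = NULL;

/* Set up once, outside any critical section. */
static void
scratch_init(void)
{
	ScratchCxt = AllocSetContextCreate(TopMemoryContext,
									   "Scratch Context",
									   ALLOCSET_DEFAULT_MINSIZE,
									   ALLOCSET_DEFAULT_INITSIZE,
									   ALLOCSET_DEFAULT_MAXSIZE);
	/* Exempt this context from the no-palloc-in-critical-section Assert */
	MemoryContextAllowInCriticalSection(ScratchCxt, true);
}

static void
scratch_use(void)
{
	MemoryContext oldCxt;
	char	   *buf;

	START_CRIT_SECTION();
	oldCxt = MemoryContextSwitchTo(ScratchCxt);

	/* This palloc no longer trips the assertion; a real OOM still PANICs. */
	buf = palloc(128);
	memset(buf, 0, 128);

	MemoryContextSwitchTo(oldCxt);
	MemoryContextReset(ScratchCxt);		/* releases buf and anything else */
	END_CRIT_SECTION();
}

Note that an allocation failure inside the critical section still escalates to PANIC; that is why the commit applies the exemption only to debugging code (WAL_DEBUG, LWLOCK_STATS) and to the small pending-ops hash table.
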
@@ -60,6 +60,7 @@
 #include "storage/spin.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
+#include "utils/memutils.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
 #include "utils/snapmgr.h"
@@ -736,6 +737,10 @@ static bool bgwriterLaunched = false;
 static int	MyLockNo = 0;
 static bool holdingAllLocks = false;
 
+#ifdef WAL_DEBUG
+static MemoryContext walDebugCxt = NULL;
+#endif
+
 static void readRecoveryCommandFile(void);
 static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
 static bool recoveryStopsBefore(XLogRecord *record);
@@ -1258,6 +1263,7 @@ begin:;
 	if (XLOG_DEBUG)
 	{
 		StringInfoData buf;
+		MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
 
 		initStringInfo(&buf);
 		appendStringInfo(&buf, "INSERT @ %X/%X: ",
@@ -1282,10 +1288,11 @@ begin:;
 			appendStringInfoString(&buf, " - ");
 			RmgrTable[rechdr->xl_rmid].rm_desc(&buf, (XLogRecord *) recordbuf.data);
-			pfree(recordbuf.data);
 		}
 		elog(LOG, "%s", buf.data);
-		pfree(buf.data);
+
+		MemoryContextSwitchTo(oldCxt);
+		MemoryContextReset(walDebugCxt);
 	}
 #endif
@@ -4807,6 +4814,24 @@ XLOGShmemInit(void)
 	char	   *allocptr;
 	int			i;
+#ifdef WAL_DEBUG
+
+	/*
+	 * Create a memory context for WAL debugging that's exempt from the
+	 * normal "no pallocs in critical section" rule. Yes, that can lead to a
+	 * PANIC if an allocation fails, but wal_debug is not for production use
+	 * anyway.
+	 */
+	if (walDebugCxt == NULL)
+	{
+		walDebugCxt = AllocSetContextCreate(TopMemoryContext,
+											"WAL Debug",
+											ALLOCSET_DEFAULT_MINSIZE,
+											ALLOCSET_DEFAULT_INITSIZE,
+											ALLOCSET_DEFAULT_MAXSIZE);
+		MemoryContextAllowInCriticalSection(walDebugCxt, true);
+	}
+#endif
 
 	ControlFile = (ControlFileData *)
 		ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
 	XLogCtl = (XLogCtlData *)
......
@@ -1305,19 +1305,6 @@ AbsorbFsyncRequests(void)
 	if (!AmCheckpointerProcess())
 		return;
 
-	/*
-	 * We have to PANIC if we fail to absorb all the pending requests (eg,
-	 * because our hashtable runs out of memory). This is because the system
-	 * cannot run safely if we are unable to fsync what we have been told to
-	 * fsync. Fortunately, the hashtable is so small that the problem is
-	 * quite unlikely to arise in practice.
-	 */
-	START_CRIT_SECTION();
-
-	/*
-	 * We try to avoid holding the lock for a long time by copying the request
-	 * array.
-	 */
 	LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
 
 	/* Transfer stats counts into pending pgstats message */
@@ -1327,12 +1314,25 @@ AbsorbFsyncRequests(void)
 	CheckpointerShmem->num_backend_writes = 0;
 	CheckpointerShmem->num_backend_fsync = 0;
 
+	/*
+	 * We try to avoid holding the lock for a long time by copying the request
+	 * array, and processing the requests after releasing the lock.
+	 *
+	 * Once we have cleared the requests from shared memory, we have to PANIC
+	 * if we then fail to absorb them (eg, because our hashtable runs out of
+	 * memory). This is because the system cannot run safely if we are unable
+	 * to fsync what we have been told to fsync. Fortunately, the hashtable
+	 * is so small that the problem is quite unlikely to arise in practice.
+	 */
 	n = CheckpointerShmem->num_requests;
 	if (n > 0)
 	{
 		requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
 		memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
 	}
+
+	START_CRIT_SECTION();
+
 	CheckpointerShmem->num_requests = 0;
 
 	LWLockRelease(CheckpointerCommLock);
@@ -1340,10 +1340,10 @@ AbsorbFsyncRequests(void)
 	for (request = requests; n > 0; request++, n--)
 		RememberFsyncRequest(request->rnode, request->forknum, request->segno);
 
+	END_CRIT_SECTION();
+
 	if (requests)
 		pfree(requests);
-
-	END_CRIT_SECTION();
 }
 
 /*
......
@@ -104,8 +104,8 @@ typedef struct lwlock_stats
 	int			spin_delay_count;
 }	lwlock_stats;
 
-static int	counts_for_pid = 0;
 static HTAB *lwlock_stats_htab;
+static lwlock_stats lwlock_stats_dummy;
 #endif
 
 #ifdef LOCK_DEBUG
@@ -142,21 +142,39 @@ static void
 init_lwlock_stats(void)
 {
 	HASHCTL		ctl;
+	static MemoryContext lwlock_stats_cxt = NULL;
+	static bool exit_registered = false;
 
-	if (lwlock_stats_htab != NULL)
-	{
-		hash_destroy(lwlock_stats_htab);
-		lwlock_stats_htab = NULL;
-	}
+	if (lwlock_stats_cxt != NULL)
+		MemoryContextDelete(lwlock_stats_cxt);
+
+	/*
+	 * The LWLock stats will be updated within a critical section, which
+	 * requires allocating new hash entries. Allocations within a critical
+	 * section are normally not allowed because running out of memory would
+	 * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
+	 * turned on in production, so that's an acceptable risk. The hash entries
+	 * are small, so the risk of running out of memory is minimal in practice.
+	 */
+	lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
+											 "LWLock stats",
+											 ALLOCSET_DEFAULT_MINSIZE,
+											 ALLOCSET_DEFAULT_INITSIZE,
+											 ALLOCSET_DEFAULT_MAXSIZE);
+	MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
 
 	MemSet(&ctl, 0, sizeof(ctl));
 	ctl.keysize = sizeof(lwlock_stats_key);
 	ctl.entrysize = sizeof(lwlock_stats);
 	ctl.hash = tag_hash;
+	ctl.hcxt = lwlock_stats_cxt;
 	lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
-									HASH_ELEM | HASH_FUNCTION);
-	counts_for_pid = MyProcPid;
-	on_shmem_exit(print_lwlock_stats, 0);
+									HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+	if (!exit_registered)
+	{
+		on_shmem_exit(print_lwlock_stats, 0);
+		exit_registered = true;
+	}
 }
 
 static void
@@ -190,9 +208,13 @@ get_lwlock_stats_entry(LWLock *lock)
 	lwlock_stats *lwstats;
 	bool		found;
 
-	/* Set up local count state first time through in a given process */
-	if (counts_for_pid != MyProcPid)
-		init_lwlock_stats();
+	/*
+	 * During shared memory initialization, the hash table doesn't exist yet.
+	 * Stats of that phase aren't very interesting, so just collect operations
+	 * on all locks in a single dummy entry.
+	 */
+	if (lwlock_stats_htab == NULL)
+		return &lwlock_stats_dummy;
 
 	/* Fetch or create the entry. */
 	key.tranche = lock->tranche;
@@ -361,6 +383,16 @@ CreateLWLocks(void)
 	LWLockRegisterTranche(0, &MainLWLockTranche);
 }
 
+/*
+ * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
+ */
+void
+InitLWLockAccess(void)
+{
+#ifdef LWLOCK_STATS
+	init_lwlock_stats();
+#endif
+}
 /*
  * LWLockAssign - assign a dynamically-allocated LWLock number
......
@@ -411,8 +411,9 @@ InitProcess(void)
 	/*
 	 * Now that we have a PGPROC, we could try to acquire locks, so initialize
-	 * the deadlock checker.
+	 * local state needed for LWLocks, and the deadlock checker.
 	 */
+	InitLWLockAccess();
 	InitDeadLockChecking();
 }
......
@@ -115,7 +115,7 @@ typedef struct _MdfdVec
 	struct _MdfdVec *mdfd_chain;	/* next segment, or NULL */
 } MdfdVec;
 
-static MemoryContext MdCxt;		/* context for all md.c allocations */
+static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 
 /*
@@ -157,6 +157,7 @@ typedef struct
 
 static HTAB *pendingOpsTable = NULL;
 static List *pendingUnlinks = NIL;
+static MemoryContext pendingOpsCxt;		/* context for the above */
 
 static CycleCtr mdsync_cycle_ctr = 0;
 static CycleCtr mdckpt_cycle_ctr = 0;
@@ -209,11 +210,27 @@ mdinit(void)
 	{
 		HASHCTL		hash_ctl;
 
+		/*
+		 * XXX: The checkpointer needs to add entries to the pending ops table
+		 * when absorbing fsync requests. That is done within a critical
+		 * section, which isn't usually allowed, but we make an exception.
+		 * It means that there's a theoretical possibility that you run out of
+		 * memory while absorbing fsync requests, which leads to a PANIC.
+		 * Fortunately the hash table is small so that's unlikely to happen in
+		 * practice.
+		 */
+		pendingOpsCxt = AllocSetContextCreate(MdCxt,
+											  "Pending Ops Context",
+											  ALLOCSET_DEFAULT_MINSIZE,
+											  ALLOCSET_DEFAULT_INITSIZE,
+											  ALLOCSET_DEFAULT_MAXSIZE);
+		MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
+
 		MemSet(&hash_ctl, 0, sizeof(hash_ctl));
 		hash_ctl.keysize = sizeof(RelFileNode);
 		hash_ctl.entrysize = sizeof(PendingOperationEntry);
 		hash_ctl.hash = tag_hash;
-		hash_ctl.hcxt = MdCxt;
+		hash_ctl.hcxt = pendingOpsCxt;
 		pendingOpsTable = hash_create("Pending Ops Table",
 									  100L,
 									  &hash_ctl,
@@ -1516,7 +1533,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
 	else if (segno == UNLINK_RELATION_REQUEST)
 	{
 		/* Unlink request: put it in the linked list */
-		MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+		MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
 		PendingUnlinkEntry *entry;
 
 		/* PendingUnlinkEntry doesn't store forknum, since it's always MAIN */
@@ -1533,7 +1550,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
 	else
 	{
 		/* Normal case: enter a request to fsync this segment */
-		MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+		MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
 		PendingOperationEntry *entry;
 		bool		found;
......
@@ -60,15 +60,9 @@ static void MemoryContextStatsInternal(MemoryContext context, int level);
  * You should not do memory allocations within a critical section, because
  * an out-of-memory error will be escalated to a PANIC. To enforce that
  * rule, the allocation functions Assert that.
- *
- * There are a two exceptions: 1) error recovery uses ErrorContext, which
- * has some memory set aside so that you don't run out. And 2) checkpointer
- * currently just hopes for the best, which is wrong and ought to be fixed,
- * but it's a known issue so let's not complain about in the meanwhile.
  */
 #define AssertNotInCriticalSection(context) \
-	Assert(CritSectionCount == 0 || (context) == ErrorContext || \
-		   AmCheckpointerProcess())
+	Assert(CritSectionCount == 0 || (context)->allowInCritSection)
 
 /*****************************************************************************
  *	  EXPORTED ROUTINES                                                      *
@@ -120,7 +114,10 @@ MemoryContextInit(void)
 	 * require it to contain at least 8K at all times. This is the only case
 	 * where retained memory in a context is *essential* --- we want to be
 	 * sure ErrorContext still has some memory even if we've run out
-	 * elsewhere!
+	 * elsewhere! Also, allow allocations in ErrorContext within a critical
+	 * section. Otherwise a PANIC will cause an assertion failure in the
+	 * error reporting code, before printing out the real cause of the
+	 * failure.
 	 *
 	 * This should be the last step in this function, as elog.c assumes memory
 	 * management works once ErrorContext is non-null.
@@ -130,6 +127,7 @@ MemoryContextInit(void)
 								 8 * 1024,
 								 8 * 1024,
 								 8 * 1024);
+	MemoryContextAllowInCriticalSection(ErrorContext, true);
 }
 
 /*
@@ -305,6 +303,26 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
 	}
 }
+
+/*
+ * MemoryContextAllowInCriticalSection
+ *		Allow/disallow allocations in this memory context within a critical
+ *		section.
+ *
+ * Normally, memory allocations are not allowed within a critical section,
+ * because a failure would lead to PANIC. There are a few exceptions to
+ * that, like allocations related to debugging code that is not supposed to
+ * be enabled in production. This function can be used to exempt specific
+ * memory contexts from the assertion in palloc().
+ */
+void
+MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
+{
+	AssertArg(MemoryContextIsValid(context));
+#ifdef USE_ASSERT_CHECKING
+	context->allowInCritSection = allow;
+#endif
+}
 
 /*
  * GetMemoryChunkSpace
  *		Given a currently-allocated chunk, determine the total space
@@ -533,6 +551,7 @@ MemoryContextCreate(NodeTag tag, Size size,
 	MemoryContext node;
 	Size		needed = size + strlen(name) + 1;
 
+	/* creating new memory contexts is not allowed in a critical section */
 	Assert(CritSectionCount == 0);
 
 	/* Get space for node and name */
@@ -570,6 +589,11 @@ MemoryContextCreate(NodeTag tag, Size size,
 		node->parent = parent;
 		node->nextchild = parent->firstchild;
 		parent->firstchild = node;
+
+#ifdef USE_ASSERT_CHECKING
+		/* inherit allowInCritSection flag from parent */
+		node->allowInCritSection = parent->allowInCritSection;
+#endif
 	}
 
 	VALGRIND_CREATE_MEMPOOL(node, 0, false);
......
@@ -60,6 +60,9 @@ typedef struct MemoryContextData
 	MemoryContext nextchild;	/* next child of same parent */
 	char	   *name;			/* context name (just for debugging) */
 	bool		isReset;		/* T = no space alloced since last reset */
+#ifdef USE_ASSERT_CHECKING
+	bool		allowInCritSection;		/* allow palloc in critical section */
+#endif
 } MemoryContextData;
 
 /* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */
......
@@ -182,6 +182,7 @@ extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
 extern Size LWLockShmemSize(void);
 extern void CreateLWLocks(void);
+extern void InitLWLockAccess(void);
 
 /*
  * The traditional method for obtaining an lwlock for use by an extension is
......
@@ -101,6 +101,8 @@ extern MemoryContext GetMemoryChunkContext(void *pointer);
 extern MemoryContext MemoryContextGetParent(MemoryContext context);
 extern bool MemoryContextIsEmpty(MemoryContext context);
 extern void MemoryContextStats(MemoryContext context);
+extern void MemoryContextAllowInCriticalSection(MemoryContext context,
+									bool allow);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 extern void MemoryContextCheck(MemoryContext context);
......
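
Editor's note: a second sketch (also not part of the commit; the context names and the function are invented) of the inheritance rule added to MemoryContextCreate above: a child context created under an exempted parent inherits allowInCritSection, so it can be used inside a critical section without its own explicit call.

#include "postgres.h"

#include "miscadmin.h"
#include "utils/memutils.h"

static void
inheritance_sketch(void)
{
	MemoryContext parent;
	MemoryContext child;

	parent = AllocSetContextCreate(TopMemoryContext,
								   "Exempt Parent",
								   ALLOCSET_DEFAULT_MINSIZE,
								   ALLOCSET_DEFAULT_INITSIZE,
								   ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextAllowInCriticalSection(parent, true);

	/*
	 * The child must be created before entering the critical section;
	 * MemoryContextCreate itself still Asserts CritSectionCount == 0.
	 * It inherits the parent's allowInCritSection flag.
	 */
	child = AllocSetContextCreate(parent,
								  "Exempt Child",
								  ALLOCSET_DEFAULT_MINSIZE,
								  ALLOCSET_DEFAULT_INITSIZE,
								  ALLOCSET_DEFAULT_MAXSIZE);

	START_CRIT_SECTION();
	/* Passes AssertNotInCriticalSection because the flag was inherited. */
	(void) MemoryContextAlloc(child, 64);
	END_CRIT_SECTION();

	MemoryContextDelete(parent);	/* also deletes the child */
}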