Commit 5f1433ac authored by Tom Lane's avatar Tom Lane

Prevent memory leaks associated with relcache rd_partcheck structures.

The original coding of generate_partition_qual() just copied the list
of predicate expressions into the global CacheMemoryContext, making it
effectively impossible to clean up when the owning relcache entry is
destroyed --- the relevant code in RelationDestroyRelation() only managed
to free the topmost List header :-(.  This resulted in a session-lifespan
memory leak whenever a table partition's relcache entry is rebuilt.
Fortunately, that's not normally a large data structure, and rebuilds
shouldn't occur all that often in production situations; but this is
still a bug worth fixing back to v10 where the code was introduced.

To fix, put the cached expression tree into its own small memory context,
as we do with other complicated substructures of relcache entries.
Also, deal more honestly with the case that a partition has an empty
partcheck list; while that probably isn't a case that's very interesting
for production use, it's legal.

In passing, clarify comments about how partitioning-related relcache
data structures are managed, and add some Asserts that we're not leaking
old copies when we overwrite these data fields.

Amit Langote and Tom Lane

Discussion: https://postgr.es/m/7961.1552498252@sss.pgh.pa.us
parent c0985099
...@@ -49,10 +49,13 @@ typedef struct PartitionDirectoryEntry ...@@ -49,10 +49,13 @@ typedef struct PartitionDirectoryEntry
/* /*
* RelationBuildPartitionDesc * RelationBuildPartitionDesc
* Form rel's partition descriptor * Form rel's partition descriptor, and store in relcache entry
* *
* Not flushed from the cache by RelationClearRelation() unless changed because * Note: the descriptor won't be flushed from the cache by
* of addition or removal of partition. * RelationClearRelation() unless it's changed because of
* addition or removal of a partition. Hence, code holding a lock
* that's sufficient to prevent that can assume that rd_partdesc
* won't change underneath it.
*/ */
void void
RelationBuildPartitionDesc(Relation rel) RelationBuildPartitionDesc(Relation rel)
...@@ -173,7 +176,19 @@ RelationBuildPartitionDesc(Relation rel) ...@@ -173,7 +176,19 @@ RelationBuildPartitionDesc(Relation rel)
++i; ++i;
} }
/* Now build the actual relcache partition descriptor */ /* Assert we aren't about to leak any old data structure */
Assert(rel->rd_pdcxt == NULL);
Assert(rel->rd_partdesc == NULL);
/*
* Now build the actual relcache partition descriptor. Note that the
* order of operations here is fairly critical. If we fail partway
* through this code, we won't have leaked memory because the rd_pdcxt is
* attached to the relcache entry immediately, so it'll be freed whenever
* the entry is rebuilt or destroyed. However, we don't assign to
* rd_partdesc until the cached data structure is fully complete and
* valid, so that no other code might try to use it.
*/
rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext, rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
"partition descriptor", "partition descriptor",
ALLOCSET_SMALL_SIZES); ALLOCSET_SMALL_SIZES);
......
...@@ -41,11 +41,11 @@ static List *generate_partition_qual(Relation rel); ...@@ -41,11 +41,11 @@ static List *generate_partition_qual(Relation rel);
/* /*
* RelationBuildPartitionKey * RelationBuildPartitionKey
* Build and attach to relcache partition key data of relation * Build partition key data of relation, and attach to relcache
* *
* Partitioning key data is a complex structure; to avoid complicated logic to * Partitioning key data is a complex structure; to avoid complicated logic to
* free individual elements whenever the relcache entry is flushed, we give it * free individual elements whenever the relcache entry is flushed, we give it
* its own memory context, child of CacheMemoryContext, which can easily be * its own memory context, a child of CacheMemoryContext, which can easily be
* deleted on its own. To avoid leaking memory in that context in case of an * deleted on its own. To avoid leaking memory in that context in case of an
* error partway through this function, the context is initially created as a * error partway through this function, the context is initially created as a
* child of CurTransactionContext and only re-parented to CacheMemoryContext * child of CurTransactionContext and only re-parented to CacheMemoryContext
...@@ -144,6 +144,7 @@ RelationBuildPartitionKey(Relation relation) ...@@ -144,6 +144,7 @@ RelationBuildPartitionKey(Relation relation)
MemoryContextSwitchTo(oldcxt); MemoryContextSwitchTo(oldcxt);
} }
/* Allocate assorted arrays in the partkeycxt, which we'll fill below */
oldcxt = MemoryContextSwitchTo(partkeycxt); oldcxt = MemoryContextSwitchTo(partkeycxt);
key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
...@@ -151,8 +152,6 @@ RelationBuildPartitionKey(Relation relation) ...@@ -151,8 +152,6 @@ RelationBuildPartitionKey(Relation relation)
key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
/* Gather type and collation info as well */
key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
...@@ -235,6 +234,10 @@ RelationBuildPartitionKey(Relation relation) ...@@ -235,6 +234,10 @@ RelationBuildPartitionKey(Relation relation)
ReleaseSysCache(tuple); ReleaseSysCache(tuple);
/* Assert that we're not leaking any old data during assignments below */
Assert(relation->rd_partkeycxt == NULL);
Assert(relation->rd_partkey == NULL);
/* /*
* Success --- reparent our context and make the relcache point to the * Success --- reparent our context and make the relcache point to the
* newly constructed key * newly constructed key
...@@ -305,11 +308,9 @@ get_partition_qual_relid(Oid relid) ...@@ -305,11 +308,9 @@ get_partition_qual_relid(Oid relid)
* Generate partition predicate from rel's partition bound expression. The * Generate partition predicate from rel's partition bound expression. The
* function returns a NIL list if there is no predicate. * function returns a NIL list if there is no predicate.
* *
* Result expression tree is stored CacheMemoryContext to ensure it survives * We cache a copy of the result in the relcache entry, after constructing
* as long as the relcache entry. But we should be running in a less long-lived * it using the caller's context. This approach avoids leaking any data
* working context. To avoid leaking cache memory if this routine fails partway * into long-lived cache contexts, especially if we fail partway through.
* through, we build in working memory and then copy the completed structure
* into cache memory.
*/ */
static List * static List *
generate_partition_qual(Relation rel) generate_partition_qual(Relation rel)
...@@ -326,8 +327,8 @@ generate_partition_qual(Relation rel) ...@@ -326,8 +327,8 @@ generate_partition_qual(Relation rel)
/* Guard against stack overflow due to overly deep partition tree */ /* Guard against stack overflow due to overly deep partition tree */
check_stack_depth(); check_stack_depth();
/* Quick copy */ /* If we already cached the result, just return a copy */
if (rel->rd_partcheck != NIL) if (rel->rd_partcheckvalid)
return copyObject(rel->rd_partcheck); return copyObject(rel->rd_partcheck);
/* Grab at least an AccessShareLock on the parent table */ /* Grab at least an AccessShareLock on the parent table */
...@@ -373,13 +374,36 @@ generate_partition_qual(Relation rel) ...@@ -373,13 +374,36 @@ generate_partition_qual(Relation rel)
if (found_whole_row) if (found_whole_row)
elog(ERROR, "unexpected whole-row reference found in partition key"); elog(ERROR, "unexpected whole-row reference found in partition key");
/* Save a copy in the relcache */ /* Assert that we're not leaking any old data during assignments below */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext); Assert(rel->rd_partcheckcxt == NULL);
rel->rd_partcheck = copyObject(result); Assert(rel->rd_partcheck == NIL);
MemoryContextSwitchTo(oldcxt);
/*
* Save a copy in the relcache. The order of these operations is fairly
* critical to avoid memory leaks and ensure that we don't leave a corrupt
* relcache entry if we fail partway through copyObject.
*
* If, as is definitely possible, the partcheck list is NIL, then we do
* not need to make a context to hold it.
*/
if (result != NIL)
{
rel->rd_partcheckcxt = AllocSetContextCreate(CacheMemoryContext,
"partition constraint",
ALLOCSET_SMALL_SIZES);
MemoryContextCopyAndSetIdentifier(rel->rd_partcheckcxt,
RelationGetRelationName(rel));
oldcxt = MemoryContextSwitchTo(rel->rd_partcheckcxt);
rel->rd_partcheck = copyObject(result);
MemoryContextSwitchTo(oldcxt);
}
else
rel->rd_partcheck = NIL;
rel->rd_partcheckvalid = true;
/* Keep the parent locked until commit */ /* Keep the parent locked until commit */
relation_close(parent, NoLock); relation_close(parent, NoLock);
/* Return the working copy to the caller */
return result; return result;
} }
...@@ -1175,11 +1175,15 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) ...@@ -1175,11 +1175,15 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
} }
else else
{ {
relation->rd_partkeycxt = NULL;
relation->rd_partkey = NULL; relation->rd_partkey = NULL;
relation->rd_partkeycxt = NULL;
relation->rd_partdesc = NULL; relation->rd_partdesc = NULL;
relation->rd_pdcxt = NULL; relation->rd_pdcxt = NULL;
} }
/* ... but partcheck is not loaded till asked for */
relation->rd_partcheck = NIL;
relation->rd_partcheckvalid = false;
relation->rd_partcheckcxt = NULL;
/* /*
* initialize access method information * initialize access method information
...@@ -2364,8 +2368,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) ...@@ -2364,8 +2368,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
MemoryContextDelete(relation->rd_partkeycxt); MemoryContextDelete(relation->rd_partkeycxt);
if (relation->rd_pdcxt) if (relation->rd_pdcxt)
MemoryContextDelete(relation->rd_pdcxt); MemoryContextDelete(relation->rd_pdcxt);
if (relation->rd_partcheck) if (relation->rd_partcheckcxt)
pfree(relation->rd_partcheck); MemoryContextDelete(relation->rd_partcheckcxt);
if (relation->rd_fdwroutine) if (relation->rd_fdwroutine)
pfree(relation->rd_fdwroutine); pfree(relation->rd_fdwroutine);
pfree(relation); pfree(relation);
...@@ -5600,18 +5604,20 @@ load_relcache_init_file(bool shared) ...@@ -5600,18 +5604,20 @@ load_relcache_init_file(bool shared)
* format is complex and subject to change). They must be rebuilt if * format is complex and subject to change). They must be rebuilt if
* needed by RelationCacheInitializePhase3. This is not expected to * needed by RelationCacheInitializePhase3. This is not expected to
* be a big performance hit since few system catalogs have such. Ditto * be a big performance hit since few system catalogs have such. Ditto
* for RLS policy data, index expressions, predicates, exclusion info, * for RLS policy data, partition info, index expressions, predicates,
* and FDW info. * exclusion info, and FDW info.
*/ */
rel->rd_rules = NULL; rel->rd_rules = NULL;
rel->rd_rulescxt = NULL; rel->rd_rulescxt = NULL;
rel->trigdesc = NULL; rel->trigdesc = NULL;
rel->rd_rsdesc = NULL; rel->rd_rsdesc = NULL;
rel->rd_partkeycxt = NULL;
rel->rd_partkey = NULL; rel->rd_partkey = NULL;
rel->rd_pdcxt = NULL; rel->rd_partkeycxt = NULL;
rel->rd_partdesc = NULL; rel->rd_partdesc = NULL;
rel->rd_pdcxt = NULL;
rel->rd_partcheck = NIL; rel->rd_partcheck = NIL;
rel->rd_partcheckvalid = false;
rel->rd_partcheckcxt = NULL;
rel->rd_indexprs = NIL; rel->rd_indexprs = NIL;
rel->rd_indpred = NIL; rel->rd_indpred = NIL;
rel->rd_exclops = NULL; rel->rd_exclops = NULL;
......
...@@ -95,11 +95,13 @@ typedef struct RelationData ...@@ -95,11 +95,13 @@ typedef struct RelationData
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */ List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
bool rd_fkeyvalid; /* true if list has been computed */ bool rd_fkeyvalid; /* true if list has been computed */
MemoryContext rd_partkeycxt; /* private memory cxt for the below */
struct PartitionKeyData *rd_partkey; /* partition key, or NULL */ struct PartitionKeyData *rd_partkey; /* partition key, or NULL */
MemoryContext rd_pdcxt; /* private context for partdesc */ MemoryContext rd_partkeycxt; /* private context for rd_partkey, if any */
struct PartitionDescData *rd_partdesc; /* partitions, or NULL */ struct PartitionDescData *rd_partdesc; /* partitions, or NULL */
MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */
List *rd_partcheck; /* partition CHECK quals */ List *rd_partcheck; /* partition CHECK quals */
bool rd_partcheckvalid; /* true if list has been computed */
MemoryContext rd_partcheckcxt; /* private cxt for rd_partcheck, if any */
/* data managed by RelationGetIndexList: */ /* data managed by RelationGetIndexList: */
List *rd_indexlist; /* list of OIDs of indexes on relation */ List *rd_indexlist; /* list of OIDs of indexes on relation */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment