From 5f1433ac5e7f943b29ef01266b6b8fc915e6b917 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 13 Apr 2019 13:22:26 -0400 Subject: [PATCH] Prevent memory leaks associated with relcache rd_partcheck structures. The original coding of generate_partition_qual() just copied the list of predicate expressions into the global CacheMemoryContext, making it effectively impossible to clean up when the owning relcache entry is destroyed --- the relevant code in RelationDestroyRelation() only managed to free the topmost List header :-(. This resulted in a session-lifespan memory leak whenever a table partition's relcache entry is rebuilt. Fortunately, that's not normally a large data structure, and rebuilds shouldn't occur all that often in production situations; but this is still a bug worth fixing back to v10 where the code was introduced. To fix, put the cached expression tree into its own small memory context, as we do with other complicated substructures of relcache entries. Also, deal more honestly with the case that a partition has an empty partcheck list; while that probably isn't a case that's very interesting for production use, it's legal. In passing, clarify comments about how partitioning-related relcache data structures are managed, and add some Asserts that we're not leaking old copies when we overwrite these data fields. Amit Langote and Tom Lane Discussion: https://postgr.es/m/7961.1552498252@sss.pgh.pa.us --- src/backend/partitioning/partdesc.c | 23 +++++++++--- src/backend/utils/cache/partcache.c | 54 +++++++++++++++++++++-------- src/backend/utils/cache/relcache.c | 20 +++++++---- src/include/utils/rel.h | 6 ++-- 4 files changed, 75 insertions(+), 28 deletions(-) diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c index e436d1ed34..4d6595b249 100644 --- a/src/backend/partitioning/partdesc.c +++ b/src/backend/partitioning/partdesc.c @@ -49,10 +49,13 @@ typedef struct PartitionDirectoryEntry /* * RelationBuildPartitionDesc - * Form rel's partition descriptor + * Form rel's partition descriptor, and store in relcache entry * - * Not flushed from the cache by RelationClearRelation() unless changed because - * of addition or removal of partition. + * Note: the descriptor won't be flushed from the cache by + * RelationClearRelation() unless it's changed because of + * addition or removal of a partition. Hence, code holding a lock + * that's sufficient to prevent that can assume that rd_partdesc + * won't change underneath it. */ void RelationBuildPartitionDesc(Relation rel) @@ -173,7 +176,19 @@ RelationBuildPartitionDesc(Relation rel) ++i; } - /* Now build the actual relcache partition descriptor */ + /* Assert we aren't about to leak any old data structure */ + Assert(rel->rd_pdcxt == NULL); + Assert(rel->rd_partdesc == NULL); + + /* + * Now build the actual relcache partition descriptor. Note that the + * order of operations here is fairly critical. If we fail partway + * through this code, we won't have leaked memory because the rd_pdcxt is + * attached to the relcache entry immediately, so it'll be freed whenever + * the entry is rebuilt or destroyed. However, we don't assign to + * rd_partdesc until the cached data structure is fully complete and + * valid, so that no other code might try to use it. + */ rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext, "partition descriptor", ALLOCSET_SMALL_SIZES); diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c index 8f43d682cf..29b4a76e13 100644 --- a/src/backend/utils/cache/partcache.c +++ b/src/backend/utils/cache/partcache.c @@ -41,11 +41,11 @@ static List *generate_partition_qual(Relation rel); /* * RelationBuildPartitionKey - * Build and attach to relcache partition key data of relation + * Build partition key data of relation, and attach to relcache * * Partitioning key data is a complex structure; to avoid complicated logic to * free individual elements whenever the relcache entry is flushed, we give it - * its own memory context, child of CacheMemoryContext, which can easily be + * its own memory context, a child of CacheMemoryContext, which can easily be * deleted on its own. To avoid leaking memory in that context in case of an * error partway through this function, the context is initially created as a * child of CurTransactionContext and only re-parented to CacheMemoryContext @@ -144,6 +144,7 @@ RelationBuildPartitionKey(Relation relation) MemoryContextSwitchTo(oldcxt); } + /* Allocate assorted arrays in the partkeycxt, which we'll fill below */ oldcxt = MemoryContextSwitchTo(partkeycxt); key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); @@ -151,8 +152,6 @@ RelationBuildPartitionKey(Relation relation) key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); - - /* Gather type and collation info as well */ key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); @@ -235,6 +234,10 @@ RelationBuildPartitionKey(Relation relation) ReleaseSysCache(tuple); + /* Assert that we're not leaking any old data during assignments below */ + Assert(relation->rd_partkeycxt == NULL); + Assert(relation->rd_partkey == NULL); + /* * Success --- reparent our context and make the relcache point to the * newly constructed key @@ -305,11 +308,9 @@ get_partition_qual_relid(Oid relid) * Generate partition predicate from rel's partition bound expression. The * function returns a NIL list if there is no predicate. * - * Result expression tree is stored CacheMemoryContext to ensure it survives - * as long as the relcache entry. But we should be running in a less long-lived - * working context. To avoid leaking cache memory if this routine fails partway - * through, we build in working memory and then copy the completed structure - * into cache memory. + * We cache a copy of the result in the relcache entry, after constructing + * it using the caller's context. This approach avoids leaking any data + * into long-lived cache contexts, especially if we fail partway through. */ static List * generate_partition_qual(Relation rel) @@ -326,8 +327,8 @@ generate_partition_qual(Relation rel) /* Guard against stack overflow due to overly deep partition tree */ check_stack_depth(); - /* Quick copy */ - if (rel->rd_partcheck != NIL) + /* If we already cached the result, just return a copy */ + if (rel->rd_partcheckvalid) return copyObject(rel->rd_partcheck); /* Grab at least an AccessShareLock on the parent table */ @@ -373,13 +374,36 @@ generate_partition_qual(Relation rel) if (found_whole_row) elog(ERROR, "unexpected whole-row reference found in partition key"); - /* Save a copy in the relcache */ - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - rel->rd_partcheck = copyObject(result); - MemoryContextSwitchTo(oldcxt); + /* Assert that we're not leaking any old data during assignments below */ + Assert(rel->rd_partcheckcxt == NULL); + Assert(rel->rd_partcheck == NIL); + + /* + * Save a copy in the relcache. The order of these operations is fairly + * critical to avoid memory leaks and ensure that we don't leave a corrupt + * relcache entry if we fail partway through copyObject. + * + * If, as is definitely possible, the partcheck list is NIL, then we do + * not need to make a context to hold it. + */ + if (result != NIL) + { + rel->rd_partcheckcxt = AllocSetContextCreate(CacheMemoryContext, + "partition constraint", + ALLOCSET_SMALL_SIZES); + MemoryContextCopyAndSetIdentifier(rel->rd_partcheckcxt, + RelationGetRelationName(rel)); + oldcxt = MemoryContextSwitchTo(rel->rd_partcheckcxt); + rel->rd_partcheck = copyObject(result); + MemoryContextSwitchTo(oldcxt); + } + else + rel->rd_partcheck = NIL; + rel->rd_partcheckvalid = true; /* Keep the parent locked until commit */ relation_close(parent, NoLock); + /* Return the working copy to the caller */ return result; } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 64f3c2e887..4b884d5527 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1175,11 +1175,15 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) } else { - relation->rd_partkeycxt = NULL; relation->rd_partkey = NULL; + relation->rd_partkeycxt = NULL; relation->rd_partdesc = NULL; relation->rd_pdcxt = NULL; } + /* ... but partcheck is not loaded till asked for */ + relation->rd_partcheck = NIL; + relation->rd_partcheckvalid = false; + relation->rd_partcheckcxt = NULL; /* * initialize access method information @@ -2364,8 +2368,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) MemoryContextDelete(relation->rd_partkeycxt); if (relation->rd_pdcxt) MemoryContextDelete(relation->rd_pdcxt); - if (relation->rd_partcheck) - pfree(relation->rd_partcheck); + if (relation->rd_partcheckcxt) + MemoryContextDelete(relation->rd_partcheckcxt); if (relation->rd_fdwroutine) pfree(relation->rd_fdwroutine); pfree(relation); @@ -5600,18 +5604,20 @@ load_relcache_init_file(bool shared) * format is complex and subject to change). They must be rebuilt if * needed by RelationCacheInitializePhase3. This is not expected to * be a big performance hit since few system catalogs have such. Ditto - * for RLS policy data, index expressions, predicates, exclusion info, - * and FDW info. + * for RLS policy data, partition info, index expressions, predicates, + * exclusion info, and FDW info. */ rel->rd_rules = NULL; rel->rd_rulescxt = NULL; rel->trigdesc = NULL; rel->rd_rsdesc = NULL; - rel->rd_partkeycxt = NULL; rel->rd_partkey = NULL; - rel->rd_pdcxt = NULL; + rel->rd_partkeycxt = NULL; rel->rd_partdesc = NULL; + rel->rd_pdcxt = NULL; rel->rd_partcheck = NIL; + rel->rd_partcheckvalid = false; + rel->rd_partcheckcxt = NULL; rel->rd_indexprs = NIL; rel->rd_indpred = NIL; rel->rd_exclops = NULL; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 764e6fad6f..bddfcafe26 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -95,11 +95,13 @@ typedef struct RelationData List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */ bool rd_fkeyvalid; /* true if list has been computed */ - MemoryContext rd_partkeycxt; /* private memory cxt for the below */ struct PartitionKeyData *rd_partkey; /* partition key, or NULL */ - MemoryContext rd_pdcxt; /* private context for partdesc */ + MemoryContext rd_partkeycxt; /* private context for rd_partkey, if any */ struct PartitionDescData *rd_partdesc; /* partitions, or NULL */ + MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */ List *rd_partcheck; /* partition CHECK quals */ + bool rd_partcheckvalid; /* true if list has been computed */ + MemoryContext rd_partcheckcxt; /* private cxt for rd_partcheck, if any */ /* data managed by RelationGetIndexList: */ List *rd_indexlist; /* list of OIDs of indexes on relation */ -- 2.40.0