From e06e56212c95e5187d930ac4a98131a88cb934ed Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 7 Jun 2015 15:32:09 -0400 Subject: [PATCH] Use a safer method for determining whether relcache init file is stale. When we invalidate the relcache entry for a system catalog or index, we must also delete the relcache "init file" if the init file contains a copy of that rel's entry. The old way of doing this relied on a specially maintained list of the OIDs of relations present in the init file: we made the list either when reading the file in, or when writing the file out. The problem is that when writing the file out, we included only rels present in our local relcache, which might have already suffered some deletions due to relcache inval events. In such cases we correctly decided not to overwrite the real init file with incomplete data --- but we still used the incomplete initFileRelationIds list for the rest of the current session. This could result in wrong decisions about whether the session's own actions require deletion of the init file, potentially allowing an init file created by some other concurrent session to be left around even though it's been made stale. Since we don't support changing the schema of a system catalog at runtime, the only likely scenario in which this would cause a problem in the field involves a "vacuum full" on a catalog concurrently with other activity, and even then it's far from easy to provoke. Remarkably, this has been broken since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had never seen a reproducible test case until recently. If it did happen in the field, the symptoms would probably involve unexpected "cache lookup failed" errors to begin with, then "could not open file" failures after the next checkpoint, as all accesses to the affected catalog stopped working. Recovery would require manually removing the stale "pg_internal.init" file. To fix, get rid of the initFileRelationIds list, and instead consult syscache.c's list of relations used in catalog caches to decide whether a relation is included in the init file. This should be a tad more efficient anyway, since we're replacing linear search of a list with ~100 entries with a binary search. It's a bit ugly that the init file contents are now so directly tied to the catalog caches, but in practice that won't make much difference. Back-patch to all supported branches. --- src/backend/utils/cache/inval.c | 7 ++- src/backend/utils/cache/relcache.c | 57 +++++++---------------- src/backend/utils/cache/syscache.c | 75 ++++++++++++++++++++++++++++-- src/include/utils/relcache.h | 1 - src/include/utils/syscache.h | 2 + 5 files changed, 95 insertions(+), 47 deletions(-) diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 396cc0bea6..59851ef02c 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -455,10 +455,13 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId) (void) GetCurrentCommandId(true); /* - * If the relation being invalidated is one of those cached in the + * If the relation being invalidated is one of those cached in the local * relcache init file, mark that we need to zap that file at commit. + * (Note: perhaps it would be better if this code were a bit more + * decoupled from the knowledge that the init file contains exactly those + * non-shared rels used in catalog caches.) */ - if (RelationIdIsInInitFile(relId)) + if (OidIsValid(dbId) && RelationSupportsSysCache(relId)) transInvalInfo->RelcacheInitFileInval = true; } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 7aaa3fb802..5d748914f9 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -130,14 +130,6 @@ bool criticalSharedRelcachesBuilt = false; */ static long relcacheInvalsReceived = 0L; -/* - * This list remembers the OIDs of the non-shared relations cached in the - * database's local relcache init file. Note that there is no corresponding - * list for the shared relcache init file, for reasons explained in the - * comments for RelationCacheInitFileRemove. - */ -static List *initFileRelationIds = NIL; - /* * This flag lets us optimize away work in AtEO(Sub)Xact_RelationCache(). */ @@ -3083,9 +3075,6 @@ RelationCacheInitializePhase3(void) */ InitCatalogCachePhase2(); - /* reset initFileRelationIds list; we'll fill it during write */ - initFileRelationIds = NIL; - /* now write the files */ write_relcache_init_file(true); write_relcache_init_file(false); @@ -4248,10 +4237,6 @@ load_relcache_init_file(bool shared) for (relno = 0; relno < num_rels; relno++) { RelationCacheInsert(rels[relno]); - /* also make a list of their OIDs, for RelationIdIsInInitFile */ - if (!shared) - initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]), - initFileRelationIds); } pfree(rels); @@ -4288,9 +4273,15 @@ write_relcache_init_file(bool shared) int magic; HASH_SEQ_STATUS status; RelIdCacheEnt *idhentry; - MemoryContext oldcxt; int i; + /* + * If we have already received any relcache inval events, there's no + * chance of succeeding so we may as well skip the whole thing. + */ + if (relcacheInvalsReceived != 0L) + return; + /* * We must write a temporary file and rename it into place. Otherwise, * another backend starting at about the same time might crash trying to @@ -4350,6 +4341,16 @@ write_relcache_init_file(bool shared) if (relform->relisshared != shared) continue; + /* + * Ignore if not supposed to be in init file. We can allow any shared + * relation that's been loaded so far to be in the shared init file, + * but unshared relations must be used for catalog caches. (Note: if + * you want to change the criterion for rels to be kept in the init + * file, see also inval.c.) + */ + if (!shared && !RelationSupportsSysCache(RelationGetRelid(rel))) + continue; + /* first write the relcache entry proper */ write_item(rel, sizeof(RelationData), fp); @@ -4406,15 +4407,6 @@ write_relcache_init_file(bool shared) relform->relnatts * sizeof(int16), fp); } - - /* also make a list of their OIDs, for RelationIdIsInInitFile */ - if (!shared) - { - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - initFileRelationIds = lcons_oid(RelationGetRelid(rel), - initFileRelationIds); - MemoryContextSwitchTo(oldcxt); - } } if (FreeFile(fp)) @@ -4473,21 +4465,6 @@ write_item(const void *data, Size len, FILE *fp) elog(FATAL, "could not write init file"); } -/* - * Detect whether a given relation (identified by OID) is one of the ones - * we store in the local relcache init file. - * - * Note that we effectively assume that all backends running in a database - * would choose to store the same set of relations in the init file; - * otherwise there are cases where we'd fail to detect the need for an init - * file invalidation. This does not seem likely to be a problem in practice. - */ -bool -RelationIdIsInInitFile(Oid relationId) -{ - return list_member_oid(initFileRelationIds, relationId); -} - /* * Invalidate (remove) the init file during commit of a transaction that * changed one or more of the relation cache entries that are kept in the diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 55d852e95e..f82d49af0e 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -754,11 +754,18 @@ static const struct cachedesc cacheinfo[] = { } }; -static CatCache *SysCache[ - lengthof(cacheinfo)]; -static int SysCacheSize = lengthof(cacheinfo); +#define SysCacheSize ((int) lengthof(cacheinfo)) + +static CatCache *SysCache[SysCacheSize]; + static bool CacheInitialized = false; +/* Sorted array of OIDs of tables and indexes used by caches */ +static Oid SysCacheSupportingRelOid[SysCacheSize * 2]; +static int SysCacheSupportingRelOidSize; + +static int oid_compare(const void *a, const void *b); + /* * InitCatalogCache - initialize the caches @@ -772,10 +779,12 @@ void InitCatalogCache(void) { int cacheId; + int i, + j; Assert(!CacheInitialized); - MemSet(SysCache, 0, sizeof(SysCache)); + SysCacheSupportingRelOidSize = 0; for (cacheId = 0; cacheId < SysCacheSize; cacheId++) { @@ -788,7 +797,25 @@ InitCatalogCache(void) if (!PointerIsValid(SysCache[cacheId])) elog(ERROR, "could not initialize cache %u (%d)", cacheinfo[cacheId].reloid, cacheId); + /* Accumulate data for OID lists, too */ + SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] = + cacheinfo[cacheId].reloid; + SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] = + cacheinfo[cacheId].indoid; + } + + Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid)); + + /* Sort and de-dup OID arrays, so we can use binary search. */ + pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize, + sizeof(Oid), oid_compare); + for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++) + { + if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j]) + SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i]; } + SysCacheSupportingRelOidSize = j + 1; + CacheInitialized = true; } @@ -1052,3 +1079,43 @@ SearchSysCacheList(int cacheId, int nkeys, return SearchCatCacheList(SysCache[cacheId], nkeys, key1, key2, key3, key4); } + +/* + * Test whether a relation supports a system cache, ie it is either a + * cached table or the index used for a cache. + */ +bool +RelationSupportsSysCache(Oid relid) +{ + int low = 0, + high = SysCacheSupportingRelOidSize - 1; + + while (low <= high) + { + int middle = low + (high - low) / 2; + + if (SysCacheSupportingRelOid[middle] == relid) + return true; + if (SysCacheSupportingRelOid[middle] < relid) + low = middle + 1; + else + high = middle - 1; + } + + return false; +} + + +/* + * OID comparator for pg_qsort + */ +static int +oid_compare(const void *a, const void *b) +{ + Oid oa = *((const Oid *) a); + Oid ob = *((const Oid *) b); + + if (oa == ob) + return 0; + return (oa > ob) ? 1 : -1; +} diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 9e2ea8ce06..e9bd87d5c5 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -96,7 +96,6 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid, /* * Routines to help manage rebuilding of relcache init files */ -extern bool RelationIdIsInInitFile(Oid relationId); extern void RelationCacheInitFilePreInvalidate(void); extern void RelationCacheInitFilePostInvalidate(void); extern void RelationCacheInitFileRemove(void); diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 55d22303a7..804c04c096 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -116,6 +116,8 @@ extern Datum SysCacheGetAttr(int cacheId, HeapTuple tup, extern struct catclist *SearchSysCacheList(int cacheId, int nkeys, Datum key1, Datum key2, Datum key3, Datum key4); +extern bool RelationSupportsSysCache(Oid relid); + /* * The use of the macros below rather than direct calls to the corresponding * functions is encouraged, as it insulates the caller from changes in the -- 2.40.0