From 45476031a54c12bebf2f209ea2b6fb3bfc905193 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 16 Aug 2011 13:12:03 -0400 Subject: [PATCH] Fix race condition in relcache init file invalidation. The previous code tried to synchronize by unlinking the init file twice, but that doesn't actually work: it leaves a window wherein a third process could read the already-stale init file but miss the SI messages that would tell it the data is stale. The result would be bizarre failures in catalog accesses, typically "could not read block 0 in file ..." later during startup. Instead, hold RelCacheInitLock across both the unlink and the sending of the SI messages. This is more straightforward, and might even be a bit faster since only one unlink call is needed. This has been wrong since it was put in (in 2002!), so back-patch to all supported releases. --- src/backend/access/transam/twophase.c | 4 +- src/backend/utils/cache/inval.c | 33 +++++++------- src/backend/utils/cache/relcache.c | 66 +++++++++++++++------------ src/include/utils/relcache.h | 3 +- 4 files changed, 57 insertions(+), 49 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 281268120e..54176ee9df 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * after we send the SI messages. See AtEOXact_Inval() */ if (hdr->initfileinval) - RelationCacheInitFileInvalidate(true); + RelationCacheInitFilePreInvalidate(); SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs); if (hdr->initfileinval) - RelationCacheInitFileInvalidate(false); + RelationCacheInitFilePostInvalidate(); /* And now do the callbacks */ if (isCommit) diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index a5580bd92f..4249bd3376 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, return numSharedInvalidMessagesArray; } -#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \ -{ \ - DatabasePath = GetDatabasePath(dbo, tbo); \ - elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \ - RelationCacheInitFileInvalidate(tf); \ - pfree(DatabasePath); \ -} - /* * ProcessCommittedInvalidationMessages is executed by xact_redo_commit() * to process invalidation messages added to commit records. * * Relcache init file invalidation requires processing both * before and after we send the SI messages. See AtEOXact_Inval() - * - * We deliberately avoid SetDatabasePath() since it is intended to be used - * only once by normal backends, so we set DatabasePath directly then - * pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro. */ void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, @@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, (RelcacheInitFileInval ? " and relcache file invalidation" : "")); if (RelcacheInitFileInval) - RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true); + { + /* + * RelationCacheInitFilePreInvalidate requires DatabasePath to be set, + * but we should not use SetDatabasePath during recovery, since it is + * intended to be used only once by normal backends. Hence, a quick + * hack: set DatabasePath directly then unset after use. + */ + DatabasePath = GetDatabasePath(dbid, tsid); + elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"", + DatabasePath); + RelationCacheInitFilePreInvalidate(); + pfree(DatabasePath); + DatabasePath = NULL; + } SendSharedInvalidMessages(msgs, nmsgs); if (RelcacheInitFileInval) - RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false); + RelationCacheInitFilePostInvalidate(); } /* @@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit) * unless we committed. */ if (transInvalInfo->RelcacheInitFileInval) - RelationCacheInitFileInvalidate(true); + RelationCacheInitFilePreInvalidate(); AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, &transInvalInfo->CurrentCmdInvalidMsgs); @@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit) SendSharedInvalidMessages); if (transInvalInfo->RelcacheInitFileInval) - RelationCacheInitFileInvalidate(false); + RelationCacheInitFilePostInvalidate(); } else if (transInvalInfo != NULL) { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 1a400a0a57..bc52c56021 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared) * updated by SI message processing, but we can't be sure whether what we * wrote out was up-to-date.) * - * This mustn't run concurrently with RelationCacheInitFileInvalidate, so - * grab a serialization lock for the duration. + * This mustn't run concurrently with the code that unlinks an init file + * and sends SI messages, so grab a serialization lock for the duration. */ LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); @@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId) * changed one or more of the relation cache entries that are kept in the * local init file. * - * We actually need to remove the init file twice: once just before sending - * the SI messages that include relcache inval for such relations, and once - * just after sending them. The unlink before ensures that a backend that's - * currently starting cannot read the now-obsolete init file and then miss - * the SI messages that will force it to update its relcache entries. (This - * works because the backend startup sequence gets into the PGPROC array before - * trying to load the init file.) The unlink after is to synchronize with a - * backend that may currently be trying to write an init file based on data - * that we've just rendered invalid. Such a backend will see the SI messages, - * but we can't leave the init file sitting around to fool later backends. + * To be safe against concurrent inspection or rewriting of the init file, + * we must take RelCacheInitLock, then remove the old init file, then send + * the SI messages that include relcache inval for such relations, and then + * release RelCacheInitLock. This serializes the whole affair against + * write_relcache_init_file, so that we can be sure that any other process + * that's concurrently trying to create a new init file won't move an + * already-stale version into place after we unlink. Also, because we unlink + * before sending the SI messages, a backend that's currently starting cannot + * read the now-obsolete init file and then miss the SI messages that will + * force it to update its relcache entries. (This works because the backend + * startup sequence gets into the sinval array before trying to load the init + * file.) * - * Ignore any failure to unlink the file, since it might not be there if - * no backend has been started since the last removal. + * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate, + * then release the lock in RelationCacheInitFilePostInvalidate. Caller must + * send any pending SI messages between those calls. * * Notice this deals only with the local init file, not the shared init file. * The reason is that there can never be a "significant" change to the @@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId) * be invalid enough to make it necessary to remove it. */ void -RelationCacheInitFileInvalidate(bool beforeSend) +RelationCacheInitFilePreInvalidate(void) { char initfilename[MAXPGPATH]; snprintf(initfilename, sizeof(initfilename), "%s/%s", DatabasePath, RELCACHE_INIT_FILENAME); - if (beforeSend) - { - /* no interlock needed here */ - unlink(initfilename); - } - else + LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); + + if (unlink(initfilename) < 0) { /* - * We need to interlock this against write_relcache_init_file, to - * guard against possibility that someone renames a new-but- - * already-obsolete init file into place just after we unlink. With - * the interlock, it's certain that write_relcache_init_file will - * notice our SI inval message before renaming into place, or else - * that we will execute second and successfully unlink the file. + * The file might not be there if no backend has been started since + * the last removal. But complain about failures other than ENOENT. + * Fortunately, it's not too late to abort the transaction if we + * can't get rid of the would-be-obsolete init file. */ - LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); - unlink(initfilename); - LWLockRelease(RelCacheInitLock); + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove cache file \"%s\": %m", + initfilename))); } } +void +RelationCacheInitFilePostInvalidate(void) +{ + LWLockRelease(RelCacheInitLock); +} + /* * Remove the init files during postmaster startup. * diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 1f4def5684..5de9f359ef 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -97,7 +97,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid, * Routines to help manage rebuilding of relcache init files */ extern bool RelationIdIsInInitFile(Oid relationId); -extern void RelationCacheInitFileInvalidate(bool beforeSend); +extern void RelationCacheInitFilePreInvalidate(void); +extern void RelationCacheInitFilePostInvalidate(void); extern void RelationCacheInitFileRemove(void); /* should be used only by relcache.c and catcache.c */ -- 2.40.0