Fix bugs in vacuum of shared rels, by keeping their relcache entries current.

author Andres Freund <andres@anarazel.de>

Tue, 12 Jun 2018 18:13:21 +0000 (11:13 -0700)

committer Andres Freund <andres@anarazel.de>

Tue, 12 Jun 2018 18:13:21 +0000 (11:13 -0700)
author Andres Freund <andres@anarazel.de>
Tue, 12 Jun 2018 18:13:21 +0000 (11:13 -0700)
committer Andres Freund <andres@anarazel.de>
Tue, 12 Jun 2018 18:13:21 +0000 (11:13 -0700)
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c

index d0e54b85352bf1fed3eac57f447fb12eb9527260..2226b325720e57503c9ab295a0e5f19f12ceec80 100644 (file)
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -521,12 +521,12 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
         (void) GetCurrentCommandId(true);
  
         /*
-        * If the relation being invalidated is one of those cached in the local
-        * relcache init file, mark that we need to zap that file at commit. Same
-        * is true when we are invalidating whole relcache.
+        * If the relation being invalidated is one of those cached in a relcache
+        * init file, mark that we need to zap that file at commit. For simplicity
+        * invalidations for a specific database always invalidate the shared file
+        * as well.  Also zap when we are invalidating whole relcache.
          */
-       if (OidIsValid(dbId) &&
-               (RelationIdIsInInitFile(relId) || relId == InvalidOid))
+       if (relId == InvalidOid || RelationIdIsInInitFile(relId))
                 transInvalInfo->RelcacheInitFileInval = true;
  }
  
@@ -881,18 +881,26 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
  
         if (RelcacheInitFileInval)
         {
+               elog(trace_recovery(DEBUG4), "removing relcache init files for database %u",
+                        dbid);
+
                 /*
-                * RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
-                * but we should not use SetDatabasePath during recovery, since it is
+                * RelationCacheInitFilePreInvalidate, when the invalidation message
+                * is for a specific database, requires DatabasePath to be set, but we
+                * should not use SetDatabasePath during recovery, since it is
                  * intended to be used only once by normal backends.  Hence, a quick
                  * hack: set DatabasePath directly then unset after use.
                  */
-               DatabasePath = GetDatabasePath(dbid, tsid);
-               elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
-                        DatabasePath);
+               if (OidIsValid(dbid))
+                       DatabasePath = GetDatabasePath(dbid, tsid);
+
                 RelationCacheInitFilePreInvalidate();
-               pfree(DatabasePath);
-               DatabasePath = NULL;
+
+               if (OidIsValid(dbid))
+               {
+                       pfree(DatabasePath);
+                       DatabasePath = NULL;
+               }
         }
  
         SendSharedInvalidMessages(msgs, nmsgs);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c

index c48ec20eee32e320642c9929159ac9b24544a2f0..d58237f5a1561c3635cd7a76e3717cbc5ef722e2 100644 (file)
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -248,6 +248,7 @@ static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
  static void RelationClearRelation(Relation relation, bool rebuild);
  
  static void RelationReloadIndexInfo(Relation relation);
+static void RelationReloadNailed(Relation relation);
  static void RelationFlushRelation(Relation relation);
  static void RememberToFreeTupleDescAtEOX(TupleDesc td);
  static void AtEOXact_cleanup(Relation relation, bool isCommit);
@@ -285,7 +286,7 @@ static void IndexSupportInitialize(oidvector *indclass,
  static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
                                   StrategyNumber numSupport);
  static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
-static void unlink_initfile(const char *initfilename);
+static void unlink_initfile(const char *initfilename, int elevel);
  static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
                                         PartitionDesc partdesc2);
  
@@ -2052,7 +2053,16 @@ RelationIdGetRelation(Oid relationId)
                                 RelationReloadIndexInfo(rd);
                         else
                                 RelationClearRelation(rd, true);
-                       Assert(rd->rd_isvalid);
+
+                       /*
+                        * Normally entries need to be valid here, but before the relcache
+                        * has been initialized, not enough infrastructure exists to
+                        * perform pg_class lookups. The structure of such entries doesn't
+                        * change, but we still want to update the rd_rel entry. So
+                        * rd_isvalid = false is left in place for a later lookup.
+                        */
+                       Assert(rd->rd_isvalid ||
+                                  (rd->rd_isnailed && !criticalRelcachesBuilt));
                 }
                 return rd;
         }
@@ -2255,6 +2265,81 @@ RelationReloadIndexInfo(Relation relation)
         relation->rd_isvalid = true;
  }
  
+/*
+ * RelationReloadNailed - reload minimal information for nailed relations.
+ *
+ * The structure of a nailed relation can never change (which is good, because
+ * we rely on knowing their structure to be able to read catalog content). But
+ * some parts, e.g. pg_class.relfrozenxid, are still important to have
+ * accurate content for. Therefore those need to be reloaded after the arrival
+ * of invalidations.
+ */
+static void
+RelationReloadNailed(Relation relation)
+{
+       Assert(relation->rd_isnailed);
+
+       /*
+        * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
+        * mapping changed.
+        */
+       RelationInitPhysicalAddr(relation);
+
+       /* flag as needing to be revalidated */
+       relation->rd_isvalid = false;
+
+       /*
+        * Can only reread catalog contents if in a transaction.  If the relation
+        * is currently open (not counting the nailed refcount), do so
+        * immediately. Otherwise we've already marked the entry as possibly
+        * invalid, and it'll be fixed when next opened.
+        */
+       if (!IsTransactionState() || relation->rd_refcnt <= 1)
+               return;
+
+       if (relation->rd_rel->relkind == RELKIND_INDEX)
+       {
+               /*
+                * If it's a nailed-but-not-mapped index, then we need to re-read the
+                * pg_class row to see if its relfilenode changed.
+                */
+               RelationReloadIndexInfo(relation);
+       }
+       else
+       {
+               /*
+                * Reload a non-index entry.  We can't easily do so if relcaches
+                * aren't yet built, but that's fine because at that stage the
+                * attributes that need to be current (like relfrozenxid) aren't yet
+                * accessed.  To ensure the entry will later be revalidated, we leave
+                * it in invalid state, but allow use (cf. RelationIdGetRelation()).
+                */
+               if (criticalRelcachesBuilt)
+               {
+                       HeapTuple       pg_class_tuple;
+                       Form_pg_class relp;
+
+                       /*
+                        * NB: Mark the entry as valid before starting to scan, to avoid
+                        * self-recursion when re-building pg_class.
+                        */
+                       relation->rd_isvalid = true;
+
+                       pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
+                                                                                       true, false);
+                       relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
+                       memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
+                       heap_freetuple(pg_class_tuple);
+
+                       /*
+                        * Again mark as valid, to protect against concurrently arriving
+                        * invalidations.
+                        */
+                       relation->rd_isvalid = true;
+               }
+       }
+}
+
  /*
   * RelationDestroyRelation
   *
@@ -2368,26 +2453,12 @@ RelationClearRelation(Relation relation, bool rebuild)
         RelationCloseSmgr(relation);
  
         /*
-        * Never, never ever blow away a nailed-in system relation, because we'd
-        * be unable to recover.  However, we must redo RelationInitPhysicalAddr
-        * in case it is a mapped relation whose mapping changed.
-        *
-        * If it's a nailed-but-not-mapped index, then we need to re-read the
-        * pg_class row to see if its relfilenode changed. We do that immediately
-        * if we're inside a valid transaction and the relation is open (not
-        * counting the nailed refcount).  Otherwise just mark the entry as
-        * possibly invalid, and it'll be fixed when next opened.
+        * Treat nailed-in system relations separately, they always need to be
+        * accessible, so we can't blow them away.
          */
         if (relation->rd_isnailed)
         {
-               RelationInitPhysicalAddr(relation);
-
-               if (relation->rd_rel->relkind == RELKIND_INDEX)
-               {
-                       relation->rd_isvalid = false;   /* needs to be revalidated */
-                       if (relation->rd_refcnt > 1 && IsTransactionState())
-                               RelationReloadIndexInfo(relation);
-               }
+               RelationReloadNailed(relation);
                 return;
         }
  
@@ -5904,24 +5975,26 @@ write_item(const void *data, Size len, FILE *fp)
  
  /*
   * Determine whether a given relation (identified by OID) is one of the ones
- * we should store in the local relcache init file.
+ * we should store in a relcache init file.
   *
   * We must cache all nailed rels, and for efficiency we should cache every rel
   * that supports a syscache.  The former set is almost but not quite a subset
- * of the latter.  Currently, we must special-case TriggerRelidNameIndexId,
- * which RelationCacheInitializePhase3 chooses to nail for efficiency reasons,
- * but which does not support any syscache.
- *
- * Note: this function is currently never called for shared rels.  If it were,
- * we'd probably also need a special case for DatabaseNameIndexId, which is
- * critical but does not support a syscache.
+ * of the latter. The special cases are relations where
+ * RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but
+ * which do not support any syscache.
   */
  bool
  RelationIdIsInInitFile(Oid relationId)
  {
-       if (relationId == TriggerRelidNameIndexId)
+       if (relationId == SharedSecLabelRelationId ||
+               relationId == TriggerRelidNameIndexId ||
+               relationId == DatabaseNameIndexId ||
+               relationId == SharedSecLabelObjectIndexId)
         {
-               /* If this Assert fails, we don't need this special case anymore. */
+               /*
+                * If this Assert fails, we don't need the applicable special case
+                * anymore.
+                */
                 Assert(!RelationSupportsSysCache(relationId));
                 return true;
         }
@@ -5991,38 +6064,30 @@ RelationHasUnloggedIndex(Relation rel)
   * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
   * then release the lock in RelationCacheInitFilePostInvalidate.  Caller must
   * send any pending SI messages between those calls.
- *
- * Notice this deals only with the local init file, not the shared init file.
- * The reason is that there can never be a "significant" change to the
- * relcache entry of a shared relation; the most that could happen is
- * updates of noncritical fields such as relpages/reltuples.  So, while
- * it's worth updating the shared init file from time to time, it can never
- * be invalid enough to make it necessary to remove it.
   */
  void
  RelationCacheInitFilePreInvalidate(void)
  {
-       char            initfilename[MAXPGPATH];
+       char            localinitfname[MAXPGPATH];
+       char            sharedinitfname[MAXPGPATH];
  
-       snprintf(initfilename, sizeof(initfilename), "%s/%s",
-                        DatabasePath, RELCACHE_INIT_FILENAME);
+       if (DatabasePath)
+               snprintf(localinitfname, sizeof(localinitfname), "%s/%s",
+                                DatabasePath, RELCACHE_INIT_FILENAME);
+       snprintf(sharedinitfname, sizeof(sharedinitfname), "global/%s",
+                        RELCACHE_INIT_FILENAME);
  
         LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
  
-       if (unlink(initfilename) < 0)
-       {
-               /*
-                * The file might not be there if no backend has been started since
-                * the last removal.  But complain about failures other than ENOENT.
-                * Fortunately, it's not too late to abort the transaction if we can't
-                * get rid of the would-be-obsolete init file.
-                */
-               if (errno != ENOENT)
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not remove cache file \"%s\": %m",
-                                                       initfilename)));
-       }
+       /*
+        * The files might not be there if no backend has been started since the
+        * last removal.  But complain about failures other than ENOENT with
+        * ERROR.  Fortunately, it's not too late to abort the transaction if we
+        * can't get rid of the would-be-obsolete init file.
+        */
+       if (DatabasePath)
+               unlink_initfile(localinitfname, ERROR);
+       unlink_initfile(sharedinitfname, ERROR);
  }
  
  void
@@ -6048,13 +6113,9 @@ RelationCacheInitFileRemove(void)
         struct dirent *de;
         char            path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
  
-       /*
-        * We zap the shared cache file too.  In theory it can't get out of sync
-        * enough to be a problem, but in data-corruption cases, who knows ...
-        */
         snprintf(path, sizeof(path), "global/%s",
                          RELCACHE_INIT_FILENAME);
-       unlink_initfile(path);
+       unlink_initfile(path, LOG);
  
         /* Scan everything in the default tablespace */
         RelationCacheInitFileRemoveInDir("base");
@@ -6106,7 +6167,7 @@ RelationCacheInitFileRemoveInDir(const char *tblspcpath)
                         /* Try to remove the init file in each database */
                         snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
                                          tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
-                       unlink_initfile(initfilename);
+                       unlink_initfile(initfilename, LOG);
                 }
         }
  
@@ -6114,12 +6175,15 @@ RelationCacheInitFileRemoveInDir(const char *tblspcpath)
  }
  
  static void
-unlink_initfile(const char *initfilename)
+unlink_initfile(const char *initfilename, int elevel)
  {
         if (unlink(initfilename) < 0)
         {
                 /* It might not be there, but log any error other than ENOENT */
                 if (errno != ENOENT)
-                       elog(LOG, "could not remove cache file \"%s\": %m", initfilename);
+                       ereport(elevel,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not remove cache file \"%s\": %m",
+                                                       initfilename)));
         }
  }
diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h

index a0af6788e96beef942e653adad046e1cd9c09667..ff2ac05de87f627222449c0efc69221449ab4dc7 100644 (file)
--- a/src/include/storage/standbydefs.h
+++ b/src/include/storage/standbydefs.h
@@ -64,7 +64,7 @@ typedef struct xl_invalidations
  {
         Oid                     dbId;                   /* MyDatabaseId */
         Oid                     tsId;                   /* MyDatabaseTableSpace */
-       bool            relcacheInitFileInval;  /* invalidate relcache init file */
+       bool            relcacheInitFileInval;  /* invalidate relcache init files */
         int                     nmsgs;                  /* number of shared inval msgs */
         SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER];
  } xl_invalidations;
author	Andres Freund <andres@anarazel.de>
	Tue, 12 Jun 2018 18:13:21 +0000 (11:13 -0700)
committer	Andres Freund <andres@anarazel.de>
	Tue, 12 Jun 2018 18:13:21 +0000 (11:13 -0700)
src/backend/utils/cache/inval.c		patch \| blob \| history
src/backend/utils/cache/relcache.c		patch \| blob \| history
src/include/storage/standbydefs.h		patch \| blob \| history