]> granicus.if.org Git - postgresql/commitdiff
Fix RelationCacheInitializePhase2 (Phase3, in HEAD) to cope with the
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 26 Sep 2009 18:24:49 +0000 (18:24 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 26 Sep 2009 18:24:49 +0000 (18:24 +0000)
possibility of shared-inval messages causing a relcache flush while it tries
to fill in missing data in preloaded relcache entries.  There are actually
two distinct failure modes here:

1. The flush could delete the next-to-be-processed cache entry, causing
the subsequent hash_seq_search calls to go off into the weeds.  This is
the problem reported by Michael Brown, and I believe it also accounts
for bug #5074.  The simplest fix is to restart the hashtable scan after
we've read any new data from the catalogs.  It appears that pre-8.4
branches have not suffered from this failure, because by chance there were
no other catalogs sharing the same hash chains with the catalogs that
RelationCacheInitializePhase2 had work to do for.  However that's obviously
pretty fragile, and it seems possible that derivative versions with
additional system catalogs might be vulnerable, so I'm back-patching this
part of the fix anyway.

2. The flush could delete the *current* cache entry, in which case the
pointer to the newly-loaded data would end up being stored into an
already-deleted Relation struct.  As long as it was still deleted, the only
consequence would be some leaked space in CacheMemoryContext.  But it seems
possible that the Relation struct could already have been recycled, in
which case this represents a hard-to-reproduce clobber of cached data
structures, with unforeseeable consequences.  The fix here is to pin the
entry while we work on it.

In passing, also change RelationCacheInitializePhase2 to Assert that
formrdesc() set up the relation's cached TupleDesc (rd_att) with the
correct type OID and hasoids values.  This is more appropriate than
silently updating the values, because the original tupdesc might already
have been copied into the catcache.  However this part of the patch is
not in HEAD because it fails due to some questionable recent changes in
formrdesc :-(.  That will be cleaned up in a subsequent patch.

src/backend/utils/cache/relcache.c

index dc638b77c2850b16fb5e2c92e5138b634199647c..6b0ff50bb8f3343746ab290661b4efaf9643b3d8 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.290 2009/08/30 17:18:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.291 2009/09/26 18:24:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -144,8 +144,7 @@ do { \
        RelIdCacheEnt *idhentry; bool found; \
        idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
                                                                                   (void *) &(RELATION->rd_id), \
-                                                                                  HASH_ENTER, \
-                                                                                  &found); \
+                                                                                  HASH_ENTER, &found); \
        /* used to give notice if found -- now just keep quiet */ \
        idhentry->reldesc = RELATION; \
 } while(0)
@@ -154,7 +153,8 @@ do { \
 do { \
        RelIdCacheEnt *hentry; \
        hentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
-                                                                                (void *) &(ID), HASH_FIND,NULL); \
+                                                                                (void *) &(ID), \
+                                                                                HASH_FIND, NULL); \
        if (hentry) \
                RELATION = hentry->reldesc; \
        else \
@@ -1412,7 +1412,9 @@ formrdesc(const char *relationName, bool isshared,
         *
         * The data we insert here is pretty incomplete/bogus, but it'll serve to
         * get us launched.  RelationCacheInitializePhase3() will read the real
-        * data from pg_class and replace what we've done here.
+        * data from pg_class and replace what we've done here.  Note in particular
+        * that relowner is left as zero; this cues RelationCacheInitializePhase3
+        * that the real data isn't there yet.
         */
        relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
 
@@ -2687,17 +2689,31 @@ RelationCacheInitializePhase3(void)
         * rows and replace the fake entries with them. Also, if any of the
         * relcache entries have rules or triggers, load that info the hard way
         * since it isn't recorded in the cache file.
+        *
+        * Whenever we access the catalogs to read data, there is a possibility
+        * of a shared-inval cache flush causing relcache entries to be removed.
+        * Since hash_seq_search only guarantees to still work after the *current*
+        * entry is removed, it's unsafe to continue the hashtable scan afterward.
+        * We handle this by restarting the scan from scratch after each access.
+        * This is theoretically O(N^2), but the number of entries that actually
+        * need to be fixed is small enough that it doesn't matter.
         */
        hash_seq_init(&status, RelationIdCache);
 
        while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
        {
                Relation        relation = idhentry->reldesc;
+               bool            restart = false;
+
+               /*
+                * Make sure *this* entry doesn't get flushed while we work with it.
+                */
+               RelationIncrementReferenceCount(relation);
 
                /*
                 * If it's a faked-up entry, read the real pg_class tuple.
                 */
-               if (needNewCacheFile && relation->rd_isnailed)
+               if (relation->rd_rel->relowner == InvalidOid)
                {
                        HeapTuple       htup;
                        Form_pg_class relp;
@@ -2714,7 +2730,6 @@ RelationCacheInitializePhase3(void)
                         * Copy tuple to relation->rd_rel. (See notes in
                         * AllocateRelationDesc())
                         */
-                       Assert(relation->rd_rel != NULL);
                        memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
 
                        /* Update rd_options while we have the tuple */
@@ -2730,15 +2745,47 @@ RelationCacheInitializePhase3(void)
                        relation->rd_att->tdhasoid = relp->relhasoids;
 
                        ReleaseSysCache(htup);
+
+                       /* relowner had better be OK now, else we'll loop forever */
+                       if (relation->rd_rel->relowner == InvalidOid)
+                               elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
+                                        RelationGetRelationName(relation));
+
+                       restart = true;
                }
 
                /*
                 * Fix data that isn't saved in relcache cache file.
+                *
+                * relhasrules or relhastriggers could possibly be wrong or out of
+                * date.  If we don't actually find any rules or triggers, clear the
+                * local copy of the flag so that we don't get into an infinite loop
+                * here.  We don't make any attempt to fix the pg_class entry, though.
                 */
                if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
+               {
                        RelationBuildRuleLock(relation);
+                       if (relation->rd_rules == NULL)
+                               relation->rd_rel->relhasrules = false;
+                       restart = true;
+               }
                if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
+               {
                        RelationBuildTriggers(relation);
+                       if (relation->trigdesc == NULL)
+                               relation->rd_rel->relhastriggers = false;
+                       restart = true;
+               }
+
+               /* Release hold on the relation */
+               RelationDecrementReferenceCount(relation);
+
+               /* Now, restart the hashtable scan if needed */
+               if (restart)
+               {
+                       hash_seq_term(&status);
+                       hash_seq_init(&status, RelationIdCache);
+               }
        }
 
        /*