]> granicus.if.org Git - postgresql/commitdiff
Fix relfilenodemap.c's handling of cache invalidations.
authorRobert Haas <rhaas@postgresql.org>
Wed, 13 Nov 2013 15:52:59 +0000 (10:52 -0500)
committerRobert Haas <rhaas@postgresql.org>
Wed, 13 Nov 2013 15:52:59 +0000 (10:52 -0500)
The old code entered a new hash table entry first, then scanned
pg_class to determine what value to fill in, and then populated the
entry.  This fails to work properly if a cache invalidation happens
as a result of opening pg_class.  Repair.

Along the way, get rid of the idea of blowing away the entire hash
table as a method of processing invalidations.  Instead, just delete
all the entries one by one.  This is probably not quite as cheap but
it's simpler, and shouldn't happen often.

Andres Freund

src/backend/utils/cache/relfilenodemap.c

index 904b2cd9e7487314563d8123628b357cdb9ee361..1469bfaba2f2d0580c981f078bd230fb1c6d162e 100644 (file)
@@ -57,23 +57,20 @@ RelfilenodeMapInvalidateCallback(Datum arg, Oid relid)
        HASH_SEQ_STATUS status;
        RelfilenodeMapEntry *entry;
 
-       /* nothing to do if not active or deleted */
-       if (RelfilenodeMapHash == NULL)
-               return;
-
-       /* if relid is InvalidOid, we must invalidate the entire cache */
-       if (relid == InvalidOid)
-       {
-               hash_destroy(RelfilenodeMapHash);
-               RelfilenodeMapHash = NULL;
-               return;
-       }
+       /* callback only gets registered after creating the hash */
+       Assert(RelfilenodeMapHash != NULL);
 
        hash_seq_init(&status, RelfilenodeMapHash);
        while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL)
        {
-               /* Same OID may occur in more than one tablespace. */
-               if (entry->relid == relid)
+               /*
+                * If relid is InvalidOid, signalling a complete reset, we must remove
+                * all entries, otherwise just remove the specific relation's entry.
+                * Always remove negative cache entries.
+                */
+               if (relid == InvalidOid ||              /* complete reset */
+                       entry->relid == InvalidOid ||           /* negative cache entry */
+                       entry->relid == relid)          /* individual flushed relation */
                {
                        if (hash_search(RelfilenodeMapHash,
                                                        (void *) &entry->key,
@@ -92,32 +89,12 @@ static void
 InitializeRelfilenodeMap(void)
 {
        HASHCTL         ctl;
-       static bool     initial_init_done = false;
-       int i;
+       int                     i;
 
        /* Make sure we've initialized CacheMemoryContext. */
        if (CacheMemoryContext == NULL)
                CreateCacheMemoryContext();
 
-       /* Initialize the hash table. */
-       MemSet(&ctl, 0, sizeof(ctl));
-       ctl.keysize = sizeof(RelfilenodeMapKey);
-       ctl.entrysize = sizeof(RelfilenodeMapEntry);
-       ctl.hash = tag_hash;
-       ctl.hcxt = CacheMemoryContext;
-
-       RelfilenodeMapHash =
-               hash_create("RelfilenodeMap cache", 1024, &ctl,
-                                       HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
-
-       /*
-        * For complete resets we simply delete the entire hash, but there's no
-        * need to do the other stuff multiple times. Especially the initialization
-        * of the relcche invalidation should only be done once.
-        */
-       if (initial_init_done)
-               return;
-
        /* build skey */
        MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey));
 
@@ -134,10 +111,25 @@ InitializeRelfilenodeMap(void)
        relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace;
        relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode;
 
+       /* Initialize the hash table. */
+       MemSet(&ctl, 0, sizeof(ctl));
+       ctl.keysize = sizeof(RelfilenodeMapKey);
+       ctl.entrysize = sizeof(RelfilenodeMapEntry);
+       ctl.hash = tag_hash;
+       ctl.hcxt = CacheMemoryContext;
+
+       /*
+        * Only create the RelfilenodeMapHash now, so we don't end up partially
+        * initialized when fmgr_info_cxt() above ERRORs out with an out of memory
+        * error.
+        */
+       RelfilenodeMapHash =
+               hash_create("RelfilenodeMap cache", 1024, &ctl,
+                                       HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+
        /* Watch for invalidation events. */
        CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback,
                                                                  (Datum) 0);
-       initial_init_done = true;
 }
 
 /*
@@ -156,6 +148,7 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
        Relation relation;
        HeapTuple ntp;
        ScanKeyData skey[2];
+       Oid                     relid;
 
        if (RelfilenodeMapHash == NULL)
                InitializeRelfilenodeMap();
@@ -169,81 +162,99 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
        key.relfilenode = relfilenode;
 
        /*
-        * Check cache and enter entry if nothing could be found. Even if no target
+        * Check cache and return entry if one is found. Even if no target
         * relation can be found later on we store the negative match and return a
-        * InvalidOid from cache. That's not really necessary for performance since
-        * querying invalid values isn't supposed to be a frequent thing, but the
-        * implementation is simpler this way.
+        * InvalidOid from cache. That's not really necessary for performance
+        * since querying invalid values isn't supposed to be a frequent thing,
+        * but it's basically free.
         */
-       entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found);
+       entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_FIND, &found);
 
        if (found)
                return entry->relid;
 
-       /* initialize empty/negative cache entry before doing the actual lookup */
-       entry->relid = InvalidOid;
-
        /* ok, no previous cache entry, do it the hard way */
 
-       /* check shared tables */
+       /* initialize empty/negative cache entry before doing the actual lookups */
+       relid = InvalidOid;
+
        if (reltablespace == GLOBALTABLESPACE_OID)
        {
-               entry->relid = RelationMapFilenodeToOid(relfilenode, true);
-               return entry->relid;
+               /*
+                * Ok, shared table, check relmapper.
+                */
+               relid = RelationMapFilenodeToOid(relfilenode, true);
        }
+       else
+       {
+               /*
+                * Not a shared table, could either be a plain relation or a
+                * non-shared, nailed one, like e.g. pg_class.
+                */
 
-       /* check plain relations by looking in pg_class */
-       relation = heap_open(RelationRelationId, AccessShareLock);
+               /* check for plain relations by looking in pg_class */
+               relation = heap_open(RelationRelationId, AccessShareLock);
 
-       /* copy scankey to local copy, it will be modified during the scan */
-       memcpy(skey, relfilenode_skey, sizeof(skey));
+               /* copy scankey to local copy, it will be modified during the scan */
+               memcpy(skey, relfilenode_skey, sizeof(skey));
 
-       /* set scan arguments */
-       skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
-       skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
+               /* set scan arguments */
+               skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
+               skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
 
-       scandesc = systable_beginscan(relation,
-                                                                 ClassTblspcRelfilenodeIndexId,
-                                                                 true,
-                                                                 NULL,
-                                                                 2,
-                                                                 skey);
+               scandesc = systable_beginscan(relation,
+                                                                         ClassTblspcRelfilenodeIndexId,
+                                                                         true,
+                                                                         NULL,
+                                                                         2,
+                                                                         skey);
 
-       found = false;
+               found = false;
 
-       while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
-       {
-               if (found)
-                       elog(ERROR,
-                                "unexpected duplicate for tablespace %u, relfilenode %u",
-                                reltablespace, relfilenode);
-               found = true;
+               while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
+               {
+                       if (found)
+                               elog(ERROR,
+                                        "unexpected duplicate for tablespace %u, relfilenode %u",
+                                        reltablespace, relfilenode);
+                       found = true;
 
 #ifdef USE_ASSERT_CHECKING
-               if (assert_enabled)
-               {
-                       bool isnull;
-                       Oid check;
-                       check = fastgetattr(ntp, Anum_pg_class_reltablespace,
-                                                               RelationGetDescr(relation),
-                                                               &isnull);
-                       Assert(!isnull && check == reltablespace);
-
-                       check = fastgetattr(ntp, Anum_pg_class_relfilenode,
-                                                               RelationGetDescr(relation),
-                                                               &isnull);
-                       Assert(!isnull && check == relfilenode);
-               }
+                       if (assert_enabled)
+                       {
+                               bool isnull;
+                               Oid check;
+                               check = fastgetattr(ntp, Anum_pg_class_reltablespace,
+                                                                       RelationGetDescr(relation),
+                                                                       &isnull);
+                               Assert(!isnull && check == reltablespace);
+
+                               check = fastgetattr(ntp, Anum_pg_class_relfilenode,
+                                                                       RelationGetDescr(relation),
+                                                                       &isnull);
+                               Assert(!isnull && check == relfilenode);
+                       }
 #endif
-               entry->relid = HeapTupleGetOid(ntp);
-       }
+                       relid = HeapTupleGetOid(ntp);
+               }
 
-       systable_endscan(scandesc);
-       heap_close(relation, AccessShareLock);
+               systable_endscan(scandesc);
+               heap_close(relation, AccessShareLock);
 
-       /* check for tables that are mapped but not shared */
-       if (!found)
-               entry->relid = RelationMapFilenodeToOid(relfilenode, false);
+               /* check for tables that are mapped but not shared */
+               if (!found)
+                       relid = RelationMapFilenodeToOid(relfilenode, false);
+       }
+
+       /*
+        * Only enter entry into cache now, our opening of pg_class could have
+        * caused cache invalidations to be executed which would have deleted a
+        * new entry if we had entered it above.
+        */
+       entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found);
+       if (found)
+               elog(ERROR, "corrupted hashtable");
+       entry->relid = relid;
 
-       return entry->relid;
+       return relid;
 }