]> granicus.if.org Git - postgresql/commitdiff
Add infrastructure for mapping relfilenodes to relation OIDs.
authorRobert Haas <rhaas@postgresql.org>
Mon, 22 Jul 2013 14:34:34 +0000 (10:34 -0400)
committerRobert Haas <rhaas@postgresql.org>
Mon, 22 Jul 2013 15:09:10 +0000 (11:09 -0400)
Future patches are expected to introduce logical replication that
works by decoding WAL.  WAL contains relfilenodes rather than relation
OIDs, so this infrastructure will be needed to find the relation OID
based on WAL contents.

If logical replication does not make it into this release, we probably
should consider reverting this, since it will add some overhead to DDL
operations that create new relations.  One additional index insert per
pg_class row is not a large overhead, but it's more than zero.
Another way of meeting the needs of logical replication would be to
add the relation OID to WAL, but that would burden DML operations, not
only DDL.

Andres Freund, with some changes by me.  Design review, in earlier
versions, by Álvaro Herrera.

14 files changed:
doc/src/sgml/func.sgml
src/backend/utils/adt/dbsize.c
src/backend/utils/cache/Makefile
src/backend/utils/cache/inval.c
src/backend/utils/cache/relfilenodemap.c [new file with mode: 0644]
src/backend/utils/cache/relmapper.c
src/include/catalog/catversion.h
src/include/catalog/indexing.h
src/include/catalog/pg_proc.h
src/include/utils/builtins.h
src/include/utils/relfilenodemap.h [new file with mode: 0644]
src/include/utils/relmapper.h
src/test/regress/expected/alter_table.out
src/test/regress/sql/alter_table.sql

index 707d30349563635e1083dbb5214b1edb755eb2d6..da63b347e58600864c28ad3a9db1a8455b8c9a19 100644 (file)
@@ -15748,6 +15748,9 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
    <indexterm>
     <primary>pg_relation_filepath</primary>
    </indexterm>
+   <indexterm>
+    <primary>pg_filenode_relation</primary>
+   </indexterm>
 
    <table id="functions-admin-dblocation">
     <title>Database Object Location Functions</title>
@@ -15776,6 +15779,15 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
         File path name of the specified relation
        </entry>
       </row>
+      <row>
+       <entry>
+        <literal><function>pg_filenode_relation(<parameter>tablespace</parameter> <type>oid</type>, <parameter>filenode</parameter> <type>oid</type>)</function></literal>
+        </entry>
+       <entry><type>regclass</type></entry>
+       <entry>
+        Find the relation associated with a given tablespace and filenode
+       </entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
@@ -15799,6 +15811,13 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
     the relation.
    </para>
 
+   <para>
+    <function>pg_filenode_relation</> is the reverse of
+    <function>pg_relation_filenode</>. Given a <quote>tablespace</> OID and
+    a <quote>filenode</>, it returns the associated relation. The database's
+    default tablespace can be specified as 0.
+   </para>
+
   </sect2>
 
   <sect2 id="functions-admin-genfile">
index 34482abee3e9aea297a891def7e5d0a1f6464858..21d1c946abe9c0959b4a5aec1165e2e88beddeb9 100644 (file)
@@ -28,6 +28,7 @@
 #include "utils/builtins.h"
 #include "utils/numeric.h"
 #include "utils/rel.h"
+#include "utils/relfilenodemap.h"
 #include "utils/relmapper.h"
 #include "utils/syscache.h"
 
@@ -755,6 +756,33 @@ pg_relation_filenode(PG_FUNCTION_ARGS)
        PG_RETURN_OID(result);
 }
 
+/*
+ * pg_filenode_relation
+ *             SQL-callable reverse lookup: (reltablespace, relfilenode) -> relation
+ *
+ * Meant for matching an individual file on the filesystem back to the
+ * relation it belongs to.  That's not trivially possible via pg_class
+ * alone, because pg_class doesn't contain the relfilenodes of shared and
+ * nailed tables.
+ *
+ * If no mapping can be found we return NULL instead of raising an error.
+ *
+ * Callers need not know DEFAULTTABLESPACE_OID; 0 may be passed instead.
+ */
+Datum
+pg_filenode_relation(PG_FUNCTION_ARGS)
+{
+       Oid                     reltablespace = PG_GETARG_OID(0);
+       Oid                     relfilenode = PG_GETARG_OID(1);
+       Oid                     relid;
+
+       relid = RelidByRelfilenode(reltablespace, relfilenode);
+
+       /* InvalidOid from the lookup means "no such relation" */
+       if (OidIsValid(relid))
+               PG_RETURN_OID(relid);
+
+       PG_RETURN_NULL();
+}
+
 /*
  * Get the pathname (relative to $PGDATA) of a relation
  *
index 32d722e34f66e118646e696596d3efabcca0ff9b..a943f8ea4bcd4ddb4c642ab1d2716fb620769bcf 100644 (file)
@@ -13,6 +13,7 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \
-       relmapper.o spccache.o syscache.o lsyscache.o typcache.o ts_cache.o
+       relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \
+       typcache.o ts_cache.o
 
 include $(top_srcdir)/src/backend/common.mk
index 3356d0fe1e28ac65e0890f0471d13f1920d0f531..bfe7d787b7b4ce8d45689bf8c71efe3bdf7d8f0d 100644 (file)
@@ -178,7 +178,7 @@ static int  maxSharedInvalidMessagesArray;
  */
 
 #define MAX_SYSCACHE_CALLBACKS 32
-#define MAX_RELCACHE_CALLBACKS 5
+#define MAX_RELCACHE_CALLBACKS 10
 
 static struct SYSCACHECALLBACK
 {
diff --git a/src/backend/utils/cache/relfilenodemap.c b/src/backend/utils/cache/relfilenodemap.c
new file mode 100644 (file)
index 0000000..372cb33
--- /dev/null
@@ -0,0 +1,247 @@
+/*-------------------------------------------------------------------------
+ *
+ * relfilenodemap.c
+ *       relfilenode to oid mapping cache.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/backend/utils/cache/relfilenodemap.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_tablespace.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/hsearch.h"
+#include "utils/inval.h"
+#include "utils/fmgroids.h"
+#include "utils/rel.h"
+#include "utils/relfilenodemap.h"
+#include "utils/relmapper.h"
+
+/* Hash table for information about each relfilenode <-> oid pair */
+static HTAB *RelfilenodeMapHash = NULL;
+
+/*
+ * Scan key template for lookups by (reltablespace, relfilenode); built the
+ * first time through in InitializeRelfilenodeMap.  Only used within this
+ * file, so keep it out of the global namespace.
+ */
+static ScanKeyData relfilenode_skey[2];
+
+/* Hash key: the (tablespace, filenode) pair being looked up */
+typedef struct
+{
+       Oid                     reltablespace;
+       Oid                     relfilenode;
+} RelfilenodeMapKey;
+
+/* Hash entry: maps a key to the OID of the relation using that filenode */
+typedef struct
+{
+       RelfilenodeMapKey key;  /* lookup key - must be first */
+       Oid                     relid;                  /* pg_class.oid */
+} RelfilenodeMapEntry;
+
+/*
+ * RelfilenodeMapInvalidateCallback
+ *             Flush mapping entries when pg_class is updated in a relevant fashion.
+ *
+ * arg is unused.  relid is the relation being invalidated, or InvalidOid to
+ * request that the whole cache be flushed.
+ *
+ * Entries are removed one at a time rather than by destroying the hash
+ * table: RelidByRelfilenode holds a pointer into the table while it scans
+ * pg_class, and presumably invalidations can be processed during that scan,
+ * so freeing the table's memory here would leave that pointer dangling.
+ */
+static void
+RelfilenodeMapInvalidateCallback(Datum arg, Oid relid)
+{
+       HASH_SEQ_STATUS status;
+       RelfilenodeMapEntry *entry;
+
+       /* nothing to do if the cache is not active */
+       if (RelfilenodeMapHash == NULL)
+               return;
+
+       hash_seq_init(&status, RelfilenodeMapHash);
+       while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL)
+       {
+               /*
+                * Remove everything on a complete reset, otherwise just the entries
+                * of the flushed relation (the same OID may occur in more than one
+                * tablespace).  Negative entries are always removed as well: they
+                * carry no relation OID to match against, so this is the only chance
+                * to notice that the filenode has since come into use.
+                */
+               if (relid == InvalidOid ||                      /* complete reset */
+                       entry->relid == InvalidOid ||   /* negative cache entry */
+                       entry->relid == relid)                  /* flushed relation */
+               {
+                       if (hash_search(RelfilenodeMapHash,
+                                                       (void *) &entry->key,
+                                                       HASH_REMOVE,
+                                                       NULL) == NULL)
+                               elog(ERROR, "hash table corrupted");
+               }
+       }
+}
+
+/*
+ * InitializeRelfilenodeMap
+ *             Initialize cache, either on first use or after a reset.
+ *
+ * On return RelfilenodeMapHash points to an empty hash table allocated in
+ * CacheMemoryContext, relfilenode_skey is ready to be copied for scans by
+ * (reltablespace, relfilenode), and the relcache invalidation callback is
+ * registered.
+ */
+static void
+InitializeRelfilenodeMap(void)
+{
+       HASHCTL         ctl;
+       static bool     initial_init_done = false;
+       int                     i;
+
+       /* Make sure we've initialized CacheMemoryContext. */
+       if (CacheMemoryContext == NULL)
+               CreateCacheMemoryContext();
+
+       /*
+        * Build the scan keys before creating the hash table.  Callers decide
+        * whether to initialize by testing RelfilenodeMapHash, so if anything
+        * here ERRORs out the table must still be NULL; otherwise a later
+        * lookup would run with zeroed scan keys and without the invalidation
+        * callback.  This is skipped when the table is merely being rebuilt
+        * after a reset.
+        */
+       if (!initial_init_done)
+       {
+               MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey));
+
+               for (i = 0; i < 2; i++)
+               {
+                       fmgr_info_cxt(F_OIDEQ,
+                                                 &relfilenode_skey[i].sk_func,
+                                                 CacheMemoryContext);
+                       relfilenode_skey[i].sk_strategy = BTEqualStrategyNumber;
+                       relfilenode_skey[i].sk_subtype = InvalidOid;
+                       relfilenode_skey[i].sk_collation = InvalidOid;
+               }
+
+               relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace;
+               relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode;
+       }
+
+       /* Initialize the hash table. */
+       MemSet(&ctl, 0, sizeof(ctl));
+       ctl.keysize = sizeof(RelfilenodeMapKey);
+       ctl.entrysize = sizeof(RelfilenodeMapEntry);
+       ctl.hash = tag_hash;
+       ctl.hcxt = CacheMemoryContext;
+
+       RelfilenodeMapHash =
+               hash_create("RelfilenodeMap cache", 1024, &ctl,
+                                       HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+
+       /*
+        * The hash table may be created more than once, but the relcache
+        * invalidation callback must only ever be registered once.
+        */
+       if (initial_init_done)
+               return;
+
+       /* Watch for invalidation events. */
+       CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback,
+                                                                 (Datum) 0);
+       initial_init_done = true;
+}
+
+/*
+ * Map a relation's (tablespace, filenode) to a relation's oid and cache the
+ * result.
+ *
+ * Returns InvalidOid if no relation matching the criteria could be found.
+ */
+Oid
+RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
+{
+       RelfilenodeMapKey key;
+       RelfilenodeMapEntry *entry;
+       bool found;
+       SysScanDesc scandesc;
+       Relation relation;
+       HeapTuple ntp;
+       ScanKeyData skey[2];
+
+       if (RelfilenodeMapHash == NULL)
+               InitializeRelfilenodeMap();
+
+       /* pg_class will show 0 when the value is actually MyDatabaseTableSpace */
+       if (reltablespace == MyDatabaseTableSpace)
+               reltablespace = 0;
+
+       MemSet(&key, 0, sizeof(key));
+       key.reltablespace = reltablespace;
+       key.relfilenode = relfilenode;
+
+       /*
+        * Check cache and enter entry if nothing could be found. Even if no target
+        * relation can be found later on we store the negative match and return a
+        * InvalidOid from cache. That's not really necessary for performance since
+        * querying invalid values isn't supposed to be a frequent thing, but the
+        * implementation is simpler this way.
+        */
+       entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found);
+
+       if (found)
+               return entry->relid;
+
+       /* ok, no previous cache entry, do it the hard way */
+
+       /* check shared tables */
+       if (reltablespace == GLOBALTABLESPACE_OID)
+       {
+               entry->relid = RelationMapFilenodeToOid(relfilenode, true);
+               return entry->relid;
+       }
+
+       /* check plain relations by looking in pg_class */
+       relation = heap_open(RelationRelationId, AccessShareLock);
+
+       /* copy scankey to local copy, it will be modified during the scan */
+       memcpy(skey, relfilenode_skey, sizeof(skey));
+
+       /* set scan arguments */
+       skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
+       skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
+
+       scandesc = systable_beginscan(relation,
+                                                                 ClassTblspcRelfilenodeIndexId,
+                                                                 true,
+                                                                 NULL,
+                                                                 2,
+                                                                 skey);
+
+       found = false;
+
+       while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
+       {
+               bool isnull;
+
+               if (found)
+                       elog(ERROR,
+                                "unexpected duplicate for tablespace %u, relfilenode %u",
+                                reltablespace, relfilenode);
+               found = true;
+
+#ifdef USE_ASSERT_CHECKING
+               if (assert_enabled)
+               {
+                       Oid check;
+                       check = fastgetattr(ntp, Anum_pg_class_reltablespace,
+                                                               RelationGetDescr(relation),
+                                                               &isnull);
+                       Assert(!isnull && check == reltablespace);
+
+                       check = fastgetattr(ntp, Anum_pg_class_relfilenode,
+                                                               RelationGetDescr(relation),
+                                                               &isnull);
+                       Assert(!isnull && check == relfilenode);
+               }
+#endif
+               entry->relid = HeapTupleGetOid(ntp);
+       }
+
+       systable_endscan(scandesc);
+       heap_close(relation, AccessShareLock);
+
+       /* check for tables that are mapped but not shared */
+       if (!found)
+               entry->relid = RelationMapFilenodeToOid(relfilenode, false);
+
+       return entry->relid;
+}
index 2c7d9f3287bf30c94024ae4826394a012f39d47f..18f0342a7de41e4b77564f7f25081a279f921ee4 100644 (file)
@@ -180,6 +180,59 @@ RelationMapOidToFilenode(Oid relationId, bool shared)
        return InvalidOid;
 }
 
+/*
+ * RelationMapFilenodeToOid
+ *
+ * The inverse of RelationMapOidToFilenode: given a filenode, find the OID
+ * of the mapped relation that uses it.  "shared" selects whether the shared
+ * or the local (per-database) maps are consulted.
+ *
+ * This is not supposed to be used during normal running but rather for
+ * information purposes when looking at the filesystem or xlog.
+ *
+ * Returns InvalidOid if the OID is not known; this can easily happen if the
+ * relfilenode doesn't pertain to a mapped relation.
+ */
+Oid
+RelationMapFilenodeToOid(Oid filenode, bool shared)
+{
+       const RelMapFile *search_order[2];
+       int                     m;
+
+       /* If there are active updates, believe those over the main maps */
+       if (shared)
+       {
+               search_order[0] = &active_shared_updates;
+               search_order[1] = &shared_map;
+       }
+       else
+       {
+               search_order[0] = &active_local_updates;
+               search_order[1] = &local_map;
+       }
+
+       for (m = 0; m < 2; m++)
+       {
+               const RelMapFile *map = search_order[m];
+               int32           i;
+
+               for (i = 0; i < map->num_mappings; i++)
+               {
+                       if (map->mappings[i].mapfilenode == filenode)
+                               return map->mappings[i].mapoid;
+               }
+       }
+
+       return InvalidOid;
+}
+
 /*
  * RelationMapUpdateMap
  *
index d9404dafc16b2eb5b2dc5f3d9ebc0b15fd99aa75..2e51039c24b1e99751cf36269f0b994cb0441a8f 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201307181
+#define CATALOG_VERSION_NO     201307221
 
 #endif
index 19268fbe64864b7d1ffb28b59ca21446f2654fe7..4860e98ca55f0cf802ee81cfe7034ffa92e2bc2a 100644 (file)
@@ -106,6 +106,8 @@ DECLARE_UNIQUE_INDEX(pg_class_oid_index, 2662, on pg_class using btree(oid oid_o
 #define ClassOidIndexId  2662
 DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, on pg_class using btree(relname name_ops, relnamespace oid_ops));
 #define ClassNameNspIndexId  2663
+DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops));
+#define ClassTblspcRelfilenodeIndexId  3455
 
 DECLARE_UNIQUE_INDEX(pg_collation_name_enc_nsp_index, 3164, on pg_collation using btree(collname name_ops, collencoding int4_ops, collnamespace oid_ops));
 #define CollationNameEncNspIndexId 3164
index 90aff3d0484b9fe460726089461d690cfc91ca48..f03dd0b7da46b87d6327ac309c680448c21b38fd 100644 (file)
@@ -3448,6 +3448,8 @@ DATA(insert OID = 2998 ( pg_indexes_size          PGNSP PGUID 12 1 0 0 0 f f f f t f v 1
 DESCR("disk space usage for all indexes attached to the specified table");
 DATA(insert OID = 2999 ( pg_relation_filenode  PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 26 "2205" _null_ _null_ _null_ _null_ pg_relation_filenode _null_ _null_ _null_ ));
 DESCR("filenode identifier of relation");
+DATA(insert OID = 3454 ( pg_filenode_relation PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 2205 "26 26" _null_ _null_ _null_ _null_ pg_filenode_relation _null_ _null_ _null_ ));
+DESCR("relation OID for filenode and tablespace");
 DATA(insert OID = 3034 ( pg_relation_filepath  PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 25 "2205" _null_ _null_ _null_ _null_ pg_relation_filepath _null_ _null_ _null_ ));
 DESCR("file path of relation");
 
index 667c58b5d0c27490352db362ad3aa5c688b79b13..a5a0561a4a5926589921ab340e7916e899dd8451 100644 (file)
@@ -460,6 +460,7 @@ extern Datum pg_size_pretty_numeric(PG_FUNCTION_ARGS);
 extern Datum pg_table_size(PG_FUNCTION_ARGS);
 extern Datum pg_indexes_size(PG_FUNCTION_ARGS);
 extern Datum pg_relation_filenode(PG_FUNCTION_ARGS);
+extern Datum pg_filenode_relation(PG_FUNCTION_ARGS);
 extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
 
 /* genfile.c */
diff --git a/src/include/utils/relfilenodemap.h b/src/include/utils/relfilenodemap.h
new file mode 100644 (file)
index 0000000..324ff69
--- /dev/null
@@ -0,0 +1,18 @@
+/*-------------------------------------------------------------------------
+ *
+ * relfilenodemap.h
+ *       relfilenode to oid mapping cache.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/relfilenodemap.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RELFILENODEMAP_H
+#define RELFILENODEMAP_H
+
+extern Oid RelidByRelfilenode(Oid reltablespace, Oid relfilenode);
+
+#endif   /* RELFILENODEMAP_H */
index 8f0b438a1192f12ce28b86c683bbe67d1254cc63..071bc988a5fa1c85fde927ab9c98e80f32dd59c3 100644 (file)
@@ -36,6 +36,8 @@ typedef struct xl_relmap_update
 
 extern Oid     RelationMapOidToFilenode(Oid relationId, bool shared);
 
+extern Oid     RelationMapFilenodeToOid(Oid filenode, bool shared);
+
 extern void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared,
                                         bool immediate);
 
index 18daf95c668cd68d9c4d373e1c6b3012b99f298d..7cc0084b920a5eef9a62567f62f2a27cc9de598a 100644 (file)
@@ -2305,3 +2305,21 @@ Check constraints:
 
 DROP TABLE alter2.tt8;
 DROP SCHEMA alter2;
+-- Check that we map relation oids to filenodes and back correctly.
+-- Don't display all the mappings so the test output doesn't change
+-- all the time, but make sure we actually do test some values.
+SELECT
+    SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped,
+    count(*) > 200 have_mappings
+FROM (
+    SELECT
+        oid, reltablespace, relfilenode, relname,
+        pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) mapped_oid
+    FROM pg_class
+    WHERE relkind IN ('r', 'i', 'S', 't', 'm')
+    ) mapped;
+ incorrectly_mapped | have_mappings 
+--------------------+---------------
+                  0 | t
+(1 row)
+
index dcf8121d70c1dd7ac2db2ffaf58ce20bca2ec742..a546ba74af36f6742b7aa4a5125cbda3407fa6d4 100644 (file)
@@ -1544,3 +1544,17 @@ ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2;
 
 DROP TABLE alter2.tt8;
 DROP SCHEMA alter2;
+
+-- Check that we map relation oids to filenodes and back correctly.
+-- Don't display all the mappings so the test output doesn't change
+-- all the time, but make sure we actually do test some values.
+SELECT
+    SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped,
+    count(*) > 200 have_mappings
+FROM (
+    SELECT
+        oid, reltablespace, relfilenode, relname,
+        pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) mapped_oid
+    FROM pg_class
+    WHERE relkind IN ('r', 'i', 'S', 't', 'm')
+    ) mapped;