From f01d1ae3a104019d6d68aeff85c4816a275130b3 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 22 Jul 2013 10:34:34 -0400 Subject: [PATCH] Add infrastructure for mapping relfilenodes to relation OIDs. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Future patches are expected to introduce logical replication that works by decoding WAL. WAL contains relfilenodes rather than relation OIDs, so this infrastructure will be needed to find the relation OID based on WAL contents. If logical replication does not make it into this release, we probably should consider reverting this, since it will add some overhead to DDL operations that create new relations. One additional index insert per pg_class row is not a large overhead, but it's more than zero. Another way of meeting the needs of logical replication would be to add the relation OID to WAL, but that would burden DML operations, not only DDL. Andres Freund, with some changes by me. Design review, in earlier versions, by Álvaro Herrera. 
--- doc/src/sgml/func.sgml | 19 ++ src/backend/utils/adt/dbsize.c | 28 +++ src/backend/utils/cache/Makefile | 3 +- src/backend/utils/cache/inval.c | 2 +- src/backend/utils/cache/relfilenodemap.c | 247 ++++++++++++++++++++++ src/backend/utils/cache/relmapper.c | 53 +++++ src/include/catalog/catversion.h | 2 +- src/include/catalog/indexing.h | 2 + src/include/catalog/pg_proc.h | 2 + src/include/utils/builtins.h | 1 + src/include/utils/relfilenodemap.h | 18 ++ src/include/utils/relmapper.h | 2 + src/test/regress/expected/alter_table.out | 18 ++ src/test/regress/sql/alter_table.sql | 14 ++ 14 files changed, 408 insertions(+), 3 deletions(-) create mode 100644 src/backend/utils/cache/relfilenodemap.c create mode 100644 src/include/utils/relfilenodemap.h diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 707d303495..da63b347e5 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -15748,6 +15748,9 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); pg_relation_filepath + + pg_filenode_relation + Database Object Location Functions @@ -15776,6 +15779,15 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); File path name of the specified relation + + + pg_filenode_relation(tablespace oid, filenode oid) + + regclass + + Find the relation associated with a given tablespace and filenode + +
@@ -15799,6 +15811,13 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); the relation. + + pg_filenode_relation is the reverse of + pg_relation_filenode. Given a tablespace OID and + a filenode it returns the associated relation. The default + tablespace can be specified as 0. + + diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 34482abee3..21d1c946ab 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -28,6 +28,7 @@ #include "utils/builtins.h" #include "utils/numeric.h" #include "utils/rel.h" +#include "utils/relfilenodemap.h" #include "utils/relmapper.h" #include "utils/syscache.h" @@ -755,6 +756,33 @@ pg_relation_filenode(PG_FUNCTION_ARGS) PG_RETURN_OID(result); } +/* + * Get the relation via (reltablespace, relfilenode) + * + * This is expected to be used when somebody wants to match an individual file + * on the filesystem back to its table. That's not trivially possible via + * pg_class because that doesn't contain the relfilenodes of shared and nailed + * tables. + * + * We don't fail but return NULL if we cannot find a mapping. + * + * Instead of knowing DEFAULTTABLESPACE_OID you can pass 0. + */ +Datum +pg_filenode_relation(PG_FUNCTION_ARGS) +{ + Oid reltablespace = PG_GETARG_OID(0); + Oid relfilenode = PG_GETARG_OID(1); + Oid heaprel = InvalidOid; + + heaprel = RelidByRelfilenode(reltablespace, relfilenode); + + if (!OidIsValid(heaprel)) + PG_RETURN_NULL(); + else + PG_RETURN_OID(heaprel); +} + /* * Get the pathname (relative to $PGDATA) of a relation * diff --git a/src/backend/utils/cache/Makefile b/src/backend/utils/cache/Makefile index 32d722e34f..a943f8ea4b 100644 --- a/src/backend/utils/cache/Makefile +++ b/src/backend/utils/cache/Makefile @@ -13,6 +13,7 @@ top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \ - relmapper.o spccache.o syscache.o lsyscache.o typcache.o ts_cache.o + relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \ + typcache.o ts_cache.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 3356d0fe1e..bfe7d787b7 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -178,7 +178,7 @@ static int maxSharedInvalidMessagesArray; */ #define MAX_SYSCACHE_CALLBACKS 32 -#define MAX_RELCACHE_CALLBACKS 5 +#define MAX_RELCACHE_CALLBACKS 10 static struct SYSCACHECALLBACK { diff --git a/src/backend/utils/cache/relfilenodemap.c b/src/backend/utils/cache/relfilenodemap.c new file mode 100644 index 0000000000..372cb33c00 --- /dev/null +++ b/src/backend/utils/cache/relfilenodemap.c @@ -0,0 +1,247 @@ +/*------------------------------------------------------------------------- + * + * relfilenodemap.c + * relfilenode to oid mapping cache. 
+ * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/relfilenodemap.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/indexing.h" +#include "catalog/pg_class.h" +#include "catalog/pg_tablespace.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/hsearch.h" +#include "utils/inval.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" +#include "utils/relfilenodemap.h" +#include "utils/relmapper.h" + +/* Hash table for information about each relfilenode <-> oid pair */ +static HTAB *RelfilenodeMapHash = NULL; + +/* built first time through in InitializeRelfilenodeMap; private to this file */ +static ScanKeyData relfilenode_skey[2]; + +typedef struct +{ + Oid reltablespace; + Oid relfilenode; +} RelfilenodeMapKey; + +typedef struct +{ + RelfilenodeMapKey key; /* lookup key - must be first */ + Oid relid; /* pg_class.oid, or InvalidOid for a cached negative match */ +} RelfilenodeMapEntry; + +/* + * RelfilenodeMapInvalidateCallback + * Flush mapping entries when pg_class is updated in a relevant fashion. + */ +static void +RelfilenodeMapInvalidateCallback(Datum arg, Oid relid) +{ + HASH_SEQ_STATUS status; + RelfilenodeMapEntry *entry; + + /* nothing to do if not active or deleted */ + if (RelfilenodeMapHash == NULL) + return; + + /* if relid is InvalidOid, we must invalidate the entire cache */ + if (relid == InvalidOid) + { + hash_destroy(RelfilenodeMapHash); + RelfilenodeMapHash = NULL; + return; + } + + hash_seq_init(&status, RelfilenodeMapHash); + while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL) + { + /* Same OID may occur in more than one tablespace. 
Negative (InvalidOid) entries are dropped too, since the flushed relation may be the one they failed to find. */ + if (entry->relid == InvalidOid || entry->relid == relid) + { + if (hash_search(RelfilenodeMapHash, + (void *) &entry->key, + HASH_REMOVE, + NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } + } +} + +/* + * InitializeRelfilenodeMap + * Initialize cache, either on first use or after a reset. + */ +static void +InitializeRelfilenodeMap(void) +{ + HASHCTL ctl; + static bool initial_init_done = false; + int i; + + /* Make sure we've initialized CacheMemoryContext. */ + if (CacheMemoryContext == NULL) + CreateCacheMemoryContext(); + + /* Initialize the hash table. */ + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(RelfilenodeMapKey); + ctl.entrysize = sizeof(RelfilenodeMapEntry); + ctl.hash = tag_hash; + ctl.hcxt = CacheMemoryContext; + + RelfilenodeMapHash = + hash_create("RelfilenodeMap cache", 1024, &ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + + /* + * For complete resets we simply delete the entire hash, but there's no + * need to do the other stuff multiple times. Especially the initialization + * of the relcache invalidation should only be done once. + */ + if (initial_init_done) + return; + + /* build skey */ + MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey)); + + for (i = 0; i < 2; i++) + { + fmgr_info_cxt(F_OIDEQ, + &relfilenode_skey[i].sk_func, + CacheMemoryContext); + relfilenode_skey[i].sk_strategy = BTEqualStrategyNumber; + relfilenode_skey[i].sk_subtype = InvalidOid; + relfilenode_skey[i].sk_collation = InvalidOid; + } + + relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace; + relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode; + + /* Watch for invalidation events. */ + CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback, + (Datum) 0); + initial_init_done = true; +} + +/* + * Map a relation's (tablespace, filenode) to a relation's oid and cache the + * result. + * + * Returns InvalidOid if no relation matching the criteria could be found. 
+ */ +Oid +RelidByRelfilenode(Oid reltablespace, Oid relfilenode) +{ + RelfilenodeMapKey key; + RelfilenodeMapEntry *entry; + bool found; + SysScanDesc scandesc; + Relation relation; + HeapTuple ntp; + ScanKeyData skey[2]; + + if (RelfilenodeMapHash == NULL) + InitializeRelfilenodeMap(); + + /* pg_class will show 0 when the value is actually MyDatabaseTableSpace */ + if (reltablespace == MyDatabaseTableSpace) + reltablespace = 0; + + MemSet(&key, 0, sizeof(key)); + key.reltablespace = reltablespace; + key.relfilenode = relfilenode; + + /* + * Check cache and enter entry if nothing could be found. Even if no target + * relation can be found later on we store the negative match and return an + * InvalidOid from cache. That's not really necessary for performance since + * querying invalid values isn't supposed to be a frequent thing, but the + * implementation is simpler this way. + */ + entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found); + + if (found) + return entry->relid; + + /* ok, no previous cache entry, do it the hard way; NOTE(review): entry->relid stays unset until one of the branches below assigns it, so an error or cache reset before then would presumably leave a bogus or dangling entry -- confirm */ + + /* check shared tables */ + if (reltablespace == GLOBALTABLESPACE_OID) + { + entry->relid = RelationMapFilenodeToOid(relfilenode, true); + return entry->relid; + } + + /* check plain relations by looking in pg_class */ + relation = heap_open(RelationRelationId, AccessShareLock); + + /* copy scankey to local copy, it will be modified during the scan */ + memcpy(skey, relfilenode_skey, sizeof(skey)); + + /* set scan arguments */ + skey[0].sk_argument = ObjectIdGetDatum(reltablespace); + skey[1].sk_argument = ObjectIdGetDatum(relfilenode); + + scandesc = systable_beginscan(relation, + ClassTblspcRelfilenodeIndexId, + true, + NULL, + 2, + skey); + + found = false; + + while (HeapTupleIsValid(ntp = systable_getnext(scandesc))) + { + bool isnull; + + if (found) + elog(ERROR, + "unexpected duplicate for tablespace %u, relfilenode %u", + reltablespace, relfilenode); + found = true; + +#ifdef USE_ASSERT_CHECKING + if 
(assert_enabled) + { + Oid check; + check = fastgetattr(ntp, Anum_pg_class_reltablespace, + RelationGetDescr(relation), + &isnull); + Assert(!isnull && check == reltablespace); + + check = fastgetattr(ntp, Anum_pg_class_relfilenode, + RelationGetDescr(relation), + &isnull); + Assert(!isnull && check == relfilenode); + } +#endif + entry->relid = HeapTupleGetOid(ntp); + } + + systable_endscan(scandesc); + heap_close(relation, AccessShareLock); + + /* check for tables that are mapped but not shared */ + if (!found) + entry->relid = RelationMapFilenodeToOid(relfilenode, false); + + return entry->relid; +} diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 2c7d9f3287..18f0342a7d 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -180,6 +180,59 @@ RelationMapOidToFilenode(Oid relationId, bool shared) return InvalidOid; } +/* + * RelationMapFilenodeToOid + * + * Do the reverse of the normal direction of mapping done in + * RelationMapOidToFilenode. + * + * This is not supposed to be used during normal running but rather for + * information purposes when looking at the filesystem or xlog. + * + * Returns InvalidOid if the OID is not known; this can easily happen if the + * relfilenode doesn't pertain to a mapped relation. 
+ */ +Oid +RelationMapFilenodeToOid(Oid filenode, bool shared) +{ + const RelMapFile *map; + int32 i; + + /* If there are active updates, believe those over the main maps: each branch scans the pending-update map first, then the main map, and returns the OID of the first entry whose mapfilenode matches */ + if (shared) + { + map = &active_shared_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + map = &shared_map; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + } + else + { + map = &active_local_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + map = &local_map; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + } + + return InvalidOid; /* no entry in either map carries this filenode */ +} + /* * RelationMapUpdateMap * diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index d9404dafc1..2e51039c24 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201307181 +#define CATALOG_VERSION_NO 201307221 #endif diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 19268fbe64..4860e98ca5 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -106,6 +106,8 @@ DECLARE_UNIQUE_INDEX(pg_class_oid_index, 2662, on pg_class using btree(oid oid_o #define ClassOidIndexId 2662 DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, on pg_class using btree(relname name_ops, relnamespace oid_ops)); #define ClassNameNspIndexId 2663 +DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops)); +#define ClassTblspcRelfilenodeIndexId 3455 DECLARE_UNIQUE_INDEX(pg_collation_name_enc_nsp_index, 3164, on pg_collation using btree(collname name_ops, collencoding int4_ops, collnamespace 
oid_ops)); #define CollationNameEncNspIndexId 3164 diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 90aff3d048..f03dd0b7da 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3448,6 +3448,8 @@ DATA(insert OID = 2998 ( pg_indexes_size PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 DESCR("disk space usage for all indexes attached to the specified table"); DATA(insert OID = 2999 ( pg_relation_filenode PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 26 "2205" _null_ _null_ _null_ _null_ pg_relation_filenode _null_ _null_ _null_ )); DESCR("filenode identifier of relation"); +DATA(insert OID = 3454 ( pg_filenode_relation PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 2205 "26 26" _null_ _null_ _null_ _null_ pg_filenode_relation _null_ _null_ _null_ )); +DESCR("relation OID for filenode and tablespace"); DATA(insert OID = 3034 ( pg_relation_filepath PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 25 "2205" _null_ _null_ _null_ _null_ pg_relation_filepath _null_ _null_ _null_ )); DESCR("file path of relation"); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 667c58b5d0..a5a0561a4a 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -460,6 +460,7 @@ extern Datum pg_size_pretty_numeric(PG_FUNCTION_ARGS); extern Datum pg_table_size(PG_FUNCTION_ARGS); extern Datum pg_indexes_size(PG_FUNCTION_ARGS); extern Datum pg_relation_filenode(PG_FUNCTION_ARGS); +extern Datum pg_filenode_relation(PG_FUNCTION_ARGS); extern Datum pg_relation_filepath(PG_FUNCTION_ARGS); /* genfile.c */ diff --git a/src/include/utils/relfilenodemap.h b/src/include/utils/relfilenodemap.h new file mode 100644 index 0000000000..324ff69f8e --- /dev/null +++ b/src/include/utils/relfilenodemap.h @@ -0,0 +1,18 @@ +/*------------------------------------------------------------------------- + * + * relfilenodemap.h + * relfilenode to oid mapping cache. 
+ * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/utils/relfilenodemap.h + * + *------------------------------------------------------------------------- + */ +#ifndef RELFILENODEMAP_H +#define RELFILENODEMAP_H + +extern Oid RelidByRelfilenode(Oid reltablespace, Oid relfilenode); + +#endif /* RELFILENODEMAP_H */ diff --git a/src/include/utils/relmapper.h b/src/include/utils/relmapper.h index 8f0b438a11..071bc988a5 100644 --- a/src/include/utils/relmapper.h +++ b/src/include/utils/relmapper.h @@ -36,6 +36,8 @@ typedef struct xl_relmap_update extern Oid RelationMapOidToFilenode(Oid relationId, bool shared); +extern Oid RelationMapFilenodeToOid(Oid filenode, bool shared); /* reverse lookup: takes a filenode, returns the relation OID or InvalidOid */ + extern void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, bool immediate); diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 18daf95c66..7cc0084b92 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -2305,3 +2305,21 @@ Check constraints: DROP TABLE alter2.tt8; DROP SCHEMA alter2; +-- Check that we map relation oids to filenodes and back correctly. +-- Don't display all the mappings so the test output doesn't change +-- all the time, but make sure we actually do test some values. 
+SELECT + SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped, + count(*) > 200 have_mappings +FROM ( + SELECT + oid, reltablespace, relfilenode, relname, + pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) mapped_oid + FROM pg_class + WHERE relkind IN ('r', 'i', 'S', 't', 'm') + ) mapped; + incorrectly_mapped | have_mappings +--------------------+--------------- + 0 | t +(1 row) + diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index dcf8121d70..a546ba74af 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -1544,3 +1544,17 @@ ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2; DROP TABLE alter2.tt8; DROP SCHEMA alter2; + +-- Check that we map relation oids to filenodes and back correctly. +-- Don't display all the mappings so the test output doesn't change +-- all the time, but make sure we actually do test some values. +SELECT + SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped, + count(*) > 200 have_mappings +FROM ( + SELECT + oid, reltablespace, relfilenode, relname, + pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) mapped_oid + FROM pg_class + WHERE relkind IN ('r', 'i', 'S', 't', 'm') + ) mapped; -- 2.40.0