1 /*-------------------------------------------------------------------------
4 * POSTGRES relation descriptor cache code
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/utils/cache/relcache.c
13 *-------------------------------------------------------------------------
17 * RelationCacheInitialize - initialize relcache (to empty)
18 * RelationCacheInitializePhase2 - initialize shared-catalog entries
19 * RelationCacheInitializePhase3 - finish initializing relcache
20 * RelationIdGetRelation - get a reldesc by relation id
21 * RelationClose - close an open relation
24 * The following code contains many undocumented hacks. Please be
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/reloptions.h"
37 #include "access/sysattr.h"
38 #include "access/xact.h"
39 #include "access/xlog.h"
40 #include "catalog/catalog.h"
41 #include "catalog/index.h"
42 #include "catalog/indexing.h"
43 #include "catalog/namespace.h"
44 #include "catalog/partition.h"
45 #include "catalog/pg_am.h"
46 #include "catalog/pg_amproc.h"
47 #include "catalog/pg_attrdef.h"
48 #include "catalog/pg_authid.h"
49 #include "catalog/pg_auth_members.h"
50 #include "catalog/pg_constraint.h"
51 #include "catalog/pg_database.h"
52 #include "catalog/pg_namespace.h"
53 #include "catalog/pg_opclass.h"
54 #include "catalog/pg_partitioned_table.h"
55 #include "catalog/pg_proc.h"
56 #include "catalog/pg_publication.h"
57 #include "catalog/pg_rewrite.h"
58 #include "catalog/pg_shseclabel.h"
59 #include "catalog/pg_subscription.h"
60 #include "catalog/pg_tablespace.h"
61 #include "catalog/pg_trigger.h"
62 #include "catalog/pg_type.h"
63 #include "catalog/schemapg.h"
64 #include "catalog/storage.h"
65 #include "commands/policy.h"
66 #include "commands/trigger.h"
67 #include "miscadmin.h"
68 #include "nodes/nodeFuncs.h"
69 #include "optimizer/clauses.h"
70 #include "optimizer/prep.h"
71 #include "optimizer/var.h"
72 #include "rewrite/rewriteDefine.h"
73 #include "rewrite/rowsecurity.h"
74 #include "storage/lmgr.h"
75 #include "storage/smgr.h"
76 #include "utils/array.h"
77 #include "utils/builtins.h"
78 #include "utils/fmgroids.h"
79 #include "utils/inval.h"
80 #include "utils/lsyscache.h"
81 #include "utils/memutils.h"
82 #include "utils/relmapper.h"
83 #include "utils/resowner_private.h"
84 #include "utils/snapmgr.h"
85 #include "utils/syscache.h"
86 #include "utils/tqual.h"
90 * name of relcache init file(s), used to speed up backend startup
92 #define RELCACHE_INIT_FILENAME "pg_internal.init"
94 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
97 * hardcoded tuple descriptors, contents generated by genbki.pl
99 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
100 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
101 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
102 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
103 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
104 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
105 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
106 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
107 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
108 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
111 * Hash tables that index the relation cache
113 * We used to index the cache by both name and OID, but now there
114 * is only an index by OID.
116 typedef struct relidcacheent
122 static HTAB *RelationIdCache;
125 * This flag is false until we have prepared the critical relcache entries
126 * that are needed to do indexscans on the tables read by relcache building.
128 bool criticalRelcachesBuilt = false;
131 * This flag is false until we have prepared the critical relcache entries
132 * for shared catalogs (which are the tables needed for login).
134 bool criticalSharedRelcachesBuilt = false;
137 * This counter counts relcache inval events received since backend startup
138 * (but only for rels that are actually in cache). Presently, we use it only
139 * to detect whether data about to be written by write_relcache_init_file()
140 * might already be obsolete.
142 static long relcacheInvalsReceived = 0L;
145 * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
146 * cleanup work. This list intentionally has limited size; if it overflows,
147 * we fall back to scanning the whole hashtable. There is no value in a very
148 * large list because (1) at some point, a hash_seq_search scan is faster than
149 * retail lookups, and (2) the value of this is to reduce EOXact work for
150 * short transactions, which can't have dirtied all that many tables anyway.
151 * EOXactListAdd() does not bother to prevent duplicate list entries, so the
152 * cleanup processing must be idempotent.
154 #define MAX_EOXACT_LIST 32
155 static Oid eoxact_list[MAX_EOXACT_LIST];
156 static int eoxact_list_len = 0;
157 static bool eoxact_list_overflowed = false;
159 #define EOXactListAdd(rel) \
161 if (eoxact_list_len < MAX_EOXACT_LIST) \
162 eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
164 eoxact_list_overflowed = true; \
168 * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
169 * cleanup work. The array expands as needed; there is no hashtable because
170 * we don't need to access individual items except at EOXact.
172 static TupleDesc *EOXactTupleDescArray;
173 static int NextEOXactTupleDescNum = 0;
174 static int EOXactTupleDescArrayLen = 0;
177 * macros to manipulate the lookup hashtable
179 #define RelationCacheInsert(RELATION, replace_allowed) \
181 RelIdCacheEnt *hentry; bool found; \
182 hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
183 (void *) &((RELATION)->rd_id), \
184 HASH_ENTER, &found); \
187 /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
188 Relation _old_rel = hentry->reldesc; \
189 Assert(replace_allowed); \
190 hentry->reldesc = (RELATION); \
191 if (RelationHasReferenceCountZero(_old_rel)) \
192 RelationDestroyRelation(_old_rel, false); \
193 else if (!IsBootstrapProcessingMode()) \
194 elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
195 RelationGetRelationName(_old_rel)); \
198 hentry->reldesc = (RELATION); \
201 #define RelationIdCacheLookup(ID, RELATION) \
203 RelIdCacheEnt *hentry; \
204 hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
208 RELATION = hentry->reldesc; \
213 #define RelationCacheDelete(RELATION) \
215 RelIdCacheEnt *hentry; \
216 hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
217 (void *) &((RELATION)->rd_id), \
218 HASH_REMOVE, NULL); \
219 if (hentry == NULL) \
220 elog(WARNING, "failed to delete relcache entry for OID %u", \
221 (RELATION)->rd_id); \
226 * Special cache for opclass-related information
228 * Note: only default support procs get cached, ie, those with
229 * lefttype = righttype = opcintype.
231 typedef struct opclasscacheent
233 Oid opclassoid; /* lookup key: OID of opclass */
234 bool valid; /* set TRUE after successful fill-in */
235 StrategyNumber numSupport; /* max # of support procs (from pg_am) */
236 Oid opcfamily; /* OID of opclass's family */
237 Oid opcintype; /* OID of opclass's declared input type */
238 RegProcedure *supportProcs; /* OIDs of support procedures */
241 static HTAB *OpClassCache = NULL;
244 /* non-export function prototypes */
246 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
247 static void RelationClearRelation(Relation relation, bool rebuild);
249 static void RelationReloadIndexInfo(Relation relation);
250 static void RelationFlushRelation(Relation relation);
251 static void RememberToFreeTupleDescAtEOX(TupleDesc td);
252 static void AtEOXact_cleanup(Relation relation, bool isCommit);
253 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
254 SubTransactionId mySubid, SubTransactionId parentSubid);
255 static bool load_relcache_init_file(bool shared);
256 static void write_relcache_init_file(bool shared);
257 static void write_item(const void *data, Size len, FILE *fp);
259 static void formrdesc(const char *relationName, Oid relationReltype,
260 bool isshared, bool hasoids,
261 int natts, const FormData_pg_attribute *attrs);
263 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
264 static Relation AllocateRelationDesc(Form_pg_class relp);
265 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
266 static void RelationBuildTupleDesc(Relation relation);
267 static void RelationBuildPartitionKey(Relation relation);
268 static PartitionKey copy_partition_key(PartitionKey fromkey);
269 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
270 static void RelationInitPhysicalAddr(Relation relation);
271 static void load_critical_index(Oid indexoid, Oid heapoid);
272 static TupleDesc GetPgClassDescriptor(void);
273 static TupleDesc GetPgIndexDescriptor(void);
274 static void AttrDefaultFetch(Relation relation);
275 static void CheckConstraintFetch(Relation relation);
276 static int CheckConstraintCmp(const void *a, const void *b);
277 static List *insert_ordered_oid(List *list, Oid datum);
278 static void InitIndexAmRoutine(Relation relation);
279 static void IndexSupportInitialize(oidvector *indclass,
280 RegProcedure *indexSupport,
283 StrategyNumber maxSupportNumber,
284 AttrNumber maxAttributeNumber);
285 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
286 StrategyNumber numSupport);
287 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
288 static void unlink_initfile(const char *initfilename);
289 static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
290 PartitionDesc partdesc2);
296 * This is used by RelationBuildDesc to find a pg_class
297 * tuple matching targetRelId. The caller must hold at least
298 * AccessShareLock on the target relid to prevent concurrent-update
299 * scenarios; it isn't guaranteed that all scans used to build the
300 * relcache entry will use the same snapshot. If, for example,
301 * an attribute were to be added after scanning pg_class and before
302 * scanning pg_attribute, relnatts wouldn't match.
304 * NB: the returned tuple has been copied into palloc'd storage
305 * and must eventually be freed with heap_freetuple.
308 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
310 HeapTuple pg_class_tuple;
311 Relation pg_class_desc;
312 SysScanDesc pg_class_scan;
317 * If something goes wrong during backend startup, we might find ourselves
318 * trying to read pg_class before we've selected a database. That ain't
319 * gonna work, so bail out with a useful error message. If this happens,
320 * it probably means a relcache entry that needs to be nailed isn't.
322 if (!OidIsValid(MyDatabaseId))
323 elog(FATAL, "cannot read pg_class without having selected a database");
329 ObjectIdAttributeNumber,
330 BTEqualStrategyNumber, F_OIDEQ,
331 ObjectIdGetDatum(targetRelId));
334 * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
335 * built the critical relcache entries (this includes initdb and startup
336 * without a pg_internal.init file). The caller can also force a heap
337 * scan by setting indexOK == false.
339 pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
342 * The caller might need a tuple that's newer than the one the historic
343 * snapshot can see; currently the only case requiring this is looking up
344 * the relfilenode of non-mapped system relations during decoding.
346 if (force_non_historic)
347 snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
349 snapshot = GetCatalogSnapshot(RelationRelationId);
351 pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
352 indexOK && criticalRelcachesBuilt,
356 pg_class_tuple = systable_getnext(pg_class_scan);
359 * Must copy tuple before releasing buffer.
361 if (HeapTupleIsValid(pg_class_tuple))
362 pg_class_tuple = heap_copytuple(pg_class_tuple);
365 systable_endscan(pg_class_scan);
366 heap_close(pg_class_desc, AccessShareLock);
368 return pg_class_tuple;
372 * AllocateRelationDesc
374 * This is used to allocate memory for a new relation descriptor
375 * and initialize the rd_rel field from the given pg_class tuple.
378 AllocateRelationDesc(Form_pg_class relp)
381 MemoryContext oldcxt;
382 Form_pg_class relationForm;
384 /* Relcache entries must live in CacheMemoryContext */
385 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
388 * allocate and zero space for new relation descriptor
390 relation = (Relation) palloc0(sizeof(RelationData));
392 /* make sure relation is marked as having no open file yet */
393 relation->rd_smgr = NULL;
396 * Copy the relation tuple form
398 * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
399 * variable-length fields (relacl, reloptions) are NOT stored in the
400 * relcache --- there'd be little point in it, since we don't copy the
401 * tuple's nulls bitmap and hence wouldn't know if the values are valid.
402 * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
403 * it from the syscache if you need it. The same goes for the original
404 * form of reloptions (however, we do store the parsed form of reloptions
407 relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
409 memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
411 /* initialize relation tuple form */
412 relation->rd_rel = relationForm;
414 /* and allocate attribute tuple form storage */
415 relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
416 relationForm->relhasoids);
417 /* which we mark as a reference-counted tupdesc */
418 relation->rd_att->tdrefcount = 1;
420 MemoryContextSwitchTo(oldcxt);
426 * RelationParseRelOptions
427 * Convert pg_class.reloptions into pre-parsed rd_options
429 * tuple is the real pg_class tuple (not rd_rel!) for relation
431 * Note: rd_rel and (if an index) rd_amroutine must be valid already
434 RelationParseRelOptions(Relation relation, HeapTuple tuple)
438 relation->rd_options = NULL;
440 /* Fall out if relkind should not have options */
441 switch (relation->rd_rel->relkind)
443 case RELKIND_RELATION:
444 case RELKIND_TOASTVALUE:
447 case RELKIND_MATVIEW:
448 case RELKIND_PARTITIONED_TABLE:
455 * Fetch reloptions from tuple; have to use a hardwired descriptor because
456 * we might not have any other for pg_class yet (consider executing this
457 * code for pg_class itself)
459 options = extractRelOptions(tuple,
460 GetPgClassDescriptor(),
461 relation->rd_rel->relkind == RELKIND_INDEX ?
462 relation->rd_amroutine->amoptions : NULL);
465 * Copy parsed data into CacheMemoryContext. To guard against the
466 * possibility of leaks in the reloptions code, we want to do the actual
467 * parsing in the caller's memory context and copy the results into
468 * CacheMemoryContext after the fact.
472 relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
474 memcpy(relation->rd_options, options, VARSIZE(options));
480 * RelationBuildTupleDesc
482 * Form the relation's tuple descriptor from information in
483 * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
486 RelationBuildTupleDesc(Relation relation)
488 HeapTuple pg_attribute_tuple;
489 Relation pg_attribute_desc;
490 SysScanDesc pg_attribute_scan;
494 AttrDefault *attrdef = NULL;
497 /* copy some fields from pg_class row to rd_att */
498 relation->rd_att->tdtypeid = relation->rd_rel->reltype;
499 relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
500 relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
502 constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
503 sizeof(TupleConstr));
504 constr->has_not_null = false;
507 * Form a scan key that selects only user attributes (attnum > 0).
508 * (Eliminating system attribute rows at the index level is lots faster
509 * than fetching them.)
511 ScanKeyInit(&skey[0],
512 Anum_pg_attribute_attrelid,
513 BTEqualStrategyNumber, F_OIDEQ,
514 ObjectIdGetDatum(RelationGetRelid(relation)));
515 ScanKeyInit(&skey[1],
516 Anum_pg_attribute_attnum,
517 BTGreaterStrategyNumber, F_INT2GT,
521 * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
522 * built the critical relcache entries (this includes initdb and startup
523 * without a pg_internal.init file).
525 pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
526 pg_attribute_scan = systable_beginscan(pg_attribute_desc,
527 AttributeRelidNumIndexId,
528 criticalRelcachesBuilt,
533 * add attribute data to relation->rd_att
535 need = relation->rd_rel->relnatts;
537 while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
539 Form_pg_attribute attp;
541 attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
543 if (attp->attnum <= 0 ||
544 attp->attnum > relation->rd_rel->relnatts)
545 elog(ERROR, "invalid attribute number %d for %s",
546 attp->attnum, RelationGetRelationName(relation));
548 memcpy(relation->rd_att->attrs[attp->attnum - 1],
550 ATTRIBUTE_FIXED_PART_SIZE);
552 /* Update constraint/default info */
553 if (attp->attnotnull)
554 constr->has_not_null = true;
559 attrdef = (AttrDefault *)
560 MemoryContextAllocZero(CacheMemoryContext,
561 relation->rd_rel->relnatts *
562 sizeof(AttrDefault));
563 attrdef[ndef].adnum = attp->attnum;
564 attrdef[ndef].adbin = NULL;
573 * end the scan and close the attribute relation
575 systable_endscan(pg_attribute_scan);
576 heap_close(pg_attribute_desc, AccessShareLock);
579 elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
580 need, RelationGetRelid(relation));
583 * The attcacheoff values we read from pg_attribute should all be -1
584 * ("unknown"). Verify this if assert checking is on. They will be
585 * computed when and if needed during tuple access.
587 #ifdef USE_ASSERT_CHECKING
591 for (i = 0; i < relation->rd_rel->relnatts; i++)
592 Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
597 * However, we can easily set the attcacheoff value for the first
598 * attribute: it must be zero. This eliminates the need for special cases
599 * for attnum=1 that used to exist in fastgetattr() and index_getattr().
601 if (relation->rd_rel->relnatts > 0)
602 relation->rd_att->attrs[0]->attcacheoff = 0;
605 * Set up constraint/default info
607 if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
609 relation->rd_att->constr = constr;
611 if (ndef > 0) /* DEFAULTs */
613 if (ndef < relation->rd_rel->relnatts)
614 constr->defval = (AttrDefault *)
615 repalloc(attrdef, ndef * sizeof(AttrDefault));
617 constr->defval = attrdef;
618 constr->num_defval = ndef;
619 AttrDefaultFetch(relation);
622 constr->num_defval = 0;
624 if (relation->rd_rel->relchecks > 0) /* CHECKs */
626 constr->num_check = relation->rd_rel->relchecks;
627 constr->check = (ConstrCheck *)
628 MemoryContextAllocZero(CacheMemoryContext,
629 constr->num_check * sizeof(ConstrCheck));
630 CheckConstraintFetch(relation);
633 constr->num_check = 0;
638 relation->rd_att->constr = NULL;
643 * RelationBuildRuleLock
645 * Form the relation's rewrite rules from information in
646 * the pg_rewrite system catalog.
648 * Note: The rule parsetrees are potentially very complex node structures.
649 * To allow these trees to be freed when the relcache entry is flushed,
650 * we make a private memory context to hold the RuleLock information for
651 * each relcache entry that has associated rules. The context is used
652 * just for rule info, not for any other subsidiary data of the relcache
653 * entry, because that keeps the update logic in RelationClearRelation()
654 * manageable. The other subsidiary data structures are simple enough
655 * to be easy to free explicitly, anyway.
658 RelationBuildRuleLock(Relation relation)
660 MemoryContext rulescxt;
661 MemoryContext oldcxt;
662 HeapTuple rewrite_tuple;
663 Relation rewrite_desc;
664 TupleDesc rewrite_tupdesc;
665 SysScanDesc rewrite_scan;
673 * Make the private context. Assume it'll not contain much data.
675 rulescxt = AllocSetContextCreate(CacheMemoryContext,
676 RelationGetRelationName(relation),
677 ALLOCSET_SMALL_SIZES);
678 relation->rd_rulescxt = rulescxt;
681 * allocate an array to hold the rewrite rules (the array is extended if
685 rules = (RewriteRule **)
686 MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
693 Anum_pg_rewrite_ev_class,
694 BTEqualStrategyNumber, F_OIDEQ,
695 ObjectIdGetDatum(RelationGetRelid(relation)));
698 * open pg_rewrite and begin a scan
700 * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
701 * be reading the rules in name order, except possibly during
702 * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
703 * ensures that rules will be fired in name order.
705 rewrite_desc = heap_open(RewriteRelationId, AccessShareLock);
706 rewrite_tupdesc = RelationGetDescr(rewrite_desc);
707 rewrite_scan = systable_beginscan(rewrite_desc,
708 RewriteRelRulenameIndexId,
712 while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
714 Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
720 rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
721 sizeof(RewriteRule));
723 rule->ruleId = HeapTupleGetOid(rewrite_tuple);
725 rule->event = rewrite_form->ev_type - '0';
726 rule->enabled = rewrite_form->ev_enabled;
727 rule->isInstead = rewrite_form->is_instead;
730 * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
731 * rule strings are often large enough to be toasted. To avoid
732 * leaking memory in the caller's context, do the detoasting here so
733 * we can free the detoasted version.
735 rule_datum = heap_getattr(rewrite_tuple,
736 Anum_pg_rewrite_ev_action,
740 rule_str = TextDatumGetCString(rule_datum);
741 oldcxt = MemoryContextSwitchTo(rulescxt);
742 rule->actions = (List *) stringToNode(rule_str);
743 MemoryContextSwitchTo(oldcxt);
746 rule_datum = heap_getattr(rewrite_tuple,
747 Anum_pg_rewrite_ev_qual,
751 rule_str = TextDatumGetCString(rule_datum);
752 oldcxt = MemoryContextSwitchTo(rulescxt);
753 rule->qual = (Node *) stringToNode(rule_str);
754 MemoryContextSwitchTo(oldcxt);
758 * We want the rule's table references to be checked as though by the
759 * table owner, not the user referencing the rule. Therefore, scan
760 * through the rule's actions and set the checkAsUser field on all
761 * rtable entries. We have to look at the qual as well, in case it
764 * The reason for doing this when the rule is loaded, rather than when
765 * it is stored, is that otherwise ALTER TABLE OWNER would have to
766 * grovel through stored rules to update checkAsUser fields. Scanning
767 * the rule tree during load is relatively cheap (compared to
768 * constructing it in the first place), so we do it here.
770 setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
771 setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
773 if (numlocks >= maxlocks)
776 rules = (RewriteRule **)
777 repalloc(rules, sizeof(RewriteRule *) * maxlocks);
779 rules[numlocks++] = rule;
783 * end the scan and close the pg_rewrite relation
785 systable_endscan(rewrite_scan);
786 heap_close(rewrite_desc, AccessShareLock);
789 * there might not be any rules (if relhasrules is out-of-date)
793 relation->rd_rules = NULL;
794 relation->rd_rulescxt = NULL;
795 MemoryContextDelete(rulescxt);
800 * form a RuleLock and insert into relation
802 rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
803 rulelock->numLocks = numlocks;
804 rulelock->rules = rules;
806 relation->rd_rules = rulelock;
810 * RelationBuildPartitionKey
811 * Build and attach to relcache partition key data of relation
813 * Partitioning key data is stored in CacheMemoryContext to ensure it survives
814 * as long as the relcache. To avoid leaking memory in that context in case
815 * of an error partway through this function, we build the structure in the
816 * working context (which must be short-lived) and copy the completed
817 * structure into the cache memory.
819 * Also, since the structure being created here is sufficiently complex, we
820 * make a private child context of CacheMemoryContext for each relation that
821 * has associated partition key information. That means no complicated logic
822 * to free individual elements whenever the relcache entry is flushed - just
823 * delete the context.
826 RelationBuildPartitionKey(Relation relation)
828 Form_pg_partitioned_table form;
835 oidvector *collation;
836 ListCell *partexprs_item;
838 MemoryContext partkeycxt,
841 tuple = SearchSysCache1(PARTRELID,
842 ObjectIdGetDatum(RelationGetRelid(relation)));
845 * The following happens when we have created our pg_class entry but not
846 * the pg_partitioned_table entry yet.
848 if (!HeapTupleIsValid(tuple))
851 key = (PartitionKey) palloc0(sizeof(PartitionKeyData));
853 /* Fixed-length attributes */
854 form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
855 key->strategy = form->partstrat;
856 key->partnatts = form->partnatts;
859 * We can rely on the first variable-length attribute being mapped to the
860 * relevant field of the catalog's C struct, because all previous
861 * attributes are non-nullable and fixed-length.
863 attrs = form->partattrs.values;
865 /* But use the hard way to retrieve further variable-length attributes */
867 datum = SysCacheGetAttr(PARTRELID, tuple,
868 Anum_pg_partitioned_table_partclass, &isnull);
870 opclass = (oidvector *) DatumGetPointer(datum);
873 datum = SysCacheGetAttr(PARTRELID, tuple,
874 Anum_pg_partitioned_table_partcollation, &isnull);
876 collation = (oidvector *) DatumGetPointer(datum);
879 datum = SysCacheGetAttr(PARTRELID, tuple,
880 Anum_pg_partitioned_table_partexprs, &isnull);
886 exprString = TextDatumGetCString(datum);
887 expr = stringToNode(exprString);
891 * Run the expressions through const-simplification since the planner
892 * will be comparing them to similarly-processed qual clause operands,
893 * and may fail to detect valid matches without this step. We don't
894 * need to bother with canonicalize_qual() though, because partition
895 * expressions are not full-fledged qualification clauses.
897 expr = eval_const_expressions(NULL, (Node *) expr);
899 /* May as well fix opfuncids too */
900 fix_opfuncids((Node *) expr);
901 key->partexprs = (List *) expr;
904 key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
905 key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
906 key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
907 key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
909 key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
911 /* Gather type and collation info as well */
912 key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
913 key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
914 key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
915 key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
916 key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
917 key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
919 /* Copy partattrs and fill other per-attribute info */
920 memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
921 partexprs_item = list_head(key->partexprs);
922 for (i = 0; i < key->partnatts; i++)
924 AttrNumber attno = key->partattrs[i];
925 HeapTuple opclasstup;
926 Form_pg_opclass opclassform;
929 /* Collect opfamily information */
930 opclasstup = SearchSysCache1(CLAOID,
931 ObjectIdGetDatum(opclass->values[i]));
932 if (!HeapTupleIsValid(opclasstup))
933 elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
935 opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
936 key->partopfamily[i] = opclassform->opcfamily;
937 key->partopcintype[i] = opclassform->opcintype;
940 * A btree support function covers the cases of list and range methods
941 * currently supported.
943 funcid = get_opfamily_proc(opclassform->opcfamily,
944 opclassform->opcintype,
945 opclassform->opcintype,
948 fmgr_info(funcid, &key->partsupfunc[i]);
951 key->partcollation[i] = collation->values[i];
953 /* Collect type information */
956 key->parttypid[i] = relation->rd_att->attrs[attno - 1]->atttypid;
957 key->parttypmod[i] = relation->rd_att->attrs[attno - 1]->atttypmod;
958 key->parttypcoll[i] = relation->rd_att->attrs[attno - 1]->attcollation;
962 key->parttypid[i] = exprType(lfirst(partexprs_item));
963 key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
964 key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
966 get_typlenbyvalalign(key->parttypid[i],
968 &key->parttypbyval[i],
969 &key->parttypalign[i]);
971 ReleaseSysCache(opclasstup);
974 ReleaseSysCache(tuple);
976 /* Success --- now copy to the cache memory */
977 partkeycxt = AllocSetContextCreate(CacheMemoryContext,
978 RelationGetRelationName(relation),
979 ALLOCSET_SMALL_SIZES);
980 relation->rd_partkeycxt = partkeycxt;
981 oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt);
982 relation->rd_partkey = copy_partition_key(key);
983 MemoryContextSwitchTo(oldcxt);
989 * The copy is allocated in the current memory context.
992 copy_partition_key(PartitionKey fromkey)
997 newkey = (PartitionKey) palloc(sizeof(PartitionKeyData));
999 newkey->strategy = fromkey->strategy;
1000 newkey->partnatts = n = fromkey->partnatts;
1002 newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber));
1003 memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber));
1005 newkey->partexprs = copyObject(fromkey->partexprs);
1007 newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid));
1008 memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid));
1010 newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid));
1011 memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid));
1013 newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo));
1014 memcpy(newkey->partsupfunc, fromkey->partsupfunc, n * sizeof(FmgrInfo));
1016 newkey->partcollation = (Oid *) palloc(n * sizeof(Oid));
1017 memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid));
1019 newkey->parttypid = (Oid *) palloc(n * sizeof(Oid));
1020 memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid));
1022 newkey->parttypmod = (int32 *) palloc(n * sizeof(int32));
1023 memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32));
1025 newkey->parttyplen = (int16 *) palloc(n * sizeof(int16));
1026 memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16));
1028 newkey->parttypbyval = (bool *) palloc(n * sizeof(bool));
1029 memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool));
1031 newkey->parttypalign = (char *) palloc(n * sizeof(char)); /* char elements: size must match the memcpy below */
1032 memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char));
1034 newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid));
1035 memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid));
1043 * Determine whether two RuleLocks are equivalent
1045 * Probably this should be in the rules code someplace...
/*
 * equalRuleLocks: slot-by-slot comparison of two RuleLock structures.
 *
 * rlock1, rlock2 - the rule locks to compare.
 *
 * The visible tests require equal numLocks and then, for every index i,
 * matching ruleId, event, enabled and isInstead fields; qual and actions
 * are compared with equal().  Rules are matched purely by array position,
 * so both arrays must have been built in the same order.  The trailing
 * test on rlock2 appears to handle the case where only one of the two
 * pointers is set.
 *
 * NOTE(review): the statement taken when a test fails and the final result
 * are not visible in this listing; presumably a mismatch yields false and
 * falling through yields true.  Confirm against the complete file.
 */
1048 equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
1053 * As of 7.3 we assume the rule ordering is repeatable, because
1054 * RelationBuildRuleLock should read 'em in a consistent order. So just
1055 * compare corresponding slots.
1061 if (rlock1->numLocks != rlock2->numLocks)
1063 for (i = 0; i < rlock1->numLocks; i++)
1065 RewriteRule *rule1 = rlock1->rules[i];
1066 RewriteRule *rule2 = rlock2->rules[i];
1068 if (rule1->ruleId != rule2->ruleId)
1070 if (rule1->event != rule2->event)
1072 if (rule1->enabled != rule2->enabled)
1074 if (rule1->isInstead != rule2->isInstead)
1076 if (!equal(rule1->qual, rule2->qual))
1078 if (!equal(rule1->actions, rule2->actions))
1082 else if (rlock2 != NULL)
1090 * Determine whether two policies are equivalent
/*
 * equalPolicy: compare two row-security policies member by member.
 *
 * policy1, policy2 - the policies to compare; either may be NULL.
 *
 * Compared members: polcmd, hassublinks, policy_name (via strcmp), the
 * roles array, qual and with_check_qual (both via equal()).
 *
 * NOTE(review): the statement taken on a mismatch and the final result are
 * not visible in this listing; presumably false and true respectively.
 */
1093 equalPolicy(RowSecurityPolicy *policy1, RowSecurityPolicy *policy2)
1099 if (policy1 != NULL)
/* policy1 is set, so policy2 must be set as well */
1101 if (policy2 == NULL)
1104 if (policy1->polcmd != policy2->polcmd)
1106 if (policy1->hassublinks != policy2->hassublinks)
1108 if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
/* the role arrays must have the same length in their first dimension */
1110 if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
/* view the array payloads as plain Oid arrays for the element walk */
1113 r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
1114 r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
/*
 * NOTE(review): the loop body is not visible here; presumably it compares
 * r1[i] with r2[i].
 */
1116 for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
/* expression trees are compared structurally with equal() */
1122 if (!equal(policy1->qual, policy2->qual))
1124 if (!equal(policy1->with_check_qual, policy2->with_check_qual))
/* policy1 is NULL on this arm, so a non-NULL policy2 is a mismatch */
1127 else if (policy2 != NULL)
1136 * Determine whether two RowSecurityDesc's are equivalent
/*
 * equalRSDesc: compare two row-security descriptors.
 *
 * rsdesc1, rsdesc2 - the descriptors to compare; either may be NULL.
 *
 * The NULL cases are tested first, then the lengths of the two policy
 * lists, and finally the lists are walked in parallel with equalPolicy()
 * applied to each pair.  Policies are paired by list position only.
 *
 * NOTE(review): the results returned by the individual tests are not
 * visible in this listing; presumably both-NULL is treated as equal and
 * every other failed test as unequal.
 */
1139 equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
/* both descriptors absent */
1144 if (rsdesc1 == NULL && rsdesc2 == NULL)
/* exactly one descriptor absent */
1147 if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
1148 (rsdesc1 == NULL && rsdesc2 != NULL))
/* both are non-NULL from here on, so dereferencing is safe */
1151 if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1154 /* RelationBuildRowSecurity should build policies in order */
1155 forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1157 RowSecurityPolicy *l = (RowSecurityPolicy *) lfirst(lc);
1158 RowSecurityPolicy *r = (RowSecurityPolicy *) lfirst(rc);
1160 if (!equalPolicy(l, r))
1168 * equalPartitionDescs
1169 * Compare two partition descriptors for logical equality
/*
 * equalPartitionDescs: compare two partition descriptors.
 *
 * key       - partition key handed to partition_bounds_equal(); the Assert
 *             below allows it to be NULL only when partdesc1 has no
 *             partitions.
 * partdesc1,
 * partdesc2 - the descriptors to compare; either may be NULL.
 *
 * The visible tests cover nparts, the oids arrays element by element, and
 * the boundinfo members (both absent, or both present and accepted by
 * partition_bounds_equal()).
 *
 * NOTE(review): the statement taken on a mismatch and the final result are
 * not visible in this listing; presumably false and true respectively.
 */
1172 equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
1173 PartitionDesc partdesc2)
1177 if (partdesc1 != NULL)
1179 if (partdesc2 == NULL)
1181 if (partdesc1->nparts != partdesc2->nparts)
1184 Assert(key != NULL || partdesc1->nparts == 0);
1187 * Same oids? If the partitioning structure did not change, that is,
1188 * no partitions were added or removed to the relation, the oids array
1189 * should still match element-by-element.
1191 for (i = 0; i < partdesc1->nparts; i++)
1193 if (partdesc1->oids[i] != partdesc2->oids[i])
1198 * Now compare partition bound collections. The logic to iterate over
1199 * the collections is private to partition.c.
1201 if (partdesc1->boundinfo != NULL)
1203 if (partdesc2->boundinfo == NULL)
1206 if (!partition_bounds_equal(key, partdesc1->boundinfo,
1207 partdesc2->boundinfo))
/* partdesc1 has no bound info on this arm, so partdesc2 must have none */
1210 else if (partdesc2->boundinfo != NULL)
/* partdesc1 is NULL on this arm, so a non-NULL partdesc2 is a mismatch */
1213 else if (partdesc2 != NULL)
1222 * Build a relation descriptor. The caller must hold at least
1223 * AccessShareLock on the target relid.
1225 * The new descriptor is inserted into the hash table if insertIt is true.
1227 * Returns NULL if no pg_class row could be found for the given relid
1228 * (suggesting we are trying to access a just-deleted relation).
1229 * Any other error is reported via elog.
/*
 * RelationBuildDesc: construct a relcache entry from the catalogs.
 *
 * targetRelId - OID of the relation whose descriptor is wanted.
 * insertIt    - whether the finished entry goes into the relcache hash
 *               table.  NOTE(review): the test on insertIt is not visible
 *               in this listing; only the RelationCacheInsert call is.
 *
 * Visible sequence of work:
 *   1. Fetch the pg_class row with ScanPgRelation; a missing row is the
 *      no-such-relation case described in the comments below.
 *   2. AllocateRelationDesc copies the row into a new descriptor, whose
 *      id is then set and asserted to equal targetRelId.
 *   3. Reference count, nailed flag and both subtransaction ids are reset,
 *      then rd_backend and rd_islocaltemp are derived from relpersistence.
 *      An unrecognized relpersistence value raises an error.
 *   4. The tuple descriptor is built unconditionally.  Rules, triggers and
 *      row security are built only when the matching pg_class flag is set,
 *      otherwise the corresponding members are cleared.
 *   5. Foreign-key data is left unloaded (rd_fkeyvalid = false).
 *   6. Partition key and descriptor are built for RELKIND_PARTITIONED_TABLE
 *      only; index access info is built when relam is a valid OID.
 *   7. Reloptions are parsed, lock info and physical address initialized,
 *      rd_smgr cleared, and the pg_class tuple copy is freed.
 *   8. The entry is inserted (replace allowed) and rd_isvalid is set.
 *
 * NOTE(review): local declarations other than pg_class_tuple, the break
 * statements of the switch and the return statements are elided from this
 * listing; the existing comments state that NULL is returned when no
 * pg_class row exists.
 */
1232 RelationBuildDesc(Oid targetRelId, bool insertIt)
1236 HeapTuple pg_class_tuple;
1240 * find the tuple in pg_class corresponding to the given relation id
1242 pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1245 * if no such tuple exists, return NULL
1247 if (!HeapTupleIsValid(pg_class_tuple))
1251 * get information from the pg_class_tuple
1253 relid = HeapTupleGetOid(pg_class_tuple);
1254 relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1255 Assert(relid == targetRelId);
1258 * allocate storage for the relation descriptor, and copy pg_class_tuple
1259 * to relation->rd_rel.
1261 relation = AllocateRelationDesc(relp);
1264 * initialize the relation's relation id (relation->rd_id)
1266 RelationGetRelid(relation) = relid;
1269 * normal relations are not nailed into the cache; nor can a pre-existing
1270 * relation be new. It could be temp though. (Actually, it could be new
1271 * too, but it's okay to forget that fact if forced to flush the entry.)
1273 relation->rd_refcnt = 0;
1274 relation->rd_isnailed = false;
1275 relation->rd_createSubid = InvalidSubTransactionId;
1276 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1277 switch (relation->rd_rel->relpersistence)
1279 case RELPERSISTENCE_UNLOGGED:
1280 case RELPERSISTENCE_PERMANENT:
1281 relation->rd_backend = InvalidBackendId;
1282 relation->rd_islocaltemp = false;
1284 case RELPERSISTENCE_TEMP:
1285 if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1287 relation->rd_backend = BackendIdForTempRelations();
1288 relation->rd_islocaltemp = true;
1293 * If it's a temp table, but not one of ours, we have to use
1294 * the slow, grotty method to figure out the owning backend.
1296 * Note: it's possible that rd_backend gets set to MyBackendId
1297 * here, in case we are looking at a pg_class entry left over
1298 * from a crashed backend that coincidentally had the same
1299 * BackendId we're using. We should *not* consider such a
1300 * table to be "ours"; this is why we need the separate
1301 * rd_islocaltemp flag. The pg_class entry will get flushed
1302 * if/when we clean out the corresponding temp table namespace
1303 * in preparation for using it.
1305 relation->rd_backend =
1306 GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1307 Assert(relation->rd_backend != InvalidBackendId);
1308 relation->rd_islocaltemp = false;
1312 elog(ERROR, "invalid relpersistence: %c",
1313 relation->rd_rel->relpersistence);
1318 * initialize the tuple descriptor (relation->rd_att).
1320 RelationBuildTupleDesc(relation);
1323 * Fetch rules and triggers that affect this relation
1325 if (relation->rd_rel->relhasrules)
1326 RelationBuildRuleLock(relation);
1329 relation->rd_rules = NULL;
1330 relation->rd_rulescxt = NULL;
1333 if (relation->rd_rel->relhastriggers)
1334 RelationBuildTriggers(relation);
1336 relation->trigdesc = NULL;
1338 if (relation->rd_rel->relrowsecurity)
1339 RelationBuildRowSecurity(relation);
1341 relation->rd_rsdesc = NULL;
1343 /* foreign key data is not loaded till asked for */
1344 relation->rd_fkeylist = NIL;
1345 relation->rd_fkeyvalid = false;
1347 /* if a partitioned table, initialize key and partition descriptor info */
1348 if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1350 RelationBuildPartitionKey(relation);
1351 RelationBuildPartitionDesc(relation);
1355 relation->rd_partkeycxt = NULL;
1356 relation->rd_partkey = NULL;
1357 relation->rd_partdesc = NULL;
1358 relation->rd_pdcxt = NULL;
1362 * if it's an index, initialize index-related information
1364 if (OidIsValid(relation->rd_rel->relam))
1365 RelationInitIndexAccessInfo(relation);
1367 /* extract reloptions if any */
1368 RelationParseRelOptions(relation, pg_class_tuple);
1371 * initialize the relation lock manager information
1373 RelationInitLockInfo(relation); /* see lmgr.c */
1376 * initialize physical addressing information for the relation
1378 RelationInitPhysicalAddr(relation);
1380 /* make sure relation is marked as having no open file yet */
1381 relation->rd_smgr = NULL;
1384 * now we can free the memory allocated for pg_class_tuple
1386 heap_freetuple(pg_class_tuple);
1389 * Insert newly created relation into relcache hash table, if requested.
1391 * There is one scenario in which we might find a hashtable entry already
1392 * present, even though our caller failed to find it: if the relation is a
1393 * system catalog or index that's used during relcache load, we might have
1394 * recursively created the same relcache entry during the preceding steps.
1395 * So allow RelationCacheInsert to delete any already-present relcache
1396 * entry for the same OID. The already-present entry should have refcount
1397 * zero (else somebody forgot to close it); in the event that it doesn't,
1398 * we'll elog a WARNING and leak the already-present entry.
1401 RelationCacheInsert(relation, true);
1403 /* It's fully valid */
1404 relation->rd_isvalid = true;
1410 * Initialize the physical addressing info (RelFileNode) for a relcache entry
1412 * Note: at the physical level, relations in the pg_global tablespace must
1413 * be treated as shared, even if relisshared isn't set. Hence we do not
1414 * look at relisshared here.
/*
 * RelationInitPhysicalAddr: fill in relation->rd_node.
 *
 * relation - relcache entry whose rd_rel and rd_id are already set.
 *
 * Members written:
 *   spcNode - reltablespace when it is nonzero; the visible alternative
 *             assignment uses MyDatabaseTableSpace.
 *   dbNode  - InvalidOid for GLOBALTABLESPACE_OID; the visible alternative
 *             assignment uses MyDatabaseId.
 *   relNode - relfilenode when it is nonzero, otherwise the value returned
 *             by RelationMapOidToFilenode; an invalid mapper result raises
 *             an error naming the relation.
 *
 * Side effect: when a historic snapshot is active, the relation is
 * accessible in logical decoding and a transaction is in progress, the
 * pg_class row is re-read and rd_rel->reltablespace and rd_rel->relfilenode
 * are overwritten from it before relNode is set.  A missing row raises an
 * error.
 *
 * NOTE(review): the else keywords pairing these assignments and the last
 * argument of the ScanPgRelation call are elided from this listing.
 */
1417 RelationInitPhysicalAddr(Relation relation)
1419 if (relation->rd_rel->reltablespace)
1420 relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1422 relation->rd_node.spcNode = MyDatabaseTableSpace;
1423 if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1424 relation->rd_node.dbNode = InvalidOid;
1426 relation->rd_node.dbNode = MyDatabaseId;
1428 if (relation->rd_rel->relfilenode)
1431 * Even if we are using a decoding snapshot that doesn't represent the
1432 * current state of the catalog we need to make sure the filenode
1433 * points to the current file since the older file will be gone (or
1434 * truncated). The new file will still contain older rows so lookups
1435 * in them will work correctly. This wouldn't work correctly if
1436 * rewrites were allowed to change the schema in an incompatible way,
1437 * but those are prevented both on catalog tables and on user tables
1438 * declared as additional catalog tables.
1440 if (HistoricSnapshotActive()
1441 && RelationIsAccessibleInLogicalDecoding(relation)
1442 && IsTransactionState())
1444 HeapTuple phys_tuple;
1445 Form_pg_class physrel;
1447 phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1448 RelationGetRelid(relation) != ClassOidIndexId,
1450 if (!HeapTupleIsValid(phys_tuple))
1451 elog(ERROR, "could not find pg_class entry for %u",
1452 RelationGetRelid(relation));
1453 physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1455 relation->rd_rel->reltablespace = physrel->reltablespace;
1456 relation->rd_rel->relfilenode = physrel->relfilenode;
1457 heap_freetuple(phys_tuple);
1460 relation->rd_node.relNode = relation->rd_rel->relfilenode;
1464 /* Consult the relation mapper */
1465 relation->rd_node.relNode =
1466 RelationMapOidToFilenode(relation->rd_id,
1467 relation->rd_rel->relisshared);
1468 if (!OidIsValid(relation->rd_node.relNode))
1469 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1470 RelationGetRelationName(relation), relation->rd_id);
1475 * Fill in the IndexAmRoutine for an index relation.
1477 * relation's rd_amhandler and rd_indexcxt must be valid already.
/*
 * InitIndexAmRoutine: install a private copy of the access method API
 * struct in an index relcache entry.
 *
 * relation - index entry; rd_amhandler and rd_indexcxt are read, and
 *            rd_amroutine is written.
 *
 * The handler result is obtained in whatever memory context is current,
 * then copied byte for byte into a block allocated from rd_indexcxt, so
 * the cached struct lives in the same context as the other index info.
 *
 * NOTE(review): whether the temporary struct is released after the copy
 * is not visible in this listing.
 */
1480 InitIndexAmRoutine(Relation relation)
1482 IndexAmRoutine *cached,
1486 * Call the amhandler in current, short-lived memory context, just in case
1487 * it leaks anything (it probably won't, but let's be paranoid).
1489 tmp = GetIndexAmRoutine(relation->rd_amhandler);
1491 /* OK, now transfer the data into relation's rd_indexcxt. */
1492 cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1493 sizeof(IndexAmRoutine));
1494 memcpy(cached, tmp, sizeof(IndexAmRoutine));
1495 relation->rd_amroutine = cached;
1501 * Initialize index-access-method support data for an index relation
/*
 * RelationInitIndexAccessInfo: populate the index-specific members of a
 * relcache entry.
 *
 * relation - index entry whose rd_rel is already filled in.
 *
 * Visible sequence of work:
 *   1. Copy the pg_index row from the INDEXRELID syscache into
 *      CacheMemoryContext (rd_indextuple) and point rd_index at it.
 *   2. Look up relam in the AMOID syscache and remember its amhandler.
 *   3. Cross-check relnatts against indnatts; a mismatch raises an error.
 *   4. Create a small private context under CacheMemoryContext, named
 *      after the relation, and store it in rd_indexcxt.
 *   5. Load the access method API struct with InitIndexAmRoutine.
 *   6. Allocate zeroed per-column arrays in that context: rd_opfamily,
 *      rd_opcintype, rd_indcollation and rd_indoption hold natts elements;
 *      rd_support and rd_supportinfo hold natts * amsupport elements.
 *   7. Extract indcollation, indclass and indoption from the tuple with
 *      fastgetattr, copy collations and options into the arrays, and let
 *      IndexSupportInitialize fill opfamily, opcintype and support procs.
 *   8. Reset the lazily filled members (expressions, predicate, exclusion
 *      info, rd_amcache).
 *
 * Failed syscache lookups raise errors.
 *
 * NOTE(review): several local declarations, the trailing arguments of the
 * fastgetattr and IndexSupportInitialize calls, and the test that selects
 * between allocating the support arrays and setting them to NULL are
 * elided from this listing.
 */
1504 RelationInitIndexAccessInfo(Relation relation)
1509 Datum indclassDatum;
1510 Datum indoptionDatum;
1513 oidvector *indclass;
1514 int2vector *indoption;
1515 MemoryContext indexcxt;
1516 MemoryContext oldcontext;
1521 * Make a copy of the pg_index entry for the index. Since pg_index
1522 * contains variable-length and possibly-null fields, we have to do this
1523 * honestly rather than just treating it as a Form_pg_index struct.
1525 tuple = SearchSysCache1(INDEXRELID,
1526 ObjectIdGetDatum(RelationGetRelid(relation)));
1527 if (!HeapTupleIsValid(tuple))
1528 elog(ERROR, "cache lookup failed for index %u",
1529 RelationGetRelid(relation));
1530 oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
1531 relation->rd_indextuple = heap_copytuple(tuple);
1532 relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1533 MemoryContextSwitchTo(oldcontext);
1534 ReleaseSysCache(tuple);
1537 * Look up the index's access method, save the OID of its handler function
1539 tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1540 if (!HeapTupleIsValid(tuple))
1541 elog(ERROR, "cache lookup failed for access method %u",
1542 relation->rd_rel->relam);
1543 aform = (Form_pg_am) GETSTRUCT(tuple);
1544 relation->rd_amhandler = aform->amhandler;
1545 ReleaseSysCache(tuple);
1547 natts = relation->rd_rel->relnatts;
1548 if (natts != relation->rd_index->indnatts)
1549 elog(ERROR, "relnatts disagrees with indnatts for index %u",
1550 RelationGetRelid(relation));
1553 * Make the private context to hold index access info. The reason we need
1554 * a context, and not just a couple of pallocs, is so that we won't leak
1555 * any subsidiary info attached to fmgr lookup records.
1557 indexcxt = AllocSetContextCreate(CacheMemoryContext,
1558 RelationGetRelationName(relation),
1559 ALLOCSET_SMALL_SIZES);
1560 relation->rd_indexcxt = indexcxt;
1563 * Now we can fetch the index AM's API struct
1565 InitIndexAmRoutine(relation);
1568 * Allocate arrays to hold data
1570 relation->rd_opfamily = (Oid *)
1571 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1572 relation->rd_opcintype = (Oid *)
1573 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1575 amsupport = relation->rd_amroutine->amsupport;
1578 int nsupport = natts * amsupport;
1580 relation->rd_support = (RegProcedure *)
1581 MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1582 relation->rd_supportinfo = (FmgrInfo *)
1583 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1587 relation->rd_support = NULL;
1588 relation->rd_supportinfo = NULL;
1591 relation->rd_indcollation = (Oid *)
1592 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1594 relation->rd_indoption = (int16 *)
1595 MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1598 * indcollation cannot be referenced directly through the C struct,
1599 * because it comes after the variable-width indkey field. Must extract
1600 * the datum the hard way...
1602 indcollDatum = fastgetattr(relation->rd_indextuple,
1603 Anum_pg_index_indcollation,
1604 GetPgIndexDescriptor(),
1607 indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1608 memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid));
1611 * indclass cannot be referenced directly through the C struct, because it
1612 * comes after the variable-width indkey field. Must extract the datum
1615 indclassDatum = fastgetattr(relation->rd_indextuple,
1616 Anum_pg_index_indclass,
1617 GetPgIndexDescriptor(),
1620 indclass = (oidvector *) DatumGetPointer(indclassDatum);
1623 * Fill the support procedure OID array, as well as the info about
1624 * opfamilies and opclass input types. (aminfo and supportinfo are left
1625 * as zeroes, and are filled on-the-fly when used)
1627 IndexSupportInitialize(indclass, relation->rd_support,
1628 relation->rd_opfamily, relation->rd_opcintype,
1632 * Similarly extract indoption and copy it to the cache entry
1634 indoptionDatum = fastgetattr(relation->rd_indextuple,
1635 Anum_pg_index_indoption,
1636 GetPgIndexDescriptor(),
1639 indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1640 memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1643 * expressions, predicate, exclusion caches will be filled later
1645 relation->rd_indexprs = NIL;
1646 relation->rd_indpred = NIL;
1647 relation->rd_exclops = NULL;
1648 relation->rd_exclprocs = NULL;
1649 relation->rd_exclstrats = NULL;
1650 relation->rd_amcache = NULL;
1654 * IndexSupportInitialize
1655 * Initializes an index's cached opclass information,
1656 * given the index's pg_index.indclass entry.
1658 * Data is returned into *indexSupport, *opFamily, and *opcInType,
1659 * which are arrays allocated by the caller.
1661 * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1662 * indicate the size of the arrays it has allocated --- but in practice these
1663 * numbers must always match those obtainable from the system catalog entries
1664 * for the index and access method.
/*
 * IndexSupportInitialize: copy cached opclass data into caller arrays,
 * one index column at a time.
 *
 * indclass           - opclass OIDs of the index columns.
 * indexSupport       - output; flat array indexed by
 *                      attIndex * maxSupportNumber + procedure slot.
 * opFamily,
 * opcInType          - outputs referenced in the body, one element per
 *                      column (their declarations are elided from this
 *                      listing).
 * maxSupportNumber   - number of support procedure slots per column.
 * maxAttributeNumber - number of columns to process.
 *
 * An invalid opclass OID in any column raises an error.
 */
1667 IndexSupportInitialize(oidvector *indclass,
1668 RegProcedure *indexSupport,
1671 StrategyNumber maxSupportNumber,
1672 AttrNumber maxAttributeNumber)
1676 for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1678 OpClassCacheEnt *opcentry;
/* every index column must name an opclass */
1680 if (!OidIsValid(indclass->values[attIndex]))
1681 elog(ERROR, "bogus pg_index tuple");
1683 /* look up the info for this opclass, using a cache */
1684 opcentry = LookupOpclassInfo(indclass->values[attIndex],
1687 /* copy cached data into relcache entry */
1688 opFamily[attIndex] = opcentry->opcfamily;
1689 opcInType[attIndex] = opcentry->opcintype;
/* skip the copy entirely when there are no support procedure slots */
1690 if (maxSupportNumber > 0)
1691 memcpy(&indexSupport[attIndex * maxSupportNumber],
1692 opcentry->supportProcs,
1693 maxSupportNumber * sizeof(RegProcedure));
1700 * This routine maintains a per-opclass cache of the information needed
1701 * by IndexSupportInitialize(). This is more efficient than relying on
1702 * the catalog cache, because we can load all the info about a particular
1703 * opclass in a single indexscan of pg_amproc.
1705 * The information from pg_am about expected range of support function
1706 * numbers is passed in, rather than being looked up, mainly because the
1707 * caller will have it already.
1709 * Note there is no provision for flushing the cache. This is OK at the
1710 * moment because there is no way to ALTER any interesting properties of an
1711 * existing opclass --- all you can do is drop it, which will result in
1712 * a useless but harmless dead entry in the cache. To support altering
1713 * opclass membership (not the same as opfamily membership!), we'd need to
1714 * be able to flush this cache as well as the contents of relcache entries
/*
 * LookupOpclassInfo: return the cache entry describing one operator
 * class, loading it from the catalogs when needed.
 *
 * operatorClassOid - OID of the opclass; also the hash key.
 * numSupport       - number of support procedure slots; recorded in a new
 *                    entry and asserted to match an existing one.
 *
 * Returns a pointer to the OpClassCacheEnt stored in OpClassCache.
 *
 * Visible sequence of work:
 *   1. On first use, create the hash table (Oid keys, OpClassCacheEnt
 *      entries) and make sure CacheMemoryContext exists.
 *   2. hash_search with HASH_ENTER finds or creates the entry.  A new
 *      entry is marked not valid and gets a zeroed supportProcs array
 *      allocated in CacheMemoryContext, or NULL.
 *   3. With CLOBBER_CACHE_ALWAYS defined the entry is forced not valid so
 *      the load below always runs.
 *   4. indexOK is false only while criticalRelcachesBuilt is false and
 *      the opclass is OID_BTREE_OPS_OID or INT2_BTREE_OPS_OID; it is
 *      passed to both systable_beginscan calls.
 *   5. Read opcfamily and opcintype from the pg_opclass row; a missing
 *      row raises an error.
 *   6. Scan pg_amproc with three equality keys (family, left type and
 *      right type both equal to opcintype).  Each amprocnum must lie in
 *      1..numSupport or an error is raised; the value is stored at index
 *      amprocnum - 1.
 *   7. Mark the entry valid.
 *
 * Because valid is set only as the last step, an entry left behind by an
 * error in steps 5 or 6 is still not valid when it is looked up again.
 *
 * NOTE(review): several local declarations, the tests guarding the new
 * entry and the pg_amproc scan, the trailing systable_beginscan arguments
 * and the return statements are elided from this listing.
 */
1717 static OpClassCacheEnt *
1718 LookupOpclassInfo(Oid operatorClassOid,
1719 StrategyNumber numSupport)
1721 OpClassCacheEnt *opcentry;
1725 ScanKeyData skey[3];
1729 if (OpClassCache == NULL)
1731 /* First time through: initialize the opclass cache */
1734 MemSet(&ctl, 0, sizeof(ctl));
1735 ctl.keysize = sizeof(Oid);
1736 ctl.entrysize = sizeof(OpClassCacheEnt);
1737 OpClassCache = hash_create("Operator class cache", 64,
1738 &ctl, HASH_ELEM | HASH_BLOBS);
1740 /* Also make sure CacheMemoryContext exists */
1741 if (!CacheMemoryContext)
1742 CreateCacheMemoryContext();
1745 opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1746 (void *) &operatorClassOid,
1747 HASH_ENTER, &found);
1751 /* Need to allocate memory for new entry */
1752 opcentry->valid = false; /* until known OK */
1753 opcentry->numSupport = numSupport;
1756 opcentry->supportProcs = (RegProcedure *)
1757 MemoryContextAllocZero(CacheMemoryContext,
1758 numSupport * sizeof(RegProcedure));
1760 opcentry->supportProcs = NULL;
1764 Assert(numSupport == opcentry->numSupport);
1768 * When testing for cache-flush hazards, we intentionally disable the
1769 * operator class cache and force reloading of the info on each call. This
1770 * is helpful because we want to test the case where a cache flush occurs
1771 * while we are loading the info, and it's very hard to provoke that if
1772 * this happens only once per opclass per backend.
1774 #if defined(CLOBBER_CACHE_ALWAYS)
1775 opcentry->valid = false;
1778 if (opcentry->valid)
1782 * Need to fill in new entry.
1784 * To avoid infinite recursion during startup, force heap scans if we're
1785 * looking up info for the opclasses used by the indexes we would like to
1788 indexOK = criticalRelcachesBuilt ||
1789 (operatorClassOid != OID_BTREE_OPS_OID &&
1790 operatorClassOid != INT2_BTREE_OPS_OID);
1793 * We have to fetch the pg_opclass row to determine its opfamily and
1794 * opcintype, which are needed to look up related operators and functions.
1795 * It'd be convenient to use the syscache here, but that probably doesn't
1796 * work while bootstrapping.
1798 ScanKeyInit(&skey[0],
1799 ObjectIdAttributeNumber,
1800 BTEqualStrategyNumber, F_OIDEQ,
1801 ObjectIdGetDatum(operatorClassOid));
1802 rel = heap_open(OperatorClassRelationId, AccessShareLock);
1803 scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1806 if (HeapTupleIsValid(htup = systable_getnext(scan)))
1808 Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1810 opcentry->opcfamily = opclassform->opcfamily;
1811 opcentry->opcintype = opclassform->opcintype;
1814 elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1816 systable_endscan(scan);
1817 heap_close(rel, AccessShareLock);
1820 * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1821 * the default ones (those with lefttype = righttype = opcintype).
1825 ScanKeyInit(&skey[0],
1826 Anum_pg_amproc_amprocfamily,
1827 BTEqualStrategyNumber, F_OIDEQ,
1828 ObjectIdGetDatum(opcentry->opcfamily));
1829 ScanKeyInit(&skey[1],
1830 Anum_pg_amproc_amproclefttype,
1831 BTEqualStrategyNumber, F_OIDEQ,
1832 ObjectIdGetDatum(opcentry->opcintype));
1833 ScanKeyInit(&skey[2],
1834 Anum_pg_amproc_amprocrighttype,
1835 BTEqualStrategyNumber, F_OIDEQ,
1836 ObjectIdGetDatum(opcentry->opcintype));
1837 rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
1838 scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1841 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1843 Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1845 if (amprocform->amprocnum <= 0 ||
1846 (StrategyNumber) amprocform->amprocnum > numSupport)
1847 elog(ERROR, "invalid amproc number %d for opclass %u",
1848 amprocform->amprocnum, operatorClassOid);
1850 opcentry->supportProcs[amprocform->amprocnum - 1] =
1854 systable_endscan(scan);
1855 heap_close(rel, AccessShareLock);
1858 opcentry->valid = true;
1866 * This is a special cut-down version of RelationBuildDesc(),
1867 * used while initializing the relcache.
1868 * The relation descriptor is built just from the supplied parameters,
1869 * without actually looking at any system table entries. We cheat
1870 * quite a lot since we only need to work for a few basic system
1873 * formrdesc is currently used for: pg_database, pg_authid, pg_auth_members,
1874 * pg_shseclabel, pg_class, pg_attribute, pg_proc, and pg_type
1875 * (see RelationCacheInitializePhase2/3).
1877 * Note that these catalogs can't have constraints (except attnotnull),
1878 * default values, rules, or triggers, since we don't cope with any of that.
1879 * (Well, actually, this only matters for properties that need to be valid
1880 * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1881 * these properties matter then...)
1883 * NOTE: we assume we are already switched into CacheMemoryContext.
/*
 * formrdesc: hand-build a nailed relcache entry without reading any
 * catalog.
 *
 * relationName    - copied into rd_rel->relname.
 * relationReltype - stored as rd_rel->reltype and as the tuple descriptor
 *                   type id.
 * isshared        - stored as rd_rel->relisshared.
 * hasoids         - stored as rd_rel->relhasoids and passed to
 *                   CreateTemplateTupleDesc.
 * natts           - number of attributes; stored as relnatts.
 * attrs           - attribute rows; attnotnull is read from each, and
 *                   the fixed-size part of each descriptor attribute is
 *                   filled by memcpy.
 *
 * Notable properties of the entry built here:
 *   - rd_refcnt starts at 1 and rd_isnailed is true.
 *   - The pg_class image is mostly placeholder data: namespace
 *     PG_CATALOG_NAMESPACE, permanent persistence, populated, relkind
 *     RELKIND_RELATION, zero page and tuple counts.
 *   - attcacheoff is set to -1 for every attribute, then to 0 for the
 *     first one.
 *   - The relation id is taken from attrelid of the first attribute.
 *   - relfilenode is InvalidOid, which makes RelationInitPhysicalAddr
 *     consult the relation mapper; in bootstrap mode the mapping is
 *     registered first with the filenode equal to the OID.
 *   - relhasindex is false in bootstrap mode and true otherwise.
 *   - The entry is added with RelationCacheInsert(relation, false) and
 *     then marked valid.
 *
 * NOTE(review): attrs[0] of the new descriptor is dereferenced without a
 * visible check, so natts is presumably always at least 1.  The local
 * declarations, the tests guarding the reltablespace assignment and the
 * TupleConstr allocation, the source argument of the memcpy and the
 * trailing RelationMapUpdateMap arguments are elided from this listing.
 */
1886 formrdesc(const char *relationName, Oid relationReltype,
1887 bool isshared, bool hasoids,
1888 int natts, const FormData_pg_attribute *attrs)
1895 * allocate new relation desc, clear all fields of reldesc
1897 relation = (Relation) palloc0(sizeof(RelationData));
1899 /* make sure relation is marked as having no open file yet */
1900 relation->rd_smgr = NULL;
1903 * initialize reference count: 1 because it is nailed in cache
1905 relation->rd_refcnt = 1;
1908 * all entries built with this routine are nailed-in-cache; none are for
1909 * new or temp relations.
1911 relation->rd_isnailed = true;
1912 relation->rd_createSubid = InvalidSubTransactionId;
1913 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1914 relation->rd_backend = InvalidBackendId;
1915 relation->rd_islocaltemp = false;
1918 * initialize relation tuple form
1920 * The data we insert here is pretty incomplete/bogus, but it'll serve to
1921 * get us launched. RelationCacheInitializePhase3() will read the real
1922 * data from pg_class and replace what we've done here. Note in
1923 * particular that relowner is left as zero; this cues
1924 * RelationCacheInitializePhase3 that the real data isn't there yet.
1926 relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1928 namestrcpy(&relation->rd_rel->relname, relationName);
1929 relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1930 relation->rd_rel->reltype = relationReltype;
1933 * It's important to distinguish between shared and non-shared relations,
1934 * even at bootstrap time, to make sure we know where they are stored.
1936 relation->rd_rel->relisshared = isshared;
1938 relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1940 /* formrdesc is used only for permanent relations */
1941 relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1943 /* ... and they're always populated, too */
1944 relation->rd_rel->relispopulated = true;
1946 relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1947 relation->rd_rel->relpages = 0;
1948 relation->rd_rel->reltuples = 0;
1949 relation->rd_rel->relallvisible = 0;
1950 relation->rd_rel->relkind = RELKIND_RELATION;
1951 relation->rd_rel->relhasoids = hasoids;
1952 relation->rd_rel->relnatts = (int16) natts;
1955 * initialize attribute tuple form
1957 * Unlike the case with the relation tuple, this data had better be right
1958 * because it will never be replaced. The data comes from
1959 * src/include/catalog/ headers via genbki.pl.
1961 relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1962 relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1964 relation->rd_att->tdtypeid = relationReltype;
1965 relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1968 * initialize tuple desc info
1970 has_not_null = false;
1971 for (i = 0; i < natts; i++)
1973 memcpy(relation->rd_att->attrs[i],
1975 ATTRIBUTE_FIXED_PART_SIZE);
1976 has_not_null |= attrs[i].attnotnull;
1977 /* make sure attcacheoff is valid */
1978 relation->rd_att->attrs[i]->attcacheoff = -1;
1981 /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1982 relation->rd_att->attrs[0]->attcacheoff = 0;
1984 /* mark not-null status */
1987 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1989 constr->has_not_null = true;
1990 relation->rd_att->constr = constr;
1994 * initialize relation id from info in att array (my, this is ugly)
1996 RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1999 * All relations made with formrdesc are mapped. This is necessarily so
2000 * because there is no other way to know what filenode they currently
2001 * have. In bootstrap mode, add them to the initial relation mapper data,
2002 * specifying that the initial filenode is the same as the OID.
2004 relation->rd_rel->relfilenode = InvalidOid;
2005 if (IsBootstrapProcessingMode())
2006 RelationMapUpdateMap(RelationGetRelid(relation),
2007 RelationGetRelid(relation),
2011 * initialize the relation lock manager information
2013 RelationInitLockInfo(relation); /* see lmgr.c */
2016 * initialize physical addressing information for the relation
2018 RelationInitPhysicalAddr(relation);
2021 * initialize the rel-has-index flag, using hardwired knowledge
2023 if (IsBootstrapProcessingMode())
2025 /* In bootstrap mode, we have no indexes */
2026 relation->rd_rel->relhasindex = false;
2030 /* Otherwise, all the rels formrdesc is used for have indexes */
2031 relation->rd_rel->relhasindex = true;
2035 * add new reldesc to relcache
2037 RelationCacheInsert(relation, false);
2039 /* It's fully valid */
2040 relation->rd_isvalid = true;
2044 /* ----------------------------------------------------------------
2045 * Relation Descriptor Lookup Interface
2046 * ----------------------------------------------------------------
2050 * RelationIdGetRelation
2052 * Lookup a reldesc by OID; make one if not already in cache.
2054 * Returns NULL if no pg_class row could be found for the given relid
2055 * (suggesting we are trying to access a just-deleted relation).
2056 * Any other error is reported via elog.
2058 * NB: caller should already have at least AccessShareLock on the
2059 * relation ID, else there are nasty race conditions.
2061 * NB: relation ref count is incremented, or set to 1 if new entry.
2062 * Caller should eventually decrement count. (Usually,
2063 * that happens by calling RelationClose().)
/*
 * RelationIdGetRelation: return the relcache entry for an OID, building
 * it when it is not cached yet.
 *
 * relationId - OID of the relation to open.
 *
 * Cache hit: the reference count is incremented first.  An entry whose
 * rd_isvalid is false is then refreshed, using RelationReloadIndexInfo
 * for RELKIND_INDEX entries; the other visible refresh call is
 * RelationClearRelation(rd, true).  The entry is asserted to be valid
 * afterwards.
 *
 * Cache miss: RelationBuildDesc(relationId, true) builds the entry, and
 * the reference count is incremented only when a valid pointer came back.
 *
 * Must be called inside a transaction; this is asserted on entry.
 *
 * NOTE(review): the local declaration of rd and the return statements are
 * elided from this listing.
 */
2066 RelationIdGetRelation(Oid relationId)
2070 /* Make sure we're in an xact, even if this ends up being a cache hit */
2071 Assert(IsTransactionState());
2074 * first try to find reldesc in the cache
2076 RelationIdCacheLookup(relationId, rd);
2078 if (RelationIsValid(rd))
2080 RelationIncrementReferenceCount(rd);
2081 /* revalidate cache entry if necessary */
2082 if (!rd->rd_isvalid)
2085 * Indexes only have a limited number of possible schema changes,
2086 * and we don't want to use the full-blown procedure because it's
2087 * a headache for indexes that reload itself depends on.
2089 if (rd->rd_rel->relkind == RELKIND_INDEX)
2090 RelationReloadIndexInfo(rd);
2092 RelationClearRelation(rd, true);
2093 Assert(rd->rd_isvalid);
2099 * no reldesc in the cache, so have RelationBuildDesc() build one and add
2102 rd = RelationBuildDesc(relationId, true);
2103 if (RelationIsValid(rd))
2104 RelationIncrementReferenceCount(rd);
2108 /* ----------------------------------------------------------------
2109 * cache invalidation support routines
2110 * ----------------------------------------------------------------
2114 * RelationIncrementReferenceCount
2115 * Increments relation reference count.
2117 * Note: bootstrap mode has its own weird ideas about relation refcount
2118 * behavior; we ought to fix it someday, but for now, just disable
2119 * reference count ownership tracking in bootstrap mode.
/*
 * RelationIncrementReferenceCount: take one more reference on a relcache
 * entry.
 *
 * rel - the entry whose rd_refcnt is bumped.
 */
2122 RelationIncrementReferenceCount(Relation rel)
/*
 * Enlarge the owner bookkeeping before touching the count.
 * NOTE(review): presumably so that a failure to enlarge cannot leave the
 * count incremented without a matching owner record; confirm.
 */
2124 ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
2125 rel->rd_refcnt += 1;
/* ownership tracking is skipped in bootstrap mode */
2126 if (!IsBootstrapProcessingMode())
2127 ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
2131 * RelationDecrementReferenceCount
2132 * Decrements relation reference count.
/*
 * RelationDecrementReferenceCount: drop one reference on a relcache
 * entry.
 *
 * rel - the entry whose rd_refcnt is decremented; the count must be
 *       positive on entry (asserted).
 */
2135 RelationDecrementReferenceCount(Relation rel)
2137 Assert(rel->rd_refcnt > 0);
2138 rel->rd_refcnt -= 1;
/* mirror of the increment path: no ownership tracking in bootstrap mode */
2139 if (!IsBootstrapProcessingMode())
2140 ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
2144 * RelationClose - close an open relation
2146 * Actually, we just decrement the refcount.
2148 * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2149 * will be freed as soon as their refcount goes to zero. In combination
2150 * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2151 * to catch references to already-released relcache entries. It slows
2152 * things down quite a bit, however.
/*
 * RelationClose: release one reference on an open relation.
 *
 * relation - the entry being closed.
 *
 * In a normal build this only decrements the reference count.  When
 * RELCACHE_FORCE_RELEASE is defined, an entry whose count has dropped to
 * zero is additionally passed to RelationClearRelation(relation, false),
 * unless either of its subtransaction ids is set.
 */
2155 RelationClose(Relation relation)
2157 /* Note: no locking manipulations needed */
2158 RelationDecrementReferenceCount(relation);
2160 #ifdef RELCACHE_FORCE_RELEASE
/* entries with a valid create or new-relfilenode subid are left alone */
2161 if (RelationHasReferenceCountZero(relation) &&
2162 relation->rd_createSubid == InvalidSubTransactionId &&
2163 relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
2164 RelationClearRelation(relation, false);
2169 * RelationReloadIndexInfo - reload minimal information for an open index
2171 * This function is used only for indexes. A relcache inval on an index
2172 * can mean that its pg_class or pg_index row changed. There are only
2173 * very limited changes that are allowed to an existing index's schema,
2174 * so we can update the relcache entry without a complete rebuild; which
2175 * is fortunate because we can't rebuild an index entry that is "nailed"
2176 * and/or in active use. We support full replacement of the pg_class row,
2177 * as well as updates of a few simple fields of the pg_index row.
2179 * We can't necessarily reread the catalog rows right away; we might be
2180 * in a failed transaction when we receive the SI notification. If so,
2181 * RelationClearRelation just marks the entry as invalid by setting
2182 * rd_isvalid to false. This routine is called to fix the entry when it
2185 * We assume that at the time we are called, we have at least AccessShareLock
2186 * on the target index. (Note: in the calls from RelationClearRelation,
2187 * this is legitimate because we know the rel has positive refcount.)
2189 * If the target index is an index on pg_class or pg_index, we'd better have
2190 * previously gotten at least AccessShareLock on its underlying catalog,
2191 * else we are at risk of deadlock against someone trying to exclusive-lock
2192 * the heap and index in that order. This is ensured in current usage by
2193 * only applying this to indexes being opened or having positive refcount.
2196 RelationReloadIndexInfo(Relation relation)
2199 HeapTuple pg_class_tuple;
2202 /* Should be called only for invalidated indexes */
2203 Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
2204 !relation->rd_isvalid);
2206 /* Ensure it's closed at smgr level */
2207 RelationCloseSmgr(relation);
2209 /* Must free any AM cached data upon relcache flush */
2210 if (relation->rd_amcache)
2211 pfree(relation->rd_amcache);
2212 relation->rd_amcache = NULL;
2215 * If it's a shared index, we might be called before backend startup has
2216 * finished selecting a database, in which case we have no way to read
2217 * pg_class yet. However, a shared index can never have any significant
2218 * schema updates, so it's okay to ignore the invalidation signal. Just
2219 * mark it valid and return without doing anything more.
2221 if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2223 relation->rd_isvalid = true;
2228 * Read the pg_class row
2230 * Don't try to use an indexscan of pg_class_oid_index to reload the info
2231 * for pg_class_oid_index ...
2233 indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2234 pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2235 if (!HeapTupleIsValid(pg_class_tuple))
2236 elog(ERROR, "could not find pg_class tuple for index %u",
2237 RelationGetRelid(relation));
2238 relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2239 memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2240 /* Reload reloptions in case they changed */
2241 if (relation->rd_options)
2242 pfree(relation->rd_options);
2243 RelationParseRelOptions(relation, pg_class_tuple);
2244 /* done with pg_class tuple */
2245 heap_freetuple(pg_class_tuple);
2246 /* We must recalculate physical address in case it changed */
2247 RelationInitPhysicalAddr(relation);
2250 * For a non-system index, there are fields of the pg_index row that are
2251 * allowed to change, so re-read that row and update the relcache entry.
2252 * Most of the info derived from pg_index (such as support function lookup
2253 * info) cannot change, and indeed the whole point of this routine is to
2254 * update the relcache entry without clobbering that data; so wholesale
2255 * replacement is not appropriate.
2257 if (!IsSystemRelation(relation))
2260 Form_pg_index index;
2262 tuple = SearchSysCache1(INDEXRELID,
2263 ObjectIdGetDatum(RelationGetRelid(relation)));
2264 if (!HeapTupleIsValid(tuple))
2265 elog(ERROR, "cache lookup failed for index %u",
2266 RelationGetRelid(relation));
2267 index = (Form_pg_index) GETSTRUCT(tuple);
2270 * Basically, let's just copy all the bool fields. There are one or
2271 * two of these that can't actually change in the current code, but
2272 * it's not worth it to track exactly which ones they are. None of
2273 * the array fields are allowed to change, though.
2275 relation->rd_index->indisunique = index->indisunique;
2276 relation->rd_index->indisprimary = index->indisprimary;
2277 relation->rd_index->indisexclusion = index->indisexclusion;
2278 relation->rd_index->indimmediate = index->indimmediate;
2279 relation->rd_index->indisclustered = index->indisclustered;
2280 relation->rd_index->indisvalid = index->indisvalid;
2281 relation->rd_index->indcheckxmin = index->indcheckxmin;
2282 relation->rd_index->indisready = index->indisready;
2283 relation->rd_index->indislive = index->indislive;
2285 /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2286 HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
2287 HeapTupleHeaderGetXmin(tuple->t_data));
2289 ReleaseSysCache(tuple);
2292 /* Okay, now it's valid again */
2293 relation->rd_isvalid = true;
2297 * RelationDestroyRelation
2299 * Physically delete a relation cache entry and all subsidiary data.
2300 * Caller must already have unhooked the entry from the hash table.
2303 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2305 Assert(RelationHasReferenceCountZero(relation));
2308 * Make sure smgr and lower levels close the relation's files, if they
2309 * weren't closed already. (This was probably done by caller, but let's
2310 * just be real sure.)
2312 RelationCloseSmgr(relation);
2315 * Free all the subsidiary data structures of the relcache entry, then the
2318 if (relation->rd_rel)
2319 pfree(relation->rd_rel);
2320 /* can't use DecrTupleDescRefCount here */
2321 Assert(relation->rd_att->tdrefcount > 0);
2322 if (--relation->rd_att->tdrefcount == 0)
2325 * If we Rebuilt a relcache entry during a transaction then its
2326 * possible we did that because the TupDesc changed as the result of
2327 * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2328 * possible someone copied that TupDesc, in which case the copy would
2329 * point to free'd memory. So if we rebuild an entry we keep the
2330 * TupDesc around until end of transaction, to be safe.
2332 if (remember_tupdesc)
2333 RememberToFreeTupleDescAtEOX(relation->rd_att);
2335 FreeTupleDesc(relation->rd_att);
2337 FreeTriggerDesc(relation->trigdesc);
2338 list_free_deep(relation->rd_fkeylist);
2339 list_free(relation->rd_indexlist);
2340 bms_free(relation->rd_indexattr);
2341 bms_free(relation->rd_keyattr);
2342 bms_free(relation->rd_pkattr);
2343 bms_free(relation->rd_idattr);
2344 if (relation->rd_pubactions)
2345 pfree(relation->rd_pubactions);
2346 if (relation->rd_options)
2347 pfree(relation->rd_options);
2348 if (relation->rd_indextuple)
2349 pfree(relation->rd_indextuple);
2350 if (relation->rd_indexcxt)
2351 MemoryContextDelete(relation->rd_indexcxt);
2352 if (relation->rd_rulescxt)
2353 MemoryContextDelete(relation->rd_rulescxt);
2354 if (relation->rd_rsdesc)
2355 MemoryContextDelete(relation->rd_rsdesc->rscxt);
2356 if (relation->rd_partkeycxt)
2357 MemoryContextDelete(relation->rd_partkeycxt);
2358 if (relation->rd_pdcxt)
2359 MemoryContextDelete(relation->rd_pdcxt);
2360 if (relation->rd_partcheck)
2361 pfree(relation->rd_partcheck);
2362 if (relation->rd_fdwroutine)
2363 pfree(relation->rd_fdwroutine);
2368 * RelationClearRelation
2370 * Physically blow away a relation cache entry, or reset it and rebuild
2371 * it from scratch (that is, from catalog entries). The latter path is
2372 * used when we are notified of a change to an open relation (one with
2375 * NB: when rebuilding, we'd better hold some lock on the relation,
2376 * else the catalog data we need to read could be changing under us.
2377 * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2378 * an sinval reset could happen while we're accessing the catalogs, and
2379 * the rel would get blown away underneath us by RelationCacheInvalidate
2380 * if it has zero refcnt.
2382 * The "rebuild" parameter is redundant in current usage because it has
2383 * to match the relation's refcnt status, but we keep it as a crosscheck
2384 * that we're doing what the caller expects.
2387 RelationClearRelation(Relation relation, bool rebuild)
2390 * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2391 * course it would be an equally bad idea to blow away one with nonzero
2392 * refcnt, since that would leave someone somewhere with a dangling
2393 * pointer. All callers are expected to have verified that this holds.
2396 !RelationHasReferenceCountZero(relation) :
2397 RelationHasReferenceCountZero(relation));
2400 * Make sure smgr and lower levels close the relation's files, if they
2401 * weren't closed already. If the relation is not getting deleted, the
2402 * next smgr access should reopen the files automatically. This ensures
2403 * that the low-level file access state is updated after, say, a vacuum
2406 RelationCloseSmgr(relation);
2409 * Never, never ever blow away a nailed-in system relation, because we'd
2410 * be unable to recover. However, we must redo RelationInitPhysicalAddr
2411 * in case it is a mapped relation whose mapping changed.
2413 * If it's a nailed-but-not-mapped index, then we need to re-read the
2414 * pg_class row to see if its relfilenode changed. We do that immediately
2415 * if we're inside a valid transaction and the relation is open (not
2416 * counting the nailed refcount). Otherwise just mark the entry as
2417 * possibly invalid, and it'll be fixed when next opened.
2419 if (relation->rd_isnailed)
2421 RelationInitPhysicalAddr(relation);
2423 if (relation->rd_rel->relkind == RELKIND_INDEX)
2425 relation->rd_isvalid = false; /* needs to be revalidated */
2426 if (relation->rd_refcnt > 1 && IsTransactionState())
2427 RelationReloadIndexInfo(relation);
2433 * Even non-system indexes should not be blown away if they are open and
2434 * have valid index support information. This avoids problems with active
2435 * use of the index support information. As with nailed indexes, we
2436 * re-read the pg_class row to handle possible physical relocation of the
2437 * index, and we check for pg_index updates too.
2439 if (relation->rd_rel->relkind == RELKIND_INDEX &&
2440 relation->rd_refcnt > 0 &&
2441 relation->rd_indexcxt != NULL)
2443 relation->rd_isvalid = false; /* needs to be revalidated */
2444 if (IsTransactionState())
2445 RelationReloadIndexInfo(relation);
2449 /* Mark it invalid until we've finished rebuild */
2450 relation->rd_isvalid = false;
2453 * If we're really done with the relcache entry, blow it away. But if
2454 * someone is still using it, reconstruct the whole deal without moving
2455 * the physical RelationData record (so that the someone's pointer is
2460 /* Remove it from the hash table */
2461 RelationCacheDelete(relation);
2463 /* And release storage */
2464 RelationDestroyRelation(relation, false);
2466 else if (!IsTransactionState())
2469 * If we're not inside a valid transaction, we can't do any catalog
2470 * access so it's not possible to rebuild yet. Just exit, leaving
2471 * rd_isvalid = false so that the rebuild will occur when the entry is
2474 * Note: it's possible that we come here during subtransaction abort,
2475 * and the reason for wanting to rebuild is that the rel is open in
2476 * the outer transaction. In that case it might seem unsafe to not
2477 * rebuild immediately, since whatever code has the rel already open
2478 * will keep on using the relcache entry as-is. However, in such a
2479 * case the outer transaction should be holding a lock that's
2480 * sufficient to prevent any significant change in the rel's schema,
2481 * so the existing entry contents should be good enough for its
2482 * purposes; at worst we might be behind on statistics updates or the
2483 * like. (See also CheckTableNotInUse() and its callers.) These same
2484 * remarks also apply to the cases above where we exit without having
2485 * done RelationReloadIndexInfo() yet.
2492 * Our strategy for rebuilding an open relcache entry is to build a
2493 * new entry from scratch, swap its contents with the old entry, and
2494 * finally delete the new entry (along with any infrastructure swapped
2495 * over from the old entry). This is to avoid trouble in case an
2496 * error causes us to lose control partway through. The old entry
2497 * will still be marked !rd_isvalid, so we'll try to rebuild it again
2498 * on next access. Meanwhile it's not any less valid than it was
2499 * before, so any code that might expect to continue accessing it
2500 * isn't hurt by the rebuild failure. (Consider for example a
2501 * subtransaction that ALTERs a table and then gets canceled partway
2502 * through the cache entry rebuild. The outer transaction should
2503 * still see the not-modified cache entry as valid.) The worst
2504 * consequence of an error is leaking the necessarily-unreferenced new
2505 * entry, and this shouldn't happen often enough for that to be a big
2508 * When rebuilding an open relcache entry, we must preserve ref count,
2509 * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2510 * attempt to preserve the pg_class entry (rd_rel), tupledesc,
2511 * rewrite-rule, partition key, and partition descriptor substructures
2512 * in place, because various places assume that these structures won't
2513 * move while they are working with an open relcache entry. (Note:
2514 * the refcount mechanism for tupledescs might someday allow us to
2515 * remove this hack for the tupledesc.)
2517 * Note that this process does not touch CurrentResourceOwner; which
2518 * is good because whatever ref counts the entry may have do not
2519 * necessarily belong to that resource owner.
2522 Oid save_relid = RelationGetRelid(relation);
2529 /* Build temporary entry, but don't link it into hashtable */
2530 newrel = RelationBuildDesc(save_relid, false);
2534 * We can validly get here, if we're using a historic snapshot in
2535 * which a relation, accessed from outside logical decoding, is
2536 * still invisible. In that case it's fine to just mark the
2537 * relation as invalid and return - it'll fully get reloaded by
2538 * the cache reset at the end of logical decoding (or at the next
2539 * access). During normal processing we don't want to ignore this
2540 * case as it shouldn't happen there, as explained below.
2542 if (HistoricSnapshotActive())
2546 * This shouldn't happen as dropping a relation is intended to be
2547 * impossible if still referenced (c.f. CheckTableNotInUse()). But
2548 * if we get here anyway, we can't just delete the relcache entry,
2549 * as it possibly could get accessed later (as e.g. the error
2550 * might get trapped and handled via a subtransaction rollback).
2552 elog(ERROR, "relation %u deleted while still in use", save_relid);
2555 keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2556 keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2557 keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2558 keep_partkey = (relation->rd_partkey != NULL);
2559 keep_partdesc = equalPartitionDescs(relation->rd_partkey,
2560 relation->rd_partdesc,
2561 newrel->rd_partdesc);
2564 * Perform swapping of the relcache entry contents. Within this
2565 * process the old entry is momentarily invalid, so there *must* be no
2566 * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2567 * all-in-line code for safety.
2569 * Since the vast majority of fields should be swapped, our method is
2570 * to swap the whole structures and then re-swap those few fields we
2571 * didn't want swapped.
2573 #define SWAPFIELD(fldtype, fldname) \
2575 fldtype _tmp = newrel->fldname; \
2576 newrel->fldname = relation->fldname; \
2577 relation->fldname = _tmp; \
2580 /* swap all Relation struct fields */
2582 RelationData tmpstruct;
2584 memcpy(&tmpstruct, newrel, sizeof(RelationData));
2585 memcpy(newrel, relation, sizeof(RelationData));
2586 memcpy(relation, &tmpstruct, sizeof(RelationData));
2589 /* rd_smgr must not be swapped, due to back-links from smgr level */
2590 SWAPFIELD(SMgrRelation, rd_smgr);
2591 /* rd_refcnt must be preserved */
2592 SWAPFIELD(int, rd_refcnt);
2593 /* isnailed shouldn't change */
2594 Assert(newrel->rd_isnailed == relation->rd_isnailed);
2595 /* creation sub-XIDs must be preserved */
2596 SWAPFIELD(SubTransactionId, rd_createSubid);
2597 SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2598 /* un-swap rd_rel pointers, swap contents instead */
2599 SWAPFIELD(Form_pg_class, rd_rel);
2600 /* ... but actually, we don't have to update newrel->rd_rel */
2601 memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2602 /* preserve old tupledesc and rules if no logical change */
2604 SWAPFIELD(TupleDesc, rd_att);
2607 SWAPFIELD(RuleLock *, rd_rules);
2608 SWAPFIELD(MemoryContext, rd_rulescxt);
2611 SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2612 /* toast OID override must be preserved */
2613 SWAPFIELD(Oid, rd_toastoid);
2614 /* pgstat_info must be preserved */
2615 SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2616 /* partition key must be preserved, if we have one */
2619 SWAPFIELD(PartitionKey, rd_partkey);
2620 SWAPFIELD(MemoryContext, rd_partkeycxt);
2622 /* preserve old partdesc if no logical change */
2625 SWAPFIELD(PartitionDesc, rd_partdesc);
2626 SWAPFIELD(MemoryContext, rd_pdcxt);
2631 /* And now we can throw away the temporary entry */
2632 RelationDestroyRelation(newrel, !keep_tupdesc);
2637 * RelationFlushRelation
2639 * Rebuild the relation if it is open (refcount > 0), else blow it away.
2640 * This is used when we receive a cache invalidation event for the rel.
2643 RelationFlushRelation(Relation relation)
2645 if (relation->rd_createSubid != InvalidSubTransactionId ||
2646 relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2649 * New relcache entries are always rebuilt, not flushed; else we'd
2650 * forget the "new" status of the relation, which is a useful
2651 * optimization to have. Ditto for the new-relfilenode status.
2653 * The rel could have zero refcnt here, so temporarily increment the
2654 * refcnt to ensure it's safe to rebuild it. We can assume that the
2655 * current transaction has some lock on the rel already.
2657 RelationIncrementReferenceCount(relation);
2658 RelationClearRelation(relation, true);
2659 RelationDecrementReferenceCount(relation);
2664 * Pre-existing rels can be dropped from the relcache if not open.
2666 bool rebuild = !RelationHasReferenceCountZero(relation);
2668 RelationClearRelation(relation, rebuild);
2673 * RelationForgetRelation - unconditionally remove a relcache entry
2675 * External interface for destroying a relcache entry when we
2676 * drop the relation.
2679 RelationForgetRelation(Oid rid)
2683 RelationIdCacheLookup(rid, relation);
2685 if (!PointerIsValid(relation))
2686 return; /* not in cache, nothing to do */
2688 if (!RelationHasReferenceCountZero(relation))
2689 elog(ERROR, "relation %u is still open", rid);
2691 /* Unconditionally destroy the relcache entry */
2692 RelationClearRelation(relation, false);
2696 * RelationCacheInvalidateEntry
2698 * This routine is invoked for SI cache flush messages.
2700 * Any relcache entry matching the relid must be flushed. (Note: caller has
2701 * already determined that the relid belongs to our database or is a shared
2704 * We used to skip local relations, on the grounds that they could
2705 * not be targets of cross-backend SI update messages; but it seems
2706 * safer to process them, so that our *own* SI update messages will
2707 * have the same effects during CommandCounterIncrement for both
2708 * local and nonlocal relations.
2711 RelationCacheInvalidateEntry(Oid relationId)
2715 RelationIdCacheLookup(relationId, relation);
2717 if (PointerIsValid(relation))
2719 relcacheInvalsReceived++;
2720 RelationFlushRelation(relation);
2725 * RelationCacheInvalidate
2726 * Blow away cached relation descriptors that have zero reference counts,
2727 * and rebuild those with positive reference counts. Also reset the smgr
2728 * relation cache and re-read relation mapping data.
2730 * This is currently used only to recover from SI message buffer overflow,
2731 * so we do not touch new-in-transaction relations; they cannot be targets
2732 * of cross-backend SI updates (and our own updates now go through a
2733 * separate linked list that isn't limited by the SI message buffer size).
2734 * Likewise, we need not discard new-relfilenode-in-transaction hints,
2735 * since any invalidation of those would be a local event.
2737 * We do this in two phases: the first pass deletes deletable items, and
2738 * the second one rebuilds the rebuildable items. This is essential for
2739 * safety, because hash_seq_search only copes with concurrent deletion of
2740 * the element it is currently visiting. If a second SI overflow were to
2741 * occur while we are walking the table, resulting in recursive entry to
2742 * this routine, we could crash because the inner invocation blows away
2743 * the entry next to be visited by the outer scan. But this way is OK,
2744 * because (a) during the first pass we won't process any more SI messages,
2745 * so hash_seq_search will complete safely; (b) during the second pass we
2746 * only hold onto pointers to nondeletable entries.
2748 * The two-phase approach also makes it easy to update relfilenodes for
2749 * mapped relations before we do anything else, and to ensure that the
2750 * second pass processes nailed-in-cache items before other nondeletable
2751 * items. This should ensure that system catalogs are up to date before
2752 * we attempt to use them to reload information about other open relations.
2755 RelationCacheInvalidate(void)
2757 HASH_SEQ_STATUS status;
2758 RelIdCacheEnt *idhentry;
2760 List *rebuildFirstList = NIL;
2761 List *rebuildList = NIL;
2765 * Reload relation mapping data before starting to reconstruct cache.
2767 RelationMapInvalidateAll();
2770 hash_seq_init(&status, RelationIdCache);
2772 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2774 relation = idhentry->reldesc;
2776 /* Must close all smgr references to avoid leaving dangling ptrs */
2777 RelationCloseSmgr(relation);
2780 * Ignore new relations; no other backend will manipulate them before
2781 * we commit. Likewise, before replacing a relation's relfilenode, we
2782 * shall have acquired AccessExclusiveLock and drained any applicable
2783 * pending invalidations.
2785 if (relation->rd_createSubid != InvalidSubTransactionId ||
2786 relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2789 relcacheInvalsReceived++;
2791 if (RelationHasReferenceCountZero(relation))
2793 /* Delete this entry immediately */
2794 Assert(!relation->rd_isnailed);
2795 RelationClearRelation(relation, false);
2800 * If it's a mapped relation, immediately update its rd_node in
2801 * case its relfilenode changed. We must do this during phase 1
2802 * in case the relation is consulted during rebuild of other
2803 * relcache entries in phase 2. It's safe since consulting the
2804 * map doesn't involve any access to relcache entries.
2806 if (RelationIsMapped(relation))
2807 RelationInitPhysicalAddr(relation);
2810 * Add this entry to list of stuff to rebuild in second pass.
2811 * pg_class goes to the front of rebuildFirstList while
2812 * pg_class_oid_index goes to the back of rebuildFirstList, so
2813 * they are done first and second respectively. Other nailed
2814 * relations go to the front of rebuildList, so they'll be done
2815 * next in no particular order; and everything else goes to the
2816 * back of rebuildList.
2818 if (RelationGetRelid(relation) == RelationRelationId)
2819 rebuildFirstList = lcons(relation, rebuildFirstList);
2820 else if (RelationGetRelid(relation) == ClassOidIndexId)
2821 rebuildFirstList = lappend(rebuildFirstList, relation);
2822 else if (relation->rd_isnailed)
2823 rebuildList = lcons(relation, rebuildList);
2825 rebuildList = lappend(rebuildList, relation);
2830 * Now zap any remaining smgr cache entries. This must happen before we
2831 * start to rebuild entries, since that may involve catalog fetches which
2832 * will re-open catalog files.
2836 /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2837 foreach(l, rebuildFirstList)
2839 relation = (Relation) lfirst(l);
2840 RelationClearRelation(relation, true);
2842 list_free(rebuildFirstList);
2843 foreach(l, rebuildList)
2845 relation = (Relation) lfirst(l);
2846 RelationClearRelation(relation, true);
2848 list_free(rebuildList);
2852 * RelationCloseSmgrByOid - close a relcache entry's smgr link
2854 * Needed in some cases where we are changing a relation's physical mapping.
2855 * The link will be automatically reopened on next use.
2858 RelationCloseSmgrByOid(Oid relationId)
2862 RelationIdCacheLookup(relationId, relation);
2864 if (!PointerIsValid(relation))
2865 return; /* not in cache, nothing to do */
2867 RelationCloseSmgr(relation);
2871 RememberToFreeTupleDescAtEOX(TupleDesc td)
2873 if (EOXactTupleDescArray == NULL)
2875 MemoryContext oldcxt;
2877 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2879 EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2880 EOXactTupleDescArrayLen = 16;
2881 NextEOXactTupleDescNum = 0;
2882 MemoryContextSwitchTo(oldcxt);
2884 else if (NextEOXactTupleDescNum >= EOXactTupleDescArrayLen)
2886 int32 newlen = EOXactTupleDescArrayLen * 2;
2888 Assert(EOXactTupleDescArrayLen > 0);
2890 EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2891 newlen * sizeof(TupleDesc));
2892 EOXactTupleDescArrayLen = newlen;
2895 EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2899 * AtEOXact_RelationCache
2901 * Clean up the relcache at main-transaction commit or abort.
2903 * Note: this must be called *before* processing invalidation messages.
2904 * In the case of abort, we don't want to try to rebuild any invalidated
2905 * cache entries (since we can't safely do database accesses). Therefore
2906 * we must reset refcnts before handling pending invalidations.
2908 * As of PostgreSQL 8.1, relcache refcnts should get released by the
2909 * ResourceOwner mechanism. This routine just does a debugging
2910 * cross-check that no pins remain. However, we also need to do special
2911 * cleanup when the current transaction created any relations or made use
2912 * of forced index lists.
2915 AtEOXact_RelationCache(bool isCommit)
2917 HASH_SEQ_STATUS status;
2918 RelIdCacheEnt *idhentry;
2922 * Unless the eoxact_list[] overflowed, we only need to examine the rels
2923 * listed in it. Otherwise fall back on a hash_seq_search scan.
2925 * For simplicity, eoxact_list[] entries are not deleted till end of
2926 * top-level transaction, even though we could remove them at
2927 * subtransaction end in some cases, or remove relations from the list if
2928 * they are cleared for other reasons. Therefore we should expect the
2929 * case that list entries are not found in the hashtable; if not, there's
2930 * nothing to do for them.
2932 if (eoxact_list_overflowed)
2934 hash_seq_init(&status, RelationIdCache);
2935 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2937 AtEOXact_cleanup(idhentry->reldesc, isCommit);
2942 for (i = 0; i < eoxact_list_len; i++)
2944 idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2945 (void *) &eoxact_list[i],
2948 if (idhentry != NULL)
2949 AtEOXact_cleanup(idhentry->reldesc, isCommit);
2953 if (EOXactTupleDescArrayLen > 0)
2955 Assert(EOXactTupleDescArray != NULL);
2956 for (i = 0; i < NextEOXactTupleDescNum; i++)
2957 FreeTupleDesc(EOXactTupleDescArray[i]);
2958 pfree(EOXactTupleDescArray);
2959 EOXactTupleDescArray = NULL;
2962 /* Now we're out of the transaction and can clear the lists */
2963 eoxact_list_len = 0;
2964 eoxact_list_overflowed = false;
2965 NextEOXactTupleDescNum = 0;
2966 EOXactTupleDescArrayLen = 0;
2972 * Clean up a single rel at main-transaction commit or abort
2974 * NB: this processing must be idempotent, because EOXactListAdd() doesn't
2975 * bother to prevent duplicate entries in eoxact_list[].
2978 AtEOXact_cleanup(Relation relation, bool isCommit)
2981 * The relcache entry's ref count should be back to its normal
2982 * not-in-a-transaction state: 0 unless it's nailed in cache.
2984 * In bootstrap mode, this is NOT true, so don't check it --- the
2985 * bootstrap code expects relations to stay open across start/commit
2986 * transaction calls. (That seems bogus, but it's not worth fixing.)
2988 * Note: ideally this check would be applied to every relcache entry, not
2989 * just those that have eoxact work to do. But it's not worth forcing a
2990 * scan of the whole relcache just for this. (Moreover, doing so would
2991 * mean that assert-enabled testing never tests the hash_search code path
2992 * above, which seems a bad idea.)
2994 #ifdef USE_ASSERT_CHECKING
2995 if (!IsBootstrapProcessingMode())
2997 int expected_refcnt;
2999 expected_refcnt = relation->rd_isnailed ? 1 : 0;
3000 Assert(relation->rd_refcnt == expected_refcnt);
3005 * Is it a relation created in the current transaction?
3007 * During commit, reset the flag to zero, since we are now out of the
3008 * creating transaction. During abort, simply delete the relcache entry
3009 * --- it isn't interesting any longer. (NOTE: if we have forgotten the
3010 * new-ness of a new relation due to a forced cache flush, the entry will
3011 * get deleted anyway by shared-cache-inval processing of the aborted
3012 * pg_class insertion.)
3014 if (relation->rd_createSubid != InvalidSubTransactionId)
3017 relation->rd_createSubid = InvalidSubTransactionId;
3018 else if (RelationHasReferenceCountZero(relation))
3020 RelationClearRelation(relation, false);
3026 * Hmm, somewhere there's a (leaked?) reference to the relation.
3027 * We daren't remove the entry for fear of dereferencing a
3028 * dangling pointer later. Bleat, and mark it as not belonging to
3029 * the current transaction. Hopefully it'll get cleaned up
3030 * eventually. This must be just a WARNING to avoid
3031 * error-during-error-recovery loops.
3033 relation->rd_createSubid = InvalidSubTransactionId;
3034 elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3035 RelationGetRelationName(relation));
3040 * Likewise, reset the hint about the relfilenode being new.
3042 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3045 * Flush any temporary index list.
3047 if (relation->rd_indexvalid == 2)
3049 list_free(relation->rd_indexlist);
3050 relation->rd_indexlist = NIL;
3051 relation->rd_oidindex = InvalidOid;
3052 relation->rd_pkindex = InvalidOid;
3053 relation->rd_replidindex = InvalidOid;
3054 relation->rd_indexvalid = 0;
3059 * AtEOSubXact_RelationCache
3061 * Clean up the relcache at sub-transaction commit or abort.
3063 * Note: this must be called *before* processing invalidation messages.
3066 AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
3067 SubTransactionId parentSubid)
3069 HASH_SEQ_STATUS status;
3070 RelIdCacheEnt *idhentry;
3074 * Unless the eoxact_list[] overflowed, we only need to examine the rels
3075 * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3076 * logic as in AtEOXact_RelationCache.
3078 if (eoxact_list_overflowed)
3080 hash_seq_init(&status, RelationIdCache);
3081 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3083 AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3084 mySubid, parentSubid);
3089 for (i = 0; i < eoxact_list_len; i++)
3091 idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3092 (void *) &eoxact_list[i],
3095 if (idhentry != NULL)
3096 AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3097 mySubid, parentSubid);
3101 /* Don't reset the list; we still need more cleanup later */
3105 * AtEOSubXact_cleanup
3107 * Clean up a single rel at subtransaction commit or abort
3109 * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3110 * bother to prevent duplicate entries in eoxact_list[].
/*
 * Examines three pieces of per-relation state at subtransaction end:
 * rd_createSubid, rd_newRelfilenodeSubid, and a temporary index list
 * (rd_indexvalid == 2).  State stamped with mySubid is either handed over
 * to parentSubid or discarded, as the comments below describe for the
 * subcommit and subabort cases.
 */
3113 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3114 SubTransactionId mySubid, SubTransactionId parentSubid)
3117 * Is it a relation created in the current subtransaction?
3119 * During subcommit, mark it as belonging to the parent, instead. During
3120 * subabort, simply delete the relcache entry.
3122 if (relation->rd_createSubid == mySubid)
3125 relation->rd_createSubid = parentSubid;
3126 else if (RelationHasReferenceCountZero(relation))
/* reference count is zero, so the entry can be cleared */
3128 RelationClearRelation(relation, false);
3134 * Hmm, somewhere there's a (leaked?) reference to the relation.
3135 * We daren't remove the entry for fear of dereferencing a
3136 * dangling pointer later. Bleat, and transfer it to the parent
3137 * subtransaction so we can try again later. This must be just a
3138 * WARNING to avoid error-during-error-recovery loops.
3140 relation->rd_createSubid = parentSubid;
3141 elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3142 RelationGetRelationName(relation));
3147 * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
/* the hint either moves up to parentSubid or is reset to InvalidSubTransactionId */
3149 if (relation->rd_newRelfilenodeSubid == mySubid)
3152 relation->rd_newRelfilenodeSubid = parentSubid;
3154 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3158 * Flush any temporary index list.
/*
 * The list is freed and the three cached index OIDs are reset together with
 * it; rd_indexvalid goes back to 0 (presumably "no valid list" -- confirm
 * against the field's definition).
 */
3160 if (relation->rd_indexvalid == 2)
3162 list_free(relation->rd_indexlist);
3163 relation->rd_indexlist = NIL;
3164 relation->rd_oidindex = InvalidOid;
3165 relation->rd_pkindex = InvalidOid;
3166 relation->rd_replidindex = InvalidOid;
3167 relation->rd_indexvalid = 0;
3173 * RelationBuildLocalRelation
3174 * Build a relcache entry for an about-to-be-created relation,
3175 * and enter it into the relcache.
/*
 * Allocates a zeroed RelationData in CacheMemoryContext, fills it in from
 * the arguments, inserts it into the relcache hash table and increments its
 * reference count (the caller expects the entry to be pinned).
 *
 * relname          - copied into rd_rel->relname via namestrcpy()
 * shared_relation  - must agree with IsSharedRelation(relid), otherwise
 *                    elog(ERROR); stored in rd_rel->relisshared
 * mapped_relation  - when set, rd_rel->relfilenode is InvalidOid and the
 *                    filenode is handed to RelationMapUpdateMap() instead
 * relpersistence   - stored in rd_rel->relpersistence and used to choose
 *                    rd_backend / rd_islocaltemp; an invalid value is
 *                    reported with elog(ERROR)
 */
3178 RelationBuildLocalRelation(const char *relname,
3184 bool shared_relation,
3185 bool mapped_relation,
3186 char relpersistence,
3190 MemoryContext oldcxt;
3191 int natts = tupDesc->natts;
3196 AssertArg(natts >= 0);
3199 * check for creation of a rel that must be nailed in cache.
3201 * XXX this list had better match the relations specially handled in
3202 * RelationCacheInitializePhase2/3.
3206 case DatabaseRelationId:
3207 case AuthIdRelationId:
3208 case AuthMemRelationId:
3209 case RelationRelationId:
3210 case AttributeRelationId:
3211 case ProcedureRelationId:
3212 case TypeRelationId:
3221 * check that hardwired list of shared rels matches what's in the
3222 * bootstrap .bki file. If you get a failure here during initdb, you
3223 * probably need to fix IsSharedRelation() to match whatever you've done
3224 * to the set of shared relations.
3226 if (shared_relation != IsSharedRelation(relid))
3227 elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3230 /* Shared relations had better be mapped, too */
3231 Assert(mapped_relation || !shared_relation);
3234 * switch to the cache context to create the relcache entry.
/* the cache context is created on demand if it does not exist yet */
3236 if (!CacheMemoryContext)
3237 CreateCacheMemoryContext();
3239 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3242 * allocate a new relation descriptor and fill in basic state fields.
3244 rel = (Relation) palloc0(sizeof(RelationData));
3246 /* make sure relation is marked as having no open file yet */
3247 rel->rd_smgr = NULL;
3249 /* mark it nailed if appropriate */
3250 rel->rd_isnailed = nailit;
/* a nailed entry starts with a reference count of 1, any other with 0 */
3252 rel->rd_refcnt = nailit ? 1 : 0;
3254 /* it's being created in this transaction */
3255 rel->rd_createSubid = GetCurrentSubTransactionId();
3256 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3259 * create a new tuple descriptor from the one passed in. We do this
3260 * partly to copy it into the cache context, and partly because the new
3261 * relation can't have any defaults or constraints yet; they have to be
3262 * added in later steps, because they require additions to multiple system
3263 * catalogs. We can copy attnotnull constraints here, however.
3265 rel->rd_att = CreateTupleDescCopy(tupDesc);
3266 rel->rd_att->tdrefcount = 1; /* mark as refcounted */
/* copy attnotnull per attribute and remember whether any attribute has it */
3267 has_not_null = false;
3268 for (i = 0; i < natts; i++)
3270 rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
3271 has_not_null |= tupDesc->attrs[i]->attnotnull;
/* a zeroed TupleConstr carrying only the has_not_null flag */
3276 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3278 constr->has_not_null = true;
3279 rel->rd_att->constr = constr;
3283 * initialize relation tuple form (caller may add/override data later)
3285 rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
3287 namestrcpy(&rel->rd_rel->relname, relname);
3288 rel->rd_rel->relnamespace = relnamespace;
3290 rel->rd_rel->relkind = relkind;
3291 rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
3292 rel->rd_rel->relnatts = natts;
3293 rel->rd_rel->reltype = InvalidOid;
3294 /* needed when bootstrapping: */
3295 rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3297 /* set up persistence and relcache fields dependent on it */
3298 rel->rd_rel->relpersistence = relpersistence;
3299 switch (relpersistence)
3301 case RELPERSISTENCE_UNLOGGED:
3302 case RELPERSISTENCE_PERMANENT:
3303 rel->rd_backend = InvalidBackendId;
3304 rel->rd_islocaltemp = false;
3306 case RELPERSISTENCE_TEMP:
3307 Assert(isTempOrTempToastNamespace(relnamespace));
3308 rel->rd_backend = BackendIdForTempRelations();
3309 rel->rd_islocaltemp = true;
3312 elog(ERROR, "invalid relpersistence: %c", relpersistence);
3316 /* if it's a materialized view, it's not populated initially */
3317 if (relkind == RELKIND_MATVIEW)
3318 rel->rd_rel->relispopulated = false;
3320 rel->rd_rel->relispopulated = true;
3322 /* system relations and non-table objects don't have one */
/*
 * "one" refers to a replica identity: DEFAULT for plain tables, matviews and
 * partitioned tables outside the system namespace, NOTHING otherwise.
 */
3323 if (!IsSystemNamespace(relnamespace) &&
3324 (relkind == RELKIND_RELATION ||
3325 relkind == RELKIND_MATVIEW ||
3326 relkind == RELKIND_PARTITIONED_TABLE))
3327 rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3329 rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3332 * Insert relation physical and logical identifiers (OIDs) into the right
3333 * places. For a mapped relation, we set relfilenode to zero and rely on
3334 * RelationInitPhysicalAddr to consult the map.
3336 rel->rd_rel->relisshared = shared_relation;
/* RelationGetRelid() is used as an lvalue here to store the relation OID */
3338 RelationGetRelid(rel) = relid;
/* every attribute is stamped with the owning relation's OID */
3340 for (i = 0; i < natts; i++)
3341 rel->rd_att->attrs[i]->attrelid = relid;
3343 rel->rd_rel->reltablespace = reltablespace;
3345 if (mapped_relation)
3347 rel->rd_rel->relfilenode = InvalidOid;
3348 /* Add it to the active mapping information */
3349 RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3352 rel->rd_rel->relfilenode = relfilenode;
3354 RelationInitLockInfo(rel); /* see lmgr.c */
3356 RelationInitPhysicalAddr(rel);
3359 * Okay to insert into the relcache hash table.
3361 * Ordinarily, there should certainly not be an existing hash entry for
3362 * the same OID; but during bootstrap, when we create a "real" relcache
3363 * entry for one of the bootstrap relations, we'll be overwriting the
3364 * phony one created with formrdesc. So allow that to happen for nailed
3367 RelationCacheInsert(rel, nailit);
3370 * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3371 * can't do this before storing relid in it.
3376 * done building relcache entry.
3378 MemoryContextSwitchTo(oldcxt);
3380 /* It's fully valid */
3381 rel->rd_isvalid = true;
3384 * Caller expects us to pin the returned entry.
3386 RelationIncrementReferenceCount(rel);
3393 * RelationSetNewRelfilenode
3395 * Assign a new relfilenode (physical file name) to the relation.
3397 * This allows a full rewrite of the relation to be done with transactional
3398 * safety (since the filenode assignment can be rolled back). Note however
3399 * that there is no simple way to access the relation's old data for the
3400 * remainder of the current transaction. This limits the usefulness to cases
3401 * such as TRUNCATE or rebuilding an index from scratch.
3403 * Caller must already hold exclusive lock on the relation.
3405 * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
3406 * must be passed for indexes and sequences). This should be a lower bound on
3407 * the XIDs that will be put into the new relation contents.
3409 * The new filenode's persistence is set to the given value. This is useful
3410 * for the cases that are changing the relation's persistence; other callers
3411 * need to pass the original relpersistence value.
/*
 * relation    - relation whose storage is being replaced
 * persistence - passed to RelationCreateStorage() and written to
 *               pg_class.relpersistence
 * freezeXid   - written to pg_class.relfrozenxid
 * minmulti    - written to pg_class.relminmxid
 *
 * Visible order of operations: obtain a new filenode, create storage for
 * it, schedule the old storage for removal, update the pg_class row (or the
 * relation mapper when the relation is mapped), CommandCounterIncrement(),
 * then record the current subtransaction in rd_newRelfilenodeSubid and
 * register the relation with EOXactListAdd().
 */
3414 RelationSetNewRelfilenode(Relation relation, char persistence,
3415 TransactionId freezeXid, MultiXactId minmulti)
3418 RelFileNodeBackend newrnode;
3421 Form_pg_class classform;
3423 /* Indexes, sequences must have Invalid frozenxid; other rels must not */
3424 Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
3425 relation->rd_rel->relkind == RELKIND_SEQUENCE) ?
3426 freezeXid == InvalidTransactionId :
3427 TransactionIdIsNormal(freezeXid));
/* freezeXid and minmulti must be supplied together or not at all */
3428 Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti));
3430 /* Allocate a new relfilenode */
3431 newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3435 * Get a writable copy of the pg_class tuple for the given relation.
3437 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
3439 tuple = SearchSysCacheCopy1(RELOID,
3440 ObjectIdGetDatum(RelationGetRelid(relation)));
3441 if (!HeapTupleIsValid(tuple))
3442 elog(ERROR, "could not find tuple for relation %u",
3443 RelationGetRelid(relation));
3444 classform = (Form_pg_class) GETSTRUCT(tuple);
3447 * Create storage for the main fork of the new relfilenode.
3449 * NOTE: any conflict in relfilenode value will be caught here, if
3450 * GetNewRelFileNode messes up for any reason.
/* start from the relation's current rd_node and override only relNode */
3452 newrnode.node = relation->rd_node;
3453 newrnode.node.relNode = newrelfilenode;
3454 newrnode.backend = relation->rd_backend;
3455 RelationCreateStorage(newrnode.node, persistence);
3456 smgrclosenode(newrnode);
3459 * Schedule unlinking of the old storage at transaction commit.
3461 RelationDropStorage(relation);
3464 * Now update the pg_class row. However, if we're dealing with a mapped
3465 * index, pg_class.relfilenode doesn't change; instead we have to send the
3466 * update to the relation mapper.
3468 if (RelationIsMapped(relation))
3469 RelationMapUpdateMap(RelationGetRelid(relation),
3471 relation->rd_rel->relisshared,
3474 classform->relfilenode = newrelfilenode;
3476 /* These changes are safe even for a mapped relation */
/* the size statistics are zeroed for everything except sequences */
3477 if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3479 classform->relpages = 0; /* it's empty until further notice */
3480 classform->reltuples = 0;
3481 classform->relallvisible = 0;
3483 classform->relfrozenxid = freezeXid;
3484 classform->relminmxid = minmulti;
3485 classform->relpersistence = persistence;
3487 CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
/* the tuple is a private copy (SearchSysCacheCopy1), so it is freed here */
3489 heap_freetuple(tuple);
3491 heap_close(pg_class, RowExclusiveLock);
3494 * Make the pg_class row change visible, as well as the relation map
3495 * change if any. This will cause the relcache entry to get updated, too.
3497 CommandCounterIncrement();
3500 * Mark the rel as having been given a new relfilenode in the current
3501 * (sub) transaction. This is a hint that can be used to optimize later
3502 * operations on the rel in the same transaction.
3504 relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
3506 /* Flag relation as needing eoxact cleanup (to remove the hint) */
3507 EOXactListAdd(relation);
3512 * RelationCacheInitialize
3514 * This initializes the relation descriptor cache. At the time
3515 * that this is invoked, we can't do database access yet (mainly
3516 * because the transaction subsystem is not up); all we are doing
3517 * is making an empty cache hashtable. This must be done before
3518 * starting the initialization transaction, because otherwise
3519 * AtEOXact_RelationCache would crash if that transaction aborts
3520 * before we can get the relcache set up.
/* element-count argument passed to hash_create() for the relcache table below */
3523 #define INITRELCACHESIZE 400
/*
 * Sets up three things, in order: CacheMemoryContext (if absent), the
 * RelationIdCache hash table, and the relation mapper.
 */
3526 RelationCacheInitialize(void)
3531 * make sure cache memory context exists
3533 if (!CacheMemoryContext)
3534 CreateCacheMemoryContext();
3537 * create hashtable that indexes the relcache
/* keys are Oid-sized, entries are RelIdCacheEnt structs */
3539 MemSet(&ctl, 0, sizeof(ctl));
3540 ctl.keysize = sizeof(Oid);
3541 ctl.entrysize = sizeof(RelIdCacheEnt);
3542 RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3543 &ctl, HASH_ELEM | HASH_BLOBS);
3546 * relation mapper needs to be initialized too
3548 RelationMapInitialize();
3552 * RelationCacheInitializePhase2
3554 * This is called to prepare for access to shared catalogs during startup.
3555 * We must at least set up nailed reldescs for pg_database, pg_authid,
3556 * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like to have reldescs
3557 * for their indexes, too. We attempt to load this information from the
3558 * shared relcache init file. If that's missing or broken, just make
3559 * phony entries for the catalogs themselves.
3560 * RelationCacheInitializePhase3 will clean up as needed.
/*
 * Runs RelationMapInitializePhase2(), then, working in CacheMemoryContext,
 * tries load_relcache_init_file(true).  If that fails, formrdesc() is
 * called for each of the five shared catalogs listed below.
 */
3563 RelationCacheInitializePhase2(void)
3565 MemoryContext oldcxt;
3568 * relation mapper needs initialized too
3570 RelationMapInitializePhase2();
3573 * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3576 if (IsBootstrapProcessingMode())
3580 * switch to cache memory context
3582 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3585 * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3586 * the cache with pre-made descriptors for the critical shared catalogs.
/*
 * formrdesc() arguments as used here: catalog name, rowtype OID, two
 * boolean flags, attribute count, attribute descriptor array.  The meaning
 * of the two flags is defined by formrdesc() itself.
 */
3588 if (!load_relcache_init_file(true))
3590 formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3591 true, Natts_pg_database, Desc_pg_database);
3592 formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3593 true, Natts_pg_authid, Desc_pg_authid);
3594 formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3595 false, Natts_pg_auth_members, Desc_pg_auth_members);
3596 formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3597 false, Natts_pg_shseclabel, Desc_pg_shseclabel);
3598 formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3599 true, Natts_pg_subscription, Desc_pg_subscription);
/* must equal the number of formrdesc() calls above (currently five) */
3601 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3604 MemoryContextSwitchTo(oldcxt);
3608 * RelationCacheInitializePhase3
3610 * This is called as soon as the catcache and transaction system
3611 * are functional and we have determined MyDatabaseId. At this point
3612 * we can actually read data from the database's system catalogs.
3613 * We first try to read pre-computed relcache entries from the local
3614 * relcache init file. If that's missing or broken, make phony entries
3615 * for the minimum set of nailed-in-cache relations. Then (unless
3616 * bootstrapping) make sure we have entries for the critical system
3617 * indexes. Once we've done all this, we have enough infrastructure to
3618 * open any system catalog or use any catcache. The last step is to
3619 * rewrite the cache files if needed.
/*
 * Visible phases, in order:
 *   1. RelationMapInitializePhase3().
 *   2. In bootstrap mode, or when load_relcache_init_file(false) fails,
 *      fabricate entries for pg_class, pg_attribute, pg_proc and pg_type
 *      with formrdesc() and note that new init files are needed.
 *   3. Load the critical local and shared indexes when the matching
 *      "built" flag is still false, then set that flag.
 *   4. Scan RelationIdCache and repair entries: real pg_class data for
 *      faked-up entries, then rules, triggers, row-security policies and
 *      partition key/descriptor.
 *   5. When needNewCacheFile is set, call InitCatalogCachePhase2() and
 *      write both relcache init files.
 */
3622 RelationCacheInitializePhase3(void)
3624 HASH_SEQ_STATUS status;
3625 RelIdCacheEnt *idhentry;
3626 MemoryContext oldcxt;
/* starts out true when the shared critical entries have not been built yet */
3627 bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3630 * relation mapper needs initialized too
3632 RelationMapInitializePhase3();
3635 * switch to cache memory context
3637 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3640 * Try to load the local relcache cache file. If unsuccessful, bootstrap
3641 * the cache with pre-made descriptors for the critical "nailed-in" system
3644 if (IsBootstrapProcessingMode() ||
3645 !load_relcache_init_file(false))
3647 needNewCacheFile = true;
3649 formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3650 true, Natts_pg_class, Desc_pg_class);
3651 formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3652 false, Natts_pg_attribute, Desc_pg_attribute);
3653 formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3654 true, Natts_pg_proc, Desc_pg_proc);
3655 formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3656 true, Natts_pg_type, Desc_pg_type);
/* must equal the number of formrdesc() calls above (currently four) */
3658 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3661 MemoryContextSwitchTo(oldcxt);
3663 /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3664 if (IsBootstrapProcessingMode())
3668 * If we didn't get the critical system indexes loaded into relcache, do
3669 * so now. These are critical because the catcache and/or opclass cache
3670 * depend on them for fetches done during relcache load. Thus, we have an
3671 * infinite-recursion problem. We can break the recursion by doing
3672 * heapscans instead of indexscans at certain key spots. To avoid hobbling
3673 * performance, we only want to do that until we have the critical indexes
3674 * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3675 * decide whether to do heapscan or indexscan at the key spots, and we set
3676 * it true after we've loaded the critical indexes.
3678 * The critical indexes are marked as "nailed in cache", partly to make it
3679 * easy for load_relcache_init_file to count them, but mainly because we
3680 * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3681 * true. (NOTE: perhaps it would be possible to reload them by
3682 * temporarily setting criticalRelcachesBuilt to false again. For now,
3683 * though, we just nail 'em in.)
3685 * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3686 * in the same way as the others, because the critical catalogs don't
3687 * (currently) have any rules or triggers, and so these indexes can be
3688 * rebuilt without inducing recursion. However they are used during
3689 * relcache load when a rel does have rules or triggers, so we choose to
3690 * nail them for performance reasons.
/* each call passes the index OID followed by the OID of its catalog */
3692 if (!criticalRelcachesBuilt)
3694 load_critical_index(ClassOidIndexId,
3695 RelationRelationId);
3696 load_critical_index(AttributeRelidNumIndexId,
3697 AttributeRelationId);
3698 load_critical_index(IndexRelidIndexId,
3700 load_critical_index(OpclassOidIndexId,
3701 OperatorClassRelationId);
3702 load_critical_index(AccessMethodProcedureIndexId,
3703 AccessMethodProcedureRelationId);
3704 load_critical_index(RewriteRelRulenameIndexId,
3706 load_critical_index(TriggerRelidNameIndexId,
3709 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3711 criticalRelcachesBuilt = true;
3715 * Process critical shared indexes too.
3717 * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3718 * initial lookup of MyDatabaseId, without which we'll never find any
3719 * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3720 * database OID, so it instead depends on DatabaseOidIndexId. We also
3721 * need to nail up some indexes on pg_authid and pg_auth_members for use
3722 * during client authentication. SharedSecLabelObjectIndexId isn't
3723 * critical for the core system, but authentication hooks might be
3726 if (!criticalSharedRelcachesBuilt)
3728 load_critical_index(DatabaseNameIndexId,
3729 DatabaseRelationId);
3730 load_critical_index(DatabaseOidIndexId,
3731 DatabaseRelationId);
3732 load_critical_index(AuthIdRolnameIndexId,
3734 load_critical_index(AuthIdOidIndexId,
3736 load_critical_index(AuthMemMemRoleIndexId,
3738 load_critical_index(SharedSecLabelObjectIndexId,
3739 SharedSecLabelRelationId);
3741 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3743 criticalSharedRelcachesBuilt = true;
3747 * Now, scan all the relcache entries and update anything that might be
3748 * wrong in the results from formrdesc or the relcache cache file. If we
3749 * faked up relcache entries using formrdesc, then read the real pg_class
3750 * rows and replace the fake entries with them. Also, if any of the
3751 * relcache entries have rules, triggers, or security policies, load that
3752 * info the hard way since it isn't recorded in the cache file.
3754 * Whenever we access the catalogs to read data, there is a possibility of
3755 * a shared-inval cache flush causing relcache entries to be removed.
3756 * Since hash_seq_search only guarantees to still work after the *current*
3757 * entry is removed, it's unsafe to continue the hashtable scan afterward.
3758 * We handle this by restarting the scan from scratch after each access.
3759 * This is theoretically O(N^2), but the number of entries that actually
3760 * need to be fixed is small enough that it doesn't matter.
3762 hash_seq_init(&status, RelationIdCache);
3764 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3766 Relation relation = idhentry->reldesc;
3767 bool restart = false;
3770 * Make sure *this* entry doesn't get flushed while we work with it.
3772 RelationIncrementReferenceCount(relation);
3775 * If it's a faked-up entry, read the real pg_class tuple.
/* an invalid relowner is the marker used to recognise a faked-up entry */
3777 if (relation->rd_rel->relowner == InvalidOid)
3782 htup = SearchSysCache1(RELOID,
3783 ObjectIdGetDatum(RelationGetRelid(relation)));
3784 if (!HeapTupleIsValid(htup))
3785 elog(FATAL, "cache lookup failed for relation %u",
3786 RelationGetRelid(relation));
3787 relp = (Form_pg_class) GETSTRUCT(htup);
3790 * Copy tuple to relation->rd_rel. (See notes in
3791 * AllocateRelationDesc())
3793 memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3795 /* Update rd_options while we have the tuple */
3796 if (relation->rd_options)
3797 pfree(relation->rd_options);
3798 RelationParseRelOptions(relation, htup);
3801 * Check the values in rd_att were set up correctly. (We cannot
3802 * just copy them over now: formrdesc must have set up the rd_att
3803 * data correctly to start with, because it may already have been
3804 * copied into one or more catcache entries.)
3806 Assert(relation->rd_att->tdtypeid == relp->reltype);
3807 Assert(relation->rd_att->tdtypmod == -1);
3808 Assert(relation->rd_att->tdhasoid == relp->relhasoids);
3810 ReleaseSysCache(htup);
3812 /* relowner had better be OK now, else we'll loop forever */
3813 if (relation->rd_rel->relowner == InvalidOid)
3814 elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3815 RelationGetRelationName(relation));
3821 * Fix data that isn't saved in relcache cache file.
3823 * relhasrules or relhastriggers could possibly be wrong or out of
3824 * date. If we don't actually find any rules or triggers, clear the
3825 * local copy of the flag so that we don't get into an infinite loop
3826 * here. We don't make any attempt to fix the pg_class entry, though.
3828 if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3830 RelationBuildRuleLock(relation);
3831 if (relation->rd_rules == NULL)
3832 relation->rd_rel->relhasrules = false;
3835 if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3837 RelationBuildTriggers(relation);
3838 if (relation->trigdesc == NULL)
3839 relation->rd_rel->relhastriggers = false;
3844 * Re-load the row security policies if the relation has them, since
3845 * they are not preserved in the cache. Note that we can never NOT
3846 * have a policy while relrowsecurity is true,
3847 * RelationBuildRowSecurity will create a single default-deny policy
3848 * if there is no policy defined in pg_policy.
3850 if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3852 RelationBuildRowSecurity(relation);
3854 Assert(relation->rd_rsdesc != NULL);
3859 * Reload partition key and descriptor for a partitioned table.
3861 if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3863 RelationBuildPartitionKey(relation);
3864 Assert(relation->rd_partkey != NULL);
3866 RelationBuildPartitionDesc(relation);
3867 Assert(relation->rd_partdesc != NULL);
3872 /* Release hold on the relation */
3873 RelationDecrementReferenceCount(relation);
3875 /* Now, restart the hashtable scan if needed */
/* the in-progress scan is terminated before a fresh one is started */
3878 hash_seq_term(&status);
3879 hash_seq_init(&status, RelationIdCache);
3884 * Lastly, write out new relcache cache files if needed. We don't bother
3885 * to distinguish cases where only one of the two needs an update.
3887 if (needNewCacheFile)
3890 * Force all the catcaches to finish initializing and thereby open the
3891 * catalogs and indexes they use. This will preload the relcache with
3892 * entries for all the most important system catalogs and indexes, so
3893 * that the init files will be most useful for future backends.
3895 InitCatalogCachePhase2();
3897 /* now write the files */
3898 write_relcache_init_file(true);
3899 write_relcache_init_file(false);
3904 * Load one critical system index into the relcache
3906 * indexoid is the OID of the target index; heapoid is the OID of the catalog that the index belongs to.
/*
 * Takes AccessShareLock on the catalog and then the index, builds the index
 * descriptor with RelationBuildDesc(), marks it nailed, and releases the
 * two locks again.
 */
3910 load_critical_index(Oid indexoid, Oid heapoid)
3915 * We must lock the underlying catalog before locking the index to avoid
3916 * deadlock, since RelationBuildDesc might well need to read the catalog,
3917 * and if anyone else is exclusive-locking this catalog and index they'll
3918 * be doing it in that order.
3920 LockRelationOid(heapoid, AccessShareLock);
3921 LockRelationOid(indexoid, AccessShareLock);
3922 ird = RelationBuildDesc(indexoid, true);
/* failure to open a critical index is reported at PANIC level */
3924 elog(PANIC, "could not open critical system index %u", indexoid);
3925 ird->rd_isnailed = true;
/* locks are released in the reverse of the order they were taken */
3927 UnlockRelationOid(indexoid, AccessShareLock);
3928 UnlockRelationOid(heapoid, AccessShareLock);
3932 * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3933 * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3935 * We need this kluge because we have to be able to access non-fixed-width
3936 * fields of pg_class and pg_index before we have the standard catalog caches
3937 * available. We use predefined data that's set up in just the same way as
3938 * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
3939 * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3940 * does it have a TupleConstr field. But it's good enough for the purpose of
3941 * extracting fields.
/*
 * Builds a tuple descriptor in CacheMemoryContext from an array of
 * predefined pg_attribute rows.
 *
 * natts - number of entries taken from attrs
 * attrs - source attribute rows; only the first ATTRIBUTE_FIXED_PART_SIZE
 *         bytes of each are copied
 */
3944 BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs,
3948 MemoryContext oldcxt;
3951 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3953 result = CreateTemplateTupleDesc(natts, hasoids);
3954 result->tdtypeid = RECORDOID; /* not right, but we don't care */
3955 result->tdtypmod = -1;
3957 for (i = 0; i < natts; i++)
3959 memcpy(result->attrs[i], &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
3960 /* make sure attcacheoff is valid */
3961 result->attrs[i]->attcacheoff = -1;
3964 /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
/* NOTE(review): attrs[0] is written unconditionally, so natts is assumed to be >= 1 -- confirm */
3965 result->attrs[0]->attcacheoff = 0;
3967 /* Note: we don't bother to set up a TupleConstr entry */
3969 MemoryContextSwitchTo(oldcxt);
/*
 * The pg_class descriptor is kept in a function-local static: it is built
 * with BuildHardcodedDescriptor() while the pointer is still NULL and
 * reused afterwards.
 */
3975 GetPgClassDescriptor(void)
3977 static TupleDesc pgclassdesc = NULL;
3980 if (pgclassdesc == NULL)
3981 pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
/*
 * The pg_index descriptor is kept in a function-local static: it is built
 * with BuildHardcodedDescriptor() while the pointer is still NULL and
 * reused afterwards.
 */
3989 GetPgIndexDescriptor(void)
3991 static TupleDesc pgindexdesc = NULL;
3994 if (pgindexdesc == NULL)
3995 pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4003 * Load any default attribute value definitions for the relation.
/*
 * Scans pg_attrdef for rows whose adrelid equals this relation's OID and
 * stores each row's adbin text into the matching entry (same adnum) of
 * relation->rd_att->constr->defval.  The defval array and num_defval are
 * read from the relation on entry.  Every anomaly detected here is reported
 * as a WARNING rather than an ERROR.
 */
4006 AttrDefaultFetch(Relation relation)
4008 AttrDefault *attrdef = relation->rd_att->constr->defval;
4009 int ndef = relation->rd_att->constr->num_defval;
4020 Anum_pg_attrdef_adrelid,
4021 BTEqualStrategyNumber, F_OIDEQ,
4022 ObjectIdGetDatum(RelationGetRelid(relation)));
4024 adrel = heap_open(AttrDefaultRelationId, AccessShareLock);
4025 adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4029 while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4031 Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
/* look for the defval slot whose adnum matches this catalog row */
4033 for (i = 0; i < ndef; i++)
4035 if (adform->adnum != attrdef[i].adnum)
/* a slot that already has adbin set means the catalog holds a duplicate */
4037 if (attrdef[i].adbin != NULL)
4038 elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4039 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
4040 RelationGetRelationName(relation));
4044 val = fastgetattr(htup,
4045 Anum_pg_attrdef_adbin,
4046 adrel->rd_att, &isnull);
4048 elog(WARNING, "null adbin for attr %s of rel %s",
4049 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
4050 RelationGetRelationName(relation));
4053 /* detoast and convert to cstring in caller's context */
4054 char *s = TextDatumGetCString(val);
/* the copy that is kept lives in CacheMemoryContext */
4056 attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext, s);
4063 elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4064 adform->adnum, RelationGetRelationName(relation));
4067 systable_endscan(adscan);
4068 heap_close(adrel, AccessShareLock);
4071 elog(WARNING, "%d attrdef record(s) missing for rel %s",
4072 ndef - found, RelationGetRelationName(relation));
4076 * Load any check constraints for the relation.
/*
 * Scans pg_constraint for rows whose conrelid equals this relation's OID,
 * keeps only CHECK constraints, and fills relation->rd_att->constr->check
 * with each one's validity flag, no-inherit flag, name and conbin text.
 * The array and its expected length (num_check) are read from the relation
 * on entry.  Anomalies are raised as ERRORs, and the filled array is sorted
 * with CheckConstraintCmp at the end.
 */
4079 CheckConstraintFetch(Relation relation)
4081 ConstrCheck *check = relation->rd_att->constr->check;
4082 int ncheck = relation->rd_att->constr->num_check;
4084 SysScanDesc conscan;
4085 ScanKeyData skey[1];
4089 ScanKeyInit(&skey[0],
4090 Anum_pg_constraint_conrelid,
4091 BTEqualStrategyNumber, F_OIDEQ,
4092 ObjectIdGetDatum(RelationGetRelid(relation)));
4094 conrel = heap_open(ConstraintRelationId, AccessShareLock);
4095 conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4098 while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4100 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
4105 /* We want check constraints only */
4106 if (conform->contype != CONSTRAINT_CHECK)
/* guard against writing past the ncheck slots of the check array */
4109 if (found >= ncheck)
4110 elog(ERROR, "unexpected constraint record found for rel %s",
4111 RelationGetRelationName(relation));
4113 check[found].ccvalid = conform->convalidated;
4114 check[found].ccnoinherit = conform->connoinherit;
/* the name is duplicated into CacheMemoryContext */
4115 check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
4116 NameStr(conform->conname));
4118 /* Grab and test conbin is actually set */
4119 val = fastgetattr(htup,
4120 Anum_pg_constraint_conbin,
4121 conrel->rd_att, &isnull);
4123 elog(ERROR, "null conbin for rel %s",
4124 RelationGetRelationName(relation));
4126 /* detoast and convert to cstring in caller's context */
4127 s = TextDatumGetCString(val);
4128 check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4134 systable_endscan(conscan);
4135 heap_close(conrel, AccessShareLock);
/* the number of constraints found must match num_check exactly */
4137 if (found != ncheck)
4138 elog(ERROR, "%d constraint record(s) missing for rel %s",
4139 ncheck - found, RelationGetRelationName(relation));
4141 /* Sort the records so that CHECKs are applied in a deterministic order */
4143 qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4147 * qsort comparator to sort ConstrCheck entries by name
/*
 * a, b - pointers to ConstrCheck elements, as handed over by qsort().
 * The result is that of strcmp() on the two ccname strings, so the order is
 * a plain byte-wise comparison of the constraint names.
 */
4150 CheckConstraintCmp(const void *a, const void *b)
4152 const ConstrCheck *ca = (const ConstrCheck *) a;
4153 const ConstrCheck *cb = (const ConstrCheck *) b;
4155 return strcmp(ca->ccname, cb->ccname);
4159 * RelationGetFKeyList -- get a list of foreign key info for the relation
4161 * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4162 * the given relation. This data is a direct copy of relevant fields from
4163 * pg_constraint. The list items are in no particular order.
4165 * CAUTION: the returned list is part of the relcache's data, and could
4166 * vanish in a relcache entry reset. Callers must inspect or copy it
4167 * before doing anything that might trigger a cache flush, such as
4168 * system catalog accesses. copyObject() can be used if desired.
4169 * (We define it this way because current callers want to filter and
4170 * modify the list entries anyway, so copying would be a waste of time.)
/*
 * Returns relation->rd_fkeylist directly when rd_fkeyvalid is already set.
 * Otherwise pg_constraint is scanned for rows with conrelid equal to this
 * relation's OID; each foreign-key row becomes a ForeignKeyCacheInfo node
 * holding conrelid, confrelid, nkeys and the conkey / confkey / conpfeqop
 * arrays.  A copy of the finished list is stored in the relcache entry
 * (allocated in CacheMemoryContext) and rd_fkeyvalid is set.
 */
4173 RelationGetFKeyList(Relation relation)
4177 SysScanDesc conscan;
4181 MemoryContext oldcxt;
4183 /* Quick exit if we already computed the list. */
4184 if (relation->rd_fkeyvalid)
4185 return relation->rd_fkeylist;
4187 /* Fast path: if it doesn't have any triggers, it can't have FKs */
4188 if (!relation->rd_rel->relhastriggers)
4192 * We build the list we intend to return (in the caller's context) while
4193 * doing the scan. After successfully completing the scan, we copy that
4194 * list into the relcache entry. This avoids cache-context memory leakage
4195 * if we get some sort of error partway through.
4199 /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4201 Anum_pg_constraint_conrelid,
4202 BTEqualStrategyNumber, F_OIDEQ,
4203 ObjectIdGetDatum(RelationGetRelid(relation)));
4205 conrel = heap_open(ConstraintRelationId, AccessShareLock);
4206 conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4209 while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4211 Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4212 ForeignKeyCacheInfo *info;
4218 /* consider only foreign keys */
4219 if (constraint->contype != CONSTRAINT_FOREIGN)
4222 info = makeNode(ForeignKeyCacheInfo);
4223 info->conrelid = constraint->conrelid;
4224 info->confrelid = constraint->confrelid;
4226 /* Extract data from conkey field */
4227 adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
4228 conrel->rd_att, &isnull);
4230 elog(ERROR, "null conkey for rel %s",
4231 RelationGetRelationName(relation));
4233 arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4234 nelem = ARR_DIMS(arr)[0];
/* the array is validated before its contents are copied into info->conkey */
4235 if (ARR_NDIM(arr) != 1 ||
4237 nelem > INDEX_MAX_KEYS ||
4239 ARR_ELEMTYPE(arr) != INT2OID)
4240 elog(ERROR, "conkey is not a 1-D smallint array");
/* conkey's length defines nkeys; the two arrays below must match it */
4242 info->nkeys = nelem;
4243 memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4245 /* Likewise for confkey */
4246 adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
4247 conrel->rd_att, &isnull);
4249 elog(ERROR, "null confkey for rel %s",
4250 RelationGetRelationName(relation));
4252 arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4253 nelem = ARR_DIMS(arr)[0];
4254 if (ARR_NDIM(arr) != 1 ||
4255 nelem != info->nkeys ||
4257 ARR_ELEMTYPE(arr) != INT2OID)
4258 elog(ERROR, "confkey is not a 1-D smallint array");
4260 memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4262 /* Likewise for conpfeqop */
4263 adatum = fastgetattr(htup, Anum_pg_constraint_conpfeqop,
4264 conrel->rd_att, &isnull);
4266 elog(ERROR, "null conpfeqop for rel %s",
4267 RelationGetRelationName(relation));
4269 arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4270 nelem = ARR_DIMS(arr)[0];
4271 if (ARR_NDIM(arr) != 1 ||
4272 nelem != info->nkeys ||
4274 ARR_ELEMTYPE(arr) != OIDOID)
4275 elog(ERROR, "conpfeqop is not a 1-D OID array");
/* element type here is Oid, unlike the AttrNumber arrays above */
4277 memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
4279 /* Add FK's node to the result list */
4280 result = lappend(result, info);
4283 systable_endscan(conscan);
4284 heap_close(conrel, AccessShareLock);
4286 /* Now save a copy of the completed list in the relcache entry. */
4287 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4288 oldlist = relation->rd_fkeylist;
4289 relation->rd_fkeylist = copyObject(result);
4290 relation->rd_fkeyvalid = true;
4291 MemoryContextSwitchTo(oldcxt);
4293 /* Don't leak the old list, if there is one */
4294 list_free_deep(oldlist);
4300 * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4302 * The index list is created only if someone requests it. We scan pg_index
4303 * to find relevant indexes, and add the list to the relcache entry so that
4304 * we won't have to compute it again. Note that shared cache inval of a
4305 * relcache entry will delete the old list and set rd_indexvalid to 0,
4306 * so that we must recompute the index list on next request. This handles
4307 * creation or deletion of an index.
4309 * Indexes that are marked not IndexIsLive are omitted from the returned list.
4310 * Such indexes are expected to be dropped momentarily, and should not be
4311 * touched at all by any caller of this function.
4313 * The returned list is guaranteed to be sorted in order by OID. This is
4314 * needed by the executor, since for index types that we obtain exclusive
4315 * locks on when updating the index, all backends must lock the indexes in
4316 * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4317 * consistent ordering would do, but ordering by OID is easy.
4319 * Since shared cache inval causes the relcache's copy of the list to go away,
4320 * we return a copy of the list palloc'd in the caller's context. The caller
4321 * may list_free() the returned list after scanning it. This is necessary
4322 * since the caller will typically be doing syscache lookups on the relevant
4323 * indexes, and syscache lookup could cause SI messages to be processed!
4325 * We also update rd_oidindex, which this module treats as effectively part
4326 * of the index list. rd_oidindex is valid when rd_indexvalid isn't zero;
4327 * it is the pg_class OID of a unique index on OID when the relation has one,
4328 * and InvalidOid if there is no such index.
4330 * In exactly the same way, we update rd_replidindex, which is the pg_class
4331 * OID of an index to be used as the relation's replication identity index,
4332 * or InvalidOid if there is no such index.
4335 RelationGetIndexList(Relation relation)
4338 SysScanDesc indscan;
4343 char replident = relation->rd_rel->relreplident;
4344 Oid oidIndex = InvalidOid;
4345 Oid pkeyIndex = InvalidOid;
4346 Oid candidateIndex = InvalidOid;
4347 MemoryContext oldcxt;
4349 /* Quick exit if we already computed the list. */
4350 if (relation->rd_indexvalid != 0)
4351 return list_copy(relation->rd_indexlist);
4354 * We build the list we intend to return (in the caller's context) while
4355 * doing the scan. After successfully completing the scan, we copy that
4356 * list into the relcache entry. This avoids cache-context memory leakage
4357 * if we get some sort of error partway through.
4360 oidIndex = InvalidOid;
4362 /* Prepare to scan pg_index for entries having indrelid = this rel. */
4364 Anum_pg_index_indrelid,
4365 BTEqualStrategyNumber, F_OIDEQ,
4366 ObjectIdGetDatum(RelationGetRelid(relation)));
4368 indrel = heap_open(IndexRelationId, AccessShareLock);
4369 indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4372 while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4374 Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
4375 Datum indclassDatum;
4376 oidvector *indclass;
4380 * Ignore any indexes that are currently being dropped. This will
4381 * prevent them from being searched, inserted into, or considered in
4382 * HOT-safety decisions. It's unsafe to touch such an index at all
4383 * since its catalog entries could disappear at any instant.
4385 if (!IndexIsLive(index))
4388 /* Add index's OID to result list in the proper order */
4389 result = insert_ordered_oid(result, index->indexrelid);
4392 * indclass cannot be referenced directly through the C struct,
4393 * because it comes after the variable-width indkey field. Must
4394 * extract the datum the hard way...
4396 indclassDatum = heap_getattr(htup,
4397 Anum_pg_index_indclass,
4398 GetPgIndexDescriptor(),
4401 indclass = (oidvector *) DatumGetPointer(indclassDatum);
4404 * Invalid, non-unique, non-immediate or predicate indexes aren't
4405 * interesting for either oid indexes or replication identity indexes,
4406 * so don't check them.
4408 if (!IndexIsValid(index) || !index->indisunique ||
4409 !index->indimmediate ||
4410 !heap_attisnull(htup, Anum_pg_index_indpred))
4413 /* Check to see if is a usable btree index on OID */
4414 if (index->indnatts == 1 &&
4415 index->indkey.values[0] == ObjectIdAttributeNumber &&
4416 indclass->values[0] == OID_BTREE_OPS_OID)
4417 oidIndex = index->indexrelid;
4419 /* remember primary key index if any */
4420 if (index->indisprimary)
4421 pkeyIndex = index->indexrelid;
4423 /* remember explicitly chosen replica index */
4424 if (index->indisreplident)
4425 candidateIndex = index->indexrelid;
4428 systable_endscan(indscan);
4430 heap_close(indrel, AccessShareLock);
4432 /* Now save a copy of the completed list in the relcache entry. */
4433 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4434 oldlist = relation->rd_indexlist;
4435 relation->rd_indexlist = list_copy(result);
4436 relation->rd_oidindex = oidIndex;
4437 relation->rd_pkindex = pkeyIndex;
4438 if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4439 relation->rd_replidindex = pkeyIndex;
4440 else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4441 relation->rd_replidindex = candidateIndex;
4443 relation->rd_replidindex = InvalidOid;
4444 relation->rd_indexvalid = 1;
4445 MemoryContextSwitchTo(oldcxt);
4447 /* Don't leak the old list, if there is one */
4454 * insert_ordered_oid
4455 * Insert a new Oid into a sorted list of Oids, preserving ordering
4457 * Building the ordered list this way is O(N^2), but with a pretty small
4458 * constant, so for the number of entries we expect it will probably be
4459 * faster than trying to apply qsort(). Most tables don't have very many
4463 insert_ordered_oid(List *list, Oid datum)
4467 /* Does the datum belong at the front? */
4468 if (list == NIL || datum < linitial_oid(list))
4469 return lcons_oid(datum, list);
4470 /* No, so find the entry it belongs after */
4471 prev = list_head(list);
4474 ListCell *curr = lnext(prev);
4476 if (curr == NULL || datum < lfirst_oid(curr))
4477 break; /* it belongs after 'prev', before 'curr' */
4481 /* Insert datum into list after 'prev' */
4482 lappend_cell_oid(list, prev, datum);
4487 * RelationSetIndexList -- externally force the index list contents
4489 * This is used to temporarily override what we think the set of valid
4490 * indexes is (including the presence or absence of an OID index).
4491 * The forcing will be valid only until transaction commit or abort.
4493 * This should only be applied to nailed relations, because in a non-nailed
4494 * relation the hacked index list could be lost at any time due to SI
4495 * messages. In practice it is only used on pg_class (see REINDEX).
4497 * It is up to the caller to make sure the given list is correctly ordered.
4499 * We deliberately do not change rd_indexattr here: even when operating
4500 * with a temporary partial index list, HOT-update decisions must be made
4501 * correctly with respect to the full index set. It is up to the caller
4502 * to ensure that a correct rd_indexattr set has been cached before first
4503 * calling RelationSetIndexList; else a subsequent inquiry might cause a
4504 * wrong rd_indexattr set to get computed and cached. Likewise, we do not
4505 * touch rd_keyattr, rd_pkattr or rd_idattr.
4508 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
4510 MemoryContext oldcxt;
4512 Assert(relation->rd_isnailed);
4513 /* Copy the list into the cache context (could fail for lack of mem) */
4514 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4515 indexIds = list_copy(indexIds);
4516 MemoryContextSwitchTo(oldcxt);
4517 /* Okay to replace old list */
4518 list_free(relation->rd_indexlist);
4519 relation->rd_indexlist = indexIds;
4520 relation->rd_oidindex = oidIndex;
4522 * For the moment, assume the target rel hasn't got a pk or replica
4523 * index. We'll load them on demand in the API that wraps access to them.
4525 relation->rd_pkindex = InvalidOid;
4526 relation->rd_replidindex = InvalidOid;
4527 relation->rd_indexvalid = 2; /* mark list as forced */
4528 /* Flag relation as needing eoxact cleanup (to reset the list) */
4529 EOXactListAdd(relation);
4533 * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
4535 * Returns InvalidOid if there is no such index.
4538 RelationGetOidIndex(Relation relation)
4543 * If relation doesn't have OIDs at all, caller is probably confused. (We
4544 * could just silently return InvalidOid, but it seems better to throw an
4547 Assert(relation->rd_rel->relhasoids);
4549 if (relation->rd_indexvalid == 0)
4551 /* RelationGetIndexList does the heavy lifting. */
4552 ilist = RelationGetIndexList(relation);
4554 Assert(relation->rd_indexvalid != 0);
4557 return relation->rd_oidindex;
4561 * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4563 * Returns InvalidOid if there is no such index.
4566 RelationGetPrimaryKeyIndex(Relation relation)
4570 if (relation->rd_indexvalid == 0)
4572 /* RelationGetIndexList does the heavy lifting. */
4573 ilist = RelationGetIndexList(relation);
4575 Assert(relation->rd_indexvalid != 0);
4578 return relation->rd_pkindex;
4582 * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4584 * Returns InvalidOid if there is no such index.
4587 RelationGetReplicaIndex(Relation relation)
4591 if (relation->rd_indexvalid == 0)
4593 /* RelationGetIndexList does the heavy lifting. */
4594 ilist = RelationGetIndexList(relation);
4596 Assert(relation->rd_indexvalid != 0);
4599 return relation->rd_replidindex;
4603 * RelationGetIndexExpressions -- get the index expressions for an index
4605 * We cache the result of transforming pg_index.indexprs into a node tree.
4606 * If the rel is not an index or has no expressional columns, we return NIL.
4607 * Otherwise, the returned tree is copied into the caller's memory context.
4608 * (We don't want to return a pointer to the relcache copy, since it could
4609 * disappear due to relcache invalidation.)
4612 RelationGetIndexExpressions(Relation relation)
4618 MemoryContext oldcxt;
4620 /* Quick exit if we already computed the result. */
4621 if (relation->rd_indexprs)
4622 return (List *) copyObject(relation->rd_indexprs);
4624 /* Quick exit if there is nothing to do. */
4625 if (relation->rd_indextuple == NULL ||
4626 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
4630 * We build the tree we intend to return in the caller's context. After
4631 * successfully completing the work, we copy it into the relcache entry.
4632 * This avoids problems if we get some sort of error partway through.
4634 exprsDatum = heap_getattr(relation->rd_indextuple,
4635 Anum_pg_index_indexprs,
4636 GetPgIndexDescriptor(),
4639 exprsString = TextDatumGetCString(exprsDatum);
4640 result = (List *) stringToNode(exprsString);
4644 * Run the expressions through eval_const_expressions. This is not just an
4645 * optimization, but is necessary, because the planner will be comparing
4646 * them to similarly-processed qual clauses, and may fail to detect valid
4647 * matches without this. We don't bother with canonicalize_qual, however.
4649 result = (List *) eval_const_expressions(NULL, (Node *) result);
4651 result = (List *) canonicalize_qual((Expr *) result);
4653 /* May as well fix opfuncids too */
4654 fix_opfuncids((Node *) result);
4656 /* Now save a copy of the completed tree in the relcache entry. */
4657 oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4658 relation->rd_indexprs = (List *) copyObject(result);
4659 MemoryContextSwitchTo(oldcxt);
4665 * RelationGetIndexPredicate -- get the index predicate for an index
4667 * We cache the result of transforming pg_index.indpred into an implicit-AND
4668 * node tree (suitable for ExecQual).
4669 * If the rel is not an index or has no predicate, we return NIL.
4670 * Otherwise, the returned tree is copied into the caller's memory context.
4671 * (We don't want to return a pointer to the relcache copy, since it could
4672 * disappear due to relcache invalidation.)
4675 RelationGetIndexPredicate(Relation relation)
4681 MemoryContext oldcxt;
4683 /* Quick exit if we already computed the result. */
4684 if (relation->rd_indpred)
4685 return (List *) copyObject(relation->rd_indpred);
4687 /* Quick exit if there is nothing to do. */
4688 if (relation->rd_indextuple == NULL ||
4689 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
4693 * We build the tree we intend to return in the caller's context. After
4694 * successfully completing the work, we copy it into the relcache entry.
4695 * This avoids problems if we get some sort of error partway through.
4697 predDatum = heap_getattr(relation->rd_indextuple,
4698 Anum_pg_index_indpred,
4699 GetPgIndexDescriptor(),
4702 predString = TextDatumGetCString(predDatum);
4703 result = (List *) stringToNode(predString);
4707 * Run the expression through const-simplification and canonicalization.
4708 * This is not just an optimization, but is necessary, because the planner
4709 * will be comparing it to similarly-processed qual clauses, and may fail
4710 * to detect valid matches without this. This must match the processing
4711 * done to qual clauses in preprocess_expression()! (We can skip the
4712 * stuff involving subqueries, however, since we don't allow any in index
4715 result = (List *) eval_const_expressions(NULL, (Node *) result);
4717 result = (List *) canonicalize_qual((Expr *) result);
4719 /* Also convert to implicit-AND format */
4720 result = make_ands_implicit((Expr *) result);
4722 /* May as well fix opfuncids too */
4723 fix_opfuncids((Node *) result);
4725 /* Now save a copy of the completed tree in the relcache entry. */
4726 oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4727 relation->rd_indpred = (List *) copyObject(result);
4728 MemoryContextSwitchTo(oldcxt);
4734 * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4736 * The result has a bit set for each attribute used anywhere in the index
4737 * definitions of all the indexes on this relation. (This includes not only
4738 * simple index keys, but attributes used in expressions and partial-index
4741 * Depending on attrKind, a bitmap covering the attnums for all index columns,
4742 * for all potential foreign key columns, or for all columns in the configured
4743 * replica identity index is returned.
4745 * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4746 * we can include system attributes (e.g., OID) in the bitmap representation.
4748 * Caller had better hold at least RowExclusiveLock on the target relation
4749 * to ensure that it has a stable set of indexes. This also makes it safe
4750 * (deadlock-free) for us to take locks on the relation's indexes.
4752 * The returned result is palloc'd in the caller's memory context and should
4753 * be bms_free'd when not needed anymore.
4756 RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
4758 Bitmapset *indexattrs; /* indexed columns */
4759 Bitmapset *uindexattrs; /* columns in unique indexes */
4760 Bitmapset *pkindexattrs; /* columns in the primary index */
4761 Bitmapset *idindexattrs; /* columns in the replica identity */
4766 MemoryContext oldcxt;
4768 /* Quick exit if we already computed the result. */
4769 if (relation->rd_indexattr != NULL)
4773 case INDEX_ATTR_BITMAP_ALL:
4774 return bms_copy(relation->rd_indexattr);
4775 case INDEX_ATTR_BITMAP_KEY:
4776 return bms_copy(relation->rd_keyattr);
4777 case INDEX_ATTR_BITMAP_PRIMARY_KEY:
4778 return bms_copy(relation->rd_pkattr);
4779 case INDEX_ATTR_BITMAP_IDENTITY_KEY:
4780 return bms_copy(relation->rd_idattr);
4782 elog(ERROR, "unknown attrKind %u", attrKind);
4786 /* Fast path if definitely no indexes */
4787 if (!RelationGetForm(relation)->relhasindex)
4791 * Get cached list of index OIDs
4793 indexoidlist = RelationGetIndexList(relation);
4795 /* Fall out if no indexes (but relhasindex was set) */
4796 if (indexoidlist == NIL)
4800 * Copy the rd_pkindex and rd_replidindex value computed by
4801 * RelationGetIndexList before proceeding. This is needed because a
4802 * relcache flush could occur inside index_open below, resetting the
4803 * fields managed by RelationGetIndexList. (The values we're computing
4804 * will still be valid, assuming that caller has a sufficient lock on
4807 relpkindex = relation->rd_pkindex;
4808 relreplindex = relation->rd_replidindex;
4811 * For each index, add referenced attributes to indexattrs.
4813 * Note: we consider all indexes returned by RelationGetIndexList, even if
4814 * they are not indisready or indisvalid. This is important because an
4815 * index for which CREATE INDEX CONCURRENTLY has just started must be
4816 * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4817 * CONCURRENTLY is far enough along that we should ignore the index, it
4818 * won't be returned at all by RelationGetIndexList.
4822 pkindexattrs = NULL;
4823 idindexattrs = NULL;
4824 foreach(l, indexoidlist)
4826 Oid indexOid = lfirst_oid(l);
4828 IndexInfo *indexInfo;
4830 bool isKey; /* candidate key */
4831 bool isPK; /* primary key */
4832 bool isIDKey; /* replica identity index */
4834 indexDesc = index_open(indexOid, AccessShareLock);
4836 /* Extract index key information from the index's pg_index row */
4837 indexInfo = BuildIndexInfo(indexDesc);
4839 /* Can this index be referenced by a foreign key? */
4840 isKey = indexInfo->ii_Unique &&
4841 indexInfo->ii_Expressions == NIL &&
4842 indexInfo->ii_Predicate == NIL;
4844 /* Is this a primary key? */
4845 isPK = (indexOid == relpkindex);
4847 /* Is this index the configured (or default) replica identity? */
4848 isIDKey = (indexOid == relreplindex);
4850 /* Collect simple attribute references */
4851 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
4853 int attrnum = indexInfo->ii_KeyAttrNumbers[i];
4857 indexattrs = bms_add_member(indexattrs,
4858 attrnum - FirstLowInvalidHeapAttributeNumber);
4861 uindexattrs = bms_add_member(uindexattrs,
4862 attrnum - FirstLowInvalidHeapAttributeNumber);
4865 pkindexattrs = bms_add_member(pkindexattrs,
4866 attrnum - FirstLowInvalidHeapAttributeNumber);
4869 idindexattrs = bms_add_member(idindexattrs,
4870 attrnum - FirstLowInvalidHeapAttributeNumber);
4874 /* Collect all attributes used in expressions, too */
4875 pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
4877 /* Collect all attributes in the index predicate, too */
4878 pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
4880 index_close(indexDesc, AccessShareLock);
4883 list_free(indexoidlist);
4885 /* Don't leak the old values of these bitmaps, if any */
4886 bms_free(relation->rd_indexattr);
4887 relation->rd_indexattr = NULL;
4888 bms_free(relation->rd_keyattr);
4889 relation->rd_keyattr = NULL;
4890 bms_free(relation->rd_pkattr);
4891 relation->rd_pkattr = NULL;
4892 bms_free(relation->rd_idattr);
4893 relation->rd_idattr = NULL;
4896 * Now save copies of the bitmaps in the relcache entry. We intentionally
4897 * set rd_indexattr last, because that's the one that signals validity of
4898 * the values; if we run out of memory before making that copy, we won't
4899 * leave the relcache entry looking like the other ones are valid but
4902 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4903 relation->rd_keyattr = bms_copy(uindexattrs);
4904 relation->rd_pkattr = bms_copy(pkindexattrs);
4905 relation->rd_idattr = bms_copy(idindexattrs);
4906 relation->rd_indexattr = bms_copy(indexattrs);
4907 MemoryContextSwitchTo(oldcxt);
4909 /* We return our original working copy for caller to play with */
4912 case INDEX_ATTR_BITMAP_ALL:
4914 case INDEX_ATTR_BITMAP_KEY:
4916 case INDEX_ATTR_BITMAP_PRIMARY_KEY:
4917 return bms_copy(relation->rd_pkattr);
4918 case INDEX_ATTR_BITMAP_IDENTITY_KEY:
4919 return idindexattrs;
4921 elog(ERROR, "unknown attrKind %u", attrKind);
4927 * RelationGetExclusionInfo -- get info about index's exclusion constraint
4929 * This should be called only for an index that is known to have an
4930 * associated exclusion constraint. It returns arrays (palloc'd in caller's
4931 * context) of the exclusion operator OIDs, their underlying functions'
4932 * OIDs, and their strategy numbers in the index's opclasses. We cache
4933 * all this information since it requires a fair amount of work to get.
4936 RelationGetExclusionInfo(Relation indexRelation,
4939 uint16 **strategies)
4941 int ncols = indexRelation->rd_rel->relnatts;
4946 SysScanDesc conscan;
4947 ScanKeyData skey[1];
4950 MemoryContext oldcxt;
4953 /* Allocate result space in caller context */
4954 *operators = ops = (Oid *) palloc(sizeof(Oid) * ncols);
4955 *procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols);
4956 *strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols);
4958 /* Quick exit if we have the data cached already */
4959 if (indexRelation->rd_exclstrats != NULL)
4961 memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols);
4962 memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols);
4963 memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols);
4968 * Search pg_constraint for the constraint associated with the index. To
4969 * make this not too painfully slow, we use the index on conrelid; that
4970 * will hold the parent relation's OID not the index's own OID.
4972 ScanKeyInit(&skey[0],
4973 Anum_pg_constraint_conrelid,
4974 BTEqualStrategyNumber, F_OIDEQ,
4975 ObjectIdGetDatum(indexRelation->rd_index->indrelid));
4977 conrel = heap_open(ConstraintRelationId, AccessShareLock);
4978 conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4982 while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4984 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
4990 /* We want the exclusion constraint owning the index */
4991 if (conform->contype != CONSTRAINT_EXCLUSION ||
4992 conform->conindid != RelationGetRelid(indexRelation))
4995 /* There should be only one */
4997 elog(ERROR, "unexpected exclusion constraint record found for rel %s",
4998 RelationGetRelationName(indexRelation));
5001 /* Extract the operator OIDS from conexclop */
5002 val = fastgetattr(htup,
5003 Anum_pg_constraint_conexclop,
5004 conrel->rd_att, &isnull);
5006 elog(ERROR, "null conexclop for rel %s",
5007 RelationGetRelationName(indexRelation));
5009 arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5010 nelem = ARR_DIMS(arr)[0];
5011 if (ARR_NDIM(arr) != 1 ||
5014 ARR_ELEMTYPE(arr) != OIDOID)
5015 elog(ERROR, "conexclop is not a 1-D Oid array");
5017 memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols);
5020 systable_endscan(conscan);
5021 heap_close(conrel, AccessShareLock);
5024 elog(ERROR, "exclusion constraint record missing for rel %s",
5025 RelationGetRelationName(indexRelation));
5027 /* We need the func OIDs and strategy numbers too */
5028 for (i = 0; i < ncols; i++)
5030 funcs[i] = get_opcode(ops[i]);
5031 strats[i] = get_op_opfamily_strategy(ops[i],
5032 indexRelation->rd_opfamily[i]);
5033 /* shouldn't fail, since it was checked at index creation */
5034 if (strats[i] == InvalidStrategy)
5035 elog(ERROR, "could not find strategy for operator %u in family %u",
5036 ops[i], indexRelation->rd_opfamily[i]);
5039 /* Save a copy of the results in the relcache entry. */
5040 oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5041 indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols);
5042 indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols);
5043 indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols);
5044 memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols);
5045 memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols);
5046 memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols);
5047 MemoryContextSwitchTo(oldcxt);
5051 * Get publication actions for the given relation.
5053 struct PublicationActions *
5054 GetRelationPublicationActions(Relation relation)
5058 MemoryContext oldcxt;
5059 PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5061 if (relation->rd_pubactions)
5062 return memcpy(pubactions, relation->rd_pubactions,
5063 sizeof(PublicationActions));
5065 /* Fetch the publication membership info. */
5066 puboids = GetRelationPublications(RelationGetRelid(relation));
5067 puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5069 foreach(lc, puboids)
5071 Oid pubid = lfirst_oid(lc);
5073 Form_pg_publication pubform;
5075 tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
5077 if (!HeapTupleIsValid(tup))
5078 elog(ERROR, "cache lookup failed for publication %u", pubid);
5080 pubform = (Form_pg_publication) GETSTRUCT(tup);
5082 pubactions->pubinsert |= pubform->pubinsert;
5083 pubactions->pubupdate |= pubform->pubupdate;
5084 pubactions->pubdelete |= pubform->pubdelete;
5086 ReleaseSysCache(tup);
5089 * If we know everything is replicated, there is no point to check
5090 * for other publications.
5092 if (pubactions->pubinsert && pubactions->pubupdate &&
5093 pubactions->pubdelete)
5097 if (relation->rd_pubactions)
5099 pfree(relation->rd_pubactions);
5100 relation->rd_pubactions = NULL;
5103 /* Now save copy of the actions in the relcache entry. */
5104 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
5105 relation->rd_pubactions = palloc(sizeof(PublicationActions));
5106 memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5107 MemoryContextSwitchTo(oldcxt);
5113 * Routines to support ereport() reports of relation-related errors
5115 * These could have been put into elog.c, but it seems like a module layering
5116 * violation to have elog.c calling relcache or syscache stuff --- and we
5117 * definitely don't want elog.h including rel.h. So we put them here.
5121 * errtable --- stores schema_name and table_name of a table
5122 * within the current errordata.
5125 errtable(Relation rel)
5127 err_generic_string(PG_DIAG_SCHEMA_NAME,
5128 get_namespace_name(RelationGetNamespace(rel)));
5129 err_generic_string(PG_DIAG_TABLE_NAME, RelationGetRelationName(rel));
5131 return 0; /* return value does not matter */
5135 * errtablecol --- stores schema_name, table_name and column_name
5136 * of a table column within the current errordata.
5138 * The column is specified by attribute number --- for most callers, this is
5139 * easier and less error-prone than getting the column name for themselves.
5142 errtablecol(Relation rel, int attnum)
5144 TupleDesc reldesc = RelationGetDescr(rel);
5145 const char *colname;
5147 /* Use reldesc if it's a user attribute, else consult the catalogs */
5148 if (attnum > 0 && attnum <= reldesc->natts)
5149 colname = NameStr(reldesc->attrs[attnum - 1]->attname);
5151 colname = get_relid_attribute_name(RelationGetRelid(rel), attnum);
5153 return errtablecolname(rel, colname);
5157 * errtablecolname --- stores schema_name, table_name and column_name
5158 * of a table column within the current errordata, where the column name is
5159 * given directly rather than extracted from the relation's catalog data.
5161 * Don't use this directly unless errtablecol() is inconvenient for some
5162 * reason. This might possibly be needed during intermediate states in ALTER
5163 * TABLE, for instance.
5166 errtablecolname(Relation rel, const char *colname)
5169 err_generic_string(PG_DIAG_COLUMN_NAME, colname);
5171 return 0; /* return value does not matter */
5175 * errtableconstraint --- stores schema_name, table_name and constraint_name
5176 * of a table-related constraint within the current errordata.
5179 errtableconstraint(Relation rel, const char *conname)
5182 err_generic_string(PG_DIAG_CONSTRAINT_NAME, conname);
5184 return 0; /* return value does not matter */
5189 * load_relcache_init_file, write_relcache_init_file
5191 * In late 1992, we started regularly having databases with more than
5192 * a thousand classes in them. With this number of classes, it became
5193 * critical to do indexed lookups on the system catalogs.
5195 * Bootstrapping these lookups is very hard. We want to be able to
5196 * use an index on pg_attribute, for example, but in order to do so,
5197 * we must have read pg_attribute for the attributes in the index,
5198 * which implies that we need to use the index.
5200 * In order to get around the problem, we do the following:
5202 * + When the database system is initialized (at initdb time), we
5203 * don't use indexes. We do sequential scans.
5205 * + When the backend is started up in normal mode, we load an image
5206 * of the appropriate relation descriptors, in internal format,
5207 * from an initialization file in the data/base/... directory.
5209 * + If the initialization file isn't there, then we create the
5210 * relation descriptors using sequential scans and write 'em to
5211 * the initialization file for use by subsequent backends.
5213 * As of Postgres 9.0, there is one local initialization file in each
5214 * database, plus one shared initialization file for shared catalogs.
5216 * We could dispense with the initialization files and just build the
5217 * critical reldescs the hard way on every backend startup, but that
5218 * slows down backend startup noticeably.
5220 * We can in fact go further, and save more relcache entries than
5221 * just the ones that are absolutely critical; this allows us to speed
5222 * up backend startup by not having to build such entries the hard way.
5223 * Presently, all the catalog and index entries that are referred to
5224 * by catcaches are stored in the initialization files.
5226 * The same mechanism that detects when catcache and relcache entries
5227 * need to be invalidated (due to catalog updates) also arranges to
5228 * unlink the initialization files when the contents may be out of date.
5229 * The files will then be rebuilt during the next backend startup.
5233 * load_relcache_init_file -- attempt to load cache from the shared
5234 * or local cache init file
5236 * If successful, return TRUE and set criticalRelcachesBuilt or
5237 * criticalSharedRelcachesBuilt to true.
5238 * If not successful, return FALSE.
5240 * NOTE: we assume we are already switched into CacheMemoryContext.
/*
 * load_relcache_init_file(shared)
 *
 * NOTE(review): this listing omits a number of original lines (braces, some
 * declarations, and the else/goto/return statements that follow most of the
 * visible tests).  The comments added here describe only what the visible
 * lines establish; anything depending on an omitted line is marked as an
 * assumption.
 *
 * Visible flow:
 *	- Format the init file path either under global/ or under DatabasePath
 *	  (the selecting test is omitted; presumably it is on the shared flag).
 *	- Open the file with AllocateFile in PG_BINARY_R mode.
 *	- Read and compare the magic number against RELCACHE_INIT_FILEMAGIC.
 *	- Loop, reading one relation per iteration as a series of items, each
 *	  preceded by its length: the RelationData struct, the pg_class form,
 *	  one fixed-size attribute entry per column, the rd_options blob and,
 *	  for indexes only, the pg_index tuple followed by the opfamily,
 *	  opcintype, support-procedure, collation and indoption vectors.
 *	- Clear every pointer/transient field that cannot be trusted from disk
 *	  and recompute lock and physical address info.
 *	- Cross-check the nailed rel/index counts against the NUM_CRITICAL_*
 *	  constants, then insert all collected entries with RelationCacheInsert
 *	  and set criticalSharedRelcachesBuilt or criticalRelcachesBuilt.
 */
5243 load_relcache_init_file(bool shared)
5246 char initfilename[MAXPGPATH];
5257 snprintf(initfilename, sizeof(initfilename), "global/%s",
5258 RELCACHE_INIT_FILENAME);
5260 snprintf(initfilename, sizeof(initfilename), "%s/%s",
5261 DatabasePath, RELCACHE_INIT_FILENAME);
5263 fp = AllocateFile(initfilename, PG_BINARY_R);
5268 * Read the index relcache entries from the file. Note we will not enter
5269 * any of them into the cache if the read fails partway through; this
5270 * helps to guard against broken init files.
/* rels[] collects the entries read so far; it is grown with repalloc below */
5273 rels = (Relation *) palloc(max_rels * sizeof(Relation));
5275 nailed_rels = nailed_indexes = 0;
5277 /* check for correct magic number (compatible version) */
5278 if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5280 if (magic != RELCACHE_INIT_FILEMAGIC)
/*
 * The loop has no termination test of its own; the visible exit is the
 * break taken when the leading length field cannot be read (end of file).
 */
5283 for (relno = 0;; relno++)
5288 Form_pg_class relform;
5291 /* first read the relation descriptor length */
5292 nread = fread(&len, 1, sizeof(len), fp);
5293 if (nread != sizeof(len))
5296 break; /* end of file */
5300 /* safety check for incompatible relcache layout */
5301 if (len != sizeof(RelationData))
5304 /* allocate another relcache header */
5305 if (num_rels >= max_rels)
5308 rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
/* len was compared with sizeof(RelationData) just above */
5311 rel = rels[num_rels++] = (Relation) palloc(len);
5313 /* then, read the Relation structure */
5314 if (fread(rel, 1, len, fp) != len)
5317 /* next read the relation tuple form */
5318 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5321 relform = (Form_pg_class) palloc(len);
5322 if (fread(relform, 1, len, fp) != len)
5325 rel->rd_rel = relform;
5327 /* initialize attribute tuple forms */
5328 rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
5329 relform->relhasoids);
5330 rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5332 rel->rd_att->tdtypeid = relform->reltype;
5333 rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5335 /* next read all the attribute tuple form data entries */
/* has_not_null accumulates attnotnull over every attribute read below */
5336 has_not_null = false;
5337 for (i = 0; i < relform->relnatts; i++)
5339 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5341 if (len != ATTRIBUTE_FIXED_PART_SIZE)
5343 if (fread(rel->rd_att->attrs[i], 1, len, fp) != len)
5346 has_not_null |= rel->rd_att->attrs[i]->attnotnull;
5349 /* next read the access method specific field */
5350 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
/*
 * Two outcomes are visible for rd_options: a palloc'd blob whose stored
 * length must equal its VARSIZE, or NULL.  The test choosing between them
 * is omitted from this listing (presumably len > 0).
 */
5354 rel->rd_options = palloc(len);
5355 if (fread(rel->rd_options, 1, len, fp) != len)
5357 if (len != VARSIZE(rel->rd_options))
5358 goto read_failed; /* sanity check */
5362 rel->rd_options = NULL;
5365 /* mark not-null status */
/*
 * A zeroed TupleConstr with only has_not_null set is attached to the
 * descriptor; the guarding test is omitted (presumably has_not_null).
 */
5368 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5370 constr->has_not_null = true;
5371 rel->rd_att->constr = constr;
5374 /* If it's an index, there's more to do */
5375 if (rel->rd_rel->relkind == RELKIND_INDEX)
5377 MemoryContext indexcxt;
5380 RegProcedure *support;
5385 /* Count nailed indexes to ensure we have 'em all */
5386 if (rel->rd_isnailed)
5389 /* next, read the pg_index tuple */
5390 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5393 rel->rd_indextuple = (HeapTuple) palloc(len);
5394 if (fread(rel->rd_indextuple, 1, len, fp) != len)
5397 /* Fix up internal pointers in the tuple -- see heap_copytuple */
5398 rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5399 rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
5402 * prepare index info context --- parameters should match
5403 * RelationInitIndexAccessInfo
/* The per-index vectors read below are all allocated in indexcxt */
5405 indexcxt = AllocSetContextCreate(CacheMemoryContext,
5406 RelationGetRelationName(rel),
5407 ALLOCSET_SMALL_SIZES);
5408 rel->rd_indexcxt = indexcxt;
5411 * Now we can fetch the index AM's API struct. (We can't store
5412 * that in the init file, since it contains function pointers that
5413 * might vary across server executions. Fortunately, it should be
5414 * safe to call the amhandler even while bootstrapping indexes.)
5416 InitIndexAmRoutine(rel);
5418 /* next, read the vector of opfamily OIDs */
5419 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5422 opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5423 if (fread(opfamily, 1, len, fp) != len)
5426 rel->rd_opfamily = opfamily;
5428 /* next, read the vector of opcintype OIDs */
5429 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5432 opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5433 if (fread(opcintype, 1, len, fp) != len)
5436 rel->rd_opcintype = opcintype;
5438 /* next, read the vector of support procedure OIDs */
5439 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5441 support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5442 if (fread(support, 1, len, fp) != len)
5445 rel->rd_support = support;
5447 /* next, read the vector of collation OIDs */
5448 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5451 indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5452 if (fread(indcollation, 1, len, fp) != len)
5455 rel->rd_indcollation = indcollation;
5457 /* finally, read the vector of indoption values */
5458 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5461 indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5462 if (fread(indoption, 1, len, fp) != len)
5465 rel->rd_indoption = indoption;
5467 /* set up zeroed fmgr-info vector */
/* nsupport is relnatts multiplied by the amsupport value of the AM routine */
5468 nsupport = relform->relnatts * rel->rd_amroutine->amsupport;
5469 rel->rd_supportinfo = (FmgrInfo *)
5470 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
/*
 * What follows is the non-index case (the else line itself is omitted):
 * all index-only fields are asserted to be NULL as read from the file.
 */
5474 /* Count nailed rels to ensure we have 'em all */
5475 if (rel->rd_isnailed)
5478 Assert(rel->rd_index == NULL);
5479 Assert(rel->rd_indextuple == NULL);
5480 Assert(rel->rd_indexcxt == NULL);
5481 Assert(rel->rd_amroutine == NULL);
5482 Assert(rel->rd_opfamily == NULL);
5483 Assert(rel->rd_opcintype == NULL);
5484 Assert(rel->rd_support == NULL);
5485 Assert(rel->rd_supportinfo == NULL);
5486 Assert(rel->rd_indoption == NULL);
5487 Assert(rel->rd_indcollation == NULL);
5491 * Rules and triggers are not saved (mainly because the internal
5492 * format is complex and subject to change). They must be rebuilt if
5493 * needed by RelationCacheInitializePhase3. This is not expected to
5494 * be a big performance hit since few system catalogs have such. Ditto
5495 * for RLS policy data, index expressions, predicates, exclusion info,
5498 rel->rd_rules = NULL;
5499 rel->rd_rulescxt = NULL;
5500 rel->trigdesc = NULL;
5501 rel->rd_rsdesc = NULL;
5502 rel->rd_partkeycxt = NULL;
5503 rel->rd_partkey = NULL;
5504 rel->rd_partdesc = NULL;
5505 rel->rd_partcheck = NIL;
5506 rel->rd_indexprs = NIL;
5507 rel->rd_indpred = NIL;
5508 rel->rd_exclops = NULL;
5509 rel->rd_exclprocs = NULL;
5510 rel->rd_exclstrats = NULL;
5511 rel->rd_fdwroutine = NULL;
5514 * Reset transient-state fields in the relcache entry
5516 rel->rd_smgr = NULL;
5517 if (rel->rd_isnailed)
5521 rel->rd_indexvalid = 0;
5522 rel->rd_fkeylist = NIL;
5523 rel->rd_fkeyvalid = false;
5524 rel->rd_indexlist = NIL;
5525 rel->rd_oidindex = InvalidOid;
5526 rel->rd_pkindex = InvalidOid;
5527 rel->rd_replidindex = InvalidOid;
5528 rel->rd_indexattr = NULL;
5529 rel->rd_keyattr = NULL;
5530 rel->rd_pkattr = NULL;
5531 rel->rd_idattr = NULL;
5532 rel->rd_pubactions = NULL;
5533 rel->rd_createSubid = InvalidSubTransactionId;
5534 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
5535 rel->rd_amcache = NULL;
5536 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5539 * Recompute lock and physical addressing info. This is needed in
5540 * case the pg_internal.init file was copied from some other database
5541 * by CREATE DATABASE.
5543 RelationInitLockInfo(rel);
5544 RelationInitPhysicalAddr(rel);
5548 * We reached the end of the init file without apparent problem. Did we
5549 * get the right number of nailed items? This is a useful crosscheck in
5550 * case the set of critical rels or indexes changes. However, that should
5551 * not happen in a normally-running system, so let's bleat if it does.
5553 * For the shared init file, we're called before client authentication is
5554 * done, which means that elog(WARNING) will go only to the postmaster
5555 * log, where it's easily missed. To ensure that developers notice bad
5556 * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5557 * an Assert(false) there.
5561 if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
5562 nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5564 elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5565 nailed_rels, nailed_indexes,
5566 NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES);
5567 /* Make sure we get developers' attention about this */
5569 /* In production builds, recover by bootstrapping the relcache */
5575 if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
5576 nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5578 elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5579 nailed_rels, nailed_indexes,
5580 NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES);
5581 /* We don't need an Assert() in this case */
5587 * OK, all appears well.
5589 * Now insert all the new relcache entries into the cache.
5591 for (relno = 0; relno < num_rels; relno++)
5593 RelationCacheInsert(rels[relno], false);
5600 criticalSharedRelcachesBuilt = true;
5602 criticalRelcachesBuilt = true;
5606 * init file is broken, so do it the hard way. We don't bother trying to
5607 * free the clutter we just allocated; it's not in the relcache so it
5618 * Write out a new initialization file with the current contents
5619 * of the relcache (either shared rels or local rels, as indicated).
/*
 * write_relcache_init_file(shared)
 *
 * NOTE(review): this listing omits several original lines (braces, some
 * declarations, the branch on the shared flag, continue/return statements
 * and the trailing fp argument of most write_item calls).  The summary
 * below covers only the visible lines; omitted parts are flagged.
 *
 * Visible steps:
 *	1. Test relcacheInvalsReceived against 0L before doing any work (the
 *	   action taken is omitted; the adjacent comment says the whole write
 *	   is skipped).
 *	2. Format a temp file name ending in .MyProcPid and the final file
 *	   name, both either under global/ or under DatabasePath.
 *	3. unlink any leftover temp file, then open it with AllocateFile in
 *	   PG_BINARY_W mode; an open failure is reported through
 *	   errcode_for_file_access/errmsg/errdetail (severity line omitted).
 *	4. Write RELCACHE_INIT_FILEMAGIC; a short write is elog(FATAL).
 *	5. Walk RelationIdCache with hash_seq_init/hash_seq_search.  Entries
 *	   whose relisshared differs from the shared argument are filtered, and
 *	   for the local file so are entries rejected by RelationIdIsInInitFile
 *	   (with an Assert that such an entry is not nailed).
 *	6. For each remaining entry emit, via write_item and in this order:
 *	   the RelationData struct, the pg_class form (CLASS_TUPLE_SIZE), each
 *	   attribute (ATTRIBUTE_FIXED_PART_SIZE), rd_options (VARSIZE, or 0
 *	   when NULL) and, for RELKIND_INDEX only, the pg_index tuple
 *	   (HEAPTUPLESIZE + t_len) and the opfamily, opcintype, support,
 *	   collation and indoption vectors.  This matches the item order that
 *	   load_relcache_init_file reads.
 *	7. A further elog(FATAL) follows the loop; its guarding test is
 *	   omitted from this listing.
 *	8. Take RelCacheInitLock exclusively, call AcceptInvalidationMessages,
 *	   and only if relcacheInvalsReceived is still 0L rename the temp file
 *	   onto the final name (unlinking the temp file if rename fails);
 *	   otherwise unlink the temp file.  Finally release the lock.
 */
5622 write_relcache_init_file(bool shared)
5625 char tempfilename[MAXPGPATH];
5626 char finalfilename[MAXPGPATH];
5628 HASH_SEQ_STATUS status;
5629 RelIdCacheEnt *idhentry;
5633 * If we have already received any relcache inval events, there's no
5634 * chance of succeeding so we may as well skip the whole thing.
5636 if (relcacheInvalsReceived != 0L)
5640 * We must write a temporary file and rename it into place. Otherwise,
5641 * another backend starting at about the same time might crash trying to
5642 * read the partially-complete file.
5646 snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
5647 RELCACHE_INIT_FILENAME, MyProcPid);
5648 snprintf(finalfilename, sizeof(finalfilename), "global/%s",
5649 RELCACHE_INIT_FILENAME);
5653 snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
5654 DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
5655 snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
5656 DatabasePath, RELCACHE_INIT_FILENAME);
5659 unlink(tempfilename); /* in case it exists w/wrong permissions */
5661 fp = AllocateFile(tempfilename, PG_BINARY_W);
5665 * We used to consider this a fatal error, but we might as well
5666 * continue with backend startup ...
5669 (errcode_for_file_access(),
5670 errmsg("could not create relation-cache initialization file \"%s\": %m",
5672 errdetail("Continuing anyway, but there's something wrong.")));
5677 * Write a magic number to serve as a file version identifier. We can
5678 * change the magic number whenever the relcache layout changes.
5680 magic = RELCACHE_INIT_FILEMAGIC;
5681 if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5682 elog(FATAL, "could not write init file");
5685 * Write all the appropriate reldescs (in no particular order).
5687 hash_seq_init(&status, RelationIdCache);
5689 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
5691 Relation rel = idhentry->reldesc;
5692 Form_pg_class relform = rel->rd_rel;
5694 /* ignore if not correct group */
5695 if (relform->relisshared != shared)
5699 * Ignore if not supposed to be in init file. We can allow any shared
5700 * relation that's been loaded so far to be in the shared init file,
5701 * but unshared relations must be ones that should be in the local
5702 * file per RelationIdIsInInitFile. (Note: if you want to change the
5703 * criterion for rels to be kept in the init file, see also inval.c.
5704 * The reason for filtering here is to be sure that we don't put
5705 * anything into the local init file for which a relcache inval would
5706 * not cause invalidation of that init file.)
5708 if (!shared && !RelationIdIsInInitFile(RelationGetRelid(rel)))
5710 /* Nailed rels had better get stored. */
5711 Assert(!rel->rd_isnailed);
5715 /* first write the relcache entry proper */
5716 write_item(rel, sizeof(RelationData), fp);
5718 /* next write the relation tuple form */
5719 write_item(relform, CLASS_TUPLE_SIZE, fp);
5721 /* next, do all the attribute tuple form data entries */
5722 for (i = 0; i < relform->relnatts; i++)
5724 write_item(rel->rd_att->attrs[i], ATTRIBUTE_FIXED_PART_SIZE, fp);
5727 /* next, do the access method specific field */
5728 write_item(rel->rd_options,
5729 (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
5732 /* If it's an index, there's more to do */
5733 if (rel->rd_rel->relkind == RELKIND_INDEX)
5735 /* write the pg_index tuple */
5736 /* we assume this was created by heap_copytuple! */
5737 write_item(rel->rd_indextuple,
5738 HEAPTUPLESIZE + rel->rd_indextuple->t_len,
5741 /* next, write the vector of opfamily OIDs */
5742 write_item(rel->rd_opfamily,
5743 relform->relnatts * sizeof(Oid),
5746 /* next, write the vector of opcintype OIDs */
5747 write_item(rel->rd_opcintype,
5748 relform->relnatts * sizeof(Oid),
5751 /* next, write the vector of support procedure OIDs */
5752 write_item(rel->rd_support,
5753 relform->relnatts * (rel->rd_amroutine->amsupport * sizeof(RegProcedure)),
5756 /* next, write the vector of collation OIDs */
5757 write_item(rel->rd_indcollation,
5758 relform->relnatts * sizeof(Oid),
5761 /* finally, write the vector of indoption values */
5762 write_item(rel->rd_indoption,
5763 relform->relnatts * sizeof(int16),
5769 elog(FATAL, "could not write init file");
5772 * Now we have to check whether the data we've so painstakingly
5773 * accumulated is already obsolete due to someone else's just-committed
5774 * catalog changes. If so, we just delete the temp file and leave it to
5775 * the next backend to try again. (Our own relcache entries will be
5776 * updated by SI message processing, but we can't be sure whether what we
5777 * wrote out was up-to-date.)
5779 * This mustn't run concurrently with the code that unlinks an init file
5780 * and sends SI messages, so grab a serialization lock for the duration.
5782 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5784 /* Make sure we have seen all incoming SI messages */
5785 AcceptInvalidationMessages();
5788 * If we have received any SI relcache invals since backend start, assume
5789 * we may have written out-of-date data.
5791 if (relcacheInvalsReceived == 0L)
5794 * OK, rename the temp file to its final name, deleting any
5795 * previously-existing init file.
5797 * Note: a failure here is possible under Cygwin, if some other
5798 * backend is holding open an unlinked-but-not-yet-gone init file. So
5799 * treat this as a noncritical failure; just remove the useless temp
5802 if (rename(tempfilename, finalfilename) < 0)
5803 unlink(tempfilename);
5807 /* Delete the already-obsolete temp file */
5808 unlink(tempfilename);
5811 LWLockRelease(RelCacheInitLock);
5814 /* write a chunk of data preceded by its length */
/*
 * write_item(data, len, fp)
 *
 * Emits one length-prefixed record to fp: first the Size value len itself
 * (sizeof(len) bytes, written as raw memory), then len bytes taken from
 * data.  A short write of either part is reported with elog(FATAL).
 *
 * NOTE(review): the visible caller passes rd_options with a length of 0
 * when that pointer is NULL, so the second fwrite can be invoked with a
 * NULL data pointer and a zero byte count.
 */
5816 write_item(const void *data, Size len, FILE *fp)
5818 if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
5819 elog(FATAL, "could not write init file");
5820 if (fwrite(data, 1, len, fp) != len)
5821 elog(FATAL, "could not write init file");
5825 * Determine whether a given relation (identified by OID) is one of the ones
5826 * we should store in the local relcache init file.
5828 * We must cache all nailed rels, and for efficiency we should cache every rel
5829 * that supports a syscache. The former set is almost but not quite a subset
5830 * of the latter. Currently, we must special-case TriggerRelidNameIndexId,
5831 * which RelationCacheInitializePhase3 chooses to nail for efficiency reasons,
5832 * but which does not support any syscache.
5834 * Note: this function is currently never called for shared rels. If it were,
5835 * we'd probably also need a special case for DatabaseNameIndexId, which is
5836 * critical but does not support a syscache.
/*
 * RelationIdIsInInitFile(relationId)
 *
 * Visible logic: TriggerRelidNameIndexId is handled as a special case, with
 * an Assert that RelationSupportsSysCache is false for it; every other OID
 * yields the result of RelationSupportsSysCache(relationId).
 *
 * NOTE(review): the statement returning from the special case is omitted
 * from this listing; presumably it returns true -- confirm against the
 * full source.
 */
5839 RelationIdIsInInitFile(Oid relationId)
5841 if (relationId == TriggerRelidNameIndexId)
5843 /* If this Assert fails, we don't need this special case anymore. */
5844 Assert(!RelationSupportsSysCache(relationId));
5847 return RelationSupportsSysCache(relationId);
5851 * Tells whether any index for the relation is unlogged.
5853 * Any index using the hash AM is implicitly unlogged.
5855 * Note: There doesn't seem to be any way to have an unlogged index attached
5856 * to a permanent table except to create a hash index, but it seems best to
5857 * keep this general so that it returns sensible results even when they seem
5858 * obvious (like for an unlogged table) and to handle possible future unlogged
5859 * indexes on permanent tables.
/*
 * RelationHasUnloggedIndex(rel)
 *
 * Fetches the index OID list of rel with RelationGetIndexList and, for each
 * OID, looks up its pg_class row through SearchSysCache1(RELOID, ...).  A
 * failed lookup raises elog(ERROR).  Each row is tested for relpersistence
 * equal to RELPERSISTENCE_UNLOGGED or relam equal to HASH_AM_OID, and the
 * syscache reference is released with ReleaseSysCache.  The list is freed
 * with list_free once the scan is over.
 *
 * NOTE(review): result starts out false, but the lines that update it,
 * any early loop exit, and the final return are omitted from this listing;
 * presumably a matching index sets result to true.
 */
5862 RelationHasUnloggedIndex(Relation rel)
5865 ListCell *indexoidscan;
5866 bool result = false;
5868 indexoidlist = RelationGetIndexList(rel);
5870 foreach(indexoidscan, indexoidlist)
5872 Oid indexoid = lfirst_oid(indexoidscan);
5874 Form_pg_class reltup;
5876 tp = SearchSysCache1(RELOID, ObjectIdGetDatum(indexoid));
5877 if (!HeapTupleIsValid(tp))
5878 elog(ERROR, "cache lookup failed for relation %u", indexoid);
5879 reltup = (Form_pg_class) GETSTRUCT(tp);
5881 if (reltup->relpersistence == RELPERSISTENCE_UNLOGGED
5882 || reltup->relam == HASH_AM_OID)
5885 ReleaseSysCache(tp);
5891 list_free(indexoidlist);
5897 * Invalidate (remove) the init file during commit of a transaction that
5898 * changed one or more of the relation cache entries that are kept in the
5901 * To be safe against concurrent inspection or rewriting of the init file,
5902 * we must take RelCacheInitLock, then remove the old init file, then send
5903 * the SI messages that include relcache inval for such relations, and then
5904 * release RelCacheInitLock. This serializes the whole affair against
5905 * write_relcache_init_file, so that we can be sure that any other process
5906 * that's concurrently trying to create a new init file won't move an
5907 * already-stale version into place after we unlink. Also, because we unlink
5908 * before sending the SI messages, a backend that's currently starting cannot
5909 * read the now-obsolete init file and then miss the SI messages that will
5910 * force it to update its relcache entries. (This works because the backend
5911 * startup sequence gets into the sinval array before trying to load the init
5914 * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
5915 * then release the lock in RelationCacheInitFilePostInvalidate. Caller must
5916 * send any pending SI messages between those calls.
5918 * Notice this deals only with the local init file, not the shared init file.
5919 * The reason is that there can never be a "significant" change to the
5920 * relcache entry of a shared relation; the most that could happen is
5921 * updates of noncritical fields such as relpages/reltuples. So, while
5922 * it's worth updating the shared init file from time to time, it can never
5923 * be invalid enough to make it necessary to remove it.
/*
 * RelationCacheInitFilePreInvalidate()
 *
 * Formats the local init file path (DatabasePath/RELCACHE_INIT_FILENAME),
 * acquires RelCacheInitLock in LW_EXCLUSIVE mode and then unlinks the file.
 * An unlink failure with errno other than ENOENT is reported through
 * errcode_for_file_access and errmsg.
 *
 * No LWLockRelease appears in the visible lines of this function: the lock
 * is still held on return and is released by
 * RelationCacheInitFilePostInvalidate.
 *
 * NOTE(review): the line naming the report severity is omitted from this
 * listing; the adjacent comment implies it aborts the transaction.
 */
5926 RelationCacheInitFilePreInvalidate(void)
5928 char initfilename[MAXPGPATH];
5930 snprintf(initfilename, sizeof(initfilename), "%s/%s",
5931 DatabasePath, RELCACHE_INIT_FILENAME);
5933 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5935 if (unlink(initfilename) < 0)
5938 * The file might not be there if no backend has been started since
5939 * the last removal. But complain about failures other than ENOENT.
5940 * Fortunately, it's not too late to abort the transaction if we can't
5941 * get rid of the would-be-obsolete init file.
5943 if (errno != ENOENT)
5945 (errcode_for_file_access(),
5946 errmsg("could not remove cache file \"%s\": %m",
/*
 * RelationCacheInitFilePostInvalidate()
 *
 * Releases RelCacheInitLock, which RelationCacheInitFilePreInvalidate
 * acquires in exclusive mode before unlinking the local init file.
 */
5952 RelationCacheInitFilePostInvalidate(void)
5954 LWLockRelease(RelCacheInitLock);
5958 * Remove the init files during postmaster startup.
5960 * We used to keep the init files across restarts, but that is unsafe in PITR
5961 * scenarios, and even in simple crash-recovery cases there are windows for
5962 * the init files to become out-of-sync with the database. So now we just
5963 * remove them during startup and expect the first backend launch to rebuild
5964 * them. Of course, this has to happen in each database of the cluster.
/*
 * RelationCacheInitFileRemove()
 *
 * Visible steps:
 *	- Remove global/RELCACHE_INIT_FILENAME through unlink_initfile.
 *	- Call RelationCacheInitFileRemoveInDir on the base directory.
 *	- Open the pg_tblspc directory with AllocateDir and iterate over it with
 *	  ReadDir.  For every entry whose name consists solely of decimal digits
 *	  (strspn over 0-9 equals strlen), build the path
 *	  pg_tblspc/NAME/TABLESPACE_VERSION_DIRECTORY and pass it to
 *	  RelationCacheInitFileRemoveInDir.
 *
 * NOTE(review): the test guarding the elog(LOG) about the tablespace link
 * directory, and the directory close, are omitted from this listing;
 * presumably the message is emitted when AllocateDir returns NULL.
 */
5967 RelationCacheInitFileRemove(void)
5969 const char *tblspcdir = "pg_tblspc";
5972 char path[MAXPGPATH];
5975 * We zap the shared cache file too. In theory it can't get out of sync
5976 * enough to be a problem, but in data-corruption cases, who knows ...
5978 snprintf(path, sizeof(path), "global/%s",
5979 RELCACHE_INIT_FILENAME);
5980 unlink_initfile(path);
5982 /* Scan everything in the default tablespace */
5983 RelationCacheInitFileRemoveInDir("base");
5985 /* Scan the tablespace link directory to find non-default tablespaces */
5986 dir = AllocateDir(tblspcdir);
5989 elog(LOG, "could not open tablespace link directory \"%s\": %m",
5994 while ((de = ReadDir(dir, tblspcdir)) != NULL)
5996 if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
5998 /* Scan the tablespace dir for per-database dirs */
5999 snprintf(path, sizeof(path), "%s/%s/%s",
6000 tblspcdir, de->d_name, TABLESPACE_VERSION_DIRECTORY);
6001 RelationCacheInitFileRemoveInDir(path);
6008 /* Process one per-tablespace directory for RelationCacheInitFileRemove */
/*
 * RelationCacheInitFileRemoveInDir(tblspcpath)
 *
 * Opens tblspcpath with AllocateDir and iterates over it with ReadDir.  For
 * every entry whose name consists solely of decimal digits (strspn over 0-9
 * equals strlen), formats tblspcpath/NAME/RELCACHE_INIT_FILENAME and hands
 * it to unlink_initfile.
 *
 * NOTE(review): the test guarding the elog(LOG) about the tablespace
 * directory, and the directory close, are omitted from this listing;
 * presumably the message is emitted when AllocateDir returns NULL.
 */
6010 RelationCacheInitFileRemoveInDir(const char *tblspcpath)
6014 char initfilename[MAXPGPATH];
6016 /* Scan the tablespace directory to find per-database directories */
6017 dir = AllocateDir(tblspcpath);
6020 elog(LOG, "could not open tablespace directory \"%s\": %m",
6025 while ((de = ReadDir(dir, tblspcpath)) != NULL)
6027 if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
6029 /* Try to remove the init file in each database */
6030 snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
6031 tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
6032 unlink_initfile(initfilename);
/*
 * unlink_initfile(initfilename)
 *
 * Calls unlink on the given path.  A failure with errno equal to ENOENT is
 * ignored; any other failure is reported with elog(LOG) and execution
 * continues.
 */
6040 unlink_initfile(const char *initfilename)
6042 if (unlink(initfilename) < 0)
6044 /* It might not be there, but log any error other than ENOENT */
6045 if (errno != ENOENT)
6046 elog(LOG, "could not remove cache file \"%s\": %m", initfilename);