]> granicus.if.org Git - postgresql/blob - src/backend/utils/cache/relcache.c
Fix an oversight I made in a cleanup patch over a year ago:
[postgresql] / src / backend / utils / cache / relcache.c
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  *        POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.270 2008/04/01 00:48:33 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              RelationCacheInitialize                 - initialize relcache (to empty)
18  *              RelationCacheInitializePhase2   - finish initializing relcache
19  *              RelationIdGetRelation                   - get a reldesc by relation id
20  *              RelationClose                                   - close an open relation
21  *
22  * NOTES
23  *              The following code contains many undocumented hacks.  Please be
24  *              careful....
25  */
26 #include "postgres.h"
27
28 #include <sys/file.h>
29 #include <fcntl.h>
30 #include <unistd.h>
31
32 #include "access/genam.h"
33 #include "access/heapam.h"
34 #include "access/reloptions.h"
35 #include "access/xact.h"
36 #include "catalog/catalog.h"
37 #include "catalog/index.h"
38 #include "catalog/indexing.h"
39 #include "catalog/namespace.h"
40 #include "catalog/pg_amop.h"
41 #include "catalog/pg_amproc.h"
42 #include "catalog/pg_attrdef.h"
43 #include "catalog/pg_authid.h"
44 #include "catalog/pg_constraint.h"
45 #include "catalog/pg_namespace.h"
46 #include "catalog/pg_opclass.h"
47 #include "catalog/pg_proc.h"
48 #include "catalog/pg_rewrite.h"
49 #include "catalog/pg_type.h"
50 #include "commands/trigger.h"
51 #include "miscadmin.h"
52 #include "optimizer/clauses.h"
53 #include "optimizer/planmain.h"
54 #include "optimizer/prep.h"
55 #include "optimizer/var.h"
56 #include "rewrite/rewriteDefine.h"
57 #include "storage/fd.h"
58 #include "storage/smgr.h"
59 #include "utils/builtins.h"
60 #include "utils/fmgroids.h"
61 #include "utils/inval.h"
62 #include "utils/memutils.h"
63 #include "utils/relcache.h"
64 #include "utils/resowner.h"
65 #include "utils/syscache.h"
66 #include "utils/tqual.h"
67 #include "utils/typcache.h"
68
69
70 /*
71  * name of relcache init file, used to speed up backend startup
72  */
73 #define RELCACHE_INIT_FILENAME  "pg_internal.init"
74
75 #define RELCACHE_INIT_FILEMAGIC         0x573264        /* version ID value */
76
77 /*
78  *              hardcoded tuple descriptors.  see include/catalog/pg_attribute.h
79  */
80 static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
81 static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
82 static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
83 static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
84 static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
85
86 /*
87  *              Hash tables that index the relation cache
88  *
89  *              We used to index the cache by both name and OID, but now there
90  *              is only an index by OID.
91  */
92 typedef struct relidcacheent
93 {
94         Oid                     reloid;
95         Relation        reldesc;
96 } RelIdCacheEnt;
97
98 static HTAB *RelationIdCache;
99
100 /*
101  * This flag is false until we have prepared the critical relcache entries
102  * that are needed to do indexscans on the tables read by relcache building.
103  */
104 bool            criticalRelcachesBuilt = false;
105
106 /*
107  * This counter counts relcache inval events received since backend startup
108  * (but only for rels that are actually in cache).      Presently, we use it only
109  * to detect whether data about to be written by write_relcache_init_file()
110  * might already be obsolete.
111  */
112 static long relcacheInvalsReceived = 0L;
113
114 /*
115  * This list remembers the OIDs of the relations cached in the relcache
116  * init file.
117  */
118 static List *initFileRelationIds = NIL;
119
120 /*
121  * This flag lets us optimize away work in AtEO(Sub)Xact_RelationCache().
122  */
123 static bool need_eoxact_work = false;
124
125
/*
 *		macros to manipulate the lookup hashtables
 */

/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache, keyed by its rd_id, and point
 *		the hash entry at it.  If an entry for that OID already exists its
 *		reldesc pointer is overwritten without complaint.
 *
 * The argument is parenthesized at each use so that any pointer-valued
 * expression can be passed safely.  Note that it is evaluated twice.
 */
#define RelationCacheInsert(RELATION)	\
do { \
	RelIdCacheEnt *idhentry; bool found; \
	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
											 (void *) &((RELATION)->rd_id), \
											 HASH_ENTER, \
											 &found); \
	/* used to give notice if found -- now just keep quiet */ \
	idhentry->reldesc = (RELATION); \
} while(0)
139
/*
 * RelationIdCacheLookup
 *		Search RelationIdCache for the OID held in ID.  RELATION (which must
 *		be an assignable lvalue) receives the cached descriptor, or NULL if
 *		there is no entry for that OID.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *hentry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, (void *) &(ID), HASH_FIND, NULL); \
	RELATION = (hentry != NULL) ? hentry->reldesc : NULL; \
} while(0)
150
/*
 * RelationCacheDelete
 *		Remove the RelationIdCache entry keyed by RELATION's rd_id.  A
 *		WARNING is logged if no such entry was present.
 *
 * The argument is parenthesized so that any pointer-valued expression can
 * be passed safely.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *idhentry; \
	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
											 (void *) &((RELATION)->rd_id), \
											 HASH_REMOVE, NULL); \
	if (idhentry == NULL) \
		elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
} while(0)
160
161
162 /*
163  * Special cache for opclass-related information
164  *
165  * Note: only default operators and support procs get cached, ie, those with
166  * lefttype = righttype = opcintype.
167  */
168 typedef struct opclasscacheent
169 {
170         Oid                     opclassoid;             /* lookup key: OID of opclass */
171         bool            valid;                  /* set TRUE after successful fill-in */
172         StrategyNumber numStrats;       /* max # of strategies (from pg_am) */
173         StrategyNumber numSupport;      /* max # of support procs (from pg_am) */
174         Oid                     opcfamily;              /* OID of opclass's family */
175         Oid                     opcintype;              /* OID of opclass's declared input type */
176         Oid                *operatorOids;       /* strategy operators' OIDs */
177         RegProcedure *supportProcs; /* support procs */
178 } OpClassCacheEnt;
179
180 static HTAB *OpClassCache = NULL;
181
182
183 /* non-export function prototypes */
184
185 static void RelationClearRelation(Relation relation, bool rebuild);
186
187 static void RelationReloadIndexInfo(Relation relation);
188 static void RelationFlushRelation(Relation relation);
189 static bool load_relcache_init_file(void);
190 static void write_relcache_init_file(void);
191 static void write_item(const void *data, Size len, FILE *fp);
192
193 static void formrdesc(const char *relationName, Oid relationReltype,
194                   bool hasoids, int natts, FormData_pg_attribute *att);
195
196 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
197 static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
198 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
199 static void RelationBuildTupleDesc(Relation relation);
200 static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
201 static void RelationInitPhysicalAddr(Relation relation);
202 static TupleDesc GetPgClassDescriptor(void);
203 static TupleDesc GetPgIndexDescriptor(void);
204 static void AttrDefaultFetch(Relation relation);
205 static void CheckConstraintFetch(Relation relation);
206 static List *insert_ordered_oid(List *list, Oid datum);
207 static void IndexSupportInitialize(oidvector *indclass,
208                                            Oid *indexOperator,
209                                            RegProcedure *indexSupport,
210                                            Oid *opFamily,
211                                            Oid *opcInType,
212                                            StrategyNumber maxStrategyNumber,
213                                            StrategyNumber maxSupportNumber,
214                                            AttrNumber maxAttributeNumber);
215 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
216                                   StrategyNumber numStrats,
217                                   StrategyNumber numSupport);
218
219
220 /*
221  *              ScanPgRelation
222  *
223  *              this is used by RelationBuildDesc to find a pg_class
224  *              tuple matching targetRelId.
225  *
226  *              NB: the returned tuple has been copied into palloc'd storage
227  *              and must eventually be freed with heap_freetuple.
228  */
229 static HeapTuple
230 ScanPgRelation(Oid targetRelId, bool indexOK)
231 {
232         HeapTuple       pg_class_tuple;
233         Relation        pg_class_desc;
234         SysScanDesc pg_class_scan;
235         ScanKeyData key[1];
236
237         /*
238          * form a scan key
239          */
240         ScanKeyInit(&key[0],
241                                 ObjectIdAttributeNumber,
242                                 BTEqualStrategyNumber, F_OIDEQ,
243                                 ObjectIdGetDatum(targetRelId));
244
245         /*
246          * Open pg_class and fetch a tuple.  Force heap scan if we haven't yet
247          * built the critical relcache entries (this includes initdb and startup
248          * without a pg_internal.init file).  The caller can also force a heap
249          * scan by setting indexOK == false.
250          */
251         pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
252         pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
253                                                                            indexOK && criticalRelcachesBuilt,
254                                                                            SnapshotNow,
255                                                                            1, key);
256
257         pg_class_tuple = systable_getnext(pg_class_scan);
258
259         /*
260          * Must copy tuple before releasing buffer.
261          */
262         if (HeapTupleIsValid(pg_class_tuple))
263                 pg_class_tuple = heap_copytuple(pg_class_tuple);
264
265         /* all done */
266         systable_endscan(pg_class_scan);
267         heap_close(pg_class_desc, AccessShareLock);
268
269         return pg_class_tuple;
270 }
271
272 /*
273  *              AllocateRelationDesc
274  *
275  *              This is used to allocate memory for a new relation descriptor
276  *              and initialize the rd_rel field.
277  *
278  *              If 'relation' is NULL, allocate a new RelationData object.
279  *              If not, reuse the given object (that path is taken only when
280  *              we have to rebuild a relcache entry during RelationClearRelation).
281  */
282 static Relation
283 AllocateRelationDesc(Relation relation, Form_pg_class relp)
284 {
285         MemoryContext oldcxt;
286         Form_pg_class relationForm;
287
288         /* Relcache entries must live in CacheMemoryContext */
289         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
290
291         /*
292          * allocate space for new relation descriptor, if needed
293          */
294         if (relation == NULL)
295                 relation = (Relation) palloc(sizeof(RelationData));
296
297         /*
298          * clear all fields of reldesc
299          */
300         MemSet(relation, 0, sizeof(RelationData));
301         relation->rd_targblock = InvalidBlockNumber;
302
303         /* make sure relation is marked as having no open file yet */
304         relation->rd_smgr = NULL;
305
306         /*
307          * Copy the relation tuple form
308          *
309          * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
310          * variable-length fields (relacl, reloptions) are NOT stored in the
311          * relcache --- there'd be little point in it, since we don't copy the
312          * tuple's nulls bitmap and hence wouldn't know if the values are valid.
313          * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
314          * it from the syscache if you need it.  The same goes for the original
315          * form of reloptions (however, we do store the parsed form of reloptions
316          * in rd_options).
317          */
318         relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
319
320         memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
321
322         /* initialize relation tuple form */
323         relation->rd_rel = relationForm;
324
325         /* and allocate attribute tuple form storage */
326         relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
327                                                                                            relationForm->relhasoids);
328         /* which we mark as a reference-counted tupdesc */
329         relation->rd_att->tdrefcount = 1;
330
331         MemoryContextSwitchTo(oldcxt);
332
333         return relation;
334 }
335
336 /*
337  * RelationParseRelOptions
338  *              Convert pg_class.reloptions into pre-parsed rd_options
339  *
340  * tuple is the real pg_class tuple (not rd_rel!) for relation
341  *
342  * Note: rd_rel and (if an index) rd_am must be valid already
343  */
344 static void
345 RelationParseRelOptions(Relation relation, HeapTuple tuple)
346 {
347         Datum           datum;
348         bool            isnull;
349         bytea      *options;
350
351         relation->rd_options = NULL;
352
353         /* Fall out if relkind should not have options */
354         switch (relation->rd_rel->relkind)
355         {
356                 case RELKIND_RELATION:
357                 case RELKIND_TOASTVALUE:
358                 case RELKIND_UNCATALOGED:
359                 case RELKIND_INDEX:
360                         break;
361                 default:
362                         return;
363         }
364
365         /*
366          * Fetch reloptions from tuple; have to use a hardwired descriptor because
367          * we might not have any other for pg_class yet (consider executing this
368          * code for pg_class itself)
369          */
370         datum = fastgetattr(tuple,
371                                                 Anum_pg_class_reloptions,
372                                                 GetPgClassDescriptor(),
373                                                 &isnull);
374         if (isnull)
375                 return;
376
377         /* Parse into appropriate format; don't error out here */
378         switch (relation->rd_rel->relkind)
379         {
380                 case RELKIND_RELATION:
381                 case RELKIND_TOASTVALUE:
382                 case RELKIND_UNCATALOGED:
383                         options = heap_reloptions(relation->rd_rel->relkind, datum,
384                                                                           false);
385                         break;
386                 case RELKIND_INDEX:
387                         options = index_reloptions(relation->rd_am->amoptions, datum,
388                                                                            false);
389                         break;
390                 default:
391                         Assert(false);          /* can't get here */
392                         options = NULL;         /* keep compiler quiet */
393                         break;
394         }
395
396         /* Copy parsed data into CacheMemoryContext */
397         if (options)
398         {
399                 relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
400                                                                                                   VARSIZE(options));
401                 memcpy(relation->rd_options, options, VARSIZE(options));
402         }
403 }
404
405 /*
406  *              RelationBuildTupleDesc
407  *
408  *              Form the relation's tuple descriptor from information in
409  *              the pg_attribute, pg_attrdef & pg_constraint system catalogs.
410  */
411 static void
412 RelationBuildTupleDesc(Relation relation)
413 {
414         HeapTuple       pg_attribute_tuple;
415         Relation        pg_attribute_desc;
416         SysScanDesc pg_attribute_scan;
417         ScanKeyData skey[2];
418         int                     need;
419         TupleConstr *constr;
420         AttrDefault *attrdef = NULL;
421         int                     ndef = 0;
422
423         /* copy some fields from pg_class row to rd_att */
424         relation->rd_att->tdtypeid = relation->rd_rel->reltype;
425         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
426         relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
427
428         constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
429                                                                                                 sizeof(TupleConstr));
430         constr->has_not_null = false;
431
432         /*
433          * Form a scan key that selects only user attributes (attnum > 0).
434          * (Eliminating system attribute rows at the index level is lots faster
435          * than fetching them.)
436          */
437         ScanKeyInit(&skey[0],
438                                 Anum_pg_attribute_attrelid,
439                                 BTEqualStrategyNumber, F_OIDEQ,
440                                 ObjectIdGetDatum(RelationGetRelid(relation)));
441         ScanKeyInit(&skey[1],
442                                 Anum_pg_attribute_attnum,
443                                 BTGreaterStrategyNumber, F_INT2GT,
444                                 Int16GetDatum(0));
445
446         /*
447          * Open pg_attribute and begin a scan.  Force heap scan if we haven't yet
448          * built the critical relcache entries (this includes initdb and startup
449          * without a pg_internal.init file).
450          */
451         pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
452         pg_attribute_scan = systable_beginscan(pg_attribute_desc,
453                                                                                    AttributeRelidNumIndexId,
454                                                                                    criticalRelcachesBuilt,
455                                                                                    SnapshotNow,
456                                                                                    2, skey);
457
458         /*
459          * add attribute data to relation->rd_att
460          */
461         need = relation->rd_rel->relnatts;
462
463         while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
464         {
465                 Form_pg_attribute attp;
466
467                 attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
468
469                 if (attp->attnum <= 0 ||
470                         attp->attnum > relation->rd_rel->relnatts)
471                         elog(ERROR, "invalid attribute number %d for %s",
472                                  attp->attnum, RelationGetRelationName(relation));
473
474                 memcpy(relation->rd_att->attrs[attp->attnum - 1],
475                            attp,
476                            ATTRIBUTE_TUPLE_SIZE);
477
478                 /* Update constraint/default info */
479                 if (attp->attnotnull)
480                         constr->has_not_null = true;
481
482                 if (attp->atthasdef)
483                 {
484                         if (attrdef == NULL)
485                                 attrdef = (AttrDefault *)
486                                         MemoryContextAllocZero(CacheMemoryContext,
487                                                                                    relation->rd_rel->relnatts *
488                                                                                    sizeof(AttrDefault));
489                         attrdef[ndef].adnum = attp->attnum;
490                         attrdef[ndef].adbin = NULL;
491                         ndef++;
492                 }
493                 need--;
494                 if (need == 0)
495                         break;
496         }
497
498         /*
499          * end the scan and close the attribute relation
500          */
501         systable_endscan(pg_attribute_scan);
502         heap_close(pg_attribute_desc, AccessShareLock);
503
504         if (need != 0)
505                 elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
506                          need, RelationGetRelid(relation));
507
508         /*
509          * The attcacheoff values we read from pg_attribute should all be -1
510          * ("unknown").  Verify this if assert checking is on.  They will be
511          * computed when and if needed during tuple access.
512          */
513 #ifdef USE_ASSERT_CHECKING
514         {
515                 int                     i;
516
517                 for (i = 0; i < relation->rd_rel->relnatts; i++)
518                         Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
519         }
520 #endif
521
522         /*
523          * However, we can easily set the attcacheoff value for the first
524          * attribute: it must be zero.  This eliminates the need for special cases
525          * for attnum=1 that used to exist in fastgetattr() and index_getattr().
526          */
527         if (relation->rd_rel->relnatts > 0)
528                 relation->rd_att->attrs[0]->attcacheoff = 0;
529
530         /*
531          * Set up constraint/default info
532          */
533         if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
534         {
535                 relation->rd_att->constr = constr;
536
537                 if (ndef > 0)                   /* DEFAULTs */
538                 {
539                         if (ndef < relation->rd_rel->relnatts)
540                                 constr->defval = (AttrDefault *)
541                                         repalloc(attrdef, ndef * sizeof(AttrDefault));
542                         else
543                                 constr->defval = attrdef;
544                         constr->num_defval = ndef;
545                         AttrDefaultFetch(relation);
546                 }
547                 else
548                         constr->num_defval = 0;
549
550                 if (relation->rd_rel->relchecks > 0)    /* CHECKs */
551                 {
552                         constr->num_check = relation->rd_rel->relchecks;
553                         constr->check = (ConstrCheck *)
554                                 MemoryContextAllocZero(CacheMemoryContext,
555                                                                         constr->num_check * sizeof(ConstrCheck));
556                         CheckConstraintFetch(relation);
557                 }
558                 else
559                         constr->num_check = 0;
560         }
561         else
562         {
563                 pfree(constr);
564                 relation->rd_att->constr = NULL;
565         }
566 }
567
568 /*
569  *              RelationBuildRuleLock
570  *
571  *              Form the relation's rewrite rules from information in
572  *              the pg_rewrite system catalog.
573  *
574  * Note: The rule parsetrees are potentially very complex node structures.
575  * To allow these trees to be freed when the relcache entry is flushed,
576  * we make a private memory context to hold the RuleLock information for
577  * each relcache entry that has associated rules.  The context is used
578  * just for rule info, not for any other subsidiary data of the relcache
579  * entry, because that keeps the update logic in RelationClearRelation()
580  * manageable.  The other subsidiary data structures are simple enough
581  * to be easy to free explicitly, anyway.
582  */
583 static void
584 RelationBuildRuleLock(Relation relation)
585 {
586         MemoryContext rulescxt;
587         MemoryContext oldcxt;
588         HeapTuple       rewrite_tuple;
589         Relation        rewrite_desc;
590         TupleDesc       rewrite_tupdesc;
591         SysScanDesc rewrite_scan;
592         ScanKeyData key;
593         RuleLock   *rulelock;
594         int                     numlocks;
595         RewriteRule **rules;
596         int                     maxlocks;
597
598         /*
599          * Make the private context.  Parameters are set on the assumption that
600          * it'll probably not contain much data.
601          */
602         rulescxt = AllocSetContextCreate(CacheMemoryContext,
603                                                                          RelationGetRelationName(relation),
604                                                                          ALLOCSET_SMALL_MINSIZE,
605                                                                          ALLOCSET_SMALL_INITSIZE,
606                                                                          ALLOCSET_SMALL_MAXSIZE);
607         relation->rd_rulescxt = rulescxt;
608
609         /*
610          * allocate an array to hold the rewrite rules (the array is extended if
611          * necessary)
612          */
613         maxlocks = 4;
614         rules = (RewriteRule **)
615                 MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
616         numlocks = 0;
617
618         /*
619          * form a scan key
620          */
621         ScanKeyInit(&key,
622                                 Anum_pg_rewrite_ev_class,
623                                 BTEqualStrategyNumber, F_OIDEQ,
624                                 ObjectIdGetDatum(RelationGetRelid(relation)));
625
626         /*
627          * open pg_rewrite and begin a scan
628          *
629          * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
630          * be reading the rules in name order, except possibly during
631          * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
632          * ensures that rules will be fired in name order.
633          */
634         rewrite_desc = heap_open(RewriteRelationId, AccessShareLock);
635         rewrite_tupdesc = RelationGetDescr(rewrite_desc);
636         rewrite_scan = systable_beginscan(rewrite_desc,
637                                                                           RewriteRelRulenameIndexId,
638                                                                           true, SnapshotNow,
639                                                                           1, &key);
640
641         while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
642         {
643                 Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
644                 bool            isnull;
645                 Datum           rule_datum;
646                 char       *rule_str;
647                 RewriteRule *rule;
648
649                 rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
650                                                                                                   sizeof(RewriteRule));
651
652                 rule->ruleId = HeapTupleGetOid(rewrite_tuple);
653
654                 rule->event = rewrite_form->ev_type - '0';
655                 rule->attrno = rewrite_form->ev_attr;
656                 rule->enabled = rewrite_form->ev_enabled;
657                 rule->isInstead = rewrite_form->is_instead;
658
659                 /*
660                  * Must use heap_getattr to fetch ev_action and ev_qual.  Also, the
661                  * rule strings are often large enough to be toasted.  To avoid
662                  * leaking memory in the caller's context, do the detoasting here so
663                  * we can free the detoasted version.
664                  */
665                 rule_datum = heap_getattr(rewrite_tuple,
666                                                                   Anum_pg_rewrite_ev_action,
667                                                                   rewrite_tupdesc,
668                                                                   &isnull);
669                 Assert(!isnull);
670                 rule_str = TextDatumGetCString(rule_datum);
671                 oldcxt = MemoryContextSwitchTo(rulescxt);
672                 rule->actions = (List *) stringToNode(rule_str);
673                 MemoryContextSwitchTo(oldcxt);
674                 pfree(rule_str);
675
676                 rule_datum = heap_getattr(rewrite_tuple,
677                                                                   Anum_pg_rewrite_ev_qual,
678                                                                   rewrite_tupdesc,
679                                                                   &isnull);
680                 Assert(!isnull);
681                 rule_str = TextDatumGetCString(rule_datum);
682                 oldcxt = MemoryContextSwitchTo(rulescxt);
683                 rule->qual = (Node *) stringToNode(rule_str);
684                 MemoryContextSwitchTo(oldcxt);
685                 pfree(rule_str);
686
687                 /*
688                  * We want the rule's table references to be checked as though by the
689                  * table owner, not the user referencing the rule.      Therefore, scan
690                  * through the rule's actions and set the checkAsUser field on all
691                  * rtable entries.      We have to look at the qual as well, in case it
692                  * contains sublinks.
693                  *
694                  * The reason for doing this when the rule is loaded, rather than when
695                  * it is stored, is that otherwise ALTER TABLE OWNER would have to
696                  * grovel through stored rules to update checkAsUser fields. Scanning
697                  * the rule tree during load is relatively cheap (compared to
698                  * constructing it in the first place), so we do it here.
699                  */
700                 setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
701                 setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
702
703                 if (numlocks >= maxlocks)
704                 {
705                         maxlocks *= 2;
706                         rules = (RewriteRule **)
707                                 repalloc(rules, sizeof(RewriteRule *) * maxlocks);
708                 }
709                 rules[numlocks++] = rule;
710         }
711
712         /*
713          * end the scan and close the attribute relation
714          */
715         systable_endscan(rewrite_scan);
716         heap_close(rewrite_desc, AccessShareLock);
717
718         /*
719          * form a RuleLock and insert into relation
720          */
721         rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
722         rulelock->numLocks = numlocks;
723         rulelock->rules = rules;
724
725         relation->rd_rules = rulelock;
726 }
727
728 /*
729  *              equalRuleLocks
730  *
731  *              Determine whether two RuleLocks are equivalent
732  *
733  *              Probably this should be in the rules code someplace...
734  */
735 static bool
736 equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
737 {
738         int                     i;
739
740         /*
741          * As of 7.3 we assume the rule ordering is repeatable, because
742          * RelationBuildRuleLock should read 'em in a consistent order.  So just
743          * compare corresponding slots.
744          */
745         if (rlock1 != NULL)
746         {
747                 if (rlock2 == NULL)
748                         return false;
749                 if (rlock1->numLocks != rlock2->numLocks)
750                         return false;
751                 for (i = 0; i < rlock1->numLocks; i++)
752                 {
753                         RewriteRule *rule1 = rlock1->rules[i];
754                         RewriteRule *rule2 = rlock2->rules[i];
755
756                         if (rule1->ruleId != rule2->ruleId)
757                                 return false;
758                         if (rule1->event != rule2->event)
759                                 return false;
760                         if (rule1->attrno != rule2->attrno)
761                                 return false;
762                         if (rule1->isInstead != rule2->isInstead)
763                                 return false;
764                         if (!equal(rule1->qual, rule2->qual))
765                                 return false;
766                         if (!equal(rule1->actions, rule2->actions))
767                                 return false;
768                 }
769         }
770         else if (rlock2 != NULL)
771                 return false;
772         return true;
773 }
774
775
776 /* ----------------------------------
777  *              RelationBuildDesc
778  *
779  *              Build a relation descriptor --- either a new one, or by
780  *              recycling the given old relation object.  The latter case
781  *              supports rebuilding a relcache entry without invalidating
782  *              pointers to it.
783  *
784  *              Returns NULL if no pg_class row could be found for the given relid
785  *              (suggesting we are trying to access a just-deleted relation).
786  *              Any other error is reported via elog.
787  * --------------------------------
788  */
789 static Relation
790 RelationBuildDesc(Oid targetRelId, Relation oldrelation)
791 {
792         Relation        relation;
793         Oid                     relid;
794         HeapTuple       pg_class_tuple;
795         Form_pg_class relp;
796         MemoryContext oldcxt;
797
798         /*
799          * find the tuple in pg_class corresponding to the given relation id
800          */
801         pg_class_tuple = ScanPgRelation(targetRelId, true);
802
803         /*
804          * if no such tuple exists, return NULL
805          */
806         if (!HeapTupleIsValid(pg_class_tuple))
807                 return NULL;
808
809         /*
810          * get information from the pg_class_tuple
811          */
812         relid = HeapTupleGetOid(pg_class_tuple);
813         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
814
815         /*
816          * allocate storage for the relation descriptor, and copy pg_class_tuple
817          * to relation->rd_rel.
818          */
819         relation = AllocateRelationDesc(oldrelation, relp);
820
821         /*
822          * initialize the relation's relation id (relation->rd_id)
823          */
824         RelationGetRelid(relation) = relid;
825
826         /*
827          * normal relations are not nailed into the cache; nor can a pre-existing
828          * relation be new.  It could be temp though.  (Actually, it could be new
829          * too, but it's okay to forget that fact if forced to flush the entry.)
830          */
831         relation->rd_refcnt = 0;
832         relation->rd_isnailed = false;
833         relation->rd_createSubid = InvalidSubTransactionId;
834         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
835         relation->rd_istemp = isTempOrToastNamespace(relation->rd_rel->relnamespace);
836
837         /*
838          * initialize the tuple descriptor (relation->rd_att).
839          */
840         RelationBuildTupleDesc(relation);
841
842         /*
843          * Fetch rules and triggers that affect this relation
844          */
845         if (relation->rd_rel->relhasrules)
846                 RelationBuildRuleLock(relation);
847         else
848         {
849                 relation->rd_rules = NULL;
850                 relation->rd_rulescxt = NULL;
851         }
852
853         if (relation->rd_rel->reltriggers > 0)
854                 RelationBuildTriggers(relation);
855         else
856                 relation->trigdesc = NULL;
857
858         /*
859          * if it's an index, initialize index-related information
860          */
861         if (OidIsValid(relation->rd_rel->relam))
862                 RelationInitIndexAccessInfo(relation);
863
864         /* extract reloptions if any */
865         RelationParseRelOptions(relation, pg_class_tuple);
866
867         /*
868          * initialize the relation lock manager information
869          */
870         RelationInitLockInfo(relation);         /* see lmgr.c */
871
872         /*
873          * initialize physical addressing information for the relation
874          */
875         RelationInitPhysicalAddr(relation);
876
877         /* make sure relation is marked as having no open file yet */
878         relation->rd_smgr = NULL;
879
880         /*
881          * now we can free the memory allocated for pg_class_tuple
882          */
883         heap_freetuple(pg_class_tuple);
884
885         /*
886          * Insert newly created relation into relcache hash tables.
887          */
888         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
889         RelationCacheInsert(relation);
890         MemoryContextSwitchTo(oldcxt);
891
892         /* It's fully valid */
893         relation->rd_isvalid = true;
894
895         return relation;
896 }
897
898 /*
899  * Initialize the physical addressing info (RelFileNode) for a relcache entry
900  */
901 static void
902 RelationInitPhysicalAddr(Relation relation)
903 {
904         if (relation->rd_rel->reltablespace)
905                 relation->rd_node.spcNode = relation->rd_rel->reltablespace;
906         else
907                 relation->rd_node.spcNode = MyDatabaseTableSpace;
908         if (relation->rd_rel->relisshared)
909                 relation->rd_node.dbNode = InvalidOid;
910         else
911                 relation->rd_node.dbNode = MyDatabaseId;
912         relation->rd_node.relNode = relation->rd_rel->relfilenode;
913 }
914
915 /*
916  * Initialize index-access-method support data for an index relation
917  */
918 void
919 RelationInitIndexAccessInfo(Relation relation)
920 {
921         HeapTuple       tuple;
922         Form_pg_am      aform;
923         Datum           indclassDatum;
924         Datum           indoptionDatum;
925         bool            isnull;
926         oidvector  *indclass;
927         int2vector *indoption;
928         MemoryContext indexcxt;
929         MemoryContext oldcontext;
930         int                     natts;
931         uint16          amstrategies;
932         uint16          amsupport;
933
934         /*
935          * Make a copy of the pg_index entry for the index.  Since pg_index
936          * contains variable-length and possibly-null fields, we have to do this
937          * honestly rather than just treating it as a Form_pg_index struct.
938          */
939         tuple = SearchSysCache(INDEXRELID,
940                                                    ObjectIdGetDatum(RelationGetRelid(relation)),
941                                                    0, 0, 0);
942         if (!HeapTupleIsValid(tuple))
943                 elog(ERROR, "cache lookup failed for index %u",
944                          RelationGetRelid(relation));
945         oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
946         relation->rd_indextuple = heap_copytuple(tuple);
947         relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
948         MemoryContextSwitchTo(oldcontext);
949         ReleaseSysCache(tuple);
950
951         /*
952          * Make a copy of the pg_am entry for the index's access method
953          */
954         tuple = SearchSysCache(AMOID,
955                                                    ObjectIdGetDatum(relation->rd_rel->relam),
956                                                    0, 0, 0);
957         if (!HeapTupleIsValid(tuple))
958                 elog(ERROR, "cache lookup failed for access method %u",
959                          relation->rd_rel->relam);
960         aform = (Form_pg_am) MemoryContextAlloc(CacheMemoryContext, sizeof *aform);
961         memcpy(aform, GETSTRUCT(tuple), sizeof *aform);
962         ReleaseSysCache(tuple);
963         relation->rd_am = aform;
964
965         natts = relation->rd_rel->relnatts;
966         if (natts != relation->rd_index->indnatts)
967                 elog(ERROR, "relnatts disagrees with indnatts for index %u",
968                          RelationGetRelid(relation));
969         amstrategies = aform->amstrategies;
970         amsupport = aform->amsupport;
971
972         /*
973          * Make the private context to hold index access info.  The reason we need
974          * a context, and not just a couple of pallocs, is so that we won't leak
975          * any subsidiary info attached to fmgr lookup records.
976          *
977          * Context parameters are set on the assumption that it'll probably not
978          * contain much data.
979          */
980         indexcxt = AllocSetContextCreate(CacheMemoryContext,
981                                                                          RelationGetRelationName(relation),
982                                                                          ALLOCSET_SMALL_MINSIZE,
983                                                                          ALLOCSET_SMALL_INITSIZE,
984                                                                          ALLOCSET_SMALL_MAXSIZE);
985         relation->rd_indexcxt = indexcxt;
986
987         /*
988          * Allocate arrays to hold data
989          */
990         relation->rd_aminfo = (RelationAmInfo *)
991                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
992
993         relation->rd_opfamily = (Oid *)
994                 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
995         relation->rd_opcintype = (Oid *)
996                 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
997
998         if (amstrategies > 0)
999                 relation->rd_operator = (Oid *)
1000                         MemoryContextAllocZero(indexcxt,
1001                                                                    natts * amstrategies * sizeof(Oid));
1002         else
1003                 relation->rd_operator = NULL;
1004
1005         if (amsupport > 0)
1006         {
1007                 int                     nsupport = natts * amsupport;
1008
1009                 relation->rd_support = (RegProcedure *)
1010                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1011                 relation->rd_supportinfo = (FmgrInfo *)
1012                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1013         }
1014         else
1015         {
1016                 relation->rd_support = NULL;
1017                 relation->rd_supportinfo = NULL;
1018         }
1019
1020         relation->rd_indoption = (int16 *)
1021                 MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1022
1023         /*
1024          * indclass cannot be referenced directly through the C struct, because it
1025          * comes after the variable-width indkey field.  Must extract the datum
1026          * the hard way...
1027          */
1028         indclassDatum = fastgetattr(relation->rd_indextuple,
1029                                                                 Anum_pg_index_indclass,
1030                                                                 GetPgIndexDescriptor(),
1031                                                                 &isnull);
1032         Assert(!isnull);
1033         indclass = (oidvector *) DatumGetPointer(indclassDatum);
1034
1035         /*
1036          * Fill the operator and support procedure OID arrays, as well as the info
1037          * about opfamilies and opclass input types.  (aminfo and supportinfo are
1038          * left as zeroes, and are filled on-the-fly when used)
1039          */
1040         IndexSupportInitialize(indclass,
1041                                                    relation->rd_operator, relation->rd_support,
1042                                                    relation->rd_opfamily, relation->rd_opcintype,
1043                                                    amstrategies, amsupport, natts);
1044
1045         /*
1046          * Similarly extract indoption and copy it to the cache entry
1047          */
1048         indoptionDatum = fastgetattr(relation->rd_indextuple,
1049                                                                  Anum_pg_index_indoption,
1050                                                                  GetPgIndexDescriptor(),
1051                                                                  &isnull);
1052         Assert(!isnull);
1053         indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1054         memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1055
1056         /*
1057          * expressions and predicate cache will be filled later
1058          */
1059         relation->rd_indexprs = NIL;
1060         relation->rd_indpred = NIL;
1061         relation->rd_amcache = NULL;
1062 }
1063
1064 /*
1065  * IndexSupportInitialize
1066  *              Initializes an index's cached opclass information,
1067  *              given the index's pg_index.indclass entry.
1068  *
1069  * Data is returned into *indexOperator, *indexSupport, *opFamily, and
1070  * *opcInType, which are arrays allocated by the caller.
1071  *
1072  * The caller also passes maxStrategyNumber, maxSupportNumber, and
1073  * maxAttributeNumber, since these indicate the size of the arrays
1074  * it has allocated --- but in practice these numbers must always match
1075  * those obtainable from the system catalog entries for the index and
1076  * access method.
1077  */
1078 static void
1079 IndexSupportInitialize(oidvector *indclass,
1080                                            Oid *indexOperator,
1081                                            RegProcedure *indexSupport,
1082                                            Oid *opFamily,
1083                                            Oid *opcInType,
1084                                            StrategyNumber maxStrategyNumber,
1085                                            StrategyNumber maxSupportNumber,
1086                                            AttrNumber maxAttributeNumber)
1087 {
1088         int                     attIndex;
1089
1090         for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1091         {
1092                 OpClassCacheEnt *opcentry;
1093
1094                 if (!OidIsValid(indclass->values[attIndex]))
1095                         elog(ERROR, "bogus pg_index tuple");
1096
1097                 /* look up the info for this opclass, using a cache */
1098                 opcentry = LookupOpclassInfo(indclass->values[attIndex],
1099                                                                          maxStrategyNumber,
1100                                                                          maxSupportNumber);
1101
1102                 /* copy cached data into relcache entry */
1103                 opFamily[attIndex] = opcentry->opcfamily;
1104                 opcInType[attIndex] = opcentry->opcintype;
1105                 if (maxStrategyNumber > 0)
1106                         memcpy(&indexOperator[attIndex * maxStrategyNumber],
1107                                    opcentry->operatorOids,
1108                                    maxStrategyNumber * sizeof(Oid));
1109                 if (maxSupportNumber > 0)
1110                         memcpy(&indexSupport[attIndex * maxSupportNumber],
1111                                    opcentry->supportProcs,
1112                                    maxSupportNumber * sizeof(RegProcedure));
1113         }
1114 }
1115
1116 /*
1117  * LookupOpclassInfo
1118  *
1119  * This routine maintains a per-opclass cache of the information needed
1120  * by IndexSupportInitialize().  This is more efficient than relying on
1121  * the catalog cache, because we can load all the info about a particular
1122  * opclass in a single indexscan of pg_amproc or pg_amop.
1123  *
1124  * The information from pg_am about expected range of strategy and support
1125  * numbers is passed in, rather than being looked up, mainly because the
1126  * caller will have it already.
1127  *
1128  * Note there is no provision for flushing the cache.  This is OK at the
1129  * moment because there is no way to ALTER any interesting properties of an
1130  * existing opclass --- all you can do is drop it, which will result in
1131  * a useless but harmless dead entry in the cache.  To support altering
1132  * opclass membership (not the same as opfamily membership!), we'd need to
1133  * be able to flush this cache as well as the contents of relcache entries
1134  * for indexes.
1135  */
1136 static OpClassCacheEnt *
1137 LookupOpclassInfo(Oid operatorClassOid,
1138                                   StrategyNumber numStrats,
1139                                   StrategyNumber numSupport)
1140 {
1141         OpClassCacheEnt *opcentry;
1142         bool            found;
1143         Relation        rel;
1144         SysScanDesc scan;
1145         ScanKeyData skey[3];
1146         HeapTuple       htup;
1147         bool            indexOK;
1148
1149         if (OpClassCache == NULL)
1150         {
1151                 /* First time through: initialize the opclass cache */
1152                 HASHCTL         ctl;
1153
1154                 if (!CacheMemoryContext)
1155                         CreateCacheMemoryContext();
1156
1157                 MemSet(&ctl, 0, sizeof(ctl));
1158                 ctl.keysize = sizeof(Oid);
1159                 ctl.entrysize = sizeof(OpClassCacheEnt);
1160                 ctl.hash = oid_hash;
1161                 OpClassCache = hash_create("Operator class cache", 64,
1162                                                                    &ctl, HASH_ELEM | HASH_FUNCTION);
1163         }
1164
1165         opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1166                                                                                            (void *) &operatorClassOid,
1167                                                                                            HASH_ENTER, &found);
1168
1169         if (!found)
1170         {
1171                 /* Need to allocate memory for new entry */
1172                 opcentry->valid = false;        /* until known OK */
1173                 opcentry->numStrats = numStrats;
1174                 opcentry->numSupport = numSupport;
1175
1176                 if (numStrats > 0)
1177                         opcentry->operatorOids = (Oid *)
1178                                 MemoryContextAllocZero(CacheMemoryContext,
1179                                                                            numStrats * sizeof(Oid));
1180                 else
1181                         opcentry->operatorOids = NULL;
1182
1183                 if (numSupport > 0)
1184                         opcentry->supportProcs = (RegProcedure *)
1185                                 MemoryContextAllocZero(CacheMemoryContext,
1186                                                                            numSupport * sizeof(RegProcedure));
1187                 else
1188                         opcentry->supportProcs = NULL;
1189         }
1190         else
1191         {
1192                 Assert(numStrats == opcentry->numStrats);
1193                 Assert(numSupport == opcentry->numSupport);
1194         }
1195
1196         /*
1197          * When testing for cache-flush hazards, we intentionally disable the
1198          * operator class cache and force reloading of the info on each call.
1199          * This is helpful because we want to test the case where a cache flush
1200          * occurs while we are loading the info, and it's very hard to provoke
1201          * that if this happens only once per opclass per backend.
1202          */
1203 #if defined(CLOBBER_CACHE_ALWAYS)
1204         opcentry->valid = false;
1205 #endif
1206
1207         if (opcentry->valid)
1208                 return opcentry;
1209
1210         /*
1211          * Need to fill in new entry.
1212          *
1213          * To avoid infinite recursion during startup, force heap scans if we're
1214          * looking up info for the opclasses used by the indexes we would like to
1215          * reference here.
1216          */
1217         indexOK = criticalRelcachesBuilt ||
1218                 (operatorClassOid != OID_BTREE_OPS_OID &&
1219                  operatorClassOid != INT2_BTREE_OPS_OID);
1220
1221         /*
1222          * We have to fetch the pg_opclass row to determine its opfamily and
1223          * opcintype, which are needed to look up the operators and functions.
1224          * It'd be convenient to use the syscache here, but that probably doesn't
1225          * work while bootstrapping.
1226          */
1227         ScanKeyInit(&skey[0],
1228                                 ObjectIdAttributeNumber,
1229                                 BTEqualStrategyNumber, F_OIDEQ,
1230                                 ObjectIdGetDatum(operatorClassOid));
1231         rel = heap_open(OperatorClassRelationId, AccessShareLock);
1232         scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1233                                                           SnapshotNow, 1, skey);
1234
1235         if (HeapTupleIsValid(htup = systable_getnext(scan)))
1236         {
1237                 Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1238
1239                 opcentry->opcfamily = opclassform->opcfamily;
1240                 opcentry->opcintype = opclassform->opcintype;
1241         }
1242         else
1243                 elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1244
1245         systable_endscan(scan);
1246         heap_close(rel, AccessShareLock);
1247
1248
1249         /*
1250          * Scan pg_amop to obtain operators for the opclass.  We only fetch the
1251          * default ones (those with lefttype = righttype = opcintype).
1252          */
1253         if (numStrats > 0)
1254         {
1255                 ScanKeyInit(&skey[0],
1256                                         Anum_pg_amop_amopfamily,
1257                                         BTEqualStrategyNumber, F_OIDEQ,
1258                                         ObjectIdGetDatum(opcentry->opcfamily));
1259                 ScanKeyInit(&skey[1],
1260                                         Anum_pg_amop_amoplefttype,
1261                                         BTEqualStrategyNumber, F_OIDEQ,
1262                                         ObjectIdGetDatum(opcentry->opcintype));
1263                 ScanKeyInit(&skey[2],
1264                                         Anum_pg_amop_amoprighttype,
1265                                         BTEqualStrategyNumber, F_OIDEQ,
1266                                         ObjectIdGetDatum(opcentry->opcintype));
1267                 rel = heap_open(AccessMethodOperatorRelationId, AccessShareLock);
1268                 scan = systable_beginscan(rel, AccessMethodStrategyIndexId, indexOK,
1269                                                                   SnapshotNow, 3, skey);
1270
1271                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1272                 {
1273                         Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(htup);
1274
1275                         if (amopform->amopstrategy <= 0 ||
1276                                 (StrategyNumber) amopform->amopstrategy > numStrats)
1277                                 elog(ERROR, "invalid amopstrategy number %d for opclass %u",
1278                                          amopform->amopstrategy, operatorClassOid);
1279                         opcentry->operatorOids[amopform->amopstrategy - 1] =
1280                                 amopform->amopopr;
1281                 }
1282
1283                 systable_endscan(scan);
1284                 heap_close(rel, AccessShareLock);
1285         }
1286
1287         /*
1288          * Scan pg_amproc to obtain support procs for the opclass.      We only fetch
1289          * the default ones (those with lefttype = righttype = opcintype).
1290          */
1291         if (numSupport > 0)
1292         {
1293                 ScanKeyInit(&skey[0],
1294                                         Anum_pg_amproc_amprocfamily,
1295                                         BTEqualStrategyNumber, F_OIDEQ,
1296                                         ObjectIdGetDatum(opcentry->opcfamily));
1297                 ScanKeyInit(&skey[1],
1298                                         Anum_pg_amproc_amproclefttype,
1299                                         BTEqualStrategyNumber, F_OIDEQ,
1300                                         ObjectIdGetDatum(opcentry->opcintype));
1301                 ScanKeyInit(&skey[2],
1302                                         Anum_pg_amproc_amprocrighttype,
1303                                         BTEqualStrategyNumber, F_OIDEQ,
1304                                         ObjectIdGetDatum(opcentry->opcintype));
1305                 rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
1306                 scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1307                                                                   SnapshotNow, 3, skey);
1308
1309                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1310                 {
1311                         Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1312
1313                         if (amprocform->amprocnum <= 0 ||
1314                                 (StrategyNumber) amprocform->amprocnum > numSupport)
1315                                 elog(ERROR, "invalid amproc number %d for opclass %u",
1316                                          amprocform->amprocnum, operatorClassOid);
1317
1318                         opcentry->supportProcs[amprocform->amprocnum - 1] =
1319                                 amprocform->amproc;
1320                 }
1321
1322                 systable_endscan(scan);
1323                 heap_close(rel, AccessShareLock);
1324         }
1325
1326         opcentry->valid = true;
1327         return opcentry;
1328 }
1329
1330
1331 /*
1332  *              formrdesc
1333  *
1334  *              This is a special cut-down version of RelationBuildDesc()
1335  *              used by RelationCacheInitializePhase2() in initializing the relcache.
1336  *              The relation descriptor is built just from the supplied parameters,
1337  *              without actually looking at any system table entries.  We cheat
1338  *              quite a lot since we only need to work for a few basic system
1339  *              catalogs.
1340  *
1341  * formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
1342  * and pg_type (see RelationCacheInitializePhase2).
1343  *
1344  * Note that these catalogs can't have constraints (except attnotnull),
1345  * default values, rules, or triggers, since we don't cope with any of that.
1346  *
1347  * NOTE: we assume we are already switched into CacheMemoryContext.
1348  */
1349 static void
1350 formrdesc(const char *relationName, Oid relationReltype,
1351                   bool hasoids, int natts, FormData_pg_attribute *att)
1352 {
1353         Relation        relation;
1354         int                     i;
1355         bool            has_not_null;
1356
1357         /*
1358          * allocate new relation desc, clear all fields of reldesc
1359          */
1360         relation = (Relation) palloc0(sizeof(RelationData));
1361         relation->rd_targblock = InvalidBlockNumber;
1362
1363         /* make sure relation is marked as having no open file yet */
1364         relation->rd_smgr = NULL;
1365
1366         /*
1367          * initialize reference count: 1 because it is nailed in cache
1368          */
1369         relation->rd_refcnt = 1;
1370
1371         /*
1372          * all entries built with this routine are nailed-in-cache; none are for
1373          * new or temp relations.
1374          */
1375         relation->rd_isnailed = true;
1376         relation->rd_createSubid = InvalidSubTransactionId;
1377         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1378         relation->rd_istemp = false;
1379
1380         /*
1381          * initialize relation tuple form
1382          *
1383          * The data we insert here is pretty incomplete/bogus, but it'll serve to
1384          * get us launched.  RelationCacheInitializePhase2() will read the real
1385          * data from pg_class and replace what we've done here.
1386          */
1387         relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1388
1389         namestrcpy(&relation->rd_rel->relname, relationName);
1390         relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1391         relation->rd_rel->reltype = relationReltype;
1392
1393         /*
1394          * It's important to distinguish between shared and non-shared relations,
1395          * even at bootstrap time, to make sure we know where they are stored.  At
1396          * present, all relations that formrdesc is used for are not shared.
1397          */
1398         relation->rd_rel->relisshared = false;
1399
1400         relation->rd_rel->relpages = 1;
1401         relation->rd_rel->reltuples = 1;
1402         relation->rd_rel->relkind = RELKIND_RELATION;
1403         relation->rd_rel->relhasoids = hasoids;
1404         relation->rd_rel->relnatts = (int16) natts;
1405
1406         /*
1407          * initialize attribute tuple form
1408          *
1409          * Unlike the case with the relation tuple, this data had better be right
1410          * because it will never be replaced.  The input values must be correctly
1411          * defined by macros in src/include/catalog/ headers.
1412          */
1413         relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1414         relation->rd_att->tdrefcount = 1;       /* mark as refcounted */
1415
1416         relation->rd_att->tdtypeid = relationReltype;
1417         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
1418
1419         /*
1420          * initialize tuple desc info
1421          */
1422         has_not_null = false;
1423         for (i = 0; i < natts; i++)
1424         {
1425                 memcpy(relation->rd_att->attrs[i],
1426                            &att[i],
1427                            ATTRIBUTE_TUPLE_SIZE);
1428                 has_not_null |= att[i].attnotnull;
1429                 /* make sure attcacheoff is valid */
1430                 relation->rd_att->attrs[i]->attcacheoff = -1;
1431         }
1432
1433         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1434         relation->rd_att->attrs[0]->attcacheoff = 0;
1435
1436         /* mark not-null status */
1437         if (has_not_null)
1438         {
1439                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1440
1441                 constr->has_not_null = true;
1442                 relation->rd_att->constr = constr;
1443         }
1444
1445         /*
1446          * initialize relation id from info in att array (my, this is ugly)
1447          */
1448         RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1449         relation->rd_rel->relfilenode = RelationGetRelid(relation);
1450
1451         /*
1452          * initialize the relation lock manager information
1453          */
1454         RelationInitLockInfo(relation);         /* see lmgr.c */
1455
1456         /*
1457          * initialize physical addressing information for the relation
1458          */
1459         RelationInitPhysicalAddr(relation);
1460
1461         /*
1462          * initialize the rel-has-index flag, using hardwired knowledge
1463          */
1464         if (IsBootstrapProcessingMode())
1465         {
1466                 /* In bootstrap mode, we have no indexes */
1467                 relation->rd_rel->relhasindex = false;
1468         }
1469         else
1470         {
1471                 /* Otherwise, all the rels formrdesc is used for have indexes */
1472                 relation->rd_rel->relhasindex = true;
1473         }
1474
1475         /*
1476          * add new reldesc to relcache
1477          */
1478         RelationCacheInsert(relation);
1479
1480         /* It's fully valid */
1481         relation->rd_isvalid = true;
1482 }
1483
1484
1485 /* ----------------------------------------------------------------
1486  *                               Relation Descriptor Lookup Interface
1487  * ----------------------------------------------------------------
1488  */
1489
1490 /*
1491  *              RelationIdGetRelation
1492  *
1493  *              Lookup a reldesc by OID; make one if not already in cache.
1494  *
1495  *              Returns NULL if no pg_class row could be found for the given relid
1496  *              (suggesting we are trying to access a just-deleted relation).
1497  *              Any other error is reported via elog.
1498  *
1499  *              NB: caller should already have at least AccessShareLock on the
1500  *              relation ID, else there are nasty race conditions.
1501  *
1502  *              NB: relation ref count is incremented, or set to 1 if new entry.
1503  *              Caller should eventually decrement count.  (Usually,
1504  *              that happens by calling RelationClose().)
1505  */
1506 Relation
1507 RelationIdGetRelation(Oid relationId)
1508 {
1509         Relation        rd;
1510
1511         /*
1512          * first try to find reldesc in the cache
1513          */
1514         RelationIdCacheLookup(relationId, rd);
1515
1516         if (RelationIsValid(rd))
1517         {
1518                 RelationIncrementReferenceCount(rd);
1519                 /* revalidate nailed index if necessary */
1520                 if (!rd->rd_isvalid)
1521                         RelationReloadIndexInfo(rd);
1522                 return rd;
1523         }
1524
1525         /*
1526          * no reldesc in the cache, so have RelationBuildDesc() build one and add
1527          * it.
1528          */
1529         rd = RelationBuildDesc(relationId, NULL);
1530         if (RelationIsValid(rd))
1531                 RelationIncrementReferenceCount(rd);
1532         return rd;
1533 }
1534
1535 /* ----------------------------------------------------------------
1536  *                              cache invalidation support routines
1537  * ----------------------------------------------------------------
1538  */
1539
1540 /*
1541  * RelationIncrementReferenceCount
1542  *              Increments relation reference count.
1543  *
1544  * Note: bootstrap mode has its own weird ideas about relation refcount
1545  * behavior; we ought to fix it someday, but for now, just disable
1546  * reference count ownership tracking in bootstrap mode.
1547  */
1548 void
1549 RelationIncrementReferenceCount(Relation rel)
1550 {
1551         ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
1552         rel->rd_refcnt += 1;
1553         if (!IsBootstrapProcessingMode())
1554                 ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
1555 }
1556
1557 /*
1558  * RelationDecrementReferenceCount
1559  *              Decrements relation reference count.
1560  */
1561 void
1562 RelationDecrementReferenceCount(Relation rel)
1563 {
1564         Assert(rel->rd_refcnt > 0);
1565         rel->rd_refcnt -= 1;
1566         if (!IsBootstrapProcessingMode())
1567                 ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
1568 }
1569
1570 /*
1571  * RelationClose - close an open relation
1572  *
1573  *      Actually, we just decrement the refcount.
1574  *
1575  *      NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
1576  *      will be freed as soon as their refcount goes to zero.  In combination
1577  *      with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
1578  *      to catch references to already-released relcache entries.  It slows
1579  *      things down quite a bit, however.
1580  */
1581 void
1582 RelationClose(Relation relation)
1583 {
1584         /* Note: no locking manipulations needed */
1585         RelationDecrementReferenceCount(relation);
1586
1587 #ifdef RELCACHE_FORCE_RELEASE
1588         if (RelationHasReferenceCountZero(relation) &&
1589                 relation->rd_createSubid == InvalidSubTransactionId &&
1590                 relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
1591                 RelationClearRelation(relation, false);
1592 #endif
1593 }
1594
1595 /*
1596  * RelationReloadIndexInfo - reload minimal information for an open index
1597  *
1598  *      This function is used only for indexes.  A relcache inval on an index
1599  *      can mean that its pg_class or pg_index row changed.  There are only
1600  *      very limited changes that are allowed to an existing index's schema,
1601  *      so we can update the relcache entry without a complete rebuild; which
1602  *      is fortunate because we can't rebuild an index entry that is "nailed"
1603  *      and/or in active use.  We support full replacement of the pg_class row,
1604  *      as well as updates of a few simple fields of the pg_index row.
1605  *
1606  *      We can't necessarily reread the catalog rows right away; we might be
1607  *      in a failed transaction when we receive the SI notification.  If so,
1608  *      RelationClearRelation just marks the entry as invalid by setting
1609  *      rd_isvalid to false.  This routine is called to fix the entry when it
1610  *      is next needed.
1611  */
1612 static void
1613 RelationReloadIndexInfo(Relation relation)
1614 {
1615         bool            indexOK;
1616         HeapTuple       pg_class_tuple;
1617         Form_pg_class relp;
1618
1619         /* Should be called only for invalidated indexes */
1620         Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
1621                    !relation->rd_isvalid);
1622         /* Should be closed at smgr level */
1623         Assert(relation->rd_smgr == NULL);
1624
1625         /*
1626          * Read the pg_class row
1627          *
1628          * Don't try to use an indexscan of pg_class_oid_index to reload the info
1629          * for pg_class_oid_index ...
1630          */
1631         indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
1632         pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
1633         if (!HeapTupleIsValid(pg_class_tuple))
1634                 elog(ERROR, "could not find pg_class tuple for index %u",
1635                          RelationGetRelid(relation));
1636         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1637         memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
1638         /* Reload reloptions in case they changed */
1639         if (relation->rd_options)
1640                 pfree(relation->rd_options);
1641         RelationParseRelOptions(relation, pg_class_tuple);
1642         /* done with pg_class tuple */
1643         heap_freetuple(pg_class_tuple);
1644         /* We must recalculate physical address in case it changed */
1645         RelationInitPhysicalAddr(relation);
1646         /* Make sure targblock is reset in case rel was truncated */
1647         relation->rd_targblock = InvalidBlockNumber;
1648         /* Must free any AM cached data, too */
1649         if (relation->rd_amcache)
1650                 pfree(relation->rd_amcache);
1651         relation->rd_amcache = NULL;
1652
1653         /*
1654          * For a non-system index, there are fields of the pg_index row that are
1655          * allowed to change, so re-read that row and update the relcache entry.
1656          * Most of the info derived from pg_index (such as support function lookup
1657          * info) cannot change, and indeed the whole point of this routine is to
1658          * update the relcache entry without clobbering that data; so wholesale
1659          * replacement is not appropriate.
1660          */
1661         if (!IsSystemRelation(relation))
1662         {
1663                 HeapTuple       tuple;
1664                 Form_pg_index index;
1665
1666                 tuple = SearchSysCache(INDEXRELID,
1667                                                            ObjectIdGetDatum(RelationGetRelid(relation)),
1668                                                            0, 0, 0);
1669                 if (!HeapTupleIsValid(tuple))
1670                         elog(ERROR, "cache lookup failed for index %u",
1671                                  RelationGetRelid(relation));
1672                 index = (Form_pg_index) GETSTRUCT(tuple);
1673
1674                 relation->rd_index->indisvalid = index->indisvalid;
1675                 relation->rd_index->indcheckxmin = index->indcheckxmin;
1676                 relation->rd_index->indisready = index->indisready;
1677                 HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
1678                                                            HeapTupleHeaderGetXmin(tuple->t_data));
1679
1680                 ReleaseSysCache(tuple);
1681         }
1682
1683         /* Okay, now it's valid again */
1684         relation->rd_isvalid = true;
1685 }
1686
1687 /*
1688  * RelationClearRelation
1689  *
1690  *       Physically blow away a relation cache entry, or reset it and rebuild
1691  *       it from scratch (that is, from catalog entries).  The latter path is
1692  *       usually used when we are notified of a change to an open relation
1693  *       (one with refcount > 0).  However, this routine just does whichever
1694  *       it's told to do; callers must determine which they want.
1695  */
1696 static void
1697 RelationClearRelation(Relation relation, bool rebuild)
1698 {
1699         Oid                     old_reltype = relation->rd_rel->reltype;
1700         MemoryContext oldcxt;
1701
1702         /*
1703          * Make sure smgr and lower levels close the relation's files, if they
1704          * weren't closed already.  If the relation is not getting deleted, the
1705          * next smgr access should reopen the files automatically.      This ensures
1706          * that the low-level file access state is updated after, say, a vacuum
1707          * truncation.
1708          */
1709         RelationCloseSmgr(relation);
1710
1711         /*
1712          * Never, never ever blow away a nailed-in system relation, because we'd
1713          * be unable to recover.  However, we must reset rd_targblock, in case we
1714          * got called because of a relation cache flush that was triggered by
1715          * VACUUM.
1716          *
1717          * If it's a nailed index, then we need to re-read the pg_class row to see
1718          * if its relfilenode changed.  We can't necessarily do that here, because
1719          * we might be in a failed transaction.  We assume it's okay to do it if
1720          * there are open references to the relcache entry (cf notes for
1721          * AtEOXact_RelationCache).  Otherwise just mark the entry as possibly
1722          * invalid, and it'll be fixed when next opened.
1723          */
1724         if (relation->rd_isnailed)
1725         {
1726                 relation->rd_targblock = InvalidBlockNumber;
1727                 if (relation->rd_rel->relkind == RELKIND_INDEX)
1728                 {
1729                         relation->rd_isvalid = false;           /* needs to be revalidated */
1730                         if (relation->rd_refcnt > 1)
1731                                 RelationReloadIndexInfo(relation);
1732                 }
1733                 return;
1734         }
1735
1736         /*
1737          * Even non-system indexes should not be blown away if they are open and
1738          * have valid index support information.  This avoids problems with active
1739          * use of the index support information.  As with nailed indexes, we
1740          * re-read the pg_class row to handle possible physical relocation of the
1741          * index, and we check for pg_index updates too.
1742          */
1743         if (relation->rd_rel->relkind == RELKIND_INDEX &&
1744                 relation->rd_refcnt > 0 &&
1745                 relation->rd_indexcxt != NULL)
1746         {
1747                 relation->rd_isvalid = false;   /* needs to be revalidated */
1748                 RelationReloadIndexInfo(relation);
1749                 return;
1750         }
1751
1752         /*
1753          * Remove relation from hash tables
1754          *
1755          * Note: we might be reinserting it momentarily, but we must not have it
1756          * visible in the hash tables until it's valid again, so don't try to
1757          * optimize this away...
1758          */
1759         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
1760         RelationCacheDelete(relation);
1761         MemoryContextSwitchTo(oldcxt);
1762
1763         /* Clear out catcache's entries for this relation */
1764         CatalogCacheFlushRelation(RelationGetRelid(relation));
1765
1766         /*
1767          * Free all the subsidiary data structures of the relcache entry. We
1768          * cannot free rd_att if we are trying to rebuild the entry, however,
1769          * because pointers to it may be cached in various places. The rule
1770          * manager might also have pointers into the rewrite rules. So to begin
1771          * with, we can only get rid of these fields:
1772          */
1773         FreeTriggerDesc(relation->trigdesc);
1774         if (relation->rd_indextuple)
1775                 pfree(relation->rd_indextuple);
1776         if (relation->rd_am)
1777                 pfree(relation->rd_am);
1778         if (relation->rd_rel)
1779                 pfree(relation->rd_rel);
1780         if (relation->rd_options)
1781                 pfree(relation->rd_options);
1782         list_free(relation->rd_indexlist);
1783         bms_free(relation->rd_indexattr);
1784         if (relation->rd_indexcxt)
1785                 MemoryContextDelete(relation->rd_indexcxt);
1786
1787         /*
1788          * If we're really done with the relcache entry, blow it away. But if
1789          * someone is still using it, reconstruct the whole deal without moving
1790          * the physical RelationData record (so that the someone's pointer is
1791          * still valid).
1792          */
1793         if (!rebuild)
1794         {
1795                 /* ok to zap remaining substructure */
1796                 flush_rowtype_cache(old_reltype);
1797                 /* can't use DecrTupleDescRefCount here */
1798                 Assert(relation->rd_att->tdrefcount > 0);
1799                 if (--relation->rd_att->tdrefcount == 0)
1800                         FreeTupleDesc(relation->rd_att);
1801                 if (relation->rd_rulescxt)
1802                         MemoryContextDelete(relation->rd_rulescxt);
1803                 pfree(relation);
1804         }
1805         else
1806         {
1807                 /*
1808                  * When rebuilding an open relcache entry, must preserve ref count and
1809                  * rd_createSubid/rd_newRelfilenodeSubid state.  Also attempt to
1810                  * preserve the tupledesc and rewrite-rule substructures in place.
1811                  * (Note: the refcount mechanism for tupledescs may eventually ensure
1812                  * that we don't really need to preserve the tupledesc in-place, but
1813                  * for now there are still a lot of places that assume an open rel's
1814                  * tupledesc won't move.)
1815                  *
1816                  * Note that this process does not touch CurrentResourceOwner; which
1817                  * is good because whatever ref counts the entry may have do not
1818                  * necessarily belong to that resource owner.
1819                  */
1820                 Oid                     save_relid = RelationGetRelid(relation);
1821                 int                     old_refcnt = relation->rd_refcnt;
1822                 SubTransactionId old_createSubid = relation->rd_createSubid;
1823                 SubTransactionId old_newRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
1824                 struct PgStat_TableStatus *old_pgstat_info = relation->pgstat_info;
1825                 TupleDesc       old_att = relation->rd_att;
1826                 RuleLock   *old_rules = relation->rd_rules;
1827                 MemoryContext old_rulescxt = relation->rd_rulescxt;
1828
1829                 if (RelationBuildDesc(save_relid, relation) != relation)
1830                 {
1831                         /* Should only get here if relation was deleted */
1832                         flush_rowtype_cache(old_reltype);
1833                         Assert(old_att->tdrefcount > 0);
1834                         if (--old_att->tdrefcount == 0)
1835                                 FreeTupleDesc(old_att);
1836                         if (old_rulescxt)
1837                                 MemoryContextDelete(old_rulescxt);
1838                         pfree(relation);
1839                         elog(ERROR, "relation %u deleted while still in use", save_relid);
1840                 }
1841                 relation->rd_refcnt = old_refcnt;
1842                 relation->rd_createSubid = old_createSubid;
1843                 relation->rd_newRelfilenodeSubid = old_newRelfilenodeSubid;
1844                 relation->pgstat_info = old_pgstat_info;
1845
1846                 if (equalTupleDescs(old_att, relation->rd_att))
1847                 {
1848                         /* needn't flush typcache here */
1849                         Assert(relation->rd_att->tdrefcount == 1);
1850                         if (--relation->rd_att->tdrefcount == 0)
1851                                 FreeTupleDesc(relation->rd_att);
1852                         relation->rd_att = old_att;
1853                 }
1854                 else
1855                 {
1856                         flush_rowtype_cache(old_reltype);
1857                         Assert(old_att->tdrefcount > 0);
1858                         if (--old_att->tdrefcount == 0)
1859                                 FreeTupleDesc(old_att);
1860                 }
1861                 if (equalRuleLocks(old_rules, relation->rd_rules))
1862                 {
1863                         if (relation->rd_rulescxt)
1864                                 MemoryContextDelete(relation->rd_rulescxt);
1865                         relation->rd_rules = old_rules;
1866                         relation->rd_rulescxt = old_rulescxt;
1867                 }
1868                 else
1869                 {
1870                         if (old_rulescxt)
1871                                 MemoryContextDelete(old_rulescxt);
1872                 }
1873         }
1874 }
1875
1876 /*
1877  * RelationFlushRelation
1878  *
1879  *       Rebuild the relation if it is open (refcount > 0), else blow it away.
1880  */
1881 static void
1882 RelationFlushRelation(Relation relation)
1883 {
1884         bool            rebuild;
1885
1886         if (relation->rd_createSubid != InvalidSubTransactionId ||
1887                 relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
1888         {
1889                 /*
1890                  * New relcache entries are always rebuilt, not flushed; else we'd
1891                  * forget the "new" status of the relation, which is a useful
1892                  * optimization to have.  Ditto for the new-relfilenode status.
1893                  */
1894                 rebuild = true;
1895         }
1896         else
1897         {
1898                 /*
1899                  * Pre-existing rels can be dropped from the relcache if not open.
1900                  */
1901                 rebuild = !RelationHasReferenceCountZero(relation);
1902         }
1903
1904         RelationClearRelation(relation, rebuild);
1905 }
1906
1907 /*
1908  * RelationForgetRelation - unconditionally remove a relcache entry
1909  *
1910  *                 External interface for destroying a relcache entry when we
1911  *                 drop the relation.
1912  */
1913 void
1914 RelationForgetRelation(Oid rid)
1915 {
1916         Relation        relation;
1917
1918         RelationIdCacheLookup(rid, relation);
1919
1920         if (!PointerIsValid(relation))
1921                 return;                                 /* not in cache, nothing to do */
1922
1923         if (!RelationHasReferenceCountZero(relation))
1924                 elog(ERROR, "relation %u is still open", rid);
1925
1926         /* Unconditionally destroy the relcache entry */
1927         RelationClearRelation(relation, false);
1928 }
1929
1930 /*
1931  *              RelationCacheInvalidateEntry
1932  *
1933  *              This routine is invoked for SI cache flush messages.
1934  *
1935  * Any relcache entry matching the relid must be flushed.  (Note: caller has
1936  * already determined that the relid belongs to our database or is a shared
1937  * relation.)
1938  *
1939  * We used to skip local relations, on the grounds that they could
1940  * not be targets of cross-backend SI update messages; but it seems
1941  * safer to process them, so that our *own* SI update messages will
1942  * have the same effects during CommandCounterIncrement for both
1943  * local and nonlocal relations.
1944  */
1945 void
1946 RelationCacheInvalidateEntry(Oid relationId)
1947 {
1948         Relation        relation;
1949
1950         RelationIdCacheLookup(relationId, relation);
1951
1952         if (PointerIsValid(relation))
1953         {
1954                 relcacheInvalsReceived++;
1955                 RelationFlushRelation(relation);
1956         }
1957 }
1958
1959 /*
1960  * RelationCacheInvalidate
1961  *       Blow away cached relation descriptors that have zero reference counts,
1962  *       and rebuild those with positive reference counts.      Also reset the smgr
1963  *       relation cache.
1964  *
1965  *       This is currently used only to recover from SI message buffer overflow,
1966  *       so we do not touch new-in-transaction relations; they cannot be targets
1967  *       of cross-backend SI updates (and our own updates now go through a
1968  *       separate linked list that isn't limited by the SI message buffer size).
1969  *       Likewise, we need not discard new-relfilenode-in-transaction hints,
1970  *       since any invalidation of those would be a local event.
1971  *
1972  *       We do this in two phases: the first pass deletes deletable items, and
1973  *       the second one rebuilds the rebuildable items.  This is essential for
1974  *       safety, because hash_seq_search only copes with concurrent deletion of
1975  *       the element it is currently visiting.  If a second SI overflow were to
1976  *       occur while we are walking the table, resulting in recursive entry to
1977  *       this routine, we could crash because the inner invocation blows away
1978  *       the entry next to be visited by the outer scan.  But this way is OK,
1979  *       because (a) during the first pass we won't process any more SI messages,
1980  *       so hash_seq_search will complete safely; (b) during the second pass we
1981  *       only hold onto pointers to nondeletable entries.
1982  *
1983  *       The two-phase approach also makes it easy to ensure that we process
1984  *       nailed-in-cache indexes before other nondeletable items, and that we
1985  *       process pg_class_oid_index first of all.  In scenarios where a nailed
1986  *       index has been given a new relfilenode, we have to detect that update
1987  *       before the nailed index is used in reloading any other relcache entry.
1988  */
1989 void
1990 RelationCacheInvalidate(void)
1991 {
1992         HASH_SEQ_STATUS status;
1993         RelIdCacheEnt *idhentry;
1994         Relation        relation;
1995         List       *rebuildFirstList = NIL;
1996         List       *rebuildList = NIL;
1997         ListCell   *l;
1998
1999         /* Phase 1 */
2000         hash_seq_init(&status, RelationIdCache);
2001
2002         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2003         {
2004                 relation = idhentry->reldesc;
2005
2006                 /* Must close all smgr references to avoid leaving dangling ptrs */
2007                 RelationCloseSmgr(relation);
2008
2009                 /* Ignore new relations, since they are never SI targets */
2010                 if (relation->rd_createSubid != InvalidSubTransactionId)
2011                         continue;
2012
2013                 relcacheInvalsReceived++;
2014
2015                 if (RelationHasReferenceCountZero(relation))
2016                 {
2017                         /* Delete this entry immediately */
2018                         Assert(!relation->rd_isnailed);
2019                         RelationClearRelation(relation, false);
2020                 }
2021                 else
2022                 {
2023                         /*
2024                          * Add this entry to list of stuff to rebuild in second pass.
2025                          * pg_class_oid_index goes on the front of rebuildFirstList, other
2026                          * nailed indexes on the back, and everything else into
2027                          * rebuildList (in no particular order).
2028                          */
2029                         if (relation->rd_isnailed &&
2030                                 relation->rd_rel->relkind == RELKIND_INDEX)
2031                         {
2032                                 if (RelationGetRelid(relation) == ClassOidIndexId)
2033                                         rebuildFirstList = lcons(relation, rebuildFirstList);
2034                                 else
2035                                         rebuildFirstList = lappend(rebuildFirstList, relation);
2036                         }
2037                         else
2038                                 rebuildList = lcons(relation, rebuildList);
2039                 }
2040         }
2041
2042         /*
2043          * Now zap any remaining smgr cache entries.  This must happen before we
2044          * start to rebuild entries, since that may involve catalog fetches which
2045          * will re-open catalog files.
2046          */
2047         smgrcloseall();
2048
2049         /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2050         foreach(l, rebuildFirstList)
2051         {
2052                 relation = (Relation) lfirst(l);
2053                 RelationClearRelation(relation, true);
2054         }
2055         list_free(rebuildFirstList);
2056         foreach(l, rebuildList)
2057         {
2058                 relation = (Relation) lfirst(l);
2059                 RelationClearRelation(relation, true);
2060         }
2061         list_free(rebuildList);
2062 }
2063
2064 /*
2065  * AtEOXact_RelationCache
2066  *
2067  *      Clean up the relcache at main-transaction commit or abort.
2068  *
2069  * Note: this must be called *before* processing invalidation messages.
2070  * In the case of abort, we don't want to try to rebuild any invalidated
2071  * cache entries (since we can't safely do database accesses).  Therefore
2072  * we must reset refcnts before handling pending invalidations.
2073  *
2074  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2075  * ResourceOwner mechanism.  This routine just does a debugging
2076  * cross-check that no pins remain.  However, we also need to do special
2077  * cleanup when the current transaction created any relations or made use
2078  * of forced index lists.
2079  */
2080 void
2081 AtEOXact_RelationCache(bool isCommit)
2082 {
2083         HASH_SEQ_STATUS status;
2084         RelIdCacheEnt *idhentry;
2085
2086         /*
2087          * To speed up transaction exit, we want to avoid scanning the relcache
2088          * unless there is actually something for this routine to do.  Other than
2089          * the debug-only Assert checks, most transactions don't create any work
2090          * for us to do here, so we keep a static flag that gets set if there is
2091          * anything to do.      (Currently, this means either a relation is created in
2092          * the current xact, or one is given a new relfilenode, or an index list
2093          * is forced.)  For simplicity, the flag remains set till end of top-level
2094          * transaction, even though we could clear it at subtransaction end in
2095          * some cases.
2096          */
2097         if (!need_eoxact_work
2098 #ifdef USE_ASSERT_CHECKING
2099                 && !assert_enabled
2100 #endif
2101                 )
2102                 return;
2103
2104         hash_seq_init(&status, RelationIdCache);
2105
2106         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2107         {
2108                 Relation        relation = idhentry->reldesc;
2109
2110                 /*
2111                  * The relcache entry's ref count should be back to its normal
2112                  * not-in-a-transaction state: 0 unless it's nailed in cache.
2113                  *
2114                  * In bootstrap mode, this is NOT true, so don't check it --- the
2115                  * bootstrap code expects relations to stay open across start/commit
2116                  * transaction calls.  (That seems bogus, but it's not worth fixing.)
2117                  */
2118 #ifdef USE_ASSERT_CHECKING
2119                 if (!IsBootstrapProcessingMode())
2120                 {
2121                         int                     expected_refcnt;
2122
2123                         expected_refcnt = relation->rd_isnailed ? 1 : 0;
2124                         Assert(relation->rd_refcnt == expected_refcnt);
2125                 }
2126 #endif
2127
2128                 /*
2129                  * Is it a relation created in the current transaction?
2130                  *
2131                  * During commit, reset the flag to zero, since we are now out of the
2132                  * creating transaction.  During abort, simply delete the relcache
2133                  * entry --- it isn't interesting any longer.  (NOTE: if we have
2134                  * forgotten the new-ness of a new relation due to a forced cache
2135                  * flush, the entry will get deleted anyway by shared-cache-inval
2136                  * processing of the aborted pg_class insertion.)
2137                  */
2138                 if (relation->rd_createSubid != InvalidSubTransactionId)
2139                 {
2140                         if (isCommit)
2141                                 relation->rd_createSubid = InvalidSubTransactionId;
2142                         else
2143                         {
2144                                 RelationClearRelation(relation, false);
2145                                 continue;
2146                         }
2147                 }
2148
2149                 /*
2150                  * Likewise, reset the hint about the relfilenode being new.
2151                  */
2152                 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2153
2154                 /*
2155                  * Flush any temporary index list.
2156                  */
2157                 if (relation->rd_indexvalid == 2)
2158                 {
2159                         list_free(relation->rd_indexlist);
2160                         relation->rd_indexlist = NIL;
2161                         relation->rd_oidindex = InvalidOid;
2162                         relation->rd_indexvalid = 0;
2163                 }
2164         }
2165
2166         /* Once done with the transaction, we can reset need_eoxact_work */
2167         need_eoxact_work = false;
2168 }
2169
2170 /*
2171  * AtEOSubXact_RelationCache
2172  *
2173  *      Clean up the relcache at sub-transaction commit or abort.
2174  *
2175  * Note: this must be called *before* processing invalidation messages.
2176  */
2177 void
2178 AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
2179                                                   SubTransactionId parentSubid)
2180 {
2181         HASH_SEQ_STATUS status;
2182         RelIdCacheEnt *idhentry;
2183
2184         /*
2185          * Skip the relcache scan if nothing to do --- see notes for
2186          * AtEOXact_RelationCache.
2187          */
2188         if (!need_eoxact_work)
2189                 return;
2190
2191         hash_seq_init(&status, RelationIdCache);
2192
2193         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2194         {
2195                 Relation        relation = idhentry->reldesc;
2196
2197                 /*
2198                  * Is it a relation created in the current subtransaction?
2199                  *
2200                  * During subcommit, mark it as belonging to the parent, instead.
2201                  * During subabort, simply delete the relcache entry.
2202                  */
2203                 if (relation->rd_createSubid == mySubid)
2204                 {
2205                         if (isCommit)
2206                                 relation->rd_createSubid = parentSubid;
2207                         else
2208                         {
2209                                 Assert(RelationHasReferenceCountZero(relation));
2210                                 RelationClearRelation(relation, false);
2211                                 continue;
2212                         }
2213                 }
2214
2215                 /*
2216                  * Likewise, update or drop any new-relfilenode-in-subtransaction
2217                  * hint.
2218                  */
2219                 if (relation->rd_newRelfilenodeSubid == mySubid)
2220                 {
2221                         if (isCommit)
2222                                 relation->rd_newRelfilenodeSubid = parentSubid;
2223                         else
2224                                 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2225                 }
2226
2227                 /*
2228                  * Flush any temporary index list.
2229                  */
2230                 if (relation->rd_indexvalid == 2)
2231                 {
2232                         list_free(relation->rd_indexlist);
2233                         relation->rd_indexlist = NIL;
2234                         relation->rd_oidindex = InvalidOid;
2235                         relation->rd_indexvalid = 0;
2236                 }
2237         }
2238 }
2239
2240 /*
2241  * RelationCacheMarkNewRelfilenode
2242  *
2243  *      Mark the rel as having been given a new relfilenode in the current
2244  *      (sub) transaction.      This is a hint that can be used to optimize
2245  *      later operations on the rel in the same transaction.
2246  */
2247 void
2248 RelationCacheMarkNewRelfilenode(Relation rel)
2249 {
2250         /* Mark it... */
2251         rel->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
2252         /* ... and now we have eoxact cleanup work to do */
2253         need_eoxact_work = true;
2254 }
2255
2256
2257 /*
2258  *              RelationBuildLocalRelation
2259  *                      Build a relcache entry for an about-to-be-created relation,
2260  *                      and enter it into the relcache.
2261  */
2262 Relation
2263 RelationBuildLocalRelation(const char *relname,
2264                                                    Oid relnamespace,
2265                                                    TupleDesc tupDesc,
2266                                                    Oid relid,
2267                                                    Oid reltablespace,
2268                                                    bool shared_relation)
2269 {
2270         Relation        rel;
2271         MemoryContext oldcxt;
2272         int                     natts = tupDesc->natts;
2273         int                     i;
2274         bool            has_not_null;
2275         bool            nailit;
2276
2277         AssertArg(natts >= 0);
2278
2279         /*
2280          * check for creation of a rel that must be nailed in cache.
2281          *
2282          * XXX this list had better match RelationCacheInitializePhase2's list.
2283          */
2284         switch (relid)
2285         {
2286                 case RelationRelationId:
2287                 case AttributeRelationId:
2288                 case ProcedureRelationId:
2289                 case TypeRelationId:
2290                         nailit = true;
2291                         break;
2292                 default:
2293                         nailit = false;
2294                         break;
2295         }
2296
2297         /*
2298          * check that hardwired list of shared rels matches what's in the
2299          * bootstrap .bki file.  If you get a failure here during initdb, you
2300          * probably need to fix IsSharedRelation() to match whatever you've done
2301          * to the set of shared relations.
2302          */
2303         if (shared_relation != IsSharedRelation(relid))
2304                 elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
2305                          relname, relid);
2306
2307         /*
2308          * switch to the cache context to create the relcache entry.
2309          */
2310         if (!CacheMemoryContext)
2311                 CreateCacheMemoryContext();
2312
2313         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2314
2315         /*
2316          * allocate a new relation descriptor and fill in basic state fields.
2317          */
2318         rel = (Relation) palloc0(sizeof(RelationData));
2319
2320         rel->rd_targblock = InvalidBlockNumber;
2321
2322         /* make sure relation is marked as having no open file yet */
2323         rel->rd_smgr = NULL;
2324
2325         /* mark it nailed if appropriate */
2326         rel->rd_isnailed = nailit;
2327
2328         rel->rd_refcnt = nailit ? 1 : 0;
2329
2330         /* it's being created in this transaction */
2331         rel->rd_createSubid = GetCurrentSubTransactionId();
2332         rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2333
2334         /* must flag that we have rels created in this transaction */
2335         need_eoxact_work = true;
2336
2337         /* is it a temporary relation? */
2338         rel->rd_istemp = isTempOrToastNamespace(relnamespace);
2339
2340         /*
2341          * create a new tuple descriptor from the one passed in.  We do this
2342          * partly to copy it into the cache context, and partly because the new
2343          * relation can't have any defaults or constraints yet; they have to be
2344          * added in later steps, because they require additions to multiple system
2345          * catalogs.  We can copy attnotnull constraints here, however.
2346          */
2347         rel->rd_att = CreateTupleDescCopy(tupDesc);
2348         rel->rd_att->tdrefcount = 1;    /* mark as refcounted */
2349         has_not_null = false;
2350         for (i = 0; i < natts; i++)
2351         {
2352                 rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
2353                 has_not_null |= tupDesc->attrs[i]->attnotnull;
2354         }
2355
2356         if (has_not_null)
2357         {
2358                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
2359
2360                 constr->has_not_null = true;
2361                 rel->rd_att->constr = constr;
2362         }
2363
2364         /*
2365          * initialize relation tuple form (caller may add/override data later)
2366          */
2367         rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
2368
2369         namestrcpy(&rel->rd_rel->relname, relname);
2370         rel->rd_rel->relnamespace = relnamespace;
2371
2372         rel->rd_rel->relkind = RELKIND_UNCATALOGED;
2373         rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
2374         rel->rd_rel->relnatts = natts;
2375         rel->rd_rel->reltype = InvalidOid;
2376         /* needed when bootstrapping: */
2377         rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
2378
2379         /*
2380          * Insert relation physical and logical identifiers (OIDs) into the right
2381          * places.      Note that the physical ID (relfilenode) is initially the same
2382          * as the logical ID (OID).
2383          */
2384         rel->rd_rel->relisshared = shared_relation;
2385
2386         RelationGetRelid(rel) = relid;
2387
2388         for (i = 0; i < natts; i++)
2389                 rel->rd_att->attrs[i]->attrelid = relid;
2390
2391         rel->rd_rel->relfilenode = relid;
2392         rel->rd_rel->reltablespace = reltablespace;
2393
2394         RelationInitLockInfo(rel);      /* see lmgr.c */
2395
2396         RelationInitPhysicalAddr(rel);
2397
2398         /*
2399          * Okay to insert into the relcache hash tables.
2400          */
2401         RelationCacheInsert(rel);
2402
2403         /*
2404          * done building relcache entry.
2405          */
2406         MemoryContextSwitchTo(oldcxt);
2407
2408         /* It's fully valid */
2409         rel->rd_isvalid = true;
2410
2411         /*
2412          * Caller expects us to pin the returned entry.
2413          */
2414         RelationIncrementReferenceCount(rel);
2415
2416         return rel;
2417 }
2418
2419 /*
2420  *              RelationCacheInitialize
2421  *
2422  *              This initializes the relation descriptor cache.  At the time
2423  *              that this is invoked, we can't do database access yet (mainly
2424  *              because the transaction subsystem is not up); all we are doing
2425  *              is making an empty cache hashtable.  This must be done before
2426  *              starting the initialization transaction, because otherwise
2427  *              AtEOXact_RelationCache would crash if that transaction aborts
2428  *              before we can get the relcache set up.
2429  */
2430
2431 #define INITRELCACHESIZE                400
2432
2433 void
2434 RelationCacheInitialize(void)
2435 {
2436         MemoryContext oldcxt;
2437         HASHCTL         ctl;
2438
2439         /*
2440          * switch to cache memory context
2441          */
2442         if (!CacheMemoryContext)
2443                 CreateCacheMemoryContext();
2444
2445         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2446
2447         /*
2448          * create hashtable that indexes the relcache
2449          */
2450         MemSet(&ctl, 0, sizeof(ctl));
2451         ctl.keysize = sizeof(Oid);
2452         ctl.entrysize = sizeof(RelIdCacheEnt);
2453         ctl.hash = oid_hash;
2454         RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
2455                                                                   &ctl, HASH_ELEM | HASH_FUNCTION);
2456
2457         MemoryContextSwitchTo(oldcxt);
2458 }
2459
2460 /*
2461  *              RelationCacheInitializePhase2
2462  *
2463  *              This is called as soon as the catcache and transaction system
2464  *              are functional.  At this point we can actually read data from
2465  *              the system catalogs.  We first try to read pre-computed relcache
2466  *              entries from the pg_internal.init file.  If that's missing or
2467  *              broken, make phony entries for the minimum set of nailed-in-cache
2468  *              relations.      Then (unless bootstrapping) make sure we have entries
2469  *              for the critical system indexes.  Once we've done all this, we
2470  *              have enough infrastructure to open any system catalog or use any
2471  *              catcache.  The last step is to rewrite pg_internal.init if needed.
2472  */
2473 void
2474 RelationCacheInitializePhase2(void)
2475 {
2476         HASH_SEQ_STATUS status;
2477         RelIdCacheEnt *idhentry;
2478         MemoryContext oldcxt;
2479         bool            needNewCacheFile = false;
2480
2481         /*
2482          * switch to cache memory context
2483          */
2484         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2485
2486         /*
2487          * Try to load the relcache cache file.  If unsuccessful, bootstrap the
2488          * cache with pre-made descriptors for the critical "nailed-in" system
2489          * catalogs.
2490          */
2491         if (IsBootstrapProcessingMode() ||
2492                 !load_relcache_init_file())
2493         {
2494                 needNewCacheFile = true;
2495
2496                 formrdesc("pg_class", PG_CLASS_RELTYPE_OID,
2497                                   true, Natts_pg_class, Desc_pg_class);
2498                 formrdesc("pg_attribute", PG_ATTRIBUTE_RELTYPE_OID,
2499                                   false, Natts_pg_attribute, Desc_pg_attribute);
2500                 formrdesc("pg_proc", PG_PROC_RELTYPE_OID,
2501                                   true, Natts_pg_proc, Desc_pg_proc);
2502                 formrdesc("pg_type", PG_TYPE_RELTYPE_OID,
2503                                   true, Natts_pg_type, Desc_pg_type);
2504
2505 #define NUM_CRITICAL_RELS       4       /* fix if you change list above */
2506         }
2507
2508         MemoryContextSwitchTo(oldcxt);
2509
2510         /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
2511         if (IsBootstrapProcessingMode())
2512                 return;
2513
2514         /*
2515          * If we didn't get the critical system indexes loaded into relcache, do
2516          * so now.      These are critical because the catcache and/or opclass cache
2517          * depend on them for fetches done during relcache load.  Thus, we have an
2518          * infinite-recursion problem.  We can break the recursion by doing
2519          * heapscans instead of indexscans at certain key spots. To avoid hobbling
2520          * performance, we only want to do that until we have the critical indexes
2521          * loaded into relcache.  Thus, the flag criticalRelcachesBuilt is used to
2522          * decide whether to do heapscan or indexscan at the key spots, and we set
2523          * it true after we've loaded the critical indexes.
2524          *
2525          * The critical indexes are marked as "nailed in cache", partly to make it
2526          * easy for load_relcache_init_file to count them, but mainly because we
2527          * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
2528          * true.  (NOTE: perhaps it would be possible to reload them by
2529          * temporarily setting criticalRelcachesBuilt to false again.  For now,
2530          * though, we just nail 'em in.)
2531          *
2532          * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
2533          * in the same way as the others, because the critical catalogs don't
2534          * (currently) have any rules or triggers, and so these indexes can be
2535          * rebuilt without inducing recursion.  However they are used during
2536          * relcache load when a rel does have rules or triggers, so we choose to
2537          * nail them for performance reasons.
2538          */
2539         if (!criticalRelcachesBuilt)
2540         {
2541                 Relation        ird;
2542
2543 #define LOAD_CRIT_INDEX(indexoid) \
2544                 do { \
2545                         ird = RelationBuildDesc(indexoid, NULL); \
2546                         if (ird == NULL) \
2547                                 elog(PANIC, "could not open critical system index %u", \
2548                                          indexoid); \
2549                         ird->rd_isnailed = true; \
2550                         ird->rd_refcnt = 1; \
2551                 } while (0)
2552
2553                 LOAD_CRIT_INDEX(ClassOidIndexId);
2554                 LOAD_CRIT_INDEX(AttributeRelidNumIndexId);
2555                 LOAD_CRIT_INDEX(IndexRelidIndexId);
2556                 LOAD_CRIT_INDEX(OpclassOidIndexId);
2557                 LOAD_CRIT_INDEX(AccessMethodStrategyIndexId);
2558                 LOAD_CRIT_INDEX(AccessMethodProcedureIndexId);
2559                 LOAD_CRIT_INDEX(OperatorOidIndexId);
2560                 LOAD_CRIT_INDEX(RewriteRelRulenameIndexId);
2561                 LOAD_CRIT_INDEX(TriggerRelidNameIndexId);
2562
2563 #define NUM_CRITICAL_INDEXES    9               /* fix if you change list above */
2564
2565                 criticalRelcachesBuilt = true;
2566         }
2567
2568         /*
2569          * Now, scan all the relcache entries and update anything that might be
2570          * wrong in the results from formrdesc or the relcache cache file. If we
2571          * faked up relcache entries using formrdesc, then read the real pg_class
2572          * rows and replace the fake entries with them. Also, if any of the
2573          * relcache entries have rules or triggers, load that info the hard way
2574          * since it isn't recorded in the cache file.
2575          */
2576         hash_seq_init(&status, RelationIdCache);
2577
2578         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2579         {
2580                 Relation        relation = idhentry->reldesc;
2581
2582                 /*
2583                  * If it's a faked-up entry, read the real pg_class tuple.
2584                  */
2585                 if (needNewCacheFile && relation->rd_isnailed)
2586                 {
2587                         HeapTuple       htup;
2588                         Form_pg_class relp;
2589
2590                         htup = SearchSysCache(RELOID,
2591                                                                 ObjectIdGetDatum(RelationGetRelid(relation)),
2592                                                                   0, 0, 0);
2593                         if (!HeapTupleIsValid(htup))
2594                                 elog(FATAL, "cache lookup failed for relation %u",
2595                                          RelationGetRelid(relation));
2596                         relp = (Form_pg_class) GETSTRUCT(htup);
2597
2598                         /*
2599                          * Copy tuple to relation->rd_rel. (See notes in
2600                          * AllocateRelationDesc())
2601                          */
2602                         Assert(relation->rd_rel != NULL);
2603                         memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
2604
2605                         /* Update rd_options while we have the tuple */
2606                         if (relation->rd_options)
2607                                 pfree(relation->rd_options);
2608                         RelationParseRelOptions(relation, htup);
2609
2610                         /*
2611                          * Also update the derived fields in rd_att.
2612                          */
2613                         relation->rd_att->tdtypeid = relp->reltype;
2614                         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
2615                         relation->rd_att->tdhasoid = relp->relhasoids;
2616
2617                         ReleaseSysCache(htup);
2618                 }
2619
2620                 /*
2621                  * Fix data that isn't saved in relcache cache file.
2622                  */
2623                 if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
2624                         RelationBuildRuleLock(relation);
2625                 if (relation->rd_rel->reltriggers > 0 && relation->trigdesc == NULL)
2626                         RelationBuildTriggers(relation);
2627         }
2628
2629         /*
2630          * Lastly, write out a new relcache cache file if one is needed.
2631          */
2632         if (needNewCacheFile)
2633         {
2634                 /*
2635                  * Force all the catcaches to finish initializing and thereby open the
2636                  * catalogs and indexes they use.  This will preload the relcache with
2637                  * entries for all the most important system catalogs and indexes, so
2638                  * that the init file will be most useful for future backends.
2639                  */
2640                 InitCatalogCachePhase2();
2641
2642                 /* now write the file */
2643                 write_relcache_init_file();
2644         }
2645 }
2646
2647 /*
2648  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
2649  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
2650  *
2651  * We need this kluge because we have to be able to access non-fixed-width
2652  * fields of pg_class and pg_index before we have the standard catalog caches
2653  * available.  We use predefined data that's set up in just the same way as
2654  * the bootstrapped reldescs used by formrdesc().  The resulting tupdesc is
2655  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
2656  * does it have a TupleConstr field.  But it's good enough for the purpose of
2657  * extracting fields.
2658  */
2659 static TupleDesc
2660 BuildHardcodedDescriptor(int natts, Form_pg_attribute attrs, bool hasoids)
2661 {
2662         TupleDesc       result;
2663         MemoryContext oldcxt;
2664         int                     i;
2665
2666         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2667
2668         result = CreateTemplateTupleDesc(natts, hasoids);
2669         result->tdtypeid = RECORDOID;           /* not right, but we don't care */
2670         result->tdtypmod = -1;
2671
2672         for (i = 0; i < natts; i++)
2673         {
2674                 memcpy(result->attrs[i], &attrs[i], ATTRIBUTE_TUPLE_SIZE);
2675                 /* make sure attcacheoff is valid */
2676                 result->attrs[i]->attcacheoff = -1;
2677         }
2678
2679         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
2680         result->attrs[0]->attcacheoff = 0;
2681
2682         /* Note: we don't bother to set up a TupleConstr entry */
2683
2684         MemoryContextSwitchTo(oldcxt);
2685
2686         return result;
2687 }
2688
2689 static TupleDesc
2690 GetPgClassDescriptor(void)
2691 {
2692         static TupleDesc pgclassdesc = NULL;
2693
2694         /* Already done? */
2695         if (pgclassdesc == NULL)
2696                 pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
2697                                                                                            Desc_pg_class,
2698                                                                                            true);
2699
2700         return pgclassdesc;
2701 }
2702
2703 static TupleDesc
2704 GetPgIndexDescriptor(void)
2705 {
2706         static TupleDesc pgindexdesc = NULL;
2707
2708         /* Already done? */
2709         if (pgindexdesc == NULL)
2710                 pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
2711                                                                                            Desc_pg_index,
2712                                                                                            false);
2713
2714         return pgindexdesc;
2715 }
2716
2717 static void
2718 AttrDefaultFetch(Relation relation)
2719 {
2720         AttrDefault *attrdef = relation->rd_att->constr->defval;
2721         int                     ndef = relation->rd_att->constr->num_defval;
2722         Relation        adrel;
2723         SysScanDesc adscan;
2724         ScanKeyData skey;
2725         HeapTuple       htup;
2726         Datum           val;
2727         bool            isnull;
2728         int                     found;
2729         int                     i;
2730
2731         ScanKeyInit(&skey,
2732                                 Anum_pg_attrdef_adrelid,
2733                                 BTEqualStrategyNumber, F_OIDEQ,
2734                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2735
2736         adrel = heap_open(AttrDefaultRelationId, AccessShareLock);
2737         adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
2738                                                                 SnapshotNow, 1, &skey);
2739         found = 0;
2740
2741         while (HeapTupleIsValid(htup = systable_getnext(adscan)))
2742         {
2743                 Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
2744
2745                 for (i = 0; i < ndef; i++)
2746                 {
2747                         if (adform->adnum != attrdef[i].adnum)
2748                                 continue;
2749                         if (attrdef[i].adbin != NULL)
2750                                 elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
2751                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2752                                          RelationGetRelationName(relation));
2753                         else
2754                                 found++;
2755
2756                         val = fastgetattr(htup,
2757                                                           Anum_pg_attrdef_adbin,
2758                                                           adrel->rd_att, &isnull);
2759                         if (isnull)
2760                                 elog(WARNING, "null adbin for attr %s of rel %s",
2761                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2762                                          RelationGetRelationName(relation));
2763                         else
2764                                 attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext,
2765                                                                                                         TextDatumGetCString(val));
2766                         break;
2767                 }
2768
2769                 if (i >= ndef)
2770                         elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
2771                                  adform->adnum, RelationGetRelationName(relation));
2772         }
2773
2774         systable_endscan(adscan);
2775         heap_close(adrel, AccessShareLock);
2776
2777         if (found != ndef)
2778                 elog(WARNING, "%d attrdef record(s) missing for rel %s",
2779                          ndef - found, RelationGetRelationName(relation));
2780 }
2781
2782 static void
2783 CheckConstraintFetch(Relation relation)
2784 {
2785         ConstrCheck *check = relation->rd_att->constr->check;
2786         int                     ncheck = relation->rd_att->constr->num_check;
2787         Relation        conrel;
2788         SysScanDesc conscan;
2789         ScanKeyData skey[1];
2790         HeapTuple       htup;
2791         Datum           val;
2792         bool            isnull;
2793         int                     found = 0;
2794
2795         ScanKeyInit(&skey[0],
2796                                 Anum_pg_constraint_conrelid,
2797                                 BTEqualStrategyNumber, F_OIDEQ,
2798                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2799
2800         conrel = heap_open(ConstraintRelationId, AccessShareLock);
2801         conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
2802                                                                  SnapshotNow, 1, skey);
2803
2804         while (HeapTupleIsValid(htup = systable_getnext(conscan)))
2805         {
2806                 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
2807
2808                 /* We want check constraints only */
2809                 if (conform->contype != CONSTRAINT_CHECK)
2810                         continue;
2811
2812                 if (found >= ncheck)
2813                         elog(ERROR, "unexpected constraint record found for rel %s",
2814                                  RelationGetRelationName(relation));
2815
2816                 check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
2817                                                                                                   NameStr(conform->conname));
2818
2819                 /* Grab and test conbin is actually set */
2820                 val = fastgetattr(htup,
2821                                                   Anum_pg_constraint_conbin,
2822                                                   conrel->rd_att, &isnull);
2823                 if (isnull)
2824                         elog(ERROR, "null conbin for rel %s",
2825                                  RelationGetRelationName(relation));
2826
2827                 check[found].ccbin = MemoryContextStrdup(CacheMemoryContext,
2828                                                                                                  TextDatumGetCString(val));
2829                 found++;
2830         }
2831
2832         systable_endscan(conscan);
2833         heap_close(conrel, AccessShareLock);
2834
2835         if (found != ncheck)
2836                 elog(ERROR, "%d constraint record(s) missing for rel %s",
2837                          ncheck - found, RelationGetRelationName(relation));
2838 }
2839
2840 /*
2841  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
2842  *
2843  * The index list is created only if someone requests it.  We scan pg_index
2844  * to find relevant indexes, and add the list to the relcache entry so that
2845  * we won't have to compute it again.  Note that shared cache inval of a
2846  * relcache entry will delete the old list and set rd_indexvalid to 0,
2847  * so that we must recompute the index list on next request.  This handles
2848  * creation or deletion of an index.
2849  *
2850  * The returned list is guaranteed to be sorted in order by OID.  This is
2851  * needed by the executor, since for index types that we obtain exclusive
2852  * locks on when updating the index, all backends must lock the indexes in
2853  * the same order or we will get deadlocks (see ExecOpenIndices()).  Any
2854  * consistent ordering would do, but ordering by OID is easy.
2855  *
2856  * Since shared cache inval causes the relcache's copy of the list to go away,
2857  * we return a copy of the list palloc'd in the caller's context.  The caller
2858  * may list_free() the returned list after scanning it. This is necessary
2859  * since the caller will typically be doing syscache lookups on the relevant
2860  * indexes, and syscache lookup could cause SI messages to be processed!
2861  *
2862  * We also update rd_oidindex, which this module treats as effectively part
2863  * of the index list.  rd_oidindex is valid when rd_indexvalid isn't zero;
2864  * it is the pg_class OID of a unique index on OID when the relation has one,
2865  * and InvalidOid if there is no such index.
2866  */
2867 List *
2868 RelationGetIndexList(Relation relation)
2869 {
2870         Relation        indrel;
2871         SysScanDesc indscan;
2872         ScanKeyData skey;
2873         HeapTuple       htup;
2874         List       *result;
2875         Oid                     oidIndex;
2876         MemoryContext oldcxt;
2877
2878         /* Quick exit if we already computed the list. */
2879         if (relation->rd_indexvalid != 0)
2880                 return list_copy(relation->rd_indexlist);
2881
2882         /*
2883          * We build the list we intend to return (in the caller's context) while
2884          * doing the scan.      After successfully completing the scan, we copy that
2885          * list into the relcache entry.  This avoids cache-context memory leakage
2886          * if we get some sort of error partway through.
2887          */
2888         result = NIL;
2889         oidIndex = InvalidOid;
2890
2891         /* Prepare to scan pg_index for entries having indrelid = this rel. */
2892         ScanKeyInit(&skey,
2893                                 Anum_pg_index_indrelid,
2894                                 BTEqualStrategyNumber, F_OIDEQ,
2895                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2896
2897         indrel = heap_open(IndexRelationId, AccessShareLock);
2898         indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
2899                                                                  SnapshotNow, 1, &skey);
2900
2901         while (HeapTupleIsValid(htup = systable_getnext(indscan)))
2902         {
2903                 Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
2904
2905                 /* Add index's OID to result list in the proper order */
2906                 result = insert_ordered_oid(result, index->indexrelid);
2907
2908                 /* Check to see if it is a unique, non-partial btree index on OID */
2909                 if (index->indnatts == 1 &&
2910                         index->indisunique &&
2911                         index->indkey.values[0] == ObjectIdAttributeNumber &&
2912                         index->indclass.values[0] == OID_BTREE_OPS_OID &&
2913                         heap_attisnull(htup, Anum_pg_index_indpred))
2914                         oidIndex = index->indexrelid;
2915         }
2916
2917         systable_endscan(indscan);
2918         heap_close(indrel, AccessShareLock);
2919
2920         /* Now save a copy of the completed list in the relcache entry. */
2921         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2922         relation->rd_indexlist = list_copy(result);
2923         relation->rd_oidindex = oidIndex;
2924         relation->rd_indexvalid = 1;
2925         MemoryContextSwitchTo(oldcxt);
2926
2927         return result;
2928 }
2929
2930 /*
2931  * insert_ordered_oid
2932  *              Insert a new Oid into a sorted list of Oids, preserving ordering
2933  *
2934  * Building the ordered list this way is O(N^2), but with a pretty small
2935  * constant, so for the number of entries we expect it will probably be
2936  * faster than trying to apply qsort().  Most tables don't have very many
2937  * indexes...
2938  */
2939 static List *
2940 insert_ordered_oid(List *list, Oid datum)
2941 {
2942         ListCell   *prev;
2943
2944         /* Does the datum belong at the front? */
2945         if (list == NIL || datum < linitial_oid(list))
2946                 return lcons_oid(datum, list);
2947         /* No, so find the entry it belongs after */
2948         prev = list_head(list);
2949         for (;;)
2950         {
2951                 ListCell   *curr = lnext(prev);
2952
2953                 if (curr == NULL || datum < lfirst_oid(curr))
2954                         break;                          /* it belongs after 'prev', before 'curr' */
2955
2956                 prev = curr;
2957         }
2958         /* Insert datum into list after 'prev' */
2959         lappend_cell_oid(list, prev, datum);
2960         return list;
2961 }
2962
2963 /*
2964  * RelationSetIndexList -- externally force the index list contents
2965  *
2966  * This is used to temporarily override what we think the set of valid
2967  * indexes is (including the presence or absence of an OID index).
2968  * The forcing will be valid only until transaction commit or abort.
2969  *
2970  * This should only be applied to nailed relations, because in a non-nailed
2971  * relation the hacked index list could be lost at any time due to SI
2972  * messages.  In practice it is only used on pg_class (see REINDEX).
2973  *
2974  * It is up to the caller to make sure the given list is correctly ordered.
2975  */
2976 void
2977 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
2978 {
2979         MemoryContext oldcxt;
2980
2981         Assert(relation->rd_isnailed);
2982         /* Copy the list into the cache context (could fail for lack of mem) */
2983         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2984         indexIds = list_copy(indexIds);
2985         MemoryContextSwitchTo(oldcxt);
2986         /* Okay to replace old list */
2987         list_free(relation->rd_indexlist);
2988         relation->rd_indexlist = indexIds;
2989         relation->rd_oidindex = oidIndex;
2990         relation->rd_indexvalid = 2;    /* mark list as forced */
2991         /* must flag that we have a forced index list */
2992         need_eoxact_work = true;
2993         /* we deliberately do not change rd_indexattr */
2994 }
2995
2996 /*
2997  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
2998  *
2999  * Returns InvalidOid if there is no such index.
3000  */
3001 Oid
3002 RelationGetOidIndex(Relation relation)
3003 {
3004         List       *ilist;
3005
3006         /*
3007          * If relation doesn't have OIDs at all, caller is probably confused. (We
3008          * could just silently return InvalidOid, but it seems better to throw an
3009          * assertion.)
3010          */
3011         Assert(relation->rd_rel->relhasoids);
3012
3013         if (relation->rd_indexvalid == 0)
3014         {
3015                 /* RelationGetIndexList does the heavy lifting. */
3016                 ilist = RelationGetIndexList(relation);
3017                 list_free(ilist);
3018                 Assert(relation->rd_indexvalid != 0);
3019         }
3020
3021         return relation->rd_oidindex;
3022 }
3023
3024 /*
3025  * RelationGetIndexExpressions -- get the index expressions for an index
3026  *
3027  * We cache the result of transforming pg_index.indexprs into a node tree.
3028  * If the rel is not an index or has no expressional columns, we return NIL.
3029  * Otherwise, the returned tree is copied into the caller's memory context.
3030  * (We don't want to return a pointer to the relcache copy, since it could
3031  * disappear due to relcache invalidation.)
3032  */
3033 List *
3034 RelationGetIndexExpressions(Relation relation)
3035 {
3036         List       *result;
3037         Datum           exprsDatum;
3038         bool            isnull;
3039         char       *exprsString;
3040         MemoryContext oldcxt;
3041
3042         /* Quick exit if we already computed the result. */
3043         if (relation->rd_indexprs)
3044                 return (List *) copyObject(relation->rd_indexprs);
3045
3046         /* Quick exit if there is nothing to do. */
3047         if (relation->rd_indextuple == NULL ||
3048                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
3049                 return NIL;
3050
3051         /*
3052          * We build the tree we intend to return in the caller's context. After
3053          * successfully completing the work, we copy it into the relcache entry.
3054          * This avoids problems if we get some sort of error partway through.
3055          */
3056         exprsDatum = heap_getattr(relation->rd_indextuple,
3057                                                           Anum_pg_index_indexprs,
3058                                                           GetPgIndexDescriptor(),
3059                                                           &isnull);
3060         Assert(!isnull);
3061         exprsString = TextDatumGetCString(exprsDatum);
3062         result = (List *) stringToNode(exprsString);
3063         pfree(exprsString);
3064
3065         /*
3066          * Run the expressions through eval_const_expressions. This is not just an
3067          * optimization, but is necessary, because the planner will be comparing
3068          * them to similarly-processed qual clauses, and may fail to detect valid
3069          * matches without this.  We don't bother with canonicalize_qual, however.
3070          */
3071         result = (List *) eval_const_expressions(NULL, (Node *) result);
3072
3073         /*
3074          * Also mark any coercion format fields as "don't care", so that the
3075          * planner can match to both explicit and implicit coercions.
3076          */
3077         set_coercionform_dontcare((Node *) result);
3078
3079         /* May as well fix opfuncids too */
3080         fix_opfuncids((Node *) result);
3081
3082         /* Now save a copy of the completed tree in the relcache entry. */
3083         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3084         relation->rd_indexprs = (List *) copyObject(result);
3085         MemoryContextSwitchTo(oldcxt);
3086
3087         return result;
3088 }
3089
3090 /*
3091  * RelationGetIndexPredicate -- get the index predicate for an index
3092  *
3093  * We cache the result of transforming pg_index.indpred into an implicit-AND
3094  * node tree (suitable for ExecQual).
3095  * If the rel is not an index or has no predicate, we return NIL.
3096  * Otherwise, the returned tree is copied into the caller's memory context.
3097  * (We don't want to return a pointer to the relcache copy, since it could
3098  * disappear due to relcache invalidation.)
3099  */
3100 List *
3101 RelationGetIndexPredicate(Relation relation)
3102 {
3103         List       *result;
3104         Datum           predDatum;
3105         bool            isnull;
3106         char       *predString;
3107         MemoryContext oldcxt;
3108
3109         /* Quick exit if we already computed the result. */
3110         if (relation->rd_indpred)
3111                 return (List *) copyObject(relation->rd_indpred);
3112
3113         /* Quick exit if there is nothing to do. */
3114         if (relation->rd_indextuple == NULL ||
3115                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
3116                 return NIL;
3117
3118         /*
3119          * We build the tree we intend to return in the caller's context. After
3120          * successfully completing the work, we copy it into the relcache entry.
3121          * This avoids problems if we get some sort of error partway through.
3122          */
3123         predDatum = heap_getattr(relation->rd_indextuple,
3124                                                          Anum_pg_index_indpred,
3125                                                          GetPgIndexDescriptor(),
3126                                                          &isnull);
3127         Assert(!isnull);
3128         predString = TextDatumGetCString(predDatum);
3129         result = (List *) stringToNode(predString);
3130         pfree(predString);
3131
3132         /*
3133          * Run the expression through const-simplification and canonicalization.
3134          * This is not just an optimization, but is necessary, because the planner
3135          * will be comparing it to similarly-processed qual clauses, and may fail
3136          * to detect valid matches without this.  This must match the processing
3137          * done to qual clauses in preprocess_expression()!  (We can skip the
3138          * stuff involving subqueries, however, since we don't allow any in index
3139          * predicates.)
3140          */
3141         result = (List *) eval_const_expressions(NULL, (Node *) result);
3142
3143         result = (List *) canonicalize_qual((Expr *) result);
3144
3145         /*
3146          * Also mark any coercion format fields as "don't care", so that the
3147          * planner can match to both explicit and implicit coercions.
3148          */
3149         set_coercionform_dontcare((Node *) result);
3150
3151         /* Also convert to implicit-AND format */
3152         result = make_ands_implicit((Expr *) result);
3153
3154         /* May as well fix opfuncids too */
3155         fix_opfuncids((Node *) result);
3156
3157         /* Now save a copy of the completed tree in the relcache entry. */
3158         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3159         relation->rd_indpred = (List *) copyObject(result);
3160         MemoryContextSwitchTo(oldcxt);
3161
3162         return result;
3163 }
3164
3165 /*
3166  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
3167  *
3168  * The result has a bit set for each attribute used anywhere in the index
3169  * definitions of all the indexes on this relation.  (This includes not only
3170  * simple index keys, but attributes used in expressions and partial-index
3171  * predicates.)
3172  *
3173  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
3174  * we can include system attributes (e.g., OID) in the bitmap representation.
3175  *
3176  * The returned result is palloc'd in the caller's memory context and should
3177  * be bms_free'd when not needed anymore.
3178  */
3179 Bitmapset *
3180 RelationGetIndexAttrBitmap(Relation relation)
3181 {
3182         Bitmapset  *indexattrs;
3183         List       *indexoidlist;
3184         ListCell   *l;
3185         MemoryContext oldcxt;
3186
3187         /* Quick exit if we already computed the result. */
3188         if (relation->rd_indexattr != NULL)
3189                 return bms_copy(relation->rd_indexattr);
3190
3191         /* Fast path if definitely no indexes */
3192         if (!RelationGetForm(relation)->relhasindex)
3193                 return NULL;
3194
3195         /*
3196          * Get cached list of index OIDs
3197          */
3198         indexoidlist = RelationGetIndexList(relation);
3199
3200         /* Fall out if no indexes (but relhasindex was set) */
3201         if (indexoidlist == NIL)
3202                 return NULL;
3203
3204         /*
3205          * For each index, add referenced attributes to indexattrs.
3206          */
3207         indexattrs = NULL;
3208         foreach(l, indexoidlist)
3209         {
3210                 Oid                     indexOid = lfirst_oid(l);
3211                 Relation        indexDesc;
3212                 IndexInfo  *indexInfo;
3213                 int                     i;
3214
3215                 indexDesc = index_open(indexOid, AccessShareLock);
3216
3217                 /* Extract index key information from the index's pg_index row */
3218                 indexInfo = BuildIndexInfo(indexDesc);
3219
3220                 /* Collect simple attribute references */
3221                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
3222                 {
3223                         int                     attrnum = indexInfo->ii_KeyAttrNumbers[i];
3224
3225                         if (attrnum != 0)
3226                                 indexattrs = bms_add_member(indexattrs,
3227                                                            attrnum - FirstLowInvalidHeapAttributeNumber);
3228                 }
3229
3230                 /* Collect all attributes used in expressions, too */
3231                 pull_varattnos((Node *) indexInfo->ii_Expressions, &indexattrs);
3232
3233                 /* Collect all attributes in the index predicate, too */
3234                 pull_varattnos((Node *) indexInfo->ii_Predicate, &indexattrs);
3235
3236                 index_close(indexDesc, AccessShareLock);
3237         }
3238
3239         list_free(indexoidlist);
3240
3241         /* Now save a copy of the bitmap in the relcache entry. */
3242         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3243         relation->rd_indexattr = bms_copy(indexattrs);
3244         MemoryContextSwitchTo(oldcxt);
3245
3246         /* We return our original working copy for caller to play with */
3247         return indexattrs;
3248 }
3249
3250
3251 /*
3252  *      load_relcache_init_file, write_relcache_init_file
3253  *
3254  *              In late 1992, we started regularly having databases with more than
3255  *              a thousand classes in them.  With this number of classes, it became
3256  *              critical to do indexed lookups on the system catalogs.
3257  *
3258  *              Bootstrapping these lookups is very hard.  We want to be able to
3259  *              use an index on pg_attribute, for example, but in order to do so,
3260  *              we must have read pg_attribute for the attributes in the index,
3261  *              which implies that we need to use the index.
3262  *
3263  *              In order to get around the problem, we do the following:
3264  *
3265  *                 +  When the database system is initialized (at initdb time), we
3266  *                        don't use indexes.  We do sequential scans.
3267  *
3268  *                 +  When the backend is started up in normal mode, we load an image
3269  *                        of the appropriate relation descriptors, in internal format,
3270  *                        from an initialization file in the data/base/... directory.
3271  *
3272  *                 +  If the initialization file isn't there, then we create the
3273  *                        relation descriptors using sequential scans and write 'em to
3274  *                        the initialization file for use by subsequent backends.
3275  *
3276  *              We could dispense with the initialization file and just build the
3277  *              critical reldescs the hard way on every backend startup, but that
3278  *              slows down backend startup noticeably.
3279  *
3280  *              We can in fact go further, and save more relcache entries than
3281  *              just the ones that are absolutely critical; this allows us to speed
3282  *              up backend startup by not having to build such entries the hard way.
3283  *              Presently, all the catalog and index entries that are referred to
3284  *              by catcaches are stored in the initialization file.
3285  *
3286  *              The same mechanism that detects when catcache and relcache entries
3287  *              need to be invalidated (due to catalog updates) also arranges to
3288  *              unlink the initialization file when its contents may be out of date.
3289  *              The file will then be rebuilt during the next backend startup.
3290  */
3291
3292 /*
3293  * load_relcache_init_file -- attempt to load cache from the init file
3294  *
3295  * If successful, return TRUE and set criticalRelcachesBuilt to true.
3296  * If not successful, return FALSE.
3297  *
3298  * NOTE: we assume we are already switched into CacheMemoryContext.
3299  */
3300 static bool
3301 load_relcache_init_file(void)
3302 {
3303         FILE       *fp;
3304         char            initfilename[MAXPGPATH];
3305         Relation   *rels;
3306         int                     relno,
3307                                 num_rels,
3308                                 max_rels,
3309                                 nailed_rels,
3310                                 nailed_indexes,
3311                                 magic;
3312         int                     i;
3313
3314         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3315                          DatabasePath, RELCACHE_INIT_FILENAME);
3316
3317         fp = AllocateFile(initfilename, PG_BINARY_R);
3318         if (fp == NULL)
3319                 return false;
3320
3321         /*
3322          * Read the index relcache entries from the file.  Note we will not enter
3323          * any of them into the cache if the read fails partway through; this
3324          * helps to guard against broken init files.
3325          */
3326         max_rels = 100;
3327         rels = (Relation *) palloc(max_rels * sizeof(Relation));
3328         num_rels = 0;
3329         nailed_rels = nailed_indexes = 0;
3330         initFileRelationIds = NIL;
3331
3332         /* check for correct magic number (compatible version) */
3333         if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3334                 goto read_failed;
3335         if (magic != RELCACHE_INIT_FILEMAGIC)
3336                 goto read_failed;
3337
3338         for (relno = 0;; relno++)
3339         {
3340                 Size            len;
3341                 size_t          nread;
3342                 Relation        rel;
3343                 Form_pg_class relform;
3344                 bool            has_not_null;
3345
3346                 /* first read the relation descriptor length */
3347                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3348                 {
3349                         if (nread == 0)
3350                                 break;                  /* end of file */
3351                         goto read_failed;
3352                 }
3353
3354                 /* safety check for incompatible relcache layout */
3355                 if (len != sizeof(RelationData))
3356                         goto read_failed;
3357
3358                 /* allocate another relcache header */
3359                 if (num_rels >= max_rels)
3360                 {
3361                         max_rels *= 2;
3362                         rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
3363                 }
3364
3365                 rel = rels[num_rels++] = (Relation) palloc(len);
3366
3367                 /* then, read the Relation structure */
3368                 if ((nread = fread(rel, 1, len, fp)) != len)
3369                         goto read_failed;
3370
3371                 /* next read the relation tuple form */
3372                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3373                         goto read_failed;
3374
3375                 relform = (Form_pg_class) palloc(len);
3376                 if ((nread = fread(relform, 1, len, fp)) != len)
3377                         goto read_failed;
3378
3379                 rel->rd_rel = relform;
3380
3381                 /* initialize attribute tuple forms */
3382                 rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
3383                                                                                           relform->relhasoids);
3384                 rel->rd_att->tdrefcount = 1;    /* mark as refcounted */
3385
3386                 rel->rd_att->tdtypeid = relform->reltype;
3387                 rel->rd_att->tdtypmod = -1;             /* unnecessary, but... */
3388
3389                 /* next read all the attribute tuple form data entries */
3390                 has_not_null = false;
3391                 for (i = 0; i < relform->relnatts; i++)
3392                 {
3393                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3394                                 goto read_failed;
3395                         if (len != ATTRIBUTE_TUPLE_SIZE)
3396                                 goto read_failed;
3397                         if ((nread = fread(rel->rd_att->attrs[i], 1, len, fp)) != len)
3398                                 goto read_failed;
3399
3400                         has_not_null |= rel->rd_att->attrs[i]->attnotnull;
3401                 }
3402
3403                 /* next read the access method specific field */
3404                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3405                         goto read_failed;
3406                 if (len > 0)
3407                 {
3408                         rel->rd_options = palloc(len);
3409                         if ((nread = fread(rel->rd_options, 1, len, fp)) != len)
3410                                 goto read_failed;
3411                         if (len != VARSIZE(rel->rd_options))
3412                                 goto read_failed;               /* sanity check */
3413                 }
3414                 else
3415                 {
3416                         rel->rd_options = NULL;
3417                 }
3418
3419                 /* mark not-null status */
3420                 if (has_not_null)
3421                 {
3422                         TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3423
3424                         constr->has_not_null = true;
3425                         rel->rd_att->constr = constr;
3426                 }
3427
3428                 /* If it's an index, there's more to do */
3429                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3430                 {
3431                         Form_pg_am      am;
3432                         MemoryContext indexcxt;
3433                         Oid                *opfamily;
3434                         Oid                *opcintype;
3435                         Oid                *operator;
3436                         RegProcedure *support;
3437                         int                     nsupport;
3438                         int16      *indoption;
3439
3440                         /* Count nailed indexes to ensure we have 'em all */
3441                         if (rel->rd_isnailed)
3442                                 nailed_indexes++;
3443
3444                         /* next, read the pg_index tuple */
3445                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3446                                 goto read_failed;
3447
3448                         rel->rd_indextuple = (HeapTuple) palloc(len);
3449                         if ((nread = fread(rel->rd_indextuple, 1, len, fp)) != len)
3450                                 goto read_failed;
3451
3452                         /* Fix up internal pointers in the tuple -- see heap_copytuple */
3453                         rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
3454                         rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
3455
3456                         /* next, read the access method tuple form */
3457                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3458                                 goto read_failed;
3459
3460                         am = (Form_pg_am) palloc(len);
3461                         if ((nread = fread(am, 1, len, fp)) != len)
3462                                 goto read_failed;
3463                         rel->rd_am = am;
3464
3465                         /*
3466                          * prepare index info context --- parameters should match
3467                          * RelationInitIndexAccessInfo
3468                          */
3469                         indexcxt = AllocSetContextCreate(CacheMemoryContext,
3470                                                                                          RelationGetRelationName(rel),
3471                                                                                          ALLOCSET_SMALL_MINSIZE,
3472                                                                                          ALLOCSET_SMALL_INITSIZE,
3473                                                                                          ALLOCSET_SMALL_MAXSIZE);
3474                         rel->rd_indexcxt = indexcxt;
3475
3476                         /* next, read the vector of opfamily OIDs */
3477                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3478                                 goto read_failed;
3479
3480                         opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
3481                         if ((nread = fread(opfamily, 1, len, fp)) != len)
3482                                 goto read_failed;
3483
3484                         rel->rd_opfamily = opfamily;
3485
3486                         /* next, read the vector of opcintype OIDs */
3487                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3488                                 goto read_failed;
3489
3490                         opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
3491                         if ((nread = fread(opcintype, 1, len, fp)) != len)
3492                                 goto read_failed;
3493
3494                         rel->rd_opcintype = opcintype;
3495
3496                         /* next, read the vector of operator OIDs */
3497                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3498                                 goto read_failed;
3499
3500                         operator = (Oid *) MemoryContextAlloc(indexcxt, len);
3501                         if ((nread = fread(operator, 1, len, fp)) != len)
3502                                 goto read_failed;
3503
3504                         rel->rd_operator = operator;
3505
3506                         /* next, read the vector of support procedures */
3507                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3508                                 goto read_failed;
3509                         support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
3510                         if ((nread = fread(support, 1, len, fp)) != len)
3511                                 goto read_failed;
3512
3513                         rel->rd_support = support;
3514
3515                         /* finally, read the vector of indoption values */
3516                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3517                                 goto read_failed;
3518
3519                         indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
3520                         if ((nread = fread(indoption, 1, len, fp)) != len)
3521                                 goto read_failed;
3522
3523                         rel->rd_indoption = indoption;
3524
3525                         /* set up zeroed fmgr-info vectors */
3526                         rel->rd_aminfo = (RelationAmInfo *)
3527                                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
3528                         nsupport = relform->relnatts * am->amsupport;
3529                         rel->rd_supportinfo = (FmgrInfo *)
3530                                 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
3531                 }
3532                 else
3533                 {
3534                         /* Count nailed rels to ensure we have 'em all */
3535                         if (rel->rd_isnailed)
3536                                 nailed_rels++;
3537
3538                         Assert(rel->rd_index == NULL);
3539                         Assert(rel->rd_indextuple == NULL);
3540                         Assert(rel->rd_am == NULL);
3541                         Assert(rel->rd_indexcxt == NULL);
3542                         Assert(rel->rd_aminfo == NULL);
3543                         Assert(rel->rd_opfamily == NULL);
3544                         Assert(rel->rd_opcintype == NULL);
3545                         Assert(rel->rd_operator == NULL);
3546                         Assert(rel->rd_support == NULL);
3547                         Assert(rel->rd_supportinfo == NULL);
3548                         Assert(rel->rd_indoption == NULL);
3549                 }
3550
3551                 /*
3552                  * Rules and triggers are not saved (mainly because the internal
3553                  * format is complex and subject to change).  They must be rebuilt if
3554                  * needed by RelationCacheInitializePhase2.  This is not expected to
3555                  * be a big performance hit since few system catalogs have such. Ditto
3556                  * for index expressions and predicates.
3557                  */
3558                 rel->rd_rules = NULL;
3559                 rel->rd_rulescxt = NULL;
3560                 rel->trigdesc = NULL;
3561                 rel->rd_indexprs = NIL;
3562                 rel->rd_indpred = NIL;
3563
3564                 /*
3565                  * Reset transient-state fields in the relcache entry
3566                  */
3567                 rel->rd_smgr = NULL;
3568                 rel->rd_targblock = InvalidBlockNumber;
3569                 if (rel->rd_isnailed)
3570                         rel->rd_refcnt = 1;
3571                 else
3572                         rel->rd_refcnt = 0;
3573                 rel->rd_indexvalid = 0;
3574                 rel->rd_indexlist = NIL;
3575                 rel->rd_indexattr = NULL;
3576                 rel->rd_oidindex = InvalidOid;
3577                 rel->rd_createSubid = InvalidSubTransactionId;
3578                 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3579                 rel->rd_amcache = NULL;
3580                 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
3581
3582                 /*
3583                  * Recompute lock and physical addressing info.  This is needed in
3584                  * case the pg_internal.init file was copied from some other database
3585                  * by CREATE DATABASE.
3586                  */
3587                 RelationInitLockInfo(rel);
3588                 RelationInitPhysicalAddr(rel);
3589         }
3590
3591         /*
3592          * We reached the end of the init file without apparent problem. Did we
3593          * get the right number of nailed items?  (This is a useful crosscheck in
3594          * case the set of critical rels or indexes changes.)
3595          */
3596         if (nailed_rels != NUM_CRITICAL_RELS ||
3597                 nailed_indexes != NUM_CRITICAL_INDEXES)
3598                 goto read_failed;
3599
3600         /*
3601          * OK, all appears well.
3602          *
3603          * Now insert all the new relcache entries into the cache.
3604          */
3605         for (relno = 0; relno < num_rels; relno++)
3606         {
3607                 RelationCacheInsert(rels[relno]);
3608                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3609                 initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
3610                                                                                 initFileRelationIds);
3611         }
3612
3613         pfree(rels);
3614         FreeFile(fp);
3615
3616         criticalRelcachesBuilt = true;
3617         return true;
3618
3619         /*
3620          * init file is broken, so do it the hard way.  We don't bother trying to
3621          * free the clutter we just allocated; it's not in the relcache so it
3622          * won't hurt.
3623          */
3624 read_failed:
3625         pfree(rels);
3626         FreeFile(fp);
3627
3628         return false;
3629 }
3630
3631 /*
3632  * Write out a new initialization file with the current contents
3633  * of the relcache.
3634  */
3635 static void
3636 write_relcache_init_file(void)
3637 {
3638         FILE       *fp;
3639         char            tempfilename[MAXPGPATH];
3640         char            finalfilename[MAXPGPATH];
3641         int                     magic;
3642         HASH_SEQ_STATUS status;
3643         RelIdCacheEnt *idhentry;
3644         MemoryContext oldcxt;
3645         int                     i;
3646
3647         /*
3648          * We must write a temporary file and rename it into place. Otherwise,
3649          * another backend starting at about the same time might crash trying to
3650          * read the partially-complete file.
3651          */
3652         snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
3653                          DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
3654         snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
3655                          DatabasePath, RELCACHE_INIT_FILENAME);
3656
3657         unlink(tempfilename);           /* in case it exists w/wrong permissions */
3658
3659         fp = AllocateFile(tempfilename, PG_BINARY_W);
3660         if (fp == NULL)
3661         {
3662                 /*
3663                  * We used to consider this a fatal error, but we might as well
3664                  * continue with backend startup ...
3665                  */
3666                 ereport(WARNING,
3667                                 (errcode_for_file_access(),
3668                                  errmsg("could not create relation-cache initialization file \"%s\": %m",
3669                                                 tempfilename),
3670                           errdetail("Continuing anyway, but there's something wrong.")));
3671                 return;
3672         }
3673
3674         /*
3675          * Write a magic number to serve as a file version identifier.  We can
3676          * change the magic number whenever the relcache layout changes.
3677          */
3678         magic = RELCACHE_INIT_FILEMAGIC;
3679         if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3680                 elog(FATAL, "could not write init file");
3681
3682         /*
3683          * Write all the reldescs (in no particular order).
3684          */
3685         hash_seq_init(&status, RelationIdCache);
3686
3687         initFileRelationIds = NIL;
3688
3689         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3690         {
3691                 Relation        rel = idhentry->reldesc;
3692                 Form_pg_class relform = rel->rd_rel;
3693
3694                 /* first write the relcache entry proper */
3695                 write_item(rel, sizeof(RelationData), fp);
3696
3697                 /* next write the relation tuple form */
3698                 write_item(relform, CLASS_TUPLE_SIZE, fp);
3699
3700                 /* next, do all the attribute tuple form data entries */
3701                 for (i = 0; i < relform->relnatts; i++)
3702                 {
3703                         write_item(rel->rd_att->attrs[i], ATTRIBUTE_TUPLE_SIZE, fp);
3704                 }
3705
3706                 /* next, do the access method specific field */
3707                 write_item(rel->rd_options,
3708                                    (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
3709                                    fp);
3710
3711                 /* If it's an index, there's more to do */
3712                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3713                 {
3714                         Form_pg_am      am = rel->rd_am;
3715
3716                         /* write the pg_index tuple */
3717                         /* we assume this was created by heap_copytuple! */
3718                         write_item(rel->rd_indextuple,
3719                                            HEAPTUPLESIZE + rel->rd_indextuple->t_len,
3720                                            fp);
3721
3722                         /* next, write the access method tuple form */
3723                         write_item(am, sizeof(FormData_pg_am), fp);
3724
3725                         /* next, write the vector of opfamily OIDs */
3726                         write_item(rel->rd_opfamily,
3727                                            relform->relnatts * sizeof(Oid),
3728                                            fp);
3729
3730                         /* next, write the vector of opcintype OIDs */
3731                         write_item(rel->rd_opcintype,
3732                                            relform->relnatts * sizeof(Oid),
3733                                            fp);
3734
3735                         /* next, write the vector of operator OIDs */
3736                         write_item(rel->rd_operator,
3737                                            relform->relnatts * (am->amstrategies * sizeof(Oid)),
3738                                            fp);
3739
3740                         /* next, write the vector of support procedures */
3741                         write_item(rel->rd_support,
3742                                   relform->relnatts * (am->amsupport * sizeof(RegProcedure)),
3743                                            fp);
3744
3745                         /* finally, write the vector of indoption values */
3746                         write_item(rel->rd_indoption,
3747                                            relform->relnatts * sizeof(int16),
3748                                            fp);
3749                 }
3750
3751                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3752                 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3753                 initFileRelationIds = lcons_oid(RelationGetRelid(rel),
3754                                                                                 initFileRelationIds);
3755                 MemoryContextSwitchTo(oldcxt);
3756         }
3757
3758         if (FreeFile(fp))
3759                 elog(FATAL, "could not write init file");
3760
3761         /*
3762          * Now we have to check whether the data we've so painstakingly
3763          * accumulated is already obsolete due to someone else's just-committed
3764          * catalog changes.  If so, we just delete the temp file and leave it to
3765          * the next backend to try again.  (Our own relcache entries will be
3766          * updated by SI message processing, but we can't be sure whether what we
3767          * wrote out was up-to-date.)
3768          *
3769          * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
3770          * grab a serialization lock for the duration.
3771          */
3772         LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3773
3774         /* Make sure we have seen all incoming SI messages */
3775         AcceptInvalidationMessages();
3776
3777         /*
3778          * If we have received any SI relcache invals since backend start, assume
3779          * we may have written out-of-date data.
3780          */
3781         if (relcacheInvalsReceived == 0L)
3782         {
3783                 /*
3784                  * OK, rename the temp file to its final name, deleting any
3785                  * previously-existing init file.
3786                  *
3787                  * Note: a failure here is possible under Cygwin, if some other
3788                  * backend is holding open an unlinked-but-not-yet-gone init file. So
3789                  * treat this as a noncritical failure; just remove the useless temp
3790                  * file on failure.
3791                  */
3792                 if (rename(tempfilename, finalfilename) < 0)
3793                         unlink(tempfilename);
3794         }
3795         else
3796         {
3797                 /* Delete the already-obsolete temp file */
3798                 unlink(tempfilename);
3799         }
3800
3801         LWLockRelease(RelCacheInitLock);
3802 }
3803
3804 /* write a chunk of data preceded by its length */
3805 static void
3806 write_item(const void *data, Size len, FILE *fp)
3807 {
3808         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3809                 elog(FATAL, "could not write init file");
3810         if (fwrite(data, 1, len, fp) != len)
3811                 elog(FATAL, "could not write init file");
3812 }
3813
3814 /*
3815  * Detect whether a given relation (identified by OID) is one of the ones
3816  * we store in the init file.
3817  *
3818  * Note that we effectively assume that all backends running in a database
3819  * would choose to store the same set of relations in the init file;
3820  * otherwise there are cases where we'd fail to detect the need for an init
3821  * file invalidation.  This does not seem likely to be a problem in practice.
3822  */
3823 bool
3824 RelationIdIsInInitFile(Oid relationId)
3825 {
3826         return list_member_oid(initFileRelationIds, relationId);
3827 }
3828
3829 /*
3830  * Invalidate (remove) the init file during commit of a transaction that
3831  * changed one or more of the relation cache entries that are kept in the
3832  * init file.
3833  *
3834  * We actually need to remove the init file twice: once just before sending
3835  * the SI messages that include relcache inval for such relations, and once
3836  * just after sending them.  The unlink before ensures that a backend that's
3837  * currently starting cannot read the now-obsolete init file and then miss
3838  * the SI messages that will force it to update its relcache entries.  (This
3839  * works because the backend startup sequence gets into the PGPROC array before
3840  * trying to load the init file.)  The unlink after is to synchronize with a
3841  * backend that may currently be trying to write an init file based on data
3842  * that we've just rendered invalid.  Such a backend will see the SI messages,
3843  * but we can't leave the init file sitting around to fool later backends.
3844  *
3845  * Ignore any failure to unlink the file, since it might not be there if
3846  * no backend has been started since the last removal.
3847  */
3848 void
3849 RelationCacheInitFileInvalidate(bool beforeSend)
3850 {
3851         char            initfilename[MAXPGPATH];
3852
3853         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3854                          DatabasePath, RELCACHE_INIT_FILENAME);
3855
3856         if (beforeSend)
3857         {
3858                 /* no interlock needed here */
3859                 unlink(initfilename);
3860         }
3861         else
3862         {
3863                 /*
3864                  * We need to interlock this against write_relcache_init_file, to
3865                  * guard against possibility that someone renames a new-but-
3866                  * already-obsolete init file into place just after we unlink. With
3867                  * the interlock, it's certain that write_relcache_init_file will
3868                  * notice our SI inval message before renaming into place, or else
3869                  * that we will execute second and successfully unlink the file.
3870                  */
3871                 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3872                 unlink(initfilename);
3873                 LWLockRelease(RelCacheInitLock);
3874         }
3875 }
3876
3877 /*
3878  * Remove the init file for a given database during postmaster startup.
3879  *
3880  * We used to keep the init file across restarts, but that is unsafe in PITR
3881  * scenarios, and even in simple crash-recovery cases there are windows for
3882  * the init file to become out-of-sync with the database.  So now we just
3883  * remove it during startup and expect the first backend launch to rebuild it.
3884  * Of course, this has to happen in each database of the cluster.  For
3885  * simplicity this is driven by flatfiles.c, which has to scan pg_database
3886  * anyway.
3887  */
3888 void
3889 RelationCacheInitFileRemove(const char *dbPath)
3890 {
3891         char            initfilename[MAXPGPATH];
3892
3893         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3894                          dbPath, RELCACHE_INIT_FILENAME);
3895         unlink(initfilename);
3896         /* ignore any error, since it might not be there at all */
3897 }