]> granicus.if.org Git - postgresql/blob - src/backend/utils/cache/relcache.c
Fix up pgstats counting of live and dead tuples to recognize that committed
[postgresql] / src / backend / utils / cache / relcache.c
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  *        POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.261 2007/05/27 03:50:39 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              RelationCacheInitialize                 - initialize relcache (to empty)
18  *              RelationCacheInitializePhase2   - finish initializing relcache
19  *              RelationIdGetRelation                   - get a reldesc by relation id
20  *              RelationClose                                   - close an open relation
21  *
22  * NOTES
23  *              The following code contains many undocumented hacks.  Please be
24  *              careful....
25  */
26 #include "postgres.h"
27
28 #include <sys/file.h>
29 #include <fcntl.h>
30 #include <unistd.h>
31
32 #include "access/genam.h"
33 #include "access/heapam.h"
34 #include "access/reloptions.h"
35 #include "access/xact.h"
36 #include "catalog/catalog.h"
37 #include "catalog/indexing.h"
38 #include "catalog/namespace.h"
39 #include "catalog/pg_amop.h"
40 #include "catalog/pg_amproc.h"
41 #include "catalog/pg_attrdef.h"
42 #include "catalog/pg_authid.h"
43 #include "catalog/pg_constraint.h"
44 #include "catalog/pg_namespace.h"
45 #include "catalog/pg_opclass.h"
46 #include "catalog/pg_proc.h"
47 #include "catalog/pg_rewrite.h"
48 #include "catalog/pg_type.h"
49 #include "commands/trigger.h"
50 #include "miscadmin.h"
51 #include "optimizer/clauses.h"
52 #include "optimizer/planmain.h"
53 #include "optimizer/prep.h"
54 #include "rewrite/rewriteDefine.h"
55 #include "storage/fd.h"
56 #include "storage/smgr.h"
57 #include "utils/builtins.h"
58 #include "utils/fmgroids.h"
59 #include "utils/inval.h"
60 #include "utils/memutils.h"
61 #include "utils/relcache.h"
62 #include "utils/resowner.h"
63 #include "utils/syscache.h"
64 #include "utils/typcache.h"
65
66
/*
 * File name of the relcache init file, which exists to speed up backend
 * startup.
 */
#define RELCACHE_INIT_FILENAME  "pg_internal.init"

/* Version ID value associated with the relcache init file. */
#define RELCACHE_INIT_FILEMAGIC     0x573264
73
74 /*
75  *              hardcoded tuple descriptors.  see include/catalog/pg_attribute.h
76  */
77 static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
78 static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
79 static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
80 static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
81 static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
82
83 /*
84  *              Hash tables that index the relation cache
85  *
86  *              We used to index the cache by both name and OID, but now there
87  *              is only an index by OID.
88  */
89 typedef struct relidcacheent
90 {
91         Oid                     reloid;
92         Relation        reldesc;
93 } RelIdCacheEnt;
94
95 static HTAB *RelationIdCache;
96
97 /*
98  * This flag is false until we have prepared the critical relcache entries
99  * that are needed to do indexscans on the tables read by relcache building.
100  */
101 bool            criticalRelcachesBuilt = false;
102
103 /*
104  * This counter counts relcache inval events received since backend startup
105  * (but only for rels that are actually in cache).      Presently, we use it only
106  * to detect whether data about to be written by write_relcache_init_file()
107  * might already be obsolete.
108  */
109 static long relcacheInvalsReceived = 0L;
110
111 /*
112  * This list remembers the OIDs of the relations cached in the relcache
113  * init file.
114  */
115 static List *initFileRelationIds = NIL;
116
117 /*
118  * This flag lets us optimize away work in AtEO(Sub)Xact_RelationCache().
119  */
120 static bool need_eoxact_work = false;
121
122
/*
 *      macros to manipulate the lookup hashtables
 */

/*
 * RelationCacheInsert
 *      Enter RELATION into RelationIdCache under its rd_id.  If an entry
 *      for that OID already exists, its reldesc pointer is silently
 *      overwritten.
 *
 * The argument is parenthesized at every use so that any pointer-valued
 * expression may be passed.  Note that it is evaluated twice.
 */
#define RelationCacheInsert(RELATION)   \
do { \
    RelIdCacheEnt *idhentry; bool found; \
    idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                             (void *) &((RELATION)->rd_id), \
                                             HASH_ENTER, \
                                             &found); \
    /* used to give notice if found -- now just keep quiet */ \
    idhentry->reldesc = (RELATION); \
} while(0)
136
/*
 * RelationIdCacheLookup
 *      Look up ID in RelationIdCache and store the cached descriptor into
 *      RELATION, or NULL if there is no entry.
 *
 * ID must be an lvalue (its address is taken); RELATION must be an
 * assignable expression.  Both are parenthesized at each use.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
    RelIdCacheEnt *hentry; \
    hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                           (void *) &(ID), \
                                           HASH_FIND, NULL); \
    if (hentry) \
        (RELATION) = hentry->reldesc; \
    else \
        (RELATION) = NULL; \
} while(0)
147
/*
 * RelationCacheDelete
 *      Remove the RelationIdCache entry keyed by RELATION's rd_id.  A
 *      WARNING is logged if no such entry exists.
 *
 * The argument is parenthesized so that any pointer-valued expression may
 * be passed.
 */
#define RelationCacheDelete(RELATION) \
do { \
    RelIdCacheEnt *idhentry; \
    idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                             (void *) &((RELATION)->rd_id), \
                                             HASH_REMOVE, NULL); \
    if (idhentry == NULL) \
        elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
} while(0)
157
158
159 /*
160  * Special cache for opclass-related information
161  *
162  * Note: only default operators and support procs get cached, ie, those with
163  * lefttype = righttype = opcintype.
164  */
165 typedef struct opclasscacheent
166 {
167         Oid                     opclassoid;             /* lookup key: OID of opclass */
168         bool            valid;                  /* set TRUE after successful fill-in */
169         StrategyNumber numStrats;       /* max # of strategies (from pg_am) */
170         StrategyNumber numSupport;      /* max # of support procs (from pg_am) */
171         Oid                     opcfamily;              /* OID of opclass's family */
172         Oid                     opcintype;              /* OID of opclass's declared input type */
173         Oid                *operatorOids;       /* strategy operators' OIDs */
174         RegProcedure *supportProcs; /* support procs */
175 } OpClassCacheEnt;
176
177 static HTAB *OpClassCache = NULL;
178
179
180 /* non-export function prototypes */
181
182 static void RelationClearRelation(Relation relation, bool rebuild);
183
184 static void RelationReloadIndexInfo(Relation relation);
185 static void RelationFlushRelation(Relation relation);
186 static bool load_relcache_init_file(void);
187 static void write_relcache_init_file(void);
188 static void write_item(const void *data, Size len, FILE *fp);
189
190 static void formrdesc(const char *relationName, Oid relationReltype,
191                   bool hasoids, int natts, FormData_pg_attribute *att);
192
193 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
194 static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
195 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
196 static void RelationBuildTupleDesc(Relation relation);
197 static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
198 static void RelationInitPhysicalAddr(Relation relation);
199 static TupleDesc GetPgClassDescriptor(void);
200 static TupleDesc GetPgIndexDescriptor(void);
201 static void AttrDefaultFetch(Relation relation);
202 static void CheckConstraintFetch(Relation relation);
203 static List *insert_ordered_oid(List *list, Oid datum);
204 static void IndexSupportInitialize(oidvector *indclass,
205                                            Oid *indexOperator,
206                                            RegProcedure *indexSupport,
207                                            Oid *opFamily,
208                                            Oid *opcInType,
209                                            StrategyNumber maxStrategyNumber,
210                                            StrategyNumber maxSupportNumber,
211                                            AttrNumber maxAttributeNumber);
212 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
213                                   StrategyNumber numStrats,
214                                   StrategyNumber numSupport);
215
216
217 /*
218  *              ScanPgRelation
219  *
220  *              this is used by RelationBuildDesc to find a pg_class
221  *              tuple matching targetRelId.
222  *
223  *              NB: the returned tuple has been copied into palloc'd storage
224  *              and must eventually be freed with heap_freetuple.
225  */
226 static HeapTuple
227 ScanPgRelation(Oid targetRelId, bool indexOK)
228 {
229         HeapTuple       pg_class_tuple;
230         Relation        pg_class_desc;
231         SysScanDesc pg_class_scan;
232         ScanKeyData key[1];
233
234         /*
235          * form a scan key
236          */
237         ScanKeyInit(&key[0],
238                                 ObjectIdAttributeNumber,
239                                 BTEqualStrategyNumber, F_OIDEQ,
240                                 ObjectIdGetDatum(targetRelId));
241
242         /*
243          * Open pg_class and fetch a tuple.  Force heap scan if we haven't yet
244          * built the critical relcache entries (this includes initdb and startup
245          * without a pg_internal.init file).  The caller can also force a heap
246          * scan by setting indexOK == false.
247          */
248         pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
249         pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
250                                                                            indexOK && criticalRelcachesBuilt,
251                                                                            SnapshotNow,
252                                                                            1, key);
253
254         pg_class_tuple = systable_getnext(pg_class_scan);
255
256         /*
257          * Must copy tuple before releasing buffer.
258          */
259         if (HeapTupleIsValid(pg_class_tuple))
260                 pg_class_tuple = heap_copytuple(pg_class_tuple);
261
262         /* all done */
263         systable_endscan(pg_class_scan);
264         heap_close(pg_class_desc, AccessShareLock);
265
266         return pg_class_tuple;
267 }
268
269 /*
270  *              AllocateRelationDesc
271  *
272  *              This is used to allocate memory for a new relation descriptor
273  *              and initialize the rd_rel field.
274  *
275  *              If 'relation' is NULL, allocate a new RelationData object.
276  *              If not, reuse the given object (that path is taken only when
277  *              we have to rebuild a relcache entry during RelationClearRelation).
278  */
279 static Relation
280 AllocateRelationDesc(Relation relation, Form_pg_class relp)
281 {
282         MemoryContext oldcxt;
283         Form_pg_class relationForm;
284
285         /* Relcache entries must live in CacheMemoryContext */
286         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
287
288         /*
289          * allocate space for new relation descriptor, if needed
290          */
291         if (relation == NULL)
292                 relation = (Relation) palloc(sizeof(RelationData));
293
294         /*
295          * clear all fields of reldesc
296          */
297         MemSet(relation, 0, sizeof(RelationData));
298         relation->rd_targblock = InvalidBlockNumber;
299
300         /* make sure relation is marked as having no open file yet */
301         relation->rd_smgr = NULL;
302
303         /*
304          * Copy the relation tuple form
305          *
306          * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
307          * variable-length fields (relacl, reloptions) are NOT stored in the
308          * relcache --- there'd be little point in it, since we don't copy the
309          * tuple's nulls bitmap and hence wouldn't know if the values are valid.
310          * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
311          * it from the syscache if you need it.  The same goes for the original
312          * form of reloptions (however, we do store the parsed form of reloptions
313          * in rd_options).
314          */
315         relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
316
317         memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
318
319         /* initialize relation tuple form */
320         relation->rd_rel = relationForm;
321
322         /* and allocate attribute tuple form storage */
323         relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
324                                                                                            relationForm->relhasoids);
325         /* which we mark as a reference-counted tupdesc */
326         relation->rd_att->tdrefcount = 1;
327
328         MemoryContextSwitchTo(oldcxt);
329
330         return relation;
331 }
332
333 /*
334  * RelationParseRelOptions
335  *              Convert pg_class.reloptions into pre-parsed rd_options
336  *
337  * tuple is the real pg_class tuple (not rd_rel!) for relation
338  *
339  * Note: rd_rel and (if an index) rd_am must be valid already
340  */
341 static void
342 RelationParseRelOptions(Relation relation, HeapTuple tuple)
343 {
344         Datum           datum;
345         bool            isnull;
346         bytea      *options;
347
348         relation->rd_options = NULL;
349
350         /* Fall out if relkind should not have options */
351         switch (relation->rd_rel->relkind)
352         {
353                 case RELKIND_RELATION:
354                 case RELKIND_TOASTVALUE:
355                 case RELKIND_UNCATALOGED:
356                 case RELKIND_INDEX:
357                         break;
358                 default:
359                         return;
360         }
361
362         /*
363          * Fetch reloptions from tuple; have to use a hardwired descriptor because
364          * we might not have any other for pg_class yet (consider executing this
365          * code for pg_class itself)
366          */
367         datum = fastgetattr(tuple,
368                                                 Anum_pg_class_reloptions,
369                                                 GetPgClassDescriptor(),
370                                                 &isnull);
371         if (isnull)
372                 return;
373
374         /* Parse into appropriate format; don't error out here */
375         switch (relation->rd_rel->relkind)
376         {
377                 case RELKIND_RELATION:
378                 case RELKIND_TOASTVALUE:
379                 case RELKIND_UNCATALOGED:
380                         options = heap_reloptions(relation->rd_rel->relkind, datum,
381                                                                           false);
382                         break;
383                 case RELKIND_INDEX:
384                         options = index_reloptions(relation->rd_am->amoptions, datum,
385                                                                            false);
386                         break;
387                 default:
388                         Assert(false);          /* can't get here */
389                         options = NULL;         /* keep compiler quiet */
390                         break;
391         }
392
393         /* Copy parsed data into CacheMemoryContext */
394         if (options)
395         {
396                 relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
397                                                                                                   VARSIZE(options));
398                 memcpy(relation->rd_options, options, VARSIZE(options));
399         }
400 }
401
402 /*
403  *              RelationBuildTupleDesc
404  *
405  *              Form the relation's tuple descriptor from information in
406  *              the pg_attribute, pg_attrdef & pg_constraint system catalogs.
407  */
408 static void
409 RelationBuildTupleDesc(Relation relation)
410 {
411         HeapTuple       pg_attribute_tuple;
412         Relation        pg_attribute_desc;
413         SysScanDesc pg_attribute_scan;
414         ScanKeyData skey[2];
415         int                     need;
416         TupleConstr *constr;
417         AttrDefault *attrdef = NULL;
418         int                     ndef = 0;
419
420         /* copy some fields from pg_class row to rd_att */
421         relation->rd_att->tdtypeid = relation->rd_rel->reltype;
422         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
423         relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
424
425         constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
426                                                                                                 sizeof(TupleConstr));
427         constr->has_not_null = false;
428
429         /*
430          * Form a scan key that selects only user attributes (attnum > 0).
431          * (Eliminating system attribute rows at the index level is lots faster
432          * than fetching them.)
433          */
434         ScanKeyInit(&skey[0],
435                                 Anum_pg_attribute_attrelid,
436                                 BTEqualStrategyNumber, F_OIDEQ,
437                                 ObjectIdGetDatum(RelationGetRelid(relation)));
438         ScanKeyInit(&skey[1],
439                                 Anum_pg_attribute_attnum,
440                                 BTGreaterStrategyNumber, F_INT2GT,
441                                 Int16GetDatum(0));
442
443         /*
444          * Open pg_attribute and begin a scan.  Force heap scan if we haven't yet
445          * built the critical relcache entries (this includes initdb and startup
446          * without a pg_internal.init file).
447          */
448         pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
449         pg_attribute_scan = systable_beginscan(pg_attribute_desc,
450                                                                                    AttributeRelidNumIndexId,
451                                                                                    criticalRelcachesBuilt,
452                                                                                    SnapshotNow,
453                                                                                    2, skey);
454
455         /*
456          * add attribute data to relation->rd_att
457          */
458         need = relation->rd_rel->relnatts;
459
460         while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
461         {
462                 Form_pg_attribute attp;
463
464                 attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
465
466                 if (attp->attnum <= 0 ||
467                         attp->attnum > relation->rd_rel->relnatts)
468                         elog(ERROR, "invalid attribute number %d for %s",
469                                  attp->attnum, RelationGetRelationName(relation));
470
471                 memcpy(relation->rd_att->attrs[attp->attnum - 1],
472                            attp,
473                            ATTRIBUTE_TUPLE_SIZE);
474
475                 /* Update constraint/default info */
476                 if (attp->attnotnull)
477                         constr->has_not_null = true;
478
479                 if (attp->atthasdef)
480                 {
481                         if (attrdef == NULL)
482                                 attrdef = (AttrDefault *)
483                                         MemoryContextAllocZero(CacheMemoryContext,
484                                                                                    relation->rd_rel->relnatts *
485                                                                                    sizeof(AttrDefault));
486                         attrdef[ndef].adnum = attp->attnum;
487                         attrdef[ndef].adbin = NULL;
488                         ndef++;
489                 }
490                 need--;
491                 if (need == 0)
492                         break;
493         }
494
495         /*
496          * end the scan and close the attribute relation
497          */
498         systable_endscan(pg_attribute_scan);
499         heap_close(pg_attribute_desc, AccessShareLock);
500
501         if (need != 0)
502                 elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
503                          need, RelationGetRelid(relation));
504
505         /*
506          * The attcacheoff values we read from pg_attribute should all be -1
507          * ("unknown").  Verify this if assert checking is on.  They will be
508          * computed when and if needed during tuple access.
509          */
510 #ifdef USE_ASSERT_CHECKING
511         {
512                 int                     i;
513
514                 for (i = 0; i < relation->rd_rel->relnatts; i++)
515                         Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
516         }
517 #endif
518
519         /*
520          * However, we can easily set the attcacheoff value for the first
521          * attribute: it must be zero.  This eliminates the need for special cases
522          * for attnum=1 that used to exist in fastgetattr() and index_getattr().
523          */
524         if (relation->rd_rel->relnatts > 0)
525                 relation->rd_att->attrs[0]->attcacheoff = 0;
526
527         /*
528          * Set up constraint/default info
529          */
530         if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
531         {
532                 relation->rd_att->constr = constr;
533
534                 if (ndef > 0)                   /* DEFAULTs */
535                 {
536                         if (ndef < relation->rd_rel->relnatts)
537                                 constr->defval = (AttrDefault *)
538                                         repalloc(attrdef, ndef * sizeof(AttrDefault));
539                         else
540                                 constr->defval = attrdef;
541                         constr->num_defval = ndef;
542                         AttrDefaultFetch(relation);
543                 }
544                 else
545                         constr->num_defval = 0;
546
547                 if (relation->rd_rel->relchecks > 0)    /* CHECKs */
548                 {
549                         constr->num_check = relation->rd_rel->relchecks;
550                         constr->check = (ConstrCheck *)
551                                 MemoryContextAllocZero(CacheMemoryContext,
552                                                                         constr->num_check * sizeof(ConstrCheck));
553                         CheckConstraintFetch(relation);
554                 }
555                 else
556                         constr->num_check = 0;
557         }
558         else
559         {
560                 pfree(constr);
561                 relation->rd_att->constr = NULL;
562         }
563 }
564
565 /*
566  *              RelationBuildRuleLock
567  *
568  *              Form the relation's rewrite rules from information in
569  *              the pg_rewrite system catalog.
570  *
571  * Note: The rule parsetrees are potentially very complex node structures.
572  * To allow these trees to be freed when the relcache entry is flushed,
573  * we make a private memory context to hold the RuleLock information for
574  * each relcache entry that has associated rules.  The context is used
575  * just for rule info, not for any other subsidiary data of the relcache
576  * entry, because that keeps the update logic in RelationClearRelation()
577  * manageable.  The other subsidiary data structures are simple enough
578  * to be easy to free explicitly, anyway.
579  */
580 static void
581 RelationBuildRuleLock(Relation relation)
582 {
583         MemoryContext rulescxt;
584         MemoryContext oldcxt;
585         HeapTuple       rewrite_tuple;
586         Relation        rewrite_desc;
587         TupleDesc       rewrite_tupdesc;
588         SysScanDesc rewrite_scan;
589         ScanKeyData key;
590         RuleLock   *rulelock;
591         int                     numlocks;
592         RewriteRule **rules;
593         int                     maxlocks;
594
595         /*
596          * Make the private context.  Parameters are set on the assumption that
597          * it'll probably not contain much data.
598          */
599         rulescxt = AllocSetContextCreate(CacheMemoryContext,
600                                                                          RelationGetRelationName(relation),
601                                                                          ALLOCSET_SMALL_MINSIZE,
602                                                                          ALLOCSET_SMALL_INITSIZE,
603                                                                          ALLOCSET_SMALL_MAXSIZE);
604         relation->rd_rulescxt = rulescxt;
605
606         /*
607          * allocate an array to hold the rewrite rules (the array is extended if
608          * necessary)
609          */
610         maxlocks = 4;
611         rules = (RewriteRule **)
612                 MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
613         numlocks = 0;
614
615         /*
616          * form a scan key
617          */
618         ScanKeyInit(&key,
619                                 Anum_pg_rewrite_ev_class,
620                                 BTEqualStrategyNumber, F_OIDEQ,
621                                 ObjectIdGetDatum(RelationGetRelid(relation)));
622
623         /*
624          * open pg_rewrite and begin a scan
625          *
626          * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
627          * be reading the rules in name order, except possibly during
628          * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
629          * ensures that rules will be fired in name order.
630          */
631         rewrite_desc = heap_open(RewriteRelationId, AccessShareLock);
632         rewrite_tupdesc = RelationGetDescr(rewrite_desc);
633         rewrite_scan = systable_beginscan(rewrite_desc,
634                                                                           RewriteRelRulenameIndexId,
635                                                                           true, SnapshotNow,
636                                                                           1, &key);
637
638         while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
639         {
640                 Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
641                 bool            isnull;
642                 Datum           rule_datum;
643                 text       *rule_text;
644                 char       *rule_str;
645                 RewriteRule *rule;
646
647                 rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
648                                                                                                   sizeof(RewriteRule));
649
650                 rule->ruleId = HeapTupleGetOid(rewrite_tuple);
651
652                 rule->event = rewrite_form->ev_type - '0';
653                 rule->attrno = rewrite_form->ev_attr;
654                 rule->enabled = rewrite_form->ev_enabled;
655                 rule->isInstead = rewrite_form->is_instead;
656
657                 /*
658                  * Must use heap_getattr to fetch ev_action and ev_qual.  Also, the
659                  * rule strings are often large enough to be toasted.  To avoid
660                  * leaking memory in the caller's context, do the detoasting here so
661                  * we can free the detoasted version.
662                  */
663                 rule_datum = heap_getattr(rewrite_tuple,
664                                                                   Anum_pg_rewrite_ev_action,
665                                                                   rewrite_tupdesc,
666                                                                   &isnull);
667                 Assert(!isnull);
668                 rule_text = DatumGetTextP(rule_datum);
669                 rule_str = DatumGetCString(DirectFunctionCall1(textout,
670                                                                                                 PointerGetDatum(rule_text)));
671                 oldcxt = MemoryContextSwitchTo(rulescxt);
672                 rule->actions = (List *) stringToNode(rule_str);
673                 MemoryContextSwitchTo(oldcxt);
674                 pfree(rule_str);
675                 if ((Pointer) rule_text != DatumGetPointer(rule_datum))
676                         pfree(rule_text);
677
678                 rule_datum = heap_getattr(rewrite_tuple,
679                                                                   Anum_pg_rewrite_ev_qual,
680                                                                   rewrite_tupdesc,
681                                                                   &isnull);
682                 Assert(!isnull);
683                 rule_text = DatumGetTextP(rule_datum);
684                 rule_str = DatumGetCString(DirectFunctionCall1(textout,
685                                                                                                 PointerGetDatum(rule_text)));
686                 oldcxt = MemoryContextSwitchTo(rulescxt);
687                 rule->qual = (Node *) stringToNode(rule_str);
688                 MemoryContextSwitchTo(oldcxt);
689                 pfree(rule_str);
690                 if ((Pointer) rule_text != DatumGetPointer(rule_datum))
691                         pfree(rule_text);
692
693                 /*
694                  * We want the rule's table references to be checked as though by the
695                  * table owner, not the user referencing the rule.      Therefore, scan
696                  * through the rule's actions and set the checkAsUser field on all
697                  * rtable entries.      We have to look at the qual as well, in case it
698                  * contains sublinks.
699                  *
700                  * The reason for doing this when the rule is loaded, rather than when
701                  * it is stored, is that otherwise ALTER TABLE OWNER would have to
702                  * grovel through stored rules to update checkAsUser fields. Scanning
703                  * the rule tree during load is relatively cheap (compared to
704                  * constructing it in the first place), so we do it here.
705                  */
706                 setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
707                 setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
708
709                 if (numlocks >= maxlocks)
710                 {
711                         maxlocks *= 2;
712                         rules = (RewriteRule **)
713                                 repalloc(rules, sizeof(RewriteRule *) * maxlocks);
714                 }
715                 rules[numlocks++] = rule;
716         }
717
718         /*
719          * end the scan and close the pg_rewrite relation
720          */
721         systable_endscan(rewrite_scan);
722         heap_close(rewrite_desc, AccessShareLock);
723
724         /*
725          * form a RuleLock and insert into relation
726          */
727         rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
728         rulelock->numLocks = numlocks;
729         rulelock->rules = rules;
730
731         relation->rd_rules = rulelock;
732 }
733
734 /*
735  *              equalRuleLocks
736  *
737  *              Determine whether two RuleLocks are equivalent
738  *
739  *              Probably this should be in the rules code someplace...
740  */
741 static bool
742 equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
743 {
744         int                     i;
745
746         /*
747          * As of 7.3 we assume the rule ordering is repeatable, because
748          * RelationBuildRuleLock should read 'em in a consistent order.  So just
749          * compare corresponding slots.
750          */
751         if (rlock1 != NULL)
752         {
753                 if (rlock2 == NULL)
754                         return false;
755                 if (rlock1->numLocks != rlock2->numLocks)
756                         return false;
757                 for (i = 0; i < rlock1->numLocks; i++)
758                 {
759                         RewriteRule *rule1 = rlock1->rules[i];
760                         RewriteRule *rule2 = rlock2->rules[i];
761
762                         if (rule1->ruleId != rule2->ruleId)
763                                 return false;
764                         if (rule1->event != rule2->event)
765                                 return false;
766                         if (rule1->attrno != rule2->attrno)
767                                 return false;
768                         if (rule1->isInstead != rule2->isInstead)
769                                 return false;
770                         if (!equal(rule1->qual, rule2->qual))
771                                 return false;
772                         if (!equal(rule1->actions, rule2->actions))
773                                 return false;
774                 }
775         }
776         else if (rlock2 != NULL)
777                 return false;
778         return true;
779 }
780
781
782 /* ----------------------------------
783  *              RelationBuildDesc
784  *
785  *              Build a relation descriptor --- either a new one, or by
786  *              recycling the given old relation object.  The latter case
787  *              supports rebuilding a relcache entry without invalidating
788  *              pointers to it.
789  *
790  *              Returns NULL if no pg_class row could be found for the given relid
791  *              (suggesting we are trying to access a just-deleted relation).
792  *              Any other error is reported via elog.
793  * --------------------------------
794  */
795 static Relation
796 RelationBuildDesc(Oid targetRelId, Relation oldrelation)
797 {
798         Relation        relation;
799         Oid                     relid;
800         HeapTuple       pg_class_tuple;
801         Form_pg_class relp;
802         MemoryContext oldcxt;
803
804         /*
805          * find the tuple in pg_class corresponding to the given relation id
806          */
807         pg_class_tuple = ScanPgRelation(targetRelId, true);
808
809         /*
810          * if no such tuple exists, return NULL
811          */
812         if (!HeapTupleIsValid(pg_class_tuple))
813                 return NULL;
814
815         /*
816          * get information from the pg_class_tuple
817          */
818         relid = HeapTupleGetOid(pg_class_tuple);
819         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
820
821         /*
822          * allocate storage for the relation descriptor, and copy pg_class_tuple
823          * to relation->rd_rel.
824          */
825         relation = AllocateRelationDesc(oldrelation, relp);
826
827         /*
828          * initialize the relation's relation id (relation->rd_id)
829          */
830         RelationGetRelid(relation) = relid;
831
832         /*
833          * normal relations are not nailed into the cache; nor can a pre-existing
834          * relation be new.  It could be temp though.  (Actually, it could be new
835          * too, but it's okay to forget that fact if forced to flush the entry.)
836          */
837         relation->rd_refcnt = 0;
838         relation->rd_isnailed = false;
839         relation->rd_createSubid = InvalidSubTransactionId;
840         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
841         relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);
842
843         /*
844          * initialize the tuple descriptor (relation->rd_att).
845          */
846         RelationBuildTupleDesc(relation);
847
848         /*
849          * Fetch rules and triggers that affect this relation
850          */
851         if (relation->rd_rel->relhasrules)
852                 RelationBuildRuleLock(relation);
853         else
854         {
855                 relation->rd_rules = NULL;
856                 relation->rd_rulescxt = NULL;
857         }
858
859         if (relation->rd_rel->reltriggers > 0)
860                 RelationBuildTriggers(relation);
861         else
862                 relation->trigdesc = NULL;
863
864         /*
865          * if it's an index, initialize index-related information
866          */
867         if (OidIsValid(relation->rd_rel->relam))
868                 RelationInitIndexAccessInfo(relation);
869
870         /* extract reloptions if any */
871         RelationParseRelOptions(relation, pg_class_tuple);
872
873         /*
874          * initialize the relation lock manager information
875          */
876         RelationInitLockInfo(relation);         /* see lmgr.c */
877
878         /*
879          * initialize physical addressing information for the relation
880          */
881         RelationInitPhysicalAddr(relation);
882
883         /* make sure relation is marked as having no open file yet */
884         relation->rd_smgr = NULL;
885
886         /*
887          * now we can free the memory allocated for pg_class_tuple
888          */
889         heap_freetuple(pg_class_tuple);
890
891         /*
892          * Insert newly created relation into relcache hash tables.
893          */
894         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
895         RelationCacheInsert(relation);
896         MemoryContextSwitchTo(oldcxt);
897
898         /* It's fully valid */
899         relation->rd_isvalid = true;
900
901         return relation;
902 }
903
904 /*
905  * Initialize the physical addressing info (RelFileNode) for a relcache entry
906  */
907 static void
908 RelationInitPhysicalAddr(Relation relation)
909 {
910         if (relation->rd_rel->reltablespace)
911                 relation->rd_node.spcNode = relation->rd_rel->reltablespace;
912         else
913                 relation->rd_node.spcNode = MyDatabaseTableSpace;
914         if (relation->rd_rel->relisshared)
915                 relation->rd_node.dbNode = InvalidOid;
916         else
917                 relation->rd_node.dbNode = MyDatabaseId;
918         relation->rd_node.relNode = relation->rd_rel->relfilenode;
919 }
920
921 /*
922  * Initialize index-access-method support data for an index relation
923  */
924 void
925 RelationInitIndexAccessInfo(Relation relation)
926 {
927         HeapTuple       tuple;
928         Form_pg_am      aform;
929         Datum           indclassDatum;
930         Datum           indoptionDatum;
931         bool            isnull;
932         oidvector  *indclass;
933         int2vector  *indoption;
934         MemoryContext indexcxt;
935         MemoryContext oldcontext;
936         int                     natts;
937         uint16          amstrategies;
938         uint16          amsupport;
939
940         /*
941          * Make a copy of the pg_index entry for the index.  Since pg_index
942          * contains variable-length and possibly-null fields, we have to do this
943          * honestly rather than just treating it as a Form_pg_index struct.
944          */
945         tuple = SearchSysCache(INDEXRELID,
946                                                    ObjectIdGetDatum(RelationGetRelid(relation)),
947                                                    0, 0, 0);
948         if (!HeapTupleIsValid(tuple))
949                 elog(ERROR, "cache lookup failed for index %u",
950                          RelationGetRelid(relation));
951         oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
952         relation->rd_indextuple = heap_copytuple(tuple);
953         relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
954         MemoryContextSwitchTo(oldcontext);
955         ReleaseSysCache(tuple);
956
957         /*
958          * Make a copy of the pg_am entry for the index's access method
959          */
960         tuple = SearchSysCache(AMOID,
961                                                    ObjectIdGetDatum(relation->rd_rel->relam),
962                                                    0, 0, 0);
963         if (!HeapTupleIsValid(tuple))
964                 elog(ERROR, "cache lookup failed for access method %u",
965                          relation->rd_rel->relam);
966         aform = (Form_pg_am) MemoryContextAlloc(CacheMemoryContext, sizeof *aform);
967         memcpy(aform, GETSTRUCT(tuple), sizeof *aform);
968         ReleaseSysCache(tuple);
969         relation->rd_am = aform;
970
971         natts = relation->rd_rel->relnatts;
972         if (natts != relation->rd_index->indnatts)
973                 elog(ERROR, "relnatts disagrees with indnatts for index %u",
974                          RelationGetRelid(relation));
975         amstrategies = aform->amstrategies;
976         amsupport = aform->amsupport;
977
978         /*
979          * Make the private context to hold index access info.  The reason we need
980          * a context, and not just a couple of pallocs, is so that we won't leak
981          * any subsidiary info attached to fmgr lookup records.
982          *
983          * Context parameters are set on the assumption that it'll probably not
984          * contain much data.
985          */
986         indexcxt = AllocSetContextCreate(CacheMemoryContext,
987                                                                          RelationGetRelationName(relation),
988                                                                          ALLOCSET_SMALL_MINSIZE,
989                                                                          ALLOCSET_SMALL_INITSIZE,
990                                                                          ALLOCSET_SMALL_MAXSIZE);
991         relation->rd_indexcxt = indexcxt;
992
993         /*
994          * Allocate arrays to hold data
995          */
996         relation->rd_aminfo = (RelationAmInfo *)
997                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
998
999         relation->rd_opfamily = (Oid *)
1000                 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1001         relation->rd_opcintype = (Oid *)
1002                 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1003
1004         if (amstrategies > 0)
1005                 relation->rd_operator = (Oid *)
1006                         MemoryContextAllocZero(indexcxt,
1007                                                                    natts * amstrategies * sizeof(Oid));
1008         else
1009                 relation->rd_operator = NULL;
1010
1011         if (amsupport > 0)
1012         {
1013                 int                     nsupport = natts * amsupport;
1014
1015                 relation->rd_support = (RegProcedure *)
1016                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1017                 relation->rd_supportinfo = (FmgrInfo *)
1018                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1019         }
1020         else
1021         {
1022                 relation->rd_support = NULL;
1023                 relation->rd_supportinfo = NULL;
1024         }
1025
1026         relation->rd_indoption = (int16 *)
1027                 MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1028
1029         /*
1030          * indclass cannot be referenced directly through the C struct, because it
1031          * comes after the variable-width indkey field.  Must extract the
1032          * datum the hard way...
1033          */
1034         indclassDatum = fastgetattr(relation->rd_indextuple,
1035                                                                 Anum_pg_index_indclass,
1036                                                                 GetPgIndexDescriptor(),
1037                                                                 &isnull);
1038         Assert(!isnull);
1039         indclass = (oidvector *) DatumGetPointer(indclassDatum);
1040
1041         /*
1042          * Fill the operator and support procedure OID arrays, as well as the
1043          * info about opfamilies and opclass input types.  (aminfo and
1044          * supportinfo are left as zeroes, and are filled on-the-fly when used)
1045          */
1046         IndexSupportInitialize(indclass,
1047                                                    relation->rd_operator, relation->rd_support,
1048                                                    relation->rd_opfamily, relation->rd_opcintype,
1049                                                    amstrategies, amsupport, natts);
1050
1051         /*
1052          * Similarly extract indoption and copy it to the cache entry
1053          */
1054         indoptionDatum = fastgetattr(relation->rd_indextuple,
1055                                                                  Anum_pg_index_indoption,
1056                                                                  GetPgIndexDescriptor(),
1057                                                                  &isnull);
1058         Assert(!isnull);
1059         indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1060         memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1061
1062         /*
1063          * expressions and predicate cache will be filled later
1064          */
1065         relation->rd_indexprs = NIL;
1066         relation->rd_indpred = NIL;
1067         relation->rd_amcache = NULL;
1068 }
1069
1070 /*
1071  * IndexSupportInitialize
1072  *              Initializes an index's cached opclass information,
1073  *              given the index's pg_index.indclass entry.
1074  *
1075  * Data is returned into *indexOperator, *indexSupport, *opFamily, and
1076  * *opcInType, which are arrays allocated by the caller.
1077  *
1078  * The caller also passes maxStrategyNumber, maxSupportNumber, and
1079  * maxAttributeNumber, since these indicate the size of the arrays
1080  * it has allocated --- but in practice these numbers must always match
1081  * those obtainable from the system catalog entries for the index and
1082  * access method.
1083  */
1084 static void
1085 IndexSupportInitialize(oidvector *indclass,
1086                                            Oid *indexOperator,
1087                                            RegProcedure *indexSupport,
1088                                            Oid *opFamily,
1089                                            Oid *opcInType,
1090                                            StrategyNumber maxStrategyNumber,
1091                                            StrategyNumber maxSupportNumber,
1092                                            AttrNumber maxAttributeNumber)
1093 {
1094         int                     attIndex;
1095
1096         for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1097         {
1098                 OpClassCacheEnt *opcentry;
1099
1100                 if (!OidIsValid(indclass->values[attIndex]))
1101                         elog(ERROR, "bogus pg_index tuple");
1102
1103                 /* look up the info for this opclass, using a cache */
1104                 opcentry = LookupOpclassInfo(indclass->values[attIndex],
1105                                                                          maxStrategyNumber,
1106                                                                          maxSupportNumber);
1107
1108                 /* copy cached data into relcache entry */
1109                 opFamily[attIndex] = opcentry->opcfamily;
1110                 opcInType[attIndex] = opcentry->opcintype;
1111                 if (maxStrategyNumber > 0)
1112                         memcpy(&indexOperator[attIndex * maxStrategyNumber],
1113                                    opcentry->operatorOids,
1114                                    maxStrategyNumber * sizeof(Oid));
1115                 if (maxSupportNumber > 0)
1116                         memcpy(&indexSupport[attIndex * maxSupportNumber],
1117                                    opcentry->supportProcs,
1118                                    maxSupportNumber * sizeof(RegProcedure));
1119         }
1120 }
1121
1122 /*
1123  * LookupOpclassInfo
1124  *
1125  * This routine maintains a per-opclass cache of the information needed
1126  * by IndexSupportInitialize().  This is more efficient than relying on
1127  * the catalog cache, because we can load all the info about a particular
1128  * opclass in a single indexscan of pg_amproc or pg_amop.
1129  *
1130  * The information from pg_am about expected range of strategy and support
1131  * numbers is passed in, rather than being looked up, mainly because the
1132  * caller will have it already.
1133  *
1134  * XXX There isn't any provision for flushing the cache.  However, there
1135  * isn't any provision for flushing relcache entries when opclass info
1136  * changes, either :-(
1137  */
1138 static OpClassCacheEnt *
1139 LookupOpclassInfo(Oid operatorClassOid,
1140                                   StrategyNumber numStrats,
1141                                   StrategyNumber numSupport)
1142 {
1143         OpClassCacheEnt *opcentry;
1144         bool            found;
1145         Relation        rel;
1146         SysScanDesc scan;
1147         ScanKeyData skey[3];
1148         HeapTuple       htup;
1149         bool            indexOK;
1150
1151         if (OpClassCache == NULL)
1152         {
1153                 /* First time through: initialize the opclass cache */
1154                 HASHCTL         ctl;
1155
1156                 if (!CacheMemoryContext)
1157                         CreateCacheMemoryContext();
1158
1159                 MemSet(&ctl, 0, sizeof(ctl));
1160                 ctl.keysize = sizeof(Oid);
1161                 ctl.entrysize = sizeof(OpClassCacheEnt);
1162                 ctl.hash = oid_hash;
1163                 OpClassCache = hash_create("Operator class cache", 64,
1164                                                                    &ctl, HASH_ELEM | HASH_FUNCTION);
1165         }
1166
1167         opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1168                                                                                            (void *) &operatorClassOid,
1169                                                                                            HASH_ENTER, &found);
1170
1171         if (found && opcentry->valid)
1172         {
1173                 /* Already made an entry for it */
1174                 Assert(numStrats == opcentry->numStrats);
1175                 Assert(numSupport == opcentry->numSupport);
1176                 return opcentry;
1177         }
1178
1179         /* Need to fill in new entry */
1180         opcentry->valid = false;        /* until known OK */
1181         opcentry->numStrats = numStrats;
1182         opcentry->numSupport = numSupport;
1183
1184         if (numStrats > 0)
1185                 opcentry->operatorOids = (Oid *)
1186                         MemoryContextAllocZero(CacheMemoryContext,
1187                                                                    numStrats * sizeof(Oid));
1188         else
1189                 opcentry->operatorOids = NULL;
1190
1191         if (numSupport > 0)
1192                 opcentry->supportProcs = (RegProcedure *)
1193                         MemoryContextAllocZero(CacheMemoryContext,
1194                                                                    numSupport * sizeof(RegProcedure));
1195         else
1196                 opcentry->supportProcs = NULL;
1197
1198         /*
1199          * To avoid infinite recursion during startup, force heap scans if we're
1200          * looking up info for the opclasses used by the indexes we would like to
1201          * reference here.
1202          */
1203         indexOK = criticalRelcachesBuilt ||
1204                 (operatorClassOid != OID_BTREE_OPS_OID &&
1205                  operatorClassOid != INT2_BTREE_OPS_OID);
1206
1207         /*
1208          * We have to fetch the pg_opclass row to determine its opfamily and
1209          * opcintype, which are needed to look up the operators and functions.
1210          * It'd be convenient to use the syscache here, but that probably doesn't
1211          * work while bootstrapping.
1212          */
1213         ScanKeyInit(&skey[0],
1214                                 ObjectIdAttributeNumber,
1215                                 BTEqualStrategyNumber, F_OIDEQ,
1216                                 ObjectIdGetDatum(operatorClassOid));
1217         rel = heap_open(OperatorClassRelationId, AccessShareLock);
1218         scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1219                                                           SnapshotNow, 1, skey);
1220
1221         if (HeapTupleIsValid(htup = systable_getnext(scan)))
1222         {
1223                 Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1224
1225                 opcentry->opcfamily = opclassform->opcfamily;
1226                 opcentry->opcintype = opclassform->opcintype;
1227         }
1228         else
1229                 elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1230
1231         systable_endscan(scan);
1232         heap_close(rel, AccessShareLock);
1233
1234
1235         /*
1236          * Scan pg_amop to obtain operators for the opclass.  We only fetch the
1237          * default ones (those with lefttype = righttype = opcintype).
1238          */
1239         if (numStrats > 0)
1240         {
1241                 ScanKeyInit(&skey[0],
1242                                         Anum_pg_amop_amopfamily,
1243                                         BTEqualStrategyNumber, F_OIDEQ,
1244                                         ObjectIdGetDatum(opcentry->opcfamily));
1245                 ScanKeyInit(&skey[1],
1246                                         Anum_pg_amop_amoplefttype,
1247                                         BTEqualStrategyNumber, F_OIDEQ,
1248                                         ObjectIdGetDatum(opcentry->opcintype));
1249                 ScanKeyInit(&skey[2],
1250                                         Anum_pg_amop_amoprighttype,
1251                                         BTEqualStrategyNumber, F_OIDEQ,
1252                                         ObjectIdGetDatum(opcentry->opcintype));
1253                 rel = heap_open(AccessMethodOperatorRelationId, AccessShareLock);
1254                 scan = systable_beginscan(rel, AccessMethodStrategyIndexId, indexOK,
1255                                                                   SnapshotNow, 3, skey);
1256
1257                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1258                 {
1259                         Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(htup);
1260
1261                         if (amopform->amopstrategy <= 0 ||
1262                                 (StrategyNumber) amopform->amopstrategy > numStrats)
1263                                 elog(ERROR, "invalid amopstrategy number %d for opclass %u",
1264                                          amopform->amopstrategy, operatorClassOid);
1265                         opcentry->operatorOids[amopform->amopstrategy - 1] =
1266                                 amopform->amopopr;
1267                 }
1268
1269                 systable_endscan(scan);
1270                 heap_close(rel, AccessShareLock);
1271         }
1272
1273         /*
1274          * Scan pg_amproc to obtain support procs for the opclass.      We only fetch
1275          * the default ones (those with lefttype = righttype = opcintype).
1276          */
1277         if (numSupport > 0)
1278         {
1279                 ScanKeyInit(&skey[0],
1280                                         Anum_pg_amproc_amprocfamily,
1281                                         BTEqualStrategyNumber, F_OIDEQ,
1282                                         ObjectIdGetDatum(opcentry->opcfamily));
1283                 ScanKeyInit(&skey[1],
1284                                         Anum_pg_amproc_amproclefttype,
1285                                         BTEqualStrategyNumber, F_OIDEQ,
1286                                         ObjectIdGetDatum(opcentry->opcintype));
1287                 ScanKeyInit(&skey[2],
1288                                         Anum_pg_amproc_amprocrighttype,
1289                                         BTEqualStrategyNumber, F_OIDEQ,
1290                                         ObjectIdGetDatum(opcentry->opcintype));
1291                 rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
1292                 scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1293                                                                   SnapshotNow, 3, skey);
1294
1295                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1296                 {
1297                         Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1298
1299                         if (amprocform->amprocnum <= 0 ||
1300                                 (StrategyNumber) amprocform->amprocnum > numSupport)
1301                                 elog(ERROR, "invalid amproc number %d for opclass %u",
1302                                          amprocform->amprocnum, operatorClassOid);
1303
1304                         opcentry->supportProcs[amprocform->amprocnum - 1] =
1305                                 amprocform->amproc;
1306                 }
1307
1308                 systable_endscan(scan);
1309                 heap_close(rel, AccessShareLock);
1310         }
1311
1312         opcentry->valid = true;
1313         return opcentry;
1314 }
1315
1316
1317 /*
1318  *              formrdesc
1319  *
1320  *              This is a special cut-down version of RelationBuildDesc()
1321  *              used by RelationCacheInitializePhase2() in initializing the relcache.
1322  *              The relation descriptor is built just from the supplied parameters,
1323  *              without actually looking at any system table entries.  We cheat
1324  *              quite a lot since we only need to work for a few basic system
1325  *              catalogs.
1326  *
1327  * formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
1328  * and pg_type (see RelationCacheInitializePhase2).
1329  *
1330  * Note that these catalogs can't have constraints (except attnotnull),
1331  * default values, rules, or triggers, since we don't cope with any of that.
1332  *
1333  * NOTE: we assume we are already switched into CacheMemoryContext.
1334  */
1335 static void
1336 formrdesc(const char *relationName, Oid relationReltype,
1337                   bool hasoids, int natts, FormData_pg_attribute *att)
1338 {
1339         Relation        relation;
1340         int                     i;
1341         bool            has_not_null;
1342
1343         /*
1344          * allocate new relation desc, clear all fields of reldesc
1345          */
1346         relation = (Relation) palloc0(sizeof(RelationData));
1347         relation->rd_targblock = InvalidBlockNumber;
1348
1349         /* make sure relation is marked as having no open file yet */
1350         relation->rd_smgr = NULL;
1351
1352         /*
1353          * initialize reference count: 1 because it is nailed in cache
1354          */
1355         relation->rd_refcnt = 1;
1356
1357         /*
1358          * all entries built with this routine are nailed-in-cache; none are for
1359          * new or temp relations.
1360          */
1361         relation->rd_isnailed = true;
1362         relation->rd_createSubid = InvalidSubTransactionId;
1363         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1364         relation->rd_istemp = false;
1365
1366         /*
1367          * initialize relation tuple form
1368          *
1369          * The data we insert here is pretty incomplete/bogus, but it'll serve to
1370          * get us launched.  RelationCacheInitializePhase2() will read the real
1371          * data from pg_class and replace what we've done here.
1372          */
1373         relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1374
1375         namestrcpy(&relation->rd_rel->relname, relationName);
1376         relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1377         relation->rd_rel->reltype = relationReltype;
1378
1379         /*
1380          * It's important to distinguish between shared and non-shared relations,
1381          * even at bootstrap time, to make sure we know where they are stored.  At
1382          * present, all relations that formrdesc is used for are not shared.
1383          */
1384         relation->rd_rel->relisshared = false;
1385
1386         relation->rd_rel->relpages = 1;
1387         relation->rd_rel->reltuples = 1;
1388         relation->rd_rel->relkind = RELKIND_RELATION;
1389         relation->rd_rel->relhasoids = hasoids;
1390         relation->rd_rel->relnatts = (int16) natts;
1391
1392         /*
1393          * initialize attribute tuple form
1394          *
1395          * Unlike the case with the relation tuple, this data had better be right
1396          * because it will never be replaced.  The input values must be correctly
1397          * defined by macros in src/include/catalog/ headers.
1398          */
1399         relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1400         relation->rd_att->tdrefcount = 1;       /* mark as refcounted */
1401
1402         relation->rd_att->tdtypeid = relationReltype;
1403         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
1404
1405         /*
1406          * initialize tuple desc info
1407          */
1408         has_not_null = false;
1409         for (i = 0; i < natts; i++)
1410         {
1411                 memcpy(relation->rd_att->attrs[i],
1412                            &att[i],
1413                            ATTRIBUTE_TUPLE_SIZE);
1414                 has_not_null |= att[i].attnotnull;
1415                 /* make sure attcacheoff is valid */
1416                 relation->rd_att->attrs[i]->attcacheoff = -1;
1417         }
1418
1419         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1420         relation->rd_att->attrs[0]->attcacheoff = 0;
1421
1422         /* mark not-null status */
1423         if (has_not_null)
1424         {
1425                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1426
1427                 constr->has_not_null = true;
1428                 relation->rd_att->constr = constr;
1429         }
1430
1431         /*
1432          * initialize relation id from info in att array (my, this is ugly)
1433          */
1434         RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1435         relation->rd_rel->relfilenode = RelationGetRelid(relation);
1436
1437         /*
1438          * initialize the relation lock manager information
1439          */
1440         RelationInitLockInfo(relation);         /* see lmgr.c */
1441
1442         /*
1443          * initialize physical addressing information for the relation
1444          */
1445         RelationInitPhysicalAddr(relation);
1446
1447         /*
1448          * initialize the rel-has-index flag, using hardwired knowledge
1449          */
1450         if (IsBootstrapProcessingMode())
1451         {
1452                 /* In bootstrap mode, we have no indexes */
1453                 relation->rd_rel->relhasindex = false;
1454         }
1455         else
1456         {
1457                 /* Otherwise, all the rels formrdesc is used for have indexes */
1458                 relation->rd_rel->relhasindex = true;
1459         }
1460
1461         /*
1462          * add new reldesc to relcache
1463          */
1464         RelationCacheInsert(relation);
1465
1466         /* It's fully valid */
1467         relation->rd_isvalid = true;
1468 }
1469
1470
1471 /* ----------------------------------------------------------------
1472  *                               Relation Descriptor Lookup Interface
1473  * ----------------------------------------------------------------
1474  */
1475
1476 /*
1477  *              RelationIdGetRelation
1478  *
1479  *              Lookup a reldesc by OID; make one if not already in cache.
1480  *
1481  *              Returns NULL if no pg_class row could be found for the given relid
1482  *              (suggesting we are trying to access a just-deleted relation).
1483  *              Any other error is reported via elog.
1484  *
1485  *              NB: caller should already have at least AccessShareLock on the
1486  *              relation ID, else there are nasty race conditions.
1487  *
1488  *              NB: relation ref count is incremented, or set to 1 if new entry.
1489  *              Caller should eventually decrement count.  (Usually,
1490  *              that happens by calling RelationClose().)
1491  */
1492 Relation
1493 RelationIdGetRelation(Oid relationId)
1494 {
1495         Relation        rd;
1496
1497         /*
1498          * first try to find reldesc in the cache
1499          */
1500         RelationIdCacheLookup(relationId, rd);
1501
1502         if (RelationIsValid(rd))
1503         {
1504                 RelationIncrementReferenceCount(rd);
1505                 /* revalidate nailed index if necessary */
1506                 if (!rd->rd_isvalid)
1507                         RelationReloadIndexInfo(rd);
1508                 return rd;
1509         }
1510
1511         /*
1512          * no reldesc in the cache, so have RelationBuildDesc() build one and add
1513          * it.
1514          */
1515         rd = RelationBuildDesc(relationId, NULL);
1516         if (RelationIsValid(rd))
1517                 RelationIncrementReferenceCount(rd);
1518         return rd;
1519 }
1520
1521 /* ----------------------------------------------------------------
1522  *                              cache invalidation support routines
1523  * ----------------------------------------------------------------
1524  */
1525
1526 /*
1527  * RelationIncrementReferenceCount
1528  *              Increments relation reference count.
1529  *
1530  * Note: bootstrap mode has its own weird ideas about relation refcount
1531  * behavior; we ought to fix it someday, but for now, just disable
1532  * reference count ownership tracking in bootstrap mode.
1533  */
1534 void
1535 RelationIncrementReferenceCount(Relation rel)
1536 {
1537         ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
1538         rel->rd_refcnt += 1;
1539         if (!IsBootstrapProcessingMode())
1540                 ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
1541 }
1542
1543 /*
1544  * RelationDecrementReferenceCount
1545  *              Decrements relation reference count.
1546  */
1547 void
1548 RelationDecrementReferenceCount(Relation rel)
1549 {
1550         Assert(rel->rd_refcnt > 0);
1551         rel->rd_refcnt -= 1;
1552         if (!IsBootstrapProcessingMode())
1553                 ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
1554 }
1555
1556 /*
1557  * RelationClose - close an open relation
1558  *
1559  *      Actually, we just decrement the refcount.
1560  *
1561  *      NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
1562  *      will be freed as soon as their refcount goes to zero.  In combination
1563  *      with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
1564  *      to catch references to already-released relcache entries.  It slows
1565  *      things down quite a bit, however.
1566  */
1567 void
1568 RelationClose(Relation relation)
1569 {
1570         /* Note: no locking manipulations needed */
1571         RelationDecrementReferenceCount(relation);
1572
1573 #ifdef RELCACHE_FORCE_RELEASE
1574         if (RelationHasReferenceCountZero(relation) &&
1575                 relation->rd_createSubid == InvalidSubTransactionId &&
1576                 relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
1577                 RelationClearRelation(relation, false);
1578 #endif
1579 }
1580
1581 /*
1582  * RelationReloadIndexInfo - reload minimal information for an open index
1583  *
1584  *      This function is used only for indexes.  A relcache inval on an index
1585  *      can mean that its pg_class or pg_index row changed.  There are only
1586  *      very limited changes that are allowed to an existing index's schema,
1587  *      so we can update the relcache entry without a complete rebuild; which
1588  *      is fortunate because we can't rebuild an index entry that is "nailed"
1589  *      and/or in active use.  We support full replacement of the pg_class row,
1590  *      as well as updates of a few simple fields of the pg_index row.
1591  *
1592  *      We can't necessarily reread the catalog rows right away; we might be
1593  *      in a failed transaction when we receive the SI notification.  If so,
1594  *      RelationClearRelation just marks the entry as invalid by setting
1595  *      rd_isvalid to false.  This routine is called to fix the entry when it
1596  *      is next needed.
1597  */
1598 static void
1599 RelationReloadIndexInfo(Relation relation)
1600 {
1601         bool            indexOK;
1602         HeapTuple       pg_class_tuple;
1603         Form_pg_class relp;
1604
1605         /* Should be called only for invalidated indexes */
1606         Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
1607                    !relation->rd_isvalid);
1608         /* Should be closed at smgr level */
1609         Assert(relation->rd_smgr == NULL);
1610
1611         /*
1612          * Read the pg_class row
1613          *
1614          * Don't try to use an indexscan of pg_class_oid_index to reload the info
1615          * for pg_class_oid_index ...
1616          */
1617         indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
1618         pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
1619         if (!HeapTupleIsValid(pg_class_tuple))
1620                 elog(ERROR, "could not find pg_class tuple for index %u",
1621                          RelationGetRelid(relation));
1622         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1623         memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
1624         /* Reload reloptions in case they changed */
1625         if (relation->rd_options)
1626                 pfree(relation->rd_options);
1627         RelationParseRelOptions(relation, pg_class_tuple);
1628         /* done with pg_class tuple */
1629         heap_freetuple(pg_class_tuple);
1630         /* We must recalculate physical address in case it changed */
1631         RelationInitPhysicalAddr(relation);
1632         /* Make sure targblock is reset in case rel was truncated */
1633         relation->rd_targblock = InvalidBlockNumber;
1634         /* Must free any AM cached data, too */
1635         if (relation->rd_amcache)
1636                 pfree(relation->rd_amcache);
1637         relation->rd_amcache = NULL;
1638
1639         /*
1640          * For a non-system index, there are fields of the pg_index row that are
1641          * allowed to change, so re-read that row and update the relcache entry.
1642          * Most of the info derived from pg_index (such as support function lookup
1643          * info) cannot change, and indeed the whole point of this routine is to
1644          * update the relcache entry without clobbering that data; so wholesale
1645          * replacement is not appropriate.
1646          */
1647         if (!IsSystemRelation(relation))
1648         {
1649                 HeapTuple       tuple;
1650                 Form_pg_index index;
1651
1652                 tuple = SearchSysCache(INDEXRELID,
1653                                                            ObjectIdGetDatum(RelationGetRelid(relation)),
1654                                                            0, 0, 0);
1655                 if (!HeapTupleIsValid(tuple))
1656                                 elog(ERROR, "cache lookup failed for index %u",
1657                                          RelationGetRelid(relation));
1658                 index = (Form_pg_index) GETSTRUCT(tuple);
1659
1660                 relation->rd_index->indisvalid = index->indisvalid;
1661
1662                 ReleaseSysCache(tuple);
1663         }
1664
1665         /* Okay, now it's valid again */
1666         relation->rd_isvalid = true;
1667 }
1668
1669 /*
1670  * RelationClearRelation
1671  *
1672  *       Physically blow away a relation cache entry, or reset it and rebuild
1673  *       it from scratch (that is, from catalog entries).  The latter path is
1674  *       usually used when we are notified of a change to an open relation
1675  *       (one with refcount > 0).  However, this routine just does whichever
1676  *       it's told to do; callers must determine which they want.
1677  */
1678 static void
1679 RelationClearRelation(Relation relation, bool rebuild)
1680 {
1681         Oid                     old_reltype = relation->rd_rel->reltype;
1682         MemoryContext oldcxt;
1683
1684         /*
1685          * Make sure smgr and lower levels close the relation's files, if they
1686          * weren't closed already.  If the relation is not getting deleted, the
1687          * next smgr access should reopen the files automatically.      This ensures
1688          * that the low-level file access state is updated after, say, a vacuum
1689          * truncation.
1690          */
1691         RelationCloseSmgr(relation);
1692
1693         /*
1694          * Never, never ever blow away a nailed-in system relation, because we'd
1695          * be unable to recover.  However, we must reset rd_targblock, in case we
1696          * got called because of a relation cache flush that was triggered by
1697          * VACUUM.
1698          *
1699          * If it's a nailed index, then we need to re-read the pg_class row to see
1700          * if its relfilenode changed.  We can't necessarily do that here, because
1701          * we might be in a failed transaction.  We assume it's okay to do it if
1702          * there are open references to the relcache entry (cf notes for
1703          * AtEOXact_RelationCache).  Otherwise just mark the entry as possibly
1704          * invalid, and it'll be fixed when next opened.
1705          */
1706         if (relation->rd_isnailed)
1707         {
1708                 relation->rd_targblock = InvalidBlockNumber;
1709                 if (relation->rd_rel->relkind == RELKIND_INDEX)
1710                 {
1711                         relation->rd_isvalid = false;           /* needs to be revalidated */
1712                         if (relation->rd_refcnt > 1)
1713                                 RelationReloadIndexInfo(relation);
1714                 }
1715                 return;
1716         }
1717
1718         /*
1719          * Even non-system indexes should not be blown away if they are open and
1720          * have valid index support information.  This avoids problems with active
1721          * use of the index support information.  As with nailed indexes, we
1722          * re-read the pg_class row to handle possible physical relocation of the
1723          * index, and we check for pg_index updates too.
1724          */
1725         if (relation->rd_rel->relkind == RELKIND_INDEX &&
1726                 relation->rd_refcnt > 0 &&
1727                 relation->rd_indexcxt != NULL)
1728         {
1729                 relation->rd_isvalid = false;   /* needs to be revalidated */
1730                 RelationReloadIndexInfo(relation);
1731                 return;
1732         }
1733
1734         /*
1735          * Remove relation from hash tables
1736          *
1737          * Note: we might be reinserting it momentarily, but we must not have it
1738          * visible in the hash tables until it's valid again, so don't try to
1739          * optimize this away...
1740          */
1741         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
1742         RelationCacheDelete(relation);
1743         MemoryContextSwitchTo(oldcxt);
1744
1745         /* Clear out catcache's entries for this relation */
1746         CatalogCacheFlushRelation(RelationGetRelid(relation));
1747
1748         /*
1749          * Free all the subsidiary data structures of the relcache entry. We
1750          * cannot free rd_att if we are trying to rebuild the entry, however,
1751          * because pointers to it may be cached in various places. The rule
1752          * manager might also have pointers into the rewrite rules. So to begin
1753          * with, we can only get rid of these fields:
1754          */
1755         FreeTriggerDesc(relation->trigdesc);
1756         if (relation->rd_indextuple)
1757                 pfree(relation->rd_indextuple);
1758         if (relation->rd_am)
1759                 pfree(relation->rd_am);
1760         if (relation->rd_rel)
1761                 pfree(relation->rd_rel);
1762         if (relation->rd_options)
1763                 pfree(relation->rd_options);
1764         list_free(relation->rd_indexlist);
1765         if (relation->rd_indexcxt)
1766                 MemoryContextDelete(relation->rd_indexcxt);
1767
1768         /*
1769          * If we're really done with the relcache entry, blow it away. But if
1770          * someone is still using it, reconstruct the whole deal without moving
1771          * the physical RelationData record (so that the someone's pointer is
1772          * still valid).
1773          */
1774         if (!rebuild)
1775         {
1776                 /* ok to zap remaining substructure */
1777                 flush_rowtype_cache(old_reltype);
1778                 /* can't use DecrTupleDescRefCount here */
1779                 Assert(relation->rd_att->tdrefcount > 0);
1780                 if (--relation->rd_att->tdrefcount == 0)
1781                         FreeTupleDesc(relation->rd_att);
1782                 if (relation->rd_rulescxt)
1783                         MemoryContextDelete(relation->rd_rulescxt);
1784                 pfree(relation);
1785         }
1786         else
1787         {
1788                 /*
1789                  * When rebuilding an open relcache entry, must preserve ref count and
1790                  * rd_createSubid/rd_newRelfilenodeSubid state.  Also attempt to
1791                  * preserve the tupledesc and rewrite-rule substructures in place.
1792                  * (Note: the refcount mechanism for tupledescs may eventually ensure
1793                  * that we don't really need to preserve the tupledesc in-place, but
1794                  * for now there are still a lot of places that assume an open rel's
1795                  * tupledesc won't move.)
1796                  *
1797                  * Note that this process does not touch CurrentResourceOwner; which
1798                  * is good because whatever ref counts the entry may have do not
1799                  * necessarily belong to that resource owner.
1800                  */
1801                 Oid                     save_relid = RelationGetRelid(relation);
1802                 int                     old_refcnt = relation->rd_refcnt;
1803                 SubTransactionId old_createSubid = relation->rd_createSubid;
1804                 SubTransactionId old_newRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
1805                 struct PgStat_TableStatus *old_pgstat_info = relation->pgstat_info;
1806                 TupleDesc       old_att = relation->rd_att;
1807                 RuleLock   *old_rules = relation->rd_rules;
1808                 MemoryContext old_rulescxt = relation->rd_rulescxt;
1809
1810                 if (RelationBuildDesc(save_relid, relation) != relation)
1811                 {
1812                         /* Should only get here if relation was deleted */
1813                         flush_rowtype_cache(old_reltype);
1814                         Assert(old_att->tdrefcount > 0);
1815                         if (--old_att->tdrefcount == 0)
1816                                 FreeTupleDesc(old_att);
1817                         if (old_rulescxt)
1818                                 MemoryContextDelete(old_rulescxt);
1819                         pfree(relation);
1820                         elog(ERROR, "relation %u deleted while still in use", save_relid);
1821                 }
1822                 relation->rd_refcnt = old_refcnt;
1823                 relation->rd_createSubid = old_createSubid;
1824                 relation->rd_newRelfilenodeSubid = old_newRelfilenodeSubid;
1825                 relation->pgstat_info = old_pgstat_info;
1826
1827                 if (equalTupleDescs(old_att, relation->rd_att))
1828                 {
1829                         /* needn't flush typcache here */
1830                         Assert(relation->rd_att->tdrefcount == 1);
1831                         if (--relation->rd_att->tdrefcount == 0)
1832                                 FreeTupleDesc(relation->rd_att);
1833                         relation->rd_att = old_att;
1834                 }
1835                 else
1836                 {
1837                         flush_rowtype_cache(old_reltype);
1838                         Assert(old_att->tdrefcount > 0);
1839                         if (--old_att->tdrefcount == 0)
1840                                 FreeTupleDesc(old_att);
1841                 }
1842                 if (equalRuleLocks(old_rules, relation->rd_rules))
1843                 {
1844                         if (relation->rd_rulescxt)
1845                                 MemoryContextDelete(relation->rd_rulescxt);
1846                         relation->rd_rules = old_rules;
1847                         relation->rd_rulescxt = old_rulescxt;
1848                 }
1849                 else
1850                 {
1851                         if (old_rulescxt)
1852                                 MemoryContextDelete(old_rulescxt);
1853                 }
1854         }
1855 }
1856
1857 /*
1858  * RelationFlushRelation
1859  *
1860  *       Rebuild the relation if it is open (refcount > 0), else blow it away.
1861  */
1862 static void
1863 RelationFlushRelation(Relation relation)
1864 {
1865         bool            rebuild;
1866
1867         if (relation->rd_createSubid != InvalidSubTransactionId ||
1868                 relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
1869         {
1870                 /*
1871                  * New relcache entries are always rebuilt, not flushed; else we'd
1872                  * forget the "new" status of the relation, which is a useful
1873                  * optimization to have.  Ditto for the new-relfilenode status.
1874                  */
1875                 rebuild = true;
1876         }
1877         else
1878         {
1879                 /*
1880                  * Pre-existing rels can be dropped from the relcache if not open.
1881                  */
1882                 rebuild = !RelationHasReferenceCountZero(relation);
1883         }
1884
1885         RelationClearRelation(relation, rebuild);
1886 }
1887
1888 /*
1889  * RelationForgetRelation - unconditionally remove a relcache entry
1890  *
1891  *                 External interface for destroying a relcache entry when we
1892  *                 drop the relation.
1893  */
1894 void
1895 RelationForgetRelation(Oid rid)
1896 {
1897         Relation        relation;
1898
1899         RelationIdCacheLookup(rid, relation);
1900
1901         if (!PointerIsValid(relation))
1902                 return;                                 /* not in cache, nothing to do */
1903
1904         if (!RelationHasReferenceCountZero(relation))
1905                 elog(ERROR, "relation %u is still open", rid);
1906
1907         /* Unconditionally destroy the relcache entry */
1908         RelationClearRelation(relation, false);
1909 }
1910
1911 /*
1912  *              RelationCacheInvalidateEntry
1913  *
1914  *              This routine is invoked for SI cache flush messages.
1915  *
1916  * Any relcache entry matching the relid must be flushed.  (Note: caller has
1917  * already determined that the relid belongs to our database or is a shared
1918  * relation.)
1919  *
1920  * We used to skip local relations, on the grounds that they could
1921  * not be targets of cross-backend SI update messages; but it seems
1922  * safer to process them, so that our *own* SI update messages will
1923  * have the same effects during CommandCounterIncrement for both
1924  * local and nonlocal relations.
1925  */
1926 void
1927 RelationCacheInvalidateEntry(Oid relationId)
1928 {
1929         Relation        relation;
1930
1931         RelationIdCacheLookup(relationId, relation);
1932
1933         if (PointerIsValid(relation))
1934         {
1935                 relcacheInvalsReceived++;
1936                 RelationFlushRelation(relation);
1937         }
1938 }
1939
1940 /*
1941  * RelationCacheInvalidate
1942  *       Blow away cached relation descriptors that have zero reference counts,
1943  *       and rebuild those with positive reference counts.      Also reset the smgr
1944  *       relation cache.
1945  *
1946  *       This is currently used only to recover from SI message buffer overflow,
1947  *       so we do not touch new-in-transaction relations; they cannot be targets
1948  *       of cross-backend SI updates (and our own updates now go through a
1949  *       separate linked list that isn't limited by the SI message buffer size).
1950  *       Likewise, we need not discard new-relfilenode-in-transaction hints,
1951  *       since any invalidation of those would be a local event.
1952  *
1953  *       We do this in two phases: the first pass deletes deletable items, and
1954  *       the second one rebuilds the rebuildable items.  This is essential for
1955  *       safety, because hash_seq_search only copes with concurrent deletion of
1956  *       the element it is currently visiting.  If a second SI overflow were to
1957  *       occur while we are walking the table, resulting in recursive entry to
1958  *       this routine, we could crash because the inner invocation blows away
1959  *       the entry next to be visited by the outer scan.  But this way is OK,
1960  *       because (a) during the first pass we won't process any more SI messages,
1961  *       so hash_seq_search will complete safely; (b) during the second pass we
1962  *       only hold onto pointers to nondeletable entries.
1963  *
1964  *       The two-phase approach also makes it easy to ensure that we process
1965  *       nailed-in-cache indexes before other nondeletable items, and that we
1966  *       process pg_class_oid_index first of all.  In scenarios where a nailed
1967  *       index has been given a new relfilenode, we have to detect that update
1968  *       before the nailed index is used in reloading any other relcache entry.
1969  */
1970 void
1971 RelationCacheInvalidate(void)
1972 {
1973         HASH_SEQ_STATUS status;
1974         RelIdCacheEnt *idhentry;
1975         Relation        relation;
1976         List       *rebuildFirstList = NIL;
1977         List       *rebuildList = NIL;
1978         ListCell   *l;
1979
1980         /* Phase 1 */
1981         hash_seq_init(&status, RelationIdCache);
1982
1983         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
1984         {
1985                 relation = idhentry->reldesc;
1986
1987                 /* Must close all smgr references to avoid leaving dangling ptrs */
1988                 RelationCloseSmgr(relation);
1989
1990                 /* Ignore new relations, since they are never SI targets */
1991                 if (relation->rd_createSubid != InvalidSubTransactionId)
1992                         continue;
1993
1994                 relcacheInvalsReceived++;
1995
1996                 if (RelationHasReferenceCountZero(relation))
1997                 {
1998                         /* Delete this entry immediately */
1999                         Assert(!relation->rd_isnailed);
2000                         RelationClearRelation(relation, false);
2001                 }
2002                 else
2003                 {
2004                         /*
2005                          * Add this entry to list of stuff to rebuild in second pass.
2006                          * pg_class_oid_index goes on the front of rebuildFirstList, other
2007                          * nailed indexes on the back, and everything else into
2008                          * rebuildList (in no particular order).
2009                          */
2010                         if (relation->rd_isnailed &&
2011                                 relation->rd_rel->relkind == RELKIND_INDEX)
2012                         {
2013                                 if (RelationGetRelid(relation) == ClassOidIndexId)
2014                                         rebuildFirstList = lcons(relation, rebuildFirstList);
2015                                 else
2016                                         rebuildFirstList = lappend(rebuildFirstList, relation);
2017                         }
2018                         else
2019                                 rebuildList = lcons(relation, rebuildList);
2020                 }
2021         }
2022
2023         /*
2024          * Now zap any remaining smgr cache entries.  This must happen before we
2025          * start to rebuild entries, since that may involve catalog fetches which
2026          * will re-open catalog files.
2027          */
2028         smgrcloseall();
2029
2030         /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2031         foreach(l, rebuildFirstList)
2032         {
2033                 relation = (Relation) lfirst(l);
2034                 RelationClearRelation(relation, true);
2035         }
2036         list_free(rebuildFirstList);
2037         foreach(l, rebuildList)
2038         {
2039                 relation = (Relation) lfirst(l);
2040                 RelationClearRelation(relation, true);
2041         }
2042         list_free(rebuildList);
2043 }
2044
2045 /*
2046  * AtEOXact_RelationCache
2047  *
2048  *      Clean up the relcache at main-transaction commit or abort.
2049  *
2050  * Note: this must be called *before* processing invalidation messages.
2051  * In the case of abort, we don't want to try to rebuild any invalidated
2052  * cache entries (since we can't safely do database accesses).  Therefore
2053  * we must reset refcnts before handling pending invalidations.
2054  *
2055  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2056  * ResourceOwner mechanism.  This routine just does a debugging
2057  * cross-check that no pins remain.  However, we also need to do special
2058  * cleanup when the current transaction created any relations or made use
2059  * of forced index lists.
2060  */
2061 void
2062 AtEOXact_RelationCache(bool isCommit)
2063 {
2064         HASH_SEQ_STATUS status;
2065         RelIdCacheEnt *idhentry;
2066
2067         /*
2068          * To speed up transaction exit, we want to avoid scanning the relcache
2069          * unless there is actually something for this routine to do.  Other than
2070          * the debug-only Assert checks, most transactions don't create any work
2071          * for us to do here, so we keep a static flag that gets set if there is
2072          * anything to do.      (Currently, this means either a relation is created in
2073          * the current xact, or one is given a new relfilenode, or an index list
2074          * is forced.)  For simplicity, the flag remains set till end of top-level
2075          * transaction, even though we could clear it at subtransaction end in
2076          * some cases.
2077          */
2078         if (!need_eoxact_work
2079 #ifdef USE_ASSERT_CHECKING
2080                 && !assert_enabled
2081 #endif
2082                 )
2083                 return;
2084
2085         hash_seq_init(&status, RelationIdCache);
2086
2087         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2088         {
2089                 Relation        relation = idhentry->reldesc;
2090
2091                 /*
2092                  * The relcache entry's ref count should be back to its normal
2093                  * not-in-a-transaction state: 0 unless it's nailed in cache.
2094                  *
2095                  * In bootstrap mode, this is NOT true, so don't check it --- the
2096                  * bootstrap code expects relations to stay open across start/commit
2097                  * transaction calls.  (That seems bogus, but it's not worth fixing.)
2098                  */
2099 #ifdef USE_ASSERT_CHECKING
2100                 if (!IsBootstrapProcessingMode())
2101                 {
2102                         int                     expected_refcnt;
2103
2104                         expected_refcnt = relation->rd_isnailed ? 1 : 0;
2105                         Assert(relation->rd_refcnt == expected_refcnt);
2106                 }
2107 #endif
2108
2109                 /*
2110                  * Is it a relation created in the current transaction?
2111                  *
2112                  * During commit, reset the flag to zero, since we are now out of the
2113                  * creating transaction.  During abort, simply delete the relcache
2114                  * entry --- it isn't interesting any longer.  (NOTE: if we have
2115                  * forgotten the new-ness of a new relation due to a forced cache
2116                  * flush, the entry will get deleted anyway by shared-cache-inval
2117                  * processing of the aborted pg_class insertion.)
2118                  */
2119                 if (relation->rd_createSubid != InvalidSubTransactionId)
2120                 {
2121                         if (isCommit)
2122                                 relation->rd_createSubid = InvalidSubTransactionId;
2123                         else
2124                         {
2125                                 RelationClearRelation(relation, false);
2126                                 continue;
2127                         }
2128                 }
2129
2130                 /*
2131                  * Likewise, reset the hint about the relfilenode being new.
2132                  */
2133                 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2134
2135                 /*
2136                  * Flush any temporary index list.
2137                  */
2138                 if (relation->rd_indexvalid == 2)
2139                 {
2140                         list_free(relation->rd_indexlist);
2141                         relation->rd_indexlist = NIL;
2142                         relation->rd_oidindex = InvalidOid;
2143                         relation->rd_indexvalid = 0;
2144                 }
2145         }
2146
2147         /* Once done with the transaction, we can reset need_eoxact_work */
2148         need_eoxact_work = false;
2149 }
2150
2151 /*
2152  * AtEOSubXact_RelationCache
2153  *
2154  *      Clean up the relcache at sub-transaction commit or abort.
      *
      *      isCommit is true for a subtransaction commit and false for an abort.
      *      mySubid identifies the subtransaction that is ending; parentSubid is
      *      the subtransaction that takes over its relcache state on commit.
      *
      *      need_eoxact_work is only read here, never reset; the flag is cleared
      *      by the end-of-transaction cleanup that precedes this function.
2155  *
2156  * Note: this must be called *before* processing invalidation messages.
2157  */
2158 void
2159 AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
2160                                                   SubTransactionId parentSubid)
2161 {
2162         HASH_SEQ_STATUS status;
2163         RelIdCacheEnt *idhentry;
2164
2165         /*
2166          * Skip the relcache scan if nothing to do --- see notes for
2167          * AtEOXact_RelationCache.
2168          */
2169         if (!need_eoxact_work)
2170                 return;
2171
2172         hash_seq_init(&status, RelationIdCache);
2173
2174         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2175         {
2176                 Relation        relation = idhentry->reldesc;
2177
2178                 /*
2179                  * Is it a relation created in the current subtransaction?
2180                  *
2181                  * During subcommit, mark it as belonging to the parent, instead.
2182                  * During subabort, simply delete the relcache entry.
2183                  */
2184                 if (relation->rd_createSubid == mySubid)
2185                 {
2186                         if (isCommit)
2187                                 relation->rd_createSubid = parentSubid;
2188                         else
2189                         {
2190                                 Assert(RelationHasReferenceCountZero(relation));
2191                                 RelationClearRelation(relation, false);
                                     /* entry was handed to RelationClearRelation; skip the other fix-ups */
2192                                 continue;
2193                         }
2194                 }
2195
2196                 /*
2197                  * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
                      *
                      * On commit the hint is re-labelled with parentSubid; on abort it
                      * is reset to InvalidSubTransactionId.
2198                  */
2199                 if (relation->rd_newRelfilenodeSubid == mySubid)
2200                 {
2201                         if (isCommit)
2202                                 relation->rd_newRelfilenodeSubid = parentSubid;
2203                         else
2204                                 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2205                 }
2206
2207                 /*
2208                  * Flush any temporary index list.
                      *
                      * Setting rd_indexvalid back to 0 makes RelationGetIndexList
                      * recompute the list on its next call.  This is done on both
                      * commit and abort.
2209                  */
2210                 if (relation->rd_indexvalid == 2)
2211                 {
2212                         list_free(relation->rd_indexlist);
2213                         relation->rd_indexlist = NIL;
2214                         relation->rd_oidindex = InvalidOid;
2215                         relation->rd_indexvalid = 0;
2216                 }
2217         }
2218 }
2219
2220 /*
2221  * RelationCacheMarkNewRelfilenode
2222  *
2223  *      Mark the rel as having been given a new relfilenode in the current
2224  *      (sub) transaction.  This is a hint that can be used to optimize
2225  *      later operations on the rel in the same transaction.
      *
      *      The hint is stored in rd_newRelfilenodeSubid as the current
      *      subtransaction ID.  AtEOSubXact_RelationCache hands it to the parent
      *      on subcommit and resets it to InvalidSubTransactionId on subabort.
2226  */
2227 void
2228 RelationCacheMarkNewRelfilenode(Relation rel)
2229 {
2230         /* Mark it... */
2231         rel->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
2232         /* ... and now we have eoxact cleanup work to do */
             /* (AtEOSubXact_RelationCache skips its scan unless this flag is set) */
2233         need_eoxact_work = true;
2234 }
2235
2236
2237 /*
2238  *              RelationBuildLocalRelation
2239  *                      Build a relcache entry for an about-to-be-created relation,
2240  *                      and enter it into the relcache.
      *
      *              relname and relnamespace give the new relation's name and
      *              namespace.  tupDesc is copied into the cache context, not
      *              adopted.  relid becomes both the relation OID and the initial
      *              relfilenode.  reltablespace is stored in rd_rel->reltablespace.
      *              shared_relation must agree with IsSharedRelation(relid),
      *              otherwise an ERROR is raised.
      *
      *              Returns the new entry with its reference count already
      *              incremented on behalf of the caller.  As a side effect,
      *              need_eoxact_work is set.
2241  */
2242 Relation
2243 RelationBuildLocalRelation(const char *relname,
2244                                                    Oid relnamespace,
2245                                                    TupleDesc tupDesc,
2246                                                    Oid relid,
2247                                                    Oid reltablespace,
2248                                                    bool shared_relation)
2249 {
2250         Relation        rel;
2251         MemoryContext oldcxt;
2252         int                     natts = tupDesc->natts;
2253         int                     i;
2254         bool            has_not_null;
2255         bool            nailit;
2256
2257         AssertArg(natts >= 0);
2258
2259         /*
2260          * check for creation of a rel that must be nailed in cache.
2261          *
2262          * XXX this list had better match RelationCacheInitializePhase2's list.
2263          */
2264         switch (relid)
2265         {
2266                 case RelationRelationId:
2267                 case AttributeRelationId:
2268                 case ProcedureRelationId:
2269                 case TypeRelationId:
2270                         nailit = true;
2271                         break;
2272                 default:
2273                         nailit = false;
2274                         break;
2275         }
2276
2277         /*
2278          * check that hardwired list of shared rels matches what's in the
2279          * bootstrap .bki file.  If you get a failure here during initdb, you
2280          * probably need to fix IsSharedRelation() to match whatever you've done
2281          * to the set of shared relations.
2282          */
2283         if (shared_relation != IsSharedRelation(relid))
2284                 elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
2285                          relname, relid);
2286
2287         /*
2288          * switch to the cache context to create the relcache entry.
              *
              * Everything allocated from here until the matching switch back to
              * oldcxt is allocated while CacheMemoryContext is current.
2289          */
2290         if (!CacheMemoryContext)
2291                 CreateCacheMemoryContext();
2292
2293         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2294
2295         /*
2296          * allocate a new relation descriptor and fill in basic state fields.
2297          */
2298         rel = (Relation) palloc0(sizeof(RelationData));
2299
2300         rel->rd_targblock = InvalidBlockNumber;
2301
2302         /* make sure relation is marked as having no open file yet */
2303         rel->rd_smgr = NULL;
2304
2305         /* mark it nailed if appropriate */
2306         rel->rd_isnailed = nailit;
2307
             /* a nailed entry starts with a reference count of 1, any other with 0 */
2308         rel->rd_refcnt = nailit ? 1 : 0;
2309
2310         /* it's being created in this transaction */
2311         rel->rd_createSubid = GetCurrentSubTransactionId();
2312         rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2313
2314         /* must flag that we have rels created in this transaction */
2315         need_eoxact_work = true;
2316
2317         /* is it a temporary relation? */
2318         rel->rd_istemp = isTempNamespace(relnamespace);
2319
2320         /*
2321          * create a new tuple descriptor from the one passed in.  We do this
2322          * partly to copy it into the cache context, and partly because the new
2323          * relation can't have any defaults or constraints yet; they have to be
2324          * added in later steps, because they require additions to multiple system
2325          * catalogs.  We can copy attnotnull constraints here, however.
              *
              * attnotnull is re-copied by hand in the loop below; presumably
              * CreateTupleDescCopy does not carry it over --- TODO confirm.
2326          */
2327         rel->rd_att = CreateTupleDescCopy(tupDesc);
2328         rel->rd_att->tdrefcount = 1;    /* mark as refcounted */
2329         has_not_null = false;
2330         for (i = 0; i < natts; i++)
2331         {
2332                 rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
2333                 has_not_null |= tupDesc->attrs[i]->attnotnull;
2334         }
2335
             /* a TupleConstr is attached only when some column is NOT NULL */
2336         if (has_not_null)
2337         {
2338                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
2339
2340                 constr->has_not_null = true;
2341                 rel->rd_att->constr = constr;
2342         }
2343
2344         /*
2345          * initialize relation tuple form (caller may add/override data later)
2346          */
2347         rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
2348
2349         namestrcpy(&rel->rd_rel->relname, relname);
2350         rel->rd_rel->relnamespace = relnamespace;
2351
2352         rel->rd_rel->relkind = RELKIND_UNCATALOGED;
2353         rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
2354         rel->rd_rel->relnatts = natts;
2355         rel->rd_rel->reltype = InvalidOid;
2356         /* needed when bootstrapping: */
2357         rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
2358
2359         /*
2360          * Insert relation physical and logical identifiers (OIDs) into the right
2361          * places.      Note that the physical ID (relfilenode) is initially the same
2362          * as the logical ID (OID).
2363          */
2364         rel->rd_rel->relisshared = shared_relation;
2365
2366         RelationGetRelid(rel) = relid;
2367
2368         for (i = 0; i < natts; i++)
2369                 rel->rd_att->attrs[i]->attrelid = relid;
2370
2371         rel->rd_rel->relfilenode = relid;
2372         rel->rd_rel->reltablespace = reltablespace;
2373
2374         RelationInitLockInfo(rel);      /* see lmgr.c */
2375
2376         RelationInitPhysicalAddr(rel);
2377
2378         /*
2379          * Okay to insert into the relcache hash tables.
2380          */
2381         RelationCacheInsert(rel);
2382
2383         /*
2384          * done building relcache entry.
2385          */
2386         MemoryContextSwitchTo(oldcxt);
2387
2388         /* It's fully valid */
2389         rel->rd_isvalid = true;
2390
2391         /*
2392          * Caller expects us to pin the returned entry.
2393          */
2394         RelationIncrementReferenceCount(rel);
2395
2396         return rel;
2397 }
2398
2399 /*
2400  *              RelationCacheInitialize
2401  *
2402  *              This initializes the relation descriptor cache.  At the time
2403  *              that this is invoked, we can't do database access yet (mainly
2404  *              because the transaction subsystem is not up); all we are doing
2405  *              is making an empty cache hashtable.  This must be done before
2406  *              starting the initialization transaction, because otherwise
2407  *              AtEOXact_RelationCache would crash if that transaction aborts
2408  *              before we can get the relcache set up.
      *
      *              The table is stored in RelationIdCache.  It is keyed by Oid,
      *              holds RelIdCacheEnt entries and hashes with oid_hash.  It is
      *              created while CacheMemoryContext is current; that context is
      *              created first if it does not exist yet.
2409  */
2410
     /* Size argument passed to hash_create for the relcache hashtable. */
2411 #define INITRELCACHESIZE                400
2412
2413 void
2414 RelationCacheInitialize(void)
2415 {
2416         MemoryContext oldcxt;
2417         HASHCTL         ctl;
2418
2419         /*
2420          * switch to cache memory context
2421          */
2422         if (!CacheMemoryContext)
2423                 CreateCacheMemoryContext();
2424
2425         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2426
2427         /*
2428          * create hashtable that indexes the relcache
              *
              * ctl is zeroed first so that only the three fields set below are
              * non-zero; HASH_ELEM | HASH_FUNCTION names those fields to
              * hash_create.
2429          */
2430         MemSet(&ctl, 0, sizeof(ctl));
2431         ctl.keysize = sizeof(Oid);
2432         ctl.entrysize = sizeof(RelIdCacheEnt);
2433         ctl.hash = oid_hash;
2434         RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
2435                                                                   &ctl, HASH_ELEM | HASH_FUNCTION);
2436
2437         MemoryContextSwitchTo(oldcxt);
2438 }
2439
2440 /*
2441  *              RelationCacheInitializePhase2
2442  *
2443  *              This is called as soon as the catcache and transaction system
2444  *              are functional.  At this point we can actually read data from
2445  *              the system catalogs.  We first try to read pre-computed relcache
2446  *              entries from the pg_internal.init file.  If that's missing or
2447  *              broken, make phony entries for the minimum set of nailed-in-cache
2448  *              relations.      Then (unless bootstrapping) make sure we have entries
2449  *              for the critical system indexes.  Once we've done all this, we
2450  *              have enough infrastructure to open any system catalog or use any
2451  *              catcache.  The last step is to rewrite pg_internal.init if needed.
      *
      *              In bootstrap mode the function returns right after the formrdesc
      *              calls.  The index loading, fix-up scan and cache-file write are
      *              all skipped in that case.
2452  */
2453 void
2454 RelationCacheInitializePhase2(void)
2455 {
2456         HASH_SEQ_STATUS status;
2457         RelIdCacheEnt *idhentry;
2458         MemoryContext oldcxt;
             /* set when the formrdesc path is taken; drives the fix-up and the rewrite */
2459         bool            needNewCacheFile = false;
2460
2461         /*
2462          * switch to cache memory context
2463          */
2464         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2465
2466         /*
2467          * Try to load the relcache cache file.  If unsuccessful, bootstrap the
2468          * cache with pre-made descriptors for the critical "nailed-in" system
2469          * catalogs.
              *
              * In bootstrap mode load_relcache_init_file is not even attempted,
              * because the || short-circuits.
2470          */
2471         if (IsBootstrapProcessingMode() ||
2472                 !load_relcache_init_file())
2473         {
2474                 needNewCacheFile = true;
2475
2476                 formrdesc("pg_class", PG_CLASS_RELTYPE_OID,
2477                                   true, Natts_pg_class, Desc_pg_class);
2478                 formrdesc("pg_attribute", PG_ATTRIBUTE_RELTYPE_OID,
2479                                   false, Natts_pg_attribute, Desc_pg_attribute);
2480                 formrdesc("pg_proc", PG_PROC_RELTYPE_OID,
2481                                   true, Natts_pg_proc, Desc_pg_proc);
2482                 formrdesc("pg_type", PG_TYPE_RELTYPE_OID,
2483                                   true, Natts_pg_type, Desc_pg_type);
2484
                     /*
                      * Being a preprocessor macro, NUM_CRITICAL_RELS stays defined for
                      * the rest of the file even though the #define sits in this block.
                      */
2485 #define NUM_CRITICAL_RELS       4       /* fix if you change list above */
2486         }
2487
2488         MemoryContextSwitchTo(oldcxt);
2489
2490         /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
2491         if (IsBootstrapProcessingMode())
2492                 return;
2493
2494         /*
2495          * If we didn't get the critical system indexes loaded into relcache, do
2496          * so now.      These are critical because the catcache and/or opclass cache
2497          * depend on them for fetches done during relcache load.  Thus, we have an
2498          * infinite-recursion problem.  We can break the recursion by doing
2499          * heapscans instead of indexscans at certain key spots. To avoid hobbling
2500          * performance, we only want to do that until we have the critical indexes
2501          * loaded into relcache.  Thus, the flag criticalRelcachesBuilt is used to
2502          * decide whether to do heapscan or indexscan at the key spots, and we set
2503          * it true after we've loaded the critical indexes.
2504          *
2505          * The critical indexes are marked as "nailed in cache", partly to make it
2506          * easy for load_relcache_init_file to count them, but mainly because we
2507          * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
2508          * true.  (NOTE: perhaps it would be possible to reload them by
2509          * temporarily setting criticalRelcachesBuilt to false again.  For now,
2510          * though, we just nail 'em in.)
2511          *
2512          * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
2513          * in the same way as the others, because the critical catalogs don't
2514          * (currently) have any rules or triggers, and so these indexes can be
2515          * rebuilt without inducing recursion.  However they are used during
2516          * relcache load when a rel does have rules or triggers, so we choose to
2517          * nail them for performance reasons.
2518          */
2519         if (!criticalRelcachesBuilt)
2520         {
2521                 Relation        ird;
2522
                     /*
                      * NOTE(review): the macro dereferences the RelationBuildDesc result
                      * without checking it.  Confirm that a failed lookup cannot hand
                      * back NULL here.
                      */
2523 #define LOAD_CRIT_INDEX(indexoid) \
2524                 do { \
2525                         ird = RelationBuildDesc((indexoid), NULL); \
2526                         ird->rd_isnailed = true; \
2527                         ird->rd_refcnt = 1; \
2528                 } while (0)
2529
2530                 LOAD_CRIT_INDEX(ClassOidIndexId);
2531                 LOAD_CRIT_INDEX(AttributeRelidNumIndexId);
2532                 LOAD_CRIT_INDEX(IndexRelidIndexId);
2533                 LOAD_CRIT_INDEX(OpclassOidIndexId);
2534                 LOAD_CRIT_INDEX(AccessMethodStrategyIndexId);
2535                 LOAD_CRIT_INDEX(AccessMethodProcedureIndexId);
2536                 LOAD_CRIT_INDEX(OperatorOidIndexId);
2537                 LOAD_CRIT_INDEX(RewriteRelRulenameIndexId);
2538                 LOAD_CRIT_INDEX(TriggerRelidNameIndexId);
2539
                     /* like NUM_CRITICAL_RELS, this macro is visible file-wide from here on */
2540 #define NUM_CRITICAL_INDEXES    9               /* fix if you change list above */
2541
2542                 criticalRelcachesBuilt = true;
2543         }
2544
2545         /*
2546          * Now, scan all the relcache entries and update anything that might be
2547          * wrong in the results from formrdesc or the relcache cache file. If we
2548          * faked up relcache entries using formrdesc, then read the real pg_class
2549          * rows and replace the fake entries with them. Also, if any of the
2550          * relcache entries have rules or triggers, load that info the hard way
2551          * since it isn't recorded in the cache file.
2552          */
2553         hash_seq_init(&status, RelationIdCache);
2554
2555         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2556         {
2557                 Relation        relation = idhentry->reldesc;
2558
2559                 /*
2560                  * If it's a faked-up entry, read the real pg_class tuple.
                      *
                      * The test selects every nailed entry whenever the formrdesc path
                      * was taken.  That presumably also matches the critical indexes
                      * nailed just above --- TODO confirm.
2561                  */
2562                 if (needNewCacheFile && relation->rd_isnailed)
2563                 {
2564                         HeapTuple       htup;
2565                         Form_pg_class relp;
2566
2567                         htup = SearchSysCache(RELOID,
2568                                                                 ObjectIdGetDatum(RelationGetRelid(relation)),
2569                                                                   0, 0, 0);
2570                         if (!HeapTupleIsValid(htup))
2571                                 elog(FATAL, "cache lookup failed for relation %u",
2572                                          RelationGetRelid(relation));
2573                         relp = (Form_pg_class) GETSTRUCT(htup);
2574
2575                         /*
2576                          * Copy tuple to relation->rd_rel. (See notes in
2577                          * AllocateRelationDesc())
                              *
                              * The existing rd_rel buffer is overwritten in place;
                              * CLASS_TUPLE_SIZE bytes are copied.
2578                          */
2579                         Assert(relation->rd_rel != NULL);
2580                         memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
2581
2582                         /* Update rd_options while we have the tuple */
2583                         if (relation->rd_options)
2584                                 pfree(relation->rd_options);
2585                         RelationParseRelOptions(relation, htup);
2586
2587                         /*
2588                          * Also update the derived fields in rd_att.
2589                          */
2590                         relation->rd_att->tdtypeid = relp->reltype;
2591                         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
2592                         relation->rd_att->tdhasoid = relp->relhasoids;
2593
                             /* relp points into htup, so release only after the last use above */
2594                         ReleaseSysCache(htup);
2595                 }
2596
2597                 /*
2598                  * Fix data that isn't saved in relcache cache file.
                      *
                      * Rules and triggers are built only when pg_class says the rel has
                      * them and the entry does not already carry them.
2599                  */
2600                 if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
2601                         RelationBuildRuleLock(relation);
2602                 if (relation->rd_rel->reltriggers > 0 && relation->trigdesc == NULL)
2603                         RelationBuildTriggers(relation);
2604         }
2605
2606         /*
2607          * Lastly, write out a new relcache cache file if one is needed.
2608          */
2609         if (needNewCacheFile)
2610         {
2611                 /*
2612                  * Force all the catcaches to finish initializing and thereby open the
2613                  * catalogs and indexes they use.  This will preload the relcache with
2614                  * entries for all the most important system catalogs and indexes, so
2615                  * that the init file will be most useful for future backends.
2616                  */
2617                 InitCatalogCachePhase2();
2618
2619                 /* now write the file */
2620                 write_relcache_init_file();
2621         }
2622 }
2623
2624 /*
2625  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
2626  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
2627  *
2628  * We need this kluge because we have to be able to access non-fixed-width
2629  * fields of pg_class and pg_index before we have the standard catalog caches
2630  * available.  We use predefined data that's set up in just the same way as
2631  * the bootstrapped reldescs used by formrdesc().  The resulting tupdesc is
2632  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
2633  * does it have a TupleConstr field.  But it's good enough for the purpose of
2634  * extracting fields.
      *
      * BuildHardcodedDescriptor does the shared work.  It creates a template
      * descriptor for natts attributes while CacheMemoryContext is current and
      * copies ATTRIBUTE_TUPLE_SIZE bytes of each attrs[] element into it.
      * hasoids is passed straight to CreateTemplateTupleDesc.  The function
      * writes result->attrs[0] unconditionally, so natts must be at least 1.
2635  */
2636 static TupleDesc
2637 BuildHardcodedDescriptor(int natts, Form_pg_attribute attrs, bool hasoids)
2638 {
2639         TupleDesc       result;
2640         MemoryContext oldcxt;
2641         int                     i;
2642
2643         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2644
2645         result = CreateTemplateTupleDesc(natts, hasoids);
2646         result->tdtypeid = RECORDOID;           /* not right, but we don't care */
2647         result->tdtypmod = -1;
2648
2649         for (i = 0; i < natts; i++)
2650         {
2651                 memcpy(result->attrs[i], &attrs[i], ATTRIBUTE_TUPLE_SIZE);
2652                 /* make sure attcacheoff is valid */
2653                 result->attrs[i]->attcacheoff = -1;
2654         }
2655
2656         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
2657         result->attrs[0]->attcacheoff = 0;
2658
2659         /* Note: we don't bother to set up a TupleConstr entry */
2660
2661         MemoryContextSwitchTo(oldcxt);
2662
2663         return result;
2664 }
2665
2666 static TupleDesc
2667 GetPgClassDescriptor(void)
2668 {
             /*
              * Return the hardcoded pg_class tuple descriptor.  It is built from
              * Desc_pg_class on the first call (with hasoids = true) and kept in a
              * function-local static, so later calls return the same pointer.
              */
2669         static TupleDesc pgclassdesc = NULL;
2670
2671         /* Already done? */
2672         if (pgclassdesc == NULL)
2673                 pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
2674                                                                                            Desc_pg_class,
2675                                                                                            true);
2676
2677         return pgclassdesc;
2678 }
2679
2680 static TupleDesc
2681 GetPgIndexDescriptor(void)
2682 {
             /*
              * Return the hardcoded pg_index tuple descriptor.  It is built from
              * Desc_pg_index on the first call (with hasoids = false) and kept in a
              * function-local static, so later calls return the same pointer.
              */
2683         static TupleDesc pgindexdesc = NULL;
2684
2685         /* Already done? */
2686         if (pgindexdesc == NULL)
2687                 pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
2688                                                                                            Desc_pg_index,
2689                                                                                            false);
2690
2691         return pgindexdesc;
2692 }
2693
2694 static void
2695 AttrDefaultFetch(Relation relation)
2696 {
             /*
              * Fill in the adbin strings of relation->rd_att->constr->defval[].
              *
              * The pg_attrdef catalog is scanned for rows whose adrelid equals the
              * relation's OID.  Each row is matched by adnum against the num_defval
              * entries of defval[].  The row's adbin value is converted with textout
              * and copied into CacheMemoryContext.
              *
              * rd_att->constr is dereferenced unconditionally, so the caller must
              * have set it up (with defval/num_defval) beforehand.  Every anomaly
              * found here is reported with WARNING only; the function never raises
              * an ERROR itself.
              */
2697         AttrDefault *attrdef = relation->rd_att->constr->defval;
2698         int                     ndef = relation->rd_att->constr->num_defval;
2699         Relation        adrel;
2700         SysScanDesc adscan;
2701         ScanKeyData skey;
2702         HeapTuple       htup;
2703         Datum           val;
2704         bool            isnull;
2705         int                     found;
2706         int                     i;
2707
2708         ScanKeyInit(&skey,
2709                                 Anum_pg_attrdef_adrelid,
2710                                 BTEqualStrategyNumber, F_OIDEQ,
2711                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2712
2713         adrel = heap_open(AttrDefaultRelationId, AccessShareLock);
2714         adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
2715                                                                 SnapshotNow, 1, &skey);
2716         found = 0;
2717
2718         while (HeapTupleIsValid(htup = systable_getnext(adscan)))
2719         {
2720                 Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
2721
                     /* look for the defval[] entry that belongs to this row's column */
2722                 for (i = 0; i < ndef; i++)
2723                 {
2724                         if (adform->adnum != attrdef[i].adnum)
2725                                 continue;
                             /*
                              * found counts entries whose adbin was still NULL when a
                              * matching row turned up.
                              *
                              * NOTE(review): on a duplicate row the code still falls
                              * through and overwrites adbin below; the previous string is
                              * not freed here.
                              */
2726                         if (attrdef[i].adbin != NULL)
2727                                 elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
2728                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2729                                          RelationGetRelationName(relation));
2730                         else
2731                                 found++;
2732
2733                         val = fastgetattr(htup,
2734                                                           Anum_pg_attrdef_adbin,
2735                                                           adrel->rd_att, &isnull);
2736                         if (isnull)
2737                                 elog(WARNING, "null adbin for attr %s of rel %s",
2738                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2739                                          RelationGetRelationName(relation));
2740                         else
2741                                 attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext,
2742                                                                  DatumGetCString(DirectFunctionCall1(textout,
2743                                                                                                                                          val)));
2744                         break;
2745                 }
2746
                     /* the loop ran to completion: no defval[] entry has this adnum */
2747                 if (i >= ndef)
2748                         elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
2749                                  adform->adnum, RelationGetRelationName(relation));
2750         }
2751
2752         systable_endscan(adscan);
2753         heap_close(adrel, AccessShareLock);
2754
2755         if (found != ndef)
2756                 elog(WARNING, "%d attrdef record(s) missing for rel %s",
2757                          ndef - found, RelationGetRelationName(relation));
2758 }
2759
2760 static void
2761 CheckConstraintFetch(Relation relation)
2762 {
             /*
              * Fill in the ccname/ccbin strings of relation->rd_att->constr->check[].
              *
              * The pg_constraint catalog is scanned for rows whose conrelid equals
              * the relation's OID.  Only rows with contype CONSTRAINT_CHECK are
              * used.  They are stored into check[] in the order the scan returns
              * them.  Both strings are copied into CacheMemoryContext.
              *
              * rd_att->constr is dereferenced unconditionally, so the caller must
              * have set it up (with check/num_check) beforehand.  Anomalies raise
              * ERROR here: more check rows than num_check, a null conbin, or fewer
              * rows than num_check.
              */
2763         ConstrCheck *check = relation->rd_att->constr->check;
2764         int                     ncheck = relation->rd_att->constr->num_check;
2765         Relation        conrel;
2766         SysScanDesc conscan;
2767         ScanKeyData skey[1];
2768         HeapTuple       htup;
2769         Datum           val;
2770         bool            isnull;
2771         int                     found = 0;
2772
2773         ScanKeyInit(&skey[0],
2774                                 Anum_pg_constraint_conrelid,
2775                                 BTEqualStrategyNumber, F_OIDEQ,
2776                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2777
2778         conrel = heap_open(ConstraintRelationId, AccessShareLock);
2779         conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
2780                                                                  SnapshotNow, 1, skey);
2781
2782         while (HeapTupleIsValid(htup = systable_getnext(conscan)))
2783         {
2784                 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
2785
2786                 /* We want check constraints only */
2787                 if (conform->contype != CONSTRAINT_CHECK)
2788                         continue;
2789
                     /* guard the check[] array bound before writing slot "found" */
2790                 if (found >= ncheck)
2791                         elog(ERROR, "unexpected constraint record found for rel %s",
2792                                  RelationGetRelationName(relation));
2793
2794                 check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
2795                                                                                                   NameStr(conform->conname));
2796
2797                 /* Grab and test conbin is actually set */
2798                 val = fastgetattr(htup,
2799                                                   Anum_pg_constraint_conbin,
2800                                                   conrel->rd_att, &isnull);
2801                 if (isnull)
2802                         elog(ERROR, "null conbin for rel %s",
2803                                  RelationGetRelationName(relation));
2804
2805                 check[found].ccbin = MemoryContextStrdup(CacheMemoryContext,
2806                                                                  DatumGetCString(DirectFunctionCall1(textout,
2807                                                                                                                                          val)));
2808                 found++;
2809         }
2810
2811         systable_endscan(conscan);
2812         heap_close(conrel, AccessShareLock);
2813
2814         if (found != ncheck)
2815                 elog(ERROR, "%d constraint record(s) missing for rel %s",
2816                          ncheck - found, RelationGetRelationName(relation));
2817 }
2818
2819 /*
2820  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
2821  *
2822  * The index list is created only if someone requests it.  We scan pg_index
2823  * to find relevant indexes, and add the list to the relcache entry so that
2824  * we won't have to compute it again.  Note that shared cache inval of a
2825  * relcache entry will delete the old list and set rd_indexvalid to 0,
2826  * so that we must recompute the index list on next request.  This handles
2827  * creation or deletion of an index.
2828  *
2829  * The returned list is guaranteed to be sorted in order by OID.  This is
2830  * needed by the executor, since for index types that we obtain exclusive
2831  * locks on when updating the index, all backends must lock the indexes in
2832  * the same order or we will get deadlocks (see ExecOpenIndices()).  Any
2833  * consistent ordering would do, but ordering by OID is easy.
2834  *
2835  * Since shared cache inval causes the relcache's copy of the list to go away,
2836  * we return a copy of the list palloc'd in the caller's context.  The caller
2837  * may list_free() the returned list after scanning it. This is necessary
2838  * since the caller will typically be doing syscache lookups on the relevant
2839  * indexes, and syscache lookup could cause SI messages to be processed!
2840  *
2841  * We also update rd_oidindex, which this module treats as effectively part
2842  * of the index list.  rd_oidindex is valid when rd_indexvalid isn't zero;
2843  * it is the pg_class OID of a unique index on OID when the relation has one,
2844  * and InvalidOid if there is no such index.
2845  */
2846 List *
2847 RelationGetIndexList(Relation relation)
2848 {
2849         Relation        indrel;
2850         SysScanDesc indscan;
2851         ScanKeyData skey;
2852         HeapTuple       htup;
2853         List       *result;
2854         Oid                     oidIndex;
2855         MemoryContext oldcxt;
2856
2857         /* Quick exit if we already computed the list. */
2858         if (relation->rd_indexvalid != 0)
2859                 return list_copy(relation->rd_indexlist);
2860
2861         /*
2862          * We build the list we intend to return (in the caller's context) while
2863          * doing the scan.      After successfully completing the scan, we copy that
2864          * list into the relcache entry.  This avoids cache-context memory leakage
2865          * if we get some sort of error partway through.
2866          */
2867         result = NIL;
2868         oidIndex = InvalidOid;
2869
2870         /* Prepare to scan pg_index for entries having indrelid = this rel. */
2871         ScanKeyInit(&skey,
2872                                 Anum_pg_index_indrelid,
2873                                 BTEqualStrategyNumber, F_OIDEQ,
2874                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2875
2876         indrel = heap_open(IndexRelationId, AccessShareLock);
2877         indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
2878                                                                  SnapshotNow, 1, &skey);
2879
2880         while (HeapTupleIsValid(htup = systable_getnext(indscan)))
2881         {
2882                 Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
2883
2884                 /* Add index's OID to result list in the proper order */
2885                 result = insert_ordered_oid(result, index->indexrelid);
2886
2887                 /* Check to see if it is a unique, non-partial btree index on OID */
2888                 if (index->indnatts == 1 &&
2889                         index->indisunique &&
2890                         index->indkey.values[0] == ObjectIdAttributeNumber &&
2891                         index->indclass.values[0] == OID_BTREE_OPS_OID &&
2892                         heap_attisnull(htup, Anum_pg_index_indpred))
2893                         oidIndex = index->indexrelid;
2894         }
2895
2896         systable_endscan(indscan);
2897         heap_close(indrel, AccessShareLock);
2898
2899         /* Now save a copy of the completed list in the relcache entry. */
2900         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2901         relation->rd_indexlist = list_copy(result);
2902         relation->rd_oidindex = oidIndex;
2903         relation->rd_indexvalid = 1;
2904         MemoryContextSwitchTo(oldcxt);
2905
2906         return result;
2907 }
2908
2909 /*
2910  * insert_ordered_oid
2911  *              Insert a new Oid into a sorted list of Oids, preserving ordering
2912  *
2913  * Building the ordered list this way is O(N^2), but with a pretty small
2914  * constant, so for the number of entries we expect it will probably be
2915  * faster than trying to apply qsort().  Most tables don't have very many
2916  * indexes...
2917  */
2918 static List *
2919 insert_ordered_oid(List *list, Oid datum)
2920 {
2921         ListCell   *prev;
2922
2923         /* Does the datum belong at the front? */
2924         if (list == NIL || datum < linitial_oid(list))
2925                 return lcons_oid(datum, list);
2926         /* No, so find the entry it belongs after */
2927         prev = list_head(list);
2928         for (;;)
2929         {
2930                 ListCell   *curr = lnext(prev);
2931
2932                 if (curr == NULL || datum < lfirst_oid(curr))
2933                         break;                          /* it belongs after 'prev', before 'curr' */
2934
2935                 prev = curr;
2936         }
2937         /* Insert datum into list after 'prev' */
2938         lappend_cell_oid(list, prev, datum);
2939         return list;
2940 }
2941
2942 /*
2943  * RelationSetIndexList -- externally force the index list contents
2944  *
2945  * This is used to temporarily override what we think the set of valid
2946  * indexes is (including the presence or absence of an OID index).
2947  * The forcing will be valid only until transaction commit or abort.
2948  *
2949  * This should only be applied to nailed relations, because in a non-nailed
2950  * relation the hacked index list could be lost at any time due to SI
2951  * messages.  In practice it is only used on pg_class (see REINDEX).
2952  *
2953  * It is up to the caller to make sure the given list is correctly ordered.
2954  */
2955 void
2956 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
2957 {
2958         MemoryContext oldcxt;
2959
2960         Assert(relation->rd_isnailed);
2961         /* Copy the list into the cache context (could fail for lack of mem) */
2962         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2963         indexIds = list_copy(indexIds);
2964         MemoryContextSwitchTo(oldcxt);
2965         /* Okay to replace old list */
2966         list_free(relation->rd_indexlist);
2967         relation->rd_indexlist = indexIds;
2968         relation->rd_oidindex = oidIndex;
2969         relation->rd_indexvalid = 2;    /* mark list as forced */
2970         /* must flag that we have a forced index list */
2971         need_eoxact_work = true;
2972 }
2973
2974 /*
2975  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
2976  *
2977  * Returns InvalidOid if there is no such index.
2978  */
2979 Oid
2980 RelationGetOidIndex(Relation relation)
2981 {
2982         List       *ilist;
2983
2984         /*
2985          * If relation doesn't have OIDs at all, caller is probably confused. (We
2986          * could just silently return InvalidOid, but it seems better to throw an
2987          * assertion.)
2988          */
2989         Assert(relation->rd_rel->relhasoids);
2990
2991         if (relation->rd_indexvalid == 0)
2992         {
2993                 /* RelationGetIndexList does the heavy lifting. */
2994                 ilist = RelationGetIndexList(relation);
2995                 list_free(ilist);
2996                 Assert(relation->rd_indexvalid != 0);
2997         }
2998
2999         return relation->rd_oidindex;
3000 }
3001
3002 /*
3003  * RelationGetIndexExpressions -- get the index expressions for an index
3004  *
3005  * We cache the result of transforming pg_index.indexprs into a node tree.
3006  * If the rel is not an index or has no expressional columns, we return NIL.
3007  * Otherwise, the returned tree is copied into the caller's memory context.
3008  * (We don't want to return a pointer to the relcache copy, since it could
3009  * disappear due to relcache invalidation.)
3010  */
3011 List *
3012 RelationGetIndexExpressions(Relation relation)
3013 {
3014         List       *result;
3015         Datum           exprsDatum;
3016         bool            isnull;
3017         char       *exprsString;
3018         MemoryContext oldcxt;
3019
3020         /* Quick exit if we already computed the result. */
3021         if (relation->rd_indexprs)
3022                 return (List *) copyObject(relation->rd_indexprs);
3023
3024         /* Quick exit if there is nothing to do. */
3025         if (relation->rd_indextuple == NULL ||
3026                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
3027                 return NIL;
3028
3029         /*
3030          * We build the tree we intend to return in the caller's context. After
3031          * successfully completing the work, we copy it into the relcache entry.
3032          * This avoids problems if we get some sort of error partway through.
3033          */
3034         exprsDatum = heap_getattr(relation->rd_indextuple,
3035                                                           Anum_pg_index_indexprs,
3036                                                           GetPgIndexDescriptor(),
3037                                                           &isnull);
3038         Assert(!isnull);
3039         exprsString = DatumGetCString(DirectFunctionCall1(textout, exprsDatum));
3040         result = (List *) stringToNode(exprsString);
3041         pfree(exprsString);
3042
3043         /*
3044          * Run the expressions through eval_const_expressions. This is not just an
3045          * optimization, but is necessary, because the planner will be comparing
3046          * them to similarly-processed qual clauses, and may fail to detect valid
3047          * matches without this.  We don't bother with canonicalize_qual, however.
3048          */
3049         result = (List *) eval_const_expressions((Node *) result);
3050
3051         /*
3052          * Also mark any coercion format fields as "don't care", so that the
3053          * planner can match to both explicit and implicit coercions.
3054          */
3055         set_coercionform_dontcare((Node *) result);
3056
3057         /* May as well fix opfuncids too */
3058         fix_opfuncids((Node *) result);
3059
3060         /* Now save a copy of the completed tree in the relcache entry. */
3061         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3062         relation->rd_indexprs = (List *) copyObject(result);
3063         MemoryContextSwitchTo(oldcxt);
3064
3065         return result;
3066 }
3067
3068 /*
3069  * RelationGetIndexPredicate -- get the index predicate for an index
3070  *
3071  * We cache the result of transforming pg_index.indpred into an implicit-AND
3072  * node tree (suitable for ExecQual).
3073  * If the rel is not an index or has no predicate, we return NIL.
3074  * Otherwise, the returned tree is copied into the caller's memory context.
3075  * (We don't want to return a pointer to the relcache copy, since it could
3076  * disappear due to relcache invalidation.)
3077  */
3078 List *
3079 RelationGetIndexPredicate(Relation relation)
3080 {
3081         List       *result;
3082         Datum           predDatum;
3083         bool            isnull;
3084         char       *predString;
3085         MemoryContext oldcxt;
3086
3087         /* Quick exit if we already computed the result. */
3088         if (relation->rd_indpred)
3089                 return (List *) copyObject(relation->rd_indpred);
3090
3091         /* Quick exit if there is nothing to do. */
3092         if (relation->rd_indextuple == NULL ||
3093                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
3094                 return NIL;
3095
3096         /*
3097          * We build the tree we intend to return in the caller's context. After
3098          * successfully completing the work, we copy it into the relcache entry.
3099          * This avoids problems if we get some sort of error partway through.
3100          */
3101         predDatum = heap_getattr(relation->rd_indextuple,
3102                                                          Anum_pg_index_indpred,
3103                                                          GetPgIndexDescriptor(),
3104                                                          &isnull);
3105         Assert(!isnull);
3106         predString = DatumGetCString(DirectFunctionCall1(textout, predDatum));
3107         result = (List *) stringToNode(predString);
3108         pfree(predString);
3109
3110         /*
3111          * Run the expression through const-simplification and canonicalization.
3112          * This is not just an optimization, but is necessary, because the planner
3113          * will be comparing it to similarly-processed qual clauses, and may fail
3114          * to detect valid matches without this.  This must match the processing
3115          * done to qual clauses in preprocess_expression()!  (We can skip the
3116          * stuff involving subqueries, however, since we don't allow any in index
3117          * predicates.)
3118          */
3119         result = (List *) eval_const_expressions((Node *) result);
3120
3121         result = (List *) canonicalize_qual((Expr *) result);
3122
3123         /*
3124          * Also mark any coercion format fields as "don't care", so that the
3125          * planner can match to both explicit and implicit coercions.
3126          */
3127         set_coercionform_dontcare((Node *) result);
3128
3129         /* Also convert to implicit-AND format */
3130         result = make_ands_implicit((Expr *) result);
3131
3132         /* May as well fix opfuncids too */
3133         fix_opfuncids((Node *) result);
3134
3135         /* Now save a copy of the completed tree in the relcache entry. */
3136         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3137         relation->rd_indpred = (List *) copyObject(result);
3138         MemoryContextSwitchTo(oldcxt);
3139
3140         return result;
3141 }
3142
3143
3144 /*
3145  *      load_relcache_init_file, write_relcache_init_file
3146  *
3147  *              In late 1992, we started regularly having databases with more than
3148  *              a thousand classes in them.  With this number of classes, it became
3149  *              critical to do indexed lookups on the system catalogs.
3150  *
3151  *              Bootstrapping these lookups is very hard.  We want to be able to
3152  *              use an index on pg_attribute, for example, but in order to do so,
3153  *              we must have read pg_attribute for the attributes in the index,
3154  *              which implies that we need to use the index.
3155  *
3156  *              In order to get around the problem, we do the following:
3157  *
3158  *                 +  When the database system is initialized (at initdb time), we
3159  *                        don't use indexes.  We do sequential scans.
3160  *
3161  *                 +  When the backend is started up in normal mode, we load an image
3162  *                        of the appropriate relation descriptors, in internal format,
3163  *                        from an initialization file in the data/base/... directory.
3164  *
3165  *                 +  If the initialization file isn't there, then we create the
3166  *                        relation descriptors using sequential scans and write 'em to
3167  *                        the initialization file for use by subsequent backends.
3168  *
3169  *              We could dispense with the initialization file and just build the
3170  *              critical reldescs the hard way on every backend startup, but that
3171  *              slows down backend startup noticeably.
3172  *
3173  *              We can in fact go further, and save more relcache entries than
3174  *              just the ones that are absolutely critical; this allows us to speed
3175  *              up backend startup by not having to build such entries the hard way.
3176  *              Presently, all the catalog and index entries that are referred to
3177  *              by catcaches are stored in the initialization file.
3178  *
3179  *              The same mechanism that detects when catcache and relcache entries
3180  *              need to be invalidated (due to catalog updates) also arranges to
3181  *              unlink the initialization file when its contents may be out of date.
3182  *              The file will then be rebuilt during the next backend startup.
3183  */
3184
3185 /*
3186  * load_relcache_init_file -- attempt to load cache from the init file
3187  *
3188  * If successful, return TRUE and set criticalRelcachesBuilt to true.
3189  * If not successful, return FALSE.
3190  *
3191  * NOTE: we assume we are already switched into CacheMemoryContext.
3192  */
3193 static bool
3194 load_relcache_init_file(void)
3195 {
3196         FILE       *fp;
3197         char            initfilename[MAXPGPATH];
3198         Relation   *rels;
3199         int                     relno,
3200                                 num_rels,
3201                                 max_rels,
3202                                 nailed_rels,
3203                                 nailed_indexes,
3204                                 magic;
3205         int                     i;
3206
3207         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3208                          DatabasePath, RELCACHE_INIT_FILENAME);
3209
3210         fp = AllocateFile(initfilename, PG_BINARY_R);
3211         if (fp == NULL)
3212                 return false;
3213
3214         /*
3215          * Read the index relcache entries from the file.  Note we will not enter
3216          * any of them into the cache if the read fails partway through; this
3217          * helps to guard against broken init files.
3218          */
3219         max_rels = 100;
3220         rels = (Relation *) palloc(max_rels * sizeof(Relation));
3221         num_rels = 0;
3222         nailed_rels = nailed_indexes = 0;
3223         initFileRelationIds = NIL;
3224
3225         /* check for correct magic number (compatible version) */
3226         if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3227                 goto read_failed;
3228         if (magic != RELCACHE_INIT_FILEMAGIC)
3229                 goto read_failed;
3230
3231         for (relno = 0;; relno++)
3232         {
3233                 Size            len;
3234                 size_t          nread;
3235                 Relation        rel;
3236                 Form_pg_class relform;
3237                 bool            has_not_null;
3238
3239                 /* first read the relation descriptor length */
3240                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3241                 {
3242                         if (nread == 0)
3243                                 break;                  /* end of file */
3244                         goto read_failed;
3245                 }
3246
3247                 /* safety check for incompatible relcache layout */
3248                 if (len != sizeof(RelationData))
3249                         goto read_failed;
3250
3251                 /* allocate another relcache header */
3252                 if (num_rels >= max_rels)
3253                 {
3254                         max_rels *= 2;
3255                         rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
3256                 }
3257
3258                 rel = rels[num_rels++] = (Relation) palloc(len);
3259
3260                 /* then, read the Relation structure */
3261                 if ((nread = fread(rel, 1, len, fp)) != len)
3262                         goto read_failed;
3263
3264                 /* next read the relation tuple form */
3265                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3266                         goto read_failed;
3267
3268                 relform = (Form_pg_class) palloc(len);
3269                 if ((nread = fread(relform, 1, len, fp)) != len)
3270                         goto read_failed;
3271
3272                 rel->rd_rel = relform;
3273
3274                 /* initialize attribute tuple forms */
3275                 rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
3276                                                                                           relform->relhasoids);
3277                 rel->rd_att->tdrefcount = 1;    /* mark as refcounted */
3278
3279                 rel->rd_att->tdtypeid = relform->reltype;
3280                 rel->rd_att->tdtypmod = -1;             /* unnecessary, but... */
3281
3282                 /* next read all the attribute tuple form data entries */
3283                 has_not_null = false;
3284                 for (i = 0; i < relform->relnatts; i++)
3285                 {
3286                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3287                                 goto read_failed;
3288                         if (len != ATTRIBUTE_TUPLE_SIZE)
3289                                 goto read_failed;
3290                         if ((nread = fread(rel->rd_att->attrs[i], 1, len, fp)) != len)
3291                                 goto read_failed;
3292
3293                         has_not_null |= rel->rd_att->attrs[i]->attnotnull;
3294                 }
3295
3296                 /* next read the access method specific field */
3297                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3298                         goto read_failed;
3299                 if (len > 0)
3300                 {
3301                         rel->rd_options = palloc(len);
3302                         if ((nread = fread(rel->rd_options, 1, len, fp)) != len)
3303                                 goto read_failed;
3304                         if (len != VARSIZE(rel->rd_options))
3305                                 goto read_failed;               /* sanity check */
3306                 }
3307                 else
3308                 {
3309                         rel->rd_options = NULL;
3310                 }
3311
3312                 /* mark not-null status */
3313                 if (has_not_null)
3314                 {
3315                         TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3316
3317                         constr->has_not_null = true;
3318                         rel->rd_att->constr = constr;
3319                 }
3320
3321                 /* If it's an index, there's more to do */
3322                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3323                 {
3324                         Form_pg_am      am;
3325                         MemoryContext indexcxt;
3326                         Oid                *opfamily;
3327                         Oid                *opcintype;
3328                         Oid                *operator;
3329                         RegProcedure *support;
3330                         int                     nsupport;
3331                         int16      *indoption;
3332
3333                         /* Count nailed indexes to ensure we have 'em all */
3334                         if (rel->rd_isnailed)
3335                                 nailed_indexes++;
3336
3337                         /* next, read the pg_index tuple */
3338                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3339                                 goto read_failed;
3340
3341                         rel->rd_indextuple = (HeapTuple) palloc(len);
3342                         if ((nread = fread(rel->rd_indextuple, 1, len, fp)) != len)
3343                                 goto read_failed;
3344
3345                         /* Fix up internal pointers in the tuple -- see heap_copytuple */
3346                         rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
3347                         rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
3348
3349                         /* next, read the access method tuple form */
3350                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3351                                 goto read_failed;
3352
3353                         am = (Form_pg_am) palloc(len);
3354                         if ((nread = fread(am, 1, len, fp)) != len)
3355                                 goto read_failed;
3356                         rel->rd_am = am;
3357
3358                         /*
3359                          * prepare index info context --- parameters should match
3360                          * RelationInitIndexAccessInfo
3361                          */
3362                         indexcxt = AllocSetContextCreate(CacheMemoryContext,
3363                                                                                          RelationGetRelationName(rel),
3364                                                                                          ALLOCSET_SMALL_MINSIZE,
3365                                                                                          ALLOCSET_SMALL_INITSIZE,
3366                                                                                          ALLOCSET_SMALL_MAXSIZE);
3367                         rel->rd_indexcxt = indexcxt;
3368
3369                         /* next, read the vector of opfamily OIDs */
3370                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3371                                 goto read_failed;
3372
3373                         opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
3374                         if ((nread = fread(opfamily, 1, len, fp)) != len)
3375                                 goto read_failed;
3376
3377                         rel->rd_opfamily = opfamily;
3378
3379                         /* next, read the vector of opcintype OIDs */
3380                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3381                                 goto read_failed;
3382
3383                         opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
3384                         if ((nread = fread(opcintype, 1, len, fp)) != len)
3385                                 goto read_failed;
3386
3387                         rel->rd_opcintype = opcintype;
3388
3389                         /* next, read the vector of operator OIDs */
3390                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3391                                 goto read_failed;
3392
3393                         operator = (Oid *) MemoryContextAlloc(indexcxt, len);
3394                         if ((nread = fread(operator, 1, len, fp)) != len)
3395                                 goto read_failed;
3396
3397                         rel->rd_operator = operator;
3398
3399                         /* next, read the vector of support procedures */
3400                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3401                                 goto read_failed;
3402                         support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
3403                         if ((nread = fread(support, 1, len, fp)) != len)
3404                                 goto read_failed;
3405
3406                         rel->rd_support = support;
3407
3408                         /* finally, read the vector of indoption values */
3409                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3410                                 goto read_failed;
3411
3412                         indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
3413                         if ((nread = fread(indoption, 1, len, fp)) != len)
3414                                 goto read_failed;
3415
3416                         rel->rd_indoption = indoption;
3417
3418                         /* set up zeroed fmgr-info vectors */
3419                         rel->rd_aminfo = (RelationAmInfo *)
3420                                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
3421                         nsupport = relform->relnatts * am->amsupport;
3422                         rel->rd_supportinfo = (FmgrInfo *)
3423                                 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
3424                 }
3425                 else
3426                 {
3427                         /* Count nailed rels to ensure we have 'em all */
3428                         if (rel->rd_isnailed)
3429                                 nailed_rels++;
3430
3431                         Assert(rel->rd_index == NULL);
3432                         Assert(rel->rd_indextuple == NULL);
3433                         Assert(rel->rd_am == NULL);
3434                         Assert(rel->rd_indexcxt == NULL);
3435                         Assert(rel->rd_aminfo == NULL);
3436                         Assert(rel->rd_opfamily == NULL);
3437                         Assert(rel->rd_opcintype == NULL);
3438                         Assert(rel->rd_operator == NULL);
3439                         Assert(rel->rd_support == NULL);
3440                         Assert(rel->rd_supportinfo == NULL);
3441                         Assert(rel->rd_indoption == NULL);
3442                 }
3443
3444                 /*
3445                  * Rules and triggers are not saved (mainly because the internal
3446                  * format is complex and subject to change).  They must be rebuilt if
3447                  * needed by RelationCacheInitializePhase2.  This is not expected to
3448                  * be a big performance hit since few system catalogs have such. Ditto
3449                  * for index expressions and predicates.
3450                  */
3451                 rel->rd_rules = NULL;
3452                 rel->rd_rulescxt = NULL;
3453                 rel->trigdesc = NULL;
3454                 rel->rd_indexprs = NIL;
3455                 rel->rd_indpred = NIL;
3456
3457                 /*
3458                  * Reset transient-state fields in the relcache entry
3459                  */
3460                 rel->rd_smgr = NULL;
3461                 rel->rd_targblock = InvalidBlockNumber;
3462                 if (rel->rd_isnailed)
3463                         rel->rd_refcnt = 1;
3464                 else
3465                         rel->rd_refcnt = 0;
3466                 rel->rd_indexvalid = 0;
3467                 rel->rd_indexlist = NIL;
3468                 rel->rd_oidindex = InvalidOid;
3469                 rel->rd_createSubid = InvalidSubTransactionId;
3470                 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3471                 rel->rd_amcache = NULL;
3472                 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
3473
3474                 /*
3475                  * Recompute lock and physical addressing info.  This is needed in
3476                  * case the pg_internal.init file was copied from some other database
3477                  * by CREATE DATABASE.
3478                  */
3479                 RelationInitLockInfo(rel);
3480                 RelationInitPhysicalAddr(rel);
3481         }
3482
3483         /*
3484          * We reached the end of the init file without apparent problem. Did we
3485          * get the right number of nailed items?  (This is a useful crosscheck in
3486          * case the set of critical rels or indexes changes.)
3487          */
3488         if (nailed_rels != NUM_CRITICAL_RELS ||
3489                 nailed_indexes != NUM_CRITICAL_INDEXES)
3490                 goto read_failed;
3491
3492         /*
3493          * OK, all appears well.
3494          *
3495          * Now insert all the new relcache entries into the cache.
3496          */
3497         for (relno = 0; relno < num_rels; relno++)
3498         {
3499                 RelationCacheInsert(rels[relno]);
3500                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3501                 initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
3502                                                                                 initFileRelationIds);
3503         }
3504
3505         pfree(rels);
3506         FreeFile(fp);
3507
3508         criticalRelcachesBuilt = true;
3509         return true;
3510
3511         /*
3512          * init file is broken, so do it the hard way.  We don't bother trying to
3513          * free the clutter we just allocated; it's not in the relcache so it
3514          * won't hurt.
3515          */
3516 read_failed:
3517         pfree(rels);
3518         FreeFile(fp);
3519
3520         return false;
3521 }
3522
3523 /*
3524  * Write out a new initialization file with the current contents
3525  * of the relcache.
3526  */
3527 static void
3528 write_relcache_init_file(void)
3529 {
3530         FILE       *fp;
3531         char            tempfilename[MAXPGPATH];
3532         char            finalfilename[MAXPGPATH];
3533         int                     magic;
3534         HASH_SEQ_STATUS status;
3535         RelIdCacheEnt *idhentry;
3536         MemoryContext oldcxt;
3537         int                     i;
3538
3539         /*
3540          * We must write a temporary file and rename it into place. Otherwise,
3541          * another backend starting at about the same time might crash trying to
3542          * read the partially-complete file.
3543          */
3544         snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
3545                          DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
3546         snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
3547                          DatabasePath, RELCACHE_INIT_FILENAME);
3548
3549         unlink(tempfilename);           /* in case it exists w/wrong permissions */
3550
3551         fp = AllocateFile(tempfilename, PG_BINARY_W);
3552         if (fp == NULL)
3553         {
3554                 /*
3555                  * We used to consider this a fatal error, but we might as well
3556                  * continue with backend startup ...
3557                  */
3558                 ereport(WARNING,
3559                                 (errcode_for_file_access(),
3560                                  errmsg("could not create relation-cache initialization file \"%s\": %m",
3561                                                 tempfilename),
3562                           errdetail("Continuing anyway, but there's something wrong.")));
3563                 return;
3564         }
3565
3566         /*
3567          * Write a magic number to serve as a file version identifier.  We can
3568          * change the magic number whenever the relcache layout changes.
3569          */
3570         magic = RELCACHE_INIT_FILEMAGIC;
3571         if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3572                 elog(FATAL, "could not write init file");
3573
3574         /*
3575          * Write all the reldescs (in no particular order).
3576          */
3577         hash_seq_init(&status, RelationIdCache);
3578
3579         initFileRelationIds = NIL;
3580
3581         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3582         {
3583                 Relation        rel = idhentry->reldesc;
3584                 Form_pg_class relform = rel->rd_rel;
3585
3586                 /* first write the relcache entry proper */
3587                 write_item(rel, sizeof(RelationData), fp);
3588
3589                 /* next write the relation tuple form */
3590                 write_item(relform, CLASS_TUPLE_SIZE, fp);
3591
3592                 /* next, do all the attribute tuple form data entries */
3593                 for (i = 0; i < relform->relnatts; i++)
3594                 {
3595                         write_item(rel->rd_att->attrs[i], ATTRIBUTE_TUPLE_SIZE, fp);
3596                 }
3597
3598                 /* next, do the access method specific field */
3599                 write_item(rel->rd_options,
3600                                    (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
3601                                    fp);
3602
3603                 /* If it's an index, there's more to do */
3604                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3605                 {
3606                         Form_pg_am      am = rel->rd_am;
3607
3608                         /* write the pg_index tuple */
3609                         /* we assume this was created by heap_copytuple! */
3610                         write_item(rel->rd_indextuple,
3611                                            HEAPTUPLESIZE + rel->rd_indextuple->t_len,
3612                                            fp);
3613
3614                         /* next, write the access method tuple form */
3615                         write_item(am, sizeof(FormData_pg_am), fp);
3616
3617                         /* next, write the vector of opfamily OIDs */
3618                         write_item(rel->rd_opfamily,
3619                                            relform->relnatts * sizeof(Oid),
3620                                            fp);
3621
3622                         /* next, write the vector of opcintype OIDs */
3623                         write_item(rel->rd_opcintype,
3624                                            relform->relnatts * sizeof(Oid),
3625                                            fp);
3626
3627                         /* next, write the vector of operator OIDs */
3628                         write_item(rel->rd_operator,
3629                                            relform->relnatts * (am->amstrategies * sizeof(Oid)),
3630                                            fp);
3631
3632                         /* next, write the vector of support procedures */
3633                         write_item(rel->rd_support,
3634                                   relform->relnatts * (am->amsupport * sizeof(RegProcedure)),
3635                                            fp);
3636
3637                         /* finally, write the vector of indoption values */
3638                         write_item(rel->rd_indoption,
3639                                            relform->relnatts * sizeof(int16),
3640                                            fp);
3641                 }
3642
3643                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3644                 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3645                 initFileRelationIds = lcons_oid(RelationGetRelid(rel),
3646                                                                                 initFileRelationIds);
3647                 MemoryContextSwitchTo(oldcxt);
3648         }
3649
3650         if (FreeFile(fp))
3651                 elog(FATAL, "could not write init file");
3652
3653         /*
3654          * Now we have to check whether the data we've so painstakingly
3655          * accumulated is already obsolete due to someone else's just-committed
3656          * catalog changes.  If so, we just delete the temp file and leave it to
3657          * the next backend to try again.  (Our own relcache entries will be
3658          * updated by SI message processing, but we can't be sure whether what we
3659          * wrote out was up-to-date.)
3660          *
3661          * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
3662          * grab a serialization lock for the duration.
3663          */
3664         LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3665
3666         /* Make sure we have seen all incoming SI messages */
3667         AcceptInvalidationMessages();
3668
3669         /*
3670          * If we have received any SI relcache invals since backend start, assume
3671          * we may have written out-of-date data.
3672          */
3673         if (relcacheInvalsReceived == 0L)
3674         {
3675                 /*
3676                  * OK, rename the temp file to its final name, deleting any
3677                  * previously-existing init file.
3678                  *
3679                  * Note: a failure here is possible under Cygwin, if some other
3680                  * backend is holding open an unlinked-but-not-yet-gone init file. So
3681                  * treat this as a noncritical failure; just remove the useless temp
3682                  * file on failure.
3683                  */
3684                 if (rename(tempfilename, finalfilename) < 0)
3685                         unlink(tempfilename);
3686         }
3687         else
3688         {
3689                 /* Delete the already-obsolete temp file */
3690                 unlink(tempfilename);
3691         }
3692
3693         LWLockRelease(RelCacheInitLock);
3694 }
3695
3696 /* write a chunk of data preceded by its length */
3697 static void
3698 write_item(const void *data, Size len, FILE *fp)
3699 {
3700         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3701                 elog(FATAL, "could not write init file");
3702         if (fwrite(data, 1, len, fp) != len)
3703                 elog(FATAL, "could not write init file");
3704 }
3705
3706 /*
3707  * Detect whether a given relation (identified by OID) is one of the ones
3708  * we store in the init file.
3709  *
3710  * Note that we effectively assume that all backends running in a database
3711  * would choose to store the same set of relations in the init file;
3712  * otherwise there are cases where we'd fail to detect the need for an init
3713  * file invalidation.  This does not seem likely to be a problem in practice.
3714  */
3715 bool
3716 RelationIdIsInInitFile(Oid relationId)
3717 {
3718         return list_member_oid(initFileRelationIds, relationId);
3719 }
3720
3721 /*
3722  * Invalidate (remove) the init file during commit of a transaction that
3723  * changed one or more of the relation cache entries that are kept in the
3724  * init file.
3725  *
3726  * We actually need to remove the init file twice: once just before sending
3727  * the SI messages that include relcache inval for such relations, and once
3728  * just after sending them.  The unlink before ensures that a backend that's
3729  * currently starting cannot read the now-obsolete init file and then miss
3730  * the SI messages that will force it to update its relcache entries.  (This
3731  * works because the backend startup sequence gets into the PGPROC array before
3732  * trying to load the init file.)  The unlink after is to synchronize with a
3733  * backend that may currently be trying to write an init file based on data
3734  * that we've just rendered invalid.  Such a backend will see the SI messages,
3735  * but we can't leave the init file sitting around to fool later backends.
3736  *
3737  * Ignore any failure to unlink the file, since it might not be there if
3738  * no backend has been started since the last removal.
3739  */
3740 void
3741 RelationCacheInitFileInvalidate(bool beforeSend)
3742 {
3743         char            initfilename[MAXPGPATH];
3744
3745         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3746                          DatabasePath, RELCACHE_INIT_FILENAME);
3747
3748         if (beforeSend)
3749         {
3750                 /* no interlock needed here */
3751                 unlink(initfilename);
3752         }
3753         else
3754         {
3755                 /*
3756                  * We need to interlock this against write_relcache_init_file, to
3757                  * guard against possibility that someone renames a new-but-
3758                  * already-obsolete init file into place just after we unlink. With
3759                  * the interlock, it's certain that write_relcache_init_file will
3760                  * notice our SI inval message before renaming into place, or else
3761                  * that we will execute second and successfully unlink the file.
3762                  */
3763                 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3764                 unlink(initfilename);
3765                 LWLockRelease(RelCacheInitLock);
3766         }
3767 }
3768
3769 /*
3770  * Remove the init file for a given database during postmaster startup.
3771  *
3772  * We used to keep the init file across restarts, but that is unsafe in PITR
3773  * scenarios, and even in simple crash-recovery cases there are windows for
3774  * the init file to become out-of-sync with the database.  So now we just
3775  * remove it during startup and expect the first backend launch to rebuild it.
3776  * Of course, this has to happen in each database of the cluster.  For
3777  * simplicity this is driven by flatfiles.c, which has to scan pg_database
3778  * anyway.
3779  */
3780 void
3781 RelationCacheInitFileRemove(const char *dbPath)
3782 {
3783         char            initfilename[MAXPGPATH];
3784
3785         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3786                          dbPath, RELCACHE_INIT_FILENAME);
3787         unlink(initfilename);
3788         /* ignore any error, since it might not be there at all */
3789 }