]> granicus.if.org Git - postgresql/blob - src/backend/utils/cache/relcache.c
Create the planner mechanism for optimizing simple MIN and MAX queries
[postgresql] / src / backend / utils / cache / relcache.c
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  *        POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.218 2005/03/29 00:17:11 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              RelationCacheInitialize                 - initialize relcache
18  *              RelationCacheInitializePhase2   - finish initializing relcache
19  *              RelationIdGetRelation                   - get a reldesc by relation id
20  *              RelationSysNameGetRelation              - get a reldesc by system rel name
21  *              RelationIdCacheGetRelation              - get a cached reldesc by relid
22  *              RelationClose                                   - close an open relation
23  *
24  * NOTES
25  *              The following code contains many undocumented hacks.  Please be
26  *              careful....
27  */
28 #include "postgres.h"
29
30 #include <sys/file.h>
31 #include <fcntl.h>
32 #include <unistd.h>
33
34 #include "access/genam.h"
35 #include "access/heapam.h"
36 #include "catalog/catalog.h"
37 #include "catalog/catname.h"
38 #include "catalog/indexing.h"
39 #include "catalog/namespace.h"
40 #include "catalog/pg_amop.h"
41 #include "catalog/pg_amproc.h"
42 #include "catalog/pg_attrdef.h"
43 #include "catalog/pg_attribute.h"
44 #include "catalog/pg_constraint.h"
45 #include "catalog/pg_index.h"
46 #include "catalog/pg_namespace.h"
47 #include "catalog/pg_opclass.h"
48 #include "catalog/pg_proc.h"
49 #include "catalog/pg_rewrite.h"
50 #include "catalog/pg_type.h"
51 #include "commands/trigger.h"
52 #include "miscadmin.h"
53 #include "optimizer/clauses.h"
54 #include "optimizer/planmain.h"
55 #include "optimizer/prep.h"
56 #include "storage/fd.h"
57 #include "storage/smgr.h"
58 #include "utils/builtins.h"
59 #include "utils/catcache.h"
60 #include "utils/fmgroids.h"
61 #include "utils/inval.h"
62 #include "utils/lsyscache.h"
63 #include "utils/relcache.h"
64 #include "utils/resowner.h"
65 #include "utils/syscache.h"
66 #include "utils/typcache.h"
67
68
69 /*
70  * name of relcache init file, used to speed up backend startup
71  */
72 #define RELCACHE_INIT_FILENAME  "pg_internal.init"
73
74 #define RELCACHE_INIT_FILEMAGIC         0x573262        /* version ID value */
75
76 /*
77  *              hardcoded tuple descriptors.  see include/catalog/pg_attribute.h
78  */
79 static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
80 static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
81 static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
82 static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
83 static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
84
85 /*
86  *              Hash tables that index the relation cache
87  *
88  *              Relations are looked up two ways, by OID and by name,
89  *              thus there are two hash tables for referencing them.
90  *
91  *              The OID index covers all relcache entries.      The name index
92  *              covers *only* system relations (only those in PG_CATALOG_NAMESPACE).
93  */
94 static HTAB *RelationIdCache;
95 static HTAB *RelationSysNameCache;
96
97 /*
98  * This flag is false until we have prepared the critical relcache entries
99  * that are needed to do indexscans on the tables read by relcache building.
100  */
101 bool            criticalRelcachesBuilt = false;
102
103 /*
104  * This flag is set if we discover that we need to write a new relcache
105  * cache file at the end of startup.
106  */
107 static bool needNewCacheFile = false;
108
109 /*
110  * This counter counts relcache inval events received since backend startup
111  * (but only for rels that are actually in cache).      Presently, we use it only
112  * to detect whether data about to be written by write_relcache_init_file()
113  * might already be obsolete.
114  */
115 static long relcacheInvalsReceived = 0L;
116
117 /*
118  * This list remembers the OIDs of the relations cached in the relcache
119  * init file.
120  */
121 static List *initFileRelationIds = NIL;
122
123 /*
124  * This flag lets us optimize away work in AtEOSubXact_RelationCache().
125  */
126 static bool need_eosubxact_work = false;
127
128 /*
129  *              RelationBuildDescInfo exists so code can be shared
130  *              between RelationIdGetRelation() and RelationSysNameGetRelation()
131  */
132 typedef struct RelationBuildDescInfo
133 {
134         int                     infotype;               /* lookup by id or by name */
135 #define INFO_RELID 1
136 #define INFO_RELNAME 2
137         union
138         {
139                 Oid                     info_id;        /* relation object id */
140                 char       *info_name;  /* system relation name */
141         }                       i;
142 } RelationBuildDescInfo;
143
144 typedef struct relidcacheent
145 {
146         Oid                     reloid;
147         Relation        reldesc;
148 } RelIdCacheEnt;
149
150 typedef struct relnamecacheent
151 {
152         NameData        relname;
153         Relation        reldesc;
154 } RelNameCacheEnt;
155
156 /*
157  *              macros to manipulate the lookup hashtables
158  */
/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache keyed by its rd_id and, if its
 *		namespace is a system namespace, also into RelationSysNameCache
 *		keyed by its relation name.  An already-present entry with the same
 *		key is silently redirected to RELATION.  An "out of memory" ERROR
 *		is reported if hash_search() returns NULL.
 *
 *		RELATION is evaluated exactly once, so any Relation-valued
 *		expression (including one with side effects) is a safe argument.
 */
#define RelationCacheInsert(RELATION)	\
do { \
	Relation	rci_relation = (RELATION); \
	RelIdCacheEnt *idhentry; \
	bool		found; \
	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
											 (void *) &(rci_relation->rd_id), \
											 HASH_ENTER, \
											 &found); \
	if (idhentry == NULL) \
		ereport(ERROR, \
				(errcode(ERRCODE_OUT_OF_MEMORY), \
				 errmsg("out of memory"))); \
	/* used to give notice if found -- now just keep quiet */ \
	idhentry->reldesc = rci_relation; \
	if (IsSystemNamespace(RelationGetNamespace(rci_relation))) \
	{ \
		char *relname = RelationGetRelationName(rci_relation); \
		RelNameCacheEnt *namehentry; \
		namehentry = (RelNameCacheEnt *) hash_search(RelationSysNameCache, \
													 relname, \
													 HASH_ENTER, \
													 &found); \
		if (namehentry == NULL) \
			ereport(ERROR, \
					(errcode(ERRCODE_OUT_OF_MEMORY), \
					 errmsg("out of memory"))); \
		/* used to give notice if found -- now just keep quiet */ \
		namehentry->reldesc = rci_relation; \
	} \
} while(0)
188
/*
 * RelationIdCacheLookup
 *		Probe RelationIdCache for the OID held in the lvalue ID.  RELATION
 *		is assigned the cached descriptor, or NULL when there is no entry.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *idEntry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, (void *) &(ID), HASH_FIND, NULL); \
	RELATION = (idEntry != NULL) ? idEntry->reldesc : NULL; \
} while(0)
199
/*
 * RelationSysNameCacheLookup
 *		Probe RelationSysNameCache using NAME as the search key.  RELATION
 *		is assigned the cached descriptor, or NULL when there is no entry.
 */
#define RelationSysNameCacheLookup(NAME, RELATION) \
do { \
	RelNameCacheEnt *nameEntry = (RelNameCacheEnt *) \
		hash_search(RelationSysNameCache, (void *) (NAME), HASH_FIND, NULL); \
	RELATION = (nameEntry != NULL) ? nameEntry->reldesc : NULL; \
} while(0)
210
/*
 * RelationCacheDelete
 *		Remove RELATION's entry from RelationIdCache and, if its namespace
 *		is a system namespace, from RelationSysNameCache as well.  A
 *		WARNING (not an error) is logged for each table in which no
 *		matching entry was found.
 *
 *		RELATION is evaluated exactly once, so any Relation-valued
 *		expression (including one with side effects) is a safe argument.
 */
#define RelationCacheDelete(RELATION) \
do { \
	Relation	rcd_relation = (RELATION); \
	RelIdCacheEnt *idhentry; \
	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
											 (void *) &(rcd_relation->rd_id), \
											 HASH_REMOVE, NULL); \
	if (idhentry == NULL) \
		elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
	if (IsSystemNamespace(RelationGetNamespace(rcd_relation))) \
	{ \
		char *relname = RelationGetRelationName(rcd_relation); \
		RelNameCacheEnt *namehentry; \
		namehentry = (RelNameCacheEnt *) hash_search(RelationSysNameCache, \
													 relname, \
													 HASH_REMOVE, NULL); \
		if (namehentry == NULL) \
			elog(WARNING, "trying to delete a relname reldesc that does not exist"); \
	} \
} while(0)
230
231
232 /*
233  * Special cache for opclass-related information
234  *
235  * Note: only default-subtype operators and support procs get cached
236  */
237 typedef struct opclasscacheent
238 {
239         Oid                     opclassoid;             /* lookup key: OID of opclass */
240         bool            valid;                  /* set TRUE after successful fill-in */
241         StrategyNumber numStrats;       /* max # of strategies (from pg_am) */
242         StrategyNumber numSupport;      /* max # of support procs (from pg_am) */
243         Oid                *operatorOids;       /* strategy operators' OIDs */
244         RegProcedure *supportProcs; /* support procs */
245 } OpClassCacheEnt;
246
247 static HTAB *OpClassCache = NULL;
248
249
250 /* non-export function prototypes */
251
252 static void RelationClearRelation(Relation relation, bool rebuild);
253
254 static void RelationReloadClassinfo(Relation relation);
255 static void RelationFlushRelation(Relation relation);
256 static Relation RelationSysNameCacheGetRelation(const char *relationName);
257 static bool load_relcache_init_file(void);
258 static void write_relcache_init_file(void);
259
260 static void formrdesc(const char *relationName, Oid relationReltype,
261                                           bool hasoids, int natts, FormData_pg_attribute *att);
262
263 static HeapTuple ScanPgRelation(RelationBuildDescInfo buildinfo, bool indexOK);
264 static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
265 static void RelationBuildTupleDesc(RelationBuildDescInfo buildinfo,
266                                            Relation relation);
267 static Relation RelationBuildDesc(RelationBuildDescInfo buildinfo,
268                                   Relation oldrelation);
269 static void RelationInitPhysicalAddr(Relation relation);
270 static TupleDesc GetPgIndexDescriptor(void);
271 static void AttrDefaultFetch(Relation relation);
272 static void CheckConstraintFetch(Relation relation);
273 static List *insert_ordered_oid(List *list, Oid datum);
274 static void IndexSupportInitialize(oidvector *indclass,
275                                            Oid *indexOperator,
276                                            RegProcedure *indexSupport,
277                                            StrategyNumber maxStrategyNumber,
278                                            StrategyNumber maxSupportNumber,
279                                            AttrNumber maxAttributeNumber);
280 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
281                                   StrategyNumber numStrats,
282                                   StrategyNumber numSupport);
283
284
285 /*
286  *              ScanPgRelation
287  *
288  *              this is used by RelationBuildDesc to find a pg_class
289  *              tuple matching either a relation name or a relation id
290  *              as specified in buildinfo.
291  *
292  *              NB: the returned tuple has been copied into palloc'd storage
293  *              and must eventually be freed with heap_freetuple.
294  */
295 static HeapTuple
296 ScanPgRelation(RelationBuildDescInfo buildinfo, bool indexOK)
297 {
298         HeapTuple       pg_class_tuple;
299         Relation        pg_class_desc;
300         const char *indexRelname;
301         SysScanDesc pg_class_scan;
302         ScanKeyData key[2];
303         int                     nkeys;
304
305         /*
306          * form a scan key
307          */
308         switch (buildinfo.infotype)
309         {
310                 case INFO_RELID:
311                         ScanKeyInit(&key[0],
312                                                 ObjectIdAttributeNumber,
313                                                 BTEqualStrategyNumber, F_OIDEQ,
314                                                 ObjectIdGetDatum(buildinfo.i.info_id));
315                         nkeys = 1;
316                         indexRelname = ClassOidIndex;
317                         break;
318
319                 case INFO_RELNAME:
320                         ScanKeyInit(&key[0],
321                                                 Anum_pg_class_relname,
322                                                 BTEqualStrategyNumber, F_NAMEEQ,
323                                                 NameGetDatum(buildinfo.i.info_name));
324                         ScanKeyInit(&key[1],
325                                                 Anum_pg_class_relnamespace,
326                                                 BTEqualStrategyNumber, F_OIDEQ,
327                                                 ObjectIdGetDatum(PG_CATALOG_NAMESPACE));
328                         nkeys = 2;
329                         indexRelname = ClassNameNspIndex;
330                         break;
331
332                 default:
333                         elog(ERROR, "unrecognized buildinfo type: %d",
334                                  buildinfo.infotype);
335                         return NULL;            /* keep compiler quiet */
336         }
337
338         /*
339          * Open pg_class and fetch a tuple.  Force heap scan if we haven't yet
340          * built the critical relcache entries (this includes initdb and
341          * startup without a pg_internal.init file).  The caller can also
342          * force a heap scan by setting indexOK == false.
343          */
344         pg_class_desc = heap_openr(RelationRelationName, AccessShareLock);
345         pg_class_scan = systable_beginscan(pg_class_desc, indexRelname,
346                                                                            indexOK && criticalRelcachesBuilt,
347                                                                            SnapshotNow,
348                                                                            nkeys, key);
349
350         pg_class_tuple = systable_getnext(pg_class_scan);
351
352         /*
353          * Must copy tuple before releasing buffer.
354          */
355         if (HeapTupleIsValid(pg_class_tuple))
356                 pg_class_tuple = heap_copytuple(pg_class_tuple);
357
358         /* all done */
359         systable_endscan(pg_class_scan);
360         heap_close(pg_class_desc, AccessShareLock);
361
362         return pg_class_tuple;
363 }
364
365 /*
366  *              AllocateRelationDesc
367  *
368  *              This is used to allocate memory for a new relation descriptor
369  *              and initialize the rd_rel field.
370  *
371  *              If 'relation' is NULL, allocate a new RelationData object.
372  *              If not, reuse the given object (that path is taken only when
373  *              we have to rebuild a relcache entry during RelationClearRelation).
374  */
375 static Relation
376 AllocateRelationDesc(Relation relation, Form_pg_class relp)
377 {
378         MemoryContext oldcxt;
379         Form_pg_class relationForm;
380
381         /* Relcache entries must live in CacheMemoryContext */
382         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
383
384         /*
385          * allocate space for new relation descriptor, if needed
386          */
387         if (relation == NULL)
388                 relation = (Relation) palloc(sizeof(RelationData));
389
390         /*
391          * clear all fields of reldesc
392          */
393         MemSet((char *) relation, 0, sizeof(RelationData));
394         relation->rd_targblock = InvalidBlockNumber;
395
396         /* make sure relation is marked as having no open file yet */
397         relation->rd_smgr = NULL;
398
399         /*
400          * Copy the relation tuple form
401          *
402          * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE.
403          * relacl is NOT stored in the relcache --- there'd be little point in
404          * it, since we don't copy the tuple's nullvalues bitmap and hence
405          * wouldn't know if the value is valid ... bottom line is that relacl
406          * *cannot* be retrieved from the relcache.  Get it from the syscache
407          * if you need it.
408          */
409         relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
410
411         memcpy((char *) relationForm, (char *) relp, CLASS_TUPLE_SIZE);
412
413         /* initialize relation tuple form */
414         relation->rd_rel = relationForm;
415
416         /* and allocate attribute tuple form storage */
417         relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
418                                                                                            relationForm->relhasoids);
419
420         MemoryContextSwitchTo(oldcxt);
421
422         return relation;
423 }
424
425 /*
426  *              RelationBuildTupleDesc
427  *
428  *              Form the relation's tuple descriptor from information in
429  *              the pg_attribute, pg_attrdef & pg_constraint system catalogs.
430  */
431 static void
432 RelationBuildTupleDesc(RelationBuildDescInfo buildinfo,
433                                            Relation relation)
434 {
435         HeapTuple       pg_attribute_tuple;
436         Relation        pg_attribute_desc;
437         SysScanDesc pg_attribute_scan;
438         ScanKeyData skey[2];
439         int                     need;
440         TupleConstr *constr;
441         AttrDefault *attrdef = NULL;
442         int                     ndef = 0;
443
444         /* copy some fields from pg_class row to rd_att */
445         relation->rd_att->tdtypeid = relation->rd_rel->reltype;
446         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
447         relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
448
449         constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
450                                                                                                 sizeof(TupleConstr));
451         constr->has_not_null = false;
452
453         /*
454          * Form a scan key that selects only user attributes (attnum > 0).
455          * (Eliminating system attribute rows at the index level is lots
456          * faster than fetching them.)
457          */
458         ScanKeyInit(&skey[0],
459                                 Anum_pg_attribute_attrelid,
460                                 BTEqualStrategyNumber, F_OIDEQ,
461                                 ObjectIdGetDatum(RelationGetRelid(relation)));
462         ScanKeyInit(&skey[1],
463                                 Anum_pg_attribute_attnum,
464                                 BTGreaterStrategyNumber, F_INT2GT,
465                                 Int16GetDatum(0));
466
467         /*
468          * Open pg_attribute and begin a scan.  Force heap scan if we haven't
469          * yet built the critical relcache entries (this includes initdb and
470          * startup without a pg_internal.init file).
471          */
472         pg_attribute_desc = heap_openr(AttributeRelationName, AccessShareLock);
473         pg_attribute_scan = systable_beginscan(pg_attribute_desc,
474                                                                                    AttributeRelidNumIndex,
475                                                                                    criticalRelcachesBuilt,
476                                                                                    SnapshotNow,
477                                                                                    2, skey);
478
479         /*
480          * add attribute data to relation->rd_att
481          */
482         need = relation->rd_rel->relnatts;
483
484         while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
485         {
486                 Form_pg_attribute attp;
487
488                 attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
489
490                 if (attp->attnum <= 0 ||
491                         attp->attnum > relation->rd_rel->relnatts)
492                         elog(ERROR, "invalid attribute number %d for %s",
493                                  attp->attnum, RelationGetRelationName(relation));
494
495                 memcpy(relation->rd_att->attrs[attp->attnum - 1],
496                            attp,
497                            ATTRIBUTE_TUPLE_SIZE);
498
499                 /* Update constraint/default info */
500                 if (attp->attnotnull)
501                         constr->has_not_null = true;
502
503                 if (attp->atthasdef)
504                 {
505                         if (attrdef == NULL)
506                                 attrdef = (AttrDefault *)
507                                         MemoryContextAllocZero(CacheMemoryContext,
508                                                                                    relation->rd_rel->relnatts *
509                                                                                    sizeof(AttrDefault));
510                         attrdef[ndef].adnum = attp->attnum;
511                         attrdef[ndef].adbin = NULL;
512                         ndef++;
513                 }
514                 need--;
515                 if (need == 0)
516                         break;
517         }
518
519         /*
520          * end the scan and close the attribute relation
521          */
522         systable_endscan(pg_attribute_scan);
523         heap_close(pg_attribute_desc, AccessShareLock);
524
525         if (need != 0)
526                 elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
527                          need, RelationGetRelid(relation));
528
529         /*
530          * The attcacheoff values we read from pg_attribute should all be -1
531          * ("unknown").  Verify this if assert checking is on.  They will be
532          * computed when and if needed during tuple access.
533          */
534 #ifdef USE_ASSERT_CHECKING
535         {
536                 int                     i;
537
538                 for (i = 0; i < relation->rd_rel->relnatts; i++)
539                         Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
540         }
541 #endif
542
543         /*
544          * However, we can easily set the attcacheoff value for the first
545          * attribute: it must be zero.  This eliminates the need for special
546          * cases for attnum=1 that used to exist in fastgetattr() and
547          * index_getattr().
548          */
549         if (relation->rd_rel->relnatts > 0)
550                 relation->rd_att->attrs[0]->attcacheoff = 0;
551
552         /*
553          * Set up constraint/default info
554          */
555         if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
556         {
557                 relation->rd_att->constr = constr;
558
559                 if (ndef > 0)                   /* DEFAULTs */
560                 {
561                         if (ndef < relation->rd_rel->relnatts)
562                                 constr->defval = (AttrDefault *)
563                                         repalloc(attrdef, ndef * sizeof(AttrDefault));
564                         else
565                                 constr->defval = attrdef;
566                         constr->num_defval = ndef;
567                         AttrDefaultFetch(relation);
568                 }
569                 else
570                         constr->num_defval = 0;
571
572                 if (relation->rd_rel->relchecks > 0)    /* CHECKs */
573                 {
574                         constr->num_check = relation->rd_rel->relchecks;
575                         constr->check = (ConstrCheck *)
576                                 MemoryContextAllocZero(CacheMemoryContext,
577                                                                 constr->num_check * sizeof(ConstrCheck));
578                         CheckConstraintFetch(relation);
579                 }
580                 else
581                         constr->num_check = 0;
582         }
583         else
584         {
585                 pfree(constr);
586                 relation->rd_att->constr = NULL;
587         }
588 }
589
590 /*
591  *              RelationBuildRuleLock
592  *
593  *              Form the relation's rewrite rules from information in
594  *              the pg_rewrite system catalog.
595  *
596  * Note: The rule parsetrees are potentially very complex node structures.
597  * To allow these trees to be freed when the relcache entry is flushed,
598  * we make a private memory context to hold the RuleLock information for
599  * each relcache entry that has associated rules.  The context is used
600  * just for rule info, not for any other subsidiary data of the relcache
601  * entry, because that keeps the update logic in RelationClearRelation()
602  * manageable.  The other subsidiary data structures are simple enough
603  * to be easy to free explicitly, anyway.
604  */
605 static void
606 RelationBuildRuleLock(Relation relation)
607 {
608         MemoryContext rulescxt;
609         MemoryContext oldcxt;
610         HeapTuple       rewrite_tuple;
611         Relation        rewrite_desc;
612         TupleDesc       rewrite_tupdesc;
613         SysScanDesc rewrite_scan;
614         ScanKeyData key;
615         RuleLock   *rulelock;
616         int                     numlocks;
617         RewriteRule **rules;
618         int                     maxlocks;
619
620         /*
621          * Make the private context.  Parameters are set on the assumption
622          * that it'll probably not contain much data.
623          */
624         rulescxt = AllocSetContextCreate(CacheMemoryContext,
625                                                                          RelationGetRelationName(relation),
626                                                                          ALLOCSET_SMALL_MINSIZE,
627                                                                          ALLOCSET_SMALL_INITSIZE,
628                                                                          ALLOCSET_SMALL_MAXSIZE);
629         relation->rd_rulescxt = rulescxt;
630
631         /*
632          * allocate an array to hold the rewrite rules (the array is extended
633          * if necessary)
634          */
635         maxlocks = 4;
636         rules = (RewriteRule **)
637                 MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
638         numlocks = 0;
639
640         /*
641          * form a scan key
642          */
643         ScanKeyInit(&key,
644                                 Anum_pg_rewrite_ev_class,
645                                 BTEqualStrategyNumber, F_OIDEQ,
646                                 ObjectIdGetDatum(RelationGetRelid(relation)));
647
648         /*
649          * open pg_rewrite and begin a scan
650          *
651          * Note: since we scan the rules using RewriteRelRulenameIndex, we will
652          * be reading the rules in name order, except possibly during
653          * emergency-recovery operations (ie, IsIgnoringSystemIndexes). This
654          * in turn ensures that rules will be fired in name order.
655          */
656         rewrite_desc = heap_openr(RewriteRelationName, AccessShareLock);
657         rewrite_tupdesc = RelationGetDescr(rewrite_desc);
658         rewrite_scan = systable_beginscan(rewrite_desc,
659                                                                           RewriteRelRulenameIndex,
660                                                                           true, SnapshotNow,
661                                                                           1, &key);
662
663         while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
664         {
665                 Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
666                 bool            isnull;
667                 Datum           ruleaction;
668                 Datum           rule_evqual;
669                 char       *ruleaction_str;
670                 char       *rule_evqual_str;
671                 RewriteRule *rule;
672
673                 rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
674                                                                                                   sizeof(RewriteRule));
675
676                 rule->ruleId = HeapTupleGetOid(rewrite_tuple);
677
678                 rule->event = rewrite_form->ev_type - '0';
679                 rule->attrno = rewrite_form->ev_attr;
680                 rule->isInstead = rewrite_form->is_instead;
681
682                 /* Must use heap_getattr to fetch ev_qual and ev_action */
683
684                 ruleaction = heap_getattr(rewrite_tuple,
685                                                                   Anum_pg_rewrite_ev_action,
686                                                                   rewrite_tupdesc,
687                                                                   &isnull);
688                 Assert(!isnull);
689                 ruleaction_str = DatumGetCString(DirectFunctionCall1(textout,
690                                                                                                                          ruleaction));
691                 oldcxt = MemoryContextSwitchTo(rulescxt);
692                 rule->actions = (List *) stringToNode(ruleaction_str);
693                 MemoryContextSwitchTo(oldcxt);
694                 pfree(ruleaction_str);
695
696                 rule_evqual = heap_getattr(rewrite_tuple,
697                                                                    Anum_pg_rewrite_ev_qual,
698                                                                    rewrite_tupdesc,
699                                                                    &isnull);
700                 Assert(!isnull);
701                 rule_evqual_str = DatumGetCString(DirectFunctionCall1(textout,
702                                                                                                                    rule_evqual));
703                 oldcxt = MemoryContextSwitchTo(rulescxt);
704                 rule->qual = (Node *) stringToNode(rule_evqual_str);
705                 MemoryContextSwitchTo(oldcxt);
706                 pfree(rule_evqual_str);
707
708                 if (numlocks >= maxlocks)
709                 {
710                         maxlocks *= 2;
711                         rules = (RewriteRule **)
712                                 repalloc(rules, sizeof(RewriteRule *) * maxlocks);
713                 }
714                 rules[numlocks++] = rule;
715         }
716
717         /*
718          * end the scan and close the attribute relation
719          */
720         systable_endscan(rewrite_scan);
721         heap_close(rewrite_desc, AccessShareLock);
722
723         /*
724          * form a RuleLock and insert into relation
725          */
726         rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
727         rulelock->numLocks = numlocks;
728         rulelock->rules = rules;
729
730         relation->rd_rules = rulelock;
731 }
732
733 /*
734  *              equalRuleLocks
735  *
736  *              Determine whether two RuleLocks are equivalent
737  *
738  *              Probably this should be in the rules code someplace...
739  */
740 static bool
741 equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
742 {
743         int                     i;
744
745         /*
746          * As of 7.3 we assume the rule ordering is repeatable, because
747          * RelationBuildRuleLock should read 'em in a consistent order.  So
748          * just compare corresponding slots.
749          */
750         if (rlock1 != NULL)
751         {
752                 if (rlock2 == NULL)
753                         return false;
754                 if (rlock1->numLocks != rlock2->numLocks)
755                         return false;
756                 for (i = 0; i < rlock1->numLocks; i++)
757                 {
758                         RewriteRule *rule1 = rlock1->rules[i];
759                         RewriteRule *rule2 = rlock2->rules[i];
760
761                         if (rule1->ruleId != rule2->ruleId)
762                                 return false;
763                         if (rule1->event != rule2->event)
764                                 return false;
765                         if (rule1->attrno != rule2->attrno)
766                                 return false;
767                         if (rule1->isInstead != rule2->isInstead)
768                                 return false;
769                         if (!equal(rule1->qual, rule2->qual))
770                                 return false;
771                         if (!equal(rule1->actions, rule2->actions))
772                                 return false;
773                 }
774         }
775         else if (rlock2 != NULL)
776                 return false;
777         return true;
778 }
779
780
781 /* ----------------------------------
782  *              RelationBuildDesc
783  *
784  *              Build a relation descriptor --- either a new one, or by
785  *              recycling the given old relation object.  The latter case
786  *              supports rebuilding a relcache entry without invalidating
787  *              pointers to it.
788  * --------------------------------
789  */
790 static Relation
791 RelationBuildDesc(RelationBuildDescInfo buildinfo,
792                                   Relation oldrelation)
793 {
794         Relation        relation;
795         Oid                     relid;
796         HeapTuple       pg_class_tuple;
797         Form_pg_class relp;
798         MemoryContext oldcxt;
799
800         /*
801          * find the tuple in pg_class corresponding to the given relation id
802          */
803         pg_class_tuple = ScanPgRelation(buildinfo, true);
804
805         /*
806          * if no such tuple exists, return NULL
807          */
808         if (!HeapTupleIsValid(pg_class_tuple))
809                 return NULL;
810
811         /*
812          * get information from the pg_class_tuple
813          */
814         relid = HeapTupleGetOid(pg_class_tuple);
815         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
816
817         /*
818          * allocate storage for the relation descriptor, and copy
819          * pg_class_tuple to relation->rd_rel.
820          */
821         relation = AllocateRelationDesc(oldrelation, relp);
822
823         /*
824          * now we can free the memory allocated for pg_class_tuple
825          */
826         heap_freetuple(pg_class_tuple);
827
828         /*
829          * initialize the relation's relation id (relation->rd_id)
830          */
831         RelationGetRelid(relation) = relid;
832
833         /*
834          * normal relations are not nailed into the cache; nor can a
835          * pre-existing relation be new.  It could be temp though.      (Actually,
836          * it could be new too, but it's okay to forget that fact if forced to
837          * flush the entry.)
838          */
839         relation->rd_refcnt = 0;
840         relation->rd_isnailed = false;
841         relation->rd_createSubid = InvalidSubTransactionId;
842         relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);
843
844         /*
845          * initialize the tuple descriptor (relation->rd_att).
846          */
847         RelationBuildTupleDesc(buildinfo, relation);
848
849         /*
850          * Fetch rules and triggers that affect this relation
851          */
852         if (relation->rd_rel->relhasrules)
853                 RelationBuildRuleLock(relation);
854         else
855         {
856                 relation->rd_rules = NULL;
857                 relation->rd_rulescxt = NULL;
858         }
859
860         if (relation->rd_rel->reltriggers > 0)
861                 RelationBuildTriggers(relation);
862         else
863                 relation->trigdesc = NULL;
864
865         /*
866          * if it's an index, initialize index-related information
867          */
868         if (OidIsValid(relation->rd_rel->relam))
869                 RelationInitIndexAccessInfo(relation);
870
871         /*
872          * initialize the relation lock manager information
873          */
874         RelationInitLockInfo(relation);         /* see lmgr.c */
875
876         /*
877          * initialize physical addressing information for the relation
878          */
879         RelationInitPhysicalAddr(relation);
880
881         /* make sure relation is marked as having no open file yet */
882         relation->rd_smgr = NULL;
883
884         /*
885          * Insert newly created relation into relcache hash tables.
886          */
887         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
888         RelationCacheInsert(relation);
889         MemoryContextSwitchTo(oldcxt);
890
891         /* It's fully valid */
892         relation->rd_isvalid = true;
893
894         return relation;
895 }
896
897 /*
898  * Initialize the physical addressing info (RelFileNode) for a relcache entry
899  */
900 static void
901 RelationInitPhysicalAddr(Relation relation)
902 {
903         if (relation->rd_rel->reltablespace)
904                 relation->rd_node.spcNode = relation->rd_rel->reltablespace;
905         else
906                 relation->rd_node.spcNode = MyDatabaseTableSpace;
907         if (relation->rd_rel->relisshared)
908                 relation->rd_node.dbNode = InvalidOid;
909         else
910                 relation->rd_node.dbNode = MyDatabaseId;
911         relation->rd_node.relNode = relation->rd_rel->relfilenode;
912 }
913
914 /*
915  * Initialize index-access-method support data for an index relation
916  */
917 void
918 RelationInitIndexAccessInfo(Relation relation)
919 {
920         HeapTuple       tuple;
921         Form_pg_am      aform;
922         Datum           indclassDatum;
923         bool            isnull;
924         MemoryContext indexcxt;
925         MemoryContext oldcontext;
926         Oid                *operator;
927         RegProcedure *support;
928         FmgrInfo   *supportinfo;
929         int                     natts;
930         uint16          amstrategies;
931         uint16          amsupport;
932
933         /*
934          * Make a copy of the pg_index entry for the index.  Since pg_index
935          * contains variable-length and possibly-null fields, we have to do
936          * this honestly rather than just treating it as a Form_pg_index
937          * struct.
938          */
939         tuple = SearchSysCache(INDEXRELID,
940                                                    ObjectIdGetDatum(RelationGetRelid(relation)),
941                                                    0, 0, 0);
942         if (!HeapTupleIsValid(tuple))
943                 elog(ERROR, "cache lookup failed for index %u",
944                          RelationGetRelid(relation));
945         oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
946         relation->rd_indextuple = heap_copytuple(tuple);
947         relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
948         MemoryContextSwitchTo(oldcontext);
949         ReleaseSysCache(tuple);
950
951         /*
952          * indclass cannot be referenced directly through the C struct, because
953          * it is after the variable-width indkey field.  Therefore we extract
954          * the datum the hard way and provide a direct link in the relcache.
955          */
956         indclassDatum = fastgetattr(relation->rd_indextuple,
957                                                                 Anum_pg_index_indclass,
958                                                                 GetPgIndexDescriptor(),
959                                                                 &isnull);
960         Assert(!isnull);
961         relation->rd_indclass = (oidvector *) DatumGetPointer(indclassDatum);
962
963         /*
964          * Make a copy of the pg_am entry for the index's access method
965          */
966         tuple = SearchSysCache(AMOID,
967                                                    ObjectIdGetDatum(relation->rd_rel->relam),
968                                                    0, 0, 0);
969         if (!HeapTupleIsValid(tuple))
970                 elog(ERROR, "cache lookup failed for access method %u",
971                          relation->rd_rel->relam);
972         aform = (Form_pg_am) MemoryContextAlloc(CacheMemoryContext, sizeof *aform);
973         memcpy(aform, GETSTRUCT(tuple), sizeof *aform);
974         ReleaseSysCache(tuple);
975         relation->rd_am = aform;
976
977         natts = relation->rd_rel->relnatts;
978         if (natts != relation->rd_index->indnatts)
979                 elog(ERROR, "relnatts disagrees with indnatts for index %u",
980                          RelationGetRelid(relation));
981         amstrategies = aform->amstrategies;
982         amsupport = aform->amsupport;
983
984         /*
985          * Make the private context to hold index access info.  The reason we
986          * need a context, and not just a couple of pallocs, is so that we
987          * won't leak any subsidiary info attached to fmgr lookup records.
988          *
989          * Context parameters are set on the assumption that it'll probably not
990          * contain much data.
991          */
992         indexcxt = AllocSetContextCreate(CacheMemoryContext,
993                                                                          RelationGetRelationName(relation),
994                                                                          ALLOCSET_SMALL_MINSIZE,
995                                                                          ALLOCSET_SMALL_INITSIZE,
996                                                                          ALLOCSET_SMALL_MAXSIZE);
997         relation->rd_indexcxt = indexcxt;
998
999         /*
1000          * Allocate arrays to hold data
1001          */
1002         if (amstrategies > 0)
1003                 operator = (Oid *)
1004                         MemoryContextAllocZero(indexcxt,
1005                                                                    natts * amstrategies * sizeof(Oid));
1006         else
1007                 operator = NULL;
1008
1009         if (amsupport > 0)
1010         {
1011                 int                     nsupport = natts * amsupport;
1012
1013                 support = (RegProcedure *)
1014                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1015                 supportinfo = (FmgrInfo *)
1016                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1017         }
1018         else
1019         {
1020                 support = NULL;
1021                 supportinfo = NULL;
1022         }
1023
1024         relation->rd_operator = operator;
1025         relation->rd_support = support;
1026         relation->rd_supportinfo = supportinfo;
1027
1028         /*
1029          * Fill the operator and support procedure OID arrays. (supportinfo is
1030          * left as zeroes, and is filled on-the-fly when used)
1031          */
1032         IndexSupportInitialize(relation->rd_indclass,
1033                                                    operator, support,
1034                                                    amstrategies, amsupport, natts);
1035
1036         /*
1037          * expressions and predicate cache will be filled later
1038          */
1039         relation->rd_indexprs = NIL;
1040         relation->rd_indpred = NIL;
1041 }
1042
1043 /*
1044  * IndexSupportInitialize
1045  *              Initializes an index's cached opclass information,
1046  *              given the index's pg_index.indclass entry.
1047  *
1048  * Data is returned into *indexOperator and *indexSupport, which are arrays
1049  * allocated by the caller.
1050  *
1051  * The caller also passes maxStrategyNumber, maxSupportNumber, and
1052  * maxAttributeNumber, since these indicate the size of the arrays
1053  * it has allocated --- but in practice these numbers must always match
1054  * those obtainable from the system catalog entries for the index and
1055  * access method.
1056  */
1057 static void
1058 IndexSupportInitialize(oidvector *indclass,
1059                                            Oid *indexOperator,
1060                                            RegProcedure *indexSupport,
1061                                            StrategyNumber maxStrategyNumber,
1062                                            StrategyNumber maxSupportNumber,
1063                                            AttrNumber maxAttributeNumber)
1064 {
1065         int                     attIndex;
1066
1067         for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1068         {
1069                 OpClassCacheEnt *opcentry;
1070
1071                 if (!OidIsValid(indclass->values[attIndex]))
1072                         elog(ERROR, "bogus pg_index tuple");
1073
1074                 /* look up the info for this opclass, using a cache */
1075                 opcentry = LookupOpclassInfo(indclass->values[attIndex],
1076                                                                          maxStrategyNumber,
1077                                                                          maxSupportNumber);
1078
1079                 /* copy cached data into relcache entry */
1080                 if (maxStrategyNumber > 0)
1081                         memcpy(&indexOperator[attIndex * maxStrategyNumber],
1082                                    opcentry->operatorOids,
1083                                    maxStrategyNumber * sizeof(Oid));
1084                 if (maxSupportNumber > 0)
1085                         memcpy(&indexSupport[attIndex * maxSupportNumber],
1086                                    opcentry->supportProcs,
1087                                    maxSupportNumber * sizeof(RegProcedure));
1088         }
1089 }
1090
1091 /*
1092  * LookupOpclassInfo
1093  *
1094  * This routine maintains a per-opclass cache of the information needed
1095  * by IndexSupportInitialize().  This is more efficient than relying on
1096  * the catalog cache, because we can load all the info about a particular
1097  * opclass in a single indexscan of pg_amproc or pg_amop.
1098  *
1099  * The information from pg_am about expected range of strategy and support
1100  * numbers is passed in, rather than being looked up, mainly because the
1101  * caller will have it already.
1102  *
1103  * XXX There isn't any provision for flushing the cache.  However, there
1104  * isn't any provision for flushing relcache entries when opclass info
1105  * changes, either :-(
1106  */
1107 static OpClassCacheEnt *
1108 LookupOpclassInfo(Oid operatorClassOid,
1109                                   StrategyNumber numStrats,
1110                                   StrategyNumber numSupport)
1111 {
1112         OpClassCacheEnt *opcentry;
1113         bool            found;
1114         Relation        rel;
1115         SysScanDesc scan;
1116         ScanKeyData skey[2];
1117         HeapTuple       htup;
1118         bool            indexOK;
1119
1120         if (OpClassCache == NULL)
1121         {
1122                 /* First time through: initialize the opclass cache */
1123                 HASHCTL         ctl;
1124
1125                 if (!CacheMemoryContext)
1126                         CreateCacheMemoryContext();
1127
1128                 MemSet(&ctl, 0, sizeof(ctl));
1129                 ctl.keysize = sizeof(Oid);
1130                 ctl.entrysize = sizeof(OpClassCacheEnt);
1131                 ctl.hash = tag_hash;
1132                 OpClassCache = hash_create("Operator class cache", 64,
1133                                                                    &ctl, HASH_ELEM | HASH_FUNCTION);
1134         }
1135
1136         opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1137                                                                                            (void *) &operatorClassOid,
1138                                                                                            HASH_ENTER, &found);
1139         if (opcentry == NULL)
1140                 ereport(ERROR,
1141                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1142                                  errmsg("out of memory")));
1143
1144         if (found && opcentry->valid)
1145         {
1146                 /* Already made an entry for it */
1147                 Assert(numStrats == opcentry->numStrats);
1148                 Assert(numSupport == opcentry->numSupport);
1149                 return opcentry;
1150         }
1151
1152         /* Need to fill in new entry */
1153         opcentry->valid = false;        /* until known OK */
1154         opcentry->numStrats = numStrats;
1155         opcentry->numSupport = numSupport;
1156
1157         if (numStrats > 0)
1158                 opcentry->operatorOids = (Oid *)
1159                         MemoryContextAllocZero(CacheMemoryContext,
1160                                                                    numStrats * sizeof(Oid));
1161         else
1162                 opcentry->operatorOids = NULL;
1163
1164         if (numSupport > 0)
1165                 opcentry->supportProcs = (RegProcedure *)
1166                         MemoryContextAllocZero(CacheMemoryContext,
1167                                                                    numSupport * sizeof(RegProcedure));
1168         else
1169                 opcentry->supportProcs = NULL;
1170
1171         /*
1172          * To avoid infinite recursion during startup, force heap scans if
1173          * we're looking up info for the opclasses used by the indexes we
1174          * would like to reference here.
1175          */
1176         indexOK = criticalRelcachesBuilt ||
1177                 (operatorClassOid != OID_BTREE_OPS_OID &&
1178                  operatorClassOid != INT2_BTREE_OPS_OID);
1179
1180         /*
1181          * Scan pg_amop to obtain operators for the opclass.  We only fetch
1182          * the default ones (those with subtype zero).
1183          */
1184         if (numStrats > 0)
1185         {
1186                 ScanKeyInit(&skey[0],
1187                                         Anum_pg_amop_amopclaid,
1188                                         BTEqualStrategyNumber, F_OIDEQ,
1189                                         ObjectIdGetDatum(operatorClassOid));
1190                 ScanKeyInit(&skey[1],
1191                                         Anum_pg_amop_amopsubtype,
1192                                         BTEqualStrategyNumber, F_OIDEQ,
1193                                         ObjectIdGetDatum(InvalidOid));
1194                 rel = heap_openr(AccessMethodOperatorRelationName,
1195                                                  AccessShareLock);
1196                 scan = systable_beginscan(rel, AccessMethodStrategyIndex, indexOK,
1197                                                                   SnapshotNow, 2, skey);
1198
1199                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1200                 {
1201                         Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(htup);
1202
1203                         if (amopform->amopstrategy <= 0 ||
1204                                 (StrategyNumber) amopform->amopstrategy > numStrats)
1205                                 elog(ERROR, "invalid amopstrategy number %d for opclass %u",
1206                                          amopform->amopstrategy, operatorClassOid);
1207                         opcentry->operatorOids[amopform->amopstrategy - 1] =
1208                                 amopform->amopopr;
1209                 }
1210
1211                 systable_endscan(scan);
1212                 heap_close(rel, AccessShareLock);
1213         }
1214
1215         /*
1216          * Scan pg_amproc to obtain support procs for the opclass.      We only
1217          * fetch the default ones (those with subtype zero).
1218          */
1219         if (numSupport > 0)
1220         {
1221                 ScanKeyInit(&skey[0],
1222                                         Anum_pg_amproc_amopclaid,
1223                                         BTEqualStrategyNumber, F_OIDEQ,
1224                                         ObjectIdGetDatum(operatorClassOid));
1225                 ScanKeyInit(&skey[1],
1226                                         Anum_pg_amproc_amprocsubtype,
1227                                         BTEqualStrategyNumber, F_OIDEQ,
1228                                         ObjectIdGetDatum(InvalidOid));
1229                 rel = heap_openr(AccessMethodProcedureRelationName,
1230                                                  AccessShareLock);
1231                 scan = systable_beginscan(rel, AccessMethodProcedureIndex, indexOK,
1232                                                                   SnapshotNow, 2, skey);
1233
1234                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1235                 {
1236                         Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1237
1238                         if (amprocform->amprocnum <= 0 ||
1239                                 (StrategyNumber) amprocform->amprocnum > numSupport)
1240                                 elog(ERROR, "invalid amproc number %d for opclass %u",
1241                                          amprocform->amprocnum, operatorClassOid);
1242
1243                         opcentry->supportProcs[amprocform->amprocnum - 1] =
1244                                 amprocform->amproc;
1245                 }
1246
1247                 systable_endscan(scan);
1248                 heap_close(rel, AccessShareLock);
1249         }
1250
1251         opcentry->valid = true;
1252         return opcentry;
1253 }
1254
1255
1256 /*
1257  *              formrdesc
1258  *
1259  *              This is a special cut-down version of RelationBuildDesc()
1260  *              used by RelationCacheInitialize() in initializing the relcache.
1261  *              The relation descriptor is built just from the supplied parameters,
1262  *              without actually looking at any system table entries.  We cheat
1263  *              quite a lot since we only need to work for a few basic system
1264  *              catalogs.
1265  *
1266  * formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
1267  * and pg_type (see RelationCacheInitialize).
1268  *
1269  * Note that these catalogs can't have constraints (except attnotnull),
1270  * default values, rules, or triggers, since we don't cope with any of that.
1271  *
1272  * NOTE: we assume we are already switched into CacheMemoryContext.
1273  */
1274 static void
1275 formrdesc(const char *relationName, Oid relationReltype,
1276                   bool hasoids, int natts, FormData_pg_attribute *att)
1277 {
1278         Relation        relation;
1279         int                     i;
1280         bool            has_not_null;
1281
1282         /*
1283          * allocate new relation desc, clear all fields of reldesc
1284          */
1285         relation = (Relation) palloc0(sizeof(RelationData));
1286         relation->rd_targblock = InvalidBlockNumber;
1287
1288         /* make sure relation is marked as having no open file yet */
1289         relation->rd_smgr = NULL;
1290
1291         /*
1292          * initialize reference count: 1 because it is nailed in cache
1293          */
1294         relation->rd_refcnt = 1;
1295
1296         /*
1297          * all entries built with this routine are nailed-in-cache; none are
1298          * for new or temp relations.
1299          */
1300         relation->rd_isnailed = true;
1301         relation->rd_createSubid = InvalidSubTransactionId;
1302         relation->rd_istemp = false;
1303
1304         /*
1305          * initialize relation tuple form
1306          *
1307          * The data we insert here is pretty incomplete/bogus, but it'll serve to
1308          * get us launched.  RelationCacheInitializePhase2() will read the
1309          * real data from pg_class and replace what we've done here.
1310          */
1311         relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1312
1313         namestrcpy(&relation->rd_rel->relname, relationName);
1314         relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1315         relation->rd_rel->reltype = relationReltype;
1316
1317         /*
1318          * It's important to distinguish between shared and non-shared
1319          * relations, even at bootstrap time, to make sure we know where they
1320          * are stored.  At present, all relations that formrdesc is used for
1321          * are not shared.
1322          */
1323         relation->rd_rel->relisshared = false;
1324
1325         relation->rd_rel->relpages = 1;
1326         relation->rd_rel->reltuples = 1;
1327         relation->rd_rel->relkind = RELKIND_RELATION;
1328         relation->rd_rel->relhasoids = hasoids;
1329         relation->rd_rel->relnatts = (int16) natts;
1330
1331         /*
1332          * initialize attribute tuple form
1333          *
1334          * Unlike the case with the relation tuple, this data had better be right
1335          * because it will never be replaced.  The input values must be
1336          * correctly defined by macros in src/include/catalog/ headers.
1337          */
1338         relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1339         relation->rd_att->tdtypeid = relationReltype;
1340         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
1341
1342         /*
1343          * initialize tuple desc info
1344          */
1345         has_not_null = false;
1346         for (i = 0; i < natts; i++)
1347         {
1348                 memcpy(relation->rd_att->attrs[i],
1349                            &att[i],
1350                            ATTRIBUTE_TUPLE_SIZE);
1351                 has_not_null |= att[i].attnotnull;
1352                 /* make sure attcacheoff is valid */
1353                 relation->rd_att->attrs[i]->attcacheoff = -1;
1354         }
1355
1356         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1357         relation->rd_att->attrs[0]->attcacheoff = 0;
1358
1359         /* mark not-null status */
1360         if (has_not_null)
1361         {
1362                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1363
1364                 constr->has_not_null = true;
1365                 relation->rd_att->constr = constr;
1366         }
1367
1368         /*
1369          * initialize relation id from info in att array (my, this is ugly)
1370          */
1371         RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1372         relation->rd_rel->relfilenode = RelationGetRelid(relation);
1373
1374         /*
1375          * initialize the relation lock manager information
1376          */
1377         RelationInitLockInfo(relation);         /* see lmgr.c */
1378
1379         /*
1380          * initialize physical addressing information for the relation
1381          */
1382         RelationInitPhysicalAddr(relation);
1383
1384         /*
1385          * initialize the rel-has-index flag, using hardwired knowledge
1386          */
1387         if (IsBootstrapProcessingMode())
1388         {
1389                 /* In bootstrap mode, we have no indexes */
1390                 relation->rd_rel->relhasindex = false;
1391         }
1392         else
1393         {
1394                 /* Otherwise, all the rels formrdesc is used for have indexes */
1395                 relation->rd_rel->relhasindex = true;
1396         }
1397
1398         /*
1399          * add new reldesc to relcache
1400          */
1401         RelationCacheInsert(relation);
1402
1403         /* It's fully valid */
1404         relation->rd_isvalid = true;
1405 }
1406
1407
1408 /* ----------------------------------------------------------------
1409  *                               Relation Descriptor Lookup Interface
1410  * ----------------------------------------------------------------
1411  */
1412
1413 /*
1414  *              RelationIdCacheGetRelation
1415  *
1416  *              Lookup an existing reldesc by OID.
1417  *
1418  *              Only try to get the reldesc by looking in the cache,
1419  *              do not go to the disk if it's not present.
1420  *
1421  *              NB: relation ref count is incremented if successful.
1422  *              Caller should eventually decrement count.  (Usually,
1423  *              that happens by calling RelationClose().)
1424  */
1425 Relation
1426 RelationIdCacheGetRelation(Oid relationId)
1427 {
1428         Relation        rd;
1429
1430         RelationIdCacheLookup(relationId, rd);
1431
1432         if (RelationIsValid(rd))
1433         {
1434                 RelationIncrementReferenceCount(rd);
1435                 /* revalidate nailed index if necessary */
1436                 if (!rd->rd_isvalid)
1437                         RelationReloadClassinfo(rd);
1438         }
1439
1440         return rd;
1441 }
1442
1443 /*
1444  *              RelationSysNameCacheGetRelation
1445  *
1446  *              As above, but lookup by name; only works for system catalogs.
1447  */
1448 static Relation
1449 RelationSysNameCacheGetRelation(const char *relationName)
1450 {
1451         Relation        rd;
1452         NameData        name;
1453
1454         /*
1455          * make sure that the name key used for hash lookup is properly
1456          * null-padded
1457          */
1458         namestrcpy(&name, relationName);
1459         RelationSysNameCacheLookup(NameStr(name), rd);
1460
1461         if (RelationIsValid(rd))
1462         {
1463                 RelationIncrementReferenceCount(rd);
1464                 /* revalidate nailed index if necessary */
1465                 if (!rd->rd_isvalid)
1466                         RelationReloadClassinfo(rd);
1467         }
1468
1469         return rd;
1470 }
1471
1472 /*
1473  *              RelationIdGetRelation
1474  *
1475  *              Lookup a reldesc by OID; make one if not already in cache.
1476  *
1477  *              NB: relation ref count is incremented, or set to 1 if new entry.
1478  *              Caller should eventually decrement count.  (Usually,
1479  *              that happens by calling RelationClose().)
1480  */
1481 Relation
1482 RelationIdGetRelation(Oid relationId)
1483 {
1484         Relation        rd;
1485         RelationBuildDescInfo buildinfo;
1486
1487         /*
1488          * first try and get a reldesc from the cache
1489          */
1490         rd = RelationIdCacheGetRelation(relationId);
1491         if (RelationIsValid(rd))
1492                 return rd;
1493
1494         /*
1495          * no reldesc in the cache, so have RelationBuildDesc() build one and
1496          * add it.
1497          */
1498         buildinfo.infotype = INFO_RELID;
1499         buildinfo.i.info_id = relationId;
1500
1501         rd = RelationBuildDesc(buildinfo, NULL);
1502         if (RelationIsValid(rd))
1503                 RelationIncrementReferenceCount(rd);
1504         return rd;
1505 }
1506
1507 /*
1508  *              RelationSysNameGetRelation
1509  *
1510  *              As above, but lookup by name; only works for system catalogs.
1511  */
1512 Relation
1513 RelationSysNameGetRelation(const char *relationName)
1514 {
1515         Relation        rd;
1516         RelationBuildDescInfo buildinfo;
1517
1518         /*
1519          * first try and get a reldesc from the cache
1520          */
1521         rd = RelationSysNameCacheGetRelation(relationName);
1522         if (RelationIsValid(rd))
1523                 return rd;
1524
1525         /*
1526          * no reldesc in the cache, so have RelationBuildDesc() build one and
1527          * add it.
1528          */
1529         buildinfo.infotype = INFO_RELNAME;
1530         buildinfo.i.info_name = (char *) relationName;
1531
1532         rd = RelationBuildDesc(buildinfo, NULL);
1533         if (RelationIsValid(rd))
1534                 RelationIncrementReferenceCount(rd);
1535         return rd;
1536 }
1537
1538 /* ----------------------------------------------------------------
1539  *                              cache invalidation support routines
1540  * ----------------------------------------------------------------
1541  */
1542
1543 /*
1544  * RelationIncrementReferenceCount
1545  *              Increments relation reference count.
1546  *
1547  * Note: bootstrap mode has its own weird ideas about relation refcount
1548  * behavior; we ought to fix it someday, but for now, just disable
1549  * reference count ownership tracking in bootstrap mode.
1550  */
1551 void
1552 RelationIncrementReferenceCount(Relation rel)
1553 {
1554         ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
1555         rel->rd_refcnt += 1;
1556         if (!IsBootstrapProcessingMode())
1557                 ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
1558 }
1559
1560 /*
1561  * RelationDecrementReferenceCount
1562  *              Decrements relation reference count.
1563  */
1564 void
1565 RelationDecrementReferenceCount(Relation rel)
1566 {
1567         Assert(rel->rd_refcnt > 0);
1568         rel->rd_refcnt -= 1;
1569         if (!IsBootstrapProcessingMode())
1570                 ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
1571 }
1572
1573 /*
1574  * RelationClose - close an open relation
1575  *
1576  *      Actually, we just decrement the refcount.
1577  *
1578  *      NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
1579  *      will be freed as soon as their refcount goes to zero.  In combination
1580  *      with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
1581  *      to catch references to already-released relcache entries.  It slows
1582  *      things down quite a bit, however.
1583  */
1584 void
1585 RelationClose(Relation relation)
1586 {
1587         /* Note: no locking manipulations needed */
1588         RelationDecrementReferenceCount(relation);
1589
1590 #ifdef RELCACHE_FORCE_RELEASE
1591         if (RelationHasReferenceCountZero(relation) &&
1592                 relation->rd_createSubid == InvalidSubTransactionId)
1593                 RelationClearRelation(relation, false);
1594 #endif
1595 }
1596
1597 /*
1598  * RelationReloadClassinfo - reload the pg_class row (only)
1599  *
1600  *      This function is used only for nailed indexes.  Since a REINDEX can
1601  *      change the relfilenode value for a nailed index, we have to reread
1602  *      the pg_class row anytime we get an SI invalidation on a nailed index
1603  *      (without throwing away the whole relcache entry, since we'd be unable
1604  *      to rebuild it).
1605  *
1606  *      We can't necessarily reread the pg_class row right away; we might be
1607  *      in a failed transaction when we receive the SI notification.  If so,
1608  *      RelationClearRelation just marks the entry as invalid by setting
1609  *      rd_isvalid to false.  This routine is called to fix the entry when it
1610  *      is next needed.
1611  */
1612 static void
1613 RelationReloadClassinfo(Relation relation)
1614 {
1615         RelationBuildDescInfo buildinfo;
1616         bool            indexOK;
1617         HeapTuple       pg_class_tuple;
1618         Form_pg_class relp;
1619
1620         /* Should be called only for invalidated nailed indexes */
1621         Assert(relation->rd_isnailed && !relation->rd_isvalid &&
1622                    relation->rd_rel->relkind == RELKIND_INDEX);
1623         /* Read the pg_class row */
1624         buildinfo.infotype = INFO_RELID;
1625         buildinfo.i.info_id = relation->rd_id;
1626
1627         /*
1628          * Don't try to use an indexscan of pg_class_oid_index to reload the
1629          * info for pg_class_oid_index ...
1630          */
1631         indexOK = strcmp(RelationGetRelationName(relation), ClassOidIndex) != 0;
1632         pg_class_tuple = ScanPgRelation(buildinfo, indexOK);
1633         if (!HeapTupleIsValid(pg_class_tuple))
1634                 elog(ERROR, "could not find tuple for system relation %u",
1635                          relation->rd_id);
1636         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1637         memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
1638         /* Now we can recalculate physical address */
1639         RelationInitPhysicalAddr(relation);
1640         heap_freetuple(pg_class_tuple);
1641         relation->rd_targblock = InvalidBlockNumber;
1642         /* Okay, now it's valid again */
1643         relation->rd_isvalid = true;
1644 }
1645
1646 /*
1647  * RelationClearRelation
1648  *
1649  *       Physically blow away a relation cache entry, or reset it and rebuild
1650  *       it from scratch (that is, from catalog entries).  The latter path is
1651  *       usually used when we are notified of a change to an open relation
1652  *       (one with refcount > 0).  However, this routine just does whichever
1653  *       it's told to do; callers must determine which they want.
1654  */
1655 static void
1656 RelationClearRelation(Relation relation, bool rebuild)
1657 {
1658         Oid                     old_reltype = relation->rd_rel->reltype;
1659         MemoryContext oldcxt;
1660
1661         /*
1662          * Make sure smgr and lower levels close the relation's files, if they
1663          * weren't closed already.  If the relation is not getting deleted,
1664          * the next smgr access should reopen the files automatically.  This
1665          * ensures that the low-level file access state is updated after, say,
1666          * a vacuum truncation.
1667          */
1668         RelationCloseSmgr(relation);
1669
1670         /*
1671          * Never, never ever blow away a nailed-in system relation, because
1672          * we'd be unable to recover.  However, we must reset rd_targblock, in
1673          * case we got called because of a relation cache flush that was
1674          * triggered by VACUUM.
1675          *
1676          * If it's a nailed index, then we need to re-read the pg_class row to
1677          * see if its relfilenode changed.      We can't necessarily do that here,
1678          * because we might be in a failed transaction.  We assume it's okay
1679          * to do it if there are open references to the relcache entry (cf
1680          * notes for AtEOXact_RelationCache).  Otherwise just mark the entry
1681          * as possibly invalid, and it'll be fixed when next opened.
1682          */
1683         if (relation->rd_isnailed)
1684         {
1685                 relation->rd_targblock = InvalidBlockNumber;
1686                 if (relation->rd_rel->relkind == RELKIND_INDEX)
1687                 {
1688                         relation->rd_isvalid = false;           /* needs to be revalidated */
1689                         if (relation->rd_refcnt > 1)
1690                                 RelationReloadClassinfo(relation);
1691                 }
1692                 return;
1693         }
1694
1695         /*
1696          * Remove relation from hash tables
1697          *
1698          * Note: we might be reinserting it momentarily, but we must not have it
1699          * visible in the hash tables until it's valid again, so don't try to
1700          * optimize this away...
1701          */
1702         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
1703         RelationCacheDelete(relation);
1704         MemoryContextSwitchTo(oldcxt);
1705
1706         /* Clear out catcache's entries for this relation */
1707         CatalogCacheFlushRelation(RelationGetRelid(relation));
1708
1709         /*
1710          * Free all the subsidiary data structures of the relcache entry. We
1711          * cannot free rd_att if we are trying to rebuild the entry, however,
1712          * because pointers to it may be cached in various places. The rule
1713          * manager might also have pointers into the rewrite rules. So to
1714          * begin with, we can only get rid of these fields:
1715          */
1716         FreeTriggerDesc(relation->trigdesc);
1717         if (relation->rd_indextuple)
1718                 pfree(relation->rd_indextuple);
1719         if (relation->rd_am)
1720                 pfree(relation->rd_am);
1721         if (relation->rd_rel)
1722                 pfree(relation->rd_rel);
1723         list_free(relation->rd_indexlist);
1724         if (relation->rd_indexcxt)
1725                 MemoryContextDelete(relation->rd_indexcxt);
1726
1727         /*
1728          * If we're really done with the relcache entry, blow it away. But if
1729          * someone is still using it, reconstruct the whole deal without
1730          * moving the physical RelationData record (so that the someone's
1731          * pointer is still valid).
1732          */
1733         if (!rebuild)
1734         {
1735                 /* ok to zap remaining substructure */
1736                 flush_rowtype_cache(old_reltype);
1737                 FreeTupleDesc(relation->rd_att);
1738                 if (relation->rd_rulescxt)
1739                         MemoryContextDelete(relation->rd_rulescxt);
1740                 pfree(relation);
1741         }
1742         else
1743         {
1744                 /*
1745                  * When rebuilding an open relcache entry, must preserve ref count
1746                  * and rd_createSubid state.  Also attempt to preserve the
1747                  * tupledesc and rewrite-rule substructures in place.
1748                  *
1749                  * Note that this process does not touch CurrentResourceOwner; which
1750                  * is good because whatever ref counts the entry may have do not
1751                  * necessarily belong to that resource owner.
1752                  */
1753                 int                     old_refcnt = relation->rd_refcnt;
1754                 SubTransactionId old_createSubid = relation->rd_createSubid;
1755                 TupleDesc       old_att = relation->rd_att;
1756                 RuleLock   *old_rules = relation->rd_rules;
1757                 MemoryContext old_rulescxt = relation->rd_rulescxt;
1758                 RelationBuildDescInfo buildinfo;
1759
1760                 buildinfo.infotype = INFO_RELID;
1761                 buildinfo.i.info_id = RelationGetRelid(relation);
1762
1763                 if (RelationBuildDesc(buildinfo, relation) != relation)
1764                 {
1765                         /* Should only get here if relation was deleted */
1766                         flush_rowtype_cache(old_reltype);
1767                         FreeTupleDesc(old_att);
1768                         if (old_rulescxt)
1769                                 MemoryContextDelete(old_rulescxt);
1770                         pfree(relation);
1771                         elog(ERROR, "relation %u deleted while still in use",
1772                                  buildinfo.i.info_id);
1773                 }
1774                 relation->rd_refcnt = old_refcnt;
1775                 relation->rd_createSubid = old_createSubid;
1776                 if (equalTupleDescs(old_att, relation->rd_att))
1777                 {
1778                         /* needn't flush typcache here */
1779                         FreeTupleDesc(relation->rd_att);
1780                         relation->rd_att = old_att;
1781                 }
1782                 else
1783                 {
1784                         flush_rowtype_cache(old_reltype);
1785                         FreeTupleDesc(old_att);
1786                 }
1787                 if (equalRuleLocks(old_rules, relation->rd_rules))
1788                 {
1789                         if (relation->rd_rulescxt)
1790                                 MemoryContextDelete(relation->rd_rulescxt);
1791                         relation->rd_rules = old_rules;
1792                         relation->rd_rulescxt = old_rulescxt;
1793                 }
1794                 else
1795                 {
1796                         if (old_rulescxt)
1797                                 MemoryContextDelete(old_rulescxt);
1798                 }
1799         }
1800 }
1801
1802 /*
1803  * RelationFlushRelation
1804  *
1805  *       Rebuild the relation if it is open (refcount > 0), else blow it away.
1806  */
1807 static void
1808 RelationFlushRelation(Relation relation)
1809 {
1810         bool            rebuild;
1811
1812         if (relation->rd_createSubid != InvalidSubTransactionId)
1813         {
1814                 /*
1815                  * New relcache entries are always rebuilt, not flushed; else we'd
1816                  * forget the "new" status of the relation, which is a useful
1817                  * optimization to have.
1818                  */
1819                 rebuild = true;
1820         }
1821         else
1822         {
1823                 /*
1824                  * Pre-existing rels can be dropped from the relcache if not open.
1825                  */
1826                 rebuild = !RelationHasReferenceCountZero(relation);
1827         }
1828
1829         RelationClearRelation(relation, rebuild);
1830 }
1831
1832 /*
1833  * RelationForgetRelation - unconditionally remove a relcache entry
1834  *
1835  *                 External interface for destroying a relcache entry when we
1836  *                 drop the relation.
1837  */
1838 void
1839 RelationForgetRelation(Oid rid)
1840 {
1841         Relation        relation;
1842
1843         RelationIdCacheLookup(rid, relation);
1844
1845         if (!PointerIsValid(relation))
1846                 return;                                 /* not in cache, nothing to do */
1847
1848         if (!RelationHasReferenceCountZero(relation))
1849                 elog(ERROR, "relation %u is still open", rid);
1850
1851         /* Unconditionally destroy the relcache entry */
1852         RelationClearRelation(relation, false);
1853 }
1854
1855 /*
1856  *              RelationCacheInvalidateEntry
1857  *
1858  *              This routine is invoked for SI cache flush messages.
1859  *
1860  * Any relcache entry matching the relid must be flushed.  (Note: caller has
1861  * already determined that the relid belongs to our database or is a shared
1862  * relation.)
1863  *
1864  * We used to skip local relations, on the grounds that they could
1865  * not be targets of cross-backend SI update messages; but it seems
1866  * safer to process them, so that our *own* SI update messages will
1867  * have the same effects during CommandCounterIncrement for both
1868  * local and nonlocal relations.
1869  */
1870 void
1871 RelationCacheInvalidateEntry(Oid relationId)
1872 {
1873         Relation        relation;
1874
1875         RelationIdCacheLookup(relationId, relation);
1876
1877         if (PointerIsValid(relation))
1878         {
1879                 relcacheInvalsReceived++;
1880                 RelationFlushRelation(relation);
1881         }
1882 }
1883
1884 /*
1885  * RelationCacheInvalidate
1886  *       Blow away cached relation descriptors that have zero reference counts,
1887  *       and rebuild those with positive reference counts.      Also reset the smgr
1888  *       relation cache.
1889  *
1890  *       This is currently used only to recover from SI message buffer overflow,
1891  *       so we do not touch new-in-transaction relations; they cannot be targets
1892  *       of cross-backend SI updates (and our own updates now go through a
1893  *       separate linked list that isn't limited by the SI message buffer size).
1894  *
1895  *       We do this in two phases: the first pass deletes deletable items, and
1896  *       the second one rebuilds the rebuildable items.  This is essential for
1897  *       safety, because hash_seq_search only copes with concurrent deletion of
1898  *       the element it is currently visiting.  If a second SI overflow were to
1899  *       occur while we are walking the table, resulting in recursive entry to
1900  *       this routine, we could crash because the inner invocation blows away
1901  *       the entry next to be visited by the outer scan.  But this way is OK,
1902  *       because (a) during the first pass we won't process any more SI messages,
1903  *       so hash_seq_search will complete safely; (b) during the second pass we
1904  *       only hold onto pointers to nondeletable entries.
1905  *
1906  *       The two-phase approach also makes it easy to ensure that we process
1907  *       nailed-in-cache indexes before other nondeletable items, and that we
1908  *       process pg_class_oid_index first of all.  In scenarios where a nailed
1909  *       index has been given a new relfilenode, we have to detect that update
1910  *       before the nailed index is used in reloading any other relcache entry.
1911  */
1912 void
1913 RelationCacheInvalidate(void)
1914 {
1915         HASH_SEQ_STATUS status;
1916         RelIdCacheEnt *idhentry;
1917         Relation        relation;
1918         List       *rebuildFirstList = NIL;
1919         List       *rebuildList = NIL;
1920         ListCell   *l;
1921
1922         /* Phase 1 */
1923         hash_seq_init(&status, RelationIdCache);
1924
1925         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
1926         {
1927                 relation = idhentry->reldesc;
1928
1929                 /* Must close all smgr references to avoid leaving dangling ptrs */
1930                 RelationCloseSmgr(relation);
1931
1932                 /* Ignore new relations, since they are never SI targets */
1933                 if (relation->rd_createSubid != InvalidSubTransactionId)
1934                         continue;
1935
1936                 relcacheInvalsReceived++;
1937
1938                 if (RelationHasReferenceCountZero(relation))
1939                 {
1940                         /* Delete this entry immediately */
1941                         Assert(!relation->rd_isnailed);
1942                         RelationClearRelation(relation, false);
1943                 }
1944                 else
1945                 {
1946                         /*
1947                          * Add this entry to list of stuff to rebuild in second pass.
1948                          * pg_class_oid_index goes on the front of rebuildFirstList,
1949                          * other nailed indexes on the back, and everything else into
1950                          * rebuildList (in no particular order).
1951                          */
1952                         if (relation->rd_isnailed &&
1953                                 relation->rd_rel->relkind == RELKIND_INDEX)
1954                         {
1955                                 if (strcmp(RelationGetRelationName(relation),
1956                                                    ClassOidIndex) == 0)
1957                                         rebuildFirstList = lcons(relation, rebuildFirstList);
1958                                 else
1959                                         rebuildFirstList = lappend(rebuildFirstList, relation);
1960                         }
1961                         else
1962                                 rebuildList = lcons(relation, rebuildList);
1963                 }
1964         }
1965
1966         rebuildList = list_concat(rebuildFirstList, rebuildList);
1967
1968         /*
1969          * Now zap any remaining smgr cache entries.  This must happen before
1970          * we start to rebuild entries, since that may involve catalog fetches
1971          * which will re-open catalog files.
1972          */
1973         smgrcloseall();
1974
1975         /* Phase 2: rebuild the items found to need rebuild in phase 1 */
1976         foreach(l, rebuildList)
1977         {
1978                 relation = (Relation) lfirst(l);
1979                 RelationClearRelation(relation, true);
1980         }
1981         list_free(rebuildList);
1982 }
1983
1984 /*
1985  * AtEOXact_RelationCache
1986  *
1987  *      Clean up the relcache at main-transaction commit or abort.
1988  *
1989  * Note: this must be called *before* processing invalidation messages.
1990  * In the case of abort, we don't want to try to rebuild any invalidated
1991  * cache entries (since we can't safely do database accesses).  Therefore
1992  * we must reset refcnts before handling pending invalidations.
1993  */
1994 void
1995 AtEOXact_RelationCache(bool isCommit)
1996 {
1997         HASH_SEQ_STATUS status;
1998         RelIdCacheEnt *idhentry;
1999
2000         hash_seq_init(&status, RelationIdCache);
2001
2002         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2003         {
2004                 Relation        relation = idhentry->reldesc;
2005                 int                     expected_refcnt;
2006
2007                 /*
2008                  * Is it a relation created in the current transaction?
2009                  *
2010                  * During commit, reset the flag to zero, since we are now out of the
2011                  * creating transaction.  During abort, simply delete the relcache
2012                  * entry --- it isn't interesting any longer.  (NOTE: if we have
2013                  * forgotten the new-ness of a new relation due to a forced cache
2014                  * flush, the entry will get deleted anyway by shared-cache-inval
2015                  * processing of the aborted pg_class insertion.)
2016                  */
2017                 if (relation->rd_createSubid != InvalidSubTransactionId)
2018                 {
2019                         if (isCommit)
2020                                 relation->rd_createSubid = InvalidSubTransactionId;
2021                         else
2022                         {
2023                                 RelationClearRelation(relation, false);
2024                                 continue;
2025                         }
2026                 }
2027
2028                 /*
2029                  * During transaction abort, we must also reset relcache entry ref
2030                  * counts to their normal not-in-a-transaction state.  A ref count
2031                  * may be too high because some routine was exited by ereport()
2032                  * between incrementing and decrementing the count.
2033                  *
2034                  * During commit, we should not have to do this, but it's still
2035                  * useful to check that the counts are correct to catch missed
2036                  * relcache closes.
2037                  *
2038                  * In bootstrap mode, do NOT reset the refcnt nor complain that it's
2039                  * nonzero --- the bootstrap code expects relations to stay open
2040                  * across start/commit transaction calls.  (That seems bogus, but
2041                  * it's not worth fixing.)
2042                  */
2043                 expected_refcnt = relation->rd_isnailed ? 1 : 0;
2044
2045                 if (isCommit)
2046                 {
2047                         if (relation->rd_refcnt != expected_refcnt &&
2048                                 !IsBootstrapProcessingMode())
2049                         {
2050                                 elog(WARNING, "relcache reference leak: relation \"%s\" has refcnt %d instead of %d",
2051                                          RelationGetRelationName(relation),
2052                                          relation->rd_refcnt, expected_refcnt);
2053                                 relation->rd_refcnt = expected_refcnt;
2054                         }
2055                 }
2056                 else
2057                 {
2058                         /* abort case, just reset it quietly */
2059                         relation->rd_refcnt = expected_refcnt;
2060                 }
2061
2062                 /*
2063                  * Flush any temporary index list.
2064                  */
2065                 if (relation->rd_indexvalid == 2)
2066                 {
2067                         list_free(relation->rd_indexlist);
2068                         relation->rd_indexlist = NIL;
2069                         relation->rd_indexvalid = 0;
2070                 }
2071         }
2072
2073         /* Once done with the transaction, we can reset need_eosubxact_work */
2074         need_eosubxact_work = false;
2075 }
2076
2077 /*
2078  * AtEOSubXact_RelationCache
2079  *
2080  *      Clean up the relcache at sub-transaction commit or abort.
2081  *
2082  * Note: this must be called *before* processing invalidation messages.
2083  */
2084 void
2085 AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
2086                                                   SubTransactionId parentSubid)
2087 {
2088         HASH_SEQ_STATUS status;
2089         RelIdCacheEnt *idhentry;
2090
2091         /*
2092          * In the majority of subtransactions there is not anything for this
2093          * routine to do, and since there are usually many entries in the
2094          * relcache, uselessly scanning the cache represents a surprisingly
2095          * large fraction of the subtransaction entry/exit overhead.  To avoid
2096          * this, we keep a static flag that must be set whenever a condition
2097          * is created that requires subtransaction-end work.  (Currently, this
2098          * means either a relation is created in the current xact, or an index
2099          * list is forced.)  For simplicity, the flag remains set till end of
2100          * top-level transaction, even though we could clear it earlier in some
2101          * cases.
2102          */
2103         if (!need_eosubxact_work)
2104                 return;
2105
2106         hash_seq_init(&status, RelationIdCache);
2107
2108         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2109         {
2110                 Relation        relation = idhentry->reldesc;
2111
2112                 /*
2113                  * Is it a relation created in the current subtransaction?
2114                  *
2115                  * During subcommit, mark it as belonging to the parent, instead.
2116                  * During subabort, simply delete the relcache entry.
2117                  */
2118                 if (relation->rd_createSubid == mySubid)
2119                 {
2120                         if (isCommit)
2121                                 relation->rd_createSubid = parentSubid;
2122                         else
2123                         {
2124                                 Assert(RelationHasReferenceCountZero(relation));
2125                                 RelationClearRelation(relation, false);
2126                                 continue;
2127                         }
2128                 }
2129
2130                 /*
2131                  * Flush any temporary index list.
2132                  */
2133                 if (relation->rd_indexvalid == 2)
2134                 {
2135                         list_free(relation->rd_indexlist);
2136                         relation->rd_indexlist = NIL;
2137                         relation->rd_indexvalid = 0;
2138                 }
2139         }
2140 }
2141
2142 /*
2143  *              RelationBuildLocalRelation
2144  *                      Build a relcache entry for an about-to-be-created relation,
2145  *                      and enter it into the relcache.
2146  */
2147 Relation
2148 RelationBuildLocalRelation(const char *relname,
2149                                                    Oid relnamespace,
2150                                                    TupleDesc tupDesc,
2151                                                    Oid relid,
2152                                                    Oid reltablespace,
2153                                                    bool shared_relation,
2154                                                    bool nailit)
2155 {
2156         Relation        rel;
2157         MemoryContext oldcxt;
2158         int                     natts = tupDesc->natts;
2159         int                     i;
2160         bool            has_not_null;
2161
2162         AssertArg(natts >= 0);
2163
2164         /*
2165          * switch to the cache context to create the relcache entry.
2166          */
2167         if (!CacheMemoryContext)
2168                 CreateCacheMemoryContext();
2169
2170         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2171
2172         /*
2173          * allocate a new relation descriptor and fill in basic state fields.
2174          */
2175         rel = (Relation) palloc0(sizeof(RelationData));
2176
2177         rel->rd_targblock = InvalidBlockNumber;
2178
2179         /* make sure relation is marked as having no open file yet */
2180         rel->rd_smgr = NULL;
2181
2182         rel->rd_refcnt = nailit ? 1 : 0;
2183
2184         /* it's being created in this transaction */
2185         rel->rd_createSubid = GetCurrentSubTransactionId();
2186
2187         /* must flag that we have rels created in this transaction */
2188         need_eosubxact_work = true;
2189
2190         /* is it a temporary relation? */
2191         rel->rd_istemp = isTempNamespace(relnamespace);
2192
2193         /*
2194          * nail the reldesc if this is a bootstrap create reln and we may need
2195          * it in the cache later on in the bootstrap process so we don't ever
2196          * want it kicked out.  e.g. pg_attribute!!!
2197          */
2198         if (nailit)
2199                 rel->rd_isnailed = true;
2200
2201         /*
2202          * create a new tuple descriptor from the one passed in.  We do this
2203          * partly to copy it into the cache context, and partly because the
2204          * new relation can't have any defaults or constraints yet; they have
2205          * to be added in later steps, because they require additions to
2206          * multiple system catalogs.  We can copy attnotnull constraints here,
2207          * however.
2208          */
2209         rel->rd_att = CreateTupleDescCopy(tupDesc);
2210         has_not_null = false;
2211         for (i = 0; i < natts; i++)
2212         {
2213                 rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
2214                 has_not_null |= tupDesc->attrs[i]->attnotnull;
2215         }
2216
2217         if (has_not_null)
2218         {
2219                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
2220
2221                 constr->has_not_null = true;
2222                 rel->rd_att->constr = constr;
2223         }
2224
2225         /*
2226          * initialize relation tuple form (caller may add/override data later)
2227          */
2228         rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
2229
2230         namestrcpy(&rel->rd_rel->relname, relname);
2231         rel->rd_rel->relnamespace = relnamespace;
2232
2233         rel->rd_rel->relkind = RELKIND_UNCATALOGED;
2234         rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
2235         rel->rd_rel->relnatts = natts;
2236         rel->rd_rel->reltype = InvalidOid;
2237
2238         /*
2239          * Insert relation physical and logical identifiers (OIDs) into the
2240          * right places.  Note that the physical ID (relfilenode) is initially
2241          * the same as the logical ID (OID).
2242          */
2243         rel->rd_rel->relisshared = shared_relation;
2244
2245         RelationGetRelid(rel) = relid;
2246
2247         for (i = 0; i < natts; i++)
2248                 rel->rd_att->attrs[i]->attrelid = relid;
2249
2250         rel->rd_rel->relfilenode = relid;
2251         rel->rd_rel->reltablespace = reltablespace;
2252
2253         RelationInitLockInfo(rel);      /* see lmgr.c */
2254
2255         RelationInitPhysicalAddr(rel);
2256
2257         /*
2258          * Okay to insert into the relcache hash tables.
2259          */
2260         RelationCacheInsert(rel);
2261
2262         /*
2263          * done building relcache entry.
2264          */
2265         MemoryContextSwitchTo(oldcxt);
2266
2267         /* It's fully valid */
2268         rel->rd_isvalid = true;
2269
2270         /*
2271          * Caller expects us to pin the returned entry.
2272          */
2273         RelationIncrementReferenceCount(rel);
2274
2275         return rel;
2276 }
2277
2278 /*
2279  *              RelationCacheInitialize
2280  *
2281  *              This initializes the relation descriptor cache.  At the time
2282  *              that this is invoked, we can't do database access yet (mainly
2283  *              because the transaction subsystem is not up), so we can't get
2284  *              "real" info.  However it's okay to read the pg_internal.init
2285  *              cache file, if one is available.  Otherwise we make phony
2286  *              entries for the minimum set of nailed-in-cache relations.
2287  */
2288
2289 #define INITRELCACHESIZE                400
2290
2291 void
2292 RelationCacheInitialize(void)
2293 {
2294         MemoryContext oldcxt;
2295         HASHCTL         ctl;
2296
2297         /*
2298          * switch to cache memory context
2299          */
2300         if (!CacheMemoryContext)
2301                 CreateCacheMemoryContext();
2302
2303         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2304
2305         /*
2306          * create hashtables that index the relcache
2307          */
2308         MemSet(&ctl, 0, sizeof(ctl));
2309         ctl.keysize = sizeof(NameData);
2310         ctl.entrysize = sizeof(RelNameCacheEnt);
2311         RelationSysNameCache = hash_create("Relcache by name", INITRELCACHESIZE,
2312                                                                            &ctl, HASH_ELEM);
2313
2314         ctl.keysize = sizeof(Oid);
2315         ctl.entrysize = sizeof(RelIdCacheEnt);
2316         ctl.hash = tag_hash;
2317         RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
2318                                                                   &ctl, HASH_ELEM | HASH_FUNCTION);
2319
2320         /*
2321          * Try to load the relcache cache file.  If successful, we're done for
2322          * now.  Otherwise, initialize the cache with pre-made descriptors for
2323          * the critical "nailed-in" system catalogs.
2324          */
2325         if (IsBootstrapProcessingMode() ||
2326                 !load_relcache_init_file())
2327         {
2328                 formrdesc(RelationRelationName, PG_CLASS_RELTYPE_OID,
2329                                   true, Natts_pg_class, Desc_pg_class);
2330                 formrdesc(AttributeRelationName, PG_ATTRIBUTE_RELTYPE_OID,
2331                                   false, Natts_pg_attribute, Desc_pg_attribute);
2332                 formrdesc(ProcedureRelationName, PG_PROC_RELTYPE_OID,
2333                                   true, Natts_pg_proc, Desc_pg_proc);
2334                 formrdesc(TypeRelationName, PG_TYPE_RELTYPE_OID,
2335                                   true, Natts_pg_type, Desc_pg_type);
2336
2337 #define NUM_CRITICAL_RELS       4       /* fix if you change list above */
2338         }
2339
2340         MemoryContextSwitchTo(oldcxt);
2341 }
2342
2343 /*
2344  *              RelationCacheInitializePhase2
2345  *
2346  *              This is called as soon as the catcache and transaction system
2347  *              are functional.  At this point we can actually read data from
2348  *              the system catalogs.  Update the relcache entries made during
2349  *              RelationCacheInitialize, and make sure we have entries for the
2350  *              critical system indexes.
2351  */
2352 void
2353 RelationCacheInitializePhase2(void)
2354 {
2355         HASH_SEQ_STATUS status;
2356         RelIdCacheEnt *idhentry;
2357
2358         if (IsBootstrapProcessingMode())
2359                 return;
2360
2361         /*
2362          * If we didn't get the critical system indexes loaded into relcache,
2363          * do so now.  These are critical because the catcache depends on them
2364          * for catcache fetches that are done during relcache load.  Thus, we
2365          * have an infinite-recursion problem.  We can break the recursion by
2366          * doing heapscans instead of indexscans at certain key spots. To
2367          * avoid hobbling performance, we only want to do that until we have
2368          * the critical indexes loaded into relcache.  Thus, the flag
2369          * criticalRelcachesBuilt is used to decide whether to do heapscan or
2370          * indexscan at the key spots, and we set it true after we've loaded
2371          * the critical indexes.
2372          *
2373          * The critical indexes are marked as "nailed in cache", partly to make
2374          * it easy for load_relcache_init_file to count them, but mainly
2375          * because we cannot flush and rebuild them once we've set
2376          * criticalRelcachesBuilt to true.      (NOTE: perhaps it would be
2377          * possible to reload them by temporarily setting
2378          * criticalRelcachesBuilt to false again.  For now, though, we just
2379          * nail 'em in.)
2380          */
2381         if (!criticalRelcachesBuilt)
2382         {
2383                 RelationBuildDescInfo buildinfo;
2384                 Relation        ird;
2385
2386 #define LOAD_CRIT_INDEX(indname) \
2387                 do { \
2388                         buildinfo.infotype = INFO_RELNAME; \
2389                         buildinfo.i.info_name = (indname); \
2390                         ird = RelationBuildDesc(buildinfo, NULL); \
2391                         ird->rd_isnailed = true; \
2392                         ird->rd_refcnt = 1; \
2393                 } while (0)
2394
2395                 LOAD_CRIT_INDEX(ClassNameNspIndex);
2396                 LOAD_CRIT_INDEX(ClassOidIndex);
2397                 LOAD_CRIT_INDEX(AttributeRelidNumIndex);
2398                 LOAD_CRIT_INDEX(IndexRelidIndex);
2399                 LOAD_CRIT_INDEX(AccessMethodStrategyIndex);
2400                 LOAD_CRIT_INDEX(AccessMethodProcedureIndex);
2401                 LOAD_CRIT_INDEX(OperatorOidIndex);
2402
2403 #define NUM_CRITICAL_INDEXES    7               /* fix if you change list above */
2404
2405                 criticalRelcachesBuilt = true;
2406         }
2407
2408         /*
2409          * Now, scan all the relcache entries and update anything that might
2410          * be wrong in the results from formrdesc or the relcache cache file.
2411          * If we faked up relcache entries using formrdesc, then read the real
2412          * pg_class rows and replace the fake entries with them. Also, if any
2413          * of the relcache entries have rules or triggers, load that info the
2414          * hard way since it isn't recorded in the cache file.
2415          */
2416         hash_seq_init(&status, RelationIdCache);
2417
2418         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2419         {
2420                 Relation        relation = idhentry->reldesc;
2421
2422                 /*
2423                  * If it's a faked-up entry, read the real pg_class tuple.
2424                  */
2425                 if (needNewCacheFile && relation->rd_isnailed)
2426                 {
2427                         HeapTuple       htup;
2428                         Form_pg_class relp;
2429
2430                         htup = SearchSysCache(RELOID,
2431                                                         ObjectIdGetDatum(RelationGetRelid(relation)),
2432                                                                   0, 0, 0);
2433                         if (!HeapTupleIsValid(htup))
2434                                 elog(FATAL, "cache lookup failed for relation %u",
2435                                          RelationGetRelid(relation));
2436                         relp = (Form_pg_class) GETSTRUCT(htup);
2437
2438                         /*
2439                          * Copy tuple to relation->rd_rel. (See notes in
2440                          * AllocateRelationDesc())
2441                          */
2442                         Assert(relation->rd_rel != NULL);
2443                         memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
2444
2445                         /*
2446                          * Also update the derived fields in rd_att.
2447                          */
2448                         relation->rd_att->tdtypeid = relp->reltype;
2449                         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
2450                         relation->rd_att->tdhasoid = relp->relhasoids;
2451
2452                         ReleaseSysCache(htup);
2453                 }
2454
2455                 /*
2456                  * Fix data that isn't saved in relcache cache file.
2457                  */
2458                 if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
2459                         RelationBuildRuleLock(relation);
2460                 if (relation->rd_rel->reltriggers > 0 && relation->trigdesc == NULL)
2461                         RelationBuildTriggers(relation);
2462         }
2463 }
2464
2465 /*
2466  *              RelationCacheInitializePhase3
2467  *
2468  *              Final step of relcache initialization: write out a new relcache
2469  *              cache file if one is needed.
2470  */
2471 void
2472 RelationCacheInitializePhase3(void)
2473 {
2474         if (IsBootstrapProcessingMode())
2475                 return;
2476
2477         if (needNewCacheFile)
2478         {
2479                 /*
2480                  * Force all the catcaches to finish initializing and thereby open
2481                  * the catalogs and indexes they use.  This will preload the
2482                  * relcache with entries for all the most important system
2483                  * catalogs and indexes, so that the init file will be most useful
2484                  * for future backends.
2485                  */
2486                 InitCatalogCachePhase2();
2487
2488                 /* now write the file */
2489                 write_relcache_init_file();
2490         }
2491 }
2492
2493 /*
2494  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
2495  *
2496  * We need this kluge because we have to be able to access non-fixed-width
2497  * fields of pg_index before we have the standard catalog caches available.
2498  * We use predefined data that's set up in just the same way as the
2499  * bootstrapped reldescs used by formrdesc().  The resulting tupdesc is
2500  * not 100% kosher: it does not have the correct relation OID in attrelid,
2501  * nor does it have a TupleConstr field.  But it's good enough for the
2502  * purpose of extracting fields.
2503  */
2504 static TupleDesc
2505 GetPgIndexDescriptor(void)
2506 {
2507         static TupleDesc pgindexdesc = NULL;
2508         MemoryContext oldcxt;
2509         int                     i;
2510
2511         /* Already done? */
2512         if (pgindexdesc)
2513                 return pgindexdesc;
2514
2515         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2516
2517         pgindexdesc = CreateTemplateTupleDesc(Natts_pg_index, false);
2518         pgindexdesc->tdtypeid = RECORDOID; /* not right, but we don't care */
2519         pgindexdesc->tdtypmod = -1;
2520
2521         for (i = 0; i < Natts_pg_index; i++)
2522         {
2523                 memcpy(pgindexdesc->attrs[i],
2524                            &Desc_pg_index[i],
2525                            ATTRIBUTE_TUPLE_SIZE);
2526                 /* make sure attcacheoff is valid */
2527                 pgindexdesc->attrs[i]->attcacheoff = -1;
2528         }
2529
2530         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
2531         pgindexdesc->attrs[0]->attcacheoff = 0;
2532
2533         /* Note: we don't bother to set up a TupleConstr entry */
2534
2535         MemoryContextSwitchTo(oldcxt);
2536
2537         return pgindexdesc;
2538 }
2539
2540 static void
2541 AttrDefaultFetch(Relation relation)
2542 {
2543         AttrDefault *attrdef = relation->rd_att->constr->defval;
2544         int                     ndef = relation->rd_att->constr->num_defval;
2545         Relation        adrel;
2546         SysScanDesc adscan;
2547         ScanKeyData skey;
2548         HeapTuple       htup;
2549         Datum           val;
2550         bool            isnull;
2551         int                     found;
2552         int                     i;
2553
2554         ScanKeyInit(&skey,
2555                                 Anum_pg_attrdef_adrelid,
2556                                 BTEqualStrategyNumber, F_OIDEQ,
2557                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2558
2559         adrel = heap_openr(AttrDefaultRelationName, AccessShareLock);
2560         adscan = systable_beginscan(adrel, AttrDefaultIndex, true,
2561                                                                 SnapshotNow, 1, &skey);
2562         found = 0;
2563
2564         while (HeapTupleIsValid(htup = systable_getnext(adscan)))
2565         {
2566                 Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
2567
2568                 for (i = 0; i < ndef; i++)
2569                 {
2570                         if (adform->adnum != attrdef[i].adnum)
2571                                 continue;
2572                         if (attrdef[i].adbin != NULL)
2573                                 elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
2574                                          NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2575                                          RelationGetRelationName(relation));
2576                         else
2577                                 found++;
2578
2579                         val = fastgetattr(htup,
2580                                                           Anum_pg_attrdef_adbin,
2581                                                           adrel->rd_att, &isnull);
2582                         if (isnull)
2583                                 elog(WARNING, "null adbin for attr %s of rel %s",
2584                                          NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2585                                          RelationGetRelationName(relation));
2586                         else
2587                                 attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext,
2588                                                          DatumGetCString(DirectFunctionCall1(textout,
2589                                                                                                                                  val)));
2590                         break;
2591                 }
2592
2593                 if (i >= ndef)
2594                         elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
2595                                  adform->adnum, RelationGetRelationName(relation));
2596         }
2597
2598         systable_endscan(adscan);
2599         heap_close(adrel, AccessShareLock);
2600
2601         if (found != ndef)
2602                 elog(WARNING, "%d attrdef record(s) missing for rel %s",
2603                          ndef - found, RelationGetRelationName(relation));
2604 }
2605
2606 static void
2607 CheckConstraintFetch(Relation relation)
2608 {
2609         ConstrCheck *check = relation->rd_att->constr->check;
2610         int                     ncheck = relation->rd_att->constr->num_check;
2611         Relation        conrel;
2612         SysScanDesc conscan;
2613         ScanKeyData skey[1];
2614         HeapTuple       htup;
2615         Datum           val;
2616         bool            isnull;
2617         int                     found = 0;
2618
2619         ScanKeyInit(&skey[0],
2620                                 Anum_pg_constraint_conrelid,
2621                                 BTEqualStrategyNumber, F_OIDEQ,
2622                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2623
2624         conrel = heap_openr(ConstraintRelationName, AccessShareLock);
2625         conscan = systable_beginscan(conrel, ConstraintRelidIndex, true,
2626                                                                  SnapshotNow, 1, skey);
2627
2628         while (HeapTupleIsValid(htup = systable_getnext(conscan)))
2629         {
2630                 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
2631
2632                 /* We want check constraints only */
2633                 if (conform->contype != CONSTRAINT_CHECK)
2634                         continue;
2635
2636                 if (found >= ncheck)
2637                         elog(ERROR, "unexpected constraint record found for rel %s",
2638                                  RelationGetRelationName(relation));
2639
2640                 check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
2641                                                                                           NameStr(conform->conname));
2642
2643                 /* Grab and test conbin is actually set */
2644                 val = fastgetattr(htup,
2645                                                   Anum_pg_constraint_conbin,
2646                                                   conrel->rd_att, &isnull);
2647                 if (isnull)
2648                         elog(ERROR, "null conbin for rel %s",
2649                                  RelationGetRelationName(relation));
2650
2651                 check[found].ccbin = MemoryContextStrdup(CacheMemoryContext,
2652                                                          DatumGetCString(DirectFunctionCall1(textout,
2653                                                                                                                                  val)));
2654                 found++;
2655         }
2656
2657         systable_endscan(conscan);
2658         heap_close(conrel, AccessShareLock);
2659
2660         if (found != ncheck)
2661                 elog(ERROR, "%d constraint record(s) missing for rel %s",
2662                          ncheck - found, RelationGetRelationName(relation));
2663 }
2664
2665 /*
2666  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
2667  *
2668  * The index list is created only if someone requests it.  We scan pg_index
2669  * to find relevant indexes, and add the list to the relcache entry so that
2670  * we won't have to compute it again.  Note that shared cache inval of a
2671  * relcache entry will delete the old list and set rd_indexvalid to 0,
2672  * so that we must recompute the index list on next request.  This handles
2673  * creation or deletion of an index.
2674  *
2675  * The returned list is guaranteed to be sorted in order by OID.  This is
2676  * needed by the executor, since for index types that we obtain exclusive
2677  * locks on when updating the index, all backends must lock the indexes in
2678  * the same order or we will get deadlocks (see ExecOpenIndices()).  Any
2679  * consistent ordering would do, but ordering by OID is easy.
2680  *
2681  * Since shared cache inval causes the relcache's copy of the list to go away,
2682  * we return a copy of the list palloc'd in the caller's context.  The caller
2683  * may freeList() the returned list after scanning it.  This is necessary
2684  * since the caller will typically be doing syscache lookups on the relevant
2685  * indexes, and syscache lookup could cause SI messages to be processed!
2686  */
2687 List *
2688 RelationGetIndexList(Relation relation)
2689 {
2690         Relation        indrel;
2691         SysScanDesc indscan;
2692         ScanKeyData skey;
2693         HeapTuple       htup;
2694         List       *result;
2695         MemoryContext oldcxt;
2696
2697         /* Quick exit if we already computed the list. */
2698         if (relation->rd_indexvalid != 0)
2699                 return list_copy(relation->rd_indexlist);
2700
2701         /*
2702          * We build the list we intend to return (in the caller's context)
2703          * while doing the scan.  After successfully completing the scan, we
2704          * copy that list into the relcache entry.      This avoids cache-context
2705          * memory leakage if we get some sort of error partway through.
2706          */
2707         result = NIL;
2708
2709         /* Prepare to scan pg_index for entries having indrelid = this rel. */
2710         ScanKeyInit(&skey,
2711                                 Anum_pg_index_indrelid,
2712                                 BTEqualStrategyNumber, F_OIDEQ,
2713                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2714
2715         indrel = heap_openr(IndexRelationName, AccessShareLock);
2716         indscan = systable_beginscan(indrel, IndexIndrelidIndex, true,
2717                                                                  SnapshotNow, 1, &skey);
2718
2719         while (HeapTupleIsValid(htup = systable_getnext(indscan)))
2720         {
2721                 Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
2722
2723                 result = insert_ordered_oid(result, index->indexrelid);
2724         }
2725
2726         systable_endscan(indscan);
2727         heap_close(indrel, AccessShareLock);
2728
2729         /* Now save a copy of the completed list in the relcache entry. */
2730         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2731         relation->rd_indexlist = list_copy(result);
2732         relation->rd_indexvalid = 1;
2733         MemoryContextSwitchTo(oldcxt);
2734
2735         return result;
2736 }
2737
2738 /*
2739  * insert_ordered_oid
2740  *              Insert a new Oid into a sorted list of Oids, preserving ordering
2741  *
2742  * Building the ordered list this way is O(N^2), but with a pretty small
2743  * constant, so for the number of entries we expect it will probably be
2744  * faster than trying to apply qsort().  Most tables don't have very many
2745  * indexes...
2746  */
2747 static List *
2748 insert_ordered_oid(List *list, Oid datum)
2749 {
2750         ListCell   *prev;
2751
2752         /* Does the datum belong at the front? */
2753         if (list == NIL || datum < linitial_oid(list))
2754                 return lcons_oid(datum, list);
2755         /* No, so find the entry it belongs after */
2756         prev = list_head(list);
2757         for (;;)
2758         {
2759                 ListCell   *curr = lnext(prev);
2760
2761                 if (curr == NULL || datum < lfirst_oid(curr))
2762                         break;                          /* it belongs after 'prev', before 'curr' */
2763
2764                 prev = curr;
2765         }
2766         /* Insert datum into list after 'prev' */
2767         lappend_cell_oid(list, prev, datum);
2768         return list;
2769 }
2770
2771 /*
2772  * RelationSetIndexList -- externally force the index list contents
2773  *
2774  * This is used to temporarily override what we think the set of valid
2775  * indexes is.  The forcing will be valid only until transaction commit
2776  * or abort.
2777  *
2778  * This should only be applied to nailed relations, because in a non-nailed
2779  * relation the hacked index list could be lost at any time due to SI
2780  * messages.  In practice it is only used on pg_class (see REINDEX).
2781  *
2782  * It is up to the caller to make sure the given list is correctly ordered.
2783  */
2784 void
2785 RelationSetIndexList(Relation relation, List *indexIds)
2786 {
2787         MemoryContext oldcxt;
2788
2789         Assert(relation->rd_isnailed);
2790         /* Copy the list into the cache context (could fail for lack of mem) */
2791         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2792         indexIds = list_copy(indexIds);
2793         MemoryContextSwitchTo(oldcxt);
2794         /* Okay to replace old list */
2795         list_free(relation->rd_indexlist);
2796         relation->rd_indexlist = indexIds;
2797         relation->rd_indexvalid = 2;    /* mark list as forced */
2798         /* must flag that we have a forced index list */
2799         need_eosubxact_work = true;
2800 }
2801
2802 /*
2803  * RelationGetIndexExpressions -- get the index expressions for an index
2804  *
2805  * We cache the result of transforming pg_index.indexprs into a node tree.
2806  * If the rel is not an index or has no expressional columns, we return NIL.
2807  * Otherwise, the returned tree is copied into the caller's memory context.
2808  * (We don't want to return a pointer to the relcache copy, since it could
2809  * disappear due to relcache invalidation.)
2810  */
2811 List *
2812 RelationGetIndexExpressions(Relation relation)
2813 {
2814         List       *result;
2815         Datum           exprsDatum;
2816         bool            isnull;
2817         char       *exprsString;
2818         MemoryContext oldcxt;
2819
2820         /* Quick exit if we already computed the result. */
2821         if (relation->rd_indexprs)
2822                 return (List *) copyObject(relation->rd_indexprs);
2823
2824         /* Quick exit if there is nothing to do. */
2825         if (relation->rd_indextuple == NULL ||
2826                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
2827                 return NIL;
2828
2829         /*
2830          * We build the tree we intend to return in the caller's context.
2831          * After successfully completing the work, we copy it into the
2832          * relcache entry.      This avoids problems if we get some sort of error
2833          * partway through.
2834          */
2835         exprsDatum = heap_getattr(relation->rd_indextuple,
2836                                                           Anum_pg_index_indexprs,
2837                                                           GetPgIndexDescriptor(),
2838                                                           &isnull);
2839         Assert(!isnull);
2840         exprsString = DatumGetCString(DirectFunctionCall1(textout, exprsDatum));
2841         result = (List *) stringToNode(exprsString);
2842         pfree(exprsString);
2843
2844         /*
2845          * Run the expressions through eval_const_expressions. This is not just an
2846          * optimization, but is necessary, because the planner will be comparing
2847          * them to similarly-processed qual clauses, and may fail to detect valid
2848          * matches without this.  We don't bother with canonicalize_qual, however.
2849          */
2850         result = (List *) eval_const_expressions((Node *) result);
2851
2852         /*
2853          * Also mark any coercion format fields as "don't care", so that the
2854          * planner can match to both explicit and implicit coercions.
2855          */
2856         set_coercionform_dontcare((Node *) result);
2857
2858         /* May as well fix opfuncids too */
2859         fix_opfuncids((Node *) result);
2860
2861         /* Now save a copy of the completed tree in the relcache entry. */
2862         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2863         relation->rd_indexprs = (List *) copyObject(result);
2864         MemoryContextSwitchTo(oldcxt);
2865
2866         return result;
2867 }
2868
2869 /*
2870  * RelationGetIndexPredicate -- get the index predicate for an index
2871  *
2872  * We cache the result of transforming pg_index.indpred into an implicit-AND
2873  * node tree (suitable for ExecQual).
2874  * If the rel is not an index or has no predicate, we return NIL.
2875  * Otherwise, the returned tree is copied into the caller's memory context.
2876  * (We don't want to return a pointer to the relcache copy, since it could
2877  * disappear due to relcache invalidation.)
2878  */
2879 List *
2880 RelationGetIndexPredicate(Relation relation)
2881 {
2882         List       *result;
2883         Datum           predDatum;
2884         bool            isnull;
2885         char       *predString;
2886         MemoryContext oldcxt;
2887
2888         /* Quick exit if we already computed the result. */
2889         if (relation->rd_indpred)
2890                 return (List *) copyObject(relation->rd_indpred);
2891
2892         /* Quick exit if there is nothing to do. */
2893         if (relation->rd_indextuple == NULL ||
2894                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
2895                 return NIL;
2896
2897         /*
2898          * We build the tree we intend to return in the caller's context.
2899          * After successfully completing the work, we copy it into the
2900          * relcache entry.      This avoids problems if we get some sort of error
2901          * partway through.
2902          */
2903         predDatum = heap_getattr(relation->rd_indextuple,
2904                                                          Anum_pg_index_indpred,
2905                                                          GetPgIndexDescriptor(),
2906                                                          &isnull);
2907         Assert(!isnull);
2908         predString = DatumGetCString(DirectFunctionCall1(textout, predDatum));
2909         result = (List *) stringToNode(predString);
2910         pfree(predString);
2911
2912         /*
2913          * Run the expression through const-simplification and canonicalization.
2914          * This is not just an optimization, but is necessary, because the planner
2915          * will be comparing it to similarly-processed qual clauses, and may fail
2916          * to detect valid matches without this.  This must match the processing
2917          * done to qual clauses in preprocess_expression()!  (We can skip the
2918          * stuff involving subqueries, however, since we don't allow any in
2919          * index predicates.)
2920          */
2921         result = (List *) eval_const_expressions((Node *) result);
2922
2923         result = (List *) canonicalize_qual((Expr *) result);
2924
2925         /*
2926          * Also mark any coercion format fields as "don't care", so that the
2927          * planner can match to both explicit and implicit coercions.
2928          */
2929         set_coercionform_dontcare((Node *) result);
2930
2931         /* Also convert to implicit-AND format */
2932         result = make_ands_implicit((Expr *) result);
2933
2934         /* May as well fix opfuncids too */
2935         fix_opfuncids((Node *) result);
2936
2937         /* Now save a copy of the completed tree in the relcache entry. */
2938         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2939         relation->rd_indpred = (List *) copyObject(result);
2940         MemoryContextSwitchTo(oldcxt);
2941
2942         return result;
2943 }
2944
2945
2946 /*
2947  *      load_relcache_init_file, write_relcache_init_file
2948  *
2949  *              In late 1992, we started regularly having databases with more than
2950  *              a thousand classes in them.  With this number of classes, it became
2951  *              critical to do indexed lookups on the system catalogs.
2952  *
2953  *              Bootstrapping these lookups is very hard.  We want to be able to
2954  *              use an index on pg_attribute, for example, but in order to do so,
2955  *              we must have read pg_attribute for the attributes in the index,
2956  *              which implies that we need to use the index.
2957  *
2958  *              In order to get around the problem, we do the following:
2959  *
2960  *                 +  When the database system is initialized (at initdb time), we
2961  *                        don't use indexes.  We do sequential scans.
2962  *
2963  *                 +  When the backend is started up in normal mode, we load an image
2964  *                        of the appropriate relation descriptors, in internal format,
2965  *                        from an initialization file in the data/base/... directory.
2966  *
2967  *                 +  If the initialization file isn't there, then we create the
2968  *                        relation descriptors using sequential scans and write 'em to
2969  *                        the initialization file for use by subsequent backends.
2970  *
2971  *              We could dispense with the initialization file and just build the
2972  *              critical reldescs the hard way on every backend startup, but that
2973  *              slows down backend startup noticeably.
2974  *
2975  *              We can in fact go further, and save more relcache entries than
2976  *              just the ones that are absolutely critical; this allows us to speed
2977  *              up backend startup by not having to build such entries the hard way.
2978  *              Presently, all the catalog and index entries that are referred to
2979  *              by catcaches are stored in the initialization file.
2980  *
2981  *              The same mechanism that detects when catcache and relcache entries
2982  *              need to be invalidated (due to catalog updates) also arranges to
2983  *              unlink the initialization file when its contents may be out of date.
2984  *              The file will then be rebuilt during the next backend startup.
2985  */
2986
2987 /*
2988  * load_relcache_init_file -- attempt to load cache from the init file
2989  *
2990  * If successful, return TRUE and set criticalRelcachesBuilt to true.
2991  * If not successful, return FALSE and set needNewCacheFile to true.
2992  *
2993  * NOTE: we assume we are already switched into CacheMemoryContext.
2994  */
2995 static bool
2996 load_relcache_init_file(void)
2997 {
2998         FILE       *fp;
2999         char            initfilename[MAXPGPATH];
3000         Relation   *rels;
3001         int                     relno,
3002                                 num_rels,
3003                                 max_rels,
3004                                 nailed_rels,
3005                                 nailed_indexes,
3006                                 magic;
3007         int                     i;
3008
3009         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3010                          DatabasePath, RELCACHE_INIT_FILENAME);
3011
3012         fp = AllocateFile(initfilename, PG_BINARY_R);
3013         if (fp == NULL)
3014         {
3015                 needNewCacheFile = true;
3016                 return false;
3017         }
3018
3019         /*
3020          * Read the index relcache entries from the file.  Note we will not
3021          * enter any of them into the cache if the read fails partway through;
3022          * this helps to guard against broken init files.
3023          */
3024         max_rels = 100;
3025         rels = (Relation *) palloc(max_rels * sizeof(Relation));
3026         num_rels = 0;
3027         nailed_rels = nailed_indexes = 0;
3028         initFileRelationIds = NIL;
3029
3030         /* check for correct magic number (compatible version) */
3031         if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3032                 goto read_failed;
3033         if (magic != RELCACHE_INIT_FILEMAGIC)
3034                 goto read_failed;
3035
3036         for (relno = 0;; relno++)
3037         {
3038                 Size            len;
3039                 size_t          nread;
3040                 Relation        rel;
3041                 Form_pg_class relform;
3042                 bool            has_not_null;
3043                 Datum           indclassDatum;
3044                 bool            isnull;
3045
3046                 /* first read the relation descriptor length */
3047                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3048                 {
3049                         if (nread == 0)
3050                                 break;                  /* end of file */
3051                         goto read_failed;
3052                 }
3053
3054                 /* safety check for incompatible relcache layout */
3055                 if (len != sizeof(RelationData))
3056                         goto read_failed;
3057
3058                 /* allocate another relcache header */
3059                 if (num_rels >= max_rels)
3060                 {
3061                         max_rels *= 2;
3062                         rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
3063                 }
3064
3065                 rel = rels[num_rels++] = (Relation) palloc(len);
3066
3067                 /* then, read the Relation structure */
3068                 if ((nread = fread(rel, 1, len, fp)) != len)
3069                         goto read_failed;
3070
3071                 /* next read the relation tuple form */
3072                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3073                         goto read_failed;
3074
3075                 relform = (Form_pg_class) palloc(len);
3076                 if ((nread = fread(relform, 1, len, fp)) != len)
3077                         goto read_failed;
3078
3079                 rel->rd_rel = relform;
3080
3081                 /* initialize attribute tuple forms */
3082                 rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
3083                                                                                           relform->relhasoids);
3084                 rel->rd_att->tdtypeid = relform->reltype;
3085                 rel->rd_att->tdtypmod = -1;             /* unnecessary, but... */
3086
3087                 /* next read all the attribute tuple form data entries */
3088                 has_not_null = false;
3089                 for (i = 0; i < relform->relnatts; i++)
3090                 {
3091                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3092                                 goto read_failed;
3093                         if (len != ATTRIBUTE_TUPLE_SIZE)
3094                                 goto read_failed;
3095                         if ((nread = fread(rel->rd_att->attrs[i], 1, len, fp)) != len)
3096                                 goto read_failed;
3097
3098                         has_not_null |= rel->rd_att->attrs[i]->attnotnull;
3099                 }
3100
3101                 /* mark not-null status */
3102                 if (has_not_null)
3103                 {
3104                         TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3105
3106                         constr->has_not_null = true;
3107                         rel->rd_att->constr = constr;
3108                 }
3109
3110                 /* If it's an index, there's more to do */
3111                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3112                 {
3113                         Form_pg_am      am;
3114                         MemoryContext indexcxt;
3115                         Oid                *operator;
3116                         RegProcedure *support;
3117                         int                     nsupport;
3118
3119                         /* Count nailed indexes to ensure we have 'em all */
3120                         if (rel->rd_isnailed)
3121                                 nailed_indexes++;
3122
3123                         /* next, read the pg_index tuple */
3124                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3125                                 goto read_failed;
3126
3127                         rel->rd_indextuple = (HeapTuple) palloc(len);
3128                         if ((nread = fread(rel->rd_indextuple, 1, len, fp)) != len)
3129                                 goto read_failed;
3130
3131                         /* Fix up internal pointers in the tuple -- see heap_copytuple */
3132                         rel->rd_indextuple->t_datamcxt = CurrentMemoryContext;
3133                         rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
3134                         rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
3135
3136                         /* fix up indclass pointer too */
3137                         indclassDatum = fastgetattr(rel->rd_indextuple,
3138                                                                                 Anum_pg_index_indclass,
3139                                                                                 GetPgIndexDescriptor(),
3140                                                                                 &isnull);
3141                         Assert(!isnull);
3142                         rel->rd_indclass = (oidvector *) DatumGetPointer(indclassDatum);
3143
3144                         /* next, read the access method tuple form */
3145                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3146                                 goto read_failed;
3147
3148                         am = (Form_pg_am) palloc(len);
3149                         if ((nread = fread(am, 1, len, fp)) != len)
3150                                 goto read_failed;
3151                         rel->rd_am = am;
3152
3153                         /*
3154                          * prepare index info context --- parameters should match
3155                          * RelationInitIndexAccessInfo
3156                          */
3157                         indexcxt = AllocSetContextCreate(CacheMemoryContext,
3158                                                                                          RelationGetRelationName(rel),
3159                                                                                          ALLOCSET_SMALL_MINSIZE,
3160                                                                                          ALLOCSET_SMALL_INITSIZE,
3161                                                                                          ALLOCSET_SMALL_MAXSIZE);
3162                         rel->rd_indexcxt = indexcxt;
3163
3164                         /* next, read the vector of operator OIDs */
3165                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3166                                 goto read_failed;
3167
3168                         operator = (Oid *) MemoryContextAlloc(indexcxt, len);
3169                         if ((nread = fread(operator, 1, len, fp)) != len)
3170                                 goto read_failed;
3171
3172                         rel->rd_operator = operator;
3173
3174                         /* finally, read the vector of support procedures */
3175                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3176                                 goto read_failed;
3177                         support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
3178                         if ((nread = fread(support, 1, len, fp)) != len)
3179                                 goto read_failed;
3180
3181                         rel->rd_support = support;
3182
3183                         /* add a zeroed support-fmgr-info vector */
3184                         nsupport = relform->relnatts * am->amsupport;
3185                         rel->rd_supportinfo = (FmgrInfo *)
3186                                 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
3187                 }
3188                 else
3189                 {
3190                         /* Count nailed rels to ensure we have 'em all */
3191                         if (rel->rd_isnailed)
3192                                 nailed_rels++;
3193
3194                         Assert(rel->rd_index == NULL);
3195                         Assert(rel->rd_indextuple == NULL);
3196                         Assert(rel->rd_indclass == NULL);
3197                         Assert(rel->rd_am == NULL);
3198                         Assert(rel->rd_indexcxt == NULL);
3199                         Assert(rel->rd_operator == NULL);
3200                         Assert(rel->rd_support == NULL);
3201                         Assert(rel->rd_supportinfo == NULL);
3202                 }
3203
3204                 /*
3205                  * Rules and triggers are not saved (mainly because the internal
3206                  * format is complex and subject to change).  They must be rebuilt
3207                  * if needed by RelationCacheInitializePhase2.  This is not
3208                  * expected to be a big performance hit since few system catalogs
3209                  * have such.  Ditto for index expressions and predicates.
3210                  */
3211                 rel->rd_rules = NULL;
3212                 rel->rd_rulescxt = NULL;
3213                 rel->trigdesc = NULL;
3214                 rel->rd_indexprs = NIL;
3215                 rel->rd_indpred = NIL;
3216
3217                 /*
3218                  * Reset transient-state fields in the relcache entry
3219                  */
3220                 rel->rd_smgr = NULL;
3221                 rel->rd_targblock = InvalidBlockNumber;
3222                 if (rel->rd_isnailed)
3223                         rel->rd_refcnt = 1;
3224                 else
3225                         rel->rd_refcnt = 0;
3226                 rel->rd_indexvalid = 0;
3227                 rel->rd_indexlist = NIL;
3228                 rel->rd_createSubid = InvalidSubTransactionId;
3229                 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
3230
3231                 /*
3232                  * Recompute lock and physical addressing info.  This is needed in
3233                  * case the pg_internal.init file was copied from some other
3234                  * database by CREATE DATABASE.
3235                  */
3236                 RelationInitLockInfo(rel);
3237                 RelationInitPhysicalAddr(rel);
3238         }
3239
3240         /*
3241          * We reached the end of the init file without apparent problem. Did
3242          * we get the right number of nailed items?  (This is a useful
3243          * crosscheck in case the set of critical rels or indexes changes.)
3244          */
3245         if (nailed_rels != NUM_CRITICAL_RELS ||
3246                 nailed_indexes != NUM_CRITICAL_INDEXES)
3247                 goto read_failed;
3248
3249         /*
3250          * OK, all appears well.
3251          *
3252          * Now insert all the new relcache entries into the cache.
3253          */
3254         for (relno = 0; relno < num_rels; relno++)
3255         {
3256                 RelationCacheInsert(rels[relno]);
3257                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3258                 initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
3259                                                                                 initFileRelationIds);
3260         }
3261
3262         pfree(rels);
3263         FreeFile(fp);
3264
3265         criticalRelcachesBuilt = true;
3266         return true;
3267
3268         /*
3269          * init file is broken, so do it the hard way.  We don't bother trying
3270          * to free the clutter we just allocated; it's not in the relcache so
3271          * it won't hurt.
3272          */
3273 read_failed:
3274         pfree(rels);
3275         FreeFile(fp);
3276
3277         needNewCacheFile = true;
3278         return false;
3279 }
3280
3281 /*
3282  * Write out a new initialization file with the current contents
3283  * of the relcache.
3284  */
3285 static void
3286 write_relcache_init_file(void)
3287 {
3288         FILE       *fp;
3289         char            tempfilename[MAXPGPATH];
3290         char            finalfilename[MAXPGPATH];
3291         int                     magic;
3292         HASH_SEQ_STATUS status;
3293         RelIdCacheEnt *idhentry;
3294         MemoryContext oldcxt;
3295         int                     i;
3296
3297         /*
3298          * We must write a temporary file and rename it into place. Otherwise,
3299          * another backend starting at about the same time might crash trying
3300          * to read the partially-complete file.
3301          */
3302         snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
3303                          DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
3304         snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
3305                          DatabasePath, RELCACHE_INIT_FILENAME);
3306
3307         unlink(tempfilename);           /* in case it exists w/wrong permissions */
3308
3309         fp = AllocateFile(tempfilename, PG_BINARY_W);
3310         if (fp == NULL)
3311         {
3312                 /*
3313                  * We used to consider this a fatal error, but we might as well
3314                  * continue with backend startup ...
3315                  */
3316                 ereport(WARNING,
3317                                 (errcode_for_file_access(),
3318                                  errmsg("could not create relation-cache initialization file \"%s\": %m",
3319                                                 tempfilename),
3320                   errdetail("Continuing anyway, but there's something wrong.")));
3321                 return;
3322         }
3323
3324         /*
3325          * Write a magic number to serve as a file version identifier.  We can
3326          * change the magic number whenever the relcache layout changes.
3327          */
3328         magic = RELCACHE_INIT_FILEMAGIC;
3329         if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3330                 elog(FATAL, "could not write init file");
3331
3332         /*
3333          * Write all the reldescs (in no particular order).
3334          */
3335         hash_seq_init(&status, RelationIdCache);
3336
3337         initFileRelationIds = NIL;
3338
3339         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3340         {
3341                 Relation        rel = idhentry->reldesc;
3342                 Form_pg_class relform = rel->rd_rel;
3343                 Size            len;
3344
3345                 /*
3346                  * first write the relcache entry proper
3347                  */
3348                 len = sizeof(RelationData);
3349
3350                 /* first, write the relation descriptor length */
3351                 if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3352                         elog(FATAL, "could not write init file");
3353
3354                 /* next, write out the Relation structure */
3355                 if (fwrite(rel, 1, len, fp) != len)
3356                         elog(FATAL, "could not write init file");
3357
3358                 /* next write the relation tuple form */
3359                 len = sizeof(FormData_pg_class);
3360                 if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3361                         elog(FATAL, "could not write init file");
3362
3363                 if (fwrite(relform, 1, len, fp) != len)
3364                         elog(FATAL, "could not write init file");
3365
3366                 /* next, do all the attribute tuple form data entries */
3367                 for (i = 0; i < relform->relnatts; i++)
3368                 {
3369                         len = ATTRIBUTE_TUPLE_SIZE;
3370                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3371                                 elog(FATAL, "could not write init file");
3372                         if (fwrite(rel->rd_att->attrs[i], 1, len, fp) != len)
3373                                 elog(FATAL, "could not write init file");
3374                 }
3375
3376                 /* If it's an index, there's more to do */
3377                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3378                 {
3379                         Form_pg_am      am = rel->rd_am;
3380
3381                         /* write the pg_index tuple */
3382                         /* we assume this was created by heap_copytuple! */
3383                         len = HEAPTUPLESIZE + rel->rd_indextuple->t_len;
3384                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3385                                 elog(FATAL, "could not write init file");
3386
3387                         if (fwrite(rel->rd_indextuple, 1, len, fp) != len)
3388                                 elog(FATAL, "could not write init file");
3389
3390                         /* next, write the access method tuple form */
3391                         len = sizeof(FormData_pg_am);
3392                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3393                                 elog(FATAL, "could not write init file");
3394
3395                         if (fwrite(am, 1, len, fp) != len)
3396                                 elog(FATAL, "could not write init file");
3397
3398                         /* next, write the vector of operator OIDs */
3399                         len = relform->relnatts * (am->amstrategies * sizeof(Oid));
3400                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3401                                 elog(FATAL, "could not write init file");
3402
3403                         if (fwrite(rel->rd_operator, 1, len, fp) != len)
3404                                 elog(FATAL, "could not write init file");
3405
3406                         /* finally, write the vector of support procedures */
3407                         len = relform->relnatts * (am->amsupport * sizeof(RegProcedure));
3408                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3409                                 elog(FATAL, "could not write init file");
3410
3411                         if (fwrite(rel->rd_support, 1, len, fp) != len)
3412                                 elog(FATAL, "could not write init file");
3413                 }
3414
3415                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3416                 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3417                 initFileRelationIds = lcons_oid(RelationGetRelid(rel),
3418                                                                                 initFileRelationIds);
3419                 MemoryContextSwitchTo(oldcxt);
3420         }
3421
3422         if (FreeFile(fp))
3423                 elog(FATAL, "could not write init file");
3424
3425         /*
3426          * Now we have to check whether the data we've so painstakingly
3427          * accumulated is already obsolete due to someone else's
3428          * just-committed catalog changes.      If so, we just delete the temp
3429          * file and leave it to the next backend to try again.  (Our own
3430          * relcache entries will be updated by SI message processing, but we
3431          * can't be sure whether what we wrote out was up-to-date.)
3432          *
3433          * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
3434          * grab a serialization lock for the duration.
3435          */
3436         LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3437
3438         /* Make sure we have seen all incoming SI messages */
3439         AcceptInvalidationMessages();
3440
3441         /*
3442          * If we have received any SI relcache invals since backend start,
3443          * assume we may have written out-of-date data.
3444          */
3445         if (relcacheInvalsReceived == 0L)
3446         {
3447                 /*
3448                  * OK, rename the temp file to its final name, deleting any
3449                  * previously-existing init file.
3450                  *
3451                  * Note: a failure here is possible under Cygwin, if some other
3452                  * backend is holding open an unlinked-but-not-yet-gone init file.
3453                  * So treat this as a noncritical failure; just remove the useless
3454                  * temp file on failure.
3455                  */
3456                 if (rename(tempfilename, finalfilename) < 0)
3457                         unlink(tempfilename);
3458         }
3459         else
3460         {
3461                 /* Delete the already-obsolete temp file */
3462                 unlink(tempfilename);
3463         }
3464
3465         LWLockRelease(RelCacheInitLock);
3466 }
3467
3468 /*
3469  * Detect whether a given relation (identified by OID) is one of the ones
3470  * we store in the init file.
3471  *
3472  * Note that we effectively assume that all backends running in a database
3473  * would choose to store the same set of relations in the init file;
3474  * otherwise there are cases where we'd fail to detect the need for an init
3475  * file invalidation.  This does not seem likely to be a problem in practice.
3476  */
3477 bool
3478 RelationIdIsInInitFile(Oid relationId)
3479 {
3480         return list_member_oid(initFileRelationIds, relationId);
3481 }
3482
3483 /*
3484  * Invalidate (remove) the init file during commit of a transaction that
3485  * changed one or more of the relation cache entries that are kept in the
3486  * init file.
3487  *
3488  * We actually need to remove the init file twice: once just before sending
3489  * the SI messages that include relcache inval for such relations, and once
3490  * just after sending them.  The unlink before ensures that a backend that's
3491  * currently starting cannot read the now-obsolete init file and then miss
3492  * the SI messages that will force it to update its relcache entries.  (This
3493  * works because the backend startup sequence gets into the PROC array before
3494  * trying to load the init file.)  The unlink after is to synchronize with a
3495  * backend that may currently be trying to write an init file based on data
3496  * that we've just rendered invalid.  Such a backend will see the SI messages,
3497  * but we can't leave the init file sitting around to fool later backends.
3498  *
3499  * Ignore any failure to unlink the file, since it might not be there if
3500  * no backend has been started since the last removal.
3501  */
3502 void
3503 RelationCacheInitFileInvalidate(bool beforeSend)
3504 {
3505         char            initfilename[MAXPGPATH];
3506
3507         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3508                          DatabasePath, RELCACHE_INIT_FILENAME);
3509
3510         if (beforeSend)
3511         {
3512                 /* no interlock needed here */
3513                 unlink(initfilename);
3514         }
3515         else
3516         {
3517                 /*
3518                  * We need to interlock this against write_relcache_init_file, to
3519                  * guard against possibility that someone renames a new-but-
3520                  * already-obsolete init file into place just after we unlink.
3521                  * With the interlock, it's certain that write_relcache_init_file
3522                  * will notice our SI inval message before renaming into place, or
3523                  * else that we will execute second and successfully unlink the
3524                  * file.
3525                  */
3526                 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3527                 unlink(initfilename);
3528                 LWLockRelease(RelCacheInitLock);
3529         }
3530 }