]> granicus.if.org Git - postgresql/blob - src/backend/catalog/index.c
Revert no-op changes to BufferGetPage()
[postgresql] / src / backend / catalog / index.c
1 /*-------------------------------------------------------------------------
2  *
3  * index.c
4  *        code to create and destroy POSTGRES index relations
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/catalog/index.c
12  *
13  *
14  * INTERFACE ROUTINES
15  *              index_create()                  - Create a cataloged index relation
16  *              index_drop()                    - Removes index relation from catalogs
17  *              BuildIndexInfo()                - Prepare to insert index tuples
18  *              FormIndexDatum()                - Construct datum vector for one index tuple
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/visibilitymap.h"
32 #include "access/xact.h"
33 #include "bootstrap/bootstrap.h"
34 #include "catalog/binary_upgrade.h"
35 #include "catalog/catalog.h"
36 #include "catalog/dependency.h"
37 #include "catalog/heap.h"
38 #include "catalog/index.h"
39 #include "catalog/objectaccess.h"
40 #include "catalog/pg_am.h"
41 #include "catalog/pg_collation.h"
42 #include "catalog/pg_constraint.h"
43 #include "catalog/pg_constraint_fn.h"
44 #include "catalog/pg_operator.h"
45 #include "catalog/pg_opclass.h"
46 #include "catalog/pg_tablespace.h"
47 #include "catalog/pg_trigger.h"
48 #include "catalog/pg_type.h"
49 #include "catalog/storage.h"
50 #include "commands/tablecmds.h"
51 #include "commands/trigger.h"
52 #include "executor/executor.h"
53 #include "miscadmin.h"
54 #include "nodes/makefuncs.h"
55 #include "nodes/nodeFuncs.h"
56 #include "optimizer/clauses.h"
57 #include "parser/parser.h"
58 #include "storage/bufmgr.h"
59 #include "storage/lmgr.h"
60 #include "storage/predicate.h"
61 #include "storage/procarray.h"
62 #include "storage/smgr.h"
63 #include "utils/builtins.h"
64 #include "utils/fmgroids.h"
65 #include "utils/guc.h"
66 #include "utils/inval.h"
67 #include "utils/lsyscache.h"
68 #include "utils/memutils.h"
69 #include "utils/pg_rusage.h"
70 #include "utils/syscache.h"
71 #include "utils/tuplesort.h"
72 #include "utils/snapmgr.h"
73 #include "utils/tqual.h"
74
75
76 /* Potentially set by pg_upgrade_support functions */
77 Oid                     binary_upgrade_next_index_pg_class_oid = InvalidOid;
78
79 /* state info for validate_index bulkdelete callback */
80 typedef struct
81 {
82         Tuplesortstate *tuplesort;      /* for sorting the index TIDs */
83         /* statistics (for debug purposes only): */
84         double          htups,
85                                 itups,
86                                 tups_inserted;
87 } v_i_state;
88
89 /* non-export function prototypes */
90 static bool relationHasPrimaryKey(Relation rel);
91 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
92                                                  IndexInfo *indexInfo,
93                                                  List *indexColNames,
94                                                  Oid accessMethodObjectId,
95                                                  Oid *collationObjectId,
96                                                  Oid *classObjectId);
97 static void InitializeAttributeOids(Relation indexRelation,
98                                                 int numatts, Oid indexoid);
99 static void AppendAttributeTuples(Relation indexRelation, int numatts);
100 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
101                                         IndexInfo *indexInfo,
102                                         Oid *collationOids,
103                                         Oid *classOids,
104                                         int16 *coloptions,
105                                         bool primary,
106                                         bool isexclusion,
107                                         bool immediate,
108                                         bool isvalid);
109 static void index_update_stats(Relation rel,
110                                    bool hasindex, bool isprimary,
111                                    double reltuples);
112 static void IndexCheckExclusion(Relation heapRelation,
113                                         Relation indexRelation,
114                                         IndexInfo *indexInfo);
115 static inline int64 itemptr_encode(ItemPointer itemptr);
116 static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
117 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
118 static void validate_index_heapscan(Relation heapRelation,
119                                                 Relation indexRelation,
120                                                 IndexInfo *indexInfo,
121                                                 Snapshot snapshot,
122                                                 v_i_state *state);
123 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
124 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
125 static void ResetReindexProcessing(void);
126 static void SetReindexPending(List *indexes);
127 static void RemoveReindexPending(Oid indexOid);
128 static void ResetReindexPending(void);
129
130
131 /*
132  * relationHasPrimaryKey
133  *              See whether an existing relation has a primary key.
134  *
135  * Caller must have suitable lock on the relation.
136  *
137  * Note: we intentionally do not check IndexIsValid here; that's because this
138  * is used to enforce the rule that there can be only one indisprimary index,
139  * and we want that to be true even if said index is invalid.
140  */
141 static bool
142 relationHasPrimaryKey(Relation rel)
143 {
144         bool            result = false;
145         List       *indexoidlist;
146         ListCell   *indexoidscan;
147
148         /*
149          * Get the list of index OIDs for the table from the relcache, and look up
150          * each one in the pg_index syscache until we find one marked primary key
151          * (hopefully there isn't more than one such).
152          */
153         indexoidlist = RelationGetIndexList(rel);
154
155         foreach(indexoidscan, indexoidlist)
156         {
157                 Oid                     indexoid = lfirst_oid(indexoidscan);
158                 HeapTuple       indexTuple;
159
160                 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
161                 if (!HeapTupleIsValid(indexTuple))              /* should not happen */
162                         elog(ERROR, "cache lookup failed for index %u", indexoid);
163                 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
164                 ReleaseSysCache(indexTuple);
165                 if (result)
166                         break;
167         }
168
169         list_free(indexoidlist);
170
171         return result;
172 }
173
174 /*
175  * index_check_primary_key
176  *              Apply special checks needed before creating a PRIMARY KEY index
177  *
178  * This processing used to be in DefineIndex(), but has been split out
179  * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
180  *
181  * We check for a pre-existing primary key, and that all columns of the index
182  * are simple column references (not expressions), and that all those
183  * columns are marked NOT NULL.  If they aren't (which can only happen during
184  * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
185  * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
186  * them so --- or fail if they are not in fact nonnull.
187  *
188  * Caller had better have at least ShareLock on the table, else the not-null
189  * checking isn't trustworthy.
190  */
191 void
192 index_check_primary_key(Relation heapRel,
193                                                 IndexInfo *indexInfo,
194                                                 bool is_alter_table)
195 {
196         List       *cmds;
197         int                     i;
198
199         /*
200          * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
201          * TABLE, we have faith that the parser rejected multiple pkey clauses;
202          * and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
203          * problem either.
204          */
205         if (is_alter_table &&
206                 relationHasPrimaryKey(heapRel))
207         {
208                 ereport(ERROR,
209                                 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
210                          errmsg("multiple primary keys for table \"%s\" are not allowed",
211                                         RelationGetRelationName(heapRel))));
212         }
213
214         /*
215          * Check that all of the attributes in a primary key are marked as not
216          * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
217          */
218         cmds = NIL;
219         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
220         {
221                 AttrNumber      attnum = indexInfo->ii_KeyAttrNumbers[i];
222                 HeapTuple       atttuple;
223                 Form_pg_attribute attform;
224
225                 if (attnum == 0)
226                         ereport(ERROR,
227                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
228                                          errmsg("primary keys cannot be expressions")));
229
230                 /* System attributes are never null, so no need to check */
231                 if (attnum < 0)
232                         continue;
233
234                 atttuple = SearchSysCache2(ATTNUM,
235                                                                  ObjectIdGetDatum(RelationGetRelid(heapRel)),
236                                                                    Int16GetDatum(attnum));
237                 if (!HeapTupleIsValid(atttuple))
238                         elog(ERROR, "cache lookup failed for attribute %d of relation %u",
239                                  attnum, RelationGetRelid(heapRel));
240                 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
241
242                 if (!attform->attnotnull)
243                 {
244                         /* Add a subcommand to make this one NOT NULL */
245                         AlterTableCmd *cmd = makeNode(AlterTableCmd);
246
247                         cmd->subtype = AT_SetNotNull;
248                         cmd->name = pstrdup(NameStr(attform->attname));
249                         cmds = lappend(cmds, cmd);
250                 }
251
252                 ReleaseSysCache(atttuple);
253         }
254
255         /*
256          * XXX: Shouldn't the ALTER TABLE .. SET NOT NULL cascade to child tables?
257          * Currently, since the PRIMARY KEY itself doesn't cascade, we don't
258          * cascade the notnull constraint(s) either; but this is pretty debatable.
259          *
260          * XXX: possible future improvement: when being called from ALTER TABLE,
261          * it would be more efficient to merge this with the outer ALTER TABLE, so
262          * as to avoid two scans.  But that seems to complicate DefineIndex's API
263          * unduly.
264          */
265         if (cmds)
266                 AlterTableInternal(RelationGetRelid(heapRel), cmds, false);
267 }
268
269 /*
270  *              ConstructTupleDescriptor
271  *
272  * Build an index tuple descriptor for a new index
273  */
274 static TupleDesc
275 ConstructTupleDescriptor(Relation heapRelation,
276                                                  IndexInfo *indexInfo,
277                                                  List *indexColNames,
278                                                  Oid accessMethodObjectId,
279                                                  Oid *collationObjectId,
280                                                  Oid *classObjectId)
281 {
282         int                     numatts = indexInfo->ii_NumIndexAttrs;
283         ListCell   *colnames_item = list_head(indexColNames);
284         ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
285         IndexAmRoutine *amroutine;
286         TupleDesc       heapTupDesc;
287         TupleDesc       indexTupDesc;
288         int                     natts;                  /* #atts in heap rel --- for error checks */
289         int                     i;
290
291         /* We need access to the index AM's API struct */
292         amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId);
293
294         /* ... and to the table's tuple descriptor */
295         heapTupDesc = RelationGetDescr(heapRelation);
296         natts = RelationGetForm(heapRelation)->relnatts;
297
298         /*
299          * allocate the new tuple descriptor
300          */
301         indexTupDesc = CreateTemplateTupleDesc(numatts, false);
302
303         /*
304          * For simple index columns, we copy the pg_attribute row from the parent
305          * relation and modify it as necessary.  For expressions we have to cons
306          * up a pg_attribute row the hard way.
307          */
308         for (i = 0; i < numatts; i++)
309         {
310                 AttrNumber      atnum = indexInfo->ii_KeyAttrNumbers[i];
311                 Form_pg_attribute to = indexTupDesc->attrs[i];
312                 HeapTuple       tuple;
313                 Form_pg_type typeTup;
314                 Form_pg_opclass opclassTup;
315                 Oid                     keyType;
316
317                 if (atnum != 0)
318                 {
319                         /* Simple index column */
320                         Form_pg_attribute from;
321
322                         if (atnum < 0)
323                         {
324                                 /*
325                                  * here we are indexing on a system attribute (-1...-n)
326                                  */
327                                 from = SystemAttributeDefinition(atnum,
328                                                                                    heapRelation->rd_rel->relhasoids);
329                         }
330                         else
331                         {
332                                 /*
333                                  * here we are indexing on a normal attribute (1...n)
334                                  */
335                                 if (atnum > natts)              /* safety check */
336                                         elog(ERROR, "invalid column number %d", atnum);
337                                 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
338                         }
339
340                         /*
341                          * now that we've determined the "from", let's copy the tuple desc
342                          * data...
343                          */
344                         memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
345
346                         /*
347                          * Fix the stuff that should not be the same as the underlying
348                          * attr
349                          */
350                         to->attnum = i + 1;
351
352                         to->attstattarget = -1;
353                         to->attcacheoff = -1;
354                         to->attnotnull = false;
355                         to->atthasdef = false;
356                         to->attislocal = true;
357                         to->attinhcount = 0;
358                         to->attcollation = collationObjectId[i];
359                 }
360                 else
361                 {
362                         /* Expressional index */
363                         Node       *indexkey;
364
365                         MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
366
367                         if (indexpr_item == NULL)       /* shouldn't happen */
368                                 elog(ERROR, "too few entries in indexprs list");
369                         indexkey = (Node *) lfirst(indexpr_item);
370                         indexpr_item = lnext(indexpr_item);
371
372                         /*
373                          * Lookup the expression type in pg_type for the type length etc.
374                          */
375                         keyType = exprType(indexkey);
376                         tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
377                         if (!HeapTupleIsValid(tuple))
378                                 elog(ERROR, "cache lookup failed for type %u", keyType);
379                         typeTup = (Form_pg_type) GETSTRUCT(tuple);
380
381                         /*
382                          * Assign some of the attributes values. Leave the rest as 0.
383                          */
384                         to->attnum = i + 1;
385                         to->atttypid = keyType;
386                         to->attlen = typeTup->typlen;
387                         to->attbyval = typeTup->typbyval;
388                         to->attstorage = typeTup->typstorage;
389                         to->attalign = typeTup->typalign;
390                         to->attstattarget = -1;
391                         to->attcacheoff = -1;
392                         to->atttypmod = exprTypmod(indexkey);
393                         to->attislocal = true;
394                         to->attcollation = collationObjectId[i];
395
396                         ReleaseSysCache(tuple);
397
398                         /*
399                          * Make sure the expression yields a type that's safe to store in
400                          * an index.  We need this defense because we have index opclasses
401                          * for pseudo-types such as "record", and the actually stored type
402                          * had better be safe; eg, a named composite type is okay, an
403                          * anonymous record type is not.  The test is the same as for
404                          * whether a table column is of a safe type (which is why we
405                          * needn't check for the non-expression case).
406                          */
407                         CheckAttributeType(NameStr(to->attname),
408                                                            to->atttypid, to->attcollation,
409                                                            NIL, false);
410                 }
411
412                 /*
413                  * We do not yet have the correct relation OID for the index, so just
414                  * set it invalid for now.  InitializeAttributeOids() will fix it
415                  * later.
416                  */
417                 to->attrelid = InvalidOid;
418
419                 /*
420                  * Set the attribute name as specified by caller.
421                  */
422                 if (colnames_item == NULL)              /* shouldn't happen */
423                         elog(ERROR, "too few entries in colnames list");
424                 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
425                 colnames_item = lnext(colnames_item);
426
427                 /*
428                  * Check the opclass and index AM to see if either provides a keytype
429                  * (overriding the attribute type).  Opclass takes precedence.
430                  */
431                 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
432                 if (!HeapTupleIsValid(tuple))
433                         elog(ERROR, "cache lookup failed for opclass %u",
434                                  classObjectId[i]);
435                 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
436                 if (OidIsValid(opclassTup->opckeytype))
437                         keyType = opclassTup->opckeytype;
438                 else
439                         keyType = amroutine->amkeytype;
440                 ReleaseSysCache(tuple);
441
442                 if (OidIsValid(keyType) && keyType != to->atttypid)
443                 {
444                         /* index value and heap value have different types */
445                         tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
446                         if (!HeapTupleIsValid(tuple))
447                                 elog(ERROR, "cache lookup failed for type %u", keyType);
448                         typeTup = (Form_pg_type) GETSTRUCT(tuple);
449
450                         to->atttypid = keyType;
451                         to->atttypmod = -1;
452                         to->attlen = typeTup->typlen;
453                         to->attbyval = typeTup->typbyval;
454                         to->attalign = typeTup->typalign;
455                         to->attstorage = typeTup->typstorage;
456
457                         ReleaseSysCache(tuple);
458                 }
459         }
460
461         pfree(amroutine);
462
463         return indexTupDesc;
464 }
465
466 /* ----------------------------------------------------------------
467  *              InitializeAttributeOids
468  * ----------------------------------------------------------------
469  */
470 static void
471 InitializeAttributeOids(Relation indexRelation,
472                                                 int numatts,
473                                                 Oid indexoid)
474 {
475         TupleDesc       tupleDescriptor;
476         int                     i;
477
478         tupleDescriptor = RelationGetDescr(indexRelation);
479
480         for (i = 0; i < numatts; i += 1)
481                 tupleDescriptor->attrs[i]->attrelid = indexoid;
482 }
483
484 /* ----------------------------------------------------------------
485  *              AppendAttributeTuples
486  * ----------------------------------------------------------------
487  */
488 static void
489 AppendAttributeTuples(Relation indexRelation, int numatts)
490 {
491         Relation        pg_attribute;
492         CatalogIndexState indstate;
493         TupleDesc       indexTupDesc;
494         int                     i;
495
496         /*
497          * open the attribute relation and its indexes
498          */
499         pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
500
501         indstate = CatalogOpenIndexes(pg_attribute);
502
503         /*
504          * insert data from new index's tupdesc into pg_attribute
505          */
506         indexTupDesc = RelationGetDescr(indexRelation);
507
508         for (i = 0; i < numatts; i++)
509         {
510                 /*
511                  * There used to be very grotty code here to set these fields, but I
512                  * think it's unnecessary.  They should be set already.
513                  */
514                 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
515                 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
516
517                 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
518         }
519
520         CatalogCloseIndexes(indstate);
521
522         heap_close(pg_attribute, RowExclusiveLock);
523 }
524
525 /* ----------------------------------------------------------------
526  *              UpdateIndexRelation
527  *
528  * Construct and insert a new entry in the pg_index catalog
529  * ----------------------------------------------------------------
530  */
531 static void
532 UpdateIndexRelation(Oid indexoid,
533                                         Oid heapoid,
534                                         IndexInfo *indexInfo,
535                                         Oid *collationOids,
536                                         Oid *classOids,
537                                         int16 *coloptions,
538                                         bool primary,
539                                         bool isexclusion,
540                                         bool immediate,
541                                         bool isvalid)
542 {
543         int2vector *indkey;
544         oidvector  *indcollation;
545         oidvector  *indclass;
546         int2vector *indoption;
547         Datum           exprsDatum;
548         Datum           predDatum;
549         Datum           values[Natts_pg_index];
550         bool            nulls[Natts_pg_index];
551         Relation        pg_index;
552         HeapTuple       tuple;
553         int                     i;
554
555         /*
556          * Copy the index key, opclass, and indoption info into arrays (should we
557          * make the caller pass them like this to start with?)
558          */
559         indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
560         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
561                 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
562         indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
563         indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
564         indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
565
566         /*
567          * Convert the index expressions (if any) to a text datum
568          */
569         if (indexInfo->ii_Expressions != NIL)
570         {
571                 char       *exprsString;
572
573                 exprsString = nodeToString(indexInfo->ii_Expressions);
574                 exprsDatum = CStringGetTextDatum(exprsString);
575                 pfree(exprsString);
576         }
577         else
578                 exprsDatum = (Datum) 0;
579
580         /*
581          * Convert the index predicate (if any) to a text datum.  Note we convert
582          * implicit-AND format to normal explicit-AND for storage.
583          */
584         if (indexInfo->ii_Predicate != NIL)
585         {
586                 char       *predString;
587
588                 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
589                 predDatum = CStringGetTextDatum(predString);
590                 pfree(predString);
591         }
592         else
593                 predDatum = (Datum) 0;
594
595         /*
596          * open the system catalog index relation
597          */
598         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
599
600         /*
601          * Build a pg_index tuple
602          */
603         MemSet(nulls, false, sizeof(nulls));
604
605         values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
606         values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
607         values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
608         values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
609         values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
610         values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
611         values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
612         values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
613         values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
614         values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
615         /* we set isvalid and isready the same way */
616         values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
617         values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
618         values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
619         values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
620         values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
621         values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
622         values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
623         values[Anum_pg_index_indexprs - 1] = exprsDatum;
624         if (exprsDatum == (Datum) 0)
625                 nulls[Anum_pg_index_indexprs - 1] = true;
626         values[Anum_pg_index_indpred - 1] = predDatum;
627         if (predDatum == (Datum) 0)
628                 nulls[Anum_pg_index_indpred - 1] = true;
629
630         tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
631
632         /*
633          * insert the tuple into the pg_index catalog
634          */
635         simple_heap_insert(pg_index, tuple);
636
637         /* update the indexes on pg_index */
638         CatalogUpdateIndexes(pg_index, tuple);
639
640         /*
641          * close the relation and free the tuple
642          */
643         heap_close(pg_index, RowExclusiveLock);
644         heap_freetuple(tuple);
645 }
646
647
648 /*
649  * index_create
650  *
651  * heapRelation: table to build index on (suitably locked by caller)
652  * indexRelationName: name to give the new index relation
653  * indexRelationId: normally, pass InvalidOid to let this routine
654  *              generate an OID for the index.  During bootstrap this may be
655  *              nonzero to specify a preselected OID.
656  * relFileNode: normally, pass InvalidOid to get new storage.  May be
657  *              nonzero to attach an existing valid build.
658  * indexInfo: same info executor uses to insert into the index
659  * indexColNames: column names to use for index (List of char *)
660  * accessMethodObjectId: OID of index AM to use
661  * tableSpaceId: OID of tablespace to use
662  * collationObjectId: array of collation OIDs, one per index column
663  * classObjectId: array of index opclass OIDs, one per index column
664  * coloptions: array of per-index-column indoption settings
665  * reloptions: AM-specific options
666  * isprimary: index is a PRIMARY KEY
667  * isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
668  * deferrable: constraint is DEFERRABLE
669  * initdeferred: constraint is INITIALLY DEFERRED
670  * allow_system_table_mods: allow table to be a system catalog
671  * skip_build: true to skip the index_build() step for the moment; caller
672  *              must do it later (typically via reindex_index())
673  * concurrent: if true, do not lock the table against writers.  The index
674  *              will be marked "invalid" and the caller must take additional steps
675  *              to fix it up.
676  * is_internal: if true, post creation hook for new index
677  * if_not_exists: if true, do not throw an error if a relation with
678  *              the same name already exists.
679  *
680  * Returns the OID of the created index.
681  */
682 Oid
683 index_create(Relation heapRelation,
684                          const char *indexRelationName,
685                          Oid indexRelationId,
686                          Oid relFileNode,
687                          IndexInfo *indexInfo,
688                          List *indexColNames,
689                          Oid accessMethodObjectId,
690                          Oid tableSpaceId,
691                          Oid *collationObjectId,
692                          Oid *classObjectId,
693                          int16 *coloptions,
694                          Datum reloptions,
695                          bool isprimary,
696                          bool isconstraint,
697                          bool deferrable,
698                          bool initdeferred,
699                          bool allow_system_table_mods,
700                          bool skip_build,
701                          bool concurrent,
702                          bool is_internal,
703                          bool if_not_exists)
704 {
705         Oid                     heapRelationId = RelationGetRelid(heapRelation);
706         Relation        pg_class;
707         Relation        indexRelation;
708         TupleDesc       indexTupDesc;
709         bool            shared_relation;
710         bool            mapped_relation;
711         bool            is_exclusion;
712         Oid                     namespaceId;
713         int                     i;
714         char            relpersistence;
715
716         is_exclusion = (indexInfo->ii_ExclusionOps != NULL);    /* has exclusion operators */
717
718         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
719
720         /*
721          * The index will be in the same namespace as its parent table, and is
722          * shared across databases if and only if the parent is.  Likewise, it
723          * will use the relfilenode map if and only if the parent does; and it
724          * inherits the parent's relpersistence.
725          */
726         namespaceId = RelationGetNamespace(heapRelation);
727         shared_relation = heapRelation->rd_rel->relisshared;
728         mapped_relation = RelationIsMapped(heapRelation);
729         relpersistence = heapRelation->rd_rel->relpersistence;
730
731         /*
732          * check parameters
733          */
734         if (indexInfo->ii_NumIndexAttrs < 1)
735                 elog(ERROR, "must index at least one column");
736
737         if (!allow_system_table_mods &&
738                 IsSystemRelation(heapRelation) &&
739                 IsNormalProcessingMode())
740                 ereport(ERROR,
741                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
742                                  errmsg("user-defined indexes on system catalog tables are not supported")));
743
744         /*
745          * concurrent index build on a system catalog is unsafe because we tend to
746          * release locks before committing in catalogs
747          */
748         if (concurrent &&
749                 IsSystemRelation(heapRelation))
750                 ereport(ERROR,
751                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
752                                  errmsg("concurrent index creation on system catalog tables is not supported")));
753
754         /*
755          * This case is currently not supported, but there's no way to ask for it
756          * in the grammar anyway, so it can't happen.
757          */
758         if (concurrent && is_exclusion)
759                 ereport(ERROR,
760                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
761                                  errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
762
763         /*
764          * We cannot allow indexing a shared relation after initdb (because
765          * there's no way to make the entry in other databases' pg_class).
766          */
767         if (shared_relation && !IsBootstrapProcessingMode())
768                 ereport(ERROR,
769                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
770                                  errmsg("shared indexes cannot be created after initdb")));
771
772         /*
773          * Shared relations must be in pg_global, too (last-ditch check)
774          */
775         if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
776                 elog(ERROR, "shared relations must be placed in pg_global tablespace");
777
778         if (get_relname_relid(indexRelationName, namespaceId))  /* name already taken in this namespace? */
779         {
780                 if (if_not_exists)
781                 {
782                         ereport(NOTICE,
783                                         (errcode(ERRCODE_DUPLICATE_TABLE),
784                                          errmsg("relation \"%s\" already exists, skipping",
785                                                         indexRelationName)));
786                         heap_close(pg_class, RowExclusiveLock);
787                         return InvalidOid;      /* nothing was created */
788                 }
789
790                 ereport(ERROR,
791                                 (errcode(ERRCODE_DUPLICATE_TABLE),
792                                  errmsg("relation \"%s\" already exists",
793                                                 indexRelationName)));
794         }
795
796         /*
797          * construct tuple descriptor for index tuples
798          */
799         indexTupDesc = ConstructTupleDescriptor(heapRelation,
800                                                                                         indexInfo,
801                                                                                         indexColNames,
802                                                                                         accessMethodObjectId,
803                                                                                         collationObjectId,
804                                                                                         classObjectId);
805
806         /*
807          * Allocate an OID for the index, unless we were told what to use.
808          *
809          * The OID will be the relfilenode as well, so make sure it doesn't
810          * collide with either pg_class OIDs or existing physical files.
811          */
812         if (!OidIsValid(indexRelationId))
813         {
814                 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
815                 if (IsBinaryUpgrade)
816                 {
817                         if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
818                                 ereport(ERROR,
819                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
820                                                  errmsg("pg_class index OID value not set when in binary upgrade mode")));
821
822                         indexRelationId = binary_upgrade_next_index_pg_class_oid;
823                         binary_upgrade_next_index_pg_class_oid = InvalidOid;    /* clear after use */
824                 }
825                 else
826                 {
827                         indexRelationId =
828                                 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
829                 }
830         }
831
832         /*
833          * create the index relation's relcache entry and physical disk file. (If
834          * we fail further down, it's the smgr's responsibility to remove the disk
835          * file again.)
836          */
837         indexRelation = heap_create(indexRelationName,
838                                                                 namespaceId,
839                                                                 tableSpaceId,
840                                                                 indexRelationId,
841                                                                 relFileNode,
842                                                                 indexTupDesc,
843                                                                 RELKIND_INDEX,
844                                                                 relpersistence,
845                                                                 shared_relation,
846                                                                 mapped_relation,
847                                                                 allow_system_table_mods);
848
849         Assert(indexRelationId == RelationGetRelid(indexRelation));
850
851         /*
852          * Obtain exclusive lock on it.  Although no other backends can see it
853          * until we commit, this prevents deadlock-risk complaints from lock
854          * manager in cases such as CLUSTER.
855          */
856         LockRelation(indexRelation, AccessExclusiveLock);
857
858         /*
859          * Fill in fields of the index's pg_class entry that are not set correctly
860          * by heap_create.
861          *
862          * XXX should have a cleaner way to create cataloged indexes
863          */
864         indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
865         indexRelation->rd_rel->relam = accessMethodObjectId;
866         indexRelation->rd_rel->relhasoids = false;
867
868         /*
869          * store index's pg_class entry
870          */
871         InsertPgClassTuple(pg_class, indexRelation,
872                                            RelationGetRelid(indexRelation),
873                                            (Datum) 0,
874                                            reloptions);
875
876         /* done with pg_class */
877         heap_close(pg_class, RowExclusiveLock);
878
879         /*
880          * now update the object IDs of all the attribute tuple forms in the
881          * index relation's tuple descriptor
882          */
883         InitializeAttributeOids(indexRelation,
884                                                         indexInfo->ii_NumIndexAttrs,
885                                                         indexRelationId);
886
887         /*
888          * append ATTRIBUTE tuples for the index
889          */
890         AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
891
892         /* ----------------
893          *        update pg_index
894          *        (append INDEX tuple)
895          *
896          *        Note that this stows away a representation of "predicate".
897          *        (Or, could define a rule to maintain the predicate) --Nels, Feb '92
898          * ----------------
899          */
900         UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
901                                                 collationObjectId, classObjectId, coloptions,
902                                                 isprimary, is_exclusion,
903                                                 !deferrable,    /* presumably "immediate": cf. indimmediate handling in index_constraint_create */
904                                                 !concurrent);   /* concurrent builds start out marked invalid */
905
906         /*
907          * Register constraint and dependencies for the index.
908          *
909          * If the index is from a CONSTRAINT clause, construct a pg_constraint
910          * entry.  The index will be linked to the constraint, which in turn is
911          * linked to the table.  If it's not a CONSTRAINT, we need to make a
912          * dependency directly on the table.
913          *
914          * We don't need a dependency on the namespace, because there'll be an
915          * indirect dependency via our parent table.
916          *
917          * During bootstrap we can't register any dependencies, and we don't try
918          * to make a constraint either.
919          */
920         if (!IsBootstrapProcessingMode())
921         {
922                 ObjectAddress myself,
923                                         referenced;
924
925                 myself.classId = RelationRelationId;
926                 myself.objectId = indexRelationId;
927                 myself.objectSubId = 0;
928
929                 if (isconstraint)
930                 {
931                         char            constraintType;
932
933                         if (isprimary)
934                                 constraintType = CONSTRAINT_PRIMARY;
935                         else if (indexInfo->ii_Unique)
936                                 constraintType = CONSTRAINT_UNIQUE;
937                         else if (is_exclusion)
938                                 constraintType = CONSTRAINT_EXCLUSION;
939                         else
940                         {
941                                 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
942                                 constraintType = 0;             /* keep compiler quiet */
943                         }
944
945                         index_constraint_create(heapRelation,
946                                                                         indexRelationId,
947                                                                         indexInfo,
948                                                                         indexRelationName,
949                                                                         constraintType,
950                                                                         deferrable,
951                                                                         initdeferred,
952                                                                         false,          /* already marked primary */
953                                                                         false,          /* pg_index entry is OK */
954                                                                         false,          /* no old dependencies */
955                                                                         allow_system_table_mods,
956                                                                         is_internal);
957                 }
958                 else
959                 {
960                         bool            have_simple_col = false;
961
962                         /* Create auto dependencies on simply-referenced columns */
963                         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
964                         {
965                                 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
966                                 {
967                                         referenced.classId = RelationRelationId;
968                                         referenced.objectId = heapRelationId;
969                                         referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
970
971                                         recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
972
973                                         have_simple_col = true;
974                                 }
975                         }
976
977                         /*
978                          * If there are no simply-referenced columns, give the index an
979                          * auto dependency on the whole table.  In most cases, this will
980                          * be redundant, but it might not be if the index expressions and
981                          * predicate contain no Vars or only whole-row Vars.
982                          */
983                         if (!have_simple_col)
984                         {
985                                 referenced.classId = RelationRelationId;
986                                 referenced.objectId = heapRelationId;
987                                 referenced.objectSubId = 0;
988
989                                 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
990                         }
991
992                         /* Non-constraint indexes can't be deferrable */
993                         Assert(!deferrable);
994                         Assert(!initdeferred);
995                 }
996
997                 /* Store dependency on collations */
998                 /* The default collation is pinned, so don't bother recording it */
999                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1000                 {
1001                         if (OidIsValid(collationObjectId[i]) &&
1002                                 collationObjectId[i] != DEFAULT_COLLATION_OID)
1003                         {
1004                                 referenced.classId = CollationRelationId;
1005                                 referenced.objectId = collationObjectId[i];
1006                                 referenced.objectSubId = 0;
1007
1008                                 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1009                         }
1010                 }
1011
1012                 /* Store dependency on operator classes */
1013                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1014                 {
1015                         referenced.classId = OperatorClassRelationId;
1016                         referenced.objectId = classObjectId[i];
1017                         referenced.objectSubId = 0;
1018
1019                         recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1020                 }
1021
1022                 /* Store dependencies on anything mentioned in index expressions */
1023                 if (indexInfo->ii_Expressions)
1024                 {
1025                         recordDependencyOnSingleRelExpr(&myself,
1026                                                                                   (Node *) indexInfo->ii_Expressions,
1027                                                                                         heapRelationId,
1028                                                                                         DEPENDENCY_NORMAL,
1029                                                                                         DEPENDENCY_AUTO);
1030                 }
1031
1032                 /* Store dependencies on anything mentioned in predicate */
1033                 if (indexInfo->ii_Predicate)
1034                 {
1035                         recordDependencyOnSingleRelExpr(&myself,
1036                                                                                         (Node *) indexInfo->ii_Predicate,
1037                                                                                         heapRelationId,
1038                                                                                         DEPENDENCY_NORMAL,
1039                                                                                         DEPENDENCY_AUTO);
1040                 }
1041         }
1042         else
1043         {
1044                 /* Bootstrap mode - assert we weren't asked for constraint support */
1045                 Assert(!isconstraint);
1046                 Assert(!deferrable);
1047                 Assert(!initdeferred);
1048         }
1049
1050         /* Post creation hook for new index (is_internal is passed through) */
1051         InvokeObjectPostCreateHookArg(RelationRelationId,
1052                                                                   indexRelationId, 0, is_internal);
1053
1054         /*
1055          * Advance the command counter so that we can see the newly-entered
1056          * catalog tuples for the index.
1057          */
1058         CommandCounterIncrement();
1059
1060         /*
1061          * In bootstrap mode, we have to fill in the index strategy structure with
1062          * information from the catalogs.  If we aren't bootstrapping, then the
1063          * relcache entry has already been rebuilt thanks to sinval update during
1064          * CommandCounterIncrement.
1065          */
1066         if (IsBootstrapProcessingMode())
1067                 RelationInitIndexAccessInfo(indexRelation);
1068         else
1069                 Assert(indexRelation->rd_indexcxt != NULL);
1070
1071         /*
1072          * If this is bootstrap (initdb) time, then we don't actually fill in the
1073          * index yet.  We'll be creating more indexes and classes later, so we
1074          * delay filling them in until just before we're done with bootstrapping.
1075          * Similarly, if the caller specified skip_build then filling the index is
1076          * delayed till later (ALTER TABLE can save work in some cases with this).
1077          * Otherwise, we call the AM routine that constructs the index.
1078          */
1079         if (IsBootstrapProcessingMode())
1080         {
1081                 index_register(heapRelationId, indexRelationId, indexInfo);
1082         }
1083         else if (skip_build)
1084         {
1085                 /*
1086                  * Caller is responsible for filling the index later on.  However,
1087                  * we'd better make sure that the heap relation is correctly marked as
1088                  * having an index.
1089                  */
1090                 index_update_stats(heapRelation,
1091                                                    true,        /* heap now has an index */
1092                                                    isprimary,
1093                                                    -1.0);
1094                 /* Make the above update visible */
1095                 CommandCounterIncrement();
1096         }
1097         else
1098         {
1099                 index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
1100         }
1101
1102         /*
1103          * Close the index; but we keep the lock that we acquired above until end
1104          * of transaction.  Closing the heap is caller's responsibility.
1105          */
1106         index_close(indexRelation, NoLock);
1107
1108         return indexRelationId;
1109 }
1110
1111 /*
1112  * index_constraint_create
1113  *
1114  * Set up a constraint associated with an index.  Return the new constraint's
1115  * address.
1116  *
1117  * heapRelation: table owning the index (must be suitably locked by caller)
1118  * indexRelationId: OID of the index
1119  * indexInfo: same info executor uses to insert into the index
1120  * constraintName: what it says (generally, should match name of index)
1121  * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1122  *              CONSTRAINT_EXCLUSION
1123  * deferrable: constraint is DEFERRABLE
1124  * initdeferred: constraint is INITIALLY DEFERRED
1125  * mark_as_primary: if true, set flags to mark index as primary key
1126  * update_pgindex: if true, update pg_index row (else caller's done that)
1127  * remove_old_dependencies: if true, remove existing dependencies of index
1128  *              on table's columns
1129  * allow_system_table_mods: allow table to be a system catalog
1130  * is_internal: index is constructed due to internal process
1131  */
1132 ObjectAddress
1133 index_constraint_create(Relation heapRelation,
1134                                                 Oid indexRelationId,
1135                                                 IndexInfo *indexInfo,
1136                                                 const char *constraintName,
1137                                                 char constraintType,
1138                                                 bool deferrable,
1139                                                 bool initdeferred,
1140                                                 bool mark_as_primary,
1141                                                 bool update_pgindex,
1142                                                 bool remove_old_dependencies,
1143                                                 bool allow_system_table_mods,
1144                                                 bool is_internal)
1145 {
1146         Oid                     namespaceId = RelationGetNamespace(heapRelation);
1147         ObjectAddress myself,
1148                                 referenced;
1149         Oid                     conOid;
1150
1151         /* constraint creation support doesn't work while bootstrapping */
1152         Assert(!IsBootstrapProcessingMode());
1153
1154         /* enforce system-table restriction */
1155         if (!allow_system_table_mods &&
1156                 IsSystemRelation(heapRelation) &&
1157                 IsNormalProcessingMode())
1158                 ereport(ERROR,
1159                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1160                                  errmsg("user-defined indexes on system catalog tables are not supported")));
1161
1162         /* primary/unique constraints shouldn't have any expressions */
1163         if (indexInfo->ii_Expressions &&
1164                 constraintType != CONSTRAINT_EXCLUSION)
1165                 elog(ERROR, "constraints cannot have index expressions");
1166
1167         /*
1168          * If we're manufacturing a constraint for a pre-existing index, we need
1169          * to get rid of the existing auto dependencies for the index (the ones
1170          * that index_create() would have made instead of calling this function).
1171          *
1172          * Note: this code would not necessarily do the right thing if the index
1173          * has any expressions or predicate, but we'd never be turning such an
1174          * index into a UNIQUE or PRIMARY KEY constraint.
1175          */
1176         if (remove_old_dependencies)
1177                 deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1178                                                                                 RelationRelationId, DEPENDENCY_AUTO);
1179
1180         /*
1181          * Construct a pg_constraint entry.
1182          */
1183         conOid = CreateConstraintEntry(constraintName,
1184                                                                    namespaceId,
1185                                                                    constraintType,
1186                                                                    deferrable,
1187                                                                    initdeferred,
1188                                                                    true,        /* presumably "validated" -- TODO confirm */
1189                                                                    RelationGetRelid(heapRelation),
1190                                                                    indexInfo->ii_KeyAttrNumbers,
1191                                                                    indexInfo->ii_NumIndexAttrs,
1192                                                                    InvalidOid,  /* no domain */
1193                                                                    indexRelationId,             /* index OID */
1194                                                                    InvalidOid,  /* no foreign key */
1195                                                                    NULL,        /* NOTE(review): this and the next 7 args look FK-only -- confirm */
1196                                                                    NULL,
1197                                                                    NULL,
1198                                                                    NULL,
1199                                                                    0,
1200                                                                    ' ',
1201                                                                    ' ',
1202                                                                    ' ',
1203                                                                    indexInfo->ii_ExclusionOps,
1204                                                                    NULL,                /* no check constraint */
1205                                                                    NULL,
1206                                                                    NULL,
1207                                                                    true,                /* islocal */
1208                                                                    0,   /* inhcount */
1209                                                                    true,                /* noinherit */
1210                                                                    is_internal);
1211
1212         /*
1213          * Register the index as internally dependent on the constraint.
1214          *
1215          * Note that the constraint has a dependency on the table, so we don't
1216          * need (or want) any direct dependency from the index to the table.
1217          */
1218         myself.classId = RelationRelationId;
1219         myself.objectId = indexRelationId;
1220         myself.objectSubId = 0;
1221
1222         referenced.classId = ConstraintRelationId;
1223         referenced.objectId = conOid;
1224         referenced.objectSubId = 0;
1225
1226         recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
1227
1228         /*
1229          * If the constraint is deferrable, create the deferred uniqueness
1230          * checking trigger.  (The trigger will be given an internal dependency on
1231          * the constraint by CreateTrigger.)
1232          */
1233         if (deferrable)
1234         {
1235                 CreateTrigStmt *trigger;
1236
1237                 trigger = makeNode(CreateTrigStmt);
1238                 trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
1239                         "PK_ConstraintTrigger" :
1240                         "Unique_ConstraintTrigger";
1241                 trigger->relation = NULL;
1242                 trigger->funcname = SystemFuncName("unique_key_recheck");
1243                 trigger->args = NIL;
1244                 trigger->row = true;
1245                 trigger->timing = TRIGGER_TYPE_AFTER;
1246                 trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
1247                 trigger->columns = NIL;
1248                 trigger->whenClause = NULL;
1249                 trigger->isconstraint = true;
1250                 trigger->deferrable = true;
1251                 trigger->initdeferred = initdeferred;
1252                 trigger->constrrel = NULL;
1253
1254                 (void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
1255                                                          InvalidOid, conOid, indexRelationId, true);
1256         }
1257
1258         /*
1259          * If needed, mark the table as having a primary key.  We assume it can't
1260          * have been so marked already, so no need to clear the flag in the other
1261          * case.
1262          *
1263          * Note: this might better be done by callers.  We do it here to avoid
1264          * exposing index_update_stats() globally, but that wouldn't be necessary
1265          * if relhaspkey went away.
1266          */
1267         if (mark_as_primary)
1268                 index_update_stats(heapRelation,
1269                                                    true,        /* table has an index */
1270                                                    true,        /* table has a primary key */
1271                                                    -1.0);
1272
1273         /*
1274          * If needed, mark the index as primary and/or deferred in pg_index.
1275          *
1276          * Note: When making an existing index into a constraint, caller must have
1277          * a table lock that prevents concurrent table updates; otherwise, there
1278          * is a risk that concurrent readers of the table will miss seeing this
1279          * index at all.
1280          */
1281         if (update_pgindex && (mark_as_primary || deferrable))
1282         {
1283                 Relation        pg_index;
1284                 HeapTuple       indexTuple;
1285                 Form_pg_index indexForm;
1286                 bool            dirty = false;  /* set when the pg_index row needs rewriting */
1287
1288                 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1289
1290                 indexTuple = SearchSysCacheCopy1(INDEXRELID,
1291                                                                                  ObjectIdGetDatum(indexRelationId));
1292                 if (!HeapTupleIsValid(indexTuple))
1293                         elog(ERROR, "cache lookup failed for index %u", indexRelationId);
1294                 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1295
1296                 if (mark_as_primary && !indexForm->indisprimary)
1297                 {
1298                         indexForm->indisprimary = true;
1299                         dirty = true;
1300                 }
1301
1302                 if (deferrable && indexForm->indimmediate)
1303                 {
1304                         indexForm->indimmediate = false;
1305                         dirty = true;
1306                 }
1307
1308                 if (dirty)
1309                 {
1310                         simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1311                         CatalogUpdateIndexes(pg_index, indexTuple);
1312
1313                         InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
1314                                                                                  InvalidOid, is_internal);
1315                 }
1316
1317                 heap_freetuple(indexTuple);
1318                 heap_close(pg_index, RowExclusiveLock);
1319         }
1320
1321         return referenced;      /* address of the constraint created above */
1322 }
1323
1324 /*
1325  *              index_drop
1326  *
      * Drop the index identified by indexId: schedule removal of its storage,
      * delete its pg_index row, remove any statistics stored for expression
      * columns, clean up its entries in the ATTRIBUTE and RELATION catalogs
      * (via DeleteAttributeTuples / DeleteRelationTuple), and send a relcache
      * invalidation for the owning table.
      *
      * indexId: OID of the index to drop
      * concurrent: if true, follow the multi-step DROP INDEX CONCURRENTLY
      *              protocol below, which takes only ShareUpdateExclusiveLock
      *              on the owning table, commits and restarts the transaction
      *              twice, and holds session-level locks across those commits;
      *              if false, AccessExclusiveLock is taken on both the table
      *              and the index and no intermediate commit happens here.
      *
      * Raises ERROR if the index is still in use in this session, or (in the
      * concurrent case) if the current transaction already has an XID assigned.
      *
1327  * NOTE: this routine should now only be called through performDeletion(),
1328  * else associated dependencies won't be cleaned up.
1329  */
1330 void
1331 index_drop(Oid indexId, bool concurrent)
1332 {
1333         Oid                     heapId;
1334         Relation        userHeapRelation;
1335         Relation        userIndexRelation;
1336         Relation        indexRelation;
1337         HeapTuple       tuple;
1338         bool            hasexprs;
             /* heaprelid, indexrelid and heaplocktag are set and used only when concurrent */
1339         LockRelId       heaprelid,
1340                                 indexrelid;
1341         LOCKTAG         heaplocktag;
1342         LOCKMODE        lockmode;
1343
1344         /*
1345          * To drop an index safely, we must grab exclusive lock on its parent
1346          * table.  Exclusive lock on the index alone is insufficient because
1347          * another backend might be about to execute a query on the parent table.
1348          * If it relies on a previously cached list of index OIDs, then it could
1349          * attempt to access the just-dropped index.  We must therefore take a
1350          * table lock strong enough to prevent all queries on the table from
1351          * proceeding until we commit and send out a shared-cache-inval notice
1352          * that will make them update their index lists.
1353          *
1354          * In the concurrent case we avoid this requirement by disabling index use
1355          * in multiple steps and waiting out any transactions that might be using
1356          * the index, so we don't need exclusive lock on the parent table. Instead
1357          * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
1358          * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
1359          * AccessExclusiveLock on the index below, once we're sure nobody else is
1360          * using it.)
1361          */
1362         heapId = IndexGetRelation(indexId, false);
1363         lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
1364         userHeapRelation = heap_open(heapId, lockmode);
1365         userIndexRelation = index_open(indexId, lockmode);
1366
1367         /*
1368          * We might still have open queries using it in our own session, which the
1369          * above locking won't prevent, so test explicitly.
1370          */
1371         CheckTableNotInUse(userIndexRelation, "DROP INDEX");
1372
1373         /*
1374          * Drop Index Concurrently is more or less the reverse process of Create
1375          * Index Concurrently.
1376          *
1377          * First we unset indisvalid so queries starting afterwards don't use the
1378          * index to answer queries anymore.  We have to keep indisready = true so
1379          * transactions that are still scanning the index can continue to see
1380          * valid index contents.  For instance, if they are using READ COMMITTED
1381          * mode, and another transaction makes changes and commits, they need to
1382          * see those new tuples in the index.
1383          *
1384          * After all transactions that could possibly have used the index for
1385          * queries end, we can unset indisready and indislive, then wait till
1386          * nobody could be touching it anymore.  (Note: we need indislive because
1387          * this state must be distinct from the initial state during CREATE INDEX
1388          * CONCURRENTLY, which has indislive true while indisready and indisvalid
1389          * are false.  That's because in that state, transactions must examine the
1390          * index for HOT-safety decisions, while in this state we don't want them
1391          * to open it at all.)
1392          *
1393          * Since all predicate locks on the index are about to be made invalid, we
1394          * must promote them to predicate locks on the heap.  In the
1395          * non-concurrent case we can just do that now.  In the concurrent case
1396          * it's a bit trickier.  The predicate locks must be moved when there are
1397          * no index scans in progress on the index and no more can subsequently
1398          * start, so that no new predicate locks can be made on the index.  Also,
1399          * they must be moved before heap inserts stop maintaining the index, else
1400          * the conflict with the predicate lock on the index gap could be missed
1401          * before the lock on the heap relation is in place to detect a conflict
1402          * based on the heap tuple insert.
1403          */
1404         if (concurrent)
1405         {
1406                 /*
1407                  * We must commit our transaction in order to make the first pg_index
1408                  * state update visible to other sessions.  If the DROP machinery has
1409                  * already performed any other actions (removal of other objects,
1410                  * pg_depend entries, etc), the commit would make those actions
1411                  * permanent, which would leave us with inconsistent catalog state if
1412                  * we fail partway through the following sequence.  Since DROP INDEX
1413                  * CONCURRENTLY is restricted to dropping just one index that has no
1414                  * dependencies, we should get here before anything's been done ---
1415                  * but let's check that to be sure.  We can verify that the current
1416                  * transaction has not executed any transactional updates by checking
1417                  * that no XID has been assigned.
1418                  */
1419                 if (GetTopTransactionIdIfAny() != InvalidTransactionId)
1420                         ereport(ERROR,
1421                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1422                                          errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
1423
1424                 /*
1425                  * Mark index invalid by updating its pg_index entry
1426                  */
1427                 index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
1428
1429                 /*
1430                  * Invalidate the relcache for the table, so that after this commit
1431                  * all sessions will refresh any cached plans that might reference the
1432                  * index.
1433                  */
1434                 CacheInvalidateRelcache(userHeapRelation);
1435
1436                 /* save lockrelid and locktag for below, then close but keep locks */
1437                 heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
1438                 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1439                 indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
1440
1441                 heap_close(userHeapRelation, NoLock);
1442                 index_close(userIndexRelation, NoLock);
1443
1444                 /*
1445                  * We must commit our current transaction so that the indisvalid
1446                  * update becomes visible to other transactions; then start another.
1447                  * Note that any previously-built data structures are lost in the
1448                  * commit.  The only data we keep past here are the relation IDs.
1449                  *
1450                  * Before committing, get a session-level lock on the table, to ensure
1451                  * that neither it nor the index can be dropped before we finish. This
1452                  * cannot block, even if someone else is waiting for access, because
1453                  * we already have the same lock within our transaction.
1454                  */
1455                 LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1456                 LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
1457
                     /*
                      * NOTE(review): only this first commit is preceded by
                      * PopActiveSnapshot(); presumably the caller pushed an active
                      * snapshot that must be released before committing --- confirm
                      * against the caller.
                      */
1458                 PopActiveSnapshot();
1459                 CommitTransactionCommand();
1460                 StartTransactionCommand();
1461
1462                 /*
1463                  * Now we must wait until no running transaction could be using the
1464                  * index for a query.  Use AccessExclusiveLock here to check for
1465                  * running transactions that hold locks of any kind on the table. Note
1466                  * we do not need to worry about xacts that open the table for reading
1467                  * after this point; they will see the index as invalid when they open
1468                  * the relation.
1469                  *
1470                  * Note: the reason we use actual lock acquisition here, rather than
1471                  * just checking the ProcArray and sleeping, is that deadlock is
1472                  * possible if one of the transactions in question is blocked trying
1473                  * to acquire an exclusive lock on our table.  The lock code will
1474                  * detect deadlock and error out properly.
1475                  */
1476                 WaitForLockers(heaplocktag, AccessExclusiveLock);
1477
1478                 /*
1479                  * No more predicate locks will be acquired on this index, and we're
1480                  * about to stop doing inserts into the index which could show
1481                  * conflicts with existing predicate locks, so now is the time to move
1482                  * them to the heap relation.
1483                  */
1484                 userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1485                 userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1486                 TransferPredicateLocksToHeapRelation(userIndexRelation);
1487
1488                 /*
1489                  * Now we are sure that nobody uses the index for queries; they just
1490                  * might have it open for updating it.  So now we can unset indisready
1491                  * and indislive, then wait till nobody could be using it at all
1492                  * anymore.
1493                  */
1494                 index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1495
1496                 /*
1497                  * Invalidate the relcache for the table, so that after this commit
1498                  * all sessions will refresh the table's index list.  Forgetting just
1499                  * the index's relcache entry is not enough.
1500                  */
1501                 CacheInvalidateRelcache(userHeapRelation);
1502
1503                 /*
1504                  * Close the relations again, though still holding session lock.
1505                  */
1506                 heap_close(userHeapRelation, NoLock);
1507                 index_close(userIndexRelation, NoLock);
1508
1509                 /*
1510                  * Again, commit the transaction to make the pg_index update visible
1511                  * to other sessions.
1512                  */
1513                 CommitTransactionCommand();
1514                 StartTransactionCommand();
1515
1516                 /*
1517                  * Wait till every transaction that saw the old index state has
1518                  * finished.
1519                  */
1520                 WaitForLockers(heaplocktag, AccessExclusiveLock);
1521
1522                 /*
1523                  * Re-open relations to allow us to complete our actions.
1524                  *
1525                  * At this point, nothing should be accessing the index, but let's
1526                  * leave nothing to chance and grab AccessExclusiveLock on the index
1527                  * before the physical deletion.
1528                  */
1529                 userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1530                 userIndexRelation = index_open(indexId, AccessExclusiveLock);
1531         }
1532         else
1533         {
1534                 /* Not concurrent, so just transfer predicate locks and we're good */
1535                 TransferPredicateLocksToHeapRelation(userIndexRelation);
1536         }
1537
1538         /*
1539          * Schedule physical removal of the files
1540          */
1541         RelationDropStorage(userIndexRelation);
1542
1543         /*
1544          * Close and flush the index's relcache entry, to ensure relcache doesn't
1545          * try to rebuild it while we're deleting catalog entries. We keep the
1546          * lock though.
1547          */
1548         index_close(userIndexRelation, NoLock);
1549
1550         RelationForgetRelation(indexId);
1551
1552         /*
1553          * fix INDEX relation, and check for expressional index
1554          */
1555         indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
1556
1557         tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
1558         if (!HeapTupleIsValid(tuple))
1559                 elog(ERROR, "cache lookup failed for index %u", indexId);
1560
             /* capture this now: the tuple is deleted and released just below */
1561         hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
1562
1563         simple_heap_delete(indexRelation, &tuple->t_self);
1564
1565         ReleaseSysCache(tuple);
1566         heap_close(indexRelation, RowExclusiveLock);
1567
1568         /*
1569          * if it has any expression columns, we might have stored statistics about
1570          * them.
1571          */
1572         if (hasexprs)
1573                 RemoveStatistics(indexId, 0);
1574
1575         /*
1576          * fix ATTRIBUTE relation
1577          */
1578         DeleteAttributeTuples(indexId);
1579
1580         /*
1581          * fix RELATION relation
1582          */
1583         DeleteRelationTuple(indexId);
1584
1585         /*
1586          * We are presently too lazy to attempt to compute the new correct value
1587          * of relhasindex (the next VACUUM will fix it if necessary). So there is
1588          * no need to update the pg_class tuple for the owning relation. But we
1589          * must send out a shared-cache-inval notice on the owning relation to
1590          * ensure other backends update their relcache lists of indexes.  (In the
1591          * concurrent case, this is redundant but harmless.)
1592          */
1593         CacheInvalidateRelcache(userHeapRelation);
1594
1595         /*
1596          * Close owning rel, but keep lock
1597          */
1598         heap_close(userHeapRelation, NoLock);
1599
1600         /*
1601          * Release the session locks before we go.
1602          */
1603         if (concurrent)
1604         {
1605                 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1606                 UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
1607         }
1608 }
1609
1610 /* ----------------------------------------------------------------
1611  *                                              index_build support
1612  * ----------------------------------------------------------------
1613  */
1614
1615 /* ----------------
1616  *              BuildIndexInfo
1617  *                      Construct an IndexInfo record for an open index
1618  *
1619  * IndexInfo stores the information about the index that's needed by
1620  * FormIndexDatum, which is used for both index_build() and later insertion
1621  * of individual index tuples.  Normally we build an IndexInfo for an index
1622  * just once per command, and then use it for (potentially) many tuples.
1623  * ----------------
1624  */
1625 IndexInfo *
1626 BuildIndexInfo(Relation index)
1627 {
1628         IndexInfo  *ii = makeNode(IndexInfo);
1629         Form_pg_index indexStruct = index->rd_index;
1630         int                     i;
1631         int                     numKeys;
1632
1633         /* check the number of keys, and copy attr numbers into the IndexInfo */
1634         numKeys = indexStruct->indnatts;
1635         if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1636                 elog(ERROR, "invalid indnatts %d for index %u",
1637                          numKeys, RelationGetRelid(index));
1638         ii->ii_NumIndexAttrs = numKeys;
1639         for (i = 0; i < numKeys; i++)
1640                 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1641
1642         /* fetch any expressions needed for expressional indexes */
1643         ii->ii_Expressions = RelationGetIndexExpressions(index);
1644         ii->ii_ExpressionsState = NIL;
1645
1646         /* fetch index predicate if any */
1647         ii->ii_Predicate = RelationGetIndexPredicate(index);
1648         ii->ii_PredicateState = NIL;
1649
1650         /* fetch exclusion constraint info if any */
1651         if (indexStruct->indisexclusion)
1652         {
1653                 RelationGetExclusionInfo(index,
1654                                                                  &ii->ii_ExclusionOps,
1655                                                                  &ii->ii_ExclusionProcs,
1656                                                                  &ii->ii_ExclusionStrats);
1657         }
1658         else
1659         {
1660                 ii->ii_ExclusionOps = NULL;
1661                 ii->ii_ExclusionProcs = NULL;
1662                 ii->ii_ExclusionStrats = NULL;
1663         }
1664
1665         /* other info */
1666         ii->ii_Unique = indexStruct->indisunique;
1667         ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1668         /* assume not doing speculative insertion for now */
1669         ii->ii_UniqueOps = NULL;
1670         ii->ii_UniqueProcs = NULL;
1671         ii->ii_UniqueStrats = NULL;
1672
1673         /* initialize index-build state to default */
1674         ii->ii_Concurrent = false;
1675         ii->ii_BrokenHotChain = false;
1676
1677         return ii;
1678 }
1679
1680 /* ----------------
1681  *              BuildSpeculativeIndexInfo
1682  *                      Add extra state to IndexInfo record
1683  *
1684  * For unique indexes, we usually don't want to add info to the IndexInfo for
1685  * checking uniqueness, since the B-Tree AM handles that directly.  However,
1686  * in the case of speculative insertion, additional support is required.
1687  *
1688  * Do this processing here rather than in BuildIndexInfo() to not incur the
1689  * overhead in the common non-speculative cases.
1690  * ----------------
1691  */
1692 void
1693 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
1694 {
1695         int                     ncols = index->rd_rel->relnatts;
1696         int                     i;
1697
1698         /*
1699          * fetch info for checking unique indexes
1700          */
1701         Assert(ii->ii_Unique);
1702
1703         if (index->rd_rel->relam != BTREE_AM_OID)
1704                 elog(ERROR, "unexpected non-btree speculative unique index");
1705
1706         ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
1707         ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
1708         ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
1709
1710         /*
1711          * We have to look up the operator's strategy number.  This provides a
1712          * cross-check that the operator does match the index.
1713          */
1714         /* We need the func OIDs and strategy numbers too */
1715         for (i = 0; i < ncols; i++)
1716         {
1717                 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
1718                 ii->ii_UniqueOps[i] =
1719                         get_opfamily_member(index->rd_opfamily[i],
1720                                                                 index->rd_opcintype[i],
1721                                                                 index->rd_opcintype[i],
1722                                                                 ii->ii_UniqueStrats[i]);
1723                 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
1724         }
1725 }
1726
1727 /* ----------------
1728  *              FormIndexDatum
1729  *                      Construct values[] and isnull[] arrays for a new index tuple.
1730  *
1731  *      indexInfo               Info about the index
1732  *      slot                    Heap tuple for which we must prepare an index entry
1733  *      estate                  executor state for evaluating any index expressions
1734  *      values                  Array of index Datums (output area)
1735  *      isnull                  Array of is-null indicators (output area)
1736  *
1737  * When there are no index expressions, estate may be NULL.  Otherwise it
1738  * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1739  * context must point to the heap tuple passed in.
1740  *
1741  * Notice we don't actually call index_form_tuple() here; we just prepare
1742  * its input arrays values[] and isnull[].  This is because the index AM
1743  * may wish to alter the data before storage.
1744  * ----------------
1745  */
1746 void
1747 FormIndexDatum(IndexInfo *indexInfo,
1748                            TupleTableSlot *slot,
1749                            EState *estate,
1750                            Datum *values,
1751                            bool *isnull)
1752 {
1753         ListCell   *indexpr_item;
1754         int                     i;
1755
1756         if (indexInfo->ii_Expressions != NIL &&
1757                 indexInfo->ii_ExpressionsState == NIL)
1758         {
1759                 /* First time through, set up expression evaluation state */
1760                 indexInfo->ii_ExpressionsState = (List *)
1761                         ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1762                                                         estate);
1763                 /* Check caller has set up context correctly */
1764                 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1765         }
1766         indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1767
1768         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1769         {
1770                 int                     keycol = indexInfo->ii_KeyAttrNumbers[i];
1771                 Datum           iDatum;
1772                 bool            isNull;
1773
1774                 if (keycol != 0)
1775                 {
1776                         /*
1777                          * Plain index column; get the value we need directly from the
1778                          * heap tuple.
1779                          */
1780                         iDatum = slot_getattr(slot, keycol, &isNull);
1781                 }
1782                 else
1783                 {
1784                         /*
1785                          * Index expression --- need to evaluate it.
1786                          */
1787                         if (indexpr_item == NULL)
1788                                 elog(ERROR, "wrong number of index expressions");
1789                         iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1790                                                                                            GetPerTupleExprContext(estate),
1791                                                                                            &isNull,
1792                                                                                            NULL);
1793                         indexpr_item = lnext(indexpr_item);
1794                 }
1795                 values[i] = iDatum;
1796                 isnull[i] = isNull;
1797         }
1798
1799         if (indexpr_item != NULL)
1800                 elog(ERROR, "wrong number of index expressions");
1801 }
1802
1803
1804 /*
1805  * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1806  *
1807  * This routine updates the pg_class row of either an index or its parent
1808  * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
1809  * to ensure we can do all the necessary work in just one update.
1810  *
1811  * hasindex: set relhasindex to this value
1812  * isprimary: if true, set relhaspkey true; else no change
1813  * reltuples: if >= 0, set reltuples to this value; else no change
1814  *
1815  * If reltuples >= 0, relpages and relallvisible are also updated (using
1816  * RelationGetNumberOfBlocks() and visibilitymap_count()).
1817  *
1818  * NOTE: an important side-effect of this operation is that an SI invalidation
1819  * message is sent out to all backends --- including me --- causing relcache
1820  * entries to be flushed or updated with the new data.  This must happen even
1821  * if we find that no change is needed in the pg_class row.  When updating
1822  * a heap entry, this ensures that other backends find out about the new
1823  * index.  When updating an index, it's important because some index AMs
1824  * expect a relcache flush to occur after REINDEX.
1825  */
1826 static void
1827 index_update_stats(Relation rel,
1828                                    bool hasindex,
1829                                    bool isprimary,
1830                                    double reltuples)
1831 {
1832         Oid                     relid = RelationGetRelid(rel);
1833         Relation        pg_class;
1834         HeapTuple       tuple;
1835         Form_pg_class rd_rel;
1836         bool            dirty;
1837
1838         /*
1839          * We always update the pg_class row using a non-transactional,
1840          * overwrite-in-place update.  There are several reasons for this:
1841          *
1842          * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1843          *
1844          * 2. We could be reindexing pg_class itself, in which case we can't move
1845          * its pg_class row because CatalogUpdateIndexes might not know about all
1846          * the indexes yet (see reindex_relation).
1847          *
1848          * 3. Because we execute CREATE INDEX with just share lock on the parent
1849          * rel (to allow concurrent index creations), an ordinary update could
1850          * suffer a tuple-concurrently-updated failure against another CREATE
1851          * INDEX committing at about the same time.  We can avoid that by having
1852          * them both do nontransactional updates (we assume they will both be
1853          * trying to change the pg_class row to the same thing, so it doesn't
1854          * matter which goes first).
1855          *
1856          * It is safe to use a non-transactional update even though our
1857          * transaction could still fail before committing.  Setting relhasindex
1858          * true is safe even if there are no indexes (VACUUM will eventually fix
1859          * it), likewise for relhaspkey.  And of course the new relpages and
1860          * reltuples counts are correct regardless.  However, we don't want to
1861          * change relpages (or relallvisible) if the caller isn't providing an
1862          * updated reltuples count, because that would bollix the
1863          * reltuples/relpages ratio which is what's really important.
1864          */
1865
1866         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1867
1868         /*
1869          * Make a copy of the tuple to update.  Normally we use the syscache, but
1870          * we can't rely on that during bootstrap or while reindexing pg_class
1871          * itself.
1872          */
1873         if (IsBootstrapProcessingMode() ||
1874                 ReindexIsProcessingHeap(RelationRelationId))
1875         {
1876                 /* don't assume syscache will work */
1877                 HeapScanDesc pg_class_scan;
1878                 ScanKeyData key[1];
1879
1880                 ScanKeyInit(&key[0],
1881                                         ObjectIdAttributeNumber,
1882                                         BTEqualStrategyNumber, F_OIDEQ,
1883                                         ObjectIdGetDatum(relid));
1884
1885                 pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
1886                 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1887                 tuple = heap_copytuple(tuple);
1888                 heap_endscan(pg_class_scan);
1889         }
1890         else
1891         {
1892                 /* normal case, use syscache */
1893                 tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1894         }
1895
1896         if (!HeapTupleIsValid(tuple))
1897                 elog(ERROR, "could not find tuple for relation %u", relid);
1898         rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1899
1900         /* Apply required updates, if any, to copied tuple */
1901
1902         dirty = false;
1903         if (rd_rel->relhasindex != hasindex)
1904         {
1905                 rd_rel->relhasindex = hasindex;
1906                 dirty = true;
1907         }
1908         if (isprimary)
1909         {
1910                 if (!rd_rel->relhaspkey)
1911                 {
1912                         rd_rel->relhaspkey = true;
1913                         dirty = true;
1914                 }
1915         }
1916
1917         if (reltuples >= 0)
1918         {
1919                 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1920                 BlockNumber relallvisible;
1921
1922                 if (rd_rel->relkind != RELKIND_INDEX)
1923                         visibilitymap_count(rel, &relallvisible, NULL);
1924                 else    /* don't bother for indexes */
1925                         relallvisible = 0;
1926
1927                 if (rd_rel->relpages != (int32) relpages)
1928                 {
1929                         rd_rel->relpages = (int32) relpages;
1930                         dirty = true;
1931                 }
1932                 if (rd_rel->reltuples != (float4) reltuples)
1933                 {
1934                         rd_rel->reltuples = (float4) reltuples;
1935                         dirty = true;
1936                 }
1937                 if (rd_rel->relallvisible != (int32) relallvisible)
1938                 {
1939                         rd_rel->relallvisible = (int32) relallvisible;
1940                         dirty = true;
1941                 }
1942         }
1943
1944         /*
1945          * If anything changed, write out the tuple
1946          */
1947         if (dirty)
1948         {
1949                 heap_inplace_update(pg_class, tuple);
1950                 /* the above sends a cache inval message */
1951         }
1952         else
1953         {
1954                 /* no need to change tuple, but force relcache inval anyway */
1955                 CacheInvalidateRelcacheByTuple(tuple);
1956         }
1957
1958         heap_freetuple(tuple);
1959
1960         heap_close(pg_class, RowExclusiveLock);
1961 }
1962
1963
1964 /*
1965  * index_build - invoke access-method-specific index build procedure
1966  *
1967  * On entry, the index's catalog entries are valid, and its physical disk
1968  * file has been created but is empty.  We call the AM-specific build
1969  * procedure to fill in the index contents.  We then update the pg_class
1970  * entries of the index and heap relation as needed, using statistics
1971  * returned by ambuild as well as data passed by the caller.
1972  *
1973  * isprimary tells whether to mark the index as a primary-key index.
1974  * isreindex indicates we are recreating a previously-existing index.
1975  *
1976  * Note: when reindexing an existing index, isprimary can be false even if
1977  * the index is a PK; it's already properly marked and need not be re-marked.
1978  *
1979  * Note: before Postgres 8.2, the passed-in heap and index Relations
1980  * were automatically closed by this routine.  This is no longer the case.
1981  * The caller opened 'em, and the caller should close 'em.
1982  */
1983 void
1984 index_build(Relation heapRelation,
1985                         Relation indexRelation,
1986                         IndexInfo *indexInfo,
1987                         bool isprimary,
1988                         bool isreindex)
1989 {
1990         IndexBuildResult *stats;
1991         Oid                     save_userid;
1992         int                     save_sec_context;
1993         int                     save_nestlevel;
1994
1995         /*
1996          * sanity checks
1997          */
1998         Assert(RelationIsValid(indexRelation));
1999         Assert(PointerIsValid(indexRelation->rd_amroutine));
2000         Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
2001         Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));
2002
2003         ereport(DEBUG1,
2004                         (errmsg("building index \"%s\" on table \"%s\"",
2005                                         RelationGetRelationName(indexRelation),
2006                                         RelationGetRelationName(heapRelation))));
2007
2008         /*
2009          * Switch to the table owner's userid, so that any index functions are run
2010          * as that user.  Also lock down security-restricted operations and
2011          * arrange to make GUC variable changes local to this command.
2012          */
2013         GetUserIdAndSecContext(&save_userid, &save_sec_context);
2014         SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2015                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
2016         save_nestlevel = NewGUCNestLevel();
2017
2018         /*
2019          * Call the access method's build procedure
2020          */
2021         stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
2022                                                                                                  indexInfo);
2023         Assert(PointerIsValid(stats));
2024
2025         /*
2026          * If this is an unlogged index, we may need to write out an init fork for
2027          * it -- but we must first check whether one already exists.  If, for
2028          * example, an unlogged relation is truncated in the transaction that
2029          * created it, or truncated twice in a subsequent transaction, the
2030          * relfilenode won't change, and nothing needs to be done here.
2031          */
2032         if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2033                 !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2034         {
2035                 RelationOpenSmgr(indexRelation);
2036                 smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2037                 indexRelation->rd_amroutine->ambuildempty(indexRelation);
2038         }
2039
2040         /*
2041          * If we found any potentially broken HOT chains, mark the index as not
2042          * being usable until the current transaction is below the event horizon.
2043          * See src/backend/access/heap/README.HOT for discussion.
2044          *
2045          * However, when reindexing an existing index, we should do nothing here.
2046          * Any HOT chains that are broken with respect to the index must predate
2047          * the index's original creation, so there is no need to change the
2048          * index's usability horizon.  Moreover, we *must not* try to change the
2049          * index's pg_index entry while reindexing pg_index itself, and this
2050          * optimization nicely prevents that.
2051          *
2052          * We also need not set indcheckxmin during a concurrent index build,
2053          * because we won't set indisvalid true until all transactions that care
2054          * about the broken HOT chains are gone.
2055          *
2056          * Therefore, this code path can only be taken during non-concurrent
2057          * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
2058          * tuple's xmin doesn't matter, because that tuple was created in the
2059          * current transaction anyway.  That also means we don't need to worry
2060          * about any concurrent readers of the tuple; no other transaction can see
2061          * it yet.
2062          */
2063         if (indexInfo->ii_BrokenHotChain && !isreindex &&
2064                 !indexInfo->ii_Concurrent)
2065         {
2066                 Oid                     indexId = RelationGetRelid(indexRelation);
2067                 Relation        pg_index;
2068                 HeapTuple       indexTuple;
2069                 Form_pg_index indexForm;
2070
2071                 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2072
2073                 indexTuple = SearchSysCacheCopy1(INDEXRELID,
2074                                                                                  ObjectIdGetDatum(indexId));
2075                 if (!HeapTupleIsValid(indexTuple))
2076                         elog(ERROR, "cache lookup failed for index %u", indexId);
2077                 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2078
2079                 /* If it's a new index, indcheckxmin shouldn't be set ... */
2080                 Assert(!indexForm->indcheckxmin);
2081
2082                 indexForm->indcheckxmin = true;
2083                 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2084                 CatalogUpdateIndexes(pg_index, indexTuple);
2085
2086                 heap_freetuple(indexTuple);
2087                 heap_close(pg_index, RowExclusiveLock);
2088         }
2089
2090         /*
2091          * Update heap and index pg_class rows
2092          */
2093         index_update_stats(heapRelation,
2094                                            true,
2095                                            isprimary,
2096                                            stats->heap_tuples);
2097
2098         index_update_stats(indexRelation,
2099                                            false,
2100                                            false,
2101                                            stats->index_tuples);
2102
2103         /* Make the updated catalog row versions visible */
2104         CommandCounterIncrement();
2105
2106         /*
2107          * If it's for an exclusion constraint, make a second pass over the heap
2108          * to verify that the constraint is satisfied.  We must not do this until
2109          * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
2110          * see comments for IndexCheckExclusion.)
2111          */
2112         if (indexInfo->ii_ExclusionOps != NULL)
2113                 IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
2114
2115         /* Roll back any GUC changes executed by index functions */
2116         AtEOXact_GUC(false, save_nestlevel);
2117
2118         /* Restore userid and security context */
2119         SetUserIdAndSecContext(save_userid, save_sec_context);
2120 }
2121
2122
2123 /*
2124  * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
2125  *
2126  * This is called back from an access-method-specific index build procedure
2127  * after the AM has done whatever setup it needs.  The parent heap relation
2128  * is scanned to find tuples that should be entered into the index.  Each
2129  * such tuple is passed to the AM's callback routine, which does the right
2130  * things to add it to the new index.  After we return, the AM's index
2131  * build procedure does whatever cleanup it needs.
2132  *
2133  * The total count of heap tuples is returned.  This is for updating pg_class
2134  * statistics.  (It's annoying not to be able to do that here, but we want
2135  * to merge that update with others; see index_update_stats.)  Note that the
2136  * index AM itself must keep track of the number of index tuples; we don't do
2137  * so here because the AM might reject some of the tuples for its own reasons,
2138  * such as being unable to store NULLs.
2139  *
2140  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
2141  * any potentially broken HOT chains.  Currently, we set this if there are
2142  * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
2143  * trying very hard to detect whether they're really incompatible with the
2144  * chain tip.
2145  */
2146 double
2147 IndexBuildHeapScan(Relation heapRelation,
2148                                    Relation indexRelation,
2149                                    IndexInfo *indexInfo,
2150                                    bool allow_sync,
2151                                    IndexBuildCallback callback,
2152                                    void *callback_state)
2153 {
2154         return IndexBuildHeapRangeScan(heapRelation, indexRelation,
2155                                                                    indexInfo, allow_sync,
2156                                                                    false,
2157                                                                    0, InvalidBlockNumber,
2158                                                                    callback, callback_state);
2159 }
2160
2161 /*
2162  * As above, except that instead of scanning the complete heap, only the given
2163  * number of blocks are scanned.  Scan to end-of-rel can be signalled by
2164  * passing InvalidBlockNumber as numblocks.  Note that restricting the range
2165  * to scan cannot be done when requesting syncscan.
2166  *
2167  * When "anyvisible" mode is requested, all tuples visible to any transaction
2168  * are considered, including those inserted or deleted by transactions that are
2169  * still in progress.
2170  */
2171 double
2172 IndexBuildHeapRangeScan(Relation heapRelation,
2173                                                 Relation indexRelation,
2174                                                 IndexInfo *indexInfo,
2175                                                 bool allow_sync,
2176                                                 bool anyvisible,
2177                                                 BlockNumber start_blockno,
2178                                                 BlockNumber numblocks,
2179                                                 IndexBuildCallback callback,
2180                                                 void *callback_state)
2181 {
2182         bool            is_system_catalog;
2183         bool            checking_uniqueness;
2184         HeapScanDesc scan;
2185         HeapTuple       heapTuple;
2186         Datum           values[INDEX_MAX_KEYS];
2187         bool            isnull[INDEX_MAX_KEYS];
2188         double          reltuples;
2189         List       *predicate;
2190         TupleTableSlot *slot;
2191         EState     *estate;
2192         ExprContext *econtext;
2193         Snapshot        snapshot;
2194         TransactionId OldestXmin;
2195         BlockNumber root_blkno = InvalidBlockNumber;
2196         OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2197
2198         /*
2199          * sanity checks
2200          */
2201         Assert(OidIsValid(indexRelation->rd_rel->relam));
2202
2203         /* Remember if it's a system catalog */
2204         is_system_catalog = IsSystemRelation(heapRelation);
2205
2206         /* See whether we're verifying uniqueness/exclusion properties */
2207         checking_uniqueness = (indexInfo->ii_Unique ||
2208                                                    indexInfo->ii_ExclusionOps != NULL);
2209
2210         /*
2211          * "Any visible" mode is not compatible with uniqueness checks; make sure
2212          * only one of those is requested.
2213          */
2214         Assert(!(anyvisible && checking_uniqueness));
2215
2216         /*
2217          * Need an EState for evaluation of index expressions and partial-index
2218          * predicates.  Also a slot to hold the current tuple.
2219          */
2220         estate = CreateExecutorState();
2221         econtext = GetPerTupleExprContext(estate);
2222         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2223
2224         /* Arrange for econtext's scan tuple to be the tuple under test */
2225         econtext->ecxt_scantuple = slot;
2226
2227         /* Set up execution state for predicate, if any. */
2228         predicate = (List *)
2229                 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2230                                                 estate);
2231
2232         /*
2233          * Prepare for scan of the base relation.  In a normal index build, we use
2234          * SnapshotAny because we must retrieve all tuples and do our own time
2235          * qual checks (because we have to index RECENTLY_DEAD tuples). In a
2236          * concurrent build, or during bootstrap, we take a regular MVCC snapshot
2237          * and index whatever's live according to that.
2238          */
2239         if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2240         {
2241                 snapshot = RegisterSnapshot(GetTransactionSnapshot());
2242                 OldestXmin = InvalidTransactionId;              /* not used */
2243
2244                 /* "any visible" mode is not compatible with this */
2245                 Assert(!anyvisible);
2246         }
2247         else
2248         {
2249                 snapshot = SnapshotAny;
2250                 /* okay to ignore lazy VACUUMs here */
2251                 OldestXmin = GetOldestXmin(heapRelation, true);
2252         }
2253
2254         scan = heap_beginscan_strat(heapRelation,       /* relation */
2255                                                                 snapshot,               /* snapshot */
2256                                                                 0,              /* number of keys */
2257                                                                 NULL,   /* scan key */
2258                                                                 true,   /* buffer access strategy OK */
2259                                                                 allow_sync);    /* syncscan OK? */
2260
2261         /* set our scan endpoints */
2262         if (!allow_sync)
2263                 heap_setscanlimits(scan, start_blockno, numblocks);
2264         else
2265         {
2266                 /* syncscan can only be requested on whole relation */
2267                 Assert(start_blockno == 0);
2268                 Assert(numblocks == InvalidBlockNumber);
2269         }
2270
2271         reltuples = 0;
2272
2273         /*
2274          * Scan all tuples in the base relation.
2275          */
2276         while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2277         {
2278                 bool            tupleIsAlive;
2279
2280                 CHECK_FOR_INTERRUPTS();
2281
2282                 /*
2283                  * When dealing with a HOT-chain of updated tuples, we want to index
2284                  * the values of the live tuple (if any), but index it under the TID
2285                  * of the chain's root tuple.  This approach is necessary to preserve
2286                  * the HOT-chain structure in the heap. So we need to be able to find
2287                  * the root item offset for every tuple that's in a HOT-chain.  When
2288                  * first reaching a new page of the relation, call
2289                  * heap_get_root_tuples() to build a map of root item offsets on the
2290                  * page.
2291                  *
2292                  * It might look unsafe to use this information across buffer
2293                  * lock/unlock.  However, we hold ShareLock on the table so no
2294                  * ordinary insert/update/delete should occur; and we hold pin on the
2295                  * buffer continuously while visiting the page, so no pruning
2296                  * operation can occur either.
2297                  *
2298                  * Also, although our opinions about tuple liveness could change while
2299                  * we scan the page (due to concurrent transaction commits/aborts),
2300                  * the chain root locations won't, so this info doesn't need to be
2301                  * rebuilt after waiting for another transaction.
2302                  *
2303                  * Note the implied assumption that there is no more than one live
2304                  * tuple per HOT-chain --- else we could create more than one index
2305                  * entry pointing to the same root tuple.
2306                  */
2307                 if (scan->rs_cblock != root_blkno)
2308                 {
2309                         Page            page = BufferGetPage(scan->rs_cbuf);
2310
2311                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2312                         heap_get_root_tuples(page, root_offsets);
2313                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2314
2315                         root_blkno = scan->rs_cblock;
2316                 }
2317
2318                 if (snapshot == SnapshotAny)
2319                 {
2320                         /* do our own time qual check */
2321                         bool            indexIt;
2322                         TransactionId xwait;
2323
2324         recheck:
2325
2326                         /*
2327                          * We could possibly get away with not locking the buffer here,
2328                          * since caller should hold ShareLock on the relation, but let's
2329                          * be conservative about it.  (This remark is still correct even
2330                          * with HOT-pruning: our pin on the buffer prevents pruning.)
2331                          */
2332                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2333
2334                         switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
2335                                                                                          scan->rs_cbuf))
2336                         {
2337                                 case HEAPTUPLE_DEAD:
2338                                         /* Definitely dead, we can ignore it */
2339                                         indexIt = false;
2340                                         tupleIsAlive = false;
2341                                         break;
2342                                 case HEAPTUPLE_LIVE:
2343                                         /* Normal case, index and unique-check it */
2344                                         indexIt = true;
2345                                         tupleIsAlive = true;
2346                                         break;
2347                                 case HEAPTUPLE_RECENTLY_DEAD:
2348
2349                                         /*
2350                                          * If tuple is recently deleted then we must index it
2351                                          * anyway to preserve MVCC semantics.  (Pre-existing
2352                                          * transactions could try to use the index after we finish
2353                                          * building it, and may need to see such tuples.)
2354                                          *
2355                                          * However, if it was HOT-updated then we must only index
2356                                          * the live tuple at the end of the HOT-chain.  Since this
2357                                          * breaks semantics for pre-existing snapshots, mark the
2358                                          * index as unusable for them.
2359                                          */
2360                                         if (HeapTupleIsHotUpdated(heapTuple))
2361                                         {
2362                                                 indexIt = false;
2363                                                 /* mark the index as unsafe for old snapshots */
2364                                                 indexInfo->ii_BrokenHotChain = true;
2365                                         }
2366                                         else
2367                                                 indexIt = true;
2368                                         /* In any case, exclude the tuple from unique-checking */
2369                                         tupleIsAlive = false;
2370                                         break;
2371                                 case HEAPTUPLE_INSERT_IN_PROGRESS:
2372
2373                                         /*
2374                                          * In "anyvisible" mode, this tuple is visible and we
2375                                          * don't need any further checks.
2376                                          */
2377                                         if (anyvisible)
2378                                         {
2379                                                 indexIt = true;
2380                                                 tupleIsAlive = true;
2381                                                 break;
2382                                         }
2383
2384                                         /*
2385                                          * Since caller should hold ShareLock or better, normally
2386                                          * the only way to see this is if it was inserted earlier
2387                                          * in our own transaction.  However, it can happen in
2388                                          * system catalogs, since we tend to release write lock
2389                                          * before commit there.  Give a warning if neither case
2390                                          * applies.
2391                                          */
2392                                         xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
2393                                         if (!TransactionIdIsCurrentTransactionId(xwait))
2394                                         {
2395                                                 if (!is_system_catalog)
2396                                                         elog(WARNING, "concurrent insert in progress within table \"%s\"",
2397                                                                  RelationGetRelationName(heapRelation));
2398
2399                                                 /*
2400                                                  * If we are performing uniqueness checks, indexing
2401                                                  * such a tuple could lead to a bogus uniqueness
2402                                                  * failure.  In that case we wait for the inserting
2403                                                  * transaction to finish and check again.
2404                                                  */
2405                                                 if (checking_uniqueness)
2406                                                 {
2407                                                         /*
2408                                                          * Must drop the lock on the buffer before we wait
2409                                                          */
2410                                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2411                                                         XactLockTableWait(xwait, heapRelation,
2412                                                                                           &heapTuple->t_self,
2413                                                                                           XLTW_InsertIndexUnique);
2414                                                         CHECK_FOR_INTERRUPTS();
2415                                                         goto recheck;
2416                                                 }
2417                                         }
2418
2419                                         /*
2420                                          * We must index such tuples, since if the index build
2421                                          * commits then they're good.
2422                                          */
2423                                         indexIt = true;
2424                                         tupleIsAlive = true;
2425                                         break;
2426                                 case HEAPTUPLE_DELETE_IN_PROGRESS:
2427
2428                                         /*
2429                                          * As with INSERT_IN_PROGRESS case, this is unexpected
2430                                          * unless it's our own deletion or a system catalog; but
2431                                          * in anyvisible mode, this tuple is visible.
2432                                          */
2433                                         if (anyvisible)
2434                                         {
2435                                                 indexIt = true;
2436                                                 tupleIsAlive = false;
2437                                                 break;
2438                                         }
2439
2440                                         xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
2441                                         if (!TransactionIdIsCurrentTransactionId(xwait))
2442                                         {
2443                                                 if (!is_system_catalog)
2444                                                         elog(WARNING, "concurrent delete in progress within table \"%s\"",
2445                                                                  RelationGetRelationName(heapRelation));
2446
2447                                                 /*
2448                                                  * If we are performing uniqueness checks, assuming
2449                                                  * the tuple is dead could lead to missing a
2450                                                  * uniqueness violation.  In that case we wait for the
2451                                                  * deleting transaction to finish and check again.
2452                                                  *
2453                                                  * Also, if it's a HOT-updated tuple, we should not
2454                                                  * index it but rather the live tuple at the end of
2455                                                  * the HOT-chain.  However, the deleting transaction
2456                                                  * could abort, possibly leaving this tuple as live
2457                                                  * after all, in which case it has to be indexed. The
2458                                                  * only way to know what to do is to wait for the
2459                                                  * deleting transaction to finish and check again.
2460                                                  */
2461                                                 if (checking_uniqueness ||
2462                                                         HeapTupleIsHotUpdated(heapTuple))
2463                                                 {
2464                                                         /*
2465                                                          * Must drop the lock on the buffer before we wait
2466                                                          */
2467                                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2468                                                         XactLockTableWait(xwait, heapRelation,
2469                                                                                           &heapTuple->t_self,
2470                                                                                           XLTW_InsertIndexUnique);
2471                                                         CHECK_FOR_INTERRUPTS();
2472                                                         goto recheck;
2473                                                 }
2474
2475                                                 /*
2476                                                  * Otherwise index it but don't check for uniqueness,
2477                                                  * the same as a RECENTLY_DEAD tuple.
2478                                                  */
2479                                                 indexIt = true;
2480                                         }
2481                                         else if (HeapTupleIsHotUpdated(heapTuple))
2482                                         {
2483                                                 /*
2484                                                  * It's a HOT-updated tuple deleted by our own xact.
2485                                                  * We can assume the deletion will commit (else the
2486                                                  * index contents don't matter), so treat the same as
2487                                                  * RECENTLY_DEAD HOT-updated tuples.
2488                                                  */
2489                                                 indexIt = false;
2490                                                 /* mark the index as unsafe for old snapshots */
2491                                                 indexInfo->ii_BrokenHotChain = true;
2492                                         }
2493                                         else
2494                                         {
2495                                                 /*
2496                                                  * It's a regular tuple deleted by our own xact. Index
2497                                                  * it but don't check for uniqueness, the same as a
2498                                                  * RECENTLY_DEAD tuple.
2499                                                  */
2500                                                 indexIt = true;
2501                                         }
2502                                         /* In any case, exclude the tuple from unique-checking */
2503                                         tupleIsAlive = false;
2504                                         break;
2505                                 default:
2506                                         elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2507                                         indexIt = tupleIsAlive = false;         /* keep compiler quiet */
2508                                         break;
2509                         }
2510
2511                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2512
2513                         if (!indexIt)
2514                                 continue;
2515                 }
2516                 else
2517                 {
2518                         /* heap_getnext did the time qual check */
2519                         tupleIsAlive = true;
2520                 }
2521
2522                 reltuples += 1;
2523
2524                 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2525
2526                 /* Set up for predicate or expression evaluation */
2527                 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2528
2529                 /*
2530                  * In a partial index, discard tuples that don't satisfy the
2531                  * predicate.
2532                  */
2533                 if (predicate != NIL)
2534                 {
2535                         if (!ExecQual(predicate, econtext, false))
2536                                 continue;
2537                 }
2538
2539                 /*
2540                  * For the current heap tuple, extract all the attributes we use in
2541                  * this index, and note which are null.  This also performs evaluation
2542                  * of any expressions needed.
2543                  */
2544                 FormIndexDatum(indexInfo,
2545                                            slot,
2546                                            estate,
2547                                            values,
2548                                            isnull);
2549
2550                 /*
2551                  * You'd think we should go ahead and build the index tuple here, but
2552                  * some index AMs want to do further processing on the data first.  So
2553                  * pass the values[] and isnull[] arrays, instead.
2554                  */
2555
2556                 if (HeapTupleIsHeapOnly(heapTuple))
2557                 {
2558                         /*
2559                          * For a heap-only tuple, pretend its TID is that of the root. See
2560                          * src/backend/access/heap/README.HOT for discussion.
2561                          */
2562                         HeapTupleData rootTuple;
2563                         OffsetNumber offnum;
2564
2565                         rootTuple = *heapTuple;
2566                         offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
2567
2568                         if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
2569                                 elog(ERROR, "failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
2570                                          ItemPointerGetBlockNumber(&heapTuple->t_self),
2571                                          offnum, RelationGetRelationName(heapRelation));
2572
2573                         ItemPointerSetOffsetNumber(&rootTuple.t_self,
2574                                                                            root_offsets[offnum - 1]);
2575
2576                         /* Call the AM's callback routine to process the tuple */
2577                         callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
2578                                          callback_state);
2579                 }
2580                 else
2581                 {
2582                         /* Call the AM's callback routine to process the tuple */
2583                         callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
2584                                          callback_state);
2585                 }
2586         }
2587
2588         heap_endscan(scan);
2589
2590         /* we can now forget our snapshot, if set */
2591         if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2592                 UnregisterSnapshot(snapshot);
2593
2594         ExecDropSingleTupleTableSlot(slot);
2595
2596         FreeExecutorState(estate);
2597
2598         /* These may have been pointing to the now-gone estate */
2599         indexInfo->ii_ExpressionsState = NIL;
2600         indexInfo->ii_PredicateState = NIL;
2601
2602         return reltuples;
2603 }
2604
2605
2606 /*
2607  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
2608  *
2609  * When creating an exclusion constraint, we first build the index normally
2610  * and then rescan the heap to check for conflicts.  We assume that we only
2611  * need to validate tuples that are live according to an up-to-date snapshot,
2612  * and that these were correctly indexed even in the presence of broken HOT
2613  * chains.  This should be OK since we are holding at least ShareLock on the
2614  * table, meaning there can be no uncommitted updates from other transactions.
2615  * (Note: that wouldn't necessarily work for system catalogs, since many
2616  * operations release write lock early on the system catalogs.)
2617  */
2618 static void
2619 IndexCheckExclusion(Relation heapRelation,
2620                                         Relation indexRelation,
2621                                         IndexInfo *indexInfo)
2622 {
2623         HeapScanDesc scan;
2624         HeapTuple       heapTuple;
2625         Datum           values[INDEX_MAX_KEYS];
2626         bool            isnull[INDEX_MAX_KEYS];
2627         List       *predicate;
2628         TupleTableSlot *slot;
2629         EState     *estate;
2630         ExprContext *econtext;
2631         Snapshot        snapshot;
2632
2633         /*
2634          * If we are reindexing the target index, mark it as no longer being
2635          * reindexed, to forestall an Assert in index_beginscan when we try to use
2636          * the index for probes.  This is OK because the index is now fully valid.
2637          */
2638         if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
2639                 ResetReindexProcessing();
2640
2641         /*
2642          * Need an EState for evaluation of index expressions and partial-index
2643          * predicates.  Also a slot to hold the current tuple.
2644          */
2645         estate = CreateExecutorState();
2646         econtext = GetPerTupleExprContext(estate);
2647         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2648
2649         /* Arrange for econtext's scan tuple to be the tuple under test */
2650         econtext->ecxt_scantuple = slot;
2651
2652         /* Set up execution state for predicate, if any. */
2653         predicate = (List *)
2654                 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2655                                                 estate);
2656
2657         /*
2658          * Scan all live tuples in the base relation.
2659          */
2660         snapshot = RegisterSnapshot(GetLatestSnapshot());
2661         scan = heap_beginscan_strat(heapRelation,       /* relation */
2662                                                                 snapshot,               /* snapshot */
2663                                                                 0,              /* number of keys */
2664                                                                 NULL,   /* scan key */
2665                                                                 true,   /* buffer access strategy OK */
2666                                                                 true);  /* syncscan OK */
2667
2668         while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2669         {
2670                 CHECK_FOR_INTERRUPTS();
2671
2672                 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2673
2674                 /* Set up for predicate or expression evaluation */
2675                 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2676
2677                 /*
2678                  * In a partial index, ignore tuples that don't satisfy the predicate.
2679                  */
2680                 if (predicate != NIL)
2681                 {
2682                         if (!ExecQual(predicate, econtext, false))
2683                                 continue;
2684                 }
2685
2686                 /*
2687                  * Extract index column values, including computing expressions.
2688                  */
2689                 FormIndexDatum(indexInfo,
2690                                            slot,
2691                                            estate,
2692                                            values,
2693                                            isnull);
2694
2695                 /*
2696                  * Check that this tuple has no conflicts.
2697                  */
2698                 check_exclusion_constraint(heapRelation,
2699                                                                    indexRelation, indexInfo,
2700                                                                    &(heapTuple->t_self), values, isnull,
2701                                                                    estate, true);
2702         }
2703
2704         heap_endscan(scan);
2705         UnregisterSnapshot(snapshot);
2706
2707         ExecDropSingleTupleTableSlot(slot);
2708
2709         FreeExecutorState(estate);
2710
2711         /* These may have been pointing to the now-gone estate */
2712         indexInfo->ii_ExpressionsState = NIL;
2713         indexInfo->ii_PredicateState = NIL;
2714 }
2715
2716
2717 /*
2718  * validate_index - support code for concurrent index builds
2719  *
2720  * We do a concurrent index build by first inserting the catalog entry for the
2721  * index via index_create(), marking it not indisready and not indisvalid.
2722  * Then we commit our transaction and start a new one, then we wait for all
2723  * transactions that could have been modifying the table to terminate.  Now
2724  * we know that any subsequently-started transactions will see the index and
2725  * honor its constraints on HOT updates; so while existing HOT-chains might
2726  * be broken with respect to the index, no currently live tuple will have an
2727  * incompatible HOT update done to it.  We now build the index normally via
2728  * index_build(), while holding a weak lock that allows concurrent
2729  * insert/update/delete.  Also, we index only tuples that are valid
2730  * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
2731  * build takes care to include recently-dead tuples.  This is OK because
2732  * we won't mark the index valid until all transactions that might be able
2733  * to see those tuples are gone.  The reason for doing that is to avoid
2734  * bogus unique-index failures due to concurrent UPDATEs (we might see
2735  * different versions of the same row as being valid when we pass over them,
2736  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
2737  * does not contain any tuples added to the table while we built the index.
2738  *
2739  * Next, we mark the index "indisready" (but still not "indisvalid") and
2740  * commit the second transaction and start a third.  Again we wait for all
2741  * transactions that could have been modifying the table to terminate.  Now
2742  * we know that any subsequently-started transactions will see the index and
2743  * insert their new tuples into it.  We then take a new reference snapshot
2744  * which is passed to validate_index().  Any tuples that are valid according
2745  * to this snap, but are not in the index, must be added to the index.
2746  * (Any tuples committed live after the snap will be inserted into the
2747  * index by their originating transaction.  Any tuples committed dead before
2748  * the snap need not be indexed, because we will wait out all transactions
2749  * that might care about them before we mark the index valid.)
2750  *
2751  * validate_index() works by first gathering all the TIDs currently in the
2752  * index, using a bulkdelete callback that just stores the TIDs and doesn't
2753  * ever say "delete it".  (This should be faster than a plain indexscan;
2754  * also, not all index AMs support full-index indexscan.)  Then we sort the
2755  * TIDs, and finally scan the table doing a "merge join" against the TID list
2756  * to see which tuples are missing from the index.  Thus we will ensure that
2757  * all tuples valid according to the reference snapshot are in the index.
2758  *
2759  * Building a unique index this way is tricky: we might try to insert a
2760  * tuple that is already dead or is in process of being deleted, and we
2761  * mustn't have a uniqueness failure against an updated version of the same
2762  * row.  We could try to check the tuple to see if it's already dead and tell
2763  * index_insert() not to do the uniqueness check, but that still leaves us
2764  * with a race condition against an in-progress update.  To handle that,
2765  * we expect the index AM to recheck liveness of the to-be-inserted tuple
2766  * before it declares a uniqueness error.
2767  *
2768  * After completing validate_index(), we wait until all transactions that
2769  * were alive at the time of the reference snapshot are gone; this is
2770  * necessary to be sure there are none left with a transaction snapshot
2771  * older than the reference (and hence possibly able to see tuples we did
2772  * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
2773  * transactions will be able to use it for queries.
2774  *
2775  * Doing two full table scans is a brute-force strategy.  We could try to be
2776  * cleverer, eg storing new tuples in a special area of the table (perhaps
2777  * making the table append-only by setting use_fsm).  However that would
2778  * add yet more locking issues.
2779  */
2780 void
2781 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
2782 {
2783         Relation        heapRelation,
2784                                 indexRelation;
2785         IndexInfo  *indexInfo;
2786         IndexVacuumInfo ivinfo;
2787         v_i_state       state;
2788         Oid                     save_userid;
2789         int                     save_sec_context;
2790         int                     save_nestlevel;
2791
2792         /* Open and lock the parent heap relation */
2793         heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
2794         /* And the target index relation */
2795         indexRelation = index_open(indexId, RowExclusiveLock);
2796
2797         /*
2798          * Fetch info needed for index_insert.  (You might think this should be
2799          * passed in from DefineIndex, but its copy is long gone due to having
2800          * been built in a previous transaction.)
2801          */
2802         indexInfo = BuildIndexInfo(indexRelation);
2803
2804         /* mark build is concurrent just for consistency */
2805         indexInfo->ii_Concurrent = true;
2806
2807         /*
2808          * Switch to the table owner's userid, so that any index functions are run
2809          * as that user.  Also lock down security-restricted operations and
2810          * arrange to make GUC variable changes local to this command.
2811          */
2812         GetUserIdAndSecContext(&save_userid, &save_sec_context);
2813         SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2814                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
2815         save_nestlevel = NewGUCNestLevel();
2816
2817         /*
2818          * Scan the index and gather up all the TIDs into a tuplesort object.
2819          */
2820         ivinfo.index = indexRelation;
2821         ivinfo.analyze_only = false;
2822         ivinfo.estimated_count = true;
2823         ivinfo.message_level = DEBUG2;
2824         ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
2825         ivinfo.strategy = NULL;
2826
2827         /*
2828          * Encode TIDs as int8 values for the sort, rather than directly sorting
2829          * item pointers.  This can be significantly faster, primarily because TID
2830          * is a pass-by-reference type on all platforms, whereas int8 is
2831          * pass-by-value on most platforms.
2832          */
2833         state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
2834                                                                                         InvalidOid, false,
2835                                                                                         maintenance_work_mem,
2836                                                                                         false);
2837         state.htups = state.itups = state.tups_inserted = 0;
2838
2839         (void) index_bulk_delete(&ivinfo, NULL,
2840                                                          validate_index_callback, (void *) &state);
2841
2842         /* Execute the sort */
2843         tuplesort_performsort(state.tuplesort);
2844
2845         /*
2846          * Now scan the heap and "merge" it with the index
2847          */
2848         validate_index_heapscan(heapRelation,
2849                                                         indexRelation,
2850                                                         indexInfo,
2851                                                         snapshot,
2852                                                         &state);
2853
2854         /* Done with tuplesort object */
2855         tuplesort_end(state.tuplesort);
2856
2857         elog(DEBUG2,
2858                  "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
2859                  state.htups, state.itups, state.tups_inserted);
2860
2861         /* Roll back any GUC changes executed by index functions */
2862         AtEOXact_GUC(false, save_nestlevel);
2863
2864         /* Restore userid and security context */
2865         SetUserIdAndSecContext(save_userid, save_sec_context);
2866
2867         /* Close rels, but keep locks */
2868         index_close(indexRelation, NoLock);
2869         heap_close(heapRelation, NoLock);
2870 }
2871
2872 /*
2873  * itemptr_encode - Encode ItemPointer as int64/int8
2874  *
2875  * This representation must produce values encoded as int64 that sort in the
2876  * same order as their corresponding original TID values would (using the
2877  * default int8 opclass to produce a result equivalent to the default TID
2878  * opclass).
2879  *
2880  * As noted in validate_index(), this can be significantly faster.
2881  */
2882 static inline int64
2883 itemptr_encode(ItemPointer itemptr)
2884 {
2885         BlockNumber block = ItemPointerGetBlockNumber(itemptr);
2886         OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
2887         int64           encoded;
2888
2889         /*
2890          * Use the 16 least significant bits for the offset.  32 adjacent bits are
2891          * used for the block number.  Since remaining bits are unused, there
2892          * cannot be negative encoded values (We assume a two's complement
2893          * representation).
2894          */
2895         encoded = ((uint64) block << 16) | (uint16) offset;
2896
2897         return encoded;
2898 }
2899
2900 /*
2901  * itemptr_decode - Decode int64/int8 representation back to ItemPointer
2902  */
2903 static inline void
2904 itemptr_decode(ItemPointer itemptr, int64 encoded)
2905 {
2906         BlockNumber block = (BlockNumber) (encoded >> 16);
2907         OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
2908
2909         ItemPointerSet(itemptr, block, offset);
2910 }
2911
2912 /*
2913  * validate_index_callback - bulkdelete callback to collect the index TIDs
2914  */
2915 static bool
2916 validate_index_callback(ItemPointer itemptr, void *opaque)
2917 {
2918         v_i_state  *state = (v_i_state *) opaque;
2919         int64           encoded = itemptr_encode(itemptr);
2920
2921         tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
2922         state->itups += 1;
2923         return false;                           /* never actually delete anything */
2924 }
2925
2926 /*
2927  * validate_index_heapscan - second table scan for concurrent index build
2928  *
2929  * This has much code in common with IndexBuildHeapScan, but it's enough
2930  * different that it seems cleaner to have two routines not one.
2931  */
2932 static void
2933 validate_index_heapscan(Relation heapRelation,
2934                                                 Relation indexRelation,
2935                                                 IndexInfo *indexInfo,
2936                                                 Snapshot snapshot,
2937                                                 v_i_state *state)
2938 {
2939         HeapScanDesc scan;
2940         HeapTuple       heapTuple;
2941         Datum           values[INDEX_MAX_KEYS];
2942         bool            isnull[INDEX_MAX_KEYS];
2943         List       *predicate;
2944         TupleTableSlot *slot;
2945         EState     *estate;
2946         ExprContext *econtext;
2947         BlockNumber root_blkno = InvalidBlockNumber;
2948         OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2949         bool            in_index[MaxHeapTuplesPerPage];
2950
2951         /* state variables for the merge */
2952         ItemPointer indexcursor = NULL;
2953         ItemPointerData decoded;
2954         bool            tuplesort_empty = false;
2955
2956         /*
2957          * sanity checks
2958          */
2959         Assert(OidIsValid(indexRelation->rd_rel->relam));
2960
2961         /*
2962          * Need an EState for evaluation of index expressions and partial-index
2963          * predicates.  Also a slot to hold the current tuple.
2964          */
2965         estate = CreateExecutorState();
2966         econtext = GetPerTupleExprContext(estate);
2967         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2968
2969         /* Arrange for econtext's scan tuple to be the tuple under test */
2970         econtext->ecxt_scantuple = slot;
2971
2972         /* Set up execution state for predicate, if any. */
2973         predicate = (List *)
2974                 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2975                                                 estate);
2976
2977         /*
2978          * Prepare for scan of the base relation.  We need just those tuples
2979          * satisfying the passed-in reference snapshot.  We must disable syncscan
2980          * here, because it's critical that we read from block zero forward to
2981          * match the sorted TIDs.
2982          */
2983         scan = heap_beginscan_strat(heapRelation,       /* relation */
2984                                                                 snapshot,               /* snapshot */
2985                                                                 0,              /* number of keys */
2986                                                                 NULL,   /* scan key */
2987                                                                 true,   /* buffer access strategy OK */
2988                                                                 false); /* syncscan not OK */
2989
2990         /*
2991          * Scan all tuples matching the snapshot.
2992          */
2993         while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2994         {
2995                 ItemPointer heapcursor = &heapTuple->t_self;
2996                 ItemPointerData rootTuple;
2997                 OffsetNumber root_offnum;
2998
2999                 CHECK_FOR_INTERRUPTS();
3000
3001                 state->htups += 1;
3002
3003                 /*
3004                  * As commented in IndexBuildHeapScan, we should index heap-only
3005                  * tuples under the TIDs of their root tuples; so when we advance onto
3006                  * a new heap page, build a map of root item offsets on the page.
3007                  *
3008                  * This complicates merging against the tuplesort output: we will
3009                  * visit the live tuples in order by their offsets, but the root
3010                  * offsets that we need to compare against the index contents might be
3011                  * ordered differently.  So we might have to "look back" within the
3012                  * tuplesort output, but only within the current page.  We handle that
3013                  * by keeping a bool array in_index[] showing all the
3014                  * already-passed-over tuplesort output TIDs of the current page. We
3015                  * clear that array here, when advancing onto a new heap page.
3016                  */
3017                 if (scan->rs_cblock != root_blkno)
3018                 {
3019                         Page            page = BufferGetPage(scan->rs_cbuf);
3020
3021                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
3022                         heap_get_root_tuples(page, root_offsets);
3023                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
3024
3025                         memset(in_index, 0, sizeof(in_index));
3026
3027                         root_blkno = scan->rs_cblock;
3028                 }
3029
3030                 /* Convert actual tuple TID to root TID */
3031                 rootTuple = *heapcursor;
3032                 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
3033
3034                 if (HeapTupleIsHeapOnly(heapTuple))
3035                 {
3036                         root_offnum = root_offsets[root_offnum - 1];
3037                         if (!OffsetNumberIsValid(root_offnum))
3038                                 elog(ERROR, "failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
3039                                          ItemPointerGetBlockNumber(heapcursor),
3040                                          ItemPointerGetOffsetNumber(heapcursor),
3041                                          RelationGetRelationName(heapRelation));
3042                         ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
3043                 }
3044
3045                 /*
3046                  * "merge" by skipping through the index tuples until we find or pass
3047                  * the current root tuple.
3048                  */
3049                 while (!tuplesort_empty &&
3050                            (!indexcursor ||
3051                                 ItemPointerCompare(indexcursor, &rootTuple) < 0))
3052                 {
3053                         Datum           ts_val;
3054                         bool            ts_isnull;
3055
3056                         if (indexcursor)
3057                         {
3058                                 /*
3059                                  * Remember index items seen earlier on the current heap page
3060                                  */
3061                                 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
3062                                         in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
3063                         }
3064
3065                         tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
3066                                                                                                   &ts_val, &ts_isnull, NULL);
3067                         Assert(tuplesort_empty || !ts_isnull);
3068                         if (!tuplesort_empty)
3069                         {
3070                                 itemptr_decode(&decoded, DatumGetInt64(ts_val));
3071                                 indexcursor = &decoded;
3072
3073                                 /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
3074 #ifndef USE_FLOAT8_BYVAL
3075                                 pfree(DatumGetPointer(ts_val));
3076 #endif
3077                         }
3078                         else
3079                         {
3080                                 /* Be tidy */
3081                                 indexcursor = NULL;
3082                         }
3083                 }
3084
3085                 /*
3086                  * If the tuplesort has overshot *and* we didn't see a match earlier,
3087                  * then this tuple is missing from the index, so insert it.
3088                  */
3089                 if ((tuplesort_empty ||
3090                          ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
3091                         !in_index[root_offnum - 1])
3092                 {
3093                         MemoryContextReset(econtext->ecxt_per_tuple_memory);
3094
3095                         /* Set up for predicate or expression evaluation */
3096                         ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
3097
3098                         /*
3099                          * In a partial index, discard tuples that don't satisfy the
3100                          * predicate.
3101                          */
3102                         if (predicate != NIL)
3103                         {
3104                                 if (!ExecQual(predicate, econtext, false))
3105                                         continue;
3106                         }
3107
3108                         /*
3109                          * For the current heap tuple, extract all the attributes we use
3110                          * in this index, and note which are null.  This also performs
3111                          * evaluation of any expressions needed.
3112                          */
3113                         FormIndexDatum(indexInfo,
3114                                                    slot,
3115                                                    estate,
3116                                                    values,
3117                                                    isnull);
3118
3119                         /*
3120                          * You'd think we should go ahead and build the index tuple here,
3121                          * but some index AMs want to do further processing on the data
3122                          * first. So pass the values[] and isnull[] arrays, instead.
3123                          */
3124
3125                         /*
3126                          * If the tuple is already committed dead, you might think we
3127                          * could suppress uniqueness checking, but this is no longer true
3128                          * in the presence of HOT, because the insert is actually a proxy
3129                          * for a uniqueness check on the whole HOT-chain.  That is, the
3130                          * tuple we have here could be dead because it was already
3131                          * HOT-updated, and if so the updating transaction will not have
3132                          * thought it should insert index entries.  The index AM will
3133                          * check the whole HOT-chain and correctly detect a conflict if
3134                          * there is one.
3135                          */
3136
3137                         index_insert(indexRelation,
3138                                                  values,
3139                                                  isnull,
3140                                                  &rootTuple,
3141                                                  heapRelation,
3142                                                  indexInfo->ii_Unique ?
3143                                                  UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
3144
3145                         state->tups_inserted += 1;
3146                 }
3147         }
3148
3149         heap_endscan(scan);
3150
3151         ExecDropSingleTupleTableSlot(slot);
3152
3153         FreeExecutorState(estate);
3154
3155         /* These may have been pointing to the now-gone estate */
3156         indexInfo->ii_ExpressionsState = NIL;
3157         indexInfo->ii_PredicateState = NIL;
3158 }
3159
3160
3161 /*
3162  * index_set_state_flags - adjust pg_index state flags
3163  *
3164  * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3165  * flags that denote the index's state.  Because the update is not
3166  * transactional and will not roll back on error, this must only be used as
3167  * the last step in a transaction that has not made any transactional catalog
3168  * updates!
3169  *
3170  * Note that heap_inplace_update does send a cache inval message for the
3171  * tuple, so other sessions will hear about the update as soon as we commit.
3172  *
3173  * NB: In releases prior to PostgreSQL 9.4, the use of a non-transactional
3174  * update here would have been unsafe; now that MVCC rules apply even for
3175  * system catalog scans, we could potentially use a transactional update here
3176  * instead.
3177  */
3178 void
3179 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3180 {
3181         Relation        pg_index;
3182         HeapTuple       indexTuple;
3183         Form_pg_index indexForm;
3184
3185         /* Assert that current xact hasn't done any transactional updates */
3186         Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
3187
3188         /* Open pg_index and fetch a writable copy of the index's tuple */
3189         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3190
3191         indexTuple = SearchSysCacheCopy1(INDEXRELID,
3192                                                                          ObjectIdGetDatum(indexId));
3193         if (!HeapTupleIsValid(indexTuple))
3194                 elog(ERROR, "cache lookup failed for index %u", indexId);
3195         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3196
3197         /* Perform the requested state change on the copy */
3198         switch (action)
3199         {
3200                 case INDEX_CREATE_SET_READY:
3201                         /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3202                         Assert(indexForm->indislive);
3203                         Assert(!indexForm->indisready);
3204                         Assert(!indexForm->indisvalid);
3205                         indexForm->indisready = true;
3206                         break;
3207                 case INDEX_CREATE_SET_VALID:
3208                         /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3209                         Assert(indexForm->indislive);
3210                         Assert(indexForm->indisready);
3211                         Assert(!indexForm->indisvalid);
3212                         indexForm->indisvalid = true;
3213                         break;
3214                 case INDEX_DROP_CLEAR_VALID:
3215
3216                         /*
3217                          * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3218                          *
3219                          * If indisready == true we leave it set so the index still gets
3220                          * maintained by active transactions.  We only need to ensure that
3221                          * indisvalid is false.  (We don't assert that either is initially
3222                          * true, though, since we want to be able to retry a DROP INDEX
3223                          * CONCURRENTLY that failed partway through.)
3224                          *
3225                          * Note: the CLUSTER logic assumes that indisclustered cannot be
3226                          * set on any invalid index, so clear that flag too.
3227                          */
3228                         indexForm->indisvalid = false;
3229                         indexForm->indisclustered = false;
3230                         break;
3231                 case INDEX_DROP_SET_DEAD:
3232
3233                         /*
3234                          * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3235                          *
3236                          * We clear both indisready and indislive, because we not only
3237                          * want to stop updates, we want to prevent sessions from touching
3238                          * the index at all.
3239                          */
3240                         Assert(!indexForm->indisvalid);
3241                         indexForm->indisready = false;
3242                         indexForm->indislive = false;
3243                         break;
3244         }
3245
3246         /* ... and write it back in-place */
3247         heap_inplace_update(pg_index, indexTuple);
3248
3249         heap_close(pg_index, RowExclusiveLock);
3250 }
3251
3252
3253 /*
3254  * IndexGetRelation: given an index's relation OID, get the OID of the
3255  * relation it is an index on.  Uses the system cache.
3256  */
3257 Oid
3258 IndexGetRelation(Oid indexId, bool missing_ok)
3259 {
3260         HeapTuple       tuple;
3261         Form_pg_index index;
3262         Oid                     result;
3263
3264         tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3265         if (!HeapTupleIsValid(tuple))
3266         {
3267                 if (missing_ok)
3268                         return InvalidOid;
3269                 elog(ERROR, "cache lookup failed for index %u", indexId);
3270         }
3271         index = (Form_pg_index) GETSTRUCT(tuple);
3272         Assert(index->indexrelid == indexId);
3273
3274         result = index->indrelid;
3275         ReleaseSysCache(tuple);
3276         return result;
3277 }
3278
3279 /*
3280  * reindex_index - This routine is used to recreate a single index
3281  */
3282 void
3283 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3284                           int options)
3285 {
3286         Relation        iRel,
3287                                 heapRelation;
3288         Oid                     heapId;
3289         IndexInfo  *indexInfo;
3290         volatile bool skipped_constraint = false;
3291         PGRUsage        ru0;
3292
3293         pg_rusage_init(&ru0);
3294
3295         /*
3296          * Open and lock the parent heap relation.  ShareLock is sufficient since
3297          * we only need to be sure no schema or data changes are going on.
3298          */
3299         heapId = IndexGetRelation(indexId, false);
3300         heapRelation = heap_open(heapId, ShareLock);
3301
3302         /*
3303          * Open the target index relation and get an exclusive lock on it, to
3304          * ensure that no one else is touching this particular index.
3305          */
3306         iRel = index_open(indexId, AccessExclusiveLock);
3307
3308         /*
3309          * Don't allow reindex on temp tables of other backends ... their local
3310          * buffer manager is not going to cope.
3311          */
3312         if (RELATION_IS_OTHER_TEMP(iRel))
3313                 ereport(ERROR,
3314                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3315                            errmsg("cannot reindex temporary tables of other sessions")));
3316
3317         /*
3318          * Also check for active uses of the index in the current transaction; we
3319          * don't want to reindex underneath an open indexscan.
3320          */
3321         CheckTableNotInUse(iRel, "REINDEX INDEX");
3322
3323         /*
3324          * All predicate locks on the index are about to be made invalid. Promote
3325          * them to relation locks on the heap.
3326          */
3327         TransferPredicateLocksToHeapRelation(iRel);
3328
3329         PG_TRY();
3330         {
3331                 /* Suppress use of the target index while rebuilding it */
3332                 SetReindexProcessing(heapId, indexId);
3333
3334                 /* Fetch info needed for index_build */
3335                 indexInfo = BuildIndexInfo(iRel);
3336
3337                 /* If requested, skip checking uniqueness/exclusion constraints */
3338                 if (skip_constraint_checks)
3339                 {
3340                         if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3341                                 skipped_constraint = true;
3342                         indexInfo->ii_Unique = false;
3343                         indexInfo->ii_ExclusionOps = NULL;
3344                         indexInfo->ii_ExclusionProcs = NULL;
3345                         indexInfo->ii_ExclusionStrats = NULL;
3346                 }
3347
3348                 /* We'll build a new physical relation for the index */
3349                 RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
3350                                                                   InvalidMultiXactId);
3351
3352                 /* Initialize the index and rebuild */
3353                 /* Note: we do not need to re-establish pkey setting */
3354                 index_build(heapRelation, iRel, indexInfo, false, true);
3355         }
3356         PG_CATCH();
3357         {
3358                 /* Make sure flag gets cleared on error exit */
3359                 ResetReindexProcessing();
3360                 PG_RE_THROW();
3361         }
3362         PG_END_TRY();
3363         ResetReindexProcessing();
3364
3365         /*
3366          * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3367          * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3368          * and we didn't skip a uniqueness check, we can now mark it valid.  This
3369          * allows REINDEX to be used to clean up in such cases.
3370          *
3371          * We can also reset indcheckxmin, because we have now done a
3372          * non-concurrent index build, *except* in the case where index_build
3373          * found some still-broken HOT chains. If it did, and we don't have to
3374          * change any of the other flags, we just leave indcheckxmin alone (note
3375          * that index_build won't have changed it, because this is a reindex).
3376          * This is okay and desirable because not updating the tuple leaves the
3377          * index's usability horizon (recorded as the tuple's xmin value) the same
3378          * as it was.
3379          *
3380          * But, if the index was invalid/not-ready/dead and there were broken HOT
3381          * chains, we had better force indcheckxmin true, because the normal
3382          * argument that the HOT chains couldn't conflict with the index is
3383          * suspect for an invalid index.  (A conflict is definitely possible if
3384          * the index was dead.  It probably shouldn't happen otherwise, but let's
3385          * be conservative.)  In this case advancing the usability horizon is
3386          * appropriate.
3387          *
3388          * Another reason for avoiding unnecessary updates here is that while
3389          * reindexing pg_index itself, we must not try to update tuples in it.
3390          * pg_index's indexes should always have these flags in their clean state,
3391          * so that won't happen.
3392          */
3393         if (!skipped_constraint)
3394         {
3395                 Relation        pg_index;
3396                 HeapTuple       indexTuple;
3397                 Form_pg_index indexForm;
3398                 bool            index_bad;
3399
3400                 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3401
3402                 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3403                                                                                  ObjectIdGetDatum(indexId));
3404                 if (!HeapTupleIsValid(indexTuple))
3405                         elog(ERROR, "cache lookup failed for index %u", indexId);
3406                 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3407
3408                 index_bad = (!indexForm->indisvalid ||
3409                                          !indexForm->indisready ||
3410                                          !indexForm->indislive);
3411                 if (index_bad ||
3412                         (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
3413                 {
3414                         if (!indexInfo->ii_BrokenHotChain)
3415                                 indexForm->indcheckxmin = false;
3416                         else if (index_bad)
3417                                 indexForm->indcheckxmin = true;
3418                         indexForm->indisvalid = true;
3419                         indexForm->indisready = true;
3420                         indexForm->indislive = true;
3421                         simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
3422                         CatalogUpdateIndexes(pg_index, indexTuple);
3423
3424                         /*
3425                          * Invalidate the relcache for the table, so that after we commit
3426                          * all sessions will refresh the table's index list.  This ensures
3427                          * that if anyone misses seeing the pg_index row during this
3428                          * update, they'll refresh their list before attempting any update
3429                          * on the table.
3430                          */
3431                         CacheInvalidateRelcache(heapRelation);
3432                 }
3433
3434                 heap_close(pg_index, RowExclusiveLock);
3435         }
3436
3437         /* Log what we did */
3438         if (options & REINDEXOPT_VERBOSE)
3439                 ereport(INFO,
3440                                 (errmsg("index \"%s\" was reindexed",
3441                                                 get_rel_name(indexId)),
3442                                  errdetail("%s.",
3443                                                    pg_rusage_show(&ru0))));
3444
3445         /* Close rels, but keep locks */
3446         index_close(iRel, NoLock);
3447         heap_close(heapRelation, NoLock);
3448 }
3449
3450 /*
3451  * reindex_relation - This routine is used to recreate all indexes
3452  * of a relation (and optionally its toast relation too, if any).
3453  *
3454  * "flags" is a bitmask that can include any combination of these bits:
3455  *
3456  * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3457  *
3458  * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3459  * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3460  * indexes are inconsistent with it.  This makes things tricky if the relation
3461  * is a system catalog that we might consult during the reindexing.  To deal
3462  * with that case, we mark all of the indexes as pending rebuild so that they
3463  * won't be trusted until rebuilt.  The caller is required to call us *without*
3464  * having made the rebuilt table visible by doing CommandCounterIncrement;
3465  * we'll do CCI after having collected the index list.  (This way we can still
3466  * use catalog indexes while collecting the list.)
3467  *
3468  * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3469  * constraint conditions, else don't.  To avoid deadlocks, VACUUM FULL or
3470  * CLUSTER on a system catalog must omit this flag.  REINDEX should be used to
3471  * rebuild an index if constraint inconsistency is suspected.  For optimal
3472  * performance, other callers should include the flag only after transforming
3473  * the data in a manner that risks a change in constraint validity.
3474  *
3475  * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3476  * rebuilt indexes to unlogged.
3477  *
3478  * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3479  * rebuilt indexes to permanent.
3480  *
3481  * Returns true if any indexes were rebuilt (including toast table's index
3482  * when relevant).  Note that a CommandCounterIncrement will occur after each
3483  * index rebuild.
3484  */
3485 bool
3486 reindex_relation(Oid relid, int flags, int options)
3487 {
3488         Relation        rel;
3489         Oid                     toast_relid;
3490         List       *indexIds;
3491         bool            is_pg_class;
3492         bool            result;
3493
3494         /*
3495          * Open and lock the relation.  ShareLock is sufficient since we only need
3496          * to prevent schema and data changes in it.  The lock level used here
3497          * should match ReindexTable().
3498          */
3499         rel = heap_open(relid, ShareLock);
3500
3501         toast_relid = rel->rd_rel->reltoastrelid;
3502
3503         /*
3504          * Get the list of index OIDs for this relation.  (We trust to the
3505          * relcache to get this with a sequential scan if ignoring system
3506          * indexes.)
3507          */
3508         indexIds = RelationGetIndexList(rel);
3509
3510         /*
3511          * reindex_index will attempt to update the pg_class rows for the relation
3512          * and index.  If we are processing pg_class itself, we want to make sure
3513          * that the updates do not try to insert index entries into indexes we
3514          * have not processed yet.  (When we are trying to recover from corrupted
3515          * indexes, that could easily cause a crash.) We can accomplish this
3516          * because CatalogUpdateIndexes will use the relcache's index list to know
3517          * which indexes to update. We just force the index list to be only the
3518          * stuff we've processed.
3519          *
3520          * It is okay to not insert entries into the indexes we have not processed
3521          * yet because all of this is transaction-safe.  If we fail partway
3522          * through, the updated rows are dead and it doesn't matter whether they
3523          * have index entries.  Also, a new pg_class index will be created with a
3524          * correct entry for its own pg_class row because we do
3525          * RelationSetNewRelfilenode() before we do index_build().
3526          *
3527          * Note that we also clear pg_class's rd_oidindex until the loop is done,
3528          * so that that index can't be accessed either.  This means we cannot
3529          * safely generate new relation OIDs while in the loop; shouldn't be a
3530          * problem.
3531          */
3532         is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
3533
3534         /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
3535         if (is_pg_class)
3536                 (void) RelationGetIndexAttrBitmap(rel, INDEX_ATTR_BITMAP_ALL);
3537
3538         PG_TRY();
3539         {
3540                 List       *doneIndexes;
3541                 ListCell   *indexId;
3542                 char            persistence;
3543
3544                 if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3545                 {
3546                         /* Suppress use of all the indexes until they are rebuilt */
3547                         SetReindexPending(indexIds);
3548
3549                         /*
3550                          * Make the new heap contents visible --- now things might be
3551                          * inconsistent!
3552                          */
3553                         CommandCounterIncrement();
3554                 }
3555
3556                 /*
3557                  * Compute persistence of indexes: same as that of owning rel, unless
3558                  * caller specified otherwise.
3559                  */
3560                 if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3561                         persistence = RELPERSISTENCE_UNLOGGED;
3562                 else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3563                         persistence = RELPERSISTENCE_PERMANENT;
3564                 else
3565                         persistence = rel->rd_rel->relpersistence;
3566
3567                 /* Reindex all the indexes. */
3568                 doneIndexes = NIL;
3569                 foreach(indexId, indexIds)
3570                 {
3571                         Oid                     indexOid = lfirst_oid(indexId);
3572
3573                         if (is_pg_class)
3574                                 RelationSetIndexList(rel, doneIndexes, InvalidOid);
3575
3576                         reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3577                                                   persistence, options);
3578
3579                         CommandCounterIncrement();
3580
3581                         /* Index should no longer be in the pending list */
3582                         Assert(!ReindexIsProcessingIndex(indexOid));
3583
3584                         if (is_pg_class)
3585                                 doneIndexes = lappend_oid(doneIndexes, indexOid);
3586                 }
3587         }
3588         PG_CATCH();
3589         {
3590                 /* Make sure list gets cleared on error exit */
3591                 ResetReindexPending();
3592                 PG_RE_THROW();
3593         }
3594         PG_END_TRY();
3595         ResetReindexPending();
3596
3597         if (is_pg_class)
3598                 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
3599
3600         /*
3601          * Close rel, but continue to hold the lock.
3602          */
3603         heap_close(rel, NoLock);
3604
3605         result = (indexIds != NIL);
3606
3607         /*
3608          * If the relation has a secondary toast rel, reindex that too while we
3609          * still hold the lock on the master table.
3610          */
3611         if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3612                 result |= reindex_relation(toast_relid, flags, options);
3613
3614         return result;
3615 }
3616
3617
3618 /* ----------------------------------------------------------------
3619  *              System index reindexing support
3620  *
3621  * When we are busy reindexing a system index, this code provides support
3622  * for preventing catalog lookups from using that index.  We also make use
3623  * of this to catch attempted uses of user indexes during reindexing of
3624  * those indexes.
3625  * ----------------------------------------------------------------
3626  */
3627
/* Heap recorded by SetReindexProcessing; InvalidOid after ResetReindexProcessing */
3628 static Oid      currentlyReindexedHeap = InvalidOid;
/* Index recorded by SetReindexProcessing; InvalidOid after ResetReindexProcessing */
3629 static Oid      currentlyReindexedIndex = InvalidOid;
/* OID list installed by SetReindexPending; NIL after ResetReindexPending */
3630 static List *pendingReindexedIndexes = NIL;
3631
3632 /*
3633  * ReindexIsProcessingHeap
3634  *              True if heap specified by OID is currently being reindexed.
3635  */
3636 bool
3637 ReindexIsProcessingHeap(Oid heapOid)
3638 {
3639         return heapOid == currentlyReindexedHeap;
3640 }
3641
3642 /*
3643  * ReindexIsCurrentlyProcessingIndex
3644  *              True if index specified by OID is currently being reindexed.
3645  */
3646 static bool
3647 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3648 {
3649         return indexOid == currentlyReindexedIndex;
3650 }
3651
3652 /*
3653  * ReindexIsProcessingIndex
3654  *              True if index specified by OID is currently being reindexed,
3655  *              or should be treated as invalid because it is awaiting reindex.
3656  */
3657 bool
3658 ReindexIsProcessingIndex(Oid indexOid)
3659 {
3660         return indexOid == currentlyReindexedIndex ||
3661                 list_member_oid(pendingReindexedIndexes, indexOid);
3662 }
3663
3664 /*
3665  * SetReindexProcessing
3666  *              Set flag that specified heap/index are being reindexed.
3667  *
3668  * NB: caller must use a PG_TRY block to ensure ResetReindexProcessing is done.
3669  */
3670 static void
3671 SetReindexProcessing(Oid heapOid, Oid indexOid)
3672 {
3673         Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3674         /* Reindexing is not re-entrant. */
3675         if (OidIsValid(currentlyReindexedHeap))
3676                 elog(ERROR, "cannot reindex while reindexing");
3677         currentlyReindexedHeap = heapOid;
3678         currentlyReindexedIndex = indexOid;
3679         /* Index is no longer "pending" reindex. */
3680         RemoveReindexPending(indexOid);
3681 }
3682
3683 /*
3684  * ResetReindexProcessing
3685  *              Unset reindexing status.
3686  */
3687 static void
3688 ResetReindexProcessing(void)
3689 {
3690         currentlyReindexedHeap = InvalidOid;
3691         currentlyReindexedIndex = InvalidOid;
3692 }
3693
3694 /*
3695  * SetReindexPending
3696  *              Mark the given indexes as pending reindex.
3697  *
3698  * NB: caller must use a PG_TRY block to ensure ResetReindexPending is done.
3699  * Also, we assume that the current memory context stays valid throughout.
3700  */
3701 static void
3702 SetReindexPending(List *indexes)
3703 {
3704         /* Reindexing is not re-entrant. */
3705         if (pendingReindexedIndexes)
3706                 elog(ERROR, "cannot reindex while reindexing");
3707         pendingReindexedIndexes = list_copy(indexes);
3708 }
3709
3710 /*
3711  * RemoveReindexPending
3712  *              Remove the given index from the pending list.
3713  */
3714 static void
3715 RemoveReindexPending(Oid indexOid)
3716 {
3717         pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3718                                                                                           indexOid);
3719 }
3720
3721 /*
3722  * ResetReindexPending
3723  *              Unset reindex-pending status.
3724  */
3725 static void
3726 ResetReindexPending(void)
3727 {
3728         pendingReindexedIndexes = NIL;
3729 }