]> granicus.if.org Git - postgresql/blob - src/backend/catalog/index.c
Fix recently-understood problems with handling of XID freezing, particularly
[postgresql] / src / backend / catalog / index.c
1 /*-------------------------------------------------------------------------
2  *
3  * index.c
4  *        code to create and destroy POSTGRES index relations
5  *
6  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.274 2006/10/04 00:29:50 momjian Exp $
12  *
13  *
14  * INTERFACE ROUTINES
15  *              index_create()                  - Create a cataloged index relation
16  *              index_drop()                    - Removes index relation from catalogs
17  *              BuildIndexInfo()                - Prepare to insert index tuples
18  *              FormIndexDatum()                - Construct datum vector for one index tuple
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/transam.h"
29 #include "access/xact.h"
30 #include "bootstrap/bootstrap.h"
31 #include "catalog/catalog.h"
32 #include "catalog/dependency.h"
33 #include "catalog/heap.h"
34 #include "catalog/index.h"
35 #include "catalog/indexing.h"
36 #include "catalog/pg_constraint.h"
37 #include "catalog/pg_operator.h"
38 #include "catalog/pg_opclass.h"
39 #include "catalog/pg_type.h"
40 #include "executor/executor.h"
41 #include "miscadmin.h"
42 #include "optimizer/clauses.h"
43 #include "parser/parse_expr.h"
44 #include "storage/procarray.h"
45 #include "storage/smgr.h"
46 #include "utils/builtins.h"
47 #include "utils/fmgroids.h"
48 #include "utils/inval.h"
49 #include "utils/lsyscache.h"
50 #include "utils/memutils.h"
51 #include "utils/relcache.h"
52 #include "utils/syscache.h"
53 #include "utils/tuplesort.h"
54
55
56 /* state info for validate_index bulkdelete callback */
57 typedef struct
58 {
59         Tuplesortstate *tuplesort;      /* for sorting the index TIDs */
60         /* statistics (for debug purposes only): */
61         double          htups,
62                                 itups,
63                                 tups_inserted;
64 } v_i_state;
65
66 /* non-export function prototypes */
67 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
68                                                  IndexInfo *indexInfo,
69                                                  Oid *classObjectId);
70 static void InitializeAttributeOids(Relation indexRelation,
71                                                 int numatts, Oid indexoid);
72 static void AppendAttributeTuples(Relation indexRelation, int numatts);
73 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
74                                         IndexInfo *indexInfo,
75                                         Oid *classOids,
76                                         bool primary,
77                                         bool isvalid);
78 static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
79                                    Oid reltoastidxid, double reltuples);
80 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
81 static void validate_index_heapscan(Relation heapRelation,
82                                                 Relation indexRelation,
83                                                 IndexInfo *indexInfo,
84                                                 Snapshot snapshot,
85                                                 v_i_state *state);
86 static Oid      IndexGetRelation(Oid indexId);
87
88
89 /*
90  *              ConstructTupleDescriptor
91  *
92  * Build an index tuple descriptor for a new index
93  */
94 static TupleDesc
95 ConstructTupleDescriptor(Relation heapRelation,
96                                                  IndexInfo *indexInfo,
97                                                  Oid *classObjectId)
98 {
99         int                     numatts = indexInfo->ii_NumIndexAttrs;
100         ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
101         TupleDesc       heapTupDesc;
102         TupleDesc       indexTupDesc;
103         int                     natts;                  /* #atts in heap rel --- for error checks */
104         int                     i;
105
106         heapTupDesc = RelationGetDescr(heapRelation);
107         natts = RelationGetForm(heapRelation)->relnatts;
108
109         /*
110          * allocate the new tuple descriptor
111          */
112         indexTupDesc = CreateTemplateTupleDesc(numatts, false);
113
114         /*
115          * For simple index columns, we copy the pg_attribute row from the parent
116          * relation and modify it as necessary.  For expressions we have to cons
117          * up a pg_attribute row the hard way.
118          */
119         for (i = 0; i < numatts; i++)
120         {
121                 AttrNumber      atnum = indexInfo->ii_KeyAttrNumbers[i];
122                 Form_pg_attribute to = indexTupDesc->attrs[i];
123                 HeapTuple       tuple;
124                 Form_pg_type typeTup;
125                 Oid                     keyType;
126
127                 if (atnum != 0)
128                 {
129                         /* Simple index column */
130                         Form_pg_attribute from;
131
132                         if (atnum < 0)
133                         {
134                                 /*
135                                  * here we are indexing on a system attribute (-1...-n)
136                                  */
137                                 from = SystemAttributeDefinition(atnum,
138                                                                                    heapRelation->rd_rel->relhasoids);
139                         }
140                         else
141                         {
142                                 /*
143                                  * here we are indexing on a normal attribute (1...n)
144                                  */
145                                 if (atnum > natts)              /* safety check */
146                                         elog(ERROR, "invalid column number %d", atnum);
147                                 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
148                         }
149
150                         /*
151                          * now that we've determined the "from", let's copy the tuple desc
152                          * data...
153                          */
154                         memcpy(to, from, ATTRIBUTE_TUPLE_SIZE);
155
156                         /*
157                          * Fix the stuff that should not be the same as the underlying
158                          * attr
159                          */
160                         to->attnum = i + 1;
161
162                         to->attstattarget = -1;
163                         to->attcacheoff = -1;
164                         to->attnotnull = false;
165                         to->atthasdef = false;
166                         to->attislocal = true;
167                         to->attinhcount = 0;
168                 }
169                 else
170                 {
171                         /* Expressional index */
172                         Node       *indexkey;
173
174                         MemSet(to, 0, ATTRIBUTE_TUPLE_SIZE);
175
176                         if (indexpr_item == NULL)       /* shouldn't happen */
177                                 elog(ERROR, "too few entries in indexprs list");
178                         indexkey = (Node *) lfirst(indexpr_item);
179                         indexpr_item = lnext(indexpr_item);
180
181                         /*
182                          * Make the attribute's name "pg_expresssion_nnn" (maybe think of
183                          * something better later)
184                          */
185                         sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
186
187                         /*
188                          * Lookup the expression type in pg_type for the type length etc.
189                          */
190                         keyType = exprType(indexkey);
191                         tuple = SearchSysCache(TYPEOID,
192                                                                    ObjectIdGetDatum(keyType),
193                                                                    0, 0, 0);
194                         if (!HeapTupleIsValid(tuple))
195                                 elog(ERROR, "cache lookup failed for type %u", keyType);
196                         typeTup = (Form_pg_type) GETSTRUCT(tuple);
197
198                         /*
199                          * Assign some of the attributes values. Leave the rest as 0.
200                          */
201                         to->attnum = i + 1;
202                         to->atttypid = keyType;
203                         to->attlen = typeTup->typlen;
204                         to->attbyval = typeTup->typbyval;
205                         to->attstorage = typeTup->typstorage;
206                         to->attalign = typeTup->typalign;
207                         to->attstattarget = -1;
208                         to->attcacheoff = -1;
209                         to->atttypmod = -1;
210                         to->attislocal = true;
211
212                         ReleaseSysCache(tuple);
213                 }
214
215                 /*
216                  * We do not yet have the correct relation OID for the index, so just
217                  * set it invalid for now.      InitializeAttributeOids() will fix it
218                  * later.
219                  */
220                 to->attrelid = InvalidOid;
221
222                 /*
223                  * Check the opclass to see if it provides a keytype (overriding the
224                  * attribute type).
225                  */
226                 tuple = SearchSysCache(CLAOID,
227                                                            ObjectIdGetDatum(classObjectId[i]),
228                                                            0, 0, 0);
229                 if (!HeapTupleIsValid(tuple))
230                         elog(ERROR, "cache lookup failed for opclass %u",
231                                  classObjectId[i]);
232                 keyType = ((Form_pg_opclass) GETSTRUCT(tuple))->opckeytype;
233                 ReleaseSysCache(tuple);
234
235                 if (OidIsValid(keyType) && keyType != to->atttypid)
236                 {
237                         /* index value and heap value have different types */
238                         tuple = SearchSysCache(TYPEOID,
239                                                                    ObjectIdGetDatum(keyType),
240                                                                    0, 0, 0);
241                         if (!HeapTupleIsValid(tuple))
242                                 elog(ERROR, "cache lookup failed for type %u", keyType);
243                         typeTup = (Form_pg_type) GETSTRUCT(tuple);
244
245                         to->atttypid = keyType;
246                         to->atttypmod = -1;
247                         to->attlen = typeTup->typlen;
248                         to->attbyval = typeTup->typbyval;
249                         to->attalign = typeTup->typalign;
250                         to->attstorage = typeTup->typstorage;
251
252                         ReleaseSysCache(tuple);
253                 }
254         }
255
256         return indexTupDesc;
257 }
258
259 /* ----------------------------------------------------------------
260  *              InitializeAttributeOids
261  * ----------------------------------------------------------------
262  */
263 static void
264 InitializeAttributeOids(Relation indexRelation,
265                                                 int numatts,
266                                                 Oid indexoid)
267 {
268         TupleDesc       tupleDescriptor;
269         int                     i;
270
271         tupleDescriptor = RelationGetDescr(indexRelation);
272
273         for (i = 0; i < numatts; i += 1)
274                 tupleDescriptor->attrs[i]->attrelid = indexoid;
275 }
276
277 /* ----------------------------------------------------------------
278  *              AppendAttributeTuples
279  * ----------------------------------------------------------------
280  */
281 static void
282 AppendAttributeTuples(Relation indexRelation, int numatts)
283 {
284         Relation        pg_attribute;
285         CatalogIndexState indstate;
286         TupleDesc       indexTupDesc;
287         HeapTuple       new_tuple;
288         int                     i;
289
290         /*
291          * open the attribute relation and its indexes
292          */
293         pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
294
295         indstate = CatalogOpenIndexes(pg_attribute);
296
297         /*
298          * insert data from new index's tupdesc into pg_attribute
299          */
300         indexTupDesc = RelationGetDescr(indexRelation);
301
302         for (i = 0; i < numatts; i++)
303         {
304                 /*
305                  * There used to be very grotty code here to set these fields, but I
306                  * think it's unnecessary.  They should be set already.
307                  */
308                 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
309                 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
310
311                 new_tuple = heap_addheader(Natts_pg_attribute,
312                                                                    false,
313                                                                    ATTRIBUTE_TUPLE_SIZE,
314                                                                    (void *) indexTupDesc->attrs[i]);
315
316                 simple_heap_insert(pg_attribute, new_tuple);
317
318                 CatalogIndexInsert(indstate, new_tuple);
319
320                 heap_freetuple(new_tuple);
321         }
322
323         CatalogCloseIndexes(indstate);
324
325         heap_close(pg_attribute, RowExclusiveLock);
326 }
327
328 /* ----------------------------------------------------------------
329  *              UpdateIndexRelation
330  *
331  * Construct and insert a new entry in the pg_index catalog
332  * ----------------------------------------------------------------
333  */
334 static void
335 UpdateIndexRelation(Oid indexoid,
336                                         Oid heapoid,
337                                         IndexInfo *indexInfo,
338                                         Oid *classOids,
339                                         bool primary,
340                                         bool isvalid)
341 {
342         int2vector *indkey;
343         oidvector  *indclass;
344         Datum           exprsDatum;
345         Datum           predDatum;
346         Datum           values[Natts_pg_index];
347         char            nulls[Natts_pg_index];
348         Relation        pg_index;
349         HeapTuple       tuple;
350         int                     i;
351
352         /*
353          * Copy the index key and opclass info into arrays (should we make the
354          * caller pass them like this to start with?)
355          */
356         indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
357         indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
358         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
359                 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
360
361         /*
362          * Convert the index expressions (if any) to a text datum
363          */
364         if (indexInfo->ii_Expressions != NIL)
365         {
366                 char       *exprsString;
367
368                 exprsString = nodeToString(indexInfo->ii_Expressions);
369                 exprsDatum = DirectFunctionCall1(textin,
370                                                                                  CStringGetDatum(exprsString));
371                 pfree(exprsString);
372         }
373         else
374                 exprsDatum = (Datum) 0;
375
376         /*
377          * Convert the index predicate (if any) to a text datum.  Note we convert
378          * implicit-AND format to normal explicit-AND for storage.
379          */
380         if (indexInfo->ii_Predicate != NIL)
381         {
382                 char       *predString;
383
384                 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
385                 predDatum = DirectFunctionCall1(textin,
386                                                                                 CStringGetDatum(predString));
387                 pfree(predString);
388         }
389         else
390                 predDatum = (Datum) 0;
391
392         /*
393          * open the system catalog index relation
394          */
395         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
396
397         /*
398          * Build a pg_index tuple
399          */
400         MemSet(nulls, ' ', sizeof(nulls));
401
402         values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
403         values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
404         values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
405         values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
406         values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
407         values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
408         values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
409         values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
410         values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
411         values[Anum_pg_index_indexprs - 1] = exprsDatum;
412         if (exprsDatum == (Datum) 0)
413                 nulls[Anum_pg_index_indexprs - 1] = 'n';
414         values[Anum_pg_index_indpred - 1] = predDatum;
415         if (predDatum == (Datum) 0)
416                 nulls[Anum_pg_index_indpred - 1] = 'n';
417
418         tuple = heap_formtuple(RelationGetDescr(pg_index), values, nulls);
419
420         /*
421          * insert the tuple into the pg_index catalog
422          */
423         simple_heap_insert(pg_index, tuple);
424
425         /* update the indexes on pg_index */
426         CatalogUpdateIndexes(pg_index, tuple);
427
428         /*
429          * close the relation and free the tuple
430          */
431         heap_close(pg_index, RowExclusiveLock);
432         heap_freetuple(tuple);
433 }
434
435
436 /*
437  * index_create
438  *
439  * heapRelationId: OID of table to build index on
440  * indexRelationName: name to give the new index relation
441  * indexRelationId: normally, pass InvalidOid to let this routine
442  *              generate an OID for the index.  During bootstrap this may be
443  *              nonzero to specify a preselected OID.
444  * indexInfo: same info executor uses to insert into the index
445  * accessMethodObjectId: OID of index AM to use
446  * tableSpaceId: OID of tablespace to use
447  * classObjectId: array of index opclass OIDs, one per index column
448  * reloptions: AM-specific options
449  * isprimary: index is a PRIMARY KEY
450  * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
451  * allow_system_table_mods: allow table to be a system catalog
452  * skip_build: true to skip the index_build() step for the moment; caller
453  *              must do it later (typically via reindex_index())
454  * concurrent: if true, do not lock the table against writers.  The index
455  *              will be marked "invalid" and the caller must take additional steps
456  *              to fix it up.
457  *
458  * Returns OID of the created index.
459  */
460 Oid
461 index_create(Oid heapRelationId,
462                          const char *indexRelationName,
463                          Oid indexRelationId,
464                          IndexInfo *indexInfo,
465                          Oid accessMethodObjectId,
466                          Oid tableSpaceId,
467                          Oid *classObjectId,
468                          Datum reloptions,
469                          bool isprimary,
470                          bool isconstraint,
471                          bool allow_system_table_mods,
472                          bool skip_build,
473                          bool concurrent)
474 {
475         Relation        pg_class;
476         Relation        heapRelation;
477         Relation        indexRelation;
478         TupleDesc       indexTupDesc;
479         bool            shared_relation;
480         Oid                     namespaceId;
481         int                     i;
482
483         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
484
485         /*
486          * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
487          * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
488          * (but not VACUUM).
489          */
490         heapRelation = heap_open(heapRelationId,
491                                                 (concurrent ? ShareUpdateExclusiveLock : ShareLock));
492
493         /*
494          * The index will be in the same namespace as its parent table, and is
495          * shared across databases if and only if the parent is.
496          */
497         namespaceId = RelationGetNamespace(heapRelation);
498         shared_relation = heapRelation->rd_rel->relisshared;
499
500         /*
501          * check parameters
502          */
503         if (indexInfo->ii_NumIndexAttrs < 1)
504                 elog(ERROR, "must index at least one column");
505
506         if (!allow_system_table_mods &&
507                 IsSystemRelation(heapRelation) &&
508                 IsNormalProcessingMode())
509                 ereport(ERROR,
510                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
511                                  errmsg("user-defined indexes on system catalog tables are not supported")));
512
513         /*
514          * concurrent index build on a system catalog is unsafe because we tend to
515          * release locks before committing in catalogs
516          */
517         if (concurrent &&
518                 IsSystemRelation(heapRelation))
519                 ereport(ERROR,
520                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
521                                  errmsg("concurrent index creation on system catalog tables is not supported")));
522
523         /*
524          * We cannot allow indexing a shared relation after initdb (because
525          * there's no way to make the entry in other databases' pg_class).
526          */
527         if (shared_relation && !IsBootstrapProcessingMode())
528                 ereport(ERROR,
529                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
530                                  errmsg("shared indexes cannot be created after initdb")));
531
532         if (get_relname_relid(indexRelationName, namespaceId))
533                 ereport(ERROR,
534                                 (errcode(ERRCODE_DUPLICATE_TABLE),
535                                  errmsg("relation \"%s\" already exists",
536                                                 indexRelationName)));
537
538         /*
539          * construct tuple descriptor for index tuples
540          */
541         indexTupDesc = ConstructTupleDescriptor(heapRelation,
542                                                                                         indexInfo,
543                                                                                         classObjectId);
544
545         /*
546          * Allocate an OID for the index, unless we were told what to use.
547          *
548          * The OID will be the relfilenode as well, so make sure it doesn't
549          * collide with either pg_class OIDs or existing physical files.
550          */
551         if (!OidIsValid(indexRelationId))
552                 indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
553                                                                                         pg_class);
554
555         /*
556          * create the index relation's relcache entry and physical disk file. (If
557          * we fail further down, it's the smgr's responsibility to remove the disk
558          * file again.)
559          */
560         indexRelation = heap_create(indexRelationName,
561                                                                 namespaceId,
562                                                                 tableSpaceId,
563                                                                 indexRelationId,
564                                                                 indexTupDesc,
565                                                                 RELKIND_INDEX,
566                                                                 shared_relation,
567                                                                 allow_system_table_mods);
568
569         Assert(indexRelationId == RelationGetRelid(indexRelation));
570
571         /*
572          * Obtain exclusive lock on it.  Although no other backends can see it
573          * until we commit, this prevents deadlock-risk complaints from lock
574          * manager in cases such as CLUSTER.
575          */
576         LockRelation(indexRelation, AccessExclusiveLock);
577
578         /*
579          * Fill in fields of the index's pg_class entry that are not set correctly
580          * by heap_create.
581          *
582          * XXX should have a cleaner way to create cataloged indexes
583          */
584         indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
585         indexRelation->rd_rel->relam = accessMethodObjectId;
586         indexRelation->rd_rel->relkind = RELKIND_INDEX;
587         indexRelation->rd_rel->relhasoids = false;
588
589         /*
590          * store index's pg_class entry
591          */
592         InsertPgClassTuple(pg_class, indexRelation,
593                                            RelationGetRelid(indexRelation),
594                                            reloptions);
595
596         /* done with pg_class */
597         heap_close(pg_class, RowExclusiveLock);
598
599         /*
600          * now update the object id's of all the attribute tuple forms in the
601          * index relation's tuple descriptor
602          */
603         InitializeAttributeOids(indexRelation,
604                                                         indexInfo->ii_NumIndexAttrs,
605                                                         indexRelationId);
606
607         /*
608          * append ATTRIBUTE tuples for the index
609          */
610         AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
611
612         /* ----------------
613          *        update pg_index
614          *        (append INDEX tuple)
615          *
616          *        Note that this stows away a representation of "predicate".
617          *        (Or, could define a rule to maintain the predicate) --Nels, Feb '92
618          * ----------------
619          */
620         UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
621                                                 classObjectId, isprimary, !concurrent);
622
623         /*
624          * Register constraint and dependencies for the index.
625          *
626          * If the index is from a CONSTRAINT clause, construct a pg_constraint
627          * entry. The index is then linked to the constraint, which in turn is
628          * linked to the table.  If it's not a CONSTRAINT, make the dependency
629          * directly on the table.
630          *
631          * We don't need a dependency on the namespace, because there'll be an
632          * indirect dependency via our parent table.
633          *
634          * During bootstrap we can't register any dependencies, and we don't try
635          * to make a constraint either.
636          */
637         if (!IsBootstrapProcessingMode())
638         {
639                 ObjectAddress myself,
640                                         referenced;
641
642                 myself.classId = RelationRelationId;
643                 myself.objectId = indexRelationId;
644                 myself.objectSubId = 0;
645
646                 if (isconstraint)
647                 {
648                         char            constraintType;
649                         Oid                     conOid;
650
651                         if (isprimary)
652                                 constraintType = CONSTRAINT_PRIMARY;
653                         else if (indexInfo->ii_Unique)
654                                 constraintType = CONSTRAINT_UNIQUE;
655                         else
656                         {
657                                 elog(ERROR, "constraint must be PRIMARY or UNIQUE");
658                                 constraintType = 0;             /* keep compiler quiet */
659                         }
660
661                         /* Shouldn't have any expressions */
662                         if (indexInfo->ii_Expressions)
663                                 elog(ERROR, "constraints can't have index expressions");
664
665                         conOid = CreateConstraintEntry(indexRelationName,
666                                                                                    namespaceId,
667                                                                                    constraintType,
668                                                                                    false,               /* isDeferrable */
669                                                                                    false,               /* isDeferred */
670                                                                                    heapRelationId,
671                                                                                    indexInfo->ii_KeyAttrNumbers,
672                                                                                    indexInfo->ii_NumIndexAttrs,
673                                                                                    InvalidOid,  /* no domain */
674                                                                                    InvalidOid,  /* no foreign key */
675                                                                                    NULL,
676                                                                                    0,
677                                                                                    ' ',
678                                                                                    ' ',
679                                                                                    ' ',
680                                                                                    InvalidOid,  /* no associated index */
681                                                                                    NULL,                /* no check constraint */
682                                                                                    NULL,
683                                                                                    NULL);
684
685                         referenced.classId = ConstraintRelationId;
686                         referenced.objectId = conOid;
687                         referenced.objectSubId = 0;
688
689                         recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
690                 }
691                 else
692                 {
693                         /* Create auto dependencies on simply-referenced columns */
694                         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
695                         {
696                                 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
697                                 {
698                                         referenced.classId = RelationRelationId;
699                                         referenced.objectId = heapRelationId;
700                                         referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
701
702                                         recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
703                                 }
704                         }
705                 }
706
707                 /* Store dependency on operator classes */
708                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
709                 {
710                         referenced.classId = OperatorClassRelationId;
711                         referenced.objectId = classObjectId[i];
712                         referenced.objectSubId = 0;
713
714                         recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
715                 }
716
717                 /* Store dependencies on anything mentioned in index expressions */
718                 if (indexInfo->ii_Expressions)
719                 {
720                         recordDependencyOnSingleRelExpr(&myself,
721                                                                                   (Node *) indexInfo->ii_Expressions,
722                                                                                         heapRelationId,
723                                                                                         DEPENDENCY_NORMAL,
724                                                                                         DEPENDENCY_AUTO);
725                 }
726
727                 /* Store dependencies on anything mentioned in predicate */
728                 if (indexInfo->ii_Predicate)
729                 {
730                         recordDependencyOnSingleRelExpr(&myself,
731                                                                                         (Node *) indexInfo->ii_Predicate,
732                                                                                         heapRelationId,
733                                                                                         DEPENDENCY_NORMAL,
734                                                                                         DEPENDENCY_AUTO);
735                 }
736         }
737
738         /*
739          * Advance the command counter so that we can see the newly-entered
740          * catalog tuples for the index.
741          */
742         CommandCounterIncrement();
743
744         /*
745          * In bootstrap mode, we have to fill in the index strategy structure with
746          * information from the catalogs.  If we aren't bootstrapping, then the
747          * relcache entry has already been rebuilt thanks to sinval update during
748          * CommandCounterIncrement.
749          */
750         if (IsBootstrapProcessingMode())
751                 RelationInitIndexAccessInfo(indexRelation);
752         else
753                 Assert(indexRelation->rd_indexcxt != NULL);
754
755         /*
756          * If this is bootstrap (initdb) time, then we don't actually fill in the
757          * index yet.  We'll be creating more indexes and classes later, so we
758          * delay filling them in until just before we're done with bootstrapping.
759          * Similarly, if the caller specified skip_build then filling the index is
760          * delayed till later (ALTER TABLE can save work in some cases with this).
761          * Otherwise, we call the AM routine that constructs the index.
762          */
763         if (IsBootstrapProcessingMode())
764         {
765                 index_register(heapRelationId, indexRelationId, indexInfo);
766         }
767         else if (skip_build)
768         {
769                 /*
770                  * Caller is responsible for filling the index later on.  However,
771                  * we'd better make sure that the heap relation is correctly marked as
772                  * having an index.
773                  */
774                 index_update_stats(heapRelation,
775                                                    true,
776                                                    isprimary,
777                                                    InvalidOid,
778                                                    heapRelation->rd_rel->reltuples);
779                 /* Make the above update visible */
780                 CommandCounterIncrement();
781         }
782         else
783         {
784                 index_build(heapRelation, indexRelation, indexInfo, isprimary);
785         }
786
787         /*
788          * Close the heap and index; but we keep the locks that we acquired above
789          * until end of transaction.
790          */
791         index_close(indexRelation, NoLock);
792         heap_close(heapRelation, NoLock);
793
794         return indexRelationId;
795 }
796
797 /*
798  *              index_drop
799  *
800  * NOTE: this routine should now only be called through performDeletion(),
801  * else associated dependencies won't be cleaned up.
802  */
803 void
804 index_drop(Oid indexId)
805 {
806         Oid                     heapId;
807         Relation        userHeapRelation;
808         Relation        userIndexRelation;
809         Relation        indexRelation;
810         HeapTuple       tuple;
811         bool            hasexprs;
812
813         /*
814          * To drop an index safely, we must grab exclusive lock on its parent
815          * table; otherwise there could be other backends using the index!
816          * Exclusive lock on the index alone is insufficient because another
817          * backend might be in the midst of devising a query plan that will use
818          * the index.  The parser and planner take care to hold an appropriate
819          * lock on the parent table while working, but having them hold locks on
820          * all the indexes too seems overly expensive.  We do grab exclusive lock
821          * on the index too, just to be safe. Both locks must be held till end of
822          * transaction, else other backends will still see this index in pg_index.
823          */
824         heapId = IndexGetRelation(indexId);
825         userHeapRelation = heap_open(heapId, AccessExclusiveLock);
826
827         userIndexRelation = index_open(indexId, AccessExclusiveLock);
828
829         /*
830          * Schedule physical removal of the file
831          */
832         RelationOpenSmgr(userIndexRelation);
833         smgrscheduleunlink(userIndexRelation->rd_smgr,
834                                            userIndexRelation->rd_istemp);
835
836         /*
837          * Close and flush the index's relcache entry, to ensure relcache doesn't
838          * try to rebuild it while we're deleting catalog entries. We keep the
839          * lock though.
840          */
841         index_close(userIndexRelation, NoLock);
842
843         RelationForgetRelation(indexId);
844
845         /*
846          * fix INDEX relation, and check for expressional index
847          */
848         indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
849
850         tuple = SearchSysCache(INDEXRELID,
851                                                    ObjectIdGetDatum(indexId),
852                                                    0, 0, 0);
853         if (!HeapTupleIsValid(tuple))
854                 elog(ERROR, "cache lookup failed for index %u", indexId);
855
856         hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
857
858         simple_heap_delete(indexRelation, &tuple->t_self);
859
860         ReleaseSysCache(tuple);
861         heap_close(indexRelation, RowExclusiveLock);
862
863         /*
864          * if it has any expression columns, we might have stored statistics about
865          * them.
866          */
867         if (hasexprs)
868                 RemoveStatistics(indexId, 0);
869
870         /*
871          * fix ATTRIBUTE relation
872          */
873         DeleteAttributeTuples(indexId);
874
875         /*
876          * fix RELATION relation
877          */
878         DeleteRelationTuple(indexId);
879
880         /*
881          * We are presently too lazy to attempt to compute the new correct value
882          * of relhasindex (the next VACUUM will fix it if necessary). So there is
883          * no need to update the pg_class tuple for the owning relation. But we
884          * must send out a shared-cache-inval notice on the owning relation to
885          * ensure other backends update their relcache lists of indexes.
886          */
887         CacheInvalidateRelcache(userHeapRelation);
888
889         /*
890          * Close owning rel, but keep lock
891          */
892         heap_close(userHeapRelation, NoLock);
893 }
894
895 /* ----------------------------------------------------------------
896  *                                              index_build support
897  * ----------------------------------------------------------------
898  */
899
900 /* ----------------
901  *              BuildIndexInfo
902  *                      Construct an IndexInfo record for an open index
903  *
904  * IndexInfo stores the information about the index that's needed by
905  * FormIndexDatum, which is used for both index_build() and later insertion
906  * of individual index tuples.  Normally we build an IndexInfo for an index
907  * just once per command, and then use it for (potentially) many tuples.
908  * ----------------
909  */
910 IndexInfo *
911 BuildIndexInfo(Relation index)
912 {
913         IndexInfo  *ii = makeNode(IndexInfo);
914         Form_pg_index indexStruct = index->rd_index;
915         int                     i;
916         int                     numKeys;
917
918         /* check the number of keys, and copy attr numbers into the IndexInfo */
919         numKeys = indexStruct->indnatts;
920         if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
921                 elog(ERROR, "invalid indnatts %d for index %u",
922                          numKeys, RelationGetRelid(index));
923         ii->ii_NumIndexAttrs = numKeys;
924         for (i = 0; i < numKeys; i++)
925                 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
926
927         /* fetch any expressions needed for expressional indexes */
928         ii->ii_Expressions = RelationGetIndexExpressions(index);
929         ii->ii_ExpressionsState = NIL;
930
931         /* fetch index predicate if any */
932         ii->ii_Predicate = RelationGetIndexPredicate(index);
933         ii->ii_PredicateState = NIL;
934
935         /* other info */
936         ii->ii_Unique = indexStruct->indisunique;
937         ii->ii_Concurrent = false;      /* assume normal case */
938
939         return ii;
940 }
941
942 /* ----------------
943  *              FormIndexDatum
944  *                      Construct values[] and isnull[] arrays for a new index tuple.
945  *
946  *      indexInfo               Info about the index
947  *      slot                    Heap tuple for which we must prepare an index entry
948  *      estate                  executor state for evaluating any index expressions
949  *      values                  Array of index Datums (output area)
950  *      isnull                  Array of is-null indicators (output area)
951  *
952  * When there are no index expressions, estate may be NULL.  Otherwise it
953  * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
954  * context must point to the heap tuple passed in.
955  *
956  * Notice we don't actually call index_form_tuple() here; we just prepare
957  * its input arrays values[] and isnull[].      This is because the index AM
958  * may wish to alter the data before storage.
959  * ----------------
960  */
961 void
962 FormIndexDatum(IndexInfo *indexInfo,
963                            TupleTableSlot *slot,
964                            EState *estate,
965                            Datum *values,
966                            bool *isnull)
967 {
968         ListCell   *indexpr_item;
969         int                     i;
970
971         if (indexInfo->ii_Expressions != NIL &&
972                 indexInfo->ii_ExpressionsState == NIL)
973         {
974                 /* First time through, set up expression evaluation state */
975                 indexInfo->ii_ExpressionsState = (List *)
976                         ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
977                                                         estate);
978                 /* Check caller has set up context correctly */
979                 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
980         }
981         indexpr_item = list_head(indexInfo->ii_ExpressionsState);
982
983         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
984         {
985                 int                     keycol = indexInfo->ii_KeyAttrNumbers[i];
986                 Datum           iDatum;
987                 bool            isNull;
988
989                 if (keycol != 0)
990                 {
991                         /*
992                          * Plain index column; get the value we need directly from the
993                          * heap tuple.
994                          */
995                         iDatum = slot_getattr(slot, keycol, &isNull);
996                 }
997                 else
998                 {
999                         /*
1000                          * Index expression --- need to evaluate it.
1001                          */
1002                         if (indexpr_item == NULL)
1003                                 elog(ERROR, "wrong number of index expressions");
1004                         iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1005                                                                                            GetPerTupleExprContext(estate),
1006                                                                                            &isNull,
1007                                                                                            NULL);
1008                         indexpr_item = lnext(indexpr_item);
1009                 }
1010                 values[i] = iDatum;
1011                 isnull[i] = isNull;
1012         }
1013
1014         if (indexpr_item != NULL)
1015                 elog(ERROR, "wrong number of index expressions");
1016 }
1017
1018
1019 /*
1020  * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1021  *
1022  * This routine updates the pg_class row of either an index or its parent
1023  * relation after CREATE INDEX or REINDEX.      Its rather bizarre API is designed
1024  * to ensure we can do all the necessary work in just one update.
1025  *
1026  * hasindex: set relhasindex to this value
1027  * isprimary: if true, set relhaspkey true; else no change
1028  * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
1029  *              else no change
1030  * reltuples: set reltuples to this value
1031  *
1032  * relpages is also updated (using RelationGetNumberOfBlocks()).
1033  *
1034  * NOTE: an important side-effect of this operation is that an SI invalidation
1035  * message is sent out to all backends --- including me --- causing relcache
1036  * entries to be flushed or updated with the new data.  This must happen even
1037  * if we find that no change is needed in the pg_class row.  When updating
1038  * a heap entry, this ensures that other backends find out about the new
1039  * index.  When updating an index, it's important because some index AMs
1040  * expect a relcache flush to occur after REINDEX.
1041  */
1042 static void
1043 index_update_stats(Relation rel, bool hasindex, bool isprimary,
1044                                    Oid reltoastidxid, double reltuples)
1045 {
1046         BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1047         Oid                     relid = RelationGetRelid(rel);
1048         Relation        pg_class;
1049         HeapTuple       tuple;
1050         Form_pg_class rd_rel;
1051         bool            dirty;
1052
1053         /*
1054          * We always update the pg_class row using a non-transactional,
1055          * overwrite-in-place update.  There are several reasons for this:
1056          *
1057          * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1058          *
1059          * 2. We could be reindexing pg_class itself, in which case we can't move
1060          * its pg_class row because CatalogUpdateIndexes might not know about all
1061          * the indexes yet (see reindex_relation).
1062          *
1063          * 3. Because we execute CREATE INDEX with just share lock on the parent
1064          * rel (to allow concurrent index creations), an ordinary update could
1065          * suffer a tuple-concurrently-updated failure against another CREATE
1066          * INDEX committing at about the same time.  We can avoid that by having
1067          * them both do nontransactional updates (we assume they will both be
1068          * trying to change the pg_class row to the same thing, so it doesn't
1069          * matter which goes first).
1070          *
1071          * 4. Even with just a single CREATE INDEX, there's a risk factor because
1072          * someone else might be trying to open the rel while we commit, and this
1073          * creates a race condition as to whether he will see both or neither of
1074          * the pg_class row versions as valid.  Again, a non-transactional update
1075          * avoids the risk.  It is indeterminate which state of the row the other
1076          * process will see, but it doesn't matter (if he's only taking
1077          * AccessShareLock, then it's not critical that he see relhasindex true).
1078          *
1079          * It is safe to use a non-transactional update even though our
1080          * transaction could still fail before committing.      Setting relhasindex
1081          * true is safe even if there are no indexes (VACUUM will eventually fix
1082          * it), and of course the relpages and reltuples counts are correct (or at
1083          * least more so than the old values) regardless.
1084          */
1085
1086         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1087
1088         /*
1089          * Make a copy of the tuple to update.  Normally we use the syscache, but
1090          * we can't rely on that during bootstrap or while reindexing pg_class
1091          * itself.
1092          */
1093         if (IsBootstrapProcessingMode() ||
1094                 ReindexIsProcessingHeap(RelationRelationId))
1095         {
1096                 /* don't assume syscache will work */
1097                 HeapScanDesc pg_class_scan;
1098                 ScanKeyData key[1];
1099
1100                 ScanKeyInit(&key[0],
1101                                         ObjectIdAttributeNumber,
1102                                         BTEqualStrategyNumber, F_OIDEQ,
1103                                         ObjectIdGetDatum(relid));
1104
1105                 pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
1106                 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1107                 tuple = heap_copytuple(tuple);
1108                 heap_endscan(pg_class_scan);
1109         }
1110         else
1111         {
1112                 /* normal case, use syscache */
1113                 tuple = SearchSysCacheCopy(RELOID,
1114                                                                    ObjectIdGetDatum(relid),
1115                                                                    0, 0, 0);
1116         }
1117
1118         if (!HeapTupleIsValid(tuple))
1119                 elog(ERROR, "could not find tuple for relation %u", relid);
1120         rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1121
1122         /* Apply required updates, if any, to copied tuple */
1123
1124         dirty = false;
1125         if (rd_rel->relhasindex != hasindex)
1126         {
1127                 rd_rel->relhasindex = hasindex;
1128                 dirty = true;
1129         }
1130         if (isprimary)
1131         {
1132                 if (!rd_rel->relhaspkey)
1133                 {
1134                         rd_rel->relhaspkey = true;
1135                         dirty = true;
1136                 }
1137         }
1138         if (OidIsValid(reltoastidxid))
1139         {
1140                 Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
1141                 if (rd_rel->reltoastidxid != reltoastidxid)
1142                 {
1143                         rd_rel->reltoastidxid = reltoastidxid;
1144                         dirty = true;
1145                 }
1146         }
1147         if (rd_rel->reltuples != (float4) reltuples)
1148         {
1149                 rd_rel->reltuples = (float4) reltuples;
1150                 dirty = true;
1151         }
1152         if (rd_rel->relpages != (int32) relpages)
1153         {
1154                 rd_rel->relpages = (int32) relpages;
1155                 dirty = true;
1156         }
1157
1158         /*
1159          * If anything changed, write out the tuple
1160          */
1161         if (dirty)
1162         {
1163                 heap_inplace_update(pg_class, tuple);
1164                 /* the above sends a cache inval message */
1165         }
1166         else
1167         {
1168                 /* no need to change tuple, but force relcache inval anyway */
1169                 CacheInvalidateRelcacheByTuple(tuple);
1170         }
1171
1172         heap_freetuple(tuple);
1173
1174         heap_close(pg_class, RowExclusiveLock);
1175 }
1176
1177 /*
1178  * setNewRelfilenode            - assign a new relfilenode value to the relation
1179  *
1180  * Caller must already hold exclusive lock on the relation.
1181  */
1182 void
1183 setNewRelfilenode(Relation relation)
1184 {
1185         Oid                     newrelfilenode;
1186         RelFileNode newrnode;
1187         SMgrRelation srel;
1188         Relation        pg_class;
1189         HeapTuple       tuple;
1190         Form_pg_class rd_rel;
1191
1192         /* Can't change relfilenode for nailed tables (indexes ok though) */
1193         Assert(!relation->rd_isnailed ||
1194                    relation->rd_rel->relkind == RELKIND_INDEX);
1195         /* Can't change for shared tables or indexes */
1196         Assert(!relation->rd_rel->relisshared);
1197
1198         /* Allocate a new relfilenode */
1199         newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
1200                                                                            relation->rd_rel->relisshared,
1201                                                                            NULL);
1202
1203         /*
1204          * Find the pg_class tuple for the given relation.      This is not used
1205          * during bootstrap, so okay to use heap_update always.
1206          */
1207         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1208
1209         tuple = SearchSysCacheCopy(RELOID,
1210                                                            ObjectIdGetDatum(RelationGetRelid(relation)),
1211                                                            0, 0, 0);
1212         if (!HeapTupleIsValid(tuple))
1213                 elog(ERROR, "could not find tuple for relation %u",
1214                          RelationGetRelid(relation));
1215         rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1216
1217         /* create another storage file. Is it a little ugly ? */
1218         /* NOTE: any conflict in relfilenode value will be caught here */
1219         newrnode = relation->rd_node;
1220         newrnode.relNode = newrelfilenode;
1221
1222         srel = smgropen(newrnode);
1223         smgrcreate(srel, relation->rd_istemp, false);
1224         smgrclose(srel);
1225
1226         /* schedule unlinking old relfilenode */
1227         RelationOpenSmgr(relation);
1228         smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
1229
1230         /* update the pg_class row */
1231         rd_rel->relfilenode = newrelfilenode;
1232         rd_rel->relpages = 0;           /* it's empty until further notice */
1233         rd_rel->reltuples = 0;
1234         simple_heap_update(pg_class, &tuple->t_self, tuple);
1235         CatalogUpdateIndexes(pg_class, tuple);
1236
1237         heap_freetuple(tuple);
1238
1239         heap_close(pg_class, RowExclusiveLock);
1240
1241         /* Make sure the relfilenode change is visible */
1242         CommandCounterIncrement();
1243 }
1244
1245
1246 /*
1247  * index_build - invoke access-method-specific index build procedure
1248  *
1249  * On entry, the index's catalog entries are valid, and its physical disk
1250  * file has been created but is empty.  We call the AM-specific build
1251  * procedure to fill in the index contents.  We then update the pg_class
1252  * entries of the index and heap relation as needed, using statistics
1253  * returned by ambuild as well as data passed by the caller.
1254  *
1255  * Note: when reindexing an existing index, isprimary can be false;
1256  * the index is already properly marked and need not be re-marked.
1257  *
1258  * Note: before Postgres 8.2, the passed-in heap and index Relations
1259  * were automatically closed by this routine.  This is no longer the case.
1260  * The caller opened 'em, and the caller should close 'em.
1261  */
1262 void
1263 index_build(Relation heapRelation,
1264                         Relation indexRelation,
1265                         IndexInfo *indexInfo,
1266                         bool isprimary)
1267 {
1268         RegProcedure procedure;
1269         IndexBuildResult *stats;
1270
1271         /*
1272          * sanity checks
1273          */
1274         Assert(RelationIsValid(indexRelation));
1275         Assert(PointerIsValid(indexRelation->rd_am));
1276
1277         procedure = indexRelation->rd_am->ambuild;
1278         Assert(RegProcedureIsValid(procedure));
1279
1280         /*
1281          * Call the access method's build procedure
1282          */
1283         stats = (IndexBuildResult *)
1284                 DatumGetPointer(OidFunctionCall3(procedure,
1285                                                                                  PointerGetDatum(heapRelation),
1286                                                                                  PointerGetDatum(indexRelation),
1287                                                                                  PointerGetDatum(indexInfo)));
1288         Assert(PointerIsValid(stats));
1289
1290         /*
1291          * Update heap and index pg_class rows
1292          */
1293         index_update_stats(heapRelation,
1294                                            true,
1295                                            isprimary,
1296                                            (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
1297                                            RelationGetRelid(indexRelation) : InvalidOid,
1298                                            stats->heap_tuples);
1299
1300         index_update_stats(indexRelation,
1301                                            false,
1302                                            false,
1303                                            InvalidOid,
1304                                            stats->index_tuples);
1305
1306         /* Make the updated versions visible */
1307         CommandCounterIncrement();
1308 }
1309
1310
1311 /*
1312  * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
1313  *
1314  * This is called back from an access-method-specific index build procedure
1315  * after the AM has done whatever setup it needs.  The parent heap relation
1316  * is scanned to find tuples that should be entered into the index.  Each
1317  * such tuple is passed to the AM's callback routine, which does the right
1318  * things to add it to the new index.  After we return, the AM's index
1319  * build procedure does whatever cleanup is needed; in particular, it should
1320  * close the heap and index relations.
1321  *
1322  * The total count of heap tuples is returned.  This is for updating pg_class
1323  * statistics.  (It's annoying not to be able to do that here, but we can't
1324  * do it until after the relation is closed.)  Note that the index AM itself
1325  * must keep track of the number of index tuples; we don't do so here because
1326  * the AM might reject some of the tuples for its own reasons, such as being
1327  * unable to store NULLs.
1328  */
1329 double
1330 IndexBuildHeapScan(Relation heapRelation,
1331                                    Relation indexRelation,
1332                                    IndexInfo *indexInfo,
1333                                    IndexBuildCallback callback,
1334                                    void *callback_state)
1335 {
1336         HeapScanDesc scan;
1337         HeapTuple       heapTuple;
1338         Datum           values[INDEX_MAX_KEYS];
1339         bool            isnull[INDEX_MAX_KEYS];
1340         double          reltuples;
1341         List       *predicate;
1342         TupleTableSlot *slot;
1343         EState     *estate;
1344         ExprContext *econtext;
1345         Snapshot        snapshot;
1346         TransactionId OldestXmin;
1347
1348         /*
1349          * sanity checks
1350          */
1351         Assert(OidIsValid(indexRelation->rd_rel->relam));
1352
1353         /*
1354          * Need an EState for evaluation of index expressions and partial-index
1355          * predicates.  Also a slot to hold the current tuple.
1356          */
1357         estate = CreateExecutorState();
1358         econtext = GetPerTupleExprContext(estate);
1359         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1360
1361         /* Arrange for econtext's scan tuple to be the tuple under test */
1362         econtext->ecxt_scantuple = slot;
1363
1364         /* Set up execution state for predicate, if any. */
1365         predicate = (List *)
1366                 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1367                                                 estate);
1368
1369         /*
1370          * Prepare for scan of the base relation.  In a normal index build, we use
1371          * SnapshotAny because we must retrieve all tuples and do our own time
1372          * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1373          * concurrent build, we take a regular MVCC snapshot and index whatever's
1374          * live according to that.      During bootstrap we just use SnapshotNow.
1375          */
1376         if (IsBootstrapProcessingMode())
1377         {
1378                 snapshot = SnapshotNow;
1379                 OldestXmin = InvalidTransactionId;              /* not used */
1380         }
1381         else if (indexInfo->ii_Concurrent)
1382         {
1383                 snapshot = CopySnapshot(GetTransactionSnapshot());
1384                 OldestXmin = InvalidTransactionId;              /* not used */
1385         }
1386         else
1387         {
1388                 snapshot = SnapshotAny;
1389                 /* okay to ignore lazy VACUUMs here */
1390                 OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
1391         }
1392
1393         scan = heap_beginscan(heapRelation, /* relation */
1394                                                   snapshot,             /* seeself */
1395                                                   0,    /* number of keys */
1396                                                   NULL);        /* scan key */
1397
1398         reltuples = 0;
1399
1400         /*
1401          * Scan all tuples in the base relation.
1402          */
1403         while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1404         {
1405                 bool            tupleIsAlive;
1406
1407                 CHECK_FOR_INTERRUPTS();
1408
1409                 if (snapshot == SnapshotAny)
1410                 {
1411                         /* do our own time qual check */
1412                         bool            indexIt;
1413
1414                         /*
1415                          * We could possibly get away with not locking the buffer here,
1416                          * since caller should hold ShareLock on the relation, but let's
1417                          * be conservative about it.
1418                          */
1419                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1420
1421                         switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
1422                                                                                          scan->rs_cbuf))
1423                         {
1424                                 case HEAPTUPLE_DEAD:
1425                                         /* Definitely dead, we can ignore it */
1426                                         indexIt = false;
1427                                         tupleIsAlive = false;
1428                                         break;
1429                                 case HEAPTUPLE_LIVE:
1430                                         /* Normal case, index and unique-check it */
1431                                         indexIt = true;
1432                                         tupleIsAlive = true;
1433                                         break;
1434                                 case HEAPTUPLE_RECENTLY_DEAD:
1435
1436                                         /*
1437                                          * If tuple is recently deleted then we must index it
1438                                          * anyway to preserve MVCC semantics.  (Pre-existing
1439                                          * transactions could try to use the index after we finish
1440                                          * building it, and may need to see such tuples.) Exclude
1441                                          * it from unique-checking, however.
1442                                          */
1443                                         indexIt = true;
1444                                         tupleIsAlive = false;
1445                                         break;
1446                                 case HEAPTUPLE_INSERT_IN_PROGRESS:
1447
1448                                         /*
1449                                          * Since caller should hold ShareLock or better, we should
1450                                          * not see any tuples inserted by open transactions ---
1451                                          * unless it's our own transaction. (Consider INSERT
1452                                          * followed by CREATE INDEX within a transaction.)      An
1453                                          * exception occurs when reindexing a system catalog,
1454                                          * because we often release lock on system catalogs before
1455                                          * committing.
1456                                          */
1457                                         if (!TransactionIdIsCurrentTransactionId(
1458                                                                    HeapTupleHeaderGetXmin(heapTuple->t_data))
1459                                                 && !IsSystemRelation(heapRelation))
1460                                                 elog(ERROR, "concurrent insert in progress");
1461                                         indexIt = true;
1462                                         tupleIsAlive = true;
1463                                         break;
1464                                 case HEAPTUPLE_DELETE_IN_PROGRESS:
1465
1466                                         /*
1467                                          * Since caller should hold ShareLock or better, we should
1468                                          * not see any tuples deleted by open transactions ---
1469                                          * unless it's our own transaction. (Consider DELETE
1470                                          * followed by CREATE INDEX within a transaction.)      An
1471                                          * exception occurs when reindexing a system catalog,
1472                                          * because we often release lock on system catalogs before
1473                                          * committing.
1474                                          */
1475                                         Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
1476                                         if (!TransactionIdIsCurrentTransactionId(
1477                                                                    HeapTupleHeaderGetXmax(heapTuple->t_data))
1478                                                 && !IsSystemRelation(heapRelation))
1479                                                 elog(ERROR, "concurrent delete in progress");
1480                                         indexIt = true;
1481                                         tupleIsAlive = false;
1482                                         break;
1483                                 default:
1484                                         elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1485                                         indexIt = tupleIsAlive = false;         /* keep compiler quiet */
1486                                         break;
1487                         }
1488
1489                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1490
1491                         if (!indexIt)
1492                                 continue;
1493                 }
1494                 else
1495                 {
1496                         /* heap_getnext did the time qual check */
1497                         tupleIsAlive = true;
1498                 }
1499
1500                 reltuples += 1;
1501
1502                 MemoryContextReset(econtext->ecxt_per_tuple_memory);
1503
1504                 /* Set up for predicate or expression evaluation */
1505                 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1506
1507                 /*
1508                  * In a partial index, discard tuples that don't satisfy the
1509                  * predicate.
1510                  */
1511                 if (predicate != NIL)
1512                 {
1513                         if (!ExecQual(predicate, econtext, false))
1514                                 continue;
1515                 }
1516
1517                 /*
1518                  * For the current heap tuple, extract all the attributes we use in
1519                  * this index, and note which are null.  This also performs evaluation
1520                  * of any expressions needed.
1521                  */
1522                 FormIndexDatum(indexInfo,
1523                                            slot,
1524                                            estate,
1525                                            values,
1526                                            isnull);
1527
1528                 /*
1529                  * You'd think we should go ahead and build the index tuple here, but
1530                  * some index AMs want to do further processing on the data first.      So
1531                  * pass the values[] and isnull[] arrays, instead.
1532                  */
1533
1534                 /* Call the AM's callback routine to process the tuple */
1535                 callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
1536                                  callback_state);
1537         }
1538
1539         heap_endscan(scan);
1540
1541         ExecDropSingleTupleTableSlot(slot);
1542
1543         FreeExecutorState(estate);
1544
1545         /* These may have been pointing to the now-gone estate */
1546         indexInfo->ii_ExpressionsState = NIL;
1547         indexInfo->ii_PredicateState = NIL;
1548
1549         return reltuples;
1550 }
1551
1552
1553 /*
1554  * validate_index - support code for concurrent index builds
1555  *
1556  * We do a concurrent index build by first building the index normally via
1557  * index_create(), while holding a weak lock that allows concurrent
1558  * insert/update/delete.  Also, we index only tuples that are valid
1559  * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
1560  * build takes care to include recently-dead tuples.  This is OK because
1561  * we won't mark the index valid until all transactions that might be able
1562  * to see those tuples are gone.  The reason for doing that is to avoid
1563  * bogus unique-index failures due to concurrent UPDATEs (we might see
1564  * different versions of the same row as being valid when we pass over them,
1565  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
1566  * does not contain any tuples added to the table while we built the index.
1567  *
1568  * Next, we commit the transaction so that the index becomes visible to other
1569  * backends, but it is marked not "indisvalid" to prevent the planner from
1570  * relying on it for indexscans.  Then we wait for all transactions that
1571  * could have been modifying the table to terminate.  At this point we
1572  * know that any subsequently-started transactions will see the index and
1573  * insert their new tuples into it.  We then take a new reference snapshot
1574  * which is passed to validate_index().  Any tuples that are valid according
1575  * to this snap, but are not in the index, must be added to the index.
1576  * (Any tuples committed live after the snap will be inserted into the
1577  * index by their originating transaction.      Any tuples committed dead before
1578  * the snap need not be indexed, because we will wait out all transactions
1579  * that might care about them before we mark the index valid.)
1580  *
1581  * validate_index() works by first gathering all the TIDs currently in the
1582  * index, using a bulkdelete callback that just stores the TIDs and doesn't
1583  * ever say "delete it".  (This should be faster than a plain indexscan;
1584  * also, not all index AMs support full-index indexscan.)  Then we sort the
1585  * TIDs, and finally scan the table doing a "merge join" against the TID list
1586  * to see which tuples are missing from the index.      Thus we will ensure that
1587  * all tuples valid according to the reference snapshot are in the index.
1588  *
1589  * Building a unique index this way is tricky: we might try to insert a
1590  * tuple that is already dead or is in process of being deleted, and we
1591  * mustn't have a uniqueness failure against an updated version of the same
1592  * row.  We can check the tuple to see if it's already dead and tell
1593  * index_insert() not to do the uniqueness check, but that still leaves us
1594  * with a race condition against an in-progress update.  To handle that,
1595  * we expect the index AM to recheck liveness of the to-be-inserted tuple
1596  * before it declares a uniqueness error.
1597  *
1598  * After completing validate_index(), we wait until all transactions that
1599  * were alive at the time of the reference snapshot are gone; this is
1600  * necessary to be sure there are none left with a serializable snapshot
1601  * older than the reference (and hence possibly able to see tuples we did
1602  * not index).  Then we mark the index valid and commit.
1603  *
1604  * Doing two full table scans is a brute-force strategy.  We could try to be
1605  * cleverer, eg storing new tuples in a special area of the table (perhaps
1606  * making the table append-only by setting use_fsm).  However that would
1607  * add yet more locking issues.
1608  */
1609 void
1610 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
1611 {
1612         Relation        heapRelation,
1613                                 indexRelation;
1614         IndexInfo  *indexInfo;
1615         IndexVacuumInfo ivinfo;
1616         v_i_state       state;
1617
1618         /* Open and lock the parent heap relation */
1619         heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1620         /* And the target index relation */
1621         indexRelation = index_open(indexId, RowExclusiveLock);
1622
1623         /*
1624          * Fetch info needed for index_insert.  (You might think this should be
1625          * passed in from DefineIndex, but its copy is long gone due to having
1626          * been built in a previous transaction.)
1627          */
1628         indexInfo = BuildIndexInfo(indexRelation);
1629
1630         /* mark build is concurrent just for consistency */
1631         indexInfo->ii_Concurrent = true;
1632
1633         /*
1634          * Scan the index and gather up all the TIDs into a tuplesort object.
1635          */
1636         ivinfo.index = indexRelation;
1637         ivinfo.vacuum_full = false;
1638         ivinfo.message_level = DEBUG2;
1639         ivinfo.num_heap_tuples = -1;
1640
1641         state.tuplesort = tuplesort_begin_datum(TIDOID,
1642                                                                                         TIDLessOperator,
1643                                                                                         maintenance_work_mem,
1644                                                                                         false);
1645         state.htups = state.itups = state.tups_inserted = 0;
1646
1647         (void) index_bulk_delete(&ivinfo, NULL,
1648                                                          validate_index_callback, (void *) &state);
1649
1650         /* Execute the sort */
1651         tuplesort_performsort(state.tuplesort);
1652
1653         /*
1654          * Now scan the heap and "merge" it with the index
1655          */
1656         validate_index_heapscan(heapRelation,
1657                                                         indexRelation,
1658                                                         indexInfo,
1659                                                         snapshot,
1660                                                         &state);
1661
1662         /* Done with tuplesort object */
1663         tuplesort_end(state.tuplesort);
1664
1665         elog(DEBUG2,
1666                  "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
1667                  state.htups, state.itups, state.tups_inserted);
1668
1669         /* Close rels, but keep locks */
1670         index_close(indexRelation, NoLock);
1671         heap_close(heapRelation, NoLock);
1672 }
1673
1674 /*
1675  * validate_index_callback - bulkdelete callback to collect the index TIDs
1676  */
1677 static bool
1678 validate_index_callback(ItemPointer itemptr, void *opaque)
1679 {
1680         v_i_state  *state = (v_i_state *) opaque;
1681
1682         tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
1683         state->itups += 1;
1684         return false;                           /* never actually delete anything */
1685 }
1686
1687 /*
1688  * validate_index_heapscan - second table scan for concurrent index build
1689  *
1690  * This has much code in common with IndexBuildHeapScan, but it's enough
1691  * different that it seems cleaner to have two routines not one.
1692  */
1693 static void
1694 validate_index_heapscan(Relation heapRelation,
1695                                                 Relation indexRelation,
1696                                                 IndexInfo *indexInfo,
1697                                                 Snapshot snapshot,
1698                                                 v_i_state *state)
1699 {
1700         HeapScanDesc scan;
1701         HeapTuple       heapTuple;
1702         Datum           values[INDEX_MAX_KEYS];
1703         bool            isnull[INDEX_MAX_KEYS];
1704         List       *predicate;
1705         TupleTableSlot *slot;
1706         EState     *estate;
1707         ExprContext *econtext;
1708
1709         /* state variables for the merge */
1710         ItemPointer indexcursor = NULL;
1711         bool            tuplesort_empty = false;
1712
1713         /*
1714          * sanity checks
1715          */
1716         Assert(OidIsValid(indexRelation->rd_rel->relam));
1717
1718         /*
1719          * Need an EState for evaluation of index expressions and partial-index
1720          * predicates.  Also a slot to hold the current tuple.
1721          */
1722         estate = CreateExecutorState();
1723         econtext = GetPerTupleExprContext(estate);
1724         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1725
1726         /* Arrange for econtext's scan tuple to be the tuple under test */
1727         econtext->ecxt_scantuple = slot;
1728
1729         /* Set up execution state for predicate, if any. */
1730         predicate = (List *)
1731                 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1732                                                 estate);
1733
1734         /*
1735          * Prepare for scan of the base relation.  We need just those tuples
1736          * satisfying the passed-in reference snapshot.
1737          */
1738         scan = heap_beginscan(heapRelation, /* relation */
1739                                                   snapshot,             /* seeself */
1740                                                   0,    /* number of keys */
1741                                                   NULL);        /* scan key */
1742
1743         /*
1744          * Scan all tuples matching the snapshot.
1745          */
1746         while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1747         {
1748                 ItemPointer heapcursor = &heapTuple->t_self;
1749
1750                 CHECK_FOR_INTERRUPTS();
1751
1752                 state->htups += 1;
1753
1754                 /*
1755                  * "merge" by skipping through the index tuples until we find or pass
1756                  * the current heap tuple.
1757                  */
1758                 while (!tuplesort_empty &&
1759                            (!indexcursor ||
1760                                 ItemPointerCompare(indexcursor, heapcursor) < 0))
1761                 {
1762                         Datum           ts_val;
1763                         bool            ts_isnull;
1764
1765                         if (indexcursor)
1766                                 pfree(indexcursor);
1767                         tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1768                                                                                                   &ts_val, &ts_isnull);
1769                         Assert(tuplesort_empty || !ts_isnull);
1770                         indexcursor = (ItemPointer) DatumGetPointer(ts_val);
1771                 }
1772
1773                 if (tuplesort_empty ||
1774                         ItemPointerCompare(indexcursor, heapcursor) > 0)
1775                 {
1776                         /*
1777                          * We've overshot which means this heap tuple is missing from the
1778                          * index, so insert it.
1779                          */
1780                         bool            check_unique;
1781
1782                         MemoryContextReset(econtext->ecxt_per_tuple_memory);
1783
1784                         /* Set up for predicate or expression evaluation */
1785                         ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1786
1787                         /*
1788                          * In a partial index, discard tuples that don't satisfy the
1789                          * predicate.
1790                          */
1791                         if (predicate != NIL)
1792                         {
1793                                 if (!ExecQual(predicate, econtext, false))
1794                                         continue;
1795                         }
1796
1797                         /*
1798                          * For the current heap tuple, extract all the attributes we use
1799                          * in this index, and note which are null.      This also performs
1800                          * evaluation of any expressions needed.
1801                          */
1802                         FormIndexDatum(indexInfo,
1803                                                    slot,
1804                                                    estate,
1805                                                    values,
1806                                                    isnull);
1807
1808                         /*
1809                          * If the tuple is already committed dead, we still have to put it
1810                          * in the index (because some xacts might be able to see it), but
1811                          * we might as well suppress uniqueness checking. This is just an
1812                          * optimization because the index AM is not supposed to raise a
1813                          * uniqueness failure anyway.
1814                          */
1815                         if (indexInfo->ii_Unique)
1816                         {
1817                                 /* must hold a buffer lock to call HeapTupleSatisfiesNow */
1818                                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1819
1820                                 if (HeapTupleSatisfiesNow(heapTuple->t_data, scan->rs_cbuf))
1821                                         check_unique = true;
1822                                 else
1823                                         check_unique = false;
1824
1825                                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1826                         }
1827                         else
1828                                 check_unique = false;
1829
1830                         /*
1831                          * You'd think we should go ahead and build the index tuple here,
1832                          * but some index AMs want to do further processing on the data
1833                          * first. So pass the values[] and isnull[] arrays, instead.
1834                          */
1835                         index_insert(indexRelation,
1836                                                  values,
1837                                                  isnull,
1838                                                  heapcursor,
1839                                                  heapRelation,
1840                                                  check_unique);
1841
1842                         state->tups_inserted += 1;
1843                 }
1844         }
1845
1846         heap_endscan(scan);
1847
1848         ExecDropSingleTupleTableSlot(slot);
1849
1850         FreeExecutorState(estate);
1851
1852         /* These may have been pointing to the now-gone estate */
1853         indexInfo->ii_ExpressionsState = NIL;
1854         indexInfo->ii_PredicateState = NIL;
1855 }
1856
1857
1858 /*
1859  * IndexGetRelation: given an index's relation OID, get the OID of the
1860  * relation it is an index on.  Uses the system cache.
1861  */
1862 static Oid
1863 IndexGetRelation(Oid indexId)
1864 {
1865         HeapTuple       tuple;
1866         Form_pg_index index;
1867         Oid                     result;
1868
1869         tuple = SearchSysCache(INDEXRELID,
1870                                                    ObjectIdGetDatum(indexId),
1871                                                    0, 0, 0);
1872         if (!HeapTupleIsValid(tuple))
1873                 elog(ERROR, "cache lookup failed for index %u", indexId);
1874         index = (Form_pg_index) GETSTRUCT(tuple);
1875         Assert(index->indexrelid == indexId);
1876
1877         result = index->indrelid;
1878         ReleaseSysCache(tuple);
1879         return result;
1880 }
1881
1882 /*
1883  * reindex_index - This routine is used to recreate a single index
1884  */
1885 void
1886 reindex_index(Oid indexId)
1887 {
1888         Relation        iRel,
1889                                 heapRelation,
1890                                 pg_index;
1891         Oid                     heapId;
1892         bool            inplace;
1893         HeapTuple       indexTuple;
1894         Form_pg_index indexForm;
1895
1896         /*
1897          * Open and lock the parent heap relation.      ShareLock is sufficient since
1898          * we only need to be sure no schema or data changes are going on.
1899          */
1900         heapId = IndexGetRelation(indexId);
1901         heapRelation = heap_open(heapId, ShareLock);
1902
1903         /*
1904          * Open the target index relation and get an exclusive lock on it, to
1905          * ensure that no one else is touching this particular index.
1906          */
1907         iRel = index_open(indexId, AccessExclusiveLock);
1908
1909         /*
1910          * If it's a shared index, we must do inplace processing (because we have
1911          * no way to update relfilenode in other databases).  Otherwise we can do
1912          * it the normal transaction-safe way.
1913          *
1914          * Since inplace processing isn't crash-safe, we only allow it in a
1915          * standalone backend.  (In the REINDEX TABLE and REINDEX DATABASE cases,
1916          * the caller should have detected this.)
1917          */
1918         inplace = iRel->rd_rel->relisshared;
1919
1920         if (inplace && IsUnderPostmaster)
1921                 ereport(ERROR,
1922                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1923                                  errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
1924                                                 RelationGetRelationName(iRel))));
1925
1926         PG_TRY();
1927         {
1928                 IndexInfo  *indexInfo;
1929
1930                 /* Suppress use of the target index while rebuilding it */
1931                 SetReindexProcessing(heapId, indexId);
1932
1933                 /* Fetch info needed for index_build */
1934                 indexInfo = BuildIndexInfo(iRel);
1935
1936                 if (inplace)
1937                 {
1938                         /* Truncate the actual file (and discard buffers) */
1939                         RelationTruncate(iRel, 0);
1940                 }
1941                 else
1942                 {
1943                         /*
1944                          * We'll build a new physical relation for the index.
1945                          */
1946                         setNewRelfilenode(iRel);
1947                 }
1948
1949                 /* Initialize the index and rebuild */
1950                 /* Note: we do not need to re-establish pkey setting */
1951                 index_build(heapRelation, iRel, indexInfo, false);
1952         }
1953         PG_CATCH();
1954         {
1955                 /* Make sure flag gets cleared on error exit */
1956                 ResetReindexProcessing();
1957                 PG_RE_THROW();
1958         }
1959         PG_END_TRY();
1960         ResetReindexProcessing();
1961
1962         /*
1963          * If the index is marked invalid (ie, it's from a failed CREATE INDEX
1964          * CONCURRENTLY), we can now mark it valid.  This allows REINDEX to be
1965          * used to clean up in such cases.
1966          */
1967         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1968
1969         indexTuple = SearchSysCacheCopy(INDEXRELID,
1970                                                                         ObjectIdGetDatum(indexId),
1971                                                                         0, 0, 0);
1972         if (!HeapTupleIsValid(indexTuple))
1973                 elog(ERROR, "cache lookup failed for index %u", indexId);
1974         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1975
1976         if (!indexForm->indisvalid)
1977         {
1978                 indexForm->indisvalid = true;
1979                 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1980                 CatalogUpdateIndexes(pg_index, indexTuple);
1981         }
1982         heap_close(pg_index, RowExclusiveLock);
1983
1984         /* Close rels, but keep locks */
1985         index_close(iRel, NoLock);
1986         heap_close(heapRelation, NoLock);
1987 }
1988
1989 /*
1990  * reindex_relation - This routine is used to recreate all indexes
1991  * of a relation (and optionally its toast relation too, if any).
1992  *
1993  * Returns true if any indexes were rebuilt.  Note that a
1994  * CommandCounterIncrement will occur after each index rebuild.
1995  */
1996 bool
1997 reindex_relation(Oid relid, bool toast_too)
1998 {
1999         Relation        rel;
2000         Oid                     toast_relid;
2001         bool            is_pg_class;
2002         bool            result;
2003         List       *indexIds,
2004                            *doneIndexes;
2005         ListCell   *indexId;
2006
2007         /*
2008          * Open and lock the relation.  ShareLock is sufficient since we only need
2009          * to prevent schema and data changes in it.
2010          */
2011         rel = heap_open(relid, ShareLock);
2012
2013         toast_relid = rel->rd_rel->reltoastrelid;
2014
2015         /*
2016          * Get the list of index OIDs for this relation.  (We trust to the
2017          * relcache to get this with a sequential scan if ignoring system
2018          * indexes.)
2019          */
2020         indexIds = RelationGetIndexList(rel);
2021
2022         /*
2023          * reindex_index will attempt to update the pg_class rows for the relation
2024          * and index.  If we are processing pg_class itself, we want to make sure
2025          * that the updates do not try to insert index entries into indexes we
2026          * have not processed yet.      (When we are trying to recover from corrupted
2027          * indexes, that could easily cause a crash.) We can accomplish this
2028          * because CatalogUpdateIndexes will use the relcache's index list to know
2029          * which indexes to update. We just force the index list to be only the
2030          * stuff we've processed.
2031          *
2032          * It is okay to not insert entries into the indexes we have not processed
2033          * yet because all of this is transaction-safe.  If we fail partway
2034          * through, the updated rows are dead and it doesn't matter whether they
2035          * have index entries.  Also, a new pg_class index will be created with an
2036          * entry for its own pg_class row because we do setNewRelfilenode() before
2037          * we do index_build().
2038          *
2039          * Note that we also clear pg_class's rd_oidindex until the loop is done,
2040          * so that that index can't be accessed either.  This means we cannot
2041          * safely generate new relation OIDs while in the loop; shouldn't be a
2042          * problem.
2043          */
2044         is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
2045         doneIndexes = NIL;
2046
2047         /* Reindex all the indexes. */
2048         foreach(indexId, indexIds)
2049         {
2050                 Oid                     indexOid = lfirst_oid(indexId);
2051
2052                 if (is_pg_class)
2053                         RelationSetIndexList(rel, doneIndexes, InvalidOid);
2054
2055                 reindex_index(indexOid);
2056
2057                 CommandCounterIncrement();
2058
2059                 if (is_pg_class)
2060                         doneIndexes = lappend_oid(doneIndexes, indexOid);
2061         }
2062
2063         if (is_pg_class)
2064                 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
2065
2066         /*
2067          * Close rel, but continue to hold the lock.
2068          */
2069         heap_close(rel, NoLock);
2070
2071         result = (indexIds != NIL);
2072
2073         /*
2074          * If the relation has a secondary toast rel, reindex that too while we
2075          * still hold the lock on the master table.
2076          */
2077         if (toast_too && OidIsValid(toast_relid))
2078                 result |= reindex_relation(toast_relid, false);
2079
2080         return result;
2081 }