1 /*-------------------------------------------------------------------------
4 * code to create and destroy POSTGRES index relations
6 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.274 2006/10/04 00:29:50 momjian Exp $
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
20 *-------------------------------------------------------------------------
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/transam.h"
29 #include "access/xact.h"
30 #include "bootstrap/bootstrap.h"
31 #include "catalog/catalog.h"
32 #include "catalog/dependency.h"
33 #include "catalog/heap.h"
34 #include "catalog/index.h"
35 #include "catalog/indexing.h"
36 #include "catalog/pg_constraint.h"
37 #include "catalog/pg_operator.h"
38 #include "catalog/pg_opclass.h"
39 #include "catalog/pg_type.h"
40 #include "executor/executor.h"
41 #include "miscadmin.h"
42 #include "optimizer/clauses.h"
43 #include "parser/parse_expr.h"
44 #include "storage/procarray.h"
45 #include "storage/smgr.h"
46 #include "utils/builtins.h"
47 #include "utils/fmgroids.h"
48 #include "utils/inval.h"
49 #include "utils/lsyscache.h"
50 #include "utils/memutils.h"
51 #include "utils/relcache.h"
52 #include "utils/syscache.h"
53 #include "utils/tuplesort.h"
56 /* state info for validate_index bulkdelete callback */
59 Tuplesortstate *tuplesort; /* for sorting the index TIDs */
60 /* statistics (for debug purposes only): */
66 /* non-export function prototypes */
67 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
70 static void InitializeAttributeOids(Relation indexRelation,
71 int numatts, Oid indexoid);
72 static void AppendAttributeTuples(Relation indexRelation, int numatts);
73 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
78 static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
79 Oid reltoastidxid, double reltuples);
80 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
81 static void validate_index_heapscan(Relation heapRelation,
82 Relation indexRelation,
86 static Oid IndexGetRelation(Oid indexId);
90 * ConstructTupleDescriptor
92 * Build an index tuple descriptor for a new index
95 ConstructTupleDescriptor(Relation heapRelation,
99 int numatts = indexInfo->ii_NumIndexAttrs;
100 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
101 TupleDesc heapTupDesc;
102 TupleDesc indexTupDesc;
103 int natts; /* #atts in heap rel --- for error checks */
/* Look up the tuple descriptor and column count of the heap; natts is the upper bound used by the column-number safety check in the loop. */
106 heapTupDesc = RelationGetDescr(heapRelation);
107 natts = RelationGetForm(heapRelation)->relnatts;
110 * allocate the new tuple descriptor
112 indexTupDesc = CreateTemplateTupleDesc(numatts, false);
115 * For simple index columns, we copy the pg_attribute row from the parent
116 * relation and modify it as necessary. For expressions we have to cons
117 * up a pg_attribute row the hard way.
119 for (i = 0; i < numatts; i++)
121 AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
122 Form_pg_attribute to = indexTupDesc->attrs[i];
124 Form_pg_type typeTup;
129 /* Simple index column */
130 Form_pg_attribute from;
135 * here we are indexing on a system attribute (-1...-n)
137 from = SystemAttributeDefinition(atnum,
138 heapRelation->rd_rel->relhasoids);
143 * here we are indexing on a normal attribute (1...n)
145 if (atnum > natts) /* safety check */
146 elog(ERROR, "invalid column number %d", atnum);
147 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
151 * now that we've determined the "from", let's copy the tuple desc
154 memcpy(to, from, ATTRIBUTE_TUPLE_SIZE);
157 * Fix the stuff that should not be the same as the underlying
162 to->attstattarget = -1;
163 to->attcacheoff = -1;
164 to->attnotnull = false;
165 to->atthasdef = false;
166 to->attislocal = true;
171 /* Expressional index */
174 MemSet(to, 0, ATTRIBUTE_TUPLE_SIZE);
176 if (indexpr_item == NULL) /* shouldn't happen */
177 elog(ERROR, "too few entries in indexprs list");
178 indexkey = (Node *) lfirst(indexpr_item);
179 indexpr_item = lnext(indexpr_item);
182 * Make the attribute's name "pg_expresssion_nnn" (maybe think of
183 * something better later)
185 sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
188 * Lookup the expression type in pg_type for the type length etc.
190 keyType = exprType(indexkey);
191 tuple = SearchSysCache(TYPEOID,
192 ObjectIdGetDatum(keyType),
194 if (!HeapTupleIsValid(tuple))
195 elog(ERROR, "cache lookup failed for type %u", keyType);
196 typeTup = (Form_pg_type) GETSTRUCT(tuple);
199 * Assign some of the attributes values. Leave the rest as 0.
202 to->atttypid = keyType;
203 to->attlen = typeTup->typlen;
204 to->attbyval = typeTup->typbyval;
205 to->attstorage = typeTup->typstorage;
206 to->attalign = typeTup->typalign;
207 to->attstattarget = -1;
208 to->attcacheoff = -1;
210 to->attislocal = true;
212 ReleaseSysCache(tuple);
216 * We do not yet have the correct relation OID for the index, so just
217 * set it invalid for now. InitializeAttributeOids() will fix it
220 to->attrelid = InvalidOid;
223 * Check the opclass to see if it provides a keytype (overriding the
226 tuple = SearchSysCache(CLAOID,
227 ObjectIdGetDatum(classObjectId[i]),
229 if (!HeapTupleIsValid(tuple))
230 elog(ERROR, "cache lookup failed for opclass %u",
232 keyType = ((Form_pg_opclass) GETSTRUCT(tuple))->opckeytype;
233 ReleaseSysCache(tuple);
/* keyType now holds opckeytype of the opclass; the branch below applies it only when it is a valid OID that differs from the type chosen above. */
235 if (OidIsValid(keyType) && keyType != to->atttypid)
237 /* index value and heap value have different types */
238 tuple = SearchSysCache(TYPEOID,
239 ObjectIdGetDatum(keyType),
241 if (!HeapTupleIsValid(tuple))
242 elog(ERROR, "cache lookup failed for type %u", keyType);
243 typeTup = (Form_pg_type) GETSTRUCT(tuple);
245 to->atttypid = keyType;
247 to->attlen = typeTup->typlen;
248 to->attbyval = typeTup->typbyval;
249 to->attalign = typeTup->typalign;
250 to->attstorage = typeTup->typstorage;
252 ReleaseSysCache(tuple);
259 /* ----------------------------------------------------------------
260 * InitializeAttributeOids
261 * ----------------------------------------------------------------
264 InitializeAttributeOids(Relation indexRelation,
268 TupleDesc tupleDescriptor;
271 tupleDescriptor = RelationGetDescr(indexRelation);
/* Store indexoid as attrelid in each of the first numatts attribute entries of the index descriptor. */
273 for (i = 0; i < numatts; i += 1)
274 tupleDescriptor->attrs[i]->attrelid = indexoid;
277 /* ----------------------------------------------------------------
278 * AppendAttributeTuples
279 * ----------------------------------------------------------------
282 AppendAttributeTuples(Relation indexRelation, int numatts)
284 Relation pg_attribute;
285 CatalogIndexState indstate;
286 TupleDesc indexTupDesc;
291 * open the attribute relation and its indexes
293 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
295 indstate = CatalogOpenIndexes(pg_attribute);
298 * insert data from new index's tupdesc into pg_attribute
300 indexTupDesc = RelationGetDescr(indexRelation);
302 for (i = 0; i < numatts; i++)
305 * There used to be very grotty code here to set these fields, but I
306 * think it's unnecessary. They should be set already.
308 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
309 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
/* Build a pg_attribute tuple from descriptor entry i, insert it into the heap, add its catalog index entries, then free the local tuple. */
311 new_tuple = heap_addheader(Natts_pg_attribute,
313 ATTRIBUTE_TUPLE_SIZE,
314 (void *) indexTupDesc->attrs[i]);
316 simple_heap_insert(pg_attribute, new_tuple);
318 CatalogIndexInsert(indstate, new_tuple);
320 heap_freetuple(new_tuple);
323 CatalogCloseIndexes(indstate);
325 heap_close(pg_attribute, RowExclusiveLock);
328 /* ----------------------------------------------------------------
329 * UpdateIndexRelation
331 * Construct and insert a new entry in the pg_index catalog
332 * ----------------------------------------------------------------
335 UpdateIndexRelation(Oid indexoid,
337 IndexInfo *indexInfo,
346 Datum values[Natts_pg_index];
347 char nulls[Natts_pg_index];
353 * Copy the index key and opclass info into arrays (should we make the
354 * caller pass them like this to start with?)
356 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
357 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
/* buildint2vector was passed NULL above, so the key attribute numbers are copied into indkey by this loop; indclass was built directly from classOids. */
358 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
359 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
362 * Convert the index expressions (if any) to a text datum
364 if (indexInfo->ii_Expressions != NIL)
368 exprsString = nodeToString(indexInfo->ii_Expressions);
369 exprsDatum = DirectFunctionCall1(textin,
370 CStringGetDatum(exprsString));
374 exprsDatum = (Datum) 0;
377 * Convert the index predicate (if any) to a text datum. Note we convert
378 * implicit-AND format to normal explicit-AND for storage.
380 if (indexInfo->ii_Predicate != NIL)
384 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
385 predDatum = DirectFunctionCall1(textin,
386 CStringGetDatum(predString));
390 predDatum = (Datum) 0;
393 * open the system catalog index relation
395 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
398 * Build a pg_index tuple
400 MemSet(nulls, ' ', sizeof(nulls));
/* Every nulls[] entry starts as a blank; the indexprs and indpred entries are switched to the letter n below when their datum is (Datum) 0. */
402 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
403 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
404 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
405 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
406 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
407 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
408 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
409 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
410 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
411 values[Anum_pg_index_indexprs - 1] = exprsDatum;
412 if (exprsDatum == (Datum) 0)
413 nulls[Anum_pg_index_indexprs - 1] = 'n';
414 values[Anum_pg_index_indpred - 1] = predDatum;
415 if (predDatum == (Datum) 0)
416 nulls[Anum_pg_index_indpred - 1] = 'n';
418 tuple = heap_formtuple(RelationGetDescr(pg_index), values, nulls);
421 * insert the tuple into the pg_index catalog
423 simple_heap_insert(pg_index, tuple);
425 /* update the indexes on pg_index */
426 CatalogUpdateIndexes(pg_index, tuple);
429 * close the relation and free the tuple
431 heap_close(pg_index, RowExclusiveLock);
432 heap_freetuple(tuple);
439 * heapRelationId: OID of table to build index on
440 * indexRelationName: name of the index relation to create
441 * indexRelationId: normally, pass InvalidOid to let this routine
442 * generate an OID for the index. During bootstrap this may be
443 * nonzero to specify a preselected OID.
444 * indexInfo: same info executor uses to insert into the index
445 * accessMethodObjectId: OID of index AM to use
446 * tableSpaceId: OID of tablespace to use
447 * classObjectId: array of index opclass OIDs, one per index column
448 * reloptions: AM-specific options
449 * isprimary: index is a PRIMARY KEY
450 * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
451 * allow_system_table_mods: allow table to be a system catalog
452 * skip_build: true to skip the index_build() step for the moment; caller
453 * must do it later (typically via reindex_index())
454 * concurrent: if true, do not lock the table against writers. The index
455 * will be marked "invalid" and the caller must take additional steps
458 * Returns OID of the created index.
461 index_create(Oid heapRelationId,
462 const char *indexRelationName,
464 IndexInfo *indexInfo,
465 Oid accessMethodObjectId,
471 bool allow_system_table_mods,
476 Relation heapRelation;
477 Relation indexRelation;
478 TupleDesc indexTupDesc;
479 bool shared_relation;
483 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
486 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
487 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
490 heapRelation = heap_open(heapRelationId,
491 (concurrent ? ShareUpdateExclusiveLock : ShareLock));
494 * The index will be in the same namespace as its parent table, and is
495 * shared across databases if and only if the parent is.
497 namespaceId = RelationGetNamespace(heapRelation);
498 shared_relation = heapRelation->rd_rel->relisshared;
503 if (indexInfo->ii_NumIndexAttrs < 1)
504 elog(ERROR, "must index at least one column");
506 if (!allow_system_table_mods &&
507 IsSystemRelation(heapRelation) &&
508 IsNormalProcessingMode())
510 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
511 errmsg("user-defined indexes on system catalog tables are not supported")));
514 * concurrent index build on a system catalog is unsafe because we tend to
515 * release locks before committing in catalogs
518 IsSystemRelation(heapRelation))
520 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
521 errmsg("concurrent index creation on system catalog tables is not supported")));
524 * We cannot allow indexing a shared relation after initdb (because
525 * there's no way to make the entry in other databases' pg_class).
527 if (shared_relation && !IsBootstrapProcessingMode())
529 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
530 errmsg("shared indexes cannot be created after initdb")));
532 if (get_relname_relid(indexRelationName, namespaceId))
534 (errcode(ERRCODE_DUPLICATE_TABLE),
535 errmsg("relation \"%s\" already exists",
536 indexRelationName)));
539 * construct tuple descriptor for index tuples
541 indexTupDesc = ConstructTupleDescriptor(heapRelation,
546 * Allocate an OID for the index, unless we were told what to use.
548 * The OID will be the relfilenode as well, so make sure it doesn't
549 * collide with either pg_class OIDs or existing physical files.
551 if (!OidIsValid(indexRelationId))
552 indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
556 * create the index relation's relcache entry and physical disk file. (If
557 * we fail further down, it's the smgr's responsibility to remove the disk
560 indexRelation = heap_create(indexRelationName,
567 allow_system_table_mods);
569 Assert(indexRelationId == RelationGetRelid(indexRelation));
572 * Obtain exclusive lock on it. Although no other backends can see it
573 * until we commit, this prevents deadlock-risk complaints from lock
574 * manager in cases such as CLUSTER.
576 LockRelation(indexRelation, AccessExclusiveLock);
579 * Fill in fields of the index's pg_class entry that are not set correctly
582 * XXX should have a cleaner way to create cataloged indexes
584 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
585 indexRelation->rd_rel->relam = accessMethodObjectId;
586 indexRelation->rd_rel->relkind = RELKIND_INDEX;
587 indexRelation->rd_rel->relhasoids = false;
/* The owner is copied from the heap relation; access method, relkind and relhasoids are set explicitly before the pg_class row is stored. */
590 * store index's pg_class entry
592 InsertPgClassTuple(pg_class, indexRelation,
593 RelationGetRelid(indexRelation),
596 /* done with pg_class */
597 heap_close(pg_class, RowExclusiveLock);
600 * now update the object id's of all the attribute tuple forms in the
601 * index relation's tuple descriptor
603 InitializeAttributeOids(indexRelation,
604 indexInfo->ii_NumIndexAttrs,
608 * append ATTRIBUTE tuples for the index
610 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
614 * (append INDEX tuple)
616 * Note that this stows away a representation of "predicate".
617 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
620 UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
621 classObjectId, isprimary, !concurrent);
/* NOTE(review): the final argument is !concurrent; presumably it is received as the isvalid flag stored in indisvalid, so a concurrent build starts out not valid -- confirm against the full signature. */
624 * Register constraint and dependencies for the index.
626 * If the index is from a CONSTRAINT clause, construct a pg_constraint
627 * entry. The index is then linked to the constraint, which in turn is
628 * linked to the table. If it's not a CONSTRAINT, make the dependency
629 * directly on the table.
631 * We don't need a dependency on the namespace, because there'll be an
632 * indirect dependency via our parent table.
634 * During bootstrap we can't register any dependencies, and we don't try
635 * to make a constraint either.
637 if (!IsBootstrapProcessingMode())
639 ObjectAddress myself,
642 myself.classId = RelationRelationId;
643 myself.objectId = indexRelationId;
644 myself.objectSubId = 0;
652 constraintType = CONSTRAINT_PRIMARY;
653 else if (indexInfo->ii_Unique)
654 constraintType = CONSTRAINT_UNIQUE;
657 elog(ERROR, "constraint must be PRIMARY or UNIQUE");
658 constraintType = 0; /* keep compiler quiet */
661 /* Shouldn't have any expressions */
662 if (indexInfo->ii_Expressions)
663 elog(ERROR, "constraints can't have index expressions");
665 conOid = CreateConstraintEntry(indexRelationName,
668 false, /* isDeferrable */
669 false, /* isDeferred */
671 indexInfo->ii_KeyAttrNumbers,
672 indexInfo->ii_NumIndexAttrs,
673 InvalidOid, /* no domain */
674 InvalidOid, /* no foreign key */
680 InvalidOid, /* no associated index */
681 NULL, /* no check constraint */
685 referenced.classId = ConstraintRelationId;
686 referenced.objectId = conOid;
687 referenced.objectSubId = 0;
689 recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
693 /* Create auto dependencies on simply-referenced columns */
694 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
696 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
698 referenced.classId = RelationRelationId;
699 referenced.objectId = heapRelationId;
700 referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
702 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
707 /* Store dependency on operator classes */
708 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
710 referenced.classId = OperatorClassRelationId;
711 referenced.objectId = classObjectId[i];
712 referenced.objectSubId = 0;
714 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
717 /* Store dependencies on anything mentioned in index expressions */
718 if (indexInfo->ii_Expressions)
720 recordDependencyOnSingleRelExpr(&myself,
721 (Node *) indexInfo->ii_Expressions,
727 /* Store dependencies on anything mentioned in predicate */
728 if (indexInfo->ii_Predicate)
730 recordDependencyOnSingleRelExpr(&myself,
731 (Node *) indexInfo->ii_Predicate,
739 * Advance the command counter so that we can see the newly-entered
740 * catalog tuples for the index.
742 CommandCounterIncrement();
745 * In bootstrap mode, we have to fill in the index strategy structure with
746 * information from the catalogs. If we aren't bootstrapping, then the
747 * relcache entry has already been rebuilt thanks to sinval update during
748 * CommandCounterIncrement.
750 if (IsBootstrapProcessingMode())
751 RelationInitIndexAccessInfo(indexRelation);
/* Whichever path was taken, rd_indexcxt is required to be set by this point. */
753 Assert(indexRelation->rd_indexcxt != NULL);
756 * If this is bootstrap (initdb) time, then we don't actually fill in the
757 * index yet. We'll be creating more indexes and classes later, so we
758 * delay filling them in until just before we're done with bootstrapping.
759 * Similarly, if the caller specified skip_build then filling the index is
760 * delayed till later (ALTER TABLE can save work in some cases with this).
761 * Otherwise, we call the AM routine that constructs the index.
763 if (IsBootstrapProcessingMode())
765 index_register(heapRelationId, indexRelationId, indexInfo);
770 * Caller is responsible for filling the index later on. However,
771 * we'd better make sure that the heap relation is correctly marked as
774 index_update_stats(heapRelation,
778 heapRelation->rd_rel->reltuples);
779 /* Make the above update visible */
780 CommandCounterIncrement();
784 index_build(heapRelation, indexRelation, indexInfo, isprimary);
788 * Close the heap and index; but we keep the locks that we acquired above
789 * until end of transaction.
791 index_close(indexRelation, NoLock);
792 heap_close(heapRelation, NoLock);
/* The returned OID is either the one passed in or the one obtained from GetNewRelFileNode above. */
794 return indexRelationId;
800 * NOTE: this routine should now only be called through performDeletion(),
801 * else associated dependencies won't be cleaned up.
804 index_drop(Oid indexId)
807 Relation userHeapRelation;
808 Relation userIndexRelation;
809 Relation indexRelation;
814 * To drop an index safely, we must grab exclusive lock on its parent
815 * table; otherwise there could be other backends using the index!
816 * Exclusive lock on the index alone is insufficient because another
817 * backend might be in the midst of devising a query plan that will use
818 * the index. The parser and planner take care to hold an appropriate
819 * lock on the parent table while working, but having them hold locks on
820 * all the indexes too seems overly expensive. We do grab exclusive lock
821 * on the index too, just to be safe. Both locks must be held till end of
822 * transaction, else other backends will still see this index in pg_index.
824 heapId = IndexGetRelation(indexId);
825 userHeapRelation = heap_open(heapId, AccessExclusiveLock);
/* The parent heap is opened and locked first, then the index itself. */
827 userIndexRelation = index_open(indexId, AccessExclusiveLock);
830 * Schedule physical removal of the file
832 RelationOpenSmgr(userIndexRelation);
833 smgrscheduleunlink(userIndexRelation->rd_smgr,
834 userIndexRelation->rd_istemp);
837 * Close and flush the index's relcache entry, to ensure relcache doesn't
838 * try to rebuild it while we're deleting catalog entries. We keep the
841 index_close(userIndexRelation, NoLock);
843 RelationForgetRelation(indexId);
846 * fix INDEX relation, and check for expressional index
848 indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
850 tuple = SearchSysCache(INDEXRELID,
851 ObjectIdGetDatum(indexId),
853 if (!HeapTupleIsValid(tuple))
854 elog(ERROR, "cache lookup failed for index %u", indexId);
/* Record whether the indexprs column of the pg_index row is non-null, then delete that row. */
856 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
858 simple_heap_delete(indexRelation, &tuple->t_self);
860 ReleaseSysCache(tuple);
861 heap_close(indexRelation, RowExclusiveLock);
864 * if it has any expression columns, we might have stored statistics about
868 RemoveStatistics(indexId, 0);
871 * fix ATTRIBUTE relation
873 DeleteAttributeTuples(indexId);
876 * fix RELATION relation
878 DeleteRelationTuple(indexId);
881 * We are presently too lazy to attempt to compute the new correct value
882 * of relhasindex (the next VACUUM will fix it if necessary). So there is
883 * no need to update the pg_class tuple for the owning relation. But we
884 * must send out a shared-cache-inval notice on the owning relation to
885 * ensure other backends update their relcache lists of indexes.
887 CacheInvalidateRelcache(userHeapRelation);
890 * Close owning rel, but keep lock
892 heap_close(userHeapRelation, NoLock);
895 /* ----------------------------------------------------------------
896 * index_build support
897 * ----------------------------------------------------------------
902 * Construct an IndexInfo record for an open index
904 * IndexInfo stores the information about the index that's needed by
905 * FormIndexDatum, which is used for both index_build() and later insertion
906 * of individual index tuples. Normally we build an IndexInfo for an index
907 * just once per command, and then use it for (potentially) many tuples.
911 BuildIndexInfo(Relation index)
913 IndexInfo *ii = makeNode(IndexInfo);
914 Form_pg_index indexStruct = index->rd_index;
918 /* check the number of keys, and copy attr numbers into the IndexInfo */
919 numKeys = indexStruct->indnatts;
920 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
921 elog(ERROR, "invalid indnatts %d for index %u",
922 numKeys, RelationGetRelid(index));
/* numKeys has passed the 1..INDEX_MAX_KEYS range check, so the copy loop below runs at most INDEX_MAX_KEYS times. */
923 ii->ii_NumIndexAttrs = numKeys;
924 for (i = 0; i < numKeys; i++)
925 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
927 /* fetch any expressions needed for expressional indexes */
928 ii->ii_Expressions = RelationGetIndexExpressions(index);
929 ii->ii_ExpressionsState = NIL;
931 /* fetch index predicate if any */
932 ii->ii_Predicate = RelationGetIndexPredicate(index);
933 ii->ii_PredicateState = NIL;
936 ii->ii_Unique = indexStruct->indisunique;
937 ii->ii_Concurrent = false; /* assume normal case */
944 * Construct values[] and isnull[] arrays for a new index tuple.
946 * indexInfo Info about the index
947 * slot Heap tuple for which we must prepare an index entry
948 * estate executor state for evaluating any index expressions
949 * values Array of index Datums (output area)
950 * isnull Array of is-null indicators (output area)
952 * When there are no index expressions, estate may be NULL. Otherwise it
953 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
954 * context must point to the heap tuple passed in.
956 * Notice we don't actually call index_form_tuple() here; we just prepare
957 * its input arrays values[] and isnull[]. This is because the index AM
958 * may wish to alter the data before storage.
962 FormIndexDatum(IndexInfo *indexInfo,
963 TupleTableSlot *slot,
968 ListCell *indexpr_item;
971 if (indexInfo->ii_Expressions != NIL &&
972 indexInfo->ii_ExpressionsState == NIL)
974 /* First time through, set up expression evaluation state */
975 indexInfo->ii_ExpressionsState = (List *)
976 ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
978 /* Check caller has set up context correctly */
979 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
981 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
/* indexpr_item walks ii_ExpressionsState and is advanced once for each expression evaluated in the loop. */
983 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
985 int keycol = indexInfo->ii_KeyAttrNumbers[i];
992 * Plain index column; get the value we need directly from the
995 iDatum = slot_getattr(slot, keycol, &isNull);
1000 * Index expression --- need to evaluate it.
1002 if (indexpr_item == NULL)
1003 elog(ERROR, "wrong number of index expressions");
1004 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1005 GetPerTupleExprContext(estate),
1008 indexpr_item = lnext(indexpr_item);
/* A state entry still remaining here means there were more prepared expressions than the loop consumed. */
1014 if (indexpr_item != NULL)
1015 elog(ERROR, "wrong number of index expressions");
1020 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1022 * This routine updates the pg_class row of either an index or its parent
1023 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
1024 * to ensure we can do all the necessary work in just one update.
1026 * hasindex: set relhasindex to this value
1027 * isprimary: if true, set relhaspkey true; else no change
1028 * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
1030 * reltuples: set reltuples to this value
1032 * relpages is also updated (using RelationGetNumberOfBlocks()).
1034 * NOTE: an important side-effect of this operation is that an SI invalidation
1035 * message is sent out to all backends --- including me --- causing relcache
1036 * entries to be flushed or updated with the new data. This must happen even
1037 * if we find that no change is needed in the pg_class row. When updating
1038 * a heap entry, this ensures that other backends find out about the new
1039 * index. When updating an index, it's important because some index AMs
1040 * expect a relcache flush to occur after REINDEX.
1043 index_update_stats(Relation rel, bool hasindex, bool isprimary,
1044 Oid reltoastidxid, double reltuples)
1046 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1047 Oid relid = RelationGetRelid(rel);
1050 Form_pg_class rd_rel;
1054 * We always update the pg_class row using a non-transactional,
1055 * overwrite-in-place update. There are several reasons for this:
1057 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1059 * 2. We could be reindexing pg_class itself, in which case we can't move
1060 * its pg_class row because CatalogUpdateIndexes might not know about all
1061 * the indexes yet (see reindex_relation).
1063 * 3. Because we execute CREATE INDEX with just share lock on the parent
1064 * rel (to allow concurrent index creations), an ordinary update could
1065 * suffer a tuple-concurrently-updated failure against another CREATE
1066 * INDEX committing at about the same time. We can avoid that by having
1067 * them both do nontransactional updates (we assume they will both be
1068 * trying to change the pg_class row to the same thing, so it doesn't
1069 * matter which goes first).
1071 * 4. Even with just a single CREATE INDEX, there's a risk factor because
1072 * someone else might be trying to open the rel while we commit, and this
1073 * creates a race condition as to whether he will see both or neither of
1074 * the pg_class row versions as valid. Again, a non-transactional update
1075 * avoids the risk. It is indeterminate which state of the row the other
1076 * process will see, but it doesn't matter (if he's only taking
1077 * AccessShareLock, then it's not critical that he see relhasindex true).
1079 * It is safe to use a non-transactional update even though our
1080 * transaction could still fail before committing. Setting relhasindex
1081 * true is safe even if there are no indexes (VACUUM will eventually fix
1082 * it), and of course the relpages and reltuples counts are correct (or at
1083 * least more so than the old values) regardless.
1086 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1089 * Make a copy of the tuple to update. Normally we use the syscache, but
1090 * we can't rely on that during bootstrap or while reindexing pg_class
1093 if (IsBootstrapProcessingMode() ||
1094 ReindexIsProcessingHeap(RelationRelationId))
1096 /* don't assume syscache will work */
1097 HeapScanDesc pg_class_scan;
1100 ScanKeyInit(&key[0],
1101 ObjectIdAttributeNumber,
1102 BTEqualStrategyNumber, F_OIDEQ,
1103 ObjectIdGetDatum(relid));
/* Heap-scan pg_class under SnapshotNow with a single key matching the row OID against relid, and keep a private copy of the first tuple returned. */
1105 pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
1106 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
/* NOTE(review): the scan result is copied before the validity check further down; presumably heap_copytuple tolerates an invalid tuple -- confirm. */
1107 tuple = heap_copytuple(tuple);
1108 heap_endscan(pg_class_scan);
1112 /* normal case, use syscache */
1113 tuple = SearchSysCacheCopy(RELOID,
1114 ObjectIdGetDatum(relid),
1118 if (!HeapTupleIsValid(tuple))
1119 elog(ERROR, "could not find tuple for relation %u", relid);
1120 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1122 /* Apply required updates, if any, to copied tuple */
1125 if (rd_rel->relhasindex != hasindex)
1127 rd_rel->relhasindex = hasindex;
1132 if (!rd_rel->relhaspkey)
1134 rd_rel->relhaspkey = true;
1138 if (OidIsValid(reltoastidxid))
1140 Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
1141 if (rd_rel->reltoastidxid != reltoastidxid)
1143 rd_rel->reltoastidxid = reltoastidxid;
1147 if (rd_rel->reltuples != (float4) reltuples)
1149 rd_rel->reltuples = (float4) reltuples;
1152 if (rd_rel->relpages != (int32) relpages)
1154 rd_rel->relpages = (int32) relpages;
1159 * If anything changed, write out the tuple
1163 heap_inplace_update(pg_class, tuple);
1164 /* the above sends a cache inval message */
1168 /* no need to change tuple, but force relcache inval anyway */
1169 CacheInvalidateRelcacheByTuple(tuple);
1172 heap_freetuple(tuple);
1174 heap_close(pg_class, RowExclusiveLock);
1178 * setNewRelfilenode - assign a new relfilenode value to the relation
1180 * Caller must already hold exclusive lock on the relation.
1183 setNewRelfilenode(Relation relation)
1186 RelFileNode newrnode;
1190 Form_pg_class rd_rel;
1192 /* Can't change relfilenode for nailed tables (indexes ok though) */
1193 Assert(!relation->rd_isnailed ||
1194 relation->rd_rel->relkind == RELKIND_INDEX);
1195 /* Can't change for shared tables or indexes */
1196 Assert(!relation->rd_rel->relisshared);
/* Given the assertion above, the relisshared value passed to GetNewRelFileNode below is expected to be false. */
1198 /* Allocate a new relfilenode */
1199 newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
1200 relation->rd_rel->relisshared,
1204 * Find the pg_class tuple for the given relation. This is not used
1205 * during bootstrap, so okay to use heap_update always.
1207 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1209 tuple = SearchSysCacheCopy(RELOID,
1210 ObjectIdGetDatum(RelationGetRelid(relation)),
1212 if (!HeapTupleIsValid(tuple))
1213 elog(ERROR, "could not find tuple for relation %u",
1214 RelationGetRelid(relation));
1215 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1217 /* create another storage file. Is it a little ugly ? */
1218 /* NOTE: any conflict in relfilenode value will be caught here */
1219 newrnode = relation->rd_node;
1220 newrnode.relNode = newrelfilenode;
/* newrnode is a copy of the current rd_node with only relNode replaced by the newly allocated value. */
1222 srel = smgropen(newrnode);
1223 smgrcreate(srel, relation->rd_istemp, false);
1226 /* schedule unlinking old relfilenode */
1227 RelationOpenSmgr(relation);
1228 smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
1230 /* update the pg_class row */
1231 rd_rel->relfilenode = newrelfilenode;
1232 rd_rel->relpages = 0; /* it's empty until further notice */
1233 rd_rel->reltuples = 0;
1234 simple_heap_update(pg_class, &tuple->t_self, tuple);
1235 CatalogUpdateIndexes(pg_class, tuple);
1237 heap_freetuple(tuple);
1239 heap_close(pg_class, RowExclusiveLock);
1241 /* Make sure the relfilenode change is visible */
1242 CommandCounterIncrement();
1247 * index_build - invoke access-method-specific index build procedure
1249 * On entry, the index's catalog entries are valid, and its physical disk
1250 * file has been created but is empty. We call the AM-specific build
1251 * procedure to fill in the index contents. We then update the pg_class
1252 * entries of the index and heap relation as needed, using statistics
1253 * returned by ambuild as well as data passed by the caller.
1255 * Note: when reindexing an existing index, isprimary can be false;
1256 * the index is already properly marked and need not be re-marked.
1258 * Note: before Postgres 8.2, the passed-in heap and index Relations
1259 * were automatically closed by this routine. This is no longer the case.
1260 * The caller opened 'em, and the caller should close 'em.
1263 index_build(Relation heapRelation,
1264 Relation indexRelation,
1265 IndexInfo *indexInfo,
1268 RegProcedure procedure;
1269 IndexBuildResult *stats;
/*
 * Flow: take the ambuild procedure from the rd_am entry of the index, call
 * it with the heap relation, index relation and IndexInfo, then pass the
 * returned heap_tuples and index_tuples counts to index_update_stats for
 * the heap and for the index respectively.  When the heap is a toast
 * table, the OID of the index is handed to the heap-side
 * index_update_stats call as well; any other heap passes InvalidOid in
 * that position.
 */
1274 Assert(RelationIsValid(indexRelation));
1275 Assert(PointerIsValid(indexRelation->rd_am));
1277 procedure = indexRelation->rd_am->ambuild;
1278 Assert(RegProcedureIsValid(procedure));
1281 * Call the access method's build procedure
1283 stats = (IndexBuildResult *)
1284 DatumGetPointer(OidFunctionCall3(procedure,
1285 PointerGetDatum(heapRelation),
1286 PointerGetDatum(indexRelation),
1287 PointerGetDatum(indexInfo)));
1288 Assert(PointerIsValid(stats));
1291 * Update heap and index pg_class rows
1293 index_update_stats(heapRelation,
1296 (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
1297 RelationGetRelid(indexRelation) : InvalidOid,
1298 stats->heap_tuples);
1300 index_update_stats(indexRelation,
1304 stats->index_tuples);
1306 /* Make the updated versions visible */
1307 CommandCounterIncrement();
1312 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
1314 * This is called back from an access-method-specific index build procedure
1315 * after the AM has done whatever setup it needs. The parent heap relation
1316 * is scanned to find tuples that should be entered into the index. Each
1317 * such tuple is passed to the AM's callback routine, which does the right
1318 * things to add it to the new index. After we return, the AM's index
1319 * build procedure does whatever cleanup it needs. It must not close the
1320 * heap and index relations; that is left to the caller of index_build.
1322 * The total count of heap tuples is returned. This is for updating pg_class
1323 * statistics. (The update itself is done later by index_build, using the
1324 * statistics returned by the AM.) Note that the index AM itself
1325 * must keep track of the number of index tuples; we don't do so here because
1326 * the AM might reject some of the tuples for its own reasons, such as being
1327 * unable to store NULLs.
1330 IndexBuildHeapScan(Relation heapRelation,
1331 Relation indexRelation,
1332 IndexInfo *indexInfo,
1333 IndexBuildCallback callback,
1334 void *callback_state)
1337 HeapTuple heapTuple;
1338 Datum values[INDEX_MAX_KEYS];
1339 bool isnull[INDEX_MAX_KEYS];
1342 TupleTableSlot *slot;
1344 ExprContext *econtext;
1346 TransactionId OldestXmin;
/*
 * values[] and isnull[] are scratch arrays: FormIndexDatum refills them
 * for each tuple that reaches it, and they are then handed to the callback.
 * OldestXmin is only consulted on the SnapshotAny path; the other two
 * paths set it to InvalidTransactionId.
 */
1351 Assert(OidIsValid(indexRelation->rd_rel->relam));
1354 * Need an EState for evaluation of index expressions and partial-index
1355 * predicates. Also a slot to hold the current tuple.
1357 estate = CreateExecutorState();
1358 econtext = GetPerTupleExprContext(estate);
1359 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1361 /* Arrange for econtext's scan tuple to be the tuple under test */
1362 econtext->ecxt_scantuple = slot;
1364 /* Set up execution state for predicate, if any. */
1365 predicate = (List *)
1366 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1370 * Prepare for scan of the base relation. In a normal index build, we use
1371 * SnapshotAny because we must retrieve all tuples and do our own time
1372 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1373 * concurrent build, we take a regular MVCC snapshot and index whatever's
1374 * live according to that. During bootstrap we just use SnapshotNow.
1376 if (IsBootstrapProcessingMode())
1378 snapshot = SnapshotNow;
1379 OldestXmin = InvalidTransactionId; /* not used */
1381 else if (indexInfo->ii_Concurrent)
1383 snapshot = CopySnapshot(GetTransactionSnapshot());
1384 OldestXmin = InvalidTransactionId; /* not used */
1388 snapshot = SnapshotAny;
1389 /* okay to ignore lazy VACUUMs here */
1390 OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
1393 scan = heap_beginscan(heapRelation, /* relation */
1394 snapshot, /* seeself */
1395 0, /* number of keys */
1396 NULL); /* scan key */
1401 * Scan all tuples in the base relation.
1403 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1407 CHECK_FOR_INTERRUPTS();
1409 if (snapshot == SnapshotAny)
1411 /* do our own time qual check */
1415 * We could possibly get away with not locking the buffer here,
1416 * since caller should hold ShareLock on the relation, but let's
1417 * be conservative about it.
1419 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
/*
 * Classify the tuple against OldestXmin.  Every case sets tupleIsAlive,
 * which is later passed through to the callback.
 */
1421 switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
1424 case HEAPTUPLE_DEAD:
1425 /* Definitely dead, we can ignore it */
1427 tupleIsAlive = false;
1429 case HEAPTUPLE_LIVE:
1430 /* Normal case, index and unique-check it */
1432 tupleIsAlive = true;
1434 case HEAPTUPLE_RECENTLY_DEAD:
1437 * If tuple is recently deleted then we must index it
1438 * anyway to preserve MVCC semantics. (Pre-existing
1439 * transactions could try to use the index after we finish
1440 * building it, and may need to see such tuples.) Exclude
1441 * it from unique-checking, however.
1444 tupleIsAlive = false;
1446 case HEAPTUPLE_INSERT_IN_PROGRESS:
1449 * Since caller should hold ShareLock or better, we should
1450 * not see any tuples inserted by open transactions ---
1451 * unless it's our own transaction. (Consider INSERT
1452 * followed by CREATE INDEX within a transaction.) An
1453 * exception occurs when reindexing a system catalog,
1454 * because we often release lock on system catalogs before
1457 if (!TransactionIdIsCurrentTransactionId(
1458 HeapTupleHeaderGetXmin(heapTuple->t_data))
1459 && !IsSystemRelation(heapRelation))
1460 elog(ERROR, "concurrent insert in progress");
1462 tupleIsAlive = true;
1464 case HEAPTUPLE_DELETE_IN_PROGRESS:
1467 * Since caller should hold ShareLock or better, we should
1468 * not see any tuples deleted by open transactions ---
1469 * unless it's our own transaction. (Consider DELETE
1470 * followed by CREATE INDEX within a transaction.) An
1471 * exception occurs when reindexing a system catalog,
1472 * because we often release lock on system catalogs before
1475 Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
1476 if (!TransactionIdIsCurrentTransactionId(
1477 HeapTupleHeaderGetXmax(heapTuple->t_data))
1478 && !IsSystemRelation(heapRelation))
1479 elog(ERROR, "concurrent delete in progress");
1481 tupleIsAlive = false;
1484 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1485 indexIt = tupleIsAlive = false; /* keep compiler quiet */
/* Classification is finished; drop the share lock taken before the switch. */
1489 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1496 /* heap_getnext did the time qual check */
1497 tupleIsAlive = true;
1502 MemoryContextReset(econtext->ecxt_per_tuple_memory);
1504 /* Set up for predicate or expression evaluation */
1505 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1508 * In a partial index, discard tuples that don't satisfy the
1511 if (predicate != NIL)
1513 if (!ExecQual(predicate, econtext, false))
1518 * For the current heap tuple, extract all the attributes we use in
1519 * this index, and note which are null. This also performs evaluation
1520 * of any expressions needed.
1522 FormIndexDatum(indexInfo,
1529 * You'd think we should go ahead and build the index tuple here, but
1530 * some index AMs want to do further processing on the data first. So
1531 * pass the values[] and isnull[] arrays, instead.
1534 /* Call the AM's callback routine to process the tuple */
1535 callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
/* Tear down the slot and the executor state created at the top. */
1541 ExecDropSingleTupleTableSlot(slot);
1543 FreeExecutorState(estate);
1545 /* These may have been pointing to the now-gone estate */
1546 indexInfo->ii_ExpressionsState = NIL;
1547 indexInfo->ii_PredicateState = NIL;
1554 * validate_index - support code for concurrent index builds
1556 * We do a concurrent index build by first building the index normally via
1557 * index_create(), while holding a weak lock that allows concurrent
1558 * insert/update/delete. Also, we index only tuples that are valid
1559 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
1560 * build takes care to include recently-dead tuples. This is OK because
1561 * we won't mark the index valid until all transactions that might be able
1562 * to see those tuples are gone. The reason for doing that is to avoid
1563 * bogus unique-index failures due to concurrent UPDATEs (we might see
1564 * different versions of the same row as being valid when we pass over them,
1565 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
1566 * does not contain any tuples added to the table while we built the index.
1568 * Next, we commit the transaction so that the index becomes visible to other
1569 * backends, but it is marked not "indisvalid" to prevent the planner from
1570 * relying on it for indexscans. Then we wait for all transactions that
1571 * could have been modifying the table to terminate. At this point we
1572 * know that any subsequently-started transactions will see the index and
1573 * insert their new tuples into it. We then take a new reference snapshot
1574 * which is passed to validate_index(). Any tuples that are valid according
1575 * to this snap, but are not in the index, must be added to the index.
1576 * (Any tuples committed live after the snap will be inserted into the
1577 * index by their originating transaction. Any tuples committed dead before
1578 * the snap need not be indexed, because we will wait out all transactions
1579 * that might care about them before we mark the index valid.)
1581 * validate_index() works by first gathering all the TIDs currently in the
1582 * index, using a bulkdelete callback that just stores the TIDs and doesn't
1583 * ever say "delete it". (This should be faster than a plain indexscan;
1584 * also, not all index AMs support full-index indexscan.) Then we sort the
1585 * TIDs, and finally scan the table doing a "merge join" against the TID list
1586 * to see which tuples are missing from the index. Thus we will ensure that
1587 * all tuples valid according to the reference snapshot are in the index.
1589 * Building a unique index this way is tricky: we might try to insert a
1590 * tuple that is already dead or is in process of being deleted, and we
1591 * mustn't have a uniqueness failure against an updated version of the same
1592 * row. We can check the tuple to see if it's already dead and tell
1593 * index_insert() not to do the uniqueness check, but that still leaves us
1594 * with a race condition against an in-progress update. To handle that,
1595 * we expect the index AM to recheck liveness of the to-be-inserted tuple
1596 * before it declares a uniqueness error.
1598 * After completing validate_index(), we wait until all transactions that
1599 * were alive at the time of the reference snapshot are gone; this is
1600 * necessary to be sure there are none left with a serializable snapshot
1601 * older than the reference (and hence possibly able to see tuples we did
1602 * not index). Then we mark the index valid and commit.
1604 * Doing two full table scans is a brute-force strategy. We could try to be
1605 * cleverer, eg storing new tuples in a special area of the table (perhaps
1606 * making the table append-only by setting use_fsm). However that would
1607 * add yet more locking issues.
1610 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
1612 Relation heapRelation,
1614 IndexInfo *indexInfo;
1615 IndexVacuumInfo ivinfo;
/*
 * Three phases: (1) collect every TID already present in the index into
 * state.tuplesort through index_bulk_delete, (2) sort those TIDs, and
 * (3) have validate_index_heapscan walk the heap and insert whatever the
 * index is missing.
 */
1618 /* Open and lock the parent heap relation */
1619 heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1620 /* And the target index relation */
1621 indexRelation = index_open(indexId, RowExclusiveLock);
1624 * Fetch info needed for index_insert. (You might think this should be
1625 * passed in from DefineIndex, but its copy is long gone due to having
1626 * been built in a previous transaction.)
1628 indexInfo = BuildIndexInfo(indexRelation);
1630 /* mark the build as concurrent, just for consistency */
1631 indexInfo->ii_Concurrent = true;
1634 * Scan the index and gather up all the TIDs into a tuplesort object.
1636 ivinfo.index = indexRelation;
1637 ivinfo.vacuum_full = false;
1638 ivinfo.message_level = DEBUG2;
/* NOTE(review): -1 presumably signals that no heap tuple count is available -- confirm against IndexVacuumInfo. */
1639 ivinfo.num_heap_tuples = -1;
1641 state.tuplesort = tuplesort_begin_datum(TIDOID,
1643 maintenance_work_mem,
1645 state.htups = state.itups = state.tups_inserted = 0;
/* validate_index_callback stores each TID and always returns false, so this pass deletes nothing. */
1647 (void) index_bulk_delete(&ivinfo, NULL,
1648 validate_index_callback, (void *) &state);
1650 /* Execute the sort */
1651 tuplesort_performsort(state.tuplesort);
1654 * Now scan the heap and "merge" it with the index
1656 validate_index_heapscan(heapRelation,
1662 /* Done with tuplesort object */
1663 tuplesort_end(state.tuplesort);
1666 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
1667 state.htups, state.itups, state.tups_inserted);
1669 /* Close rels, but keep locks */
1670 index_close(indexRelation, NoLock);
1671 heap_close(heapRelation, NoLock);
1675 * validate_index_callback - bulkdelete callback to collect the index TIDs
1678 validate_index_callback(ItemPointer itemptr, void *opaque)
/*
 * opaque is the v_i_state handed over by validate_index.  The TID is
 * appended to its tuplesort (the trailing false is presumably the is-null
 * flag, TODO confirm), and false is returned so the index entry is kept.
 */
1680 v_i_state *state = (v_i_state *) opaque;
1682 tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
1684 return false; /* never actually delete anything */
1688 * validate_index_heapscan - second table scan for concurrent index build
1690 * This has much code in common with IndexBuildHeapScan, but it's enough
1691 * different that it seems cleaner to have two routines not one.
1694 validate_index_heapscan(Relation heapRelation,
1695 Relation indexRelation,
1696 IndexInfo *indexInfo,
1701 HeapTuple heapTuple;
1702 Datum values[INDEX_MAX_KEYS];
1703 bool isnull[INDEX_MAX_KEYS];
1705 TupleTableSlot *slot;
1707 ExprContext *econtext;
1709 /* state variables for the merge */
1710 ItemPointer indexcursor = NULL;
1711 bool tuplesort_empty = false;
/*
 * indexcursor points at the TID most recently read from the sorted
 * tuplesort; tuplesort_empty becomes true once tuplesort_getdatum reports
 * that no entries are left.
 */
1716 Assert(OidIsValid(indexRelation->rd_rel->relam));
1719 * Need an EState for evaluation of index expressions and partial-index
1720 * predicates. Also a slot to hold the current tuple.
1722 estate = CreateExecutorState();
1723 econtext = GetPerTupleExprContext(estate);
1724 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1726 /* Arrange for econtext's scan tuple to be the tuple under test */
1727 econtext->ecxt_scantuple = slot;
1729 /* Set up execution state for predicate, if any. */
1730 predicate = (List *)
1731 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1735 * Prepare for scan of the base relation. We need just those tuples
1736 * satisfying the passed-in reference snapshot.
1738 scan = heap_beginscan(heapRelation, /* relation */
1739 snapshot, /* snapshot */
1740 0, /* number of keys */
1741 NULL); /* scan key */
1744 * Scan all tuples matching the snapshot.
1746 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1748 ItemPointer heapcursor = &heapTuple->t_self;
1750 CHECK_FOR_INTERRUPTS();
1755 * "merge" by skipping through the index tuples until we find or pass
1756 * the current heap tuple.
1758 while (!tuplesort_empty &&
1760 ItemPointerCompare(indexcursor, heapcursor) < 0))
1767 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1768 &ts_val, &ts_isnull);
1769 Assert(tuplesort_empty || !ts_isnull);
1770 indexcursor = (ItemPointer) DatumGetPointer(ts_val);
/*
 * tuplesort_empty is tested first, so indexcursor is not compared once
 * the sorted TID list has run out.
 */
1773 if (tuplesort_empty ||
1774 ItemPointerCompare(indexcursor, heapcursor) > 0)
1777 * We've overshot which means this heap tuple is missing from the
1778 * index, so insert it.
1782 MemoryContextReset(econtext->ecxt_per_tuple_memory);
1784 /* Set up for predicate or expression evaluation */
1785 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1788 * In a partial index, discard tuples that don't satisfy the
1791 if (predicate != NIL)
1793 if (!ExecQual(predicate, econtext, false))
1798 * For the current heap tuple, extract all the attributes we use
1799 * in this index, and note which are null. This also performs
1800 * evaluation of any expressions needed.
1802 FormIndexDatum(indexInfo,
1809 * If the tuple is already committed dead, we still have to put it
1810 * in the index (because some xacts might be able to see it), but
1811 * we might as well suppress uniqueness checking. This is just an
1812 * optimization because the index AM is not supposed to raise a
1813 * uniqueness failure anyway.
1815 if (indexInfo->ii_Unique)
1817 /* must hold a buffer lock to call HeapTupleSatisfiesNow */
1818 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1820 if (HeapTupleSatisfiesNow(heapTuple->t_data, scan->rs_cbuf))
1821 check_unique = true;
1823 check_unique = false;
1825 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
/* ii_Unique not set: no uniqueness check to request */
1828 check_unique = false;
1831 * You'd think we should go ahead and build the index tuple here,
1832 * but some index AMs want to do further processing on the data
1833 * first. So pass the values[] and isnull[] arrays, instead.
1835 index_insert(indexRelation,
1842 state->tups_inserted += 1;
1848 ExecDropSingleTupleTableSlot(slot);
1850 FreeExecutorState(estate);
1852 /* These may have been pointing to the now-gone estate */
1853 indexInfo->ii_ExpressionsState = NIL;
1854 indexInfo->ii_PredicateState = NIL;
1859 * IndexGetRelation: given an index's relation OID, get the OID of the
1860 * relation it is an index on. Uses the system cache.
1863 IndexGetRelation(Oid indexId)
/*
 * Looks up the pg_index entry for indexId in the syscache (INDEXRELID),
 * raises an error if there is none, and reads its indrelid.
 */
1866 Form_pg_index index;
1869 tuple = SearchSysCache(INDEXRELID,
1870 ObjectIdGetDatum(indexId),
1872 if (!HeapTupleIsValid(tuple))
1873 elog(ERROR, "cache lookup failed for index %u", indexId);
1874 index = (Form_pg_index) GETSTRUCT(tuple);
1875 Assert(index->indexrelid == indexId);
/* Copy the OID out first: index points into the cache entry released next. */
1877 result = index->indrelid;
1878 ReleaseSysCache(tuple);
1883 * reindex_index - This routine is used to recreate a single index
1886 reindex_index(Oid indexId)
1893 HeapTuple indexTuple;
1894 Form_pg_index indexForm;
/*
 * Outline: lock the parent heap and the index, rebuild the index contents
 * through index_build (after either truncating the existing file or
 * assigning a new relfilenode), then set indisvalid in pg_index if it was
 * false.  Both relations are closed at the end with their locks retained.
 */
1897 * Open and lock the parent heap relation. ShareLock is sufficient since
1898 * we only need to be sure no schema or data changes are going on.
1900 heapId = IndexGetRelation(indexId);
1901 heapRelation = heap_open(heapId, ShareLock);
1904 * Open the target index relation and get an exclusive lock on it, to
1905 * ensure that no one else is touching this particular index.
1907 iRel = index_open(indexId, AccessExclusiveLock);
1910 * If it's a shared index, we must do inplace processing (because we have
1911 * no way to update relfilenode in other databases). Otherwise we can do
1912 * it the normal transaction-safe way.
1914 * Since inplace processing isn't crash-safe, we only allow it in a
1915 * standalone backend. (In the REINDEX TABLE and REINDEX DATABASE cases,
1916 * the caller should have detected this.)
1918 inplace = iRel->rd_rel->relisshared;
/* A shared index is rejected with an error when running under the postmaster. */
1920 if (inplace && IsUnderPostmaster)
1922 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1923 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
1924 RelationGetRelationName(iRel))));
1928 IndexInfo *indexInfo;
1930 /* Suppress use of the target index while rebuilding it */
1931 SetReindexProcessing(heapId, indexId);
1933 /* Fetch info needed for index_build */
1934 indexInfo = BuildIndexInfo(iRel);
1938 /* Truncate the actual file (and discard buffers) */
1939 RelationTruncate(iRel, 0);
1944 * We'll build a new physical relation for the index.
1946 setNewRelfilenode(iRel);
1949 /* Initialize the index and rebuild */
1950 /* Note: we do not need to re-establish pkey setting */
1951 index_build(heapRelation, iRel, indexInfo, false);
1955 /* Make sure flag gets cleared on error exit */
1956 ResetReindexProcessing();
1960 ResetReindexProcessing();
1963 * If the index is marked invalid (ie, it's from a failed CREATE INDEX
1964 * CONCURRENTLY), we can now mark it valid. This allows REINDEX to be
1965 * used to clean up in such cases.
1967 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1969 indexTuple = SearchSysCacheCopy(INDEXRELID,
1970 ObjectIdGetDatum(indexId),
1972 if (!HeapTupleIsValid(indexTuple))
1973 elog(ERROR, "cache lookup failed for index %u", indexId);
1974 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
/* Write pg_index only when the flag actually has to change. */
1976 if (!indexForm->indisvalid)
1978 indexForm->indisvalid = true;
1979 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1980 CatalogUpdateIndexes(pg_index, indexTuple);
/*
 * NOTE(review): no heap_freetuple(indexTuple) is visible here, unlike the
 * pg_class tuple copy in setNewRelfilenode; presumably the copy is
 * reclaimed together with its memory context -- confirm.
 */
1982 heap_close(pg_index, RowExclusiveLock);
1984 /* Close rels, but keep locks */
1985 index_close(iRel, NoLock);
1986 heap_close(heapRelation, NoLock);
1990 * reindex_relation - This routine is used to recreate all indexes
1991 * of a relation (and optionally its toast relation too, if any).
1993 * Returns true if any indexes were rebuilt. Note that a
1994 * CommandCounterIncrement will occur after each index rebuild.
1997 reindex_relation(Oid relid, bool toast_too)
/*
 * Rebuilds each index in the relcache index list of the relation, one at
 * a time through reindex_index, and then (when toast_too is set and a
 * toast relation exists) recurses once into the toast relation with
 * toast_too = false.
 */
2008 * Open and lock the relation. ShareLock is sufficient since we only need
2009 * to prevent schema and data changes in it.
2011 rel = heap_open(relid, ShareLock);
/* Save the toast relation OID now; it is still needed after rel is closed. */
2013 toast_relid = rel->rd_rel->reltoastrelid;
2016 * Get the list of index OIDs for this relation. (We trust to the
2017 * relcache to get this with a sequential scan if ignoring system
2020 indexIds = RelationGetIndexList(rel);
2023 * reindex_index will attempt to update the pg_class rows for the relation
2024 * and index. If we are processing pg_class itself, we want to make sure
2025 * that the updates do not try to insert index entries into indexes we
2026 * have not processed yet. (When we are trying to recover from corrupted
2027 * indexes, that could easily cause a crash.) We can accomplish this
2028 * because CatalogUpdateIndexes will use the relcache's index list to know
2029 * which indexes to update. We just force the index list to be only the
2030 * stuff we've processed.
2032 * It is okay to not insert entries into the indexes we have not processed
2033 * yet because all of this is transaction-safe. If we fail partway
2034 * through, the updated rows are dead and it doesn't matter whether they
2035 * have index entries. Also, a new pg_class index will be created with an
2036 * entry for its own pg_class row because we do setNewRelfilenode() before
2037 * we do index_build().
2039 * Note that we also clear pg_class's rd_oidindex until the loop is done,
2040 * so that that index can't be accessed either. This means we cannot
2041 * safely generate new relation OIDs while in the loop; shouldn't be a
2044 is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
2047 /* Reindex all the indexes. */
2048 foreach(indexId, indexIds)
2050 Oid indexOid = lfirst_oid(indexId);
2053 RelationSetIndexList(rel, doneIndexes, InvalidOid);
2055 reindex_index(indexOid);
2057 CommandCounterIncrement();
2060 doneIndexes = lappend_oid(doneIndexes, indexOid);
2064 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
2067 * Close rel, but continue to hold the lock.
2069 heap_close(rel, NoLock);
2071 result = (indexIds != NIL);
2074 * If the relation has a secondary toast rel, reindex that too while we
2075 * still hold the lock on the master table.
2077 if (toast_too && OidIsValid(toast_relid))
2078 result |= reindex_relation(toast_relid, false);