1 /*-------------------------------------------------------------------------
4 * code to create and destroy POSTGRES index relations
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/catalog/index.c
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
20 *-------------------------------------------------------------------------
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/visibilitymap.h"
32 #include "access/xact.h"
33 #include "bootstrap/bootstrap.h"
34 #include "catalog/binary_upgrade.h"
35 #include "catalog/catalog.h"
36 #include "catalog/dependency.h"
37 #include "catalog/heap.h"
38 #include "catalog/index.h"
39 #include "catalog/objectaccess.h"
40 #include "catalog/pg_am.h"
41 #include "catalog/pg_collation.h"
42 #include "catalog/pg_constraint.h"
43 #include "catalog/pg_constraint_fn.h"
44 #include "catalog/pg_operator.h"
45 #include "catalog/pg_opclass.h"
46 #include "catalog/pg_tablespace.h"
47 #include "catalog/pg_trigger.h"
48 #include "catalog/pg_type.h"
49 #include "catalog/storage.h"
50 #include "commands/tablecmds.h"
51 #include "commands/trigger.h"
52 #include "executor/executor.h"
53 #include "miscadmin.h"
54 #include "nodes/makefuncs.h"
55 #include "nodes/nodeFuncs.h"
56 #include "optimizer/clauses.h"
57 #include "parser/parser.h"
58 #include "storage/bufmgr.h"
59 #include "storage/lmgr.h"
60 #include "storage/predicate.h"
61 #include "storage/procarray.h"
62 #include "storage/smgr.h"
63 #include "utils/builtins.h"
64 #include "utils/fmgroids.h"
65 #include "utils/guc.h"
66 #include "utils/inval.h"
67 #include "utils/lsyscache.h"
68 #include "utils/memutils.h"
69 #include "utils/pg_rusage.h"
70 #include "utils/syscache.h"
71 #include "utils/tuplesort.h"
72 #include "utils/snapmgr.h"
73 #include "utils/tqual.h"
76 /* Potentially set by pg_upgrade_support functions */
/*
 * index_create() copies this value into the OID of the index being created
 * and then resets the variable to InvalidOid.
 */
77 Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
79 /* state info for validate_index bulkdelete callback */
82 Tuplesortstate *tuplesort; /* for sorting the index TIDs */
83 /* statistics (for debug purposes only): */
89 /* non-export function prototypes */
/*
 * All prototypes below are declared static.
 * NOTE(review): several of these prototypes look truncated in this listing
 * (parameter lines are missing); confirm against the upstream file.
 */
90 static bool relationHasPrimaryKey(Relation rel);
91 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
94 Oid accessMethodObjectId,
95 Oid *collationObjectId,
97 static void InitializeAttributeOids(Relation indexRelation,
98 int numatts, Oid indexoid);
99 static void AppendAttributeTuples(Relation indexRelation, int numatts);
100 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
101 IndexInfo *indexInfo,
109 static void index_update_stats(Relation rel,
110 bool hasindex, bool isprimary,
112 static void IndexCheckExclusion(Relation heapRelation,
113 Relation indexRelation,
114 IndexInfo *indexInfo);
115 static inline int64 itemptr_encode(ItemPointer itemptr);
116 static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
117 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
118 static void validate_index_heapscan(Relation heapRelation,
119 Relation indexRelation,
120 IndexInfo *indexInfo,
123 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
124 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
125 static void ResetReindexProcessing(void);
126 static void SetReindexPending(List *indexes);
127 static void RemoveReindexPending(Oid indexOid);
128 static void ResetReindexPending(void);
132 * relationHasPrimaryKey
133 * See whether an existing relation has a primary key.
135 * Caller must have suitable lock on the relation.
137 * Note: we intentionally do not check IndexIsValid here; that's because this
138 * is used to enforce the rule that there can be only one indisprimary index,
139 * and we want that to be true even if said index is invalid.
/*
 * Walks the index OID list obtained from RelationGetIndexList(rel), fetches
 * the pg_index row of each index through the INDEXRELID syscache, and copies
 * its indisprimary flag into "result".  A failed cache lookup raises an
 * ERROR.  Each syscache entry is released after use, and the OID list is
 * freed with list_free() once the scan is over.
 *
 * NOTE(review): this listing appears to be missing lines (braces, some local
 * declarations, the return statement); confirm the exact control flow
 * against the upstream file.
 */
142 relationHasPrimaryKey(Relation rel)
146 ListCell *indexoidscan;
149 * Get the list of index OIDs for the table from the relcache, and look up
150 * each one in the pg_index syscache until we find one marked primary key
151 * (hopefully there isn't more than one such).
153 indexoidlist = RelationGetIndexList(rel);
155 foreach(indexoidscan, indexoidlist)
157 Oid indexoid = lfirst_oid(indexoidscan);
158 HeapTuple indexTuple;
160 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
161 if (!HeapTupleIsValid(indexTuple)) /* should not happen */
162 elog(ERROR, "cache lookup failed for index %u", indexoid);
163 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
164 ReleaseSysCache(indexTuple);
169 list_free(indexoidlist);
175 * index_check_primary_key
176 * Apply special checks needed before creating a PRIMARY KEY index
178 * This processing used to be in DefineIndex(), but has been split out
179 * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
181 * We check for a pre-existing primary key, and that all columns of the index
182 * are simple column references (not expressions), and that all those
183 * columns are marked NOT NULL. If they aren't (which can only happen during
184 * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
185 * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
186 * them so --- or fail if they are not in fact nonnull.
188 * Caller had better have at least ShareLock on the table, else the not-null
189 * checking isn't trustworthy.
/*
 * Steps visible in this listing:
 *
 *  - When is_alter_table is set and relationHasPrimaryKey(heapRel) is true,
 *    an ERRCODE_INVALID_TABLE_DEFINITION error is reported.
 *  - For each of the ii_NumIndexAttrs key columns, the pg_attribute row is
 *    fetched through the ATTNUM syscache (a failed lookup raises an ERROR).
 *    A column whose attnotnull is false gets an AlterTableCmd of subtype
 *    AT_SetNotNull, carrying a copy of the column name, appended to "cmds".
 *  - The collected commands are handed to AlterTableInternal() with false
 *    as the third argument (presumably the recurse flag, matching the XXX
 *    note below about not cascading to child tables; confirm).
 *
 * The "primary keys cannot be expressions" report and the system-attribute
 * shortcut belong to checks whose conditions are not visible here.
 *
 * NOTE(review): this listing appears to be missing lines (braces, some
 * declarations and conditions); verify against the upstream file.
 */
192 index_check_primary_key(Relation heapRel,
193 IndexInfo *indexInfo,
200 * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
201 * TABLE, we have faith that the parser rejected multiple pkey clauses;
202 * and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
205 if (is_alter_table &&
206 relationHasPrimaryKey(heapRel))
209 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
210 errmsg("multiple primary keys for table \"%s\" are not allowed",
211 RelationGetRelationName(heapRel))));
215 * Check that all of the attributes in a primary key are marked as not
216 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
219 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
221 AttrNumber attnum = indexInfo->ii_KeyAttrNumbers[i];
223 Form_pg_attribute attform;
227 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
228 errmsg("primary keys cannot be expressions")));
230 /* System attributes are never null, so no need to check */
234 atttuple = SearchSysCache2(ATTNUM,
235 ObjectIdGetDatum(RelationGetRelid(heapRel)),
236 Int16GetDatum(attnum));
237 if (!HeapTupleIsValid(atttuple))
238 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
239 attnum, RelationGetRelid(heapRel));
240 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
242 if (!attform->attnotnull)
244 /* Add a subcommand to make this one NOT NULL */
245 AlterTableCmd *cmd = makeNode(AlterTableCmd);
247 cmd->subtype = AT_SetNotNull;
248 cmd->name = pstrdup(NameStr(attform->attname));
249 cmds = lappend(cmds, cmd);
252 ReleaseSysCache(atttuple);
256 * XXX: Shouldn't the ALTER TABLE .. SET NOT NULL cascade to child tables?
257 * Currently, since the PRIMARY KEY itself doesn't cascade, we don't
258 * cascade the notnull constraint(s) either; but this is pretty debatable.
260 * XXX: possible future improvement: when being called from ALTER TABLE,
261 * it would be more efficient to merge this with the outer ALTER TABLE, so
262 * as to avoid two scans. But that seems to complicate DefineIndex's API
266 AlterTableInternal(RelationGetRelid(heapRel), cmds, false);
270 * ConstructTupleDescriptor
272 * Build an index tuple descriptor for a new index
/*
 * Inputs used by the visible code:
 *   heapRelation         - table being indexed; its tuple descriptor (or the
 *                          system attribute definitions) is the source of
 *                          copied pg_attribute entries
 *   indexInfo            - supplies ii_NumIndexAttrs, ii_KeyAttrNumbers and
 *                          ii_Expressions
 *   accessMethodObjectId - index AM; its amkeytype may override a column type
 *   collationObjectId    - per-column collation OIDs, stored in attcollation
 * indexColNames (column names) and classObjectId (opclass OIDs) are also
 * read below; their parameter declarations are not visible in this listing.
 *
 * The descriptor is allocated with CreateTemplateTupleDesc(numatts, false).
 * For each index column the code either copies the fixed-size part of the
 * referenced heap or system attribute and resets the fields that must
 * differ, or, for an expression column, zeroes the entry and fills in type
 * data from the pg_type row of the expression result type (checked with
 * CheckAttributeType).  attrelid is left InvalidOid here, and attname is
 * taken from the caller-supplied name list.  Finally, if the opclass
 * opckeytype, or otherwise the AM amkeytype, is valid and differs from
 * atttypid, the type-related fields are overwritten from that type.
 *
 * NOTE(review): this listing appears to be missing lines (braces, some
 * declarations, branch conditions, the return); verify against upstream.
 */
275 ConstructTupleDescriptor(Relation heapRelation,
276 IndexInfo *indexInfo,
278 Oid accessMethodObjectId,
279 Oid *collationObjectId,
282 int numatts = indexInfo->ii_NumIndexAttrs;
283 ListCell *colnames_item = list_head(indexColNames);
284 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
285 IndexAmRoutine *amroutine;
286 TupleDesc heapTupDesc;
287 TupleDesc indexTupDesc;
288 int natts; /* #atts in heap rel --- for error checks */
291 /* We need access to the index AM's API struct */
292 amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId);
294 /* ... and to the table's tuple descriptor */
295 heapTupDesc = RelationGetDescr(heapRelation);
296 natts = RelationGetForm(heapRelation)->relnatts;
299 * allocate the new tuple descriptor
301 indexTupDesc = CreateTemplateTupleDesc(numatts, false);
304 * For simple index columns, we copy the pg_attribute row from the parent
305 * relation and modify it as necessary. For expressions we have to cons
306 * up a pg_attribute row the hard way.
308 for (i = 0; i < numatts; i++)
310 AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
311 Form_pg_attribute to = indexTupDesc->attrs[i];
313 Form_pg_type typeTup;
314 Form_pg_opclass opclassTup;
319 /* Simple index column */
320 Form_pg_attribute from;
325 * here we are indexing on a system attribute (-1...-n)
327 from = SystemAttributeDefinition(atnum,
328 heapRelation->rd_rel->relhasoids);
333 * here we are indexing on a normal attribute (1...n)
335 if (atnum > natts) /* safety check */
336 elog(ERROR, "invalid column number %d", atnum);
337 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
341 * now that we've determined the "from", let's copy the tuple desc
344 memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
347 * Fix the stuff that should not be the same as the underlying
352 to->attstattarget = -1;
353 to->attcacheoff = -1;
354 to->attnotnull = false;
355 to->atthasdef = false;
356 to->attislocal = true;
358 to->attcollation = collationObjectId[i];
362 /* Expressional index */
365 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
367 if (indexpr_item == NULL) /* shouldn't happen */
368 elog(ERROR, "too few entries in indexprs list");
369 indexkey = (Node *) lfirst(indexpr_item);
370 indexpr_item = lnext(indexpr_item);
373 * Lookup the expression type in pg_type for the type length etc.
375 keyType = exprType(indexkey);
376 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
377 if (!HeapTupleIsValid(tuple))
378 elog(ERROR, "cache lookup failed for type %u", keyType);
379 typeTup = (Form_pg_type) GETSTRUCT(tuple);
382 * Assign some of the attributes values. Leave the rest as 0.
385 to->atttypid = keyType;
386 to->attlen = typeTup->typlen;
387 to->attbyval = typeTup->typbyval;
388 to->attstorage = typeTup->typstorage;
389 to->attalign = typeTup->typalign;
390 to->attstattarget = -1;
391 to->attcacheoff = -1;
392 to->atttypmod = exprTypmod(indexkey);
393 to->attislocal = true;
394 to->attcollation = collationObjectId[i];
396 ReleaseSysCache(tuple);
399 * Make sure the expression yields a type that's safe to store in
400 * an index. We need this defense because we have index opclasses
401 * for pseudo-types such as "record", and the actually stored type
402 * had better be safe; eg, a named composite type is okay, an
403 * anonymous record type is not. The test is the same as for
404 * whether a table column is of a safe type (which is why we
405 * needn't check for the non-expression case).
407 CheckAttributeType(NameStr(to->attname),
408 to->atttypid, to->attcollation,
413 * We do not yet have the correct relation OID for the index, so just
414 * set it invalid for now. InitializeAttributeOids() will fix it
417 to->attrelid = InvalidOid;
420 * Set the attribute name as specified by caller.
422 if (colnames_item == NULL) /* shouldn't happen */
423 elog(ERROR, "too few entries in colnames list");
424 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
425 colnames_item = lnext(colnames_item);
428 * Check the opclass and index AM to see if either provides a keytype
429 * (overriding the attribute type). Opclass takes precedence.
431 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
432 if (!HeapTupleIsValid(tuple))
433 elog(ERROR, "cache lookup failed for opclass %u",
435 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
436 if (OidIsValid(opclassTup->opckeytype))
437 keyType = opclassTup->opckeytype;
439 keyType = amroutine->amkeytype;
440 ReleaseSysCache(tuple);
442 if (OidIsValid(keyType) && keyType != to->atttypid)
444 /* index value and heap value have different types */
445 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
446 if (!HeapTupleIsValid(tuple))
447 elog(ERROR, "cache lookup failed for type %u", keyType);
448 typeTup = (Form_pg_type) GETSTRUCT(tuple);
450 to->atttypid = keyType;
452 to->attlen = typeTup->typlen;
453 to->attbyval = typeTup->typbyval;
454 to->attalign = typeTup->typalign;
455 to->attstorage = typeTup->typstorage;
457 ReleaseSysCache(tuple);
466 /* ----------------------------------------------------------------
467 * InitializeAttributeOids
468 * ----------------------------------------------------------------
/*
 * Stores indexoid in the attrelid field of the first numatts attribute
 * entries of the tuple descriptor of indexRelation.
 */
471 InitializeAttributeOids(Relation indexRelation,
475 TupleDesc tupleDescriptor;
478 tupleDescriptor = RelationGetDescr(indexRelation);
/* one pass over the descriptor entries, stamping each with the index OID */
480 for (i = 0; i < numatts; i += 1)
481 tupleDescriptor->attrs[i]->attrelid = indexoid;
484 /* ----------------------------------------------------------------
485 * AppendAttributeTuples
486 * ----------------------------------------------------------------
/*
 * Opens pg_attribute with RowExclusiveLock together with its catalog
 * indexes, then inserts one pg_attribute row per index column through
 * InsertPgAttributeTuple(), taking the rows from the tuple descriptor of
 * indexRelation.  Each entry is asserted to already carry attnum == i + 1
 * and attcacheoff == -1.  Afterwards the catalog indexes are closed and
 * pg_attribute is closed, passing RowExclusiveLock to heap_close().
 */
489 AppendAttributeTuples(Relation indexRelation, int numatts)
491 Relation pg_attribute;
492 CatalogIndexState indstate;
493 TupleDesc indexTupDesc;
497 * open the attribute relation and its indexes
499 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
501 indstate = CatalogOpenIndexes(pg_attribute);
504 * insert data from new index's tupdesc into pg_attribute
506 indexTupDesc = RelationGetDescr(indexRelation);
508 for (i = 0; i < numatts; i++)
511 * There used to be very grotty code here to set these fields, but I
512 * think it's unnecessary. They should be set already.
514 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
515 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
517 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
520 CatalogCloseIndexes(indstate);
522 heap_close(pg_attribute, RowExclusiveLock);
525 /* ----------------------------------------------------------------
526 * UpdateIndexRelation
528 * Construct and insert a new entry in the pg_index catalog
529 * ----------------------------------------------------------------
/*
 * Builds the int2vector/oidvector forms of the key attribute numbers,
 * collations, opclasses and per-column options, serializes ii_Expressions
 * and ii_Predicate with nodeToString() (the predicate is first converted by
 * make_ands_explicit()), forms a pg_index tuple from them, inserts it with
 * simple_heap_insert() and then calls CatalogUpdateIndexes().
 *
 * Column values set by the visible code: indisvalid and indisready both
 * come from the isvalid argument; indisclustered, indcheckxmin and
 * indisreplident are false; indislive is true.  indexprs and indpred are
 * marked NULL when the corresponding datum is 0.
 *
 * NOTE(review): this listing appears to be missing lines (part of the
 * parameter list, some local declarations, braces); verify against upstream.
 */
532 UpdateIndexRelation(Oid indexoid,
534 IndexInfo *indexInfo,
544 oidvector *indcollation;
546 int2vector *indoption;
549 Datum values[Natts_pg_index];
550 bool nulls[Natts_pg_index];
556 * Copy the index key, opclass, and indoption info into arrays (should we
557 * make the caller pass them like this to start with?)
/* indkey is built from a NULL source array and then filled in by the loop */
559 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
560 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
561 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
562 indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
563 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
564 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
567 * Convert the index expressions (if any) to a text datum
569 if (indexInfo->ii_Expressions != NIL)
573 exprsString = nodeToString(indexInfo->ii_Expressions);
574 exprsDatum = CStringGetTextDatum(exprsString);
578 exprsDatum = (Datum) 0;
581 * Convert the index predicate (if any) to a text datum. Note we convert
582 * implicit-AND format to normal explicit-AND for storage.
584 if (indexInfo->ii_Predicate != NIL)
588 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
589 predDatum = CStringGetTextDatum(predString);
593 predDatum = (Datum) 0;
596 * open the system catalog index relation
598 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
601 * Build a pg_index tuple
603 MemSet(nulls, false, sizeof(nulls));
605 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
606 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
607 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
608 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
609 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
610 values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
611 values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
612 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
613 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
614 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
615 /* we set isvalid and isready the same way */
616 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
617 values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
618 values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
619 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
620 values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
621 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
622 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
/* a zero datum means "no expressions" / "no predicate": store SQL NULL */
623 values[Anum_pg_index_indexprs - 1] = exprsDatum;
624 if (exprsDatum == (Datum) 0)
625 nulls[Anum_pg_index_indexprs - 1] = true;
626 values[Anum_pg_index_indpred - 1] = predDatum;
627 if (predDatum == (Datum) 0)
628 nulls[Anum_pg_index_indpred - 1] = true;
630 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
633 * insert the tuple into the pg_index catalog
635 simple_heap_insert(pg_index, tuple);
637 /* update the indexes on pg_index */
638 CatalogUpdateIndexes(pg_index, tuple);
641 * close the relation and free the tuple
643 heap_close(pg_index, RowExclusiveLock);
644 heap_freetuple(tuple);
651 * heapRelation: table to build index on (suitably locked by caller)
652 * indexRelationName: name to give the new index relation
653 * indexRelationId: normally, pass InvalidOid to let this routine
654 * generate an OID for the index. During bootstrap this may be
655 * nonzero to specify a preselected OID.
656 * relFileNode: normally, pass InvalidOid to get new storage. May be
657 * nonzero to attach an existing valid build.
658 * indexInfo: same info executor uses to insert into the index
659 * indexColNames: column names to use for index (List of char *)
660 * accessMethodObjectId: OID of index AM to use
661 * tableSpaceId: OID of tablespace to use
662 * collationObjectId: array of collation OIDs, one per index column
663 * classObjectId: array of index opclass OIDs, one per index column
664 * coloptions: array of per-index-column indoption settings
665 * reloptions: AM-specific options
666 * isprimary: index is a PRIMARY KEY
667 * isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
668 * deferrable: constraint is DEFERRABLE
669 * initdeferred: constraint is INITIALLY DEFERRED
670 * allow_system_table_mods: allow table to be a system catalog
671 * skip_build: true to skip the index_build() step for the moment; caller
672 * must do it later (typically via reindex_index())
673 * concurrent: if true, do not lock the table against writers. The index
674 * will be marked "invalid" and the caller must take additional steps to fix it up.
676 * is_internal: passed through to the post-creation hook invoked for the new index
677 * if_not_exists: if true, do not throw an error if a relation with
678 * the same name already exists.
680 * Returns the OID of the created index.
/*
 * Sequence of steps visible in this listing:
 *
 *  1. Open pg_class with RowExclusiveLock and derive the namespace, the
 *     shared and mapped status and the persistence from the heap relation.
 *  2. Sanity checks: at least one index column; no user-defined index on a
 *     system catalog in normal processing mode unless
 *     allow_system_table_mods; no concurrent build on a system catalog or
 *     for an exclusion constraint; no new shared index outside bootstrap
 *     mode; shared relations only in GLOBALTABLESPACE_OID; no existing
 *     relation of the same name in the namespace.
 *  3. ConstructTupleDescriptor(), then OID selection (the binary-upgrade
 *     override or GetNewRelFileNode()), heap_create(), and an
 *     AccessExclusiveLock on the new index.
 *  4. Catalog rows: InsertPgClassTuple(), InitializeAttributeOids(),
 *     AppendAttributeTuples(), UpdateIndexRelation().
 *  5. Outside bootstrap mode: either index_constraint_create() or AUTO
 *     dependencies on the referenced columns (or on the whole table when
 *     there is no simple column), plus NORMAL dependencies on non-default
 *     collations and on opclasses, and dependencies found in the index
 *     expressions and predicate.
 *  6. Post-create hook and CommandCounterIncrement(); then index_register()
 *     in bootstrap mode, index_update_stats() when skip_build is set, or
 *     index_build() otherwise.
 *
 * The index is closed with NoLock, so the lock taken in step 3 is kept, and
 * indexRelationId is returned.
 *
 * NOTE(review): this listing appears to be missing lines (parts of the
 * parameter list, braces, several conditions and call arguments); verify
 * against the upstream file.
 */
683 index_create(Relation heapRelation,
684 const char *indexRelationName,
687 IndexInfo *indexInfo,
689 Oid accessMethodObjectId,
691 Oid *collationObjectId,
699 bool allow_system_table_mods,
705 Oid heapRelationId = RelationGetRelid(heapRelation);
707 Relation indexRelation;
708 TupleDesc indexTupDesc;
709 bool shared_relation;
710 bool mapped_relation;
716 is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
718 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
721 * The index will be in the same namespace as its parent table, and is
722 * shared across databases if and only if the parent is. Likewise, it
723 * will use the relfilenode map if and only if the parent does; and it
724 * inherits the parent's relpersistence.
726 namespaceId = RelationGetNamespace(heapRelation);
727 shared_relation = heapRelation->rd_rel->relisshared;
728 mapped_relation = RelationIsMapped(heapRelation);
729 relpersistence = heapRelation->rd_rel->relpersistence;
734 if (indexInfo->ii_NumIndexAttrs < 1)
735 elog(ERROR, "must index at least one column");
737 if (!allow_system_table_mods &&
738 IsSystemRelation(heapRelation) &&
739 IsNormalProcessingMode())
741 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
742 errmsg("user-defined indexes on system catalog tables are not supported")));
745 * concurrent index build on a system catalog is unsafe because we tend to
746 * release locks before committing in catalogs
749 IsSystemRelation(heapRelation))
751 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
752 errmsg("concurrent index creation on system catalog tables is not supported")));
755 * This case is currently not supported, but there's no way to ask for it
756 * in the grammar anyway, so it can't happen.
758 if (concurrent && is_exclusion)
760 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
761 errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
764 * We cannot allow indexing a shared relation after initdb (because
765 * there's no way to make the entry in other databases' pg_class).
767 if (shared_relation && !IsBootstrapProcessingMode())
769 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
770 errmsg("shared indexes cannot be created after initdb")));
773 * Shared relations must be in pg_global, too (last-ditch check)
775 if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
776 elog(ERROR, "shared relations must be placed in pg_global tablespace");
778 if (get_relname_relid(indexRelationName, namespaceId))
783 (errcode(ERRCODE_DUPLICATE_TABLE),
784 errmsg("relation \"%s\" already exists, skipping",
785 indexRelationName)));
786 heap_close(pg_class, RowExclusiveLock);
791 (errcode(ERRCODE_DUPLICATE_TABLE),
792 errmsg("relation \"%s\" already exists",
793 indexRelationName)));
797 * construct tuple descriptor for index tuples
799 indexTupDesc = ConstructTupleDescriptor(heapRelation,
802 accessMethodObjectId,
807 * Allocate an OID for the index, unless we were told what to use.
809 * The OID will be the relfilenode as well, so make sure it doesn't
810 * collide with either pg_class OIDs or existing physical files.
812 if (!OidIsValid(indexRelationId))
814 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
817 if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
819 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
820 errmsg("pg_class index OID value not set when in binary upgrade mode")));
822 indexRelationId = binary_upgrade_next_index_pg_class_oid;
823 binary_upgrade_next_index_pg_class_oid = InvalidOid;
828 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
833 * create the index relation's relcache entry and physical disk file. (If
834 * we fail further down, it's the smgr's responsibility to remove the disk
837 indexRelation = heap_create(indexRelationName,
847 allow_system_table_mods);
849 Assert(indexRelationId == RelationGetRelid(indexRelation));
852 * Obtain exclusive lock on it. Although no other backends can see it
853 * until we commit, this prevents deadlock-risk complaints from lock
854 * manager in cases such as CLUSTER.
856 LockRelation(indexRelation, AccessExclusiveLock);
859 * Fill in fields of the index's pg_class entry that are not set correctly
862 * XXX should have a cleaner way to create cataloged indexes
864 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
865 indexRelation->rd_rel->relam = accessMethodObjectId;
866 indexRelation->rd_rel->relhasoids = false;
869 * store index's pg_class entry
871 InsertPgClassTuple(pg_class, indexRelation,
872 RelationGetRelid(indexRelation),
876 /* done with pg_class */
877 heap_close(pg_class, RowExclusiveLock);
880 * now update the object id's of all the attribute tuple forms in the
881 * index relation's tuple descriptor
883 InitializeAttributeOids(indexRelation,
884 indexInfo->ii_NumIndexAttrs,
888 * append ATTRIBUTE tuples for the index
890 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
894 * (append INDEX tuple)
896 * Note that this stows away a representation of "predicate".
897 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
900 UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
901 collationObjectId, classObjectId, coloptions,
902 isprimary, is_exclusion,
907 * Register constraint and dependencies for the index.
909 * If the index is from a CONSTRAINT clause, construct a pg_constraint
910 * entry. The index will be linked to the constraint, which in turn is
911 * linked to the table. If it's not a CONSTRAINT, we need to make a
912 * dependency directly on the table.
914 * We don't need a dependency on the namespace, because there'll be an
915 * indirect dependency via our parent table.
917 * During bootstrap we can't register any dependencies, and we don't try
918 * to make a constraint either.
920 if (!IsBootstrapProcessingMode())
922 ObjectAddress myself,
925 myself.classId = RelationRelationId;
926 myself.objectId = indexRelationId;
927 myself.objectSubId = 0;
934 constraintType = CONSTRAINT_PRIMARY;
935 else if (indexInfo->ii_Unique)
936 constraintType = CONSTRAINT_UNIQUE;
937 else if (is_exclusion)
938 constraintType = CONSTRAINT_EXCLUSION;
941 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
942 constraintType = 0; /* keep compiler quiet */
945 index_constraint_create(heapRelation,
952 false, /* already marked primary */
953 false, /* pg_index entry is OK */
954 false, /* no old dependencies */
955 allow_system_table_mods,
960 bool have_simple_col = false;
962 /* Create auto dependencies on simply-referenced columns */
963 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
965 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
967 referenced.classId = RelationRelationId;
968 referenced.objectId = heapRelationId;
969 referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
971 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
973 have_simple_col = true;
978 * If there are no simply-referenced columns, give the index an
979 * auto dependency on the whole table. In most cases, this will
980 * be redundant, but it might not be if the index expressions and
981 * predicate contain no Vars or only whole-row Vars.
983 if (!have_simple_col)
985 referenced.classId = RelationRelationId;
986 referenced.objectId = heapRelationId;
987 referenced.objectSubId = 0;
989 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
992 /* Non-constraint indexes can't be deferrable */
994 Assert(!initdeferred);
997 /* Store dependency on collations */
998 /* The default collation is pinned, so don't bother recording it */
999 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1001 if (OidIsValid(collationObjectId[i]) &&
1002 collationObjectId[i] != DEFAULT_COLLATION_OID)
1004 referenced.classId = CollationRelationId;
1005 referenced.objectId = collationObjectId[i];
1006 referenced.objectSubId = 0;
1008 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1012 /* Store dependency on operator classes */
1013 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1015 referenced.classId = OperatorClassRelationId;
1016 referenced.objectId = classObjectId[i];
1017 referenced.objectSubId = 0;
1019 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1022 /* Store dependencies on anything mentioned in index expressions */
1023 if (indexInfo->ii_Expressions)
1025 recordDependencyOnSingleRelExpr(&myself,
1026 (Node *) indexInfo->ii_Expressions,
1032 /* Store dependencies on anything mentioned in predicate */
1033 if (indexInfo->ii_Predicate)
1035 recordDependencyOnSingleRelExpr(&myself,
1036 (Node *) indexInfo->ii_Predicate,
1044 /* Bootstrap mode - assert we weren't asked for constraint support */
1045 Assert(!isconstraint);
1046 Assert(!deferrable);
1047 Assert(!initdeferred);
1050 /* Post creation hook for new index */
1051 InvokeObjectPostCreateHookArg(RelationRelationId,
1052 indexRelationId, 0, is_internal);
1055 * Advance the command counter so that we can see the newly-entered
1056 * catalog tuples for the index.
1058 CommandCounterIncrement();
1061 * In bootstrap mode, we have to fill in the index strategy structure with
1062 * information from the catalogs. If we aren't bootstrapping, then the
1063 * relcache entry has already been rebuilt thanks to sinval update during
1064 * CommandCounterIncrement.
1066 if (IsBootstrapProcessingMode())
1067 RelationInitIndexAccessInfo(indexRelation);
1069 Assert(indexRelation->rd_indexcxt != NULL);
1072 * If this is bootstrap (initdb) time, then we don't actually fill in the
1073 * index yet. We'll be creating more indexes and classes later, so we
1074 * delay filling them in until just before we're done with bootstrapping.
1075 * Similarly, if the caller specified skip_build then filling the index is
1076 * delayed till later (ALTER TABLE can save work in some cases with this).
1077 * Otherwise, we call the AM routine that constructs the index.
1079 if (IsBootstrapProcessingMode())
1081 index_register(heapRelationId, indexRelationId, indexInfo);
1083 else if (skip_build)
1086 * Caller is responsible for filling the index later on. However,
1087 * we'd better make sure that the heap relation is correctly marked as
1090 index_update_stats(heapRelation,
1094 /* Make the above update visible */
1095 CommandCounterIncrement();
1099 index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
1103 * Close the index; but we keep the lock that we acquired above until end
1104 * of transaction. Closing the heap is caller's responsibility.
1106 index_close(indexRelation, NoLock);
1108 return indexRelationId;
1112 * index_constraint_create
1114 * Set up a constraint associated with an index. Return the new constraint's
1117 * heapRelation: table owning the index (must be suitably locked by caller)
1118 * indexRelationId: OID of the index
1119 * indexInfo: same info executor uses to insert into the index
1120 * constraintName: name of the constraint (generally, should match name of index)
1121 * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1122 * CONSTRAINT_EXCLUSION
1123 * deferrable: constraint is DEFERRABLE
1124 * initdeferred: constraint is INITIALLY DEFERRED
1125 * mark_as_primary: if true, set flags to mark index as primary key
1126 * update_pgindex: if true, update pg_index row (else caller's done that)
1127 * remove_old_dependencies: if true, remove existing dependencies of index
1128 * on table's columns
1129 * allow_system_table_mods: allow table to be a system catalog
1130 * is_internal: index is constructed due to internal process
/*
 * Creates the pg_constraint entry for an index-backed constraint, records
 * the index as internally dependent on that constraint, optionally creates
 * a unique_key_recheck constraint trigger, and optionally updates the
 * table's pg_class row and the index's pg_index row.
 *
 * NOTE(review): "deferrable", "initdeferred" and "is_internal" are used in
 * the body below, but their parameter declarations are not visible in this
 * signature -- confirm against the complete file.
 */
1133 index_constraint_create(Relation heapRelation,
1134 Oid indexRelationId,
1135 IndexInfo *indexInfo,
1136 const char *constraintName,
1137 char constraintType,
1140 bool mark_as_primary,
1141 bool update_pgindex,
1142 bool remove_old_dependencies,
1143 bool allow_system_table_mods,
/* The constraint is created in the same namespace as the owning table */
1146 Oid namespaceId = RelationGetNamespace(heapRelation);
1147 ObjectAddress myself,
1151 /* constraint creation support doesn't work while bootstrapping */
1152 Assert(!IsBootstrapProcessingMode());
1154 /* enforce system-table restriction */
1155 if (!allow_system_table_mods &&
1156 IsSystemRelation(heapRelation) &&
1157 IsNormalProcessingMode())
1159 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1160 errmsg("user-defined indexes on system catalog tables are not supported")));
1162 /* primary/unique constraints shouldn't have any expressions */
1163 if (indexInfo->ii_Expressions &&
1164 constraintType != CONSTRAINT_EXCLUSION)
1165 elog(ERROR, "constraints cannot have index expressions");
1168 * If we're manufacturing a constraint for a pre-existing index, we need
1169 * to get rid of the existing auto dependencies for the index (the ones
1170 * that index_create() would have made instead of calling this function).
1172 * Note: this code would not necessarily do the right thing if the index
1173 * has any expressions or predicate, but we'd never be turning such an
1174 * index into a UNIQUE or PRIMARY KEY constraint.
1176 if (remove_old_dependencies)
1177 deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1178 RelationRelationId, DEPENDENCY_AUTO);
1181 * Construct a pg_constraint entry.
1183 conOid = CreateConstraintEntry(constraintName,
1189 RelationGetRelid(heapRelation),
1190 indexInfo->ii_KeyAttrNumbers,
1191 indexInfo->ii_NumIndexAttrs,
1192 InvalidOid, /* no domain */
1193 indexRelationId, /* index OID */
1194 InvalidOid, /* no foreign key */
1203 indexInfo->ii_ExclusionOps,
1204 NULL, /* no check constraint */
1209 true, /* noinherit */
1213 * Register the index as internally dependent on the constraint.
1215 * Note that the constraint has a dependency on the table, so we don't
1216 * need (or want) any direct dependency from the index to the table.
/* myself = the index (a pg_class object); referenced = the new constraint */
1218 myself.classId = RelationRelationId;
1219 myself.objectId = indexRelationId;
1220 myself.objectSubId = 0;
1222 referenced.classId = ConstraintRelationId;
1223 referenced.objectId = conOid;
1224 referenced.objectSubId = 0;
1226 recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
1229 * If the constraint is deferrable, create the deferred uniqueness
1230 * checking trigger. (The trigger will be given an internal dependency on
1231 * the constraint by CreateTrigger.)
/* Row-level AFTER INSERT/UPDATE constraint trigger calling unique_key_recheck */
1235 CreateTrigStmt *trigger;
1237 trigger = makeNode(CreateTrigStmt);
1238 trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
1239 "PK_ConstraintTrigger" :
1240 "Unique_ConstraintTrigger";
1241 trigger->relation = NULL;
1242 trigger->funcname = SystemFuncName("unique_key_recheck");
1243 trigger->args = NIL;
1244 trigger->row = true;
1245 trigger->timing = TRIGGER_TYPE_AFTER;
1246 trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
1247 trigger->columns = NIL;
1248 trigger->whenClause = NULL;
1249 trigger->isconstraint = true;
1250 trigger->deferrable = true;
1251 trigger->initdeferred = initdeferred;
1252 trigger->constrrel = NULL;
/* The trigger is tied to both the new constraint (conOid) and the index */
1254 (void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
1255 InvalidOid, conOid, indexRelationId, true);
1259 * If needed, mark the table as having a primary key. We assume it can't
1260 * have been so marked already, so no need to clear the flag in the other
1263 * Note: this might better be done by callers. We do it here to avoid
1264 * exposing index_update_stats() globally, but that wouldn't be necessary
1265 * if relhaspkey went away.
1267 if (mark_as_primary)
1268 index_update_stats(heapRelation,
1274 * If needed, mark the index as primary and/or deferred in pg_index.
1276 * Note: When making an existing index into a constraint, caller must have
1277 * a table lock that prevents concurrent table updates; otherwise, there
1278 * is a risk that concurrent readers of the table will miss seeing this
1281 if (update_pgindex && (mark_as_primary || deferrable))
1284 HeapTuple indexTuple;
1285 Form_pg_index indexForm;
1288 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
/* Work on a private copy of the index's pg_index row; it is freed below */
1290 indexTuple = SearchSysCacheCopy1(INDEXRELID,
1291 ObjectIdGetDatum(indexRelationId));
1292 if (!HeapTupleIsValid(indexTuple))
1293 elog(ERROR, "cache lookup failed for index %u", indexRelationId);
1294 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
/* Each flag is changed only if it is not already in the desired state */
1296 if (mark_as_primary && !indexForm->indisprimary)
1298 indexForm->indisprimary = true;
1302 if (deferrable && indexForm->indimmediate)
1304 indexForm->indimmediate = false;
/* NOTE(review): the condition guarding this update is not visible here -- confirm */
1310 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1311 CatalogUpdateIndexes(pg_index, indexTuple);
1313 InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
1314 InvalidOid, is_internal);
1317 heap_freetuple(indexTuple);
1318 heap_close(pg_index, RowExclusiveLock);
1327 * NOTE: this routine should now only be called through performDeletion(),
1328 * else associated dependencies won't be cleaned up.
/*
 * Drops the index identified by indexId: schedules removal of its storage,
 * deletes its pg_index row, removes its statistics, attribute tuples and
 * pg_class tuple, and invalidates the owning table's relcache entry.
 *
 * indexId:    OID of the index to drop
 * concurrent: if true, take ShareUpdateExclusiveLock instead of
 *             AccessExclusiveLock and go through the multi-transaction
 *             sequence below (clear indisvalid, commit, wait, mark dead,
 *             commit, wait) before the physical removal
 */
1331 index_drop(Oid indexId, bool concurrent)
1334 Relation userHeapRelation;
1335 Relation userIndexRelation;
1336 Relation indexRelation;
1339 LockRelId heaprelid,
1341 LOCKTAG heaplocktag;
1345 * To drop an index safely, we must grab exclusive lock on its parent
1346 * table. Exclusive lock on the index alone is insufficient because
1347 * another backend might be about to execute a query on the parent table.
1348 * If it relies on a previously cached list of index OIDs, then it could
1349 * attempt to access the just-dropped index. We must therefore take a
1350 * table lock strong enough to prevent all queries on the table from
1351 * proceeding until we commit and send out a shared-cache-inval notice
1352 * that will make them update their index lists.
1354 * In the concurrent case we avoid this requirement by disabling index use
1355 * in multiple steps and waiting out any transactions that might be using
1356 * the index, so we don't need exclusive lock on the parent table. Instead
1357 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
1358 * doing CREATE/DROP INDEX CONCURRENTLY on the same index. (We will get
1359 * AccessExclusiveLock on the index below, once we're sure nobody else is
/* The same lock mode is used for both the parent table and the index */
1362 heapId = IndexGetRelation(indexId, false);
1363 lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
1364 userHeapRelation = heap_open(heapId, lockmode);
1365 userIndexRelation = index_open(indexId, lockmode);
1368 * We might still have open queries using it in our own session, which the
1369 * above locking won't prevent, so test explicitly.
1371 CheckTableNotInUse(userIndexRelation, "DROP INDEX");
1374 * Drop Index Concurrently is more or less the reverse process of Create
1375 * Index Concurrently.
1377 * First we unset indisvalid so queries starting afterwards don't use the
1378 * index to answer queries anymore. We have to keep indisready = true so
1379 * transactions that are still scanning the index can continue to see
1380 * valid index contents. For instance, if they are using READ COMMITTED
1381 * mode, and another transaction makes changes and commits, they need to
1382 * see those new tuples in the index.
1384 * After all transactions that could possibly have used the index for
1385 * queries end, we can unset indisready and indislive, then wait till
1386 * nobody could be touching it anymore. (Note: we need indislive because
1387 * this state must be distinct from the initial state during CREATE INDEX
1388 * CONCURRENTLY, which has indislive true while indisready and indisvalid
1389 * are false. That's because in that state, transactions must examine the
1390 * index for HOT-safety decisions, while in this state we don't want them
1391 * to open it at all.)
1393 * Since all predicate locks on the index are about to be made invalid, we
1394 * must promote them to predicate locks on the heap. In the
1395 * non-concurrent case we can just do that now. In the concurrent case
1396 * it's a bit trickier. The predicate locks must be moved when there are
1397 * no index scans in progress on the index and no more can subsequently
1398 * start, so that no new predicate locks can be made on the index. Also,
1399 * they must be moved before heap inserts stop maintaining the index, else
1400 * the conflict with the predicate lock on the index gap could be missed
1401 * before the lock on the heap relation is in place to detect a conflict
1402 * based on the heap tuple insert.
1407 * We must commit our transaction in order to make the first pg_index
1408 * state update visible to other sessions. If the DROP machinery has
1409 * already performed any other actions (removal of other objects,
1410 * pg_depend entries, etc), the commit would make those actions
1411 * permanent, which would leave us with inconsistent catalog state if
1412 * we fail partway through the following sequence. Since DROP INDEX
1413 * CONCURRENTLY is restricted to dropping just one index that has no
1414 * dependencies, we should get here before anything's been done ---
1415 * but let's check that to be sure. We can verify that the current
1416 * transaction has not executed any transactional updates by checking
1417 * that no XID has been assigned.
1419 if (GetTopTransactionIdIfAny() != InvalidTransactionId)
1421 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1422 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
1425 * Mark index invalid by updating its pg_index entry
1427 index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
1430 * Invalidate the relcache for the table, so that after this commit
1431 * all sessions will refresh any cached plans that might reference the
1434 CacheInvalidateRelcache(userHeapRelation);
1436 /* save lockrelid and locktag for below, then close but keep locks */
1437 heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
1438 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1439 indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
1441 heap_close(userHeapRelation, NoLock);
1442 index_close(userIndexRelation, NoLock);
1445 * We must commit our current transaction so that the indisvalid
1446 * update becomes visible to other transactions; then start another.
1447 * Note that any previously-built data structures are lost in the
1448 * commit. The only data we keep past here are the relation IDs.
1450 * Before committing, get a session-level lock on the table, to ensure
1451 * that neither it nor the index can be dropped before we finish. This
1452 * cannot block, even if someone else is waiting for access, because
1453 * we already have the same lock within our transaction.
1455 LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1456 LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
/*
 * NOTE(review): a snapshot is popped before this first commit only, not
 * before the second commit further down; presumably the caller pushed one
 * active snapshot -- confirm against the caller.
 */
1458 PopActiveSnapshot();
1459 CommitTransactionCommand();
1460 StartTransactionCommand();
1463 * Now we must wait until no running transaction could be using the
1464 * index for a query. Use AccessExclusiveLock here to check for
1465 * running transactions that hold locks of any kind on the table. Note
1466 * we do not need to worry about xacts that open the table for reading
1467 * after this point; they will see the index as invalid when they open
1470 * Note: the reason we use actual lock acquisition here, rather than
1471 * just checking the ProcArray and sleeping, is that deadlock is
1472 * possible if one of the transactions in question is blocked trying
1473 * to acquire an exclusive lock on our table. The lock code will
1474 * detect deadlock and error out properly.
1476 WaitForLockers(heaplocktag, AccessExclusiveLock);
1479 * No more predicate locks will be acquired on this index, and we're
1480 * about to stop doing inserts into the index which could show
1481 * conflicts with existing predicate locks, so now is the time to move
1482 * them to the heap relation.
1484 userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1485 userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1486 TransferPredicateLocksToHeapRelation(userIndexRelation);
1489 * Now we are sure that nobody uses the index for queries; they just
1490 * might have it open for updating it. So now we can unset indisready
1491 * and indislive, then wait till nobody could be using it at all
1494 index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1497 * Invalidate the relcache for the table, so that after this commit
1498 * all sessions will refresh the table's index list. Forgetting just
1499 * the index's relcache entry is not enough.
1501 CacheInvalidateRelcache(userHeapRelation);
1504 * Close the relations again, though still holding session lock.
1506 heap_close(userHeapRelation, NoLock);
1507 index_close(userIndexRelation, NoLock);
1510 * Again, commit the transaction to make the pg_index update visible
1511 * to other sessions.
1513 CommitTransactionCommand();
1514 StartTransactionCommand();
1517 * Wait till every transaction that saw the old index state has
1520 WaitForLockers(heaplocktag, AccessExclusiveLock);
1523 * Re-open relations to allow us to complete our actions.
1525 * At this point, nothing should be accessing the index, but lets
1526 * leave nothing to chance and grab AccessExclusiveLock on the index
1527 * before the physical deletion.
1529 userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1530 userIndexRelation = index_open(indexId, AccessExclusiveLock);
1534 /* Not concurrent, so just transfer predicate locks and we're good */
1535 TransferPredicateLocksToHeapRelation(userIndexRelation);
1539 * Schedule physical removal of the files
1541 RelationDropStorage(userIndexRelation);
1544 * Close and flush the index's relcache entry, to ensure relcache doesn't
1545 * try to rebuild it while we're deleting catalog entries. We keep the
1548 index_close(userIndexRelation, NoLock);
1550 RelationForgetRelation(indexId);
1553 * fix INDEX relation, and check for expressional index
1555 indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
1557 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
1558 if (!HeapTupleIsValid(tuple))
1559 elog(ERROR, "cache lookup failed for index %u", indexId);
/* Capture whether indexprs is non-null before the pg_index row is deleted */
1561 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
1563 simple_heap_delete(indexRelation, &tuple->t_self);
1565 ReleaseSysCache(tuple);
1566 heap_close(indexRelation, RowExclusiveLock);
1569 * if it has any expression columns, we might have stored statistics about
/* NOTE(review): presumably guarded by hasexprs; the condition is not visible here */
1573 RemoveStatistics(indexId, 0);
1576 * fix ATTRIBUTE relation
1578 DeleteAttributeTuples(indexId);
1581 * fix RELATION relation
1583 DeleteRelationTuple(indexId);
1586 * We are presently too lazy to attempt to compute the new correct value
1587 * of relhasindex (the next VACUUM will fix it if necessary). So there is
1588 * no need to update the pg_class tuple for the owning relation. But we
1589 * must send out a shared-cache-inval notice on the owning relation to
1590 * ensure other backends update their relcache lists of indexes. (In the
1591 * concurrent case, this is redundant but harmless.)
1593 CacheInvalidateRelcache(userHeapRelation);
1596 * Close owning rel, but keep lock
1598 heap_close(userHeapRelation, NoLock);
1601 * Release the session locks before we go.
/*
 * These pair with the LockRelationIdForSession() calls made earlier.
 * NOTE(review): presumably executed only when concurrent is true; the
 * guarding condition is not visible here -- confirm.
 */
1605 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1606 UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
1610 /* ----------------------------------------------------------------
1611 * index_build support
1612 * ----------------------------------------------------------------
1617 * Construct an IndexInfo record for an open index
1619 * IndexInfo stores the information about the index that's needed by
1620 * FormIndexDatum, which is used for both index_build() and later insertion
1621 * of individual index tuples. Normally we build an IndexInfo for an index
1622 * just once per command, and then use it for (potentially) many tuples.
/*
 * Allocates a new IndexInfo node and fills it in from the open index's
 * rd_index data: key attribute numbers, expressions, predicate, exclusion
 * constraint info, and the unique / ready-for-inserts flags.  The
 * speculative-insertion fields and the index-build state are set to
 * defaults.
 *
 * index: an open index relation
 */
1626 BuildIndexInfo(Relation index)
1628 IndexInfo *ii = makeNode(IndexInfo);
1629 Form_pg_index indexStruct = index->rd_index;
1633 /* check the number of keys, and copy attr numbers into the IndexInfo */
1634 numKeys = indexStruct->indnatts;
1635 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1636 elog(ERROR, "invalid indnatts %d for index %u",
1637 numKeys, RelationGetRelid(index));
1638 ii->ii_NumIndexAttrs = numKeys;
1639 for (i = 0; i < numKeys; i++)
1640 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1642 /* fetch any expressions needed for expressional indexes */
1643 ii->ii_Expressions = RelationGetIndexExpressions(index);
/* Evaluation state starts empty; FormIndexDatum builds it on first use */
1644 ii->ii_ExpressionsState = NIL;
1646 /* fetch index predicate if any */
1647 ii->ii_Predicate = RelationGetIndexPredicate(index);
1648 ii->ii_PredicateState = NIL;
1650 /* fetch exclusion constraint info if any */
1651 if (indexStruct->indisexclusion)
1653 RelationGetExclusionInfo(index,
1654 &ii->ii_ExclusionOps,
1655 &ii->ii_ExclusionProcs,
1656 &ii->ii_ExclusionStrats);
/* Otherwise (not an exclusion index) the exclusion arrays are left NULL */
1660 ii->ii_ExclusionOps = NULL;
1661 ii->ii_ExclusionProcs = NULL;
1662 ii->ii_ExclusionStrats = NULL;
1666 ii->ii_Unique = indexStruct->indisunique;
1667 ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1668 /* assume not doing speculative insertion for now */
1669 ii->ii_UniqueOps = NULL;
1670 ii->ii_UniqueProcs = NULL;
1671 ii->ii_UniqueStrats = NULL;
1673 /* initialize index-build state to default */
1674 ii->ii_Concurrent = false;
1675 ii->ii_BrokenHotChain = false;
1681 * BuildSpeculativeIndexInfo
1682 * Add extra state to IndexInfo record
1684 * For unique indexes, we usually don't want to add info to the IndexInfo for
1685 * checking uniqueness, since the B-Tree AM handles that directly. However,
1686 * in the case of speculative insertion, additional support is required.
1688 * Do this processing here rather than in BuildIndexInfo() to not incur the
1689 * overhead in the common non-speculative cases.
/*
 * Fills ii->ii_UniqueOps, ii->ii_UniqueProcs and ii->ii_UniqueStrats with,
 * for each index column, the equality-strategy operator of the column's
 * operator family, that operator's function, and the strategy number.
 *
 * index: the open index; must use the btree access method (else ERROR)
 * ii:    IndexInfo for that index; must have ii_Unique set (asserted)
 */
1693 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
/* All three arrays below are sized by the index's pg_class relnatts */
1695 int ncols = index->rd_rel->relnatts;
1699 * fetch info for checking unique indexes
1701 Assert(ii->ii_Unique);
1703 if (index->rd_rel->relam != BTREE_AM_OID)
1704 elog(ERROR, "unexpected non-btree speculative unique index");
1706 ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
1707 ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
1708 ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
1711 * We have to look up the operator's strategy number. This provides a
1712 * cross-check that the operator does match the index.
/*
 * NOTE(review): the comment above does not match the loop below, which
 * fixes the strategy to BTEqualStrategyNumber and looks up the operator
 * from it (not the strategy from the operator).
 */
1714 /* We need the func OIDs and strategy numbers too */
1715 for (i = 0; i < ncols; i++)
1717 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
1718 ii->ii_UniqueOps[i] =
1719 get_opfamily_member(index->rd_opfamily[i],
1720 index->rd_opcintype[i],
1721 index->rd_opcintype[i],
1722 ii->ii_UniqueStrats[i]);
1723 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
1729 * Construct values[] and isnull[] arrays for a new index tuple.
1731 * indexInfo Info about the index
1732 * slot Heap tuple for which we must prepare an index entry
1733 * estate executor state for evaluating any index expressions
1734 * values Array of index Datums (output area)
1735 * isnull Array of is-null indicators (output area)
1737 * When there are no index expressions, estate may be NULL. Otherwise it
1738 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1739 * context must point to the heap tuple passed in.
1741 * Notice we don't actually call index_form_tuple() here; we just prepare
1742 * its input arrays values[] and isnull[]. This is because the index AM
1743 * may wish to alter the data before storage.
/*
 * Computes one datum per index attribute for the heap tuple in "slot".
 * A plain column is fetched with slot_getattr(); an expression column is
 * computed by evaluating the next entry of ii_ExpressionsState.  Raises
 * ERROR if the number of expression states does not match the number of
 * expression columns.
 *
 * NOTE(review): the remaining parameters (estate and the output arrays
 * described in the header comment) are not visible in this signature.
 */
1747 FormIndexDatum(IndexInfo *indexInfo,
1748 TupleTableSlot *slot,
/* Cursor over the expression states, advanced once per expression column */
1753 ListCell *indexpr_item;
1756 if (indexInfo->ii_Expressions != NIL &&
1757 indexInfo->ii_ExpressionsState == NIL)
1759 /* First time through, set up expression evaluation state */
1760 indexInfo->ii_ExpressionsState = (List *)
1761 ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1763 /* Check caller has set up context correctly */
1764 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1766 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1768 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
/* NOTE(review): presumably keycol == 0 marks an expression column; the test is not visible here */
1770 int keycol = indexInfo->ii_KeyAttrNumbers[i];
1777 * Plain index column; get the value we need directly from the
1780 iDatum = slot_getattr(slot, keycol, &isNull);
1785 * Index expression --- need to evaluate it.
/* Running out of expression states means too few were supplied */
1787 if (indexpr_item == NULL)
1788 elog(ERROR, "wrong number of index expressions");
1789 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1790 GetPerTupleExprContext(estate),
1793 indexpr_item = lnext(indexpr_item);
/* Leftover expression states after the loop mean too many were supplied */
1799 if (indexpr_item != NULL)
1800 elog(ERROR, "wrong number of index expressions");
1805 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1807 * This routine updates the pg_class row of either an index or its parent
1808 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
1809 * to ensure we can do all the necessary work in just one update.
1811 * hasindex: set relhasindex to this value
1812 * isprimary: if true, set relhaspkey true; else no change
1813 * reltuples: if >= 0, set reltuples to this value; else no change
1815 * If reltuples >= 0, relpages and relallvisible are also updated (using
1816 * RelationGetNumberOfBlocks() and visibilitymap_count()).
1818 * NOTE: an important side-effect of this operation is that an SI invalidation
1819 * message is sent out to all backends --- including me --- causing relcache
1820 * entries to be flushed or updated with the new data. This must happen even
1821 * if we find that no change is needed in the pg_class row. When updating
1822 * a heap entry, this ensures that other backends find out about the new
1823 * index. When updating an index, it's important because some index AMs
1824 * expect a relcache flush to occur after REINDEX.
/*
 * Updates the pg_class row of "rel" in place: relhasindex, relhaspkey and,
 * when a tuple count is supplied, relpages / reltuples / relallvisible.
 * If the row needs no change, a relcache invalidation is still sent.
 *
 * NOTE(review): the remaining parameters (hasindex, isprimary, reltuples,
 * as described in the header comment) and the local flag that records
 * whether anything changed are not visible in this listing.
 */
1827 index_update_stats(Relation rel,
1832 Oid relid = RelationGetRelid(rel);
1835 Form_pg_class rd_rel;
1839 * We always update the pg_class row using a non-transactional,
1840 * overwrite-in-place update. There are several reasons for this:
1842 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1844 * 2. We could be reindexing pg_class itself, in which case we can't move
1845 * its pg_class row because CatalogUpdateIndexes might not know about all
1846 * the indexes yet (see reindex_relation).
1848 * 3. Because we execute CREATE INDEX with just share lock on the parent
1849 * rel (to allow concurrent index creations), an ordinary update could
1850 * suffer a tuple-concurrently-updated failure against another CREATE
1851 * INDEX committing at about the same time. We can avoid that by having
1852 * them both do nontransactional updates (we assume they will both be
1853 * trying to change the pg_class row to the same thing, so it doesn't
1854 * matter which goes first).
1856 * It is safe to use a non-transactional update even though our
1857 * transaction could still fail before committing. Setting relhasindex
1858 * true is safe even if there are no indexes (VACUUM will eventually fix
1859 * it), likewise for relhaspkey. And of course the new relpages and
1860 * reltuples counts are correct regardless. However, we don't want to
1861 * change relpages (or relallvisible) if the caller isn't providing an
1862 * updated reltuples count, because that would bollix the
1863 * reltuples/relpages ratio which is what's really important.
1866 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1869 * Make a copy of the tuple to update. Normally we use the syscache, but
1870 * we can't rely on that during bootstrap or while reindexing pg_class
1873 if (IsBootstrapProcessingMode() ||
1874 ReindexIsProcessingHeap(RelationRelationId))
1876 /* don't assume syscache will work */
1877 HeapScanDesc pg_class_scan;
/* Scan pg_class for the row whose OID equals relid */
1880 ScanKeyInit(&key[0],
1881 ObjectIdAttributeNumber,
1882 BTEqualStrategyNumber, F_OIDEQ,
1883 ObjectIdGetDatum(relid));
1885 pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
1886 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
/* Copy before ending the scan; validity of the copy is checked further down */
1887 tuple = heap_copytuple(tuple);
1888 heap_endscan(pg_class_scan);
1892 /* normal case, use syscache */
1893 tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
/* Both lookup paths end up here with a private copy (or an invalid tuple) */
1896 if (!HeapTupleIsValid(tuple))
1897 elog(ERROR, "could not find tuple for relation %u", relid);
1898 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1900 /* Apply required updates, if any, to copied tuple */
1903 if (rd_rel->relhasindex != hasindex)
1905 rd_rel->relhasindex = hasindex;
/* NOTE(review): presumably guarded by isprimary; that condition is not visible here */
1910 if (!rd_rel->relhaspkey)
1912 rd_rel->relhaspkey = true;
/* NOTE(review): presumably only when reltuples >= 0 (see header); the condition is not visible here */
1919 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1920 BlockNumber relallvisible;
1922 if (rd_rel->relkind != RELKIND_INDEX)
1923 visibilitymap_count(rel, &relallvisible, NULL);
1924 else /* don't bother for indexes */
/* New values are cast to the stored field types before being compared */
1927 if (rd_rel->relpages != (int32) relpages)
1929 rd_rel->relpages = (int32) relpages;
1932 if (rd_rel->reltuples != (float4) reltuples)
1934 rd_rel->reltuples = (float4) reltuples;
1937 if (rd_rel->relallvisible != (int32) relallvisible)
1939 rd_rel->relallvisible = (int32) relallvisible;
1945 * If anything changed, write out the tuple
1949 heap_inplace_update(pg_class, tuple);
1950 /* the above sends a cache inval message */
1954 /* no need to change tuple, but force relcache inval anyway */
1955 CacheInvalidateRelcacheByTuple(tuple);
1958 heap_freetuple(tuple);
1960 heap_close(pg_class, RowExclusiveLock);
1965 * index_build - invoke access-method-specific index build procedure
1967 * On entry, the index's catalog entries are valid, and its physical disk
1968 * file has been created but is empty. We call the AM-specific build
1969 * procedure to fill in the index contents. We then update the pg_class
1970 * entries of the index and heap relation as needed, using statistics
1971 * returned by ambuild as well as data passed by the caller.
1973 * isprimary tells whether to mark the index as a primary-key index.
1974 * isreindex indicates we are recreating a previously-existing index.
1976 * Note: when reindexing an existing index, isprimary can be false even if
1977 * the index is a PK; it's already properly marked and need not be re-marked.
1979 * Note: before Postgres 8.2, the passed-in heap and index Relations
1980 * were automatically closed by this routine. This is no longer the case.
1981 * The caller opened 'em, and the caller should close 'em.
/*
 * Builds the index by calling the access method's ambuild routine while
 * running as the table owner, writes an init fork for unlogged indexes when
 * none exists, sets indcheckxmin if broken HOT chains were found, updates
 * the pg_class rows of the heap and the index, and verifies exclusion
 * constraints.  GUC changes, user id and security context are restored
 * before returning.
 *
 * NOTE(review): the remaining parameters (isprimary and isreindex per the
 * header comment) are not visible in this signature.
 */
1984 index_build(Relation heapRelation,
1985 Relation indexRelation,
1986 IndexInfo *indexInfo,
1990 IndexBuildResult *stats;
1992 int save_sec_context;
1998 Assert(RelationIsValid(indexRelation));
1999 Assert(PointerIsValid(indexRelation->rd_amroutine));
2000 Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
2001 Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));
2004 (errmsg("building index \"%s\" on table \"%s\"",
2005 RelationGetRelationName(indexRelation),
2006 RelationGetRelationName(heapRelation))));
2009 * Switch to the table owner's userid, so that any index functions are run
2010 * as that user. Also lock down security-restricted operations and
2011 * arrange to make GUC variable changes local to this command.
/* The saved user id, security context and GUC nest level are restored at the end */
2013 GetUserIdAndSecContext(&save_userid, &save_sec_context);
2014 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2015 save_sec_context | SECURITY_RESTRICTED_OPERATION);
2016 save_nestlevel = NewGUCNestLevel();
2019 * Call the access method's build procedure
2021 stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
2023 Assert(PointerIsValid(stats));
2026 * If this is an unlogged index, we may need to write out an init fork for
2027 * it -- but we must first check whether one already exists. If, for
2028 * example, an unlogged relation is truncated in the transaction that
2029 * created it, or truncated twice in a subsequent transaction, the
2030 * relfilenode won't change, and nothing needs to be done here.
/*
 * NOTE(review): rd_smgr is passed to smgrexists() here, before the
 * RelationOpenSmgr() call below -- confirm that rd_smgr is guaranteed to
 * be open at this point, or open it before the existence check.
 */
2032 if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2033 !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2035 RelationOpenSmgr(indexRelation);
2036 smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2037 indexRelation->rd_amroutine->ambuildempty(indexRelation);
2041 * If we found any potentially broken HOT chains, mark the index as not
2042 * being usable until the current transaction is below the event horizon.
2043 * See src/backend/access/heap/README.HOT for discussion.
2045 * However, when reindexing an existing index, we should do nothing here.
2046 * Any HOT chains that are broken with respect to the index must predate
2047 * the index's original creation, so there is no need to change the
2048 * index's usability horizon. Moreover, we *must not* try to change the
2049 * index's pg_index entry while reindexing pg_index itself, and this
2050 * optimization nicely prevents that.
2052 * We also need not set indcheckxmin during a concurrent index build,
2053 * because we won't set indisvalid true until all transactions that care
2054 * about the broken HOT chains are gone.
2056 * Therefore, this code path can only be taken during non-concurrent
2057 * CREATE INDEX. Thus the fact that heap_update will set the pg_index
2058 * tuple's xmin doesn't matter, because that tuple was created in the
2059 * current transaction anyway. That also means we don't need to worry
2060 * about any concurrent readers of the tuple; no other transaction can see
2063 if (indexInfo->ii_BrokenHotChain && !isreindex &&
2064 !indexInfo->ii_Concurrent)
2066 Oid indexId = RelationGetRelid(indexRelation);
2068 HeapTuple indexTuple;
2069 Form_pg_index indexForm;
2071 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
/* Modify a private copy of the pg_index row; it is freed after the update */
2073 indexTuple = SearchSysCacheCopy1(INDEXRELID,
2074 ObjectIdGetDatum(indexId));
2075 if (!HeapTupleIsValid(indexTuple))
2076 elog(ERROR, "cache lookup failed for index %u", indexId);
2077 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2079 /* If it's a new index, indcheckxmin shouldn't be set ... */
2080 Assert(!indexForm->indcheckxmin);
2082 indexForm->indcheckxmin = true;
2083 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2084 CatalogUpdateIndexes(pg_index, indexTuple);
2086 heap_freetuple(indexTuple);
2087 heap_close(pg_index, RowExclusiveLock);
2091 * Update heap and index pg_class rows
/* Tuple counts come from the IndexBuildResult returned by ambuild */
2093 index_update_stats(heapRelation,
2096 stats->heap_tuples);
2098 index_update_stats(indexRelation,
2101 stats->index_tuples);
2103 /* Make the updated catalog row versions visible */
2104 CommandCounterIncrement();
2107 * If it's for an exclusion constraint, make a second pass over the heap
2108 * to verify that the constraint is satisfied. We must not do this until
2109 * the index is fully valid. (Broken HOT chains shouldn't matter, though;
2110 * see comments for IndexCheckExclusion.)
2112 if (indexInfo->ii_ExclusionOps != NULL)
2113 IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
2115 /* Roll back any GUC changes executed by index functions */
2116 AtEOXact_GUC(false, save_nestlevel);
2118 /* Restore userid and security context */
2119 SetUserIdAndSecContext(save_userid, save_sec_context);
2124 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
2126 * This is called back from an access-method-specific index build procedure
2127 * after the AM has done whatever setup it needs. The parent heap relation
2128 * is scanned to find tuples that should be entered into the index. Each
2129 * such tuple is passed to the AM's callback routine, which does the right
2130 * things to add it to the new index. After we return, the AM's index
2131 * build procedure does whatever cleanup it needs.
2133 * The total count of heap tuples is returned. This is for updating pg_class
2134 * statistics. (It's annoying not to be able to do that here, but we want
2135 * to merge that update with others; see index_update_stats.) Note that the
2136 * index AM itself must keep track of the number of index tuples; we don't do
2137 * so here because the AM might reject some of the tuples for its own reasons,
2138 * such as being unable to store NULLs.
2140 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
2141 * any potentially broken HOT chains. Currently, we set this if there are
2142 * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
2143 * trying very hard to detect whether they're really incompatible with the
/*
 * Whole-relation convenience wrapper: forwards all arguments to
 * IndexBuildHeapRangeScan() and returns its result, passing 0 and
 * InvalidBlockNumber as the block range.
 *
 * NOTE(review): one parameter line of this signature and one argument line
 * of the call below are not visible in this listing -- confirm against the
 * complete file.
 */
2147 IndexBuildHeapScan(Relation heapRelation,
2148 Relation indexRelation,
2149 IndexInfo *indexInfo,
2151 IndexBuildCallback callback,
2152 void *callback_state)
2154 return IndexBuildHeapRangeScan(heapRelation, indexRelation,
2155 indexInfo, allow_sync,
2157 0, InvalidBlockNumber,
2158 callback, callback_state);
2162 * As above, except that instead of scanning the complete heap, only the given
2163 * number of blocks are scanned. Scan to end-of-rel can be signalled by
2164 * passing InvalidBlockNumber as numblocks. Note that restricting the range
2165 * to scan cannot be done when requesting syncscan.
2167 * When "anyvisible" mode is requested, all tuples visible to any transaction
2168 * are considered, including those inserted or deleted by transactions that are
2169 * still in progress.
/*
 * IndexBuildHeapRangeScan: scan the heap with heap_beginscan_strat and hand
 * each tuple to be indexed to the AM-supplied callback.
 *
 * Outline of the visible logic:
 *	- build an EState, per-tuple ExprContext and a slot for evaluating the
 *	  partial-index predicate and index expressions;
 *	- pick the snapshot: a registered transaction snapshot in bootstrap mode
 *	  or for a concurrent build, otherwise SnapshotAny plus an OldestXmin
 *	  obtained from GetOldestXmin;
 *	- under SnapshotAny, classify every tuple with HeapTupleSatisfiesVacuum
 *	  while holding a share lock on its buffer;
 *	- compute the index column values with FormIndexDatum and invoke the
 *	  callback, substituting the root line pointer for heap-only tuples;
 *	- release the snapshot (if registered), slot and EState, and reset the
 *	  cached expression/predicate state pointers in indexInfo.
 *
 * NOTE(review): this listing omits a number of source lines (return type,
 * braces, several local declarations, and the allow_sync / anyvisible
 * parameters that the body refers to).  Consult the complete file before
 * relying on exact control flow.
 */
2172 IndexBuildHeapRangeScan(Relation heapRelation,
2173 Relation indexRelation,
2174 IndexInfo *indexInfo,
2177 BlockNumber start_blockno,
2178 BlockNumber numblocks,
2179 IndexBuildCallback callback,
2180 void *callback_state)
2182 bool is_system_catalog;
2183 bool checking_uniqueness;
2185 HeapTuple heapTuple;
2186 Datum values[INDEX_MAX_KEYS];
2187 bool isnull[INDEX_MAX_KEYS];
2190 TupleTableSlot *slot;
2192 ExprContext *econtext;
2194 TransactionId OldestXmin;
2195 BlockNumber root_blkno = InvalidBlockNumber;
2196 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2201 Assert(OidIsValid(indexRelation->rd_rel->relam));
2203 /* Remember if it's a system catalog */
2204 is_system_catalog = IsSystemRelation(heapRelation);
2206 /* See whether we're verifying uniqueness/exclusion properties */
2207 checking_uniqueness = (indexInfo->ii_Unique ||
2208 indexInfo->ii_ExclusionOps != NULL);
2211 * "Any visible" mode is not compatible with uniqueness checks; make sure
2212 * only one of those is requested.
2214 Assert(!(anyvisible && checking_uniqueness));
2217 * Need an EState for evaluation of index expressions and partial-index
2218 * predicates. Also a slot to hold the current tuple.
2220 estate = CreateExecutorState();
2221 econtext = GetPerTupleExprContext(estate);
2222 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2224 /* Arrange for econtext's scan tuple to be the tuple under test */
2225 econtext->ecxt_scantuple = slot;
2227 /* Set up execution state for predicate, if any. */
2228 predicate = (List *)
2229 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2233 * Prepare for scan of the base relation. In a normal index build, we use
2234 * SnapshotAny because we must retrieve all tuples and do our own time
2235 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
2236 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
2237 * and index whatever's live according to that.
2239 if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2241 snapshot = RegisterSnapshot(GetTransactionSnapshot());
2242 OldestXmin = InvalidTransactionId; /* not used */
2244 /* "any visible" mode is not compatible with this */
2245 Assert(!anyvisible);
2249 snapshot = SnapshotAny;
2250 /* okay to ignore lazy VACUUMs here */
2251 OldestXmin = GetOldestXmin(heapRelation, true);
/* allow_sync is handed through unchanged as the scan's syncscan flag */
2254 scan = heap_beginscan_strat(heapRelation, /* relation */
2255 snapshot, /* snapshot */
2256 0, /* number of keys */
2257 NULL, /* scan key */
2258 true, /* buffer access strategy OK */
2259 allow_sync); /* syncscan OK? */
/*
 * NOTE(review): the condition that chooses between restricting the scan
 * range and asserting a whole-relation scan is not visible in this listing;
 * presumably it tests allow_sync -- confirm against the complete file.
 */
2261 /* set our scan endpoints */
2263 heap_setscanlimits(scan, start_blockno, numblocks);
2266 /* syncscan can only be requested on whole relation */
2267 Assert(start_blockno == 0);
2268 Assert(numblocks == InvalidBlockNumber);
2274 * Scan all tuples in the base relation.
2276 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2280 CHECK_FOR_INTERRUPTS();
2283 * When dealing with a HOT-chain of updated tuples, we want to index
2284 * the values of the live tuple (if any), but index it under the TID
2285 * of the chain's root tuple. This approach is necessary to preserve
2286 * the HOT-chain structure in the heap. So we need to be able to find
2287 * the root item offset for every tuple that's in a HOT-chain. When
2288 * first reaching a new page of the relation, call
2289 * heap_get_root_tuples() to build a map of root item offsets on the
2292 * It might look unsafe to use this information across buffer
2293 * lock/unlock. However, we hold ShareLock on the table so no
2294 * ordinary insert/update/delete should occur; and we hold pin on the
2295 * buffer continuously while visiting the page, so no pruning
2296 * operation can occur either.
2298 * Also, although our opinions about tuple liveness could change while
2299 * we scan the page (due to concurrent transaction commits/aborts),
2300 * the chain root locations won't, so this info doesn't need to be
2301 * rebuilt after waiting for another transaction.
2303 * Note the implied assumption that there is no more than one live
2304 * tuple per HOT-chain --- else we could create more than one index
2305 * entry pointing to the same root tuple.
2307 if (scan->rs_cblock != root_blkno)
2309 Page page = BufferGetPage(scan->rs_cbuf);
2311 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2312 heap_get_root_tuples(page, root_offsets);
2313 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2315 root_blkno = scan->rs_cblock;
2318 if (snapshot == SnapshotAny)
2320 /* do our own time qual check */
2322 TransactionId xwait;
2327 * We could possibly get away with not locking the buffer here,
2328 * since caller should hold ShareLock on the relation, but let's
2329 * be conservative about it. (This remark is still correct even
2330 * with HOT-pruning: our pin on the buffer prevents pruning.)
2332 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2334 switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
2337 case HEAPTUPLE_DEAD:
2338 /* Definitely dead, we can ignore it */
2340 tupleIsAlive = false;
2342 case HEAPTUPLE_LIVE:
2343 /* Normal case, index and unique-check it */
2345 tupleIsAlive = true;
2347 case HEAPTUPLE_RECENTLY_DEAD:
2350 * If tuple is recently deleted then we must index it
2351 * anyway to preserve MVCC semantics. (Pre-existing
2352 * transactions could try to use the index after we finish
2353 * building it, and may need to see such tuples.)
2355 * However, if it was HOT-updated then we must only index
2356 * the live tuple at the end of the HOT-chain. Since this
2357 * breaks semantics for pre-existing snapshots, mark the
2358 * index as unusable for them.
2360 if (HeapTupleIsHotUpdated(heapTuple))
2363 /* mark the index as unsafe for old snapshots */
2364 indexInfo->ii_BrokenHotChain = true;
2368 /* In any case, exclude the tuple from unique-checking */
2369 tupleIsAlive = false;
2371 case HEAPTUPLE_INSERT_IN_PROGRESS:
2374 * In "anyvisible" mode, this tuple is visible and we
2375 * don't need any further checks.
2380 tupleIsAlive = true;
2385 * Since caller should hold ShareLock or better, normally
2386 * the only way to see this is if it was inserted earlier
2387 * in our own transaction. However, it can happen in
2388 * system catalogs, since we tend to release write lock
2389 * before commit there. Give a warning if neither case
2392 xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
2393 if (!TransactionIdIsCurrentTransactionId(xwait))
2395 if (!is_system_catalog)
2396 elog(WARNING, "concurrent insert in progress within table \"%s\"",
2397 RelationGetRelationName(heapRelation));
2400 * If we are performing uniqueness checks, indexing
2401 * such a tuple could lead to a bogus uniqueness
2402 * failure. In that case we wait for the inserting
2403 * transaction to finish and check again.
2405 if (checking_uniqueness)
2408 * Must drop the lock on the buffer before we wait
2410 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2411 XactLockTableWait(xwait, heapRelation,
2413 XLTW_InsertIndexUnique);
2414 CHECK_FOR_INTERRUPTS();
2420 * We must index such tuples, since if the index build
2421 * commits then they're good.
2424 tupleIsAlive = true;
2426 case HEAPTUPLE_DELETE_IN_PROGRESS:
2429 * As with INSERT_IN_PROGRESS case, this is unexpected
2430 * unless it's our own deletion or a system catalog; but
2431 * in anyvisible mode, this tuple is visible.
2436 tupleIsAlive = false;
2440 xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
2441 if (!TransactionIdIsCurrentTransactionId(xwait))
2443 if (!is_system_catalog)
2444 elog(WARNING, "concurrent delete in progress within table \"%s\"",
2445 RelationGetRelationName(heapRelation));
2448 * If we are performing uniqueness checks, assuming
2449 * the tuple is dead could lead to missing a
2450 * uniqueness violation. In that case we wait for the
2451 * deleting transaction to finish and check again.
2453 * Also, if it's a HOT-updated tuple, we should not
2454 * index it but rather the live tuple at the end of
2455 * the HOT-chain. However, the deleting transaction
2456 * could abort, possibly leaving this tuple as live
2457 * after all, in which case it has to be indexed. The
2458 * only way to know what to do is to wait for the
2459 * deleting transaction to finish and check again.
2461 if (checking_uniqueness ||
2462 HeapTupleIsHotUpdated(heapTuple))
2465 * Must drop the lock on the buffer before we wait
2467 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2468 XactLockTableWait(xwait, heapRelation,
2470 XLTW_InsertIndexUnique);
2471 CHECK_FOR_INTERRUPTS();
2476 * Otherwise index it but don't check for uniqueness,
2477 * the same as a RECENTLY_DEAD tuple.
2481 else if (HeapTupleIsHotUpdated(heapTuple))
2484 * It's a HOT-updated tuple deleted by our own xact.
2485 * We can assume the deletion will commit (else the
2486 * index contents don't matter), so treat the same as
2487 * RECENTLY_DEAD HOT-updated tuples.
2490 /* mark the index as unsafe for old snapshots */
2491 indexInfo->ii_BrokenHotChain = true;
2496 * It's a regular tuple deleted by our own xact. Index
2497 * it but don't check for uniqueness, the same as a
2498 * RECENTLY_DEAD tuple.
2502 /* In any case, exclude the tuple from unique-checking */
2503 tupleIsAlive = false;
2506 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2507 indexIt = tupleIsAlive = false; /* keep compiler quiet */
2511 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2518 /* heap_getnext did the time qual check */
2519 tupleIsAlive = true;
2524 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2526 /* Set up for predicate or expression evaluation */
2527 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2530 * In a partial index, discard tuples that don't satisfy the
2533 if (predicate != NIL)
2535 if (!ExecQual(predicate, econtext, false))
2540 * For the current heap tuple, extract all the attributes we use in
2541 * this index, and note which are null. This also performs evaluation
2542 * of any expressions needed.
2544 FormIndexDatum(indexInfo,
2551 * You'd think we should go ahead and build the index tuple here, but
2552 * some index AMs want to do further processing on the data first. So
2553 * pass the values[] and isnull[] arrays, instead.
/*
 * Heap-only tuples are passed to the callback via a local copy whose t_self
 * offset is replaced by the root offset; root_offsets[] is indexed by
 * (offset number - 1).
 */
2556 if (HeapTupleIsHeapOnly(heapTuple))
2559 * For a heap-only tuple, pretend its TID is that of the root. See
2560 * src/backend/access/heap/README.HOT for discussion.
2562 HeapTupleData rootTuple;
2563 OffsetNumber offnum;
2565 rootTuple = *heapTuple;
2566 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
2568 if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
2569 elog(ERROR, "failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
2570 ItemPointerGetBlockNumber(&heapTuple->t_self),
2571 offnum, RelationGetRelationName(heapRelation));
2573 ItemPointerSetOffsetNumber(&rootTuple.t_self,
2574 root_offsets[offnum - 1]);
2576 /* Call the AM's callback routine to process the tuple */
2577 callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
2582 /* Call the AM's callback routine to process the tuple */
2583 callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
/*
 * The test below repeats the condition under which the snapshot was
 * registered above, so SnapshotAny is never passed to UnregisterSnapshot.
 */
2590 /* we can now forget our snapshot, if set */
2591 if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2592 UnregisterSnapshot(snapshot);
2594 ExecDropSingleTupleTableSlot(slot);
2596 FreeExecutorState(estate);
2598 /* These may have been pointing to the now-gone estate */
2599 indexInfo->ii_ExpressionsState = NIL;
2600 indexInfo->ii_PredicateState = NIL;
2607 * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
2609 * When creating an exclusion constraint, we first build the index normally
2610 * and then rescan the heap to check for conflicts. We assume that we only
2611 * need to validate tuples that are live according to an up-to-date snapshot,
2612 * and that these were correctly indexed even in the presence of broken HOT
2613 * chains. This should be OK since we are holding at least ShareLock on the
2614 * table, meaning there can be no uncommitted updates from other transactions.
2615 * (Note: that wouldn't necessarily work for system catalogs, since many
2616 * operations release write lock early on the system catalogs.)
/*
 * IndexCheckExclusion: rescan the heap under a freshly registered
 * GetLatestSnapshot() snapshot and, for every tuple that passes the
 * partial-index predicate (if any), compute its index column values with
 * FormIndexDatum and pass them to check_exclusion_constraint.
 *
 * Before scanning, reindex-processing state is reset if this index is the
 * one currently being reindexed.  On exit the snapshot, slot and EState are
 * released and the cached expression/predicate state in indexInfo is
 * cleared.
 *
 * NOTE(review): this listing omits some source lines (return type, braces,
 * several local declarations, trailing call arguments); consult the complete
 * file for exact details.
 */
2619 IndexCheckExclusion(Relation heapRelation,
2620 Relation indexRelation,
2621 IndexInfo *indexInfo)
2624 HeapTuple heapTuple;
2625 Datum values[INDEX_MAX_KEYS];
2626 bool isnull[INDEX_MAX_KEYS];
2628 TupleTableSlot *slot;
2630 ExprContext *econtext;
2634 * If we are reindexing the target index, mark it as no longer being
2635 * reindexed, to forestall an Assert in index_beginscan when we try to use
2636 * the index for probes. This is OK because the index is now fully valid.
2638 if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
2639 ResetReindexProcessing();
2642 * Need an EState for evaluation of index expressions and partial-index
2643 * predicates. Also a slot to hold the current tuple.
2645 estate = CreateExecutorState();
2646 econtext = GetPerTupleExprContext(estate);
2647 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2649 /* Arrange for econtext's scan tuple to be the tuple under test */
2650 econtext->ecxt_scantuple = slot;
2652 /* Set up execution state for predicate, if any. */
2653 predicate = (List *)
2654 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2658 * Scan all live tuples in the base relation.
2660 snapshot = RegisterSnapshot(GetLatestSnapshot());
2661 scan = heap_beginscan_strat(heapRelation, /* relation */
2662 snapshot, /* snapshot */
2663 0, /* number of keys */
2664 NULL, /* scan key */
2665 true, /* buffer access strategy OK */
2666 true); /* syncscan OK */
2668 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2670 CHECK_FOR_INTERRUPTS();
/* per-tuple memory is recycled at the start of every iteration */
2672 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2674 /* Set up for predicate or expression evaluation */
2675 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2678 * In a partial index, ignore tuples that don't satisfy the predicate.
2680 if (predicate != NIL)
2682 if (!ExecQual(predicate, econtext, false))
2687 * Extract index column values, including computing expressions.
2689 FormIndexDatum(indexInfo,
2696 * Check that this tuple has no conflicts.
2698 check_exclusion_constraint(heapRelation,
2699 indexRelation, indexInfo,
2700 &(heapTuple->t_self), values, isnull,
2705 UnregisterSnapshot(snapshot);
2707 ExecDropSingleTupleTableSlot(slot);
2709 FreeExecutorState(estate);
2711 /* These may have been pointing to the now-gone estate */
2712 indexInfo->ii_ExpressionsState = NIL;
2713 indexInfo->ii_PredicateState = NIL;
2718 * validate_index - support code for concurrent index builds
2720 * We do a concurrent index build by first inserting the catalog entry for the
2721 * index via index_create(), marking it not indisready and not indisvalid.
2722 * Then we commit our transaction and start a new one, then we wait for all
2723 * transactions that could have been modifying the table to terminate. Now
2724 * we know that any subsequently-started transactions will see the index and
2725 * honor its constraints on HOT updates; so while existing HOT-chains might
2726 * be broken with respect to the index, no currently live tuple will have an
2727 * incompatible HOT update done to it. We now build the index normally via
2728 * index_build(), while holding a weak lock that allows concurrent
2729 * insert/update/delete. Also, we index only tuples that are valid
2730 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
2731 * build takes care to include recently-dead tuples. This is OK because
2732 * we won't mark the index valid until all transactions that might be able
2733 * to see those tuples are gone. The reason for doing that is to avoid
2734 * bogus unique-index failures due to concurrent UPDATEs (we might see
2735 * different versions of the same row as being valid when we pass over them,
2736 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
2737 * does not contain any tuples added to the table while we built the index.
2739 * Next, we mark the index "indisready" (but still not "indisvalid") and
2740 * commit the second transaction and start a third. Again we wait for all
2741 * transactions that could have been modifying the table to terminate. Now
2742 * we know that any subsequently-started transactions will see the index and
2743 * insert their new tuples into it. We then take a new reference snapshot
2744 * which is passed to validate_index(). Any tuples that are valid according
2745 * to this snap, but are not in the index, must be added to the index.
2746 * (Any tuples committed live after the snap will be inserted into the
2747 * index by their originating transaction. Any tuples committed dead before
2748 * the snap need not be indexed, because we will wait out all transactions
2749 * that might care about them before we mark the index valid.)
2751 * validate_index() works by first gathering all the TIDs currently in the
2752 * index, using a bulkdelete callback that just stores the TIDs and doesn't
2753 * ever say "delete it". (This should be faster than a plain indexscan;
2754 * also, not all index AMs support full-index indexscan.) Then we sort the
2755 * TIDs, and finally scan the table doing a "merge join" against the TID list
2756 * to see which tuples are missing from the index. Thus we will ensure that
2757 * all tuples valid according to the reference snapshot are in the index.
2759 * Building a unique index this way is tricky: we might try to insert a
2760 * tuple that is already dead or is in process of being deleted, and we
2761 * mustn't have a uniqueness failure against an updated version of the same
2762 * row. We could try to check the tuple to see if it's already dead and tell
2763 * index_insert() not to do the uniqueness check, but that still leaves us
2764 * with a race condition against an in-progress update. To handle that,
2765 * we expect the index AM to recheck liveness of the to-be-inserted tuple
2766 * before it declares a uniqueness error.
2768 * After completing validate_index(), we wait until all transactions that
2769 * were alive at the time of the reference snapshot are gone; this is
2770 * necessary to be sure there are none left with a transaction snapshot
2771 * older than the reference (and hence possibly able to see tuples we did
2772 * not index). Then we mark the index "indisvalid" and commit. Subsequent
2773 * transactions will be able to use it for queries.
2775 * Doing two full table scans is a brute-force strategy. We could try to be
2776 * cleverer, eg storing new tuples in a special area of the table (perhaps
2777 * making the table append-only by setting use_fsm). However that would
2778 * add yet more locking issues.
/*
 * validate_index: driver for the second pass of a concurrent index build.
 *
 * Visible steps:
 *	1. open the heap with ShareUpdateExclusiveLock and the index with
 *	   RowExclusiveLock, and rebuild an IndexInfo for the index;
 *	2. switch to the table owner's userid with
 *	   SECURITY_RESTRICTED_OPERATION set, and open a new GUC nest level;
 *	3. collect every TID currently in the index into an int8 tuplesort by
 *	   running index_bulk_delete with validate_index_callback, then sort;
 *	4. call validate_index_heapscan to merge the heap against that list;
 *	5. end the tuplesort, report the counters, roll back GUC changes,
 *	   restore the saved userid/security context, and close both relations
 *	   while keeping their locks (NoLock).
 *
 * NOTE(review): this listing omits some source lines (return type, braces,
 * some local declarations and call arguments); consult the complete file.
 */
2781 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
2783 Relation heapRelation,
2785 IndexInfo *indexInfo;
2786 IndexVacuumInfo ivinfo;
2789 int save_sec_context;
2792 /* Open and lock the parent heap relation */
2793 heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
2794 /* And the target index relation */
2795 indexRelation = index_open(indexId, RowExclusiveLock);
2798 * Fetch info needed for index_insert. (You might think this should be
2799 * passed in from DefineIndex, but its copy is long gone due to having
2800 * been built in a previous transaction.)
2802 indexInfo = BuildIndexInfo(indexRelation);
2804 /* mark build is concurrent just for consistency */
2805 indexInfo->ii_Concurrent = true;
2808 * Switch to the table owner's userid, so that any index functions are run
2809 * as that user. Also lock down security-restricted operations and
2810 * arrange to make GUC variable changes local to this command.
2812 GetUserIdAndSecContext(&save_userid, &save_sec_context);
2813 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2814 save_sec_context | SECURITY_RESTRICTED_OPERATION);
2815 save_nestlevel = NewGUCNestLevel();
2818 * Scan the index and gather up all the TIDs into a tuplesort object.
2820 ivinfo.index = indexRelation;
2821 ivinfo.analyze_only = false;
2822 ivinfo.estimated_count = true;
2823 ivinfo.message_level = DEBUG2;
2824 ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
2825 ivinfo.strategy = NULL;
2828 * Encode TIDs as int8 values for the sort, rather than directly sorting
2829 * item pointers. This can be significantly faster, primarily because TID
2830 * is a pass-by-reference type on all platforms, whereas int8 is
2831 * pass-by-value on most platforms.
2833 state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
2835 maintenance_work_mem,
2837 state.htups = state.itups = state.tups_inserted = 0;
/* the bulk-delete result is deliberately discarded; only the callback's side effect matters */
2839 (void) index_bulk_delete(&ivinfo, NULL,
2840 validate_index_callback, (void *) &state);
2842 /* Execute the sort */
2843 tuplesort_performsort(state.tuplesort);
2846 * Now scan the heap and "merge" it with the index
2848 validate_index_heapscan(heapRelation,
2854 /* Done with tuplesort object */
2855 tuplesort_end(state.tuplesort);
2858 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
2859 state.htups, state.itups, state.tups_inserted);
2861 /* Roll back any GUC changes executed by index functions */
2862 AtEOXact_GUC(false, save_nestlevel);
2864 /* Restore userid and security context */
2865 SetUserIdAndSecContext(save_userid, save_sec_context);
2867 /* Close rels, but keep locks */
2868 index_close(indexRelation, NoLock);
2869 heap_close(heapRelation, NoLock);
2873 * itemptr_encode - Encode ItemPointer as int64/int8
2875 * This representation must produce values encoded as int64 that sort in the
2876 * same order as their corresponding original TID values would (using the
2877 * default int8 opclass to produce a result equivalent to the default TID opclass).
2880 * As noted in validate_index(), this can be significantly faster.
/*
 * itemptr_encode: pack a TID into one integer, with the offset number in the
 * low 16 bits and the block number shifted left by 16 above it.
 *
 * NOTE(review): the return type, the declaration of "encoded" and the return
 * statement are not visible in this listing; confirm against the full file.
 */
2883 itemptr_encode(ItemPointer itemptr)
2885 BlockNumber block = ItemPointerGetBlockNumber(itemptr);
2886 OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
2890 * Use the 16 least significant bits for the offset. 32 adjacent bits are
2891 * used for the block number. Since remaining bits are unused, there
2892 * cannot be negative encoded values (We assume a two's complement
/* the block number is widened to uint64 before shifting so no bits are lost */
2895 encoded = ((uint64) block << 16) | (uint16) offset;
2901 * itemptr_decode - Decode int64/int8 representation back to ItemPointer
/*
 * itemptr_decode: inverse of the packing used for the TID sort.  The low 16
 * bits of "encoded" become the offset number and the bits above them become
 * the block number; the result is stored into *itemptr via ItemPointerSet.
 */
2904 itemptr_decode(ItemPointer itemptr, int64 encoded)
2906 BlockNumber block = (BlockNumber) (encoded >> 16);
2907 OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
2909 ItemPointerSet(itemptr, block, offset);
2913 * validate_index_callback - bulkdelete callback to collect the index TIDs
/*
 * validate_index_callback: called once per index TID.  Encodes the TID with
 * itemptr_encode, appends it (as a non-null int64 datum) to the tuplesort
 * held in the v_i_state passed through "opaque", and always returns false.
 */
2916 validate_index_callback(ItemPointer itemptr, void *opaque)
2918 v_i_state *state = (v_i_state *) opaque;
2919 int64 encoded = itemptr_encode(itemptr);
2921 tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
2923 return false; /* never actually delete anything */
2927 * validate_index_heapscan - second table scan for concurrent index build
2929 * This has much code in common with IndexBuildHeapScan, but it's enough
2930 * different that it seems cleaner to have two routines not one.
/*
 * validate_index_heapscan: scan the heap in block order (syncscan disabled)
 * under the supplied snapshot and merge it against the sorted TID stream in
 * state->tuplesort.  A heap tuple whose root TID is not found in that
 * stream, and that passes the partial-index predicate (if any), is inserted
 * into the index with index_insert; state->tups_inserted counts those
 * insertions.
 *
 * Heap-only tuples are looked up under their root line pointer, using the
 * per-page root_offsets[] map; in_index[] remembers which offsets of the
 * current page have already been seen in the tuplesort output.
 *
 * NOTE(review): this listing omits some source lines (return type, braces,
 * the snapshot/state parameters, several local declarations and call
 * arguments); consult the complete file for exact details.
 */
2933 validate_index_heapscan(Relation heapRelation,
2934 Relation indexRelation,
2935 IndexInfo *indexInfo,
2940 HeapTuple heapTuple;
2941 Datum values[INDEX_MAX_KEYS];
2942 bool isnull[INDEX_MAX_KEYS];
2944 TupleTableSlot *slot;
2946 ExprContext *econtext;
2947 BlockNumber root_blkno = InvalidBlockNumber;
2948 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2949 bool in_index[MaxHeapTuplesPerPage];
2951 /* state variables for the merge */
2952 ItemPointer indexcursor = NULL;
2953 ItemPointerData decoded;
2954 bool tuplesort_empty = false;
2959 Assert(OidIsValid(indexRelation->rd_rel->relam));
2962 * Need an EState for evaluation of index expressions and partial-index
2963 * predicates. Also a slot to hold the current tuple.
2965 estate = CreateExecutorState();
2966 econtext = GetPerTupleExprContext(estate);
2967 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2969 /* Arrange for econtext's scan tuple to be the tuple under test */
2970 econtext->ecxt_scantuple = slot;
2972 /* Set up execution state for predicate, if any. */
2973 predicate = (List *)
2974 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2978 * Prepare for scan of the base relation. We need just those tuples
2979 * satisfying the passed-in reference snapshot. We must disable syncscan
2980 * here, because it's critical that we read from block zero forward to
2981 * match the sorted TIDs.
2983 scan = heap_beginscan_strat(heapRelation, /* relation */
2984 snapshot, /* snapshot */
2985 0, /* number of keys */
2986 NULL, /* scan key */
2987 true, /* buffer access strategy OK */
2988 false); /* syncscan not OK */
2991 * Scan all tuples matching the snapshot.
2993 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2995 ItemPointer heapcursor = &heapTuple->t_self;
2996 ItemPointerData rootTuple;
2997 OffsetNumber root_offnum;
2999 CHECK_FOR_INTERRUPTS();
3004 * As commented in IndexBuildHeapScan, we should index heap-only
3005 * tuples under the TIDs of their root tuples; so when we advance onto
3006 * a new heap page, build a map of root item offsets on the page.
3008 * This complicates merging against the tuplesort output: we will
3009 * visit the live tuples in order by their offsets, but the root
3010 * offsets that we need to compare against the index contents might be
3011 * ordered differently. So we might have to "look back" within the
3012 * tuplesort output, but only within the current page. We handle that
3013 * by keeping a bool array in_index[] showing all the
3014 * already-passed-over tuplesort output TIDs of the current page. We
3015 * clear that array here, when advancing onto a new heap page.
3017 if (scan->rs_cblock != root_blkno)
3019 Page page = BufferGetPage(scan->rs_cbuf);
3021 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
3022 heap_get_root_tuples(page, root_offsets);
3023 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
/* start the new page with no offsets marked as already present in the index */
3025 memset(in_index, 0, sizeof(in_index));
3027 root_blkno = scan->rs_cblock;
3030 /* Convert actual tuple TID to root TID */
3031 rootTuple = *heapcursor;
3032 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
3034 if (HeapTupleIsHeapOnly(heapTuple))
3036 root_offnum = root_offsets[root_offnum - 1];
3037 if (!OffsetNumberIsValid(root_offnum))
3038 elog(ERROR, "failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
3039 ItemPointerGetBlockNumber(heapcursor),
3040 ItemPointerGetOffsetNumber(heapcursor),
3041 RelationGetRelationName(heapRelation));
3042 ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
3046 * "merge" by skipping through the index tuples until we find or pass
3047 * the current root tuple.
3049 while (!tuplesort_empty &&
3051 ItemPointerCompare(indexcursor, &rootTuple) < 0))
3059 * Remember index items seen earlier on the current heap page
3061 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
3062 in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
3065 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
3066 &ts_val, &ts_isnull, NULL);
3067 Assert(tuplesort_empty || !ts_isnull);
3068 if (!tuplesort_empty)
3070 itemptr_decode(&decoded, DatumGetInt64(ts_val));
3071 indexcursor = &decoded;
3073 /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
3074 #ifndef USE_FLOAT8_BYVAL
3075 pfree(DatumGetPointer(ts_val));
3086 * If the tuplesort has overshot *and* we didn't see a match earlier,
3087 * then this tuple is missing from the index, so insert it.
/* in_index[] is indexed by (root offset number - 1) */
3089 if ((tuplesort_empty ||
3090 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
3091 !in_index[root_offnum - 1])
3093 MemoryContextReset(econtext->ecxt_per_tuple_memory);
3095 /* Set up for predicate or expression evaluation */
3096 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
3099 * In a partial index, discard tuples that don't satisfy the
3102 if (predicate != NIL)
3104 if (!ExecQual(predicate, econtext, false))
3109 * For the current heap tuple, extract all the attributes we use
3110 * in this index, and note which are null. This also performs
3111 * evaluation of any expressions needed.
3113 FormIndexDatum(indexInfo,
3120 * You'd think we should go ahead and build the index tuple here,
3121 * but some index AMs want to do further processing on the data
3122 * first. So pass the values[] and isnull[] arrays, instead.
3126 * If the tuple is already committed dead, you might think we
3127 * could suppress uniqueness checking, but this is no longer true
3128 * in the presence of HOT, because the insert is actually a proxy
3129 * for a uniqueness check on the whole HOT-chain. That is, the
3130 * tuple we have here could be dead because it was already
3131 * HOT-updated, and if so the updating transaction will not have
3132 * thought it should insert index entries. The index AM will
3133 * check the whole HOT-chain and correctly detect a conflict if
3137 index_insert(indexRelation,
3142 indexInfo->ii_Unique ?
3143 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
3145 state->tups_inserted += 1;
3151 ExecDropSingleTupleTableSlot(slot);
3153 FreeExecutorState(estate);
3155 /* These may have been pointing to the now-gone estate */
3156 indexInfo->ii_ExpressionsState = NIL;
3157 indexInfo->ii_PredicateState = NIL;
3162 * index_set_state_flags - adjust pg_index state flags
3164 * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3165 * flags that denote the index's state. Because the update is not
3166 * transactional and will not roll back on error, this must only be used as
3167 * the last step in a transaction that has not made any transactional catalog updates!
3170 * Note that heap_inplace_update does send a cache inval message for the
3171 * tuple, so other sessions will hear about the update as soon as we commit.
3173 * NB: In releases prior to PostgreSQL 9.4, the use of a non-transactional
3174 * update here would have been unsafe; now that MVCC rules apply even for
3175 * system catalog scans, we could potentially use a transactional update here instead.
/*
 * index_set_state_flags: apply one IndexStateFlagsAction to the pg_index row
 * of indexId.
 *
 * The function asserts that the current transaction has no top-level XID,
 * opens pg_index with RowExclusiveLock, fetches a modifiable copy of the row
 * through the INDEXRELID syscache (raising an error if it is not found),
 * adjusts indisready / indisvalid / indislive / indisclustered on that copy
 * according to the action, and writes it back with heap_inplace_update
 * before closing pg_index.
 *
 * NOTE(review): this listing omits some source lines (return type, braces,
 * the switch statement itself and its break statements, and the pg_index
 * declaration); consult the complete file for exact details.
 */
3179 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3182 HeapTuple indexTuple;
3183 Form_pg_index indexForm;
3185 /* Assert that current xact hasn't done any transactional updates */
3186 Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
3188 /* Open pg_index and fetch a writable copy of the index's tuple */
3189 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3191 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3192 ObjectIdGetDatum(indexId));
3193 if (!HeapTupleIsValid(indexTuple))
3194 elog(ERROR, "cache lookup failed for index %u", indexId);
3195 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3197 /* Perform the requested state change on the copy */
3200 case INDEX_CREATE_SET_READY:
3201 /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3202 Assert(indexForm->indislive);
3203 Assert(!indexForm->indisready);
3204 Assert(!indexForm->indisvalid);
3205 indexForm->indisready = true;
3207 case INDEX_CREATE_SET_VALID:
3208 /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3209 Assert(indexForm->indislive);
3210 Assert(indexForm->indisready);
3211 Assert(!indexForm->indisvalid);
3212 indexForm->indisvalid = true;
3214 case INDEX_DROP_CLEAR_VALID:
3217 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3219 * If indisready == true we leave it set so the index still gets
3220 * maintained by active transactions. We only need to ensure that
3221 * indisvalid is false. (We don't assert that either is initially
3222 * true, though, since we want to be able to retry a DROP INDEX
3223 * CONCURRENTLY that failed partway through.)
3225 * Note: the CLUSTER logic assumes that indisclustered cannot be
3226 * set on any invalid index, so clear that flag too.
3228 indexForm->indisvalid = false;
3229 indexForm->indisclustered = false;
3231 case INDEX_DROP_SET_DEAD:
3234 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3236 * We clear both indisready and indislive, because we not only
3237 * want to stop updates, we want to prevent sessions from touching
3240 Assert(!indexForm->indisvalid);
3241 indexForm->indisready = false;
3242 indexForm->indislive = false;
3246 /* ... and write it back in-place */
3247 heap_inplace_update(pg_index, indexTuple);
3249 heap_close(pg_index, RowExclusiveLock);
3254 * IndexGetRelation: given an index's relation OID, get the OID of the
3255 * relation it is an index on. Uses the system cache.
3258 IndexGetRelation(Oid indexId, bool missing_ok)
3261 Form_pg_index index;
3264 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3265 if (!HeapTupleIsValid(tuple))
3269 elog(ERROR, "cache lookup failed for index %u", indexId);
3271 index = (Form_pg_index) GETSTRUCT(tuple);
3272 Assert(index->indexrelid == indexId);
3274 result = index->indrelid;
3275 ReleaseSysCache(tuple);
3280 * reindex_index - This routine is used to recreate a single index
3283 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3289 IndexInfo *indexInfo;
3290 volatile bool skipped_constraint = false;
3293 pg_rusage_init(&ru0);
3296 * Open and lock the parent heap relation. ShareLock is sufficient since
3297 * we only need to be sure no schema or data changes are going on.
3299 heapId = IndexGetRelation(indexId, false);
3300 heapRelation = heap_open(heapId, ShareLock);
3303 * Open the target index relation and get an exclusive lock on it, to
3304 * ensure that no one else is touching this particular index.
3306 iRel = index_open(indexId, AccessExclusiveLock);
3309 * Don't allow reindex on temp tables of other backends ... their local
3310 * buffer manager is not going to cope.
3312 if (RELATION_IS_OTHER_TEMP(iRel))
3314 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3315 errmsg("cannot reindex temporary tables of other sessions")));
3318 * Also check for active uses of the index in the current transaction; we
3319 * don't want to reindex underneath an open indexscan.
3321 CheckTableNotInUse(iRel, "REINDEX INDEX");
3324 * All predicate locks on the index are about to be made invalid. Promote
3325 * them to relation locks on the heap.
3327 TransferPredicateLocksToHeapRelation(iRel);
3331 /* Suppress use of the target index while rebuilding it */
3332 SetReindexProcessing(heapId, indexId);
3334 /* Fetch info needed for index_build */
3335 indexInfo = BuildIndexInfo(iRel);
3337 /* If requested, skip checking uniqueness/exclusion constraints */
3338 if (skip_constraint_checks)
3340 if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3341 skipped_constraint = true;
3342 indexInfo->ii_Unique = false;
3343 indexInfo->ii_ExclusionOps = NULL;
3344 indexInfo->ii_ExclusionProcs = NULL;
3345 indexInfo->ii_ExclusionStrats = NULL;
3348 /* We'll build a new physical relation for the index */
3349 RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
3350 InvalidMultiXactId);
3352 /* Initialize the index and rebuild */
3353 /* Note: we do not need to re-establish pkey setting */
3354 index_build(heapRelation, iRel, indexInfo, false, true);
3358 /* Make sure flag gets cleared on error exit */
3359 ResetReindexProcessing();
3363 ResetReindexProcessing();
3366 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3367 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3368 * and we didn't skip a uniqueness check, we can now mark it valid. This
3369 * allows REINDEX to be used to clean up in such cases.
3371 * We can also reset indcheckxmin, because we have now done a
3372 * non-concurrent index build, *except* in the case where index_build
3373 * found some still-broken HOT chains. If it did, and we don't have to
3374 * change any of the other flags, we just leave indcheckxmin alone (note
3375 * that index_build won't have changed it, because this is a reindex).
3376 * This is okay and desirable because not updating the tuple leaves the
3377 * index's usability horizon (recorded as the tuple's xmin value) the same
3380 * But, if the index was invalid/not-ready/dead and there were broken HOT
3381 * chains, we had better force indcheckxmin true, because the normal
3382 * argument that the HOT chains couldn't conflict with the index is
3383 * suspect for an invalid index. (A conflict is definitely possible if
3384 * the index was dead. It probably shouldn't happen otherwise, but let's
3385 * be conservative.) In this case advancing the usability horizon is
3388 * Another reason for avoiding unnecessary updates here is that while
3389 * reindexing pg_index itself, we must not try to update tuples in it.
3390 * pg_index's indexes should always have these flags in their clean state,
3391 * so that won't happen.
3393 if (!skipped_constraint)
3396 HeapTuple indexTuple;
3397 Form_pg_index indexForm;
3400 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3402 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3403 ObjectIdGetDatum(indexId));
3404 if (!HeapTupleIsValid(indexTuple))
3405 elog(ERROR, "cache lookup failed for index %u", indexId);
3406 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3408 index_bad = (!indexForm->indisvalid ||
3409 !indexForm->indisready ||
3410 !indexForm->indislive);
3412 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
3414 if (!indexInfo->ii_BrokenHotChain)
3415 indexForm->indcheckxmin = false;
3417 indexForm->indcheckxmin = true;
3418 indexForm->indisvalid = true;
3419 indexForm->indisready = true;
3420 indexForm->indislive = true;
3421 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
3422 CatalogUpdateIndexes(pg_index, indexTuple);
3425 * Invalidate the relcache for the table, so that after we commit
3426 * all sessions will refresh the table's index list. This ensures
3427 * that if anyone misses seeing the pg_index row during this
3428 * update, they'll refresh their list before attempting any update
3431 CacheInvalidateRelcache(heapRelation);
3434 heap_close(pg_index, RowExclusiveLock);
3437 /* Log what we did */
3438 if (options & REINDEXOPT_VERBOSE)
3440 (errmsg("index \"%s\" was reindexed",
3441 get_rel_name(indexId)),
3443 pg_rusage_show(&ru0))));
3445 /* Close rels, but keep locks */
3446 index_close(iRel, NoLock);
3447 heap_close(heapRelation, NoLock);
3451 * reindex_relation - This routine is used to recreate all indexes
3452 * of a relation (and optionally its toast relation too, if any).
3454 * "flags" is a bitmask that can include any combination of these bits:
3456 * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3458 * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3459 * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3460 * indexes are inconsistent with it. This makes things tricky if the relation
3461 * is a system catalog that we might consult during the reindexing. To deal
3462 * with that case, we mark all of the indexes as pending rebuild so that they
3463 * won't be trusted until rebuilt. The caller is required to call us *without*
3464 * having made the rebuilt table visible by doing CommandCounterIncrement;
3465 * we'll do CCI after having collected the index list. (This way we can still
3466 * use catalog indexes while collecting the list.)
3468 * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3469 * constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
3470 * CLUSTER on a system catalog must omit this flag. REINDEX should be used to
3471 * rebuild an index if constraint inconsistency is suspected. For optimal
3472 * performance, other callers should include the flag only after transforming
3473 * the data in a manner that risks a change in constraint validity.
3475 * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3476 * rebuilt indexes to unlogged.
3478 * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3479 * rebuilt indexes to permanent.
3481 * Returns true if any indexes were rebuilt (including toast table's index
3482 * when relevant). Note that a CommandCounterIncrement will occur after each
3486 reindex_relation(Oid relid, int flags, int options)
3495 * Open and lock the relation. ShareLock is sufficient since we only need
3496 * to prevent schema and data changes in it. The lock level used here
3497 * should match ReindexTable().
3499 rel = heap_open(relid, ShareLock);
3501 toast_relid = rel->rd_rel->reltoastrelid;
3504 * Get the list of index OIDs for this relation. (We trust to the
3505 * relcache to get this with a sequential scan if ignoring system
3508 indexIds = RelationGetIndexList(rel);
3511 * reindex_index will attempt to update the pg_class rows for the relation
3512 * and index. If we are processing pg_class itself, we want to make sure
3513 * that the updates do not try to insert index entries into indexes we
3514 * have not processed yet. (When we are trying to recover from corrupted
3515 * indexes, that could easily cause a crash.) We can accomplish this
3516 * because CatalogUpdateIndexes will use the relcache's index list to know
3517 * which indexes to update. We just force the index list to be only the
3518 * stuff we've processed.
3520 * It is okay to not insert entries into the indexes we have not processed
3521 * yet because all of this is transaction-safe. If we fail partway
3522 * through, the updated rows are dead and it doesn't matter whether they
3523 * have index entries. Also, a new pg_class index will be created with a
3524 * correct entry for its own pg_class row because we do
3525 * RelationSetNewRelfilenode() before we do index_build().
3527 * Note that we also clear pg_class's rd_oidindex until the loop is done,
3528 * so that that index can't be accessed either. This means we cannot
3529 * safely generate new relation OIDs while in the loop; shouldn't be a
3532 is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
3534 /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
3536 (void) RelationGetIndexAttrBitmap(rel, INDEX_ATTR_BITMAP_ALL);
3544 if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3546 /* Suppress use of all the indexes until they are rebuilt */
3547 SetReindexPending(indexIds);
3550 * Make the new heap contents visible --- now things might be
3553 CommandCounterIncrement();
3557 * Compute persistence of indexes: same as that of owning rel, unless
3558 * caller specified otherwise.
3560 if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3561 persistence = RELPERSISTENCE_UNLOGGED;
3562 else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3563 persistence = RELPERSISTENCE_PERMANENT;
3565 persistence = rel->rd_rel->relpersistence;
3567 /* Reindex all the indexes. */
3569 foreach(indexId, indexIds)
3571 Oid indexOid = lfirst_oid(indexId);
3574 RelationSetIndexList(rel, doneIndexes, InvalidOid);
3576 reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3577 persistence, options);
3579 CommandCounterIncrement();
3581 /* Index should no longer be in the pending list */
3582 Assert(!ReindexIsProcessingIndex(indexOid));
3585 doneIndexes = lappend_oid(doneIndexes, indexOid);
3590 /* Make sure list gets cleared on error exit */
3591 ResetReindexPending();
3595 ResetReindexPending();
3598 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
3601 * Close rel, but continue to hold the lock.
3603 heap_close(rel, NoLock);
3605 result = (indexIds != NIL);
3608 * If the relation has a secondary toast rel, reindex that too while we
3609 * still hold the lock on the master table.
3611 if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3612 result |= reindex_relation(toast_relid, flags, options);
/* ----------------------------------------------------------------
 *		System index reindexing support
 *
 * When we are busy reindexing a system index, this code provides support
 * for preventing catalog lookups from using that index.  We also make use
 * of this to catch attempted uses of user indexes during reindexing of
 * those indexes.
 * ----------------------------------------------------------------
 */
3628 static Oid currentlyReindexedHeap = InvalidOid;
3629 static Oid currentlyReindexedIndex = InvalidOid;
3630 static List *pendingReindexedIndexes = NIL;
3633 * ReindexIsProcessingHeap
3634 * True if heap specified by OID is currently being reindexed.
3637 ReindexIsProcessingHeap(Oid heapOid)
3639 return heapOid == currentlyReindexedHeap;
3643 * ReindexIsCurrentlyProcessingIndex
3644 * True if index specified by OID is currently being reindexed.
3647 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3649 return indexOid == currentlyReindexedIndex;
3653 * ReindexIsProcessingIndex
3654 * True if index specified by OID is currently being reindexed,
3655 * or should be treated as invalid because it is awaiting reindex.
3658 ReindexIsProcessingIndex(Oid indexOid)
3660 return indexOid == currentlyReindexedIndex ||
3661 list_member_oid(pendingReindexedIndexes, indexOid);
3665 * SetReindexProcessing
3666 * Set flag that specified heap/index are being reindexed.
3668 * NB: caller must use a PG_TRY block to ensure ResetReindexProcessing is done.
3671 SetReindexProcessing(Oid heapOid, Oid indexOid)
3673 Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3674 /* Reindexing is not re-entrant. */
3675 if (OidIsValid(currentlyReindexedHeap))
3676 elog(ERROR, "cannot reindex while reindexing");
3677 currentlyReindexedHeap = heapOid;
3678 currentlyReindexedIndex = indexOid;
3679 /* Index is no longer "pending" reindex. */
3680 RemoveReindexPending(indexOid);
3684 * ResetReindexProcessing
3685 * Unset reindexing status.
3688 ResetReindexProcessing(void)
3690 currentlyReindexedHeap = InvalidOid;
3691 currentlyReindexedIndex = InvalidOid;
3696 * Mark the given indexes as pending reindex.
3698 * NB: caller must use a PG_TRY block to ensure ResetReindexPending is done.
3699 * Also, we assume that the current memory context stays valid throughout.
3702 SetReindexPending(List *indexes)
3704 /* Reindexing is not re-entrant. */
3705 if (pendingReindexedIndexes)
3706 elog(ERROR, "cannot reindex while reindexing");
3707 pendingReindexedIndexes = list_copy(indexes);
3711 * RemoveReindexPending
3712 * Remove the given index from the pending list.
3715 RemoveReindexPending(Oid indexOid)
3717 pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3722 * ResetReindexPending
3723 * Unset reindex-pending status.
3726 ResetReindexPending(void)
3728 pendingReindexedIndexes = NIL;