rows in a broken HOT chain (the first condition is stronger than the
second). Finally, we can mark the index valid for searches.
+Note that we do not need to set pg_index.indcheckxmin in this code path,
+because we have outwaited any transactions that would need to avoid using
+the index. (indcheckxmin is only needed because non-concurrent CREATE
+INDEX doesn't want to wait; its stronger lock would create too much risk of
+deadlock if it did.)
+
Limitations and Restrictions
----------------------------
/* other info */
ii->ii_Unique = indexStruct->indisunique;
- ii->ii_ReadyForInserts = indexStruct->indisready;
+ ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
/* initialize index-build state to default */
ii->ii_Concurrent = false;
* index's usability horizon. Moreover, we *must not* try to change
* the index's pg_index entry while reindexing pg_index itself, and this
* optimization nicely prevents that.
- */
- if (indexInfo->ii_BrokenHotChain && !isreindex)
+ *
+ * We also need not set indcheckxmin during a concurrent index build,
+ * because we won't set indisvalid true until all transactions that care
+ * about the broken HOT chains are gone.
+ *
+ * Therefore, this code path can only be taken during non-concurrent
+ * CREATE INDEX. Thus the fact that heap_update will set the pg_index
+ * tuple's xmin doesn't matter, because that tuple was created in the
+ * current transaction anyway. That also means we don't need to worry
+ * about any concurrent readers of the tuple; no other transaction can see
+ * it yet.
+ */
+ if (indexInfo->ii_BrokenHotChain && !isreindex &&
+ !indexInfo->ii_Concurrent)
{
Oid indexId = RelationGetRelid(indexRelation);
Relation pg_index;
}
+/*
+ * index_set_state_flags - adjust pg_index state flags
+ *
+ * This is used during CREATE INDEX CONCURRENTLY to adjust the pg_index
+ * flags that denote the index's state. We must use an in-place update of
+ * the pg_index tuple, because we do not have exclusive lock on the parent
+ * table and so other sessions might concurrently be doing SnapshotNow scans
+ * of pg_index to identify the table's indexes. A transactional update would
+ * risk somebody not seeing the index at all. Because the update is not
+ * transactional and will not roll back on error, this must only be used as
+ * the last step in a transaction that has not made any transactional catalog
+ * updates!
+ *
+ * Note that heap_inplace_update does send a cache inval message for the
+ * tuple, so other sessions will hear about the update as soon as we commit.
+ */
+void
+index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
+{
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+
+ /* Assert that current xact hasn't done any transactional updates */
+ Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
+
+ /* Open pg_index and fetch a writable copy of the index's tuple */
+ pg_index = heap_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy(INDEXRELID,
+ ObjectIdGetDatum(indexId),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ /* Perform the requested state change on the copy */
+ switch (action)
+ {
+ case INDEX_CREATE_SET_READY:
+ /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
+ Assert(!indexForm->indisready);
+ Assert(!indexForm->indisvalid);
+ indexForm->indisready = true;
+ break;
+ case INDEX_CREATE_SET_VALID:
+ /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
+ Assert(indexForm->indisready);
+ Assert(!indexForm->indisvalid);
+ indexForm->indisvalid = true;
+ break;
+ }
+
+ /* ... and write it back in-place */
+ heap_inplace_update(pg_index, indexTuple);
+
+ heap_close(pg_index, RowExclusiveLock);
+}
+
+
/*
* IndexGetRelation: given an index's relation OID, get the OID of the
* relation it is an index on. Uses the system cache.
IndexInfo *indexInfo;
HeapTuple indexTuple;
Form_pg_index indexForm;
+ bool index_bad;
/*
* Open and lock the parent heap relation. ShareLock is sufficient since
elog(ERROR, "cache lookup failed for index %u", indexId);
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
- if (!indexForm->indisvalid || !indexForm->indisready ||
+ index_bad = (!indexForm->indisvalid ||
+ !indexForm->indisready);
+ if (index_bad ||
(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
{
if (!indexInfo->ii_BrokenHotChain)
indexForm->indcheckxmin = false;
- else if (!indexForm->indisvalid || !indexForm->indisready)
+ else if (index_bad)
indexForm->indcheckxmin = true;
indexForm->indisvalid = true;
indexForm->indisready = true;
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
CatalogUpdateIndexes(pg_index, indexTuple);
+
+ /*
+ * Invalidate the relcache for the table, so that after we commit
+ * all sessions will refresh the table's index list. This ensures
+ * that if anyone misses seeing the pg_index row during this
+ * update, they'll refresh their list before attempting any update
+ * on the table.
+ */
+ CacheInvalidateRelcache(heapRelation);
}
heap_close(pg_index, RowExclusiveLock);
* might put recently-dead tuples out-of-order in the new table, and there
* is little harm in that.)
*/
- if (!OldIndex->rd_index->indisvalid)
+ if (!IndexIsValid(OldIndex->rd_index))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster on invalid index \"%s\"",
* mark_index_clustered: mark the specified index as the one clustered on
*
* With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
+ *
+ * Note: we do transactional updates of the pg_index rows, which are unsafe
+ * against concurrent SnapshotNow scans of pg_index. Therefore this is unsafe
+ * to execute with less than full exclusive lock on the parent table;
+ * otherwise concurrent executions of RelationGetIndexList could miss indexes.
*/
void
mark_index_clustered(Relation rel, Oid indexOid)
}
else if (thisIndexOid == indexOid)
{
+ /* this was checked earlier, but let's be real sure */
+ if (!IndexIsValid(indexForm))
+ elog(ERROR, "cannot cluster on invalid index %u", indexOid);
indexForm->indisclustered = true;
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
CatalogUpdateIndexes(pg_index, indexTuple);
LockRelId heaprelid;
LOCKTAG heaplocktag;
Snapshot snapshot;
- Relation pg_index;
- HeapTuple indexTuple;
- Form_pg_index indexForm;
int i;
/*
* commit this transaction, any new transactions that open the table must
* insert new entries into the index for insertions and non-HOT updates.
*/
- pg_index = heap_open(IndexRelationId, RowExclusiveLock);
-
- indexTuple = SearchSysCacheCopy(INDEXRELID,
- ObjectIdGetDatum(indexRelationId),
- 0, 0, 0);
- if (!HeapTupleIsValid(indexTuple))
- elog(ERROR, "cache lookup failed for index %u", indexRelationId);
- indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
-
- Assert(!indexForm->indisready);
- Assert(!indexForm->indisvalid);
-
- indexForm->indisready = true;
-
- simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
- CatalogUpdateIndexes(pg_index, indexTuple);
-
- heap_close(pg_index, RowExclusiveLock);
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
/* we can do away with our snapshot */
PopActiveSnapshot();
/*
* Index can now be marked valid -- update its pg_index entry
*/
- pg_index = heap_open(IndexRelationId, RowExclusiveLock);
-
- indexTuple = SearchSysCacheCopy(INDEXRELID,
- ObjectIdGetDatum(indexRelationId),
- 0, 0, 0);
- if (!HeapTupleIsValid(indexTuple))
- elog(ERROR, "cache lookup failed for index %u", indexRelationId);
- indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
-
- Assert(indexForm->indisready);
- Assert(!indexForm->indisvalid);
-
- indexForm->indisvalid = true;
-
- simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
- CatalogUpdateIndexes(pg_index, indexTuple);
-
- heap_close(pg_index, RowExclusiveLock);
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
/*
* The pg_index update will cause backends (including this one) to update
* relcache inval on the parent table to force replanning of cached plans.
* Otherwise existing sessions might fail to use the new index where it
* would be useful. (Note that our earlier commits did not create reasons
- * to replan; relcache flush on the index itself was sufficient.)
+ * to replan; so relcache flush on the index itself was sufficient.)
*/
CacheInvalidateRelcacheByRelid(heaprelid.relId);
/*
* Check that the attribute is not in a primary key
+ *
+ * Note: we'll throw error even if the pkey index is not valid.
*/
/* Loop over all indexes on the relation */
/*
* Get the list of index OIDs for the table from the relcache, and look up
* each one in the pg_index syscache until we find one marked primary key
- * (hopefully there isn't more than one such).
+ * (hopefully there isn't more than one such). Insist it's valid, too.
*/
*indexOid = InvalidOid;
if (!HeapTupleIsValid(indexTuple))
elog(ERROR, "cache lookup failed for index %u", indexoid);
indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
- if (indexStruct->indisprimary)
+ if (indexStruct->indisprimary && IndexIsValid(indexStruct))
{
*indexOid = indexoid;
break;
/*
* Must have the right number of columns; must be unique and not a
- * partial index; forget it if there are any expressions, too
+ * partial index; forget it if there are any expressions, too. Invalid
+ * indexes are out as well.
*/
if (indexStruct->indnatts == numattrs &&
indexStruct->indisunique &&
+ IndexIsValid(indexStruct) &&
heap_attisnull(indexTuple, Anum_pg_index_indpred) &&
heap_attisnull(indexTuple, Anum_pg_index_indexprs))
{
/*
- * Open all the indexes of the given relation, obtaining the specified kind
- * of lock on each. Return an array of Relation pointers for the indexes
- * into *Irel, and the number of indexes into *nindexes.
+ * Open all the vacuumable indexes of the given relation, obtaining the
+ * specified kind of lock on each. Return an array of Relation pointers for
+ * the indexes into *Irel, and the number of indexes into *nindexes.
+ *
+ * We consider an index vacuumable if it is marked insertable (IndexIsReady).
+ * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
+ * execution, and what we have is too corrupt to be processable. We will
+ * vacuum even if the index isn't indisvalid; this is important because in a
+ * unique index, uniqueness checks will be performed anyway and had better not
+ * hit dangling index pointers.
*/
void
vac_open_indexes(Relation relation, LOCKMODE lockmode,
indexoidlist = RelationGetIndexList(relation);
- *nindexes = list_length(indexoidlist);
+ /* allocate enough memory for all indexes */
+ i = list_length(indexoidlist);
- if (*nindexes > 0)
- *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
+ if (i > 0)
+ *Irel = (Relation *) palloc(i * sizeof(Relation));
else
*Irel = NULL;
+ /* collect just the ready indexes */
i = 0;
foreach(indexoidscan, indexoidlist)
{
Oid indexoid = lfirst_oid(indexoidscan);
+ Relation indrel;
- (*Irel)[i++] = index_open(indexoid, lockmode);
+ indrel = index_open(indexoid, lockmode);
+ if (IndexIsReady(indrel->rd_index))
+ (*Irel)[i++] = indrel;
+ else
+ index_close(indrel, lockmode);
}
+ *nindexes = i;
+
list_free(indexoidlist);
}
/*
* For each index, open the index relation and save pg_index info. We
* acquire RowExclusiveLock, signifying we will update the index.
+ *
+ * Note: we do this even if the index is not IndexIsReady; it's not worth
+ * the trouble to optimize for the case where it isn't.
*/
i = 0;
foreach(l, indexoidlist)
* Ignore invalid indexes, since they can't safely be used for
* queries. Note that this is OK because the data structure we
* are constructing is only used by the planner --- the executor
- * still needs to insert into "invalid" indexes!
+ * still needs to insert into "invalid" indexes, if they're marked
+ * IndexIsReady.
*/
- if (!index->indisvalid)
+ if (!IndexIsValid(index))
{
index_close(indexRelation, NoLock);
continue;
RelationGetRelid(relation));
index = (Form_pg_index) GETSTRUCT(tuple);
+ /*
+ * Basically, let's just copy all the bool fields. There are one or
+ * two of these that can't actually change in the current code, but
+ * it's not worth it to track exactly which ones they are. None of
+ * the array fields are allowed to change, though.
+ */
+ relation->rd_index->indisunique = index->indisunique;
+ relation->rd_index->indisprimary = index->indisprimary;
+ relation->rd_index->indisclustered = index->indisclustered;
relation->rd_index->indisvalid = index->indisvalid;
relation->rd_index->indcheckxmin = index->indcheckxmin;
relation->rd_index->indisready = index->indisready;
+
+ /* Copy xmin too, as that is needed to make sense of indcheckxmin */
HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
HeapTupleHeaderGetXmin(tuple->t_data));
result = insert_ordered_oid(result, index->indexrelid);
/* Check to see if it is a unique, non-partial btree index on OID */
- if (index->indnatts == 1 &&
+ if (IndexIsValid(index) &&
+ index->indnatts == 1 &&
index->indisunique &&
index->indkey.values[0] == ObjectIdAttributeNumber &&
index->indclass.values[0] == OID_BTREE_OPS_OID &&
/*
* For each index, add referenced attributes to indexattrs.
+ *
+ * Note: we consider all indexes returned by RelationGetIndexList, even if
+ * they are not indisready or indisvalid. This is important because an
+ * index for which CREATE INDEX CONCURRENTLY has just started must be
+ * included in HOT-safety decisions (see README.HOT).
*/
indexattrs = NULL;
foreach(l, indexoidlist)
bool tupleIsAlive,
void *state);
+/* Action code for index_set_state_flags */
+typedef enum
+{
+ INDEX_CREATE_SET_READY,
+ INDEX_CREATE_SET_VALID
+} IndexStateFlagsAction;
+
extern Oid index_create(Oid heapRelationId,
const char *indexRelationName,
extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
+extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action);
+
extern void reindex_index(Oid indexId);
extern bool reindex_relation(Oid relid, bool toast_too);
#define INDOPTION_DESC 0x0001 /* values are in reverse order */
#define INDOPTION_NULLS_FIRST 0x0002 /* NULLs are first instead of last */
+/*
+ * Use of these macros is recommended over direct examination of the state
+ * flag columns where possible; this allows source code compatibility with
+ * 9.2 and up.
+ */
+#define IndexIsValid(indexForm) ((indexForm)->indisvalid)
+#define IndexIsReady(indexForm) ((indexForm)->indisready)
+
#endif /* PG_INDEX_H */