* See whether an existing relation has a primary key.
*
* Caller must have suitable lock on the relation.
+ *
+ * Note: we intentionally do not check IndexIsValid here; that's because this
+ * is used to enforce the rule that there can be only one indisprimary index,
+ * and we want that to be true even if said index is invalid.
*/
static bool
relationHasPrimaryKey(Relation rel)
* Note: since this is a transactional update, it's unsafe against
* concurrent SnapshotNow scans of pg_index. When making an existing
* index into a constraint, caller must have a table lock that prevents
- * concurrent table updates, and there is a risk that concurrent readers
- * of the table will miss seeing this index at all.
+ * concurrent table updates; if it's less than a full exclusive lock,
+ * there is a risk that concurrent readers of the table will miss seeing
+ * this index at all.
*/
if (update_pgindex && (mark_as_primary || deferrable))
{
LockRelId heaprelid,
indexrelid;
LOCKTAG heaplocktag;
+ LOCKMODE lockmode;
VirtualTransactionId *old_lockholders;
- Form_pg_index indexForm;
/*
* To drop an index safely, we must grab exclusive lock on its parent
* proceeding until we commit and send out a shared-cache-inval notice
* that will make them update their index lists.
*
- * In the concurrent case we make sure that nobody can be looking at the
- * indexes by dropping the index in multiple steps, so we don't need a full
- * AccessExclusiveLock yet.
- *
- * All predicate locks on the index are about to be made invalid. Promote
- * them to relation locks on the heap. For correctness the index must not
- * be seen with indisvalid = true during query planning after the move
- * starts, so that the index will not be used for a scan after the
- * predicate lock move, as this could create new predicate locks on the
- * index which would not ensure a heap relation lock. Also, the index must
- * not be seen during execution of a heap tuple insert with indisready =
- * false before the move is complete, since the conflict with the
- * predicate lock on the index gap could be missed before the lock on the
- * heap relation is in place to detect a conflict based on the heap tuple
- * insert.
+ * In the concurrent case we avoid this requirement by disabling index use
+ * in multiple steps and waiting out any transactions that might be using
+ * the index, so we don't need exclusive lock on the parent table. Instead
+ * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
+ * doing CREATE/DROP INDEX CONCURRENTLY on the same index. (We will get
+ * AccessExclusiveLock on the index below, once we're sure nobody else is
+ * using it.)
*/
heapId = IndexGetRelation(indexId, false);
- if (concurrent)
- {
- userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
- userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
- }
- else
- {
- userHeapRelation = heap_open(heapId, AccessExclusiveLock);
- userIndexRelation = index_open(indexId, AccessExclusiveLock);
- }
+ lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
+ userHeapRelation = heap_open(heapId, lockmode);
+ userIndexRelation = index_open(indexId, lockmode);
/*
- * We might still have open queries using it in our own session.
+ * We might still have open queries using it in our own session, which the
+ * above locking won't prevent, so test explicitly.
*/
CheckTableNotInUse(userIndexRelation, "DROP INDEX");
/*
- * Drop Index concurrently is similar in many ways to creating an index
- * concurrently, so some actions are similar to DefineIndex() just in the
- * reverse order.
+ * Drop Index Concurrently is more or less the reverse process of Create
+ * Index Concurrently.
*
* First we unset indisvalid so queries starting afterwards don't use the
- * index to answer queries anymore. We have to keep indisready = true
- * so transactions that are still scanning the index can continue to
- * see valid index contents. E.g. when they are using READ COMMITTED mode,
- * and another transactions that started later commits makes changes and
- * commits, they need to see those new tuples in the index.
+ * index to answer queries anymore. We have to keep indisready = true so
+ * transactions that are still scanning the index can continue to see
+ * valid index contents. For instance, if they are using READ COMMITTED
+ * mode, and another transaction makes changes and commits, they need to
+ * see those new tuples in the index.
*
- * After all transactions that could possibly have used it for queries
- * ended we can unset indisready and wait till nobody could be updating it
- * anymore.
+ * After all transactions that could possibly have used the index for
+ * queries end, we can unset indisready and set indisvalid, then wait till
+ * nobody could be touching it anymore. (Note: we use this illogical
+ * combination because this state must be distinct from the initial state
+ * during CREATE INDEX CONCURRENTLY, which has indisready and indisvalid
+ * both false. That's because in that state, transactions must examine
+ * the index for HOT-safety decisions, while in this state we don't want
+ * them to open it at all.)
+ *
+ * Since all predicate locks on the index are about to be made invalid, we
+ * must promote them to predicate locks on the heap. In the
+ * non-concurrent case we can just do that now. In the concurrent case
+ * it's a bit trickier. The predicate locks must be moved when there are
+ * no index scans in progress on the index and no more can subsequently
+ * start, so that no new predicate locks can be made on the index. Also,
+ * they must be moved before heap inserts stop maintaining the index, else
+ * the conflict with the predicate lock on the index gap could be missed
+ * before the lock on the heap relation is in place to detect a conflict
+ * based on the heap tuple insert.
*/
if (concurrent)
{
/*
- * Mark index invalid by updating its pg_index entry
- *
- * Don't Assert(indexForm->indisvalid) because we may be trying to
- * clear up after an error when trying to create an index which left
- * the index invalid
+ * We must commit our transaction in order to make the first pg_index
+ * state update visible to other sessions. If the DROP machinery has
+ * already performed any other actions (removal of other objects,
+ * pg_depend entries, etc), the commit would make those actions
+ * permanent, which would leave us with inconsistent catalog state if
+ * we fail partway through the following sequence. Since DROP INDEX
+ * CONCURRENTLY is restricted to dropping just one index that has no
+ * dependencies, we should get here before anything's been done ---
+ * but let's check that to be sure. We can verify that the current
+ * transaction has not executed any transactional updates by checking
+ * that no XID has been assigned.
*/
- indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
-
- tuple = SearchSysCacheCopy1(INDEXRELID,
- ObjectIdGetDatum(indexId));
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "cache lookup failed for index %u", indexId);
- indexForm = (Form_pg_index) GETSTRUCT(tuple);
+ if (GetTopTransactionIdIfAny() != InvalidTransactionId)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
/*
- * If indisready == true we leave it set so the index still gets
- * maintained by pre-existing transactions. We only need to ensure
- * that indisvalid is false.
+ * Mark index invalid by updating its pg_index entry
*/
- if (indexForm->indisvalid)
- {
- indexForm->indisvalid = false; /* make unusable for new queries */
-
- simple_heap_update(indexRelation, &tuple->t_self, tuple);
- CatalogUpdateIndexes(indexRelation, tuple);
- }
+ index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
- heap_close(indexRelation, RowExclusiveLock);
+ /*
+ * Invalidate the relcache for the table, so that after this commit
+ * all sessions will refresh any cached plans that might reference the
+ * index.
+ */
+ CacheInvalidateRelcache(userHeapRelation);
/* save lockrelid and locktag for below, then close but keep locks */
heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
- heap_close(userHeapRelation, NoLock);
-
indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
+
+ heap_close(userHeapRelation, NoLock);
index_close(userIndexRelation, NoLock);
/*
- * For a concurrent drop, it's important to make the catalog entries
- * visible to other transactions before we drop the index. The index
- * will be marked not indisvalid, so that no one else tries to use it
- * for queries.
- *
- * We must commit our current transaction so that the index update
- * becomes visible; then start another. Note that all the data
- * structures we just built are lost in the commit. The only data we
- * keep past here are the relation IDs.
+ * We must commit our current transaction so that the indisvalid
+ * update becomes visible to other transactions; then start another.
+ * Note that any previously-built data structures are lost in the
+ * commit. The only data we keep past here are the relation IDs.
*
* Before committing, get a session-level lock on the table, to ensure
* that neither it nor the index can be dropped before we finish. This
StartTransactionCommand();
/*
- * Now we must wait until no running transaction could have the table
- * open with the old list of indexes. To do this, inquire which xacts
- * currently would conflict with AccessExclusiveLock on the table --
- * ie, which ones have a lock of any kind on the table. Then wait for
- * each of these xacts to commit or abort. Note we do not need to
- * worry about xacts that open the table for writing after this point;
- * they will see the index as invalid when they open the relation.
+ * Now we must wait until no running transaction could be using the
+ * index for a query. To do this, inquire which xacts currently would
+ * conflict with AccessExclusiveLock on the table -- ie, which ones
+ * have a lock of any kind on the table. Then wait for each of these
+ * xacts to commit or abort. Note we do not need to worry about xacts
+ * that open the table for reading after this point; they will see the
+ * index as invalid when they open the relation.
*
* Note: the reason we use actual lock acquisition here, rather than
* just checking the ProcArray and sleeping, is that deadlock is
TransferPredicateLocksToHeapRelation(userIndexRelation);
/*
- * Now we are sure that nobody uses the index for queries, they just
- * might have it opened for updating it. So now we can unset
- * indisready and wait till nobody could update the index anymore.
+ * Now we are sure that nobody uses the index for queries; they just
+ * might have it open for updating it. So now we can unset indisready
+ * and set indisvalid, then wait till nobody could be using it at all
+ * anymore.
*/
- indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
-
- tuple = SearchSysCacheCopy1(INDEXRELID,
- ObjectIdGetDatum(indexId));
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "cache lookup failed for index %u", indexId);
- indexForm = (Form_pg_index) GETSTRUCT(tuple);
-
- Assert(indexForm->indisvalid == false);
- if (indexForm->indisready)
- {
- indexForm->indisready = false; /* don't update index anymore */
+ index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
- simple_heap_update(indexRelation, &tuple->t_self, tuple);
- CatalogUpdateIndexes(indexRelation, tuple);
- }
-
- heap_close(indexRelation, RowExclusiveLock);
+ /*
+ * Invalidate the relcache for the table, so that after this commit
+ * all sessions will refresh the table's index list. Forgetting just
+ * the index's relcache entry is not enough.
+ */
+ CacheInvalidateRelcache(userHeapRelation);
/*
* Close the relations again, though still holding session lock.
index_close(userIndexRelation, NoLock);
/*
- * Invalidate the relcache for the table, so that after this
- * transaction we will refresh the index list. Forgetting just the
- * index is not enough.
- */
- CacheInvalidateRelcache(userHeapRelation);
-
- /*
- * Just as with indisvalid = false we need to make sure indisready
- * is false is visible for everyone.
+ * Again, commit the transaction to make the pg_index update visible
+ * to other sessions.
*/
CommitTransactionCommand();
StartTransactionCommand();
/*
- * Wait till everyone that saw indisready = true finished so we can
- * finally really remove the index. The logic here is the same as
- * above.
+ * Wait till every transaction that saw the old index state has
+ * finished. The logic here is the same as above.
*/
old_lockholders = GetLockConflicts(&heaplocktag, AccessExclusiveLock);
userIndexRelation = index_open(indexId, AccessExclusiveLock);
}
else
+ {
+ /* Not concurrent, so just transfer predicate locks and we're good */
TransferPredicateLocksToHeapRelation(userIndexRelation);
+ }
/*
* Schedule physical removal of the files
* of relhasindex (the next VACUUM will fix it if necessary). So there is
* no need to update the pg_class tuple for the owning relation. But we
* must send out a shared-cache-inval notice on the owning relation to
- * ensure other backends update their relcache lists of indexes.
+ * ensure other backends update their relcache lists of indexes. (In the
+ * concurrent case, this is redundant but harmless.)
*/
CacheInvalidateRelcache(userHeapRelation);
/* other info */
ii->ii_Unique = indexStruct->indisunique;
- ii->ii_ReadyForInserts = indexStruct->indisready;
+ ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
/* initialize index-build state to default */
ii->ii_Concurrent = false;
* index's usability horizon. Moreover, we *must not* try to change the
* index's pg_index entry while reindexing pg_index itself, and this
* optimization nicely prevents that.
- */
- if (indexInfo->ii_BrokenHotChain && !isreindex)
+ *
+ * We also need not set indcheckxmin during a concurrent index build,
+ * because we won't set indisvalid true until all transactions that care
+ * about the broken HOT chains are gone.
+ *
+ * Therefore, this code path can only be taken during non-concurrent
+ * CREATE INDEX. Thus the fact that heap_update will set the pg_index
+ * tuple's xmin doesn't matter, because that tuple was created in the
+ * current transaction anyway. That also means we don't need to worry
+ * about any concurrent readers of the tuple; no other transaction can see
+ * it yet.
+ */
+ if (indexInfo->ii_BrokenHotChain && !isreindex &&
+ !indexInfo->ii_Concurrent)
{
Oid indexId = RelationGetRelid(indexRelation);
Relation pg_index;
}
+/*
+ * index_set_state_flags - adjust pg_index state flags
+ *
+ * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
+ * flags that denote the index's state.  We must use an in-place update of
+ * the pg_index tuple, because we do not have exclusive lock on the parent
+ * table and so other sessions might concurrently be doing SnapshotNow scans
+ * of pg_index to identify the table's indexes.  A transactional update would
+ * risk somebody not seeing the index at all.  Because the update is not
+ * transactional and will not roll back on error, this must only be used as
+ * the last step in a transaction that has not made any transactional catalog
+ * updates!
+ *
+ * Note that heap_inplace_update does send a cache inval message for the
+ * tuple, so other sessions will hear about the update as soon as we commit.
+ *
+ * Parameters:
+ *	indexId - OID of the index whose pg_index row is to be modified.  The
+ *		row is fetched through the INDEXRELID syscache; if no row is
+ *		found we elog(ERROR) with "cache lookup failed for index".
+ *	action - which state transition to apply:
+ *		INDEX_CREATE_SET_READY	expects indisready and indisvalid both
+ *								false; sets indisready.
+ *		INDEX_CREATE_SET_VALID	expects indisready true and indisvalid
+ *								false; sets indisvalid.
+ *		INDEX_DROP_CLEAR_VALID	no precondition; clears indisvalid and
+ *								indisclustered.
+ *		INDEX_DROP_SET_DEAD		expects indisvalid false; clears
+ *								indisready and sets indisvalid.
+ *
+ * The preconditions above are enforced only with Assert().  The switch has
+ * no default branch, so an action value other than the four listed leaves
+ * the tuple copy unmodified, and that unmodified copy is still written back.
+ *
+ * Returns nothing.  pg_index is opened with RowExclusiveLock and closed
+ * again with the same lock mode before returning.
+ */
+void
+index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
+{
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+
+ /*
+ * Assert that current xact hasn't done any transactional updates: no
+ * top-level XID may have been assigned yet.  This backs up the "last
+ * step in a transaction" rule stated in the header comment.
+ */
+ Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
+
+ /* Open pg_index and fetch a writable copy of the index's tuple */
+ pg_index = heap_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(indexId));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ /* Perform the requested state change on the copy */
+ switch (action)
+ {
+ case INDEX_CREATE_SET_READY:
+ /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
+ Assert(!indexForm->indisready);
+ Assert(!indexForm->indisvalid);
+ indexForm->indisready = true;
+ break;
+ case INDEX_CREATE_SET_VALID:
+ /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
+ Assert(indexForm->indisready);
+ Assert(!indexForm->indisvalid);
+ indexForm->indisvalid = true;
+ break;
+ case INDEX_DROP_CLEAR_VALID:
+
+ /*
+ * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
+ *
+ * If indisready == true we leave it set so the index still gets
+ * maintained by active transactions. We only need to ensure that
+ * indisvalid is false. (We don't assert that either is initially
+ * true, though, since we want to be able to retry a DROP INDEX
+ * CONCURRENTLY that failed partway through.)
+ *
+ * Note: the CLUSTER logic assumes that indisclustered cannot be
+ * set on any invalid index, so clear that flag too.
+ */
+ indexForm->indisvalid = false;
+ indexForm->indisclustered = false;
+ break;
+ case INDEX_DROP_SET_DEAD:
+
+ /*
+ * Clear indisready during a DROP INDEX CONCURRENTLY sequence
+ *
+ * We clear indisready and set indisvalid, because we not only
+ * want to stop updates, we want to prevent sessions from touching
+ * the index at all. See README.HOT.
+ *
+ * The Assert below means INDEX_DROP_CLEAR_VALID must already have
+ * been applied to this index before this action is requested.
+ */
+ Assert(!indexForm->indisvalid);
+ indexForm->indisready = false;
+ indexForm->indisvalid = true;
+ break;
+ }
+
+ /* ... and write it back in-place */
+ heap_inplace_update(pg_index, indexTuple);
+
+ heap_close(pg_index, RowExclusiveLock);
+}
+
+
/*
* IndexGetRelation: given an index's relation OID, get the OID of the
* relation it is an index on. Uses the system cache.
reindex_index(Oid indexId, bool skip_constraint_checks)
{
Relation iRel,
- heapRelation,
- pg_index;
+ heapRelation;
Oid heapId;
IndexInfo *indexInfo;
- HeapTuple indexTuple;
- Form_pg_index indexForm;
volatile bool skipped_constraint = false;
/*
ResetReindexProcessing();
/*
- * If the index is marked invalid or not ready (ie, it's from a failed
- * CREATE INDEX CONCURRENTLY), and we didn't skip a uniqueness check, we
- * can now mark it valid. This allows REINDEX to be used to clean up in
- * such cases.
+ * If the index is marked invalid/not-ready/dead (ie, it's from a failed
+ * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
+ * and we didn't skip a uniqueness check, we can now mark it valid. This
+ * allows REINDEX to be used to clean up in such cases.
*
* We can also reset indcheckxmin, because we have now done a
* non-concurrent index build, *except* in the case where index_build
- * found some still-broken HOT chains. If it did, we normally leave
- * indcheckxmin alone (note that index_build won't have changed it,
- * because this is a reindex). But if the index was invalid or not ready
- * and there were broken HOT chains, it seems best to force indcheckxmin
- * true, because the normal argument that the HOT chains couldn't conflict
- * with the index is suspect for an invalid index.
+ * found some still-broken HOT chains. If it did, and we don't have to
+ * change any of the other flags, we just leave indcheckxmin alone (note
+ * that index_build won't have changed it, because this is a reindex).
+ * This is okay and desirable because not updating the tuple leaves the
+ * index's usability horizon (recorded as the tuple's xmin value) the same
+ * as it was.
*
- * Note that it is important to not update the pg_index entry if we don't
- * have to, because updating it will move the index's usability horizon
- * (recorded as the tuple's xmin value) if indcheckxmin is true. We don't
- * really want REINDEX to move the usability horizon forward ever, but we
- * have no choice if we are to fix indisvalid or indisready. Of course,
- * clearing indcheckxmin eliminates the issue, so we're happy to do that
- * if we can. Another reason for caution here is that while reindexing
- * pg_index itself, we must not try to update it. We assume that
- * pg_index's indexes will always have these flags in their clean state.
+ * But, if the index was invalid/not-ready/dead and there were broken HOT
+ * chains, we had better force indcheckxmin true, because the normal
+ * argument that the HOT chains couldn't conflict with the index is
+ * suspect for an invalid index. (A conflict is definitely possible if
+ * the index was dead. It probably shouldn't happen otherwise, but let's
+ * be conservative.) In this case advancing the usability horizon is
+ * appropriate.
+ *
+ * Note that if we have to update the tuple, there is a risk of concurrent
+ * transactions not seeing it during their SnapshotNow scans of pg_index.
+ * While not especially desirable, this is safe because no such
+ * transaction could be trying to update the table (since we have
+ * ShareLock on it). The worst case is that someone might transiently
+ * fail to use the index for a query --- but it was probably unusable
+ * before anyway, if we are updating the tuple.
+ *
+ * Another reason for avoiding unnecessary updates here is that while
+ * reindexing pg_index itself, we must not try to update tuples in it.
+ * pg_index's indexes should always have these flags in their clean state,
+ * so that won't happen.
*/
if (!skipped_constraint)
{
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+ bool index_bad;
+
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
indexTuple = SearchSysCacheCopy1(INDEXRELID,
elog(ERROR, "cache lookup failed for index %u", indexId);
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
- if (!indexForm->indisvalid || !indexForm->indisready ||
+ index_bad = (!indexForm->indisvalid ||
+ !indexForm->indisready);
+ if (index_bad ||
(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
{
if (!indexInfo->ii_BrokenHotChain)
indexForm->indcheckxmin = false;
- else if (!indexForm->indisvalid || !indexForm->indisready)
+ else if (index_bad)
indexForm->indcheckxmin = true;
indexForm->indisvalid = true;
indexForm->indisready = true;
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
CatalogUpdateIndexes(pg_index, indexTuple);
+
+ /*
+ * Invalidate the relcache for the table, so that after we commit
+ * all sessions will refresh the table's index list. This ensures
+ * that if anyone misses seeing the pg_index row during this
+ * update, they'll refresh their list before attempting any update
+ * on the table.
+ */
+ CacheInvalidateRelcache(heapRelation);
}
heap_close(pg_index, RowExclusiveLock);