]> granicus.if.org Git - postgresql/commitdiff
REINDEX CONCURRENTLY
authorPeter Eisentraut <peter@eisentraut.org>
Fri, 29 Mar 2019 07:25:20 +0000 (08:25 +0100)
committerPeter Eisentraut <peter@eisentraut.org>
Fri, 29 Mar 2019 07:26:33 +0000 (08:26 +0100)
This adds the CONCURRENTLY option to the REINDEX command.  A REINDEX
CONCURRENTLY on a specific index creates a new index (like CREATE
INDEX CONCURRENTLY), then renames the old index away and the new index
in place and adjusts the dependencies, and then drops the old
index (like DROP INDEX CONCURRENTLY).  The REINDEX command also has
the capability to run its other variants (TABLE, DATABASE) with the
CONCURRENTLY option (but not SYSTEM).

The reindexdb command gets the --concurrently option.

Author: Michael Paquier, Andreas Karlsson, Peter Eisentraut
Reviewed-by: Andres Freund, Fujii Masao, Jim Nasby, Sergei Kornilov
Discussion: https://www.postgresql.org/message-id/flat/60052986-956b-4478-45ed-8bd119e9b9cf%402ndquadrant.com#74948a1044c56c5e817a5050f554ddee

26 files changed:
doc/src/sgml/mvcc.sgml
doc/src/sgml/ref/create_index.sgml
doc/src/sgml/ref/reindex.sgml
doc/src/sgml/ref/reindexdb.sgml
src/backend/catalog/dependency.c
src/backend/catalog/index.c
src/backend/catalog/pg_depend.c
src/backend/commands/indexcmds.c
src/backend/commands/tablecmds.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/parser/gram.y
src/backend/tcop/utility.c
src/bin/psql/common.c
src/bin/psql/tab-complete.c
src/bin/scripts/reindexdb.c
src/bin/scripts/t/090_reindexdb.pl
src/include/catalog/dependency.h
src/include/catalog/index.h
src/include/commands/defrem.h
src/include/nodes/parsenodes.h
src/test/isolation/expected/reindex-concurrently.out [new file with mode: 0644]
src/test/isolation/isolation_schedule
src/test/isolation/specs/reindex-concurrently.spec [new file with mode: 0644]
src/test/regress/expected/create_index.out
src/test/regress/sql/create_index.sql

index bedd9a008d302adb0ececa39686ae25b9fd77905..9b7ef8bf0952405f2a9097800c2745873b1b305a 100644 (file)
@@ -926,6 +926,7 @@ ERROR:  could not serialize access due to read/write dependencies among transact
         <para>
          Acquired by <command>VACUUM</command> (without <option>FULL</option>),
          <command>ANALYZE</command>, <command>CREATE INDEX CONCURRENTLY</command>,
+         <command>REINDEX CONCURRENTLY</command>,
          <command>CREATE STATISTICS</command>, and certain <command>ALTER
          INDEX</command> and <command>ALTER TABLE</command> variants (for full
          details see <xref linkend="sql-alterindex"/> and <xref
index d8f018f4dacf571caa2d73df92bec8389f617db5..d9d95b20e36da36a985f9a21127db10ca07a3e5f 100644 (file)
@@ -844,6 +844,7 @@ CREATE INDEX CONCURRENTLY sales_quantity_index ON sales_table (quantity);
   <simplelist type="inline">
    <member><xref linkend="sql-alterindex"/></member>
    <member><xref linkend="sql-dropindex"/></member>
+   <member><xref linkend="sql-reindex"/></member>
   </simplelist>
  </refsect1>
 </refentry>
index 47cef987d4868dd15e42febf22b3c8c547b36c26..ccabb330cbf28f000e323cb9f876588c26c46dc7 100644 (file)
@@ -21,7 +21,7 @@ PostgreSQL documentation
 
  <refsynopsisdiv>
 <synopsis>
-REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } <replaceable class="parameter">name</replaceable>
+REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } [ CONCURRENTLY ] <replaceable class="parameter">name</replaceable>
 </synopsis>
  </refsynopsisdiv>
 
@@ -68,7 +68,7 @@ REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } <replacea
       An index build with the <literal>CONCURRENTLY</literal> option failed, leaving
       an <quote>invalid</quote> index. Such indexes are useless but it can be
       convenient to use <command>REINDEX</command> to rebuild them. Note that
-      <command>REINDEX</command> will not perform a concurrent build. To build the
+      <command>REINDEX</command> will not perform a concurrent build on an invalid index. To build the
       index without interfering with production you should drop the index and
       reissue the <command>CREATE INDEX CONCURRENTLY</command> command.
      </para>
@@ -151,6 +151,21 @@ REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } <replacea
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>CONCURRENTLY</literal></term>
+    <listitem>
+     <para>
+      When this option is used, <productname>PostgreSQL</productname> will rebuild the
+      index without taking any locks that prevent concurrent inserts,
+      updates, or deletes on the table; whereas a standard reindex build
+      locks out writes (but not reads) on the table until it's done.
+      There are several caveats to be aware of when using this option
+      &mdash; see <xref linkend="sql-reindex-concurrently"
+      endterm="sql-reindex-concurrently-title"/>.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>VERBOSE</literal></term>
     <listitem>
@@ -241,6 +256,159 @@ REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } <replacea
    Each individual partition can be reindexed separately instead.
   </para>
 
+  <refsect2 id="sql-reindex-concurrently">
+   <title id="sql-reindex-concurrently-title">Rebuilding Indexes Concurrently</title>
+
+   <indexterm zone="sql-reindex-concurrently">
+    <primary>index</primary>
+    <secondary>rebuilding concurrently</secondary>
+   </indexterm>
+
+   <para>
+    Rebuilding an index can interfere with regular operation of a database.
+    Normally <productname>PostgreSQL</productname> locks the table whose index is rebuilt
+    against writes and performs the entire index build with a single scan of the
+    table. Other transactions can still read the table, but if they try to
+    insert, update, or delete rows in the table they will block until the
+    index rebuild is finished. This could have a severe effect if the system is
+    a live production database. Very large tables can take many hours to be
+    indexed, and even for smaller tables, an index rebuild can lock out writers
+    for periods that are unacceptably long for a production system.
+   </para>
+
+   <para>
+    <productname>PostgreSQL</productname> supports rebuilding indexes with minimum locking
+    of writes.  This method is invoked by specifying the
+    <literal>CONCURRENTLY</literal> option of <command>REINDEX</command>. When this option
+    is used, <productname>PostgreSQL</productname> must perform two scans of the table
+    for each index that needs to be rebuilt and in addition it must wait for
+    all existing transactions that could potentially use the index to
+    terminate. This method requires more total work than a standard index
+    rebuild and takes significantly longer to complete as it needs to wait
+    for unfinished transactions that might modify the index. However, since
+    it allows normal operations to continue while the index is rebuilt, this
+    method is useful for rebuilding indexes in a production environment. Of
+    course, the extra CPU, memory and I/O load imposed by the index rebuild
+    may slow down other operations.
+   </para>
+
+   <para>
+    The following steps occur in a concurrent reindex.  Each step is run in a
+    separate transaction.  If there are multiple indexes to be rebuilt, then
+    each step loops through all the indexes before moving to the next step.
+
+    <orderedlist>
+     <listitem>
+      <para>
+       A new temporary index definition is added into the catalog
+       <literal>pg_index</literal>.  This definition will be used to replace
+       the old index.  A <literal>SHARE UPDATE EXCLUSIVE</literal> lock at
+       session level is taken on the indexes being reindexed as well as their
+       associated tables to prevent any schema modification while processing.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       A first pass to build the index is done for each new index.  Once the
+       index is built, its flag <literal>pg_index.indisready</literal> is
+       switched to <quote>true</quote> to make it ready for inserts, making it
+       visible to other sessions once the transaction that performed the build
+       is finished.  This step is done in a separate transaction for each
+       index.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       Then a second pass is performed to add tuples that were added while the
+       first pass build was running.  This step is also done in a separate
+       transaction for each index.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       All the constraints that refer to the index are changed to refer to the
+       new index definition, and the names of the indexes are changed.  At
+       this point <literal>pg_index.indisvalid</literal> is switched to
+       <quote>true</quote> for the new index and to <quote>false</quote> for
+       the old, and a cache invalidation is done, causing all sessions that
+       referenced the old index to be invalidated.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       The old indexes have <literal>pg_index.indisready</literal> switched to
+       <quote>false</quote> to prevent any new tuple insertions, after waiting
+       for running queries that might reference the old index to complete.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       The old indexes are dropped.  The <literal>SHARE UPDATE
+       EXCLUSIVE</literal> session locks for the indexes and the table are
+       released.
+      </para>
+     </listitem>
+    </orderedlist>
+   </para>
+
+   <para>
+    If a problem arises while rebuilding the indexes, such as a
+    uniqueness violation in a unique index, the <command>REINDEX</command>
+    command will fail but leave behind an <quote>invalid</quote> new index on top
+    of the existing one. This index will be ignored for querying purposes
+    because it might be incomplete; however it will still consume update
+    overhead. The <application>psql</application> <command>\d</command> command will report
+    such an index as <literal>INVALID</literal>:
+
+<programlisting>
+postgres=# \d tab
+       Table "public.tab"
+ Column |  Type   | Modifiers
+--------+---------+-----------
+ col    | integer |
+Indexes:
+    "idx" btree (col)
+    "idx_ccnew" btree (col) INVALID
+</programlisting>
+
+    The recommended recovery method in such cases is to drop the invalid index
+    and try again to perform <command>REINDEX CONCURRENTLY</command>.  The
+    concurrent index created during the processing has a name ending in the
+    suffix <literal>ccnew</literal>, or <literal>ccold</literal> if it is an
+    old index definition which we failed to drop. Invalid indexes can be
+    dropped using <literal>DROP INDEX</literal>, including invalid toast
+    indexes.
+   </para>
+
+   <para>
+    Regular index builds permit other regular index builds on the same table
+    to occur in parallel, but only one concurrent index build can occur on a
+    table at a time. In both cases, no other types of schema modification on
+    the table are allowed meanwhile.  Another difference is that a regular
+    <command>REINDEX TABLE</command> or <command>REINDEX INDEX</command>
+    command can be performed within a transaction block, but <command>REINDEX
+    CONCURRENTLY</command> cannot.
+   </para>
+
+   <para>
+    <command>REINDEX SYSTEM</command> does not support
+    <command>CONCURRENTLY</command> since system catalogs cannot be reindexed
+    concurrently.
+   </para>
+
+   <para>
+    Furthermore, indexes for exclusion constraints cannot be reindexed
+    concurrently.  If such an index is named directly in this command, an
+    error is raised.  If a table or database with exclusion constraint indexes
+    is reindexed concurrently, those indexes will be skipped.  (It is possible
+    to reindex such indexes without the <literal>CONCURRENTLY</literal> option.)
+   </para>
+  </refsect2>
  </refsect1>
 
  <refsect1>
@@ -272,6 +440,14 @@ $ <userinput>psql broken_db</userinput>
 ...
 broken_db=&gt; REINDEX DATABASE broken_db;
 broken_db=&gt; \q
+</programlisting></para>
+
+  <para>
+   Rebuild the indexes of a table, without blocking read and write operations
+   on the involved relations while the rebuild is in progress:
+
+<programlisting>
+REINDEX TABLE CONCURRENTLY my_broken_table;
 </programlisting></para>
  </refsect1>
 
@@ -282,4 +458,14 @@ broken_db=&gt; \q
    There is no <command>REINDEX</command> command in the SQL standard.
   </para>
  </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-createindex"/></member>
+   <member><xref linkend="sql-dropindex"/></member>
+   <member><xref linkend="app-reindexdb"/></member>
+  </simplelist>
+ </refsect1>
 </refentry>
index 1273dad807254a4ca20f65597a4cb74f7c3b4ddd..cdfac3fe4f9b2562e8bf88bd1b8287b3d7d2e24c 100644 (file)
@@ -118,6 +118,16 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--concurrently</option></term>
+      <listitem>
+       <para>
+        Use the <literal>CONCURRENTLY</literal> option.  See <xref
+        linkend="sql-reindex"/> for further information.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option><optional>-d</optional> <replaceable class="parameter">dbname</replaceable></option></term>
       <term><option><optional>--dbname=</optional><replaceable class="parameter">dbname</replaceable></option></term>
index f7acb4103ebd1407b63221e9de5d62561704b665..7af1670c0d2d76938314f5eff1fc06d1e1157aba 100644 (file)
@@ -306,6 +306,10 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
  * PERFORM_DELETION_SKIP_EXTENSIONS: do not delete extensions, even when
  * deleting objects that are part of an extension.  This should generally
  * be used only when dropping temporary objects.
+ *
+ * PERFORM_DELETION_CONCURRENT_LOCK: perform the drop normally but with a lock
+ * as if it were concurrent.  This is used by REINDEX CONCURRENTLY.
+ *
  */
 void
 performDeletion(const ObjectAddress *object,
@@ -1316,9 +1320,10 @@ doDeletion(const ObjectAddress *object, int flags)
                                        relKind == RELKIND_PARTITIONED_INDEX)
                                {
                                        bool            concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY) != 0);
+                                       bool            concurrent_lock_mode = ((flags & PERFORM_DELETION_CONCURRENT_LOCK) != 0);
 
                                        Assert(object->objectSubId == 0);
-                                       index_drop(object->objectId, concurrent);
+                                       index_drop(object->objectId, concurrent, concurrent_lock_mode);
                                }
                                else
                                {
index 337361a652291b7d73da348878bf4faca7305815..0d9d405c548ef27d636627809055626a33101a32 100644 (file)
@@ -42,6 +42,7 @@
 #include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
+#include "catalog/pg_description.h"
 #include "catalog/pg_depend.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_operator.h"
@@ -778,11 +779,11 @@ index_create(Relation heapRelation,
                                 errmsg("user-defined indexes on system catalog tables are not supported")));
 
        /*
-        * concurrent index build on a system catalog is unsafe because we tend to
-        * release locks before committing in catalogs
+        * Concurrent index build on a system catalog is unsafe because we tend to
+        * release locks before committing in catalogs.
         */
        if (concurrent &&
-               IsSystemRelation(heapRelation))
+               IsCatalogRelation(heapRelation))
                ereport(ERROR,
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                 errmsg("concurrent index creation on system catalog tables is not supported")));
@@ -1201,6 +1202,462 @@ index_create(Relation heapRelation,
        return indexRelationId;
 }
 
+/*
+ * index_concurrently_create_copy
+ *
+ * Create concurrently an index based on the definition of the one provided by
+ * caller.  The index is inserted into catalogs and needs to be built later
+ * on.  This is called during concurrent reindex processing.
+ *
+ * 'heapRelation' is the table the index belongs to, 'oldIndexId' is the OID
+ * of the index whose definition is copied, and 'newName' is the name given
+ * to the new index.  Returns the OID of the newly created index.
+ */
+Oid
+index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId, const char *newName)
+{
+       Relation        indexRelation;
+       IndexInfo  *indexInfo;
+       Oid                     newIndexId = InvalidOid;
+       HeapTuple       indexTuple,
+                               classTuple;
+       Datum           indclassDatum,
+                               colOptionDatum,
+                               optionDatum;
+       oidvector  *indclass;
+       int2vector *indcoloptions;
+       bool            isnull;
+       List       *indexColNames = NIL;
+
+       indexRelation = index_open(oldIndexId, RowExclusiveLock);
+
+       /* New index uses the same index information as old index */
+       indexInfo = BuildIndexInfo(indexRelation);
+
+       /* Get the array of class and column options IDs from index info */
+       indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId));
+       if (!HeapTupleIsValid(indexTuple))
+               elog(ERROR, "cache lookup failed for index %u", oldIndexId);
+       indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+                                                                       Anum_pg_index_indclass, &isnull);
+       Assert(!isnull);
+       indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+       colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+                                                                        Anum_pg_index_indoption, &isnull);
+       Assert(!isnull);
+       indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum);
+
+       /*
+        * Fetch options of index if any.  The cache key is a Datum, so convert
+        * the OID explicitly, as done for the INDEXRELID lookup above.
+        *
+        * NOTE(review): optionDatum is handed to index_create() without checking
+        * isnull; presumably a null datum is treated there as "no reloptions" --
+        * confirm against index_create().
+        */
+       classTuple = SearchSysCache1(RELOID, ObjectIdGetDatum(oldIndexId));
+       if (!HeapTupleIsValid(classTuple))
+               elog(ERROR, "cache lookup failed for relation %u", oldIndexId);
+       optionDatum = SysCacheGetAttr(RELOID, classTuple,
+                                                                 Anum_pg_class_reloptions, &isnull);
+
+       /*
+        * Extract the list of column names to be used for the index
+        * creation.
+        */
+       for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+       {
+               TupleDesc       indexTupDesc = RelationGetDescr(indexRelation);
+               Form_pg_attribute att = TupleDescAttr(indexTupDesc, i);
+
+               indexColNames = lappend(indexColNames, NameStr(att->attname));
+       }
+
+       /*
+        * Now create the new index.  It is only registered in the catalogs here
+        * (INDEX_CREATE_SKIP_BUILD); the actual build happens later.
+        */
+       newIndexId = index_create(heapRelation,
+                                                         newName,
+                                                         InvalidOid,   /* indexRelationId */
+                                                         InvalidOid,   /* parentIndexRelid */
+                                                         InvalidOid,   /* parentConstraintId */
+                                                         InvalidOid,   /* relFileNode */
+                                                         indexInfo,
+                                                         indexColNames,
+                                                         indexRelation->rd_rel->relam,
+                                                         indexRelation->rd_rel->reltablespace,
+                                                         indexRelation->rd_indcollation,
+                                                         indclass->values,
+                                                         indcoloptions->values,
+                                                         optionDatum,
+                                                         INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
+                                                         0,
+                                                         true, /* allow table to be a system catalog? */
+                                                         false, /* is_internal? */
+                                                         NULL);
+
+       /*
+        * Close the old index but keep its lock (NoLock), and release the
+        * syscache references only now: indclass, indcoloptions and optionDatum
+        * point into those tuples and were needed by index_create() above.
+        */
+       index_close(indexRelation, NoLock);
+       ReleaseSysCache(indexTuple);
+       ReleaseSysCache(classTuple);
+
+       return newIndexId;
+}
+
+/*
+ * index_concurrently_build
+ *
+ * Build index for a concurrent operation.  Low-level locks are taken when
+ * this operation is performed to prevent only schema changes, but they need
+ * to be kept until the end of the transaction performing this operation.
+ * 'indexRelationId' refers to an index relation OID already created as part
+ * of previous processing, and 'heapRelationId' refers to its parent heap
+ * relation.
+ */
+void
+index_concurrently_build(Oid heapRelationId,
+                                                Oid indexRelationId)
+{
+       Relation        heapRel;
+       Relation        indexRelation;
+       IndexInfo  *indexInfo;
+
+       /* This had better make sure that a snapshot is active */
+       Assert(ActiveSnapshotSet());
+
+       /* Open and lock the parent heap relation */
+       heapRel = table_open(heapRelationId, ShareUpdateExclusiveLock);
+
+       /* And the target index relation */
+       indexRelation = index_open(indexRelationId, RowExclusiveLock);
+
+       /*
+        * We have to re-build the IndexInfo struct, since it was lost in the
+        * commit of the transaction where this concurrent index was created at
+        * the catalog level.
+        */
+       indexInfo = BuildIndexInfo(indexRelation);
+       Assert(!indexInfo->ii_ReadyForInserts);
+       indexInfo->ii_Concurrent = true;
+       indexInfo->ii_BrokenHotChain = false;
+
+       /* Now build the index */
+       index_build(heapRel, indexRelation, indexInfo, false, true);
+
+       /* Close both the relations, but keep the locks */
+       table_close(heapRel, NoLock);
+       index_close(indexRelation, NoLock);
+
+       /*
+        * Update the pg_index row to mark the index as ready for inserts. Once we
+        * commit this transaction, any new transactions that open the table must
+        * insert new entries into the index for insertions and non-HOT updates.
+        */
+       index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+}
+
+/*
+ * index_concurrently_swap
+ *
+ * Swap name, dependencies, and constraints of the old index over to the new
+ * index, while marking the old index as invalid and the new as valid.
+ */
+void
+index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
+{
+       Relation        pg_class,
+                               pg_index,
+                               pg_constraint,
+                               pg_trigger;
+       Relation        oldClassRel,
+                               newClassRel;
+       HeapTuple       oldClassTuple,
+                               newClassTuple;
+       Form_pg_class oldClassForm,
+                               newClassForm;
+       HeapTuple       oldIndexTuple,
+                               newIndexTuple;
+       Form_pg_index oldIndexForm,
+                               newIndexForm;
+       Oid                     indexConstraintOid;
+       List       *constraintOids = NIL;
+       ListCell   *lc;
+
+       /*
+        * Take a necessary lock on the old and new index before swapping them.
+        */
+       oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock);
+       newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock);
+
+       /* Now swap names and dependencies of those indexes */
+       pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+       oldClassTuple = SearchSysCacheCopy1(RELOID,
+                                                                               ObjectIdGetDatum(oldIndexId));
+       if (!HeapTupleIsValid(oldClassTuple))
+               elog(ERROR, "could not find tuple for relation %u", oldIndexId);
+       newClassTuple = SearchSysCacheCopy1(RELOID,
+                                                                               ObjectIdGetDatum(newIndexId));
+       if (!HeapTupleIsValid(newClassTuple))
+               elog(ERROR, "could not find tuple for relation %u", newIndexId);
+
+       oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple);
+       newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple);
+
+       /* Swap the names */
+       namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname));
+       namestrcpy(&oldClassForm->relname, oldName);
+
+       CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
+       CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
+
+       heap_freetuple(oldClassTuple);
+       heap_freetuple(newClassTuple);
+
+       /* Now swap index info */
+       pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+       oldIndexTuple = SearchSysCacheCopy1(INDEXRELID,
+                                                                               ObjectIdGetDatum(oldIndexId));
+       if (!HeapTupleIsValid(oldIndexTuple))
+               elog(ERROR, "could not find tuple for relation %u", oldIndexId);
+       newIndexTuple = SearchSysCacheCopy1(INDEXRELID,
+                                                                               ObjectIdGetDatum(newIndexId));
+       if (!HeapTupleIsValid(newIndexTuple))
+               elog(ERROR, "could not find tuple for relation %u", newIndexId);
+
+       oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple);
+       newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple);
+
+       /*
+        * Copy constraint flags from the old index. This is safe because the old
+        * index guaranteed uniqueness.
+        */
+       newIndexForm->indisprimary = oldIndexForm->indisprimary;
+       oldIndexForm->indisprimary = false;
+       newIndexForm->indisexclusion = oldIndexForm->indisexclusion;
+       oldIndexForm->indisexclusion = false;
+       newIndexForm->indimmediate = oldIndexForm->indimmediate;
+       oldIndexForm->indimmediate = true;
+
+       /* Mark new index as valid and old index as invalid, as index_set_state_flags */
+       newIndexForm->indisvalid = true;
+       oldIndexForm->indisvalid = false;
+       oldIndexForm->indisclustered = false;
+
+       CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple);
+       CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple);
+
+       heap_freetuple(oldIndexTuple);
+       heap_freetuple(newIndexTuple);
+
+       /*
+        * Move constraints and triggers over to the new index
+        */
+
+       constraintOids = get_index_ref_constraints(oldIndexId);
+
+       indexConstraintOid = get_index_constraint(oldIndexId);
+
+       if (OidIsValid(indexConstraintOid))
+               constraintOids = lappend_oid(constraintOids, indexConstraintOid);
+
+       pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock);
+       pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
+
+       foreach(lc, constraintOids)
+       {
+               HeapTuple       constraintTuple,
+                                       triggerTuple;
+               Form_pg_constraint conForm;
+               ScanKeyData key[1];
+               SysScanDesc scan;
+               Oid                     constraintOid = lfirst_oid(lc);
+
+               /* Move the constraint from the old to the new index */
+               constraintTuple = SearchSysCacheCopy1(CONSTROID,
+                                                                                         ObjectIdGetDatum(constraintOid));
+               if (!HeapTupleIsValid(constraintTuple))
+                       elog(ERROR, "could not find tuple for constraint %u", constraintOid);
+
+               conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple));
+
+               if (conForm->conindid == oldIndexId)
+               {
+                       conForm->conindid = newIndexId;
+
+                       CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple);
+               }
+
+               heap_freetuple(constraintTuple);
+
+               /* Search for trigger records */
+               ScanKeyInit(&key[0],
+                                       Anum_pg_trigger_tgconstraint,
+                                       BTEqualStrategyNumber, F_OIDEQ,
+                                       ObjectIdGetDatum(constraintOid));
+
+               scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true,
+                                                                 NULL, 1, key);
+
+               while (HeapTupleIsValid((triggerTuple = systable_getnext(scan))))
+               {
+                       Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
+
+                       if (tgForm->tgconstrindid != oldIndexId)
+                               continue;
+
+                       /* Make a modifiable copy */
+                       triggerTuple = heap_copytuple(triggerTuple);
+                       tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
+
+                       tgForm->tgconstrindid = newIndexId;
+
+                       CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple);
+
+                       heap_freetuple(triggerTuple);
+               }
+
+               systable_endscan(scan);
+       }
+
+       /*
+        * Move comment if any
+        */
+       {
+               Relation        description;
+               ScanKeyData skey[3];
+               SysScanDesc sd;
+               HeapTuple       tuple;
+               Datum           values[Natts_pg_description] = {0};
+               bool            nulls[Natts_pg_description] = {0};
+               bool            replaces[Natts_pg_description] = {0};
+
+               values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId);
+               replaces[Anum_pg_description_objoid - 1] = true;
+
+               ScanKeyInit(&skey[0],
+                                       Anum_pg_description_objoid,
+                                       BTEqualStrategyNumber, F_OIDEQ,
+                                       ObjectIdGetDatum(oldIndexId));
+               ScanKeyInit(&skey[1],
+                                       Anum_pg_description_classoid,
+                                       BTEqualStrategyNumber, F_OIDEQ,
+                                       ObjectIdGetDatum(RelationRelationId));
+               ScanKeyInit(&skey[2],
+                                       Anum_pg_description_objsubid,
+                                       BTEqualStrategyNumber, F_INT4EQ,
+                                       Int32GetDatum(0));
+
+               description = table_open(DescriptionRelationId, RowExclusiveLock);
+
+               sd = systable_beginscan(description, DescriptionObjIndexId, true,
+                                                               NULL, 3, skey);
+
+               while ((tuple = systable_getnext(sd)) != NULL)
+               {
+                       tuple = heap_modify_tuple(tuple, RelationGetDescr(description),
+                                                                         values, nulls, replaces);
+                       CatalogTupleUpdate(description, &tuple->t_self, tuple);
+
+                       break;                                  /* Assume there can be only one match */
+               }
+
+               systable_endscan(sd);
+               table_close(description, NoLock);
+       }
+
+       /*
+        * Move all dependencies on the old index to the new one
+        */
+
+       if (OidIsValid(indexConstraintOid))
+       {
+               ObjectAddress myself,
+                                       referenced;
+
+               /* Change to having the new index depend on the constraint */
+               deleteDependencyRecordsForClass(RelationRelationId, oldIndexId,
+                                                                               ConstraintRelationId, DEPENDENCY_INTERNAL);
+
+               myself.classId = RelationRelationId;
+               myself.objectId = newIndexId;
+               myself.objectSubId = 0;
+
+               referenced.classId = ConstraintRelationId;
+               referenced.objectId = indexConstraintOid;
+               referenced.objectSubId = 0;
+
+               recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
+       }
+
+       changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
+
+       /*
+        * Copy over statistics from old to new index
+        */
+       {
+               PgStat_StatTabEntry *tabentry;
+
+               tabentry = pgstat_fetch_stat_tabentry(oldIndexId);
+               if (tabentry)
+               {
+                       if (newClassRel->pgstat_info)
+                       {
+                               newClassRel->pgstat_info->t_counts.t_numscans = tabentry->numscans;
+                               newClassRel->pgstat_info->t_counts.t_tuples_returned = tabentry->tuples_returned;
+                               newClassRel->pgstat_info->t_counts.t_tuples_fetched = tabentry->tuples_fetched;
+                               newClassRel->pgstat_info->t_counts.t_blocks_fetched = tabentry->blocks_fetched;
+                               newClassRel->pgstat_info->t_counts.t_blocks_hit = tabentry->blocks_hit;
+                               /* The data will be sent by the next pgstat_report_stat() call. */
+                       }
+               }
+       }
+
+       /* Close relations */
+       table_close(pg_class, RowExclusiveLock);
+       table_close(pg_index, RowExclusiveLock);
+       table_close(pg_constraint, RowExclusiveLock);
+       table_close(pg_trigger, RowExclusiveLock);
+
+       /* The lock taken previously is not released until the end of transaction */
+       relation_close(oldClassRel, NoLock);
+       relation_close(newClassRel, NoLock);
+}
+
+/*
+ * index_concurrently_set_dead
+ *
+ * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX
+ * CONCURRENTLY before actually dropping index indexId of table heapId.  Once
+ * the caller commits, all backends see the index as dead.  Locks taken here
+ * are not released on close; they are kept until the end of the transaction.
+ */
+void
+index_concurrently_set_dead(Oid heapId, Oid indexId)
+{
+       Relation        userHeapRelation;
+       Relation        userIndexRelation;
+
+       /*
+        * No more predicate locks will be acquired on this index, and we're
+        * about to stop doing inserts into the index which could show
+        * conflicts with existing predicate locks, so now is the time to move
+        * them to the heap relation.
+        */
+       userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
+       userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
+       TransferPredicateLocksToHeapRelation(userIndexRelation);
+
+       /*
+        * Now we are sure that nobody uses the index for queries; they just
+        * might have it open for updating it.  So now we can unset indisready
+        * and indislive.  Waiting till nobody could be using it at all anymore
+        * is not done here; that is left to the caller.
+        */
+       index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
+
+       /*
+        * Invalidate the relcache for the table, so that after this commit
+        * all sessions will refresh the table's index list.  Forgetting just
+        * the index's relcache entry is not enough.
+        */
+       CacheInvalidateRelcache(userHeapRelation);
+
+       /*
+        * Close the relations again; NoLock leaves the locks taken above held.
+        */
+       table_close(userHeapRelation, NoLock);
+       index_close(userIndexRelation, NoLock);
+}
+
 /*
  * index_constraint_create
  *
@@ -1438,9 +1895,14 @@ index_constraint_create(Relation heapRelation,
  *
  * NOTE: this routine should now only be called through performDeletion(),
  * else associated dependencies won't be cleaned up.
+ *
+ * If concurrent is true, do a DROP INDEX CONCURRENTLY.  If concurrent is
+ * false but concurrent_lock_mode is true, then do a normal DROP INDEX but
+ * take a lock for CONCURRENTLY processing.  That is used as part of REINDEX
+ * CONCURRENTLY.
  */
 void
-index_drop(Oid indexId, bool concurrent)
+index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
 {
        Oid                     heapId;
        Relation        userHeapRelation;
@@ -1472,7 +1934,7 @@ index_drop(Oid indexId, bool concurrent)
         * using it.)
         */
        heapId = IndexGetRelation(indexId, false);
-       lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
+       lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock;
        userHeapRelation = table_open(heapId, lockmode);
        userIndexRelation = index_open(indexId, lockmode);
 
@@ -1587,36 +2049,8 @@ index_drop(Oid indexId, bool concurrent)
                 */
                WaitForLockers(heaplocktag, AccessExclusiveLock);
 
-               /*
-                * No more predicate locks will be acquired on this index, and we're
-                * about to stop doing inserts into the index which could show
-                * conflicts with existing predicate locks, so now is the time to move
-                * them to the heap relation.
-                */
-               userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
-               userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
-               TransferPredicateLocksToHeapRelation(userIndexRelation);
-
-               /*
-                * Now we are sure that nobody uses the index for queries; they just
-                * might have it open for updating it.  So now we can unset indisready
-                * and indislive, then wait till nobody could be using it at all
-                * anymore.
-                */
-               index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
-
-               /*
-                * Invalidate the relcache for the table, so that after this commit
-                * all sessions will refresh the table's index list.  Forgetting just
-                * the index's relcache entry is not enough.
-                */
-               CacheInvalidateRelcache(userHeapRelation);
-
-               /*
-                * Close the relations again, though still holding session lock.
-                */
-               table_close(userHeapRelation, NoLock);
-               index_close(userIndexRelation, NoLock);
+               /* Finish invalidation of index and mark it as dead */
+               index_concurrently_set_dead(heapId, indexId);
 
                /*
                 * Again, commit the transaction to make the pg_index update visible
index 23b01f841e4bc3fc689ca6dd46276a231bc031ea..d63bf5e56d92ead230f2a8121fd8bd26fee21de4 100644 (file)
@@ -395,6 +395,94 @@ changeDependencyFor(Oid classId, Oid objectId,
        return count;
 }
 
+/*
+ * Adjust all dependency records to point to a different object of the same type
+ *
+ * refClassId/oldRefObjectId specify the old referenced object.
+ * newRefObjectId is the new referenced object (must be of class refClassId).
+ *
+ * Returns the number of records changed (deleted if newRefObjectId is pinned).
+ */
+long
+changeDependenciesOn(Oid refClassId, Oid oldRefObjectId,
+                                        Oid newRefObjectId)
+{
+       long            count = 0;
+       Relation        depRel;
+       ScanKeyData key[2];
+       SysScanDesc scan;
+       HeapTuple       tup;
+       ObjectAddress objAddr;
+       bool            newIsPinned;
+
+       depRel = table_open(DependRelationId, RowExclusiveLock);
+
+       /*
+        * If oldRefObjectId is pinned, there won't be any dependency entries on
+        * it --- we can't cope in that case, so raise an error instead of
+        * silently updating nothing.  (This isn't really worth expending code
+        * to fix, in current usage.)
+        */
+       objAddr.classId = refClassId;
+       objAddr.objectId = oldRefObjectId;
+       objAddr.objectSubId = 0;
+
+       if (isObjectPinned(&objAddr, depRel))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("cannot remove dependency on %s because it is a system object",
+                                               getObjectDescription(&objAddr))));
+
+       /*
+        * We can handle adding a dependency on something pinned, though, since
+        * that just means deleting the dependency entry.
+        */
+       objAddr.objectId = newRefObjectId;
+
+       newIsPinned = isObjectPinned(&objAddr, depRel);
+
+       /* Now search for all dependency records referencing the old object */
+       ScanKeyInit(&key[0],
+                               Anum_pg_depend_refclassid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(refClassId));
+       ScanKeyInit(&key[1],
+                               Anum_pg_depend_refobjid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(oldRefObjectId));
+
+       scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+                                                         NULL, 2, key);
+
+       while (HeapTupleIsValid((tup = systable_getnext(scan))))
+       {
+               Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup);
+
+               if (newIsPinned)
+                       CatalogTupleDelete(depRel, &tup->t_self);
+               else
+               {
+                       /* make a modifiable copy; the scan's tuple is left untouched */
+                       tup = heap_copytuple(tup);
+                       depform = (Form_pg_depend) GETSTRUCT(tup);
+
+                       depform->refobjid = newRefObjectId;
+
+                       CatalogTupleUpdate(depRel, &tup->t_self, tup);
+
+                       heap_freetuple(tup);
+               }
+
+               count++;
+       }
+
+       systable_endscan(scan);
+
+       table_close(depRel, RowExclusiveLock);
+
+       return count;
+}
+
 /*
  * isObjectPinned()
  *
@@ -754,3 +842,58 @@ get_index_constraint(Oid indexId)
 
        return constraintId;
 }
+
+/*
+ * get_index_ref_constraints
+ *             Given the OID of an index, return the list of OIDs of all foreign
+ *             key constraints which reference the index, or NIL if none.
+ */
+List *
+get_index_ref_constraints(Oid indexId)
+{
+       List       *result = NIL;
+       Relation        depRel;
+       ScanKeyData key[3];
+       SysScanDesc scan;
+       HeapTuple       tup;
+
+       /* Search the dependency table for entries referencing the index */
+       depRel = table_open(DependRelationId, AccessShareLock);
+
+       ScanKeyInit(&key[0],
+                               Anum_pg_depend_refclassid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(RelationRelationId));
+       ScanKeyInit(&key[1],
+                               Anum_pg_depend_refobjid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(indexId));
+       ScanKeyInit(&key[2],
+                               Anum_pg_depend_refobjsubid,
+                               BTEqualStrategyNumber, F_INT4EQ,
+                               Int32GetDatum(0));
+
+       scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+                                                         NULL, 3, key);
+
+       while (HeapTupleIsValid(tup = systable_getnext(scan)))
+       {
+               Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup);
+
+               /*
+                * We assume any normal dependency from a whole constraint
+                * (objsubid 0) must be what we are looking for.
+                */
+               if (deprec->classid == ConstraintRelationId &&
+                       deprec->objsubid == 0 &&
+                       deprec->deptype == DEPENDENCY_NORMAL)
+               {
+                       result = lappend_oid(result, deprec->objid);
+               }
+       }
+
+       systable_endscan(scan);
+       table_close(depRel, AccessShareLock);
+
+       return result;
+}
index d6eb48cb4e639638459107dd0a7587403ffdef9e..c68d8d58167f159222c6870cd10118087044c100 100644 (file)
@@ -58,6 +58,7 @@
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/partcache.h"
+#include "utils/pg_rusage.h"
 #include "utils/regproc.h"
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
@@ -83,6 +84,7 @@ static char *ChooseIndexNameAddition(List *colnames);
 static List *ChooseIndexColumnNames(List *indexElems);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
                                                                Oid relId, Oid oldRelId, void *arg);
+static bool ReindexRelationConcurrently(Oid relationOid, int options);
 static void ReindexPartitionedIndex(Relation parentIdx);
 
 /*
@@ -297,6 +299,90 @@ CheckIndexCompatible(Oid oldId,
        return ret;
 }
 
+
+/*
+ * WaitForOlderSnapshots
+ *
+ * Wait for transactions that might have a snapshot older than the given xmin
+ * limit, because the index being built might not contain tuples deleted just
+ * before the reference snapshot was taken.  Obtain a list of VXIDs of such
+ * transactions, and wait for them individually.  Used for concurrent builds.
+ *
+ * We can exclude any running transactions that have xmin > the xmin given;
+ * their oldest snapshot must be newer than our xmin limit.
+ * We can also exclude any transactions that have xmin = zero, since they
+ * evidently have no live snapshot at all (and any one they might be in
+ * process of taking is certainly newer than ours).  Transactions in other
+ * DBs can be ignored too, since they'll never even be able to see the
+ * index being worked on.
+ *
+ * We can also exclude autovacuum processes and processes running manual
+ * lazy VACUUMs, because they won't be fazed by missing index entries
+ * either.  (Manual ANALYZEs, however, can't be excluded because they
+ * might be within transactions that are going to do arbitrary operations
+ * later.)
+ *
+ * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
+ * check for that.
+ *
+ * If a process goes idle-in-transaction with xmin zero, we do not need to
+ * wait for it anymore, per the above argument.  We do not have the
+ * infrastructure right now to stop waiting if that happens, but we can at
+ * least avoid the folly of waiting when it is idle at the time we would
+ * begin to wait.  We do this by repeatedly rechecking the output of
+ * GetCurrentVirtualXIDs.  If, during any iteration, a particular vxid
+ * doesn't show up in the output, we know we can forget about it.
+ */
+static void
+WaitForOlderSnapshots(TransactionId limitXmin)
+{
+       int                     n_old_snapshots;
+       int                     i;
+       VirtualTransactionId *old_snapshots;
+
+       old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
+                                                                                 PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
+                                                                                 &n_old_snapshots);
+
+       for (i = 0; i < n_old_snapshots; i++)
+       {
+               if (!VirtualTransactionIdIsValid(old_snapshots[i]))
+                       continue;                       /* found uninteresting in previous cycle */
+
+               if (i > 0)
+               {
+                       /* re-fetch the list and invalidate remaining vxids that are gone */
+                       VirtualTransactionId *newer_snapshots;
+                       int                     n_newer_snapshots;
+                       int                     j;
+                       int                     k;
+
+                       newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
+                                                                                                       true, false,
+                                                                                                       PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
+                                                                                                       &n_newer_snapshots);
+                       for (j = i; j < n_old_snapshots; j++)
+                       {
+                               if (!VirtualTransactionIdIsValid(old_snapshots[j]))
+                                       continue;       /* found uninteresting in previous cycle */
+                               for (k = 0; k < n_newer_snapshots; k++)
+                               {
+                                       if (VirtualTransactionIdEquals(old_snapshots[j],
+                                                                                                  newer_snapshots[k]))
+                                               break;
+                               }
+                               if (k >= n_newer_snapshots) /* not there anymore */
+                                       SetInvalidVirtualTransactionId(old_snapshots[j]);
+                       }
+                       pfree(newer_snapshots);
+               }
+
+               if (VirtualTransactionIdIsValid(old_snapshots[i]))
+                       VirtualXactLock(old_snapshots[i], true);
+       }
+}
+
+
 /*
  * DefineIndex
  *             Creates a new index.
@@ -345,7 +431,6 @@ DefineIndex(Oid relationId,
        List       *indexColNames;
        List       *allIndexParams;
        Relation        rel;
-       Relation        indexRelation;
        HeapTuple       tuple;
        Form_pg_am      accessMethodForm;
        IndexAmRoutine *amRoutine;
@@ -360,9 +445,7 @@ DefineIndex(Oid relationId,
        int                     numberOfAttributes;
        int                     numberOfKeyAttributes;
        TransactionId limitXmin;
-       VirtualTransactionId *old_snapshots;
        ObjectAddress address;
-       int                     n_old_snapshots;
        LockRelId       heaprelid;
        LOCKTAG         heaplocktag;
        LOCKMODE        lockmode;
@@ -1151,34 +1234,11 @@ DefineIndex(Oid relationId,
         * HOT-chain or the extension of the chain is HOT-safe for this index.
         */
 
-       /* Open and lock the parent heap relation */
-       rel = table_open(relationId, ShareUpdateExclusiveLock);
-
-       /* And the target index relation */
-       indexRelation = index_open(indexRelationId, RowExclusiveLock);
-
        /* Set ActiveSnapshot since functions in the indexes may need it */
        PushActiveSnapshot(GetTransactionSnapshot());
 
-       /* We have to re-build the IndexInfo struct, since it was lost in commit */
-       indexInfo = BuildIndexInfo(indexRelation);
-       Assert(!indexInfo->ii_ReadyForInserts);
-       indexInfo->ii_Concurrent = true;
-       indexInfo->ii_BrokenHotChain = false;
-
-       /* Now build the index */
-       index_build(rel, indexRelation, indexInfo, false, true);
-
-       /* Close both the relations, but keep the locks */
-       table_close(rel, NoLock);
-       index_close(indexRelation, NoLock);
-
-       /*
-        * Update the pg_index row to mark the index as ready for inserts. Once we
-        * commit this transaction, any new transactions that open the table must
-        * insert new entries into the index for insertions and non-HOT updates.
-        */
-       index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+       /* Perform concurrent build of index */
+       index_concurrently_build(relationId, indexRelationId);
 
        /* we can do away with our snapshot */
        PopActiveSnapshot();
@@ -1250,74 +1310,9 @@ DefineIndex(Oid relationId,
         * The index is now valid in the sense that it contains all currently
         * interesting tuples.  But since it might not contain tuples deleted just
         * before the reference snap was taken, we have to wait out any
-        * transactions that might have older snapshots.  Obtain a list of VXIDs
-        * of such transactions, and wait for them individually.
-        *
-        * We can exclude any running transactions that have xmin > the xmin of
-        * our reference snapshot; their oldest snapshot must be newer than ours.
-        * We can also exclude any transactions that have xmin = zero, since they
-        * evidently have no live snapshot at all (and any one they might be in
-        * process of taking is certainly newer than ours).  Transactions in other
-        * DBs can be ignored too, since they'll never even be able to see this
-        * index.
-        *
-        * We can also exclude autovacuum processes and processes running manual
-        * lazy VACUUMs, because they won't be fazed by missing index entries
-        * either.  (Manual ANALYZEs, however, can't be excluded because they
-        * might be within transactions that are going to do arbitrary operations
-        * later.)
-        *
-        * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
-        * check for that.
-        *
-        * If a process goes idle-in-transaction with xmin zero, we do not need to
-        * wait for it anymore, per the above argument.  We do not have the
-        * infrastructure right now to stop waiting if that happens, but we can at
-        * least avoid the folly of waiting when it is idle at the time we would
-        * begin to wait.  We do this by repeatedly rechecking the output of
-        * GetCurrentVirtualXIDs.  If, during any iteration, a particular vxid
-        * doesn't show up in the output, we know we can forget about it.
+        * transactions that might have older snapshots.
         */
-       old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
-                                                                                 PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
-                                                                                 &n_old_snapshots);
-
-       for (i = 0; i < n_old_snapshots; i++)
-       {
-               if (!VirtualTransactionIdIsValid(old_snapshots[i]))
-                       continue;                       /* found uninteresting in previous cycle */
-
-               if (i > 0)
-               {
-                       /* see if anything's changed ... */
-                       VirtualTransactionId *newer_snapshots;
-                       int                     n_newer_snapshots;
-                       int                     j;
-                       int                     k;
-
-                       newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
-                                                                                                       true, false,
-                                                                                                       PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
-                                                                                                       &n_newer_snapshots);
-                       for (j = i; j < n_old_snapshots; j++)
-                       {
-                               if (!VirtualTransactionIdIsValid(old_snapshots[j]))
-                                       continue;       /* found uninteresting in previous cycle */
-                               for (k = 0; k < n_newer_snapshots; k++)
-                               {
-                                       if (VirtualTransactionIdEquals(old_snapshots[j],
-                                                                                                  newer_snapshots[k]))
-                                               break;
-                               }
-                               if (k >= n_newer_snapshots) /* not there anymore */
-                                       SetInvalidVirtualTransactionId(old_snapshots[j]);
-                       }
-                       pfree(newer_snapshots);
-               }
-
-               if (VirtualTransactionIdIsValid(old_snapshots[i]))
-                       VirtualXactLock(old_snapshots[i], true);
-       }
+       WaitForOlderSnapshots(limitXmin);
 
        /*
         * Index can now be marked valid -- update its pg_index entry
@@ -2204,7 +2199,7 @@ ChooseIndexColumnNames(List *indexElems)
  *             Recreate a specific index.
  */
 void
-ReindexIndex(RangeVar *indexRelation, int options)
+ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
 {
        Oid                     indOid;
        Oid                     heapOid = InvalidOid;
@@ -2216,7 +2211,8 @@ ReindexIndex(RangeVar *indexRelation, int options)
         * obtain lock on table first, to avoid deadlock hazard.  The lock level
         * used here must match the index lock obtained in reindex_index().
         */
-       indOid = RangeVarGetRelidExtended(indexRelation, AccessExclusiveLock,
+       indOid = RangeVarGetRelidExtended(indexRelation,
+                                                                         concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock,
                                                                          0,
                                                                          RangeVarCallbackForReindexIndex,
                                                                          (void *) &heapOid);
@@ -2236,7 +2232,10 @@ ReindexIndex(RangeVar *indexRelation, int options)
        persistence = irel->rd_rel->relpersistence;
        index_close(irel, NoLock);
 
-       reindex_index(indOid, false, persistence, options);
+       if (concurrent)
+               ReindexRelationConcurrently(indOid, options);
+       else
+               reindex_index(indOid, false, persistence, options);
 }
 
 /*
@@ -2304,18 +2303,26 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation,
  *             Recreate all indexes of a table (and of its toast table, if any)
  */
 Oid
-ReindexTable(RangeVar *relation, int options)
+ReindexTable(RangeVar *relation, int options, bool concurrent)
 {
        Oid                     heapOid;
+       bool            result;
 
        /* The lock level used here should match reindex_relation(). */
-       heapOid = RangeVarGetRelidExtended(relation, ShareLock, 0,
+       heapOid = RangeVarGetRelidExtended(relation,
+                                                                          concurrent ? ShareUpdateExclusiveLock : ShareLock,
+                                                                          0,
                                                                           RangeVarCallbackOwnsTable, NULL);
 
-       if (!reindex_relation(heapOid,
-                                                 REINDEX_REL_PROCESS_TOAST |
-                                                 REINDEX_REL_CHECK_CONSTRAINTS,
-                                                 options))
+       if (concurrent)
+               result = ReindexRelationConcurrently(heapOid, options);
+       else
+               result = reindex_relation(heapOid,
+                                                                 REINDEX_REL_PROCESS_TOAST |
+                                                                 REINDEX_REL_CHECK_CONSTRAINTS,
+                                                                 options);
+
+       if (!result)
                ereport(NOTICE,
                                (errmsg("table \"%s\" has no indexes",
                                                relation->relname)));
@@ -2333,7 +2340,7 @@ ReindexTable(RangeVar *relation, int options)
  */
 void
 ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
-                                         int options)
+                                         int options, bool concurrent)
 {
        Oid                     objectOid;
        Relation        relationRelation;
@@ -2345,12 +2352,18 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
        List       *relids = NIL;
        ListCell   *l;
        int                     num_keys;
+       bool            concurrent_warning = false;
 
        AssertArg(objectName);
        Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
                   objectKind == REINDEX_OBJECT_SYSTEM ||
                   objectKind == REINDEX_OBJECT_DATABASE);
 
+       if (objectKind == REINDEX_OBJECT_SYSTEM && concurrent)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("concurrent reindex of system catalogs is not supported")));
+
        /*
         * Get OID of object to reindex, being the database currently being used
         * by session for a database or for system catalogs, or the schema defined
@@ -2453,6 +2466,25 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
                        !pg_class_ownercheck(relid, GetUserId()))
                        continue;
 
+               /*
+                * Skip system tables that index_create() would refuse to index
+                * concurrently.  XXX We need the additional check for
+                * FirstNormalObjectId to skip information_schema tables, because
+                * IsCatalogClass() here does not cover information_schema, but the
+                * check in index_create() will error on the TOAST tables of
+                * information_schema tables.
+                */
+               if (concurrent &&
+                       (IsCatalogClass(relid, classtuple) || relid < FirstNormalObjectId))
+               {
+                       if (!concurrent_warning)
+                               ereport(WARNING,
+                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("concurrent reindex is not supported for catalog relations, skipping all")));
+                       concurrent_warning = true;
+                       continue;
+               }
+
                /* Save the list of relation OIDs in private context */
                old = MemoryContextSwitchTo(private_context);
 
@@ -2479,26 +2511,663 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
        foreach(l, relids)
        {
                Oid                     relid = lfirst_oid(l);
+               bool            result;
 
                StartTransactionCommand();
                /* functions in indexes may want a snapshot set */
                PushActiveSnapshot(GetTransactionSnapshot());
-               if (reindex_relation(relid,
-                                                        REINDEX_REL_PROCESS_TOAST |
-                                                        REINDEX_REL_CHECK_CONSTRAINTS,
-                                                        options))
 
-                       if (options & REINDEXOPT_VERBOSE)
+               if (concurrent)
+               {
+                       result = ReindexRelationConcurrently(relid, options);
+                       /* ReindexRelationConcurrently() does the verbose output */
+               }
+               else
+               {
+                       result = reindex_relation(relid,
+                                                                         REINDEX_REL_PROCESS_TOAST |
+                                                                         REINDEX_REL_CHECK_CONSTRAINTS,
+                                                                         options);
+
+                       if (result && (options & REINDEXOPT_VERBOSE))
                                ereport(INFO,
                                                (errmsg("table \"%s.%s\" was reindexed",
                                                                get_namespace_name(get_rel_namespace(relid)),
                                                                get_rel_name(relid))));
+
+                       PopActiveSnapshot();
+               }
+
+               CommitTransactionCommand();
+       }
+       StartTransactionCommand();
+
+       MemoryContextDelete(private_context);
+}
+
+
+/*
+ * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
+ * relation OID
+ *
+ * The relation can be either an index or a table.  If it is a table, all its
+ * valid indexes will be rebuilt, including its associated toast table
+ * indexes.  If it is an index, this index itself will be rebuilt.
+ *
+ * The locks taken on parent tables and involved indexes are kept until the
+ * transaction is committed, at which point a session lock is taken on each
+ * relation.  Both of these protect against concurrent schema changes.
+ *
+ * "options" is a bitmask of REINDEXOPT_* flags; only REINDEXOPT_VERBOSE is
+ * examined here.
+ *
+ * Returns true once the selected indexes have been rebuilt.  Returns false,
+ * without rebuilding anything, if the relation is a partitioned table or if
+ * no index qualifies for a concurrent rebuild (indexes that are skipped are
+ * reported with a WARNING).
+ *
+ * An active snapshot must be set on entry: it is popped here before the
+ * first transaction commit.  The function commits and starts transactions
+ * several times, and on a "true" return a new transaction has been started.
+ */
+static bool
+ReindexRelationConcurrently(Oid relationOid, int options)
+{
+       List       *heapRelationIds = NIL;
+       List       *indexIds = NIL;
+       List       *newIndexIds = NIL;
+       List       *relationLocks = NIL;
+       List       *lockTags = NIL;
+       ListCell   *lc,
+                          *lc2;
+       MemoryContext private_context;
+       MemoryContext oldcontext;
+       char            relkind;
+       char       *relationName = NULL;
+       char       *relationNamespace = NULL;
+       PGRUsage        ru0;
+
+       /*
+        * Create a memory context that will survive forced transaction commits we
+        * do below.  Since it is a child of PortalContext, it will go away
+        * eventually even if we suffer an error; there's no need for special
+        * abort cleanup logic.
+        */
+       private_context = AllocSetContextCreate(PortalContext,
+                                                                                       "ReindexConcurrent",
+                                                                                       ALLOCSET_SMALL_SIZES);
+
+       if (options & REINDEXOPT_VERBOSE)
+       {
+               /* Save data needed by REINDEX VERBOSE in private context */
+               oldcontext = MemoryContextSwitchTo(private_context);
+
+               relationName = get_rel_name(relationOid);
+               relationNamespace = get_namespace_name(get_rel_namespace(relationOid));
+
+               pg_rusage_init(&ru0);
+
+               MemoryContextSwitchTo(oldcontext);
+       }
+
+       relkind = get_rel_relkind(relationOid);
+
+       /*
+        * Extract the list of indexes that are going to be rebuilt based on the
+        * list of relation Oids given by caller.
+        */
+       switch (relkind)
+       {
+               case RELKIND_RELATION:
+               case RELKIND_MATVIEW:
+               case RELKIND_TOASTVALUE:
+                       {
+                               /*
+                                * In the case of a relation, find all its indexes including
+                                * toast indexes.
+                                */
+                               Relation        heapRelation;
+
+                               /* Save the list of relation OIDs in private context */
+                               oldcontext = MemoryContextSwitchTo(private_context);
+
+                               /* Track this relation for session locks */
+                               heapRelationIds = lappend_oid(heapRelationIds, relationOid);
+
+                               MemoryContextSwitchTo(oldcontext);
+
+                               /* Open relation to get its indexes */
+                               heapRelation = table_open(relationOid, ShareUpdateExclusiveLock);
+
+                               /* Add all the valid indexes of relation to list */
+                               foreach(lc, RelationGetIndexList(heapRelation))
+                               {
+                                       Oid                     cellOid = lfirst_oid(lc);
+                                       Relation        indexRelation = index_open(cellOid,
+                                                                                                                  ShareUpdateExclusiveLock);
+
+                                       /*
+                                        * Same error code as the other two "invalid index"
+                                        * warnings in this function.
+                                        */
+                                       if (!indexRelation->rd_index->indisvalid)
+                                               ereport(WARNING,
+                                                               (errcode(ERRCODE_INDEX_CORRUPTED),
+                                                                errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping",
+                                                                               get_namespace_name(get_rel_namespace(cellOid)),
+                                                                               get_rel_name(cellOid))));
+                                       else if (indexRelation->rd_index->indisexclusion)
+                                               ereport(WARNING,
+                                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                                errmsg("cannot reindex concurrently exclusion constraint index \"%s.%s\", skipping",
+                                                                               get_namespace_name(get_rel_namespace(cellOid)),
+                                                                               get_rel_name(cellOid))));
+                                       else
+                                       {
+                                               /* Save the list of relation OIDs in private context */
+                                               oldcontext = MemoryContextSwitchTo(private_context);
+
+                                               indexIds = lappend_oid(indexIds, cellOid);
+
+                                               MemoryContextSwitchTo(oldcontext);
+                                       }
+
+                                       index_close(indexRelation, NoLock);
+                               }
+
+                               /* Also add the toast indexes */
+                               if (OidIsValid(heapRelation->rd_rel->reltoastrelid))
+                               {
+                                       Oid                     toastOid = heapRelation->rd_rel->reltoastrelid;
+                                       Relation        toastRelation = table_open(toastOid,
+                                                                                                                  ShareUpdateExclusiveLock);
+
+                                       /* Save the list of relation OIDs in private context */
+                                       oldcontext = MemoryContextSwitchTo(private_context);
+
+                                       /* Track this relation for session locks */
+                                       heapRelationIds = lappend_oid(heapRelationIds, toastOid);
+
+                                       MemoryContextSwitchTo(oldcontext);
+
+                                       foreach(lc2, RelationGetIndexList(toastRelation))
+                                       {
+                                               Oid                     cellOid = lfirst_oid(lc2);
+                                               Relation        indexRelation = index_open(cellOid,
+                                                                                                                          ShareUpdateExclusiveLock);
+
+                                               if (!indexRelation->rd_index->indisvalid)
+                                                       ereport(WARNING,
+                                                                       (errcode(ERRCODE_INDEX_CORRUPTED),
+                                                                        errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping",
+                                                                                       get_namespace_name(get_rel_namespace(cellOid)),
+                                                                                       get_rel_name(cellOid))));
+                                               else
+                                               {
+                                                       /*
+                                                        * Save the list of relation OIDs in private
+                                                        * context
+                                                        */
+                                                       oldcontext = MemoryContextSwitchTo(private_context);
+
+                                                       indexIds = lappend_oid(indexIds, cellOid);
+
+                                                       MemoryContextSwitchTo(oldcontext);
+                                               }
+
+                                               index_close(indexRelation, NoLock);
+                                       }
+
+                                       table_close(toastRelation, NoLock);
+                               }
+
+                               table_close(heapRelation, NoLock);
+                               break;
+                       }
+               case RELKIND_INDEX:
+                       {
+                               /*
+                                * For an index simply add its Oid to list. Invalid indexes
+                                * cannot be included in list.
+                                */
+                               Relation        indexRelation = index_open(relationOid, ShareUpdateExclusiveLock);
+                               Oid                     heapId = IndexGetRelation(relationOid, false);
+
+                               /* A shared relation cannot be reindexed concurrently */
+                               if (IsSharedRelation(heapId))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                        errmsg("concurrent reindex is not supported for shared relations")));
+
+                               /* A system catalog cannot be reindexed concurrently */
+                               if (IsSystemNamespace(get_rel_namespace(heapId)))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                        errmsg("concurrent reindex is not supported for catalog relations")));
+
+                               /* Save the list of relation OIDs in private context */
+                               oldcontext = MemoryContextSwitchTo(private_context);
+
+                               /* Track the heap relation of this index for session locks */
+                               heapRelationIds = list_make1_oid(heapId);
+
+                               MemoryContextSwitchTo(oldcontext);
+
+                               if (!indexRelation->rd_index->indisvalid)
+                                       ereport(WARNING,
+                                                       (errcode(ERRCODE_INDEX_CORRUPTED),
+                                                        errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping",
+                                                                       get_namespace_name(get_rel_namespace(relationOid)),
+                                                                       get_rel_name(relationOid))));
+                               else
+                               {
+                                       /* Save the list of relation OIDs in private context */
+                                       oldcontext = MemoryContextSwitchTo(private_context);
+
+                                       indexIds = lappend_oid(indexIds, relationOid);
+
+                                       MemoryContextSwitchTo(oldcontext);
+                               }
+
+                               index_close(indexRelation, NoLock);
+                               break;
+                       }
+               case RELKIND_PARTITIONED_TABLE:
+                       /* see reindex_relation() */
+                       ereport(WARNING,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
+                                                       get_rel_name(relationOid))));
+
+                       /*
+                        * NOTE(review): unlike the "no indexes" exit below, this path
+                        * returns without popping the active snapshot -- confirm that
+                        * every caller copes with that.
+                        */
+                       return false;
+               default:
+                       /* Return error if type of relation is not supported */
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("cannot reindex concurrently this type of relation")));
+                       break;
+       }
+
+       /* Definitely no indexes, so leave */
+       if (indexIds == NIL)
+       {
+               PopActiveSnapshot();
+               return false;
+       }
+
+       Assert(heapRelationIds != NIL);
+
+       /*-----
+        * Now we have all the indexes we want to process in indexIds.
+        *
+        * The phases now are:
+        *
+        * 1. create new indexes in the catalog
+        * 2. build new indexes
+        * 3. let new indexes catch up with tuples inserted in the meantime
+        * 4. swap index names
+        * 5. mark old indexes as dead
+        * 6. drop old indexes
+        *
+        * We process each phase for all indexes before moving to the next phase,
+        * for efficiency.
+        */
+
+       /*
+        * Phase 1 of REINDEX CONCURRENTLY
+        *
+        * Create a new index with the same properties as the old one, but it is
+        * only registered in catalogs and will be built later.  Then get session
+        * locks on all involved tables.  See analogous code in DefineIndex() for
+        * more detailed comments.
+        */
+
+       foreach(lc, indexIds)
+       {
+               char       *concurrentName;
+               Oid                     indexId = lfirst_oid(lc);
+               Oid                     newIndexId;
+               Relation        indexRel;
+               Relation        heapRel;
+               Relation        newIndexRel;
+               LockRelId  *lockrelid;
+
+               indexRel = index_open(indexId, ShareUpdateExclusiveLock);
+               heapRel = table_open(indexRel->rd_index->indrelid,
+                                                        ShareUpdateExclusiveLock);
+
+               /* Choose a temporary relation name for the new index */
+               concurrentName = ChooseRelationName(get_rel_name(indexId),
+                                                                                       NULL,
+                                                                                       "ccnew",
+                                                                                       get_rel_namespace(indexRel->rd_index->indrelid),
+                                                                                       false);
+
+               /* Create new index definition based on given index */
+               newIndexId = index_concurrently_create_copy(heapRel,
+                                                                                                       indexId,
+                                                                                                       concurrentName);
+
+               /* Now open the relation of the new index, a lock is also needed on it */
+               newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock);
+
+               /*
+                * Save the list of OIDs and locks in private context
+                */
+               oldcontext = MemoryContextSwitchTo(private_context);
+
+               newIndexIds = lappend_oid(newIndexIds, newIndexId);
+
+               /*
+                * Save lockrelid to protect each relation from drop then close
+                * relations. The lockrelid on parent relation is not taken here to
+                * avoid multiple locks taken on the same relation, instead we rely on
+                * heapRelationIds built earlier.
+                *
+                * Each list entry needs its own palloc'd copy: the list only stores
+                * pointers, and it is read again after this loop has finished.
+                */
+               lockrelid = palloc(sizeof(*lockrelid));
+               *lockrelid = indexRel->rd_lockInfo.lockRelId;
+               relationLocks = lappend(relationLocks, lockrelid);
+               lockrelid = palloc(sizeof(*lockrelid));
+               *lockrelid = newIndexRel->rd_lockInfo.lockRelId;
+               relationLocks = lappend(relationLocks, lockrelid);
+
+               MemoryContextSwitchTo(oldcontext);
+
+               index_close(indexRel, NoLock);
+               index_close(newIndexRel, NoLock);
+               table_close(heapRel, NoLock);
+       }
+
+       /*
+        * Save the lock identifiers of the heap relations: they are needed for
+        * the session locks taken below and for the lock tags used by the wait
+        * phases.
+        */
+       foreach(lc, heapRelationIds)
+       {
+               Relation        heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock);
+               LockRelId  *lockrelid;
+               LOCKTAG    *heaplocktag;
+
+               /* Save the list of locks in private context */
+               oldcontext = MemoryContextSwitchTo(private_context);
+
+               /* Add lockrelid of heap relation to the list of locked relations */
+               lockrelid = palloc(sizeof(*lockrelid));
+               *lockrelid = heapRelation->rd_lockInfo.lockRelId;
+               relationLocks = lappend(relationLocks, lockrelid);
+
+               heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG));
+
+               /* Save the LOCKTAG for this parent relation for the wait phase */
+               SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId);
+               lockTags = lappend(lockTags, heaplocktag);
+
+               MemoryContextSwitchTo(oldcontext);
+
+               /* Close heap relation */
+               table_close(heapRelation, NoLock);
+       }
+
+       /* Get a session-level lock on each table. */
+       foreach(lc, relationLocks)
+       {
+               LockRelId       lockRel = *((LockRelId *) lfirst(lc));
+
+               LockRelationIdForSession(&lockRel, ShareUpdateExclusiveLock);
+       }
+
+       PopActiveSnapshot();
+       CommitTransactionCommand();
+       StartTransactionCommand();
+
+       /*
+        * Phase 2 of REINDEX CONCURRENTLY
+        *
+        * Build the new indexes in a separate transaction for each index to avoid
+        * having open transactions for an unnecessary long time.  But before
+        * doing that, wait until no running transactions could have the table of
+        * the index open with the old list of indexes.  See "phase 2" in
+        * DefineIndex() for more details.
+        */
+
+       WaitForLockersMultiple(lockTags, ShareLock);
+       CommitTransactionCommand();
+
+       forboth(lc, indexIds, lc2, newIndexIds)
+       {
+               Relation        indexRel;
+               Oid                     oldIndexId = lfirst_oid(lc);
+               Oid                     newIndexId = lfirst_oid(lc2);
+               Oid                     heapId;
+
+               CHECK_FOR_INTERRUPTS();
+
+               /* Start new transaction for this index's concurrent build */
+               StartTransactionCommand();
+
+               /* Set ActiveSnapshot since functions in the indexes may need it */
+               PushActiveSnapshot(GetTransactionSnapshot());
+
+               /*
+                * Index relation has been closed by previous commit, so reopen it to
+                * get its information.
+                */
+               indexRel = index_open(oldIndexId, ShareUpdateExclusiveLock);
+               heapId = indexRel->rd_index->indrelid;
+               index_close(indexRel, NoLock);
+
+               /* Perform concurrent build of new index */
+               index_concurrently_build(heapId, newIndexId);
+
+               PopActiveSnapshot();
+               CommitTransactionCommand();
+       }
+       StartTransactionCommand();
+
+       /*
+        * Phase 3 of REINDEX CONCURRENTLY
+        *
+        * During this phase the new indexes catch up with any new tuples that
+        * were created during the previous phase.  See "phase 3" in DefineIndex()
+        * for more details.
+        */
+
+       WaitForLockersMultiple(lockTags, ShareLock);
+       CommitTransactionCommand();
+
+       foreach(lc, newIndexIds)
+       {
+               Oid                     newIndexId = lfirst_oid(lc);
+               Oid                     heapId;
+               TransactionId limitXmin;
+               Snapshot        snapshot;
+
+               CHECK_FOR_INTERRUPTS();
+
+               StartTransactionCommand();
+
+               heapId = IndexGetRelation(newIndexId, false);
+
+               /*
+                * Take the "reference snapshot" that will be used by validate_index()
+                * to filter candidate tuples.
+                */
+               snapshot = RegisterSnapshot(GetTransactionSnapshot());
+               PushActiveSnapshot(snapshot);
+
+               validate_index(heapId, newIndexId, snapshot);
+
+               /*
+                * We can now do away with our active snapshot, we still need to save
+                * the xmin limit to wait for older snapshots.
+                */
+               limitXmin = snapshot->xmin;
+
                PopActiveSnapshot();
+               UnregisterSnapshot(snapshot);
+
+               /*
+                * To ensure no deadlocks, we must commit and start yet another
+                * transaction, and do our wait before any snapshot has been taken in
+                * it.
+                */
+               CommitTransactionCommand();
+               StartTransactionCommand();
+
+               /*
+                * The index is now valid in the sense that it contains all currently
+                * interesting tuples.  But since it might not contain tuples deleted just
+                * before the reference snap was taken, we have to wait out any
+                * transactions that might have older snapshots.
+                */
+               WaitForOlderSnapshots(limitXmin);
+
                CommitTransactionCommand();
        }
+
+       /*
+        * Phase 4 of REINDEX CONCURRENTLY
+        *
+        * Now that the new indexes have been validated, swap each new index with
+        * its corresponding old index.
+        *
+        * We mark the new indexes as valid and the old indexes as not valid at
+        * the same time to make sure we only get constraint violations from the
+        * indexes with the correct names.
+        */
+
        StartTransactionCommand();
 
+       forboth(lc, indexIds, lc2, newIndexIds)
+       {
+               char       *oldName;
+               Oid                     oldIndexId = lfirst_oid(lc);
+               Oid                     newIndexId = lfirst_oid(lc2);
+               Oid                     heapId;
+
+               CHECK_FOR_INTERRUPTS();
+
+               heapId = IndexGetRelation(oldIndexId, false);
+
+               /* Choose a relation name for old index */
+               oldName = ChooseRelationName(get_rel_name(oldIndexId),
+                                                                        NULL,
+                                                                        "ccold",
+                                                                        get_rel_namespace(heapId),
+                                                                        false);
+
+               /*
+                * Swap old index with the new one.  This also marks the new one as
+                * valid and the old one as not valid.
+                */
+               index_concurrently_swap(newIndexId, oldIndexId, oldName);
+
+               /*
+                * Invalidate the relcache for the table, so that after this commit
+                * all sessions will refresh any cached plans that might reference the
+                * index.
+                */
+               CacheInvalidateRelcacheByRelid(heapId);
+
+               /*
+                * CCI here so that subsequent iterations see the oldName in the
+                * catalog and can choose a nonconflicting name for their oldName.
+                * Otherwise, this could lead to conflicts if a table has two indexes
+                * whose names are equal for the first NAMEDATALEN-minus-a-few
+                * characters.
+                */
+               CommandCounterIncrement();
+       }
+
+       /* Commit this transaction and make index swaps visible */
+       CommitTransactionCommand();
+       StartTransactionCommand();
+
+       /*
+        * Phase 5 of REINDEX CONCURRENTLY
+        *
+        * Mark the old indexes as dead.  First we must wait until no running
+        * transaction could be using the index for a query.  See also
+        * index_drop() for more details.
+        */
+
+       WaitForLockersMultiple(lockTags, AccessExclusiveLock);
+
+       foreach(lc, indexIds)
+       {
+               Oid                     oldIndexId = lfirst_oid(lc);
+               Oid                     heapId;
+
+               CHECK_FOR_INTERRUPTS();
+               heapId = IndexGetRelation(oldIndexId, false);
+               index_concurrently_set_dead(heapId, oldIndexId);
+       }
+
+       /* Commit this transaction to make the updates visible. */
+       CommitTransactionCommand();
+       StartTransactionCommand();
+
+       /*
+        * Phase 6 of REINDEX CONCURRENTLY
+        *
+        * Drop the old indexes.
+        */
+
+       WaitForLockersMultiple(lockTags, AccessExclusiveLock);
+
+       PushActiveSnapshot(GetTransactionSnapshot());
+
+       {
+               ObjectAddresses *objects = new_object_addresses();
+
+               foreach(lc, indexIds)
+               {
+                       Oid                     oldIndexId = lfirst_oid(lc);
+                       ObjectAddress *object = palloc(sizeof(ObjectAddress));
+
+                       object->classId = RelationRelationId;
+                       object->objectId = oldIndexId;
+                       object->objectSubId = 0;
+
+                       add_exact_object_address(object, objects);
+               }
+
+               /*
+                * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the
+                * right lock level.
+                */
+               performMultipleDeletions(objects, DROP_RESTRICT,
+                                                                PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL);
+       }
+
+       PopActiveSnapshot();
+       CommitTransactionCommand();
+
+       /*
+        * Finally, release the session-level locks taken in phase 1 on every
+        * relation recorded in relationLocks.
+        */
+       foreach(lc, relationLocks)
+       {
+               LockRelId       lockRel = *((LockRelId *) lfirst(lc));
+
+               UnlockRelationIdForSession(&lockRel, ShareUpdateExclusiveLock);
+       }
+
+       /* Start a new transaction to finish process properly */
+       StartTransactionCommand();
+
+       /* Log what we did */
+       if (options & REINDEXOPT_VERBOSE)
+       {
+               if (relkind == RELKIND_INDEX)
+                       ereport(INFO,
+                                       (errmsg("index \"%s.%s\" was reindexed",
+                                                       relationNamespace, relationName),
+                                        errdetail("%s.",
+                                                          pg_rusage_show(&ru0))));
+               else
+               {
+                       foreach(lc, newIndexIds)
+                       {
+                               Oid                     indOid = lfirst_oid(lc);
+
+                               ereport(INFO,
+                                               (errmsg("index \"%s.%s\" was reindexed",
+                                                               get_namespace_name(get_rel_namespace(indOid)),
+                                                               get_rel_name(indOid))));
+                               /* Don't show rusage here, since it's not per index. */
+                       }
+
+                       ereport(INFO,
+                                       (errmsg("table \"%s.%s\" was reindexed",
+                                                       relationNamespace, relationName),
+                                        errdetail("%s.",
+                                                          pg_rusage_show(&ru0))));
+               }
+       }
+
        MemoryContextDelete(private_context);
+
+       return true;
 }
 
 /*
index 06e7caa9cff9db8a57caf9d42f73c2c70710a14a..16492a23c74d1108ab9f362d8c0c32a9a33885b5 100644 (file)
@@ -1299,6 +1299,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
        bool            is_partition;
        Form_pg_class classform;
        LOCKMODE        heap_lockmode;
+       bool            invalid_system_index = false;
 
        state = (struct DropRelationCallbackState *) arg;
        relkind = state->relkind;
@@ -1361,7 +1362,36 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
                aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
                                           rel->relname);
 
-       if (!allowSystemTableMods && IsSystemClass(relOid, classform))
+       /*
+        * Check the case of a system index that might have been invalidated by a
+        * failed concurrent process and allow its drop. For the time being, this
+        * only concerns indexes of toast relations that became invalid during a
+        * REINDEX CONCURRENTLY process.
+        */
+       if (IsSystemClass(relOid, classform) && relkind == RELKIND_INDEX)
+       {
+               HeapTuple               locTuple;
+               Form_pg_index   indexform;
+               bool                    indisvalid;
+
+               locTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid));
+               if (!HeapTupleIsValid(locTuple))
+               {
+                       ReleaseSysCache(tuple);
+                       return;
+               }
+
+               indexform = (Form_pg_index) GETSTRUCT(locTuple);
+               indisvalid = indexform->indisvalid;
+               ReleaseSysCache(locTuple);
+
+               /* Mark object as being an invalid index of system catalogs */
+               if (!indisvalid)
+                       invalid_system_index = true;
+       }
+
+       /* In the case of an invalid index, it is fine to bypass this check */
+       if (!invalid_system_index && !allowSystemTableMods && IsSystemClass(relOid, classform))
                ereport(ERROR,
                                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                                 errmsg("permission denied: \"%s\" is a system catalog",
index 04cc15606d2c649adf22def8549ef6046a01ed10..84f9112addda8137ae98ef2f5d3a30a1efb71d55 100644 (file)
@@ -4367,6 +4367,7 @@ _copyReindexStmt(const ReindexStmt *from)
        COPY_NODE_FIELD(relation);
        COPY_STRING_FIELD(name);
        COPY_SCALAR_FIELD(options);
+       COPY_SCALAR_FIELD(concurrent);
 
        return newnode;
 }
index 91c007ad5b04ffd4b2f6e5780e39bbfdb23453a6..7eb9f1dd928332b8b568cc4075092861000b9e83 100644 (file)
@@ -2103,6 +2103,7 @@ _equalReindexStmt(const ReindexStmt *a, const ReindexStmt *b)
        COMPARE_NODE_FIELD(relation);
        COMPARE_STRING_FIELD(name);
        COMPARE_SCALAR_FIELD(options);
+       COMPARE_SCALAR_FIELD(concurrent);
 
        return true;
 }
index 0a4822829a592d06c6ffab3abf7241117b98e794..d711f9a7368bbd5c3063fc8768b97364cda234bf 100644 (file)
@@ -8300,42 +8300,46 @@ DropTransformStmt: DROP TRANSFORM opt_if_exists FOR Typename LANGUAGE name opt_d
  *
  *             QUERY:
  *
- *             REINDEX [ (options) ] type <name>
+ *             REINDEX [ (options) ] type [CONCURRENTLY] <name>
  *****************************************************************************/
 
 ReindexStmt:
-                       REINDEX reindex_target_type qualified_name
+                       REINDEX reindex_target_type opt_concurrently qualified_name
                                {
                                        ReindexStmt *n = makeNode(ReindexStmt);
                                        n->kind = $2;
-                                       n->relation = $3;
+                                       n->concurrent = $3;
+                                       n->relation = $4;
                                        n->name = NULL;
                                        n->options = 0;
                                        $$ = (Node *)n;
                                }
-                       | REINDEX reindex_target_multitable name
+                       | REINDEX reindex_target_multitable opt_concurrently name
                                {
                                        ReindexStmt *n = makeNode(ReindexStmt);
                                        n->kind = $2;
-                                       n->name = $3;
+                                       n->concurrent = $3;
+                                       n->name = $4;
                                        n->relation = NULL;
                                        n->options = 0;
                                        $$ = (Node *)n;
                                }
-                       | REINDEX '(' reindex_option_list ')' reindex_target_type qualified_name
+                       | REINDEX '(' reindex_option_list ')' reindex_target_type opt_concurrently qualified_name
                                {
                                        ReindexStmt *n = makeNode(ReindexStmt);
                                        n->kind = $5;
-                                       n->relation = $6;
+                                       n->concurrent = $6;
+                                       n->relation = $7;
                                        n->name = NULL;
                                        n->options = $3;
                                        $$ = (Node *)n;
                                }
-                       | REINDEX '(' reindex_option_list ')' reindex_target_multitable name
+                       | REINDEX '(' reindex_option_list ')' reindex_target_multitable opt_concurrently name
                                {
                                        ReindexStmt *n = makeNode(ReindexStmt);
                                        n->kind = $5;
-                                       n->name = $6;
+                                       n->concurrent = $6;
+                                       n->name = $7;
                                        n->relation = NULL;
                                        n->options = $3;
                                        $$ = (Node *)n;
index 857b7a8b43fe41050ff903db83b0f93024ad4f0f..edf24c438c978fb1f718868f58658e8d06782c50 100644 (file)
@@ -774,16 +774,20 @@ standard_ProcessUtility(PlannedStmt *pstmt,
                        {
                                ReindexStmt *stmt = (ReindexStmt *) parsetree;
 
+                               if (stmt->concurrent)
+                                       PreventInTransactionBlock(isTopLevel,
+                                                                                         "REINDEX CONCURRENTLY");
+
                                /* we choose to allow this during "read only" transactions */
                                PreventCommandDuringRecovery("REINDEX");
                                /* forbidden in parallel mode due to CommandIsReadOnly */
                                switch (stmt->kind)
                                {
                                        case REINDEX_OBJECT_INDEX:
-                                               ReindexIndex(stmt->relation, stmt->options);
+                                               ReindexIndex(stmt->relation, stmt->options, stmt->concurrent);
                                                break;
                                        case REINDEX_OBJECT_TABLE:
-                                               ReindexTable(stmt->relation, stmt->options);
+                                               ReindexTable(stmt->relation, stmt->options, stmt->concurrent);
                                                break;
                                        case REINDEX_OBJECT_SCHEMA:
                                        case REINDEX_OBJECT_SYSTEM:
@@ -799,7 +803,7 @@ standard_ProcessUtility(PlannedStmt *pstmt,
                                                                                                  (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" :
                                                                                                  (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" :
                                                                                                  "REINDEX DATABASE");
-                                               ReindexMultipleTables(stmt->name, stmt->kind, stmt->options);
+                                               ReindexMultipleTables(stmt->name, stmt->kind, stmt->options, stmt->concurrent);
                                                break;
                                        default:
                                                elog(ERROR, "unrecognized object type: %d",
index 5d8634d8186d72fe8403082343da699a2a2e377d..82511e34ac3e56237ed4dcb7f3e7c572a0a2e32d 100644 (file)
@@ -2192,6 +2192,22 @@ command_no_begin(const char *query)
                        return true;
                if (wordlen == 10 && pg_strncasecmp(query, "tablespace", 10) == 0)
                        return true;
+               if (wordlen == 5 && (pg_strncasecmp(query, "index", 5) == 0 ||
+                                                        pg_strncasecmp(query, "table", 5) == 0))
+               {
+                       query += wordlen;       /* step past INDEX / TABLE */
+                       query = skip_white_space(query);
+                       wordlen = 0;    /* now measure the following word */
+                       while (isalpha((unsigned char) query[wordlen]))
+                               wordlen += PQmblen(&query[wordlen], pset.encoding);
+
+                       /*
+                        * REINDEX { TABLE | INDEX } CONCURRENTLY is not allowed inside
+                        * a transaction block.
+                        */
+                       if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0)
+                               return true;
+               }
 
                /* DROP INDEX CONCURRENTLY isn't allowed in xacts */
                if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0)
index f14921eee12c0816843ae70384b956e48fa4b243..ab69a2c946d2c70b731878622e5639477bb107e1 100644 (file)
@@ -3213,12 +3213,24 @@ psql_completion(const char *text, int start, int end)
        else if (Matches("REINDEX"))
                COMPLETE_WITH("TABLE", "INDEX", "SYSTEM", "SCHEMA", "DATABASE");
        else if (Matches("REINDEX", "TABLE"))
-               COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexables, NULL);
+               COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexables,
+                                                                  " UNION SELECT 'CONCURRENTLY'");
        else if (Matches("REINDEX", "INDEX"))
-               COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes, NULL);
+               COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes,
+                                                                  " UNION SELECT 'CONCURRENTLY'");
        else if (Matches("REINDEX", "SCHEMA"))
-               COMPLETE_WITH_QUERY(Query_for_list_of_schemas);
+               COMPLETE_WITH_QUERY(Query_for_list_of_schemas
+                                                       " UNION SELECT 'CONCURRENTLY'");
        else if (Matches("REINDEX", "SYSTEM|DATABASE"))
+               COMPLETE_WITH_QUERY(Query_for_list_of_databases
+                                                       " UNION SELECT 'CONCURRENTLY'");
+       else if (Matches("REINDEX", "TABLE", "CONCURRENTLY"))
+               COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexables, NULL);
+       else if (Matches("REINDEX", "INDEX", "CONCURRENTLY"))
+               COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes, NULL);
+       else if (Matches("REINDEX", "SCHEMA", "CONCURRENTLY"))
+               COMPLETE_WITH_QUERY(Query_for_list_of_schemas);
+       else if (Matches("REINDEX", "SYSTEM|DATABASE", "CONCURRENTLY"))
                COMPLETE_WITH_QUERY(Query_for_list_of_databases);
 
 /* SECURITY LABEL */
index 1cd1ccc951fecf227f4ac2f7b46d712c9ca2d46a..438500cb08a7780bccf171866854307f96d5d2ae 100644 (file)
@@ -19,16 +19,17 @@ static void reindex_one_database(const char *name, const char *dbname,
                                         const char *type, const char *host,
                                         const char *port, const char *username,
                                         enum trivalue prompt_password, const char *progname,
-                                        bool echo, bool verbose);
+                                        bool echo, bool verbose, bool concurrently);
 static void reindex_all_databases(const char *maintenance_db,
                                          const char *host, const char *port,
                                          const char *username, enum trivalue prompt_password,
                                          const char *progname, bool echo,
-                                         bool quiet, bool verbose);
+                                         bool quiet, bool verbose, bool concurrently);
 static void reindex_system_catalogs(const char *dbname,
                                                const char *host, const char *port,
                                                const char *username, enum trivalue prompt_password,
-                                               const char *progname, bool echo, bool verbose);
+                                               const char *progname, bool echo, bool verbose,
+                                               bool concurrently);
 static void help(const char *progname);
 
 int
@@ -49,6 +50,7 @@ main(int argc, char *argv[])
                {"table", required_argument, NULL, 't'},
                {"index", required_argument, NULL, 'i'},
                {"verbose", no_argument, NULL, 'v'},
+               {"concurrently", no_argument, NULL, 1},
                {"maintenance-db", required_argument, NULL, 2},
                {NULL, 0, NULL, 0}
        };
@@ -68,6 +70,7 @@ main(int argc, char *argv[])
        bool            echo = false;
        bool            quiet = false;
        bool            verbose = false;
+       bool            concurrently = false;
        SimpleStringList indexes = {NULL, NULL};
        SimpleStringList tables = {NULL, NULL};
        SimpleStringList schemas = {NULL, NULL};
@@ -124,6 +127,9 @@ main(int argc, char *argv[])
                        case 'v':
                                verbose = true;
                                break;
+                       case 1:
+                               concurrently = true;
+                               break;
                        case 2:
                                maintenance_db = pg_strdup(optarg);
                                break;
@@ -182,7 +188,7 @@ main(int argc, char *argv[])
                }
 
                reindex_all_databases(maintenance_db, host, port, username,
-                                                         prompt_password, progname, echo, quiet, verbose);
+                                                         prompt_password, progname, echo, quiet, verbose, concurrently);
        }
        else if (syscatalog)
        {
@@ -213,7 +219,7 @@ main(int argc, char *argv[])
                }
 
                reindex_system_catalogs(dbname, host, port, username, prompt_password,
-                                                               progname, echo, verbose);
+                                                               progname, echo, verbose, concurrently);
        }
        else
        {
@@ -234,7 +240,7 @@ main(int argc, char *argv[])
                        for (cell = schemas.head; cell; cell = cell->next)
                        {
                                reindex_one_database(cell->val, dbname, "SCHEMA", host, port,
-                                                                        username, prompt_password, progname, echo, verbose);
+                                                                        username, prompt_password, progname, echo, verbose, concurrently);
                        }
                }
 
@@ -245,7 +251,7 @@ main(int argc, char *argv[])
                        for (cell = indexes.head; cell; cell = cell->next)
                        {
                                reindex_one_database(cell->val, dbname, "INDEX", host, port,
-                                                                        username, prompt_password, progname, echo, verbose);
+                                                                        username, prompt_password, progname, echo, verbose, concurrently);
                        }
                }
                if (tables.head != NULL)
@@ -255,7 +261,7 @@ main(int argc, char *argv[])
                        for (cell = tables.head; cell; cell = cell->next)
                        {
                                reindex_one_database(cell->val, dbname, "TABLE", host, port,
-                                                                        username, prompt_password, progname, echo, verbose);
+                                                                        username, prompt_password, progname, echo, verbose, concurrently);
                        }
                }
 
@@ -265,7 +271,7 @@ main(int argc, char *argv[])
                 */
                if (indexes.head == NULL && tables.head == NULL && schemas.head == NULL)
                        reindex_one_database(NULL, dbname, "DATABASE", host, port,
-                                                                username, prompt_password, progname, echo, verbose);
+                                                                username, prompt_password, progname, echo, verbose, concurrently);
        }
 
        exit(0);
@@ -275,7 +281,7 @@ static void
 reindex_one_database(const char *name, const char *dbname, const char *type,
                                         const char *host, const char *port, const char *username,
                                         enum trivalue prompt_password, const char *progname, bool echo,
-                                        bool verbose)
+                                        bool verbose, bool concurrently)
 {
        PQExpBufferData sql;
 
@@ -284,6 +290,14 @@ reindex_one_database(const char *name, const char *dbname, const char *type,
        conn = connectDatabase(dbname, host, port, username, prompt_password,
                                                   progname, echo, false, false);
 
+       if (concurrently && PQserverVersion(conn) < 120000)
+       {
+               PQfinish(conn);
+               fprintf(stderr, _("%s: cannot use the \"%s\" option on server versions older than PostgreSQL %s\n"),
+                               progname, "concurrently", "12");
+               exit(1);
+       }
+
        initPQExpBuffer(&sql);
 
        appendPQExpBufferStr(&sql, "REINDEX ");
@@ -293,6 +307,8 @@ reindex_one_database(const char *name, const char *dbname, const char *type,
 
        appendPQExpBufferStr(&sql, type);
        appendPQExpBufferChar(&sql, ' ');
+       if (concurrently)
+               appendPQExpBufferStr(&sql, "CONCURRENTLY ");
        if (strcmp(type, "TABLE") == 0 ||
                strcmp(type, "INDEX") == 0)
                appendQualifiedRelation(&sql, name, conn, progname, echo);
@@ -328,7 +344,8 @@ static void
 reindex_all_databases(const char *maintenance_db,
                                          const char *host, const char *port,
                                          const char *username, enum trivalue prompt_password,
-                                         const char *progname, bool echo, bool quiet, bool verbose)
+                                         const char *progname, bool echo, bool quiet, bool verbose,
+                                         bool concurrently)
 {
        PGconn     *conn;
        PGresult   *result;
@@ -357,7 +374,7 @@ reindex_all_databases(const char *maintenance_db,
 
                reindex_one_database(NULL, connstr.data, "DATABASE", host,
                                                         port, username, prompt_password,
-                                                        progname, echo, verbose);
+                                                        progname, echo, verbose, concurrently);
        }
        termPQExpBuffer(&connstr);
 
@@ -367,7 +384,7 @@ reindex_all_databases(const char *maintenance_db,
 static void
 reindex_system_catalogs(const char *dbname, const char *host, const char *port,
                                                const char *username, enum trivalue prompt_password,
-                                               const char *progname, bool echo, bool verbose)
+                                               const char *progname, bool echo, bool verbose, bool concurrently)
 {
        PGconn     *conn;
        PQExpBufferData sql;
@@ -382,7 +399,11 @@ reindex_system_catalogs(const char *dbname, const char *host, const char *port,
        if (verbose)
                appendPQExpBuffer(&sql, " (VERBOSE)");
 
-       appendPQExpBuffer(&sql, " SYSTEM %s;", fmtId(PQdb(conn)));
+       appendPQExpBufferStr(&sql, " SYSTEM ");
+       if (concurrently)       /* passed through as-is; expected to be rejected for SYSTEM */
+               appendPQExpBufferStr(&sql, "CONCURRENTLY ");
+       appendPQExpBufferStr(&sql, fmtId(PQdb(conn)));
+       appendPQExpBufferChar(&sql, ';');
 
        if (!executeMaintenanceCommand(conn, sql.data, echo))
        {
@@ -403,6 +424,7 @@ help(const char *progname)
        printf(_("  %s [OPTION]... [DBNAME]\n"), progname);
        printf(_("\nOptions:\n"));
        printf(_("  -a, --all                 reindex all databases\n"));
+       printf(_("      --concurrently        reindex concurrently\n"));
        printf(_("  -d, --dbname=DBNAME       database to reindex\n"));
        printf(_("  -e, --echo                show the commands being sent to the server\n"));
        printf(_("  -i, --index=INDEX         recreate specific index(es) only\n"));
index e57a5e2bad5edc00894548fe56955facb05e485a..ef83be767abbce9a45fe4ddfa70012085ecb4d49 100644 (file)
@@ -3,7 +3,7 @@ use warnings;
 
 use PostgresNode;
 use TestLib;
-use Test::More tests => 23;
+use Test::More tests => 34;
 
 program_help_ok('reindexdb');
 program_version_ok('reindexdb');
@@ -43,6 +43,33 @@ $node->issues_sql_like(
        qr/statement: REINDEX \(VERBOSE\) TABLE public\.test1;/,
        'reindex with verbose output');
 
+# the same with --concurrently
+$node->issues_sql_like(
+       [ 'reindexdb', '--concurrently', 'postgres' ],
+       qr/statement: REINDEX DATABASE CONCURRENTLY postgres;/,
+       'SQL REINDEX CONCURRENTLY run');
+
+$node->issues_sql_like(
+       [ 'reindexdb', '--concurrently', '-t', 'test1', 'postgres' ],
+       qr/statement: REINDEX TABLE CONCURRENTLY public\.test1;/,
+       'reindex specific table concurrently');
+$node->issues_sql_like(
+       [ 'reindexdb', '--concurrently', '-i', 'test1x', 'postgres' ],
+       qr/statement: REINDEX INDEX CONCURRENTLY public\.test1x;/,
+       'reindex specific index concurrently');
+$node->issues_sql_like(
+       [ 'reindexdb', '--concurrently', '-S', 'public', 'postgres' ],
+       qr/statement: REINDEX SCHEMA CONCURRENTLY public;/,
+       'reindex specific schema concurrently');
+$node->command_fails(
+       [ 'reindexdb', '--concurrently', '-s', 'postgres' ],
+       'reindex system tables concurrently');
+$node->issues_sql_like(
+       [ 'reindexdb', '--concurrently', '-v', '-t', 'test1', 'postgres' ],
+       qr/statement: REINDEX \(VERBOSE\) TABLE CONCURRENTLY public\.test1;/,
+       'reindex with verbose output concurrently');
+
+# connection strings
 $node->command_ok([qw(reindexdb --echo --table=pg_am dbname=template1)],
        'reindexdb table with connection string');
 $node->command_ok(
index f537f0158799e18304deb76eaa503ed6679c485d..4f9dde9df9d0ddb6f057dbd1dfb661a86330e467 100644 (file)
@@ -136,6 +136,7 @@ typedef enum ObjectClass
 #define PERFORM_DELETION_QUIETLY                       0x0004  /* suppress notices */
 #define PERFORM_DELETION_SKIP_ORIGINAL         0x0008  /* keep original obj */
 #define PERFORM_DELETION_SKIP_EXTENSIONS       0x0010  /* keep extensions */
+#define PERFORM_DELETION_CONCURRENT_LOCK       0x0020  /* normal drop with concurrent lock mode */
 
 
 /* in dependency.c */
@@ -198,6 +199,9 @@ extern long changeDependencyFor(Oid classId, Oid objectId,
                                        Oid refClassId, Oid oldRefObjectId,
                                        Oid newRefObjectId);
 
+extern long changeDependenciesOn(Oid refClassId, Oid oldRefObjectId,
+                                                                Oid newRefObjectId);
+
 extern Oid     getExtensionOfObject(Oid classId, Oid objectId);
 
 extern bool sequenceIsOwned(Oid seqId, char deptype, Oid *tableId, int32 *colId);
@@ -208,6 +212,8 @@ extern Oid  get_constraint_index(Oid constraintId);
 
 extern Oid     get_index_constraint(Oid indexId);
 
+extern List *get_index_ref_constraints(Oid indexId);
+
 /* in pg_shdepend.c */
 
 extern void recordSharedDependencyOn(ObjectAddress *depender,
index 55a3f446833d0804d37ea0f7c7a7935a8507d99c..799efee95420d871d2f35acc8da8d9b492c96fdf 100644 (file)
@@ -78,6 +78,20 @@ extern Oid index_create(Relation heapRelation,
 #define        INDEX_CONSTR_CREATE_UPDATE_INDEX        (1 << 3)
 #define        INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS     (1 << 4)
 
+extern Oid index_concurrently_create_copy(Relation heapRelation,
+                                                                                 Oid oldIndexId,
+                                                                                 const char *newName);
+
+extern void index_concurrently_build(Oid heapRelationId,
+                                                                        Oid indexRelationId);
+
+extern void index_concurrently_swap(Oid newIndexId,
+                                                                       Oid oldIndexId,
+                                                                       const char *oldName);
+
+extern void index_concurrently_set_dead(Oid heapId,
+                                                                               Oid indexId);
+
 extern ObjectAddress index_constraint_create(Relation heapRelation,
                                                Oid indexRelationId,
                                                Oid parentConstraintId,
@@ -88,7 +102,7 @@ extern ObjectAddress index_constraint_create(Relation heapRelation,
                                                bool allow_system_table_mods,
                                                bool is_internal);
 
-extern void index_drop(Oid indexId, bool concurrent);
+extern void index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode);
 
 extern IndexInfo *BuildIndexInfo(Relation index);
 
index 3bc2e8eb16c4e4362e5d3854a1da8ee204afc953..7f49625987a7e260e6746351d8d86c9be7ba3e4b 100644 (file)
@@ -34,10 +34,10 @@ extern ObjectAddress DefineIndex(Oid relationId,
                        bool check_not_in_use,
                        bool skip_build,
                        bool quiet);
-extern void ReindexIndex(RangeVar *indexRelation, int options);
-extern Oid     ReindexTable(RangeVar *relation, int options);
+extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent);
+extern Oid     ReindexTable(RangeVar *relation, int options, bool concurrent);
 extern void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
-                                         int options);
+                                         int options, bool concurrent);
 extern char *makeObjectName(const char *name1, const char *name2,
                           const char *label);
 extern char *ChooseRelationName(const char *name1, const char *name2,
index bdd2bd2fd9c4b786cf720ed523f093c1dd2b08ec..e81c6269137660260a365774ce16f4d5b20f1575 100644 (file)
@@ -3305,6 +3305,7 @@ typedef struct ReindexStmt
        RangeVar   *relation;           /* Table or index to reindex */
        const char *name;                       /* name of database to reindex */
        int                     options;                /* Reindex options flags */
+       bool            concurrent;             /* reindex concurrently? */
 } ReindexStmt;
 
 /* ----------------------
diff --git a/src/test/isolation/expected/reindex-concurrently.out b/src/test/isolation/expected/reindex-concurrently.out
new file mode 100644 (file)
index 0000000..9e04169
--- /dev/null
@@ -0,0 +1,78 @@
+Parsed test spec with 3 sessions
+
+starting permutation: reindex sel1 upd2 ins2 del2 end1 end2
+step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab;
+step sel1: SELECT data FROM reind_con_tab WHERE id = 3;
+data           
+
+aaaa           
+step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3;
+step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc');
+step del2: DELETE FROM reind_con_tab WHERE data = 'cccc';
+step end1: COMMIT;
+step end2: COMMIT;
+
+starting permutation: sel1 reindex upd2 ins2 del2 end1 end2
+step sel1: SELECT data FROM reind_con_tab WHERE id = 3;
+data           
+
+aaaa           
+step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; <waiting ...>
+step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3;
+step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc');
+step del2: DELETE FROM reind_con_tab WHERE data = 'cccc';
+step end1: COMMIT;
+step end2: COMMIT;
+step reindex: <... completed>
+
+starting permutation: sel1 upd2 reindex ins2 del2 end1 end2
+step sel1: SELECT data FROM reind_con_tab WHERE id = 3;
+data           
+
+aaaa           
+step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3;
+step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; <waiting ...>
+step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc');
+step del2: DELETE FROM reind_con_tab WHERE data = 'cccc';
+step end1: COMMIT;
+step end2: COMMIT;
+step reindex: <... completed>
+
+starting permutation: sel1 upd2 ins2 reindex del2 end1 end2
+step sel1: SELECT data FROM reind_con_tab WHERE id = 3;
+data           
+
+aaaa           
+step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3;
+step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc');
+step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; <waiting ...>
+step del2: DELETE FROM reind_con_tab WHERE data = 'cccc';
+step end1: COMMIT;
+step end2: COMMIT;
+step reindex: <... completed>
+
+starting permutation: sel1 upd2 ins2 del2 reindex end1 end2
+step sel1: SELECT data FROM reind_con_tab WHERE id = 3;
+data           
+
+aaaa           
+step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3;
+step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc');
+step del2: DELETE FROM reind_con_tab WHERE data = 'cccc';
+step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; <waiting ...>
+step end1: COMMIT;
+step end2: COMMIT;
+step reindex: <... completed>
+
+starting permutation: sel1 upd2 ins2 del2 end1 reindex end2
+step sel1: SELECT data FROM reind_con_tab WHERE id = 3;
+data           
+
+aaaa           
+step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3;
+step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc');
+step del2: DELETE FROM reind_con_tab WHERE data = 'cccc';
+step end1: COMMIT;
+step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; <waiting ...>
+step end2: COMMIT;
+step reindex: <... completed>
index 70d47b3e687b81def01a70e10d18dc58d7362b74..f1ae50e5ba8df2d0d7ec72bae09f8e1b562096f7 100644 (file)
@@ -42,6 +42,7 @@ test: multixact-no-forget
 test: lock-committed-update
 test: lock-committed-keyupdate
 test: update-locked-tuple
+test: reindex-concurrently
 test: propagate-lock-delete
 test: tuplelock-conflict
 test: tuplelock-update
diff --git a/src/test/isolation/specs/reindex-concurrently.spec b/src/test/isolation/specs/reindex-concurrently.spec
new file mode 100644 (file)
index 0000000..eb59fe0
--- /dev/null
@@ -0,0 +1,40 @@
+# REINDEX CONCURRENTLY
+#
+# Ensure that SELECT, UPDATE, INSERT and DELETE issued from open transactions
+# work correctly while REINDEX TABLE CONCURRENTLY runs in another session.
+
+setup
+{
+       CREATE TABLE reind_con_tab(id serial primary key, data text);
+       INSERT INTO reind_con_tab(data) VALUES ('aa');
+       INSERT INTO reind_con_tab(data) VALUES ('aaa');
+       INSERT INTO reind_con_tab(data) VALUES ('aaaa');
+       INSERT INTO reind_con_tab(data) VALUES ('aaaaa');
+}
+
+teardown
+{
+       DROP TABLE reind_con_tab;
+}
+
+session "s1"
+setup { BEGIN; }
+step "sel1" { SELECT data FROM reind_con_tab WHERE id = 3; }
+step "end1" { COMMIT; }
+
+session "s2"
+setup { BEGIN; }
+step "upd2" { UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; }
+step "ins2" { INSERT INTO reind_con_tab(data) VALUES ('cccc'); }
+step "del2" { DELETE FROM reind_con_tab WHERE data = 'cccc'; }
+step "end2" { COMMIT; }
+
+session "s3"
+step "reindex" { REINDEX TABLE CONCURRENTLY reind_con_tab; }
+
+permutation "reindex" "sel1" "upd2" "ins2" "del2" "end1" "end2"
+permutation "sel1" "reindex" "upd2" "ins2" "del2" "end1" "end2"
+permutation "sel1" "upd2" "reindex" "ins2" "del2" "end1" "end2"
+permutation "sel1" "upd2" "ins2" "reindex" "del2" "end1" "end2"
+permutation "sel1" "upd2" "ins2" "del2" "reindex" "end1" "end2"
+permutation "sel1" "upd2" "ins2" "del2" "end1" "reindex" "end2"
index cc3dda4c70de0bf4a6a6ca45d739e9b91eda31ce..6b77d25debf07abc9b17bc89a4a97829d86eacc2 100644 (file)
@@ -3250,6 +3250,101 @@ INFO:  index "reindex_verbose_pkey" was reindexed
 \set VERBOSITY default
 DROP TABLE reindex_verbose;
 --
+-- REINDEX CONCURRENTLY
+--
+CREATE TABLE concur_reindex_tab (c1 int);
+-- REINDEX
+REINDEX TABLE concur_reindex_tab; -- notice
+NOTICE:  table "concur_reindex_tab" has no indexes
+REINDEX TABLE CONCURRENTLY concur_reindex_tab; -- notice
+NOTICE:  table "concur_reindex_tab" has no indexes
+ALTER TABLE concur_reindex_tab ADD COLUMN c2 text; -- add toast index
+-- Normal index with integer column
+CREATE UNIQUE INDEX concur_reindex_ind1 ON concur_reindex_tab(c1);
+-- Normal index with text column
+CREATE INDEX concur_reindex_ind2 ON concur_reindex_tab(c2);
+-- UNIQUE index with expression
+CREATE UNIQUE INDEX concur_reindex_ind3 ON concur_reindex_tab(abs(c1));
+-- Duplicate column names
+CREATE INDEX concur_reindex_ind4 ON concur_reindex_tab(c1, c1, c2);
+-- Create table for check on foreign key dependence switch with indexes swapped
+ALTER TABLE concur_reindex_tab ADD PRIMARY KEY USING INDEX concur_reindex_ind1;
+CREATE TABLE concur_reindex_tab2 (c1 int REFERENCES concur_reindex_tab);
+INSERT INTO concur_reindex_tab VALUES  (1, 'a');
+INSERT INTO concur_reindex_tab VALUES  (2, 'a');
+-- Reindex concurrently of exclusion constraint currently not supported
+CREATE TABLE concur_reindex_tab3 (c1 int, c2 int4range, EXCLUDE USING gist (c2 WITH &&));
+INSERT INTO concur_reindex_tab3 VALUES  (3, '[1,2]');
+REINDEX INDEX CONCURRENTLY  concur_reindex_tab3_c2_excl;  -- error
+ERROR:  concurrent index creation for exclusion constraints is not supported
+REINDEX TABLE CONCURRENTLY concur_reindex_tab3;  -- succeeds with warning
+WARNING:  cannot reindex concurrently exclusion constraint index "public.concur_reindex_tab3_c2_excl", skipping
+INSERT INTO concur_reindex_tab3 VALUES  (4, '[2,4]');
+ERROR:  conflicting key value violates exclusion constraint "concur_reindex_tab3_c2_excl"
+DETAIL:  Key (c2)=([2,5)) conflicts with existing key (c2)=([1,3)).
+-- Check materialized views
+CREATE MATERIALIZED VIEW concur_reindex_matview AS SELECT * FROM concur_reindex_tab;
+REINDEX INDEX CONCURRENTLY concur_reindex_ind1;
+REINDEX TABLE CONCURRENTLY concur_reindex_tab;
+REINDEX TABLE CONCURRENTLY concur_reindex_matview;
+-- Check that comments are preserved
+CREATE TABLE testcomment (i int);
+CREATE INDEX testcomment_idx1 ON testcomment (i);
+COMMENT ON INDEX testcomment_idx1 IS 'test comment';
+SELECT obj_description('testcomment_idx1'::regclass, 'pg_class');
+ obj_description 
+-----------------
+ test comment
+(1 row)
+
+REINDEX TABLE testcomment;
+SELECT obj_description('testcomment_idx1'::regclass, 'pg_class');
+ obj_description 
+-----------------
+ test comment
+(1 row)
+
+REINDEX TABLE CONCURRENTLY testcomment ;
+SELECT obj_description('testcomment_idx1'::regclass, 'pg_class');
+ obj_description 
+-----------------
+ test comment
+(1 row)
+
+DROP TABLE testcomment;
+-- Check errors
+-- Cannot run inside a transaction block
+BEGIN;
+REINDEX TABLE CONCURRENTLY concur_reindex_tab;
+ERROR:  REINDEX CONCURRENTLY cannot run inside a transaction block
+COMMIT;
+REINDEX TABLE CONCURRENTLY pg_database; -- no shared relation
+ERROR:  concurrent index creation on system catalog tables is not supported
+REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relations
+ERROR:  concurrent index creation on system catalog tables is not supported
+REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM
+ERROR:  concurrent reindex of system catalogs is not supported
+-- Warns about catalog relations
+REINDEX SCHEMA CONCURRENTLY pg_catalog;
+WARNING:  concurrent reindex is not supported for catalog relations, skipping all
+-- Check the relation status, there should not be invalid indexes
+\d concur_reindex_tab
+         Table "public.concur_reindex_tab"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ c1     | integer |           | not null | 
+ c2     | text    |           |          | 
+Indexes:
+    "concur_reindex_ind1" PRIMARY KEY, btree (c1)
+    "concur_reindex_ind3" UNIQUE, btree (abs(c1))
+    "concur_reindex_ind2" btree (c2)
+    "concur_reindex_ind4" btree (c1, c1, c2)
+Referenced by:
+    TABLE "concur_reindex_tab2" CONSTRAINT "concur_reindex_tab2_c1_fkey" FOREIGN KEY (c1) REFERENCES concur_reindex_tab(c1)
+
+DROP MATERIALIZED VIEW concur_reindex_matview;
+DROP TABLE concur_reindex_tab, concur_reindex_tab2, concur_reindex_tab3;
+--
 -- REINDEX SCHEMA
 --
 REINDEX SCHEMA schema_to_reindex; -- failure, schema does not exist
@@ -3308,6 +3403,8 @@ BEGIN;
 REINDEX SCHEMA schema_to_reindex; -- failure, cannot run in a transaction
 ERROR:  REINDEX SCHEMA cannot run inside a transaction block
 END;
+-- concurrently
+REINDEX SCHEMA CONCURRENTLY schema_to_reindex;
 -- Failure for unauthorized user
 CREATE ROLE regress_reindexuser NOLOGIN;
 SET SESSION ROLE regress_reindexuser;
index 15c0f1f5d17a17bdbab2e783b2c2a4185f56adb9..9ff2dc68ff1e0674b2bd8b7c49a44b5181618786 100644 (file)
@@ -1172,6 +1172,65 @@ REINDEX (VERBOSE) TABLE reindex_verbose;
 \set VERBOSITY default
 DROP TABLE reindex_verbose;
 
+--
+-- REINDEX CONCURRENTLY
+--
+CREATE TABLE concur_reindex_tab (c1 int);
+-- REINDEX
+REINDEX TABLE concur_reindex_tab; -- notice
+REINDEX TABLE CONCURRENTLY concur_reindex_tab; -- notice
+ALTER TABLE concur_reindex_tab ADD COLUMN c2 text; -- add toast index
+-- Normal index with integer column
+CREATE UNIQUE INDEX concur_reindex_ind1 ON concur_reindex_tab(c1);
+-- Normal index with text column
+CREATE INDEX concur_reindex_ind2 ON concur_reindex_tab(c2);
+-- UNIQUE index with expression
+CREATE UNIQUE INDEX concur_reindex_ind3 ON concur_reindex_tab(abs(c1));
+-- Duplicate column names
+CREATE INDEX concur_reindex_ind4 ON concur_reindex_tab(c1, c1, c2);
+-- Create table for check on foreign key dependence switch with indexes swapped
+ALTER TABLE concur_reindex_tab ADD PRIMARY KEY USING INDEX concur_reindex_ind1;
+CREATE TABLE concur_reindex_tab2 (c1 int REFERENCES concur_reindex_tab);
+INSERT INTO concur_reindex_tab VALUES  (1, 'a');
+INSERT INTO concur_reindex_tab VALUES  (2, 'a');
+-- Reindex concurrently of exclusion constraint currently not supported
+CREATE TABLE concur_reindex_tab3 (c1 int, c2 int4range, EXCLUDE USING gist (c2 WITH &&));
+INSERT INTO concur_reindex_tab3 VALUES  (3, '[1,2]');
+REINDEX INDEX CONCURRENTLY  concur_reindex_tab3_c2_excl;  -- error
+REINDEX TABLE CONCURRENTLY concur_reindex_tab3;  -- succeeds with warning
+INSERT INTO concur_reindex_tab3 VALUES  (4, '[2,4]');
+-- Check materialized views
+CREATE MATERIALIZED VIEW concur_reindex_matview AS SELECT * FROM concur_reindex_tab;
+REINDEX INDEX CONCURRENTLY concur_reindex_ind1;
+REINDEX TABLE CONCURRENTLY concur_reindex_tab;
+REINDEX TABLE CONCURRENTLY concur_reindex_matview;
+-- Check that comments are preserved
+CREATE TABLE testcomment (i int);
+CREATE INDEX testcomment_idx1 ON testcomment (i);
+COMMENT ON INDEX testcomment_idx1 IS 'test comment';
+SELECT obj_description('testcomment_idx1'::regclass, 'pg_class');
+REINDEX TABLE testcomment;
+SELECT obj_description('testcomment_idx1'::regclass, 'pg_class');
+REINDEX TABLE CONCURRENTLY testcomment ;
+SELECT obj_description('testcomment_idx1'::regclass, 'pg_class');
+DROP TABLE testcomment;
+
+-- Check errors
+-- Cannot run inside a transaction block
+BEGIN;
+REINDEX TABLE CONCURRENTLY concur_reindex_tab;
+COMMIT;
+REINDEX TABLE CONCURRENTLY pg_database; -- no shared relation
+REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relations
+REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM
+-- Warns about catalog relations
+REINDEX SCHEMA CONCURRENTLY pg_catalog;
+
+-- Check the relation status, there should not be invalid indexes
+\d concur_reindex_tab
+DROP MATERIALIZED VIEW concur_reindex_matview;
+DROP TABLE concur_reindex_tab, concur_reindex_tab2, concur_reindex_tab3;
+
 --
 -- REINDEX SCHEMA
 --
@@ -1214,6 +1273,9 @@ BEGIN;
 REINDEX SCHEMA schema_to_reindex; -- failure, cannot run in a transaction
 END;
 
+-- concurrently
+REINDEX SCHEMA CONCURRENTLY schema_to_reindex;
+
 -- Failure for unauthorized user
 CREATE ROLE regress_reindexuser NOLOGIN;
 SET SESSION ROLE regress_reindexuser;