Clean up some obsolete statements about GiST indexes, and add a section

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 21 Oct 2005 01:41:28 +0000 (01:41 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 21 Oct 2005 01:41:28 +0000 (01:41 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 21 Oct 2005 01:41:28 +0000 (01:41 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 21 Oct 2005 01:41:28 +0000 (01:41 +0000)
diff --git a/doc/src/sgml/gist.sgml b/doc/src/sgml/gist.sgml

index ce6124579ddb935f097f64697331ccd19cc0db44..17b70fd564aa1a692164d3130c5f38522ee15a9b 100644 (file)
--- a/doc/src/sgml/gist.sgml
+++ b/doc/src/sgml/gist.sgml
@@ -1,11 +1,11 @@
  <!--
-$PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.21 2005/07/02 20:08:27 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.22 2005/10/21 01:41:28 tgl Exp $
  -->
  
  <chapter id="GiST">
  <title>GiST Indexes</title>
  
-<sect1 id="intro">
+<sect1 id="gist-intro">
   <title>Introduction</title>
  
   <para>
@@ -44,7 +44,7 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.21 2005/07/02 20:08:27 momjian Exp
  
  </sect1>
  
-<sect1 id="extensibility">
+<sect1 id="gist-extensibility">
   <title>Extensibility</title>
  
   <para>
@@ -92,7 +92,7 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.21 2005/07/02 20:08:27 momjian Exp
  
  </sect1>
  
-<sect1 id="implementation">
+<sect1 id="gist-implementation">
   <title>Implementation</title>
   
   <para>
@@ -180,19 +180,24 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.21 2005/07/02 20:08:27 momjian Exp
  
  </sect1>
  
-<sect1 id="examples">
+<sect1 id="gist-examples">
   <title>Examples</title>
  
   <para>
-  To see example implementations of index methods implemented using
-  <acronym>GiST</acronym>, examine the following contrib modules:
+  The <productname>PostgreSQL</productname> source distribution includes
+  several examples of index methods implemented using
+  <acronym>GiST</acronym>.  The core system currently provides R-Tree
+  equivalent functionality for some of the built-in geometric datatypes
+  (see <filename>src/backend/access/gist/gistproc.c</>).  The following
+  <filename>contrib</> modules also contain <acronym>GiST</acronym>
+  operator classes: 
   </para>
   
   <variablelist>
    <varlistentry>
     <term>btree_gist</term>
     <listitem>
-    <para>B-Tree</para>
+    <para>B-Tree equivalent functionality for several datatypes</para>
     </listitem>
    </varlistentry>
  
@@ -213,26 +218,26 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.21 2005/07/02 20:08:27 momjian Exp
    <varlistentry>
     <term>ltree</term>
     <listitem>
-    <para>Indexing for tree-like stuctures</para>
+    <para>Indexing for tree-like structures</para>
     </listitem>
    </varlistentry>
  
    <varlistentry>
-   <term>rtree_gist</term>
+   <term>pg_trgm</term>
     <listitem>
-    <para>R-Tree</para>
+    <para>Text similarity using trigram matching</para>
     </listitem>
    </varlistentry>
  
    <varlistentry>
     <term>seg</term>
     <listitem>
-    <para>Storage and indexed access for <quote>float ranges</quote></para>
+    <para>Indexing for <quote>float ranges</quote></para>
     </listitem>
    </varlistentry>
  
    <varlistentry>
-   <term>tsearch and tsearch2</term>
+   <term>tsearch2</term>
     <listitem>
      <para>Full text indexing</para>
     </listitem>
@@ -241,4 +246,33 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.21 2005/07/02 20:08:27 momjian Exp
  
  </sect1>
  
+<sect1 id="gist-recovery">
+ <title>Crash Recovery</title>
+
+ <para>
+  Usually, replay of the WAL log is sufficient to restore the integrity
+  of a GiST index following a database crash.  However, there are some
+  corner cases in which the index state is not fully rebuilt.  The index
+  will still be functionally correct, but there may be some performance
+  degradation.  When this occurs, the index can be repaired by
+  <command>VACUUM</>ing its table, or by rebuilding the index using
+  <command>REINDEX</>.  In some cases a plain <command>VACUUM</> is
+  not sufficient, and either <command>VACUUM FULL</> or <command>REINDEX</>
+  is needed.  The need for one of these procedures is indicated by occurrence
+  of this log message during crash recovery:
+<programlisting>
+LOG:  index NNN/NNN/NNN needs VACUUM or REINDEX to finish crash recovery
+</programlisting>
+  or this log message during routine index insertions:
+<programlisting>
+LOG:  index "FOO" needs VACUUM or REINDEX to finish crash recovery
+</programlisting>
+  If a plain <command>VACUUM</> finds itself unable to complete recovery
+  fully, it will return a notice:
+<programlisting>
+NOTICE:  index "FOO" needs VACUUM FULL or REINDEX to finish crash recovery
+</programlisting>
+ </para>
+</sect1>
+
  </chapter>
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml

index a1c1c9735b1834242d28257d1fbef1a031ffcd49..fc268389e85da71c7372f035295d2aab8a8b4d37 100644 (file)
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.52 2005/09/12 19:17:45 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.53 2005/10/21 01:41:28 tgl Exp $ -->
  
  <chapter id="indexes">
   <title id="indexes-title">Indexes</title>
@@ -206,14 +206,6 @@ CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable>
  <synopsis>
  CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING hash (<replaceable>column</replaceable>);
  </synopsis>
-   <note>
-    <para>
-     Testing has shown <productname>PostgreSQL</productname>'s hash
-     indexes to perform no better than B-tree indexes, and the
-     index size and build time for hash indexes is much worse. For
-     these reasons, hash index use is presently discouraged.
-    </para>
-   </note>  
    </para>
  
    <para>
@@ -226,15 +218,33 @@ CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable>
     equivalent to the R-tree operator classes, and many other GiST operator
     classes are available in the <literal>contrib</> collection or as separate
     projects.  For more information see <xref linkend="GiST">.
-   <note>
-    <para>
-     It is likely that the R-tree index type will be retired in a future
-     release, as GiST indexes appear to do everything R-trees can do with
-     similar or better performance.  Users are encouraged to migrate
-     applications that use R-tree indexes to GiST indexes.
-    </para>
-   </note>  
    </para>
+
+  <note>
+   <para>
+    Testing has shown <productname>PostgreSQL</productname>'s hash
+    indexes to perform no better than B-tree indexes, and the
+    index size and build time for hash indexes is much worse.
+    Furthermore, hash index operations are not presently WAL-logged,
+    so hash indexes may need to be rebuilt with <command>REINDEX</>
+    after a database crash.
+    For these reasons, hash index use is presently discouraged.
+   </para>
+
+   <para>
+    Similarly, R-tree indexes do not seem to have any performance
+    advantages compared to the equivalent operations of GiST indexes.
+    Like hash indexes, they are not WAL-logged and may need
+    <command>REINDEX</>ing after a database crash.
+   </para>
+
+   <para>
+    While the problems with hash indexes may be fixed eventually,
+    it is likely that the R-tree index type will be retired in a future
+    release.  Users are encouraged to migrate applications that use R-tree
+    indexes to GiST indexes.
+   </para>
+  </note>  
   </sect1>
  
  
@@ -300,9 +310,12 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
  
    <para>
     A multicolumn GiST index can only be used when there is a query condition
-   on its leading column.  As with B-trees, conditions on additional columns
-   restrict the entries returned by the index, but do not in themselves aid
-   the index search.
+   on its leading column.  Conditions on additional columns restrict the
+   entries returned by the index, but the condition on the first column is the
+   most important one for determining how much of the index needs to be
+   scanned.  A GiST index will be relatively ineffective if its first column
+   has only a few distinct values, even if there are many distinct values in
+   additional columns.
    </para>
  
    <para>
diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml

index 97e351b323e370c93c50404fd78671bd5a136279..ea01104d937f4cb887523fa671cc844b2d30a462 100644 (file)
--- a/doc/src/sgml/mvcc.sgml
+++ b/doc/src/sgml/mvcc.sgml
@@ -1,5 +1,5 @@
  <!--
-$PostgreSQL: pgsql/doc/src/sgml/mvcc.sgml,v 2.51 2005/06/13 02:40:05 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/mvcc.sgml,v 2.52 2005/10/21 01:41:28 tgl Exp $
  -->
  
   <chapter id="mvcc">
@@ -965,41 +965,41 @@ UPDATE accounts SET balance = balance - 100.00 WHERE acctnum = 22222;
      <variablelist>
       <varlistentry>
        <term>
-       B-tree indexes
+       B-tree and <acronym>GiST</acronym> indexes
        </term>
        <listitem>
         <para>
-       Short-term share/exclusive page-level locks are used for
-       read/write access. Locks are released immediately after each
-       index row is fetched or inserted.  B-tree indexes provide
-       the highest concurrency without deadlock conditions.
+        Short-term share/exclusive page-level locks are used for
+        read/write access. Locks are released immediately after each
+        index row is fetched or inserted.  These index types provide
+        the highest concurrency without deadlock conditions.
         </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
        <term>
-       <acronym>GiST</acronym> and R-tree indexes
+       Hash indexes
        </term>
        <listitem>
         <para>
-       Share/exclusive index-level locks are used for read/write access.
-       Locks are released after the command is done.
+        Share/exclusive hash-bucket-level locks are used for read/write
+        access.  Locks are released after the whole bucket is processed.
+        Bucket-level locks provide better concurrency than index-level
+        ones, but deadlock is possible since the locks are held longer
+        than one index operation.
         </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
        <term>
-       Hash indexes
+       R-tree indexes
        </term>
        <listitem>
         <para>
-       Share/exclusive hash-bucket-level locks are used for read/write
-       access.  Locks are released after the whole bucket is processed.
-       Bucket-level locks provide better concurrency than index-level
-       ones, but deadlock is possible since the locks are held longer
-       than one index operation.
+        Share/exclusive index-level locks are used for read/write access.
+        Locks are released after the entire command is done.
         </para>
        </listitem>
       </varlistentry>
@@ -1007,14 +1007,13 @@ UPDATE accounts SET balance = balance - 100.00 WHERE acctnum = 22222;
     </para>
  
     <para>
-    In short, B-tree indexes offer the best performance for concurrent
+    Currently, B-tree indexes offer the best performance for concurrent
      applications; since they also have more features than hash
      indexes, they are the recommended index type for concurrent
      applications that need to index scalar data. When dealing with
-    non-scalar data, B-trees obviously cannot be used; in that
-    situation, application developers should be aware of the
-    relatively poor concurrent performance of GiST and R-tree
-    indexes.
+    non-scalar data, B-trees are not useful, and GiST indexes should
+    be used instead.  R-tree indexes are deprecated and are likely
+    to disappear entirely in a future release.
     </para>
    </sect1>
   </chapter>
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 21 Oct 2005 01:41:28 +0000 (01:41 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 21 Oct 2005 01:41:28 +0000 (01:41 +0000)
doc/src/sgml/gist.sgml		patch \| blob \| history
doc/src/sgml/indices.sgml		patch \| blob \| history
doc/src/sgml/mvcc.sgml		patch \| blob \| history