From e6dd664d0dd362235fd0af31360ecbf29434c4c1 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 8 May 2016 16:36:19 -0400
Subject: [PATCH] Docs: create some user-facing documentation about index-only
 scans.

We didn't have any real user documentation about how index-only scans
work or how to design indexes to exploit them.  Remedy that.
Per gripe from David Johnston.
---
 doc/src/sgml/config.sgml      |   6 +-
 doc/src/sgml/indexam.sgml     |  18 +--
 doc/src/sgml/indices.sgml     | 201 ++++++++++++++++++++++++++++++++--
 doc/src/sgml/maintenance.sgml |  13 ++-
 doc/src/sgml/storage.sgml     |   6 +-
 5 files changed, 217 insertions(+), 27 deletions(-)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3d6baadaff..04040ff258 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3406,9 +3406,6 @@ include_dir 'conf.d'
 
      <varlistentry id="guc-enable-indexonlyscan" xreflabel="enable_indexonlyscan">
       <term><varname>enable_indexonlyscan</varname> (<type>boolean</type>)
-      <indexterm>
-       <primary>index-only scan</primary>
-      </indexterm>
       <indexterm>
        <primary><varname>enable_indexonlyscan</> configuration parameter</primary>
       </indexterm>
@@ -3416,7 +3413,8 @@ include_dir 'conf.d'
       <listitem>
        <para>
         Enables or disables the query planner's use of index-only-scan plan
-        types. The default is <literal>on</>.
+        types (see <xref linkend="indexes-index-only-scans">).
+        The default is <literal>on</>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 69edeeabbb..fa4842b73f 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -354,10 +354,11 @@ amvacuumcleanup (IndexVacuumInfo *info,
 bool
 amcanreturn (Relation indexRelation, int attno);
 </programlisting>
-   Check whether the index can support <firstterm>index-only scans</> on the
-   given column, by returning the indexed column values for an index entry in
-   the form of an <structname>IndexTuple</structname>.  The attribute number
-   is 1-based, i.e. the first columns attno is 1. Returns TRUE if supported,
+   Check whether the index can support <link
+   linkend="indexes-index-only-scans"><firstterm>index-only scans</></link> on
+   the given column, by returning the indexed column values for an index entry
+   in the form of an <structname>IndexTuple</structname>.  The attribute number
+   is 1-based, i.e. the first column's attno is 1. Returns TRUE if supported,
    else FALSE.  If the access method does not support index-only scans at all,
    the <structfield>amcanreturn</> field in its <structname>IndexAmRoutine</>
    struct can be set to NULL.
@@ -489,8 +490,8 @@ amgettuple (IndexScanDesc scan,
   </para>
 
   <para>
-   If the index supports index-only scans (i.e.,
-   <function>amcanreturn</function> returns TRUE for it),
+   If the index supports <link linkend="indexes-index-only-scans">index-only
+   scans</link> (i.e., <function>amcanreturn</function> returns TRUE for it),
    then on success the AM must also check
    <literal>scan-&gt;xs_want_itup</>, and if that is true it must return
    the original indexed data for the index entry, in the form of an
@@ -700,9 +701,10 @@ amrestrpos (IndexScanDesc scan);
 
   <para>
    If the index stores the original indexed data values (and not some lossy
-   representation of them), it is useful to support index-only scans, in
+   representation of them), it is useful to
+   support <link linkend="indexes-index-only-scans">index-only scans</link>, in
    which the index returns the actual data not just the TID of the heap tuple.
-   This will only work if the visibility map shows that the TID is on an
+   This will only avoid I/O if the visibility map shows that the TID is on an
    all-visible page; else the heap tuple must be visited anyway to check
    MVCC visibility.  But that is no concern of the access method's.
   </para>
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml
index 5f72e7d073..49d3286400 100644
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -302,9 +302,16 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10;
     <primary>GIN</primary>
     <see>index</see>
    </indexterm>
-   GIN indexes are inverted indexes which can handle values that contain more
-   than one key, arrays for example. Like GiST and SP-GiST, GIN can support
-   many different user-defined indexing strategies and the particular
+   GIN indexes are <quote>inverted indexes</> which are appropriate for
+   data values that contain multiple component values, such as arrays.  An
+   inverted index contains a separate entry for each component value, and
+   can efficiently handle queries that test for the presence of specific
+   component values.
+  </para>
+
+  <para>
+   Like GiST and SP-GiST, GIN can support
+   many different user-defined indexing strategies, and the particular
    operators with which a GIN index can be used vary depending on the
    indexing strategy.
    As an example, the standard distribution of
@@ -337,16 +344,16 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10;
     <primary>BRIN</primary>
     <see>index</see>
    </indexterm>
-   BRIN indexes (a shorthand for Block Range indexes)
-   store summaries about the values stored in consecutive table physical block ranges.
+   BRIN indexes (a shorthand for Block Range INdexes) store summaries about
+   the values stored in consecutive physical block ranges of a table.
    Like GiST, SP-GiST and GIN,
    BRIN can support many different indexing strategies,
    and the particular operators with which a BRIN index can be used
    vary depending on the indexing strategy.
    For data types that have a linear sort order, the indexed data
    corresponds to the minimum and maximum values of the
-   values in the column for each block range,
-   which support indexed queries using these operators:
+   values in the column for each block range.  This supports indexed queries
+   using these operators:
 
    <simplelist>
     <member><literal>&lt;</literal></member>
@@ -460,7 +467,8 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
    an index on a single column is sufficient and saves space and time.
    Indexes with more than three columns are unlikely to be helpful
    unless the usage of the table is extremely stylized.  See also
-   <xref linkend="indexes-bitmap-scans"> for some discussion of the
+   <xref linkend="indexes-bitmap-scans"> and
+   <xref linkend="indexes-index-only-scans"> for some discussion of the
    merits of different index configurations.
   </para>
  </sect1>
@@ -1140,6 +1148,183 @@ CREATE INDEX test1c_content_y_index ON test1c (content COLLATE "y");
  </sect1>
 
 
+ <sect1 id="indexes-index-only-scans">
+  <title>Index-Only Scans</title>
+
+  <indexterm zone="indexes-index-only-scans">
+   <primary>index</primary>
+   <secondary>index-only scans</secondary>
+  </indexterm>
+  <indexterm zone="indexes-index-only-scans">
+   <primary>index-only scan</primary>
+  </indexterm>
+
+  <para>
+   All indexes in <productname>PostgreSQL</> are <firstterm>secondary</>
+   indexes, meaning that each index is stored separately from the table's
+   main data area (which is called the table's <firstterm>heap</>
+   in <productname>PostgreSQL</> terminology).  This means that in an
+   ordinary index scan, each row retrieval requires fetching data from both
+   the index and the heap.  Furthermore, while the index entries that match a
+   given indexable <literal>WHERE</> condition are usually close together in
+   the index, the table rows they reference might be anywhere in the heap.
+   The heap-access portion of an index scan thus involves a lot of random
+   access into the heap, which can be slow, particularly on traditional
+   rotating media.  (As described in <xref linkend="indexes-bitmap-scans">,
+   bitmap scans try to alleviate this cost by doing the heap accesses in
+   sorted order, but that only goes so far.)
+  </para>
+
+  <para>
+   To solve this performance problem, <productname>PostgreSQL</>
+   supports <firstterm>index-only scans</>, which can answer queries from an
+   index alone without any heap access.  The basic idea is to return values
+   directly out of each index entry instead of consulting the associated heap
+   entry.  There are two fundamental restrictions on when this method can be
+   used:
+
+   <orderedlist>
+    <listitem>
+     <para>
+      The index type must support index-only scans.  B-tree indexes always
+      do.  GiST and SP-GiST indexes support index-only scans for some
+      operator classes but not others.  Other index types have no support.
+      The underlying requirement is that the index must physically store, or
+      else be able to reconstruct, the original data value for each index
+      entry.  As a counterexample, GIN indexes cannot support index-only
+      scans because each index entry typically holds only part of the
+      original data value.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      The query must reference only columns stored in the index.  For
+      example, given an index on columns <literal>x</> and <literal>y</> of a
+      table that also has a column <literal>z</>, these queries could use
+      index-only scans:
+<programlisting>
+SELECT x, y FROM tab WHERE x = 'key';
+SELECT x FROM tab WHERE x = 'key' AND y &lt; 42;
+</programlisting>
+      but these queries could not:
+<programlisting>
+SELECT x, z FROM tab WHERE x = 'key';
+SELECT x FROM tab WHERE x = 'key' AND z &lt; 42;
+</programlisting>
+      (Expression indexes and partial indexes complicate this rule,
+      as discussed below.)
+     </para>
+    </listitem>
+   </orderedlist>
+  </para>
+
+  <para>
+   If these two fundamental requirements are met, then all the data values
+   required by the query are available from the index, so an index-only scan
+   is physically possible.  But there is an additional requirement for any
+   table scan in <productname>PostgreSQL</>: it must verify that each
+   retrieved row be <quote>visible</> to the query's MVCC snapshot, as
+   discussed in <xref linkend="mvcc">.  Visibility information is not stored
+   in index entries, only in heap entries; so at first glance it would seem
+   that every row retrieval would require a heap access anyway.  And this is
+   indeed the case, if the table row has been modified recently.  However,
+   for seldom-changing data there is a way around this
+   problem.  <productname>PostgreSQL</> tracks, for each page in a table's
+   heap, whether all rows stored in that page are old enough to be visible to
+   all current and future transactions.  This information is stored in a bit
+   in the table's <firstterm>visibility map</>.  An index-only scan, after
+   finding a candidate index entry, checks the visibility map bit for the
+   corresponding heap page.  If it's set, the row is known visible and so the
+   data can be returned with no further work.  If it's not set, the heap
+   entry must be visited to find out whether it's visible, so no performance
+   advantage is gained over a standard index scan.  Even in the successful
+   case, this approach trades visibility map accesses for heap accesses; but
+   since the visibility map is four orders of magnitude smaller than the heap
+   it describes, far less physical I/O is needed to access it.  In most
+   situations the visibility map remains cached in memory all the time.
+  </para>
+
+  <para>
+   In short, while an index-only scan is possible given the two fundamental
+   requirements, it will be a win only if a significant fraction of the
+   table's heap pages have their all-visible map bits set.  But tables in
+   which a large fraction of the rows are unchanging are common enough to
+   make this type of scan very useful in practice.
+  </para>
+
+  <para>
+   To make effective use of the index-only scan feature, you might choose to
+   create indexes in which only the leading columns are meant to
+   match <literal>WHERE</> clauses, while the trailing columns
+   hold <quote>payload</> data to be returned by a query.  For example, if
+   you commonly run queries like
+<programlisting>
+SELECT y FROM tab WHERE x = 'key';
+</programlisting>
+   the traditional approach to speeding up such queries would be to create an
+   index on <literal>x</> only.  However, an index on <literal>(x, y)</>
+   would offer the possibility of implementing this query as an index-only
+   scan.  As previously discussed, such an index would be larger and hence
+   more expensive than an index on <literal>x</> alone, so this is attractive
+   only if the table is known to be mostly static.  Note it's important that
+   the index be declared on <literal>(x, y)</> not <literal>(y, x)</>, as for
+   most index types (particularly B-trees) searches that do not constrain the
+   leading index columns are not very efficient.
+  </para>
+
+  <para>
+   In principle, index-only scans can be used with expression indexes.
+   For example, given an index on <literal>f(x)</> where <literal>x</> is a
+   table column, it should be possible to execute
+<programlisting>
+SELECT f(x) FROM tab WHERE f(x) &lt; 1;
+</programlisting>
+   as an index-only scan; and this is very attractive if <literal>f()</> is
+   an expensive-to-compute function.  However, <productname>PostgreSQL</>'s
+   planner is currently not very smart about such cases.  It considers a
+   query to be potentially executable by index-only scan only when
+   all <emphasis>columns</> needed by the query are available from the index.
+   In this example, <literal>x</> is not needed except in the
+   context <literal>f(x)</>, but the planner does not notice that and
+   concludes that an index-only scan is not possible.  If an index-only scan
+   seems sufficiently worthwhile, this can be worked around by declaring the
+   index to be on <literal>(f(x), x)</>, where the second column is not
+   expected to be used in practice but is just there to convince the planner
+   that an index-only scan is possible.  An additional caveat, if the goal is
+   to avoid recalculating <literal>f(x)</>, is that the planner won't
+   necessarily match uses of <literal>f(x)</> that aren't in
+   indexable <literal>WHERE</> clauses to the index column.  It will usually
+   get this right in simple queries such as shown above, but not in queries
+   that involve joins.  These deficiencies may be remedied in future versions
+   of <productname>PostgreSQL</>.
+  </para>
+
+  <para>
+   Partial indexes also have interesting interactions with index-only scans.
+   Consider the partial index shown in <xref linkend="indexes-partial-ex3">:
+<programlisting>
+CREATE UNIQUE INDEX tests_success_constraint ON tests (subject, target)
+    WHERE success;
+</programlisting>
+   In principle, we could do an index-only scan on this index to satisfy a
+   query like
+<programlisting>
+SELECT target FROM tests WHERE subject = 'some-subject' AND success;
+</programlisting>
+   But there's a problem: the <literal>WHERE</> clause refers
+   to <literal>success</> which is not available as a result column of the
+   index.  Nonetheless, an index-only scan is possible because the plan does
+   not need to recheck that part of the <literal>WHERE</> clause at runtime:
+   all entries found in the index necessarily have <literal>success = true</>
+   so this need not be explicitly checked in the
+   plan.  <productname>PostgreSQL</> versions 9.6 and later will recognize
+   such cases and allow index-only scans to be generated, but older versions
+   will not.
+  </para>
+ </sect1>
+
+
  <sect1 id="indexes-examine">
   <title>Examining Index Usage</title>
 
diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml
index 521ff56d49..74a6cc0ed7 100644
--- a/doc/src/sgml/maintenance.sgml
+++ b/doc/src/sgml/maintenance.sgml
@@ -102,8 +102,9 @@
      </listitem>
 
      <listitem>
-      <simpara>To update the visibility map, which speeds up index-only
-      scans.</simpara>
+      <simpara>To update the visibility map, which speeds
+      up <link linkend="indexes-index-only-scans">index-only
+      scans</link>.</simpara>
      </listitem>
 
      <listitem>
@@ -363,9 +364,11 @@
     Since <productname>PostgreSQL</productname> indexes don't contain tuple
     visibility information, a normal index scan fetches the heap tuple for each
     matching index entry, to check whether it should be seen by the current
-    transaction.  An <firstterm>index-only scan</>, on the other hand, checks
-    the visibility map first.  If it's known that all tuples on the page are
-    visible, the heap fetch can be skipped.  This is most noticeable on
+    transaction.
+    An <link linkend="indexes-index-only-scans"><firstterm>index-only
+    scan</></link>, on the other hand, checks the visibility map first.
+    If it's known that all tuples on the page are
+    visible, the heap fetch can be skipped.  This is most useful on
     large data sets where the visibility map can prevent disk accesses.
     The visibility map is vastly smaller than the heap, so it can easily be
     cached even when the heap is very large.
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index 9b2e09e385..1b812bd0a9 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -636,9 +636,11 @@ Note that indexes do not have VMs.
 The visibility map stores two bits per heap page.  The first bit, if set,
 indicates that the page is all-visible, or in other words that the page does
 not contain any tuples that need to be vacuumed.
-This information can also be used by <firstterm>index-only scans</> to answer
-queries using only the index tuple.
+This information can also be used
+by <link linkend="indexes-index-only-scans"><firstterm>index-only
+scans</></link> to answer queries using only the index tuple.
 The second bit, if set, means that all tuples on the page have been frozen.
+That means that even an anti-wraparound vacuum need not revisit the page.
 </para>
 
 <para>
-- 
2.40.0