granicus.if.org Git - postgresql/blob - doc/src/sgml/ref/cluster.sgml

   1 <REFENTRY ID="SQL-CLUSTER-1">
   2  <REFMETA>
   3  <REFENTRYTITLE>
   4   CLUSTER
   5  </REFENTRYTITLE>
   6  <REFMISCINFO>SQL - Language Statements</REFMISCINFO>
   7  </REFMETA>
   8  <REFNAMEDIV>
   9  <REFNAME>
  10   CLUSTER
  11  </REFNAME>
  12  <REFPURPOSE>
  13   Gives storage clustering advice to PostgreSQL
  14  </REFPURPOSE>
  15  </REFNAMEDIV>
  16  <REFSYNOPSISDIV>
  17  <REFSYNOPSISDIVINFO>
  18   <DATE>1998-04-15</DATE>
  19  </REFSYNOPSISDIVINFO>
  20  <SYNOPSIS>
  21   CLUSTER <REPLACEABLE CLASS="PARAMETER">indexname</REPLACEABLE> ON <REPLACEABLE CLASS="PARAMETER">table</REPLACEABLE>
  22  </SYNOPSIS>
  23
  24  <REFSECT2 ID="R2-SQL-CLUSTER-1">
  25   <REFSECT2INFO>
  26   <DATE>1998-04-15</DATE>
  27   </REFSECT2INFO>
  28   <TITLE>
  29   Inputs
  30   </TITLE>
  31   <PARA>
  32   </PARA>
  33   <VARIABLELIST>
  34   <VARLISTENTRY>
  35    <TERM>
  36    <ReturnValue>
  37     <REPLACEABLE CLASS="PARAMETER">indexname</REPLACEABLE>
  38    </ReturnValue>
  39    </TERM>
  40    <LISTITEM>
  41    <PARA>
  42     The name of an index.
  43    </PARA>
  44    </LISTITEM>
  45   </VARLISTENTRY>
  46   <VARLISTENTRY>
  47    <TERM>
  48    <ReturnValue>
  49     <REPLACEABLE CLASS="PARAMETER">table</REPLACEABLE>
  50    </ReturnValue>
  51    </TERM>
  52    <LISTITEM>
  53    <PARA>
  54     The name of a table.
  55    </PARA>
  56    </LISTITEM>
  57   </VARLISTENTRY>
  58   </VARIABLELIST>
  59  </REFSECT2>
  60
  61  <REFSECT2 ID="R2-SQL-CLUSTER-2">
  62   <REFSECT2INFO>
  63   <DATE>1998-04-15</DATE>
  64   </REFSECT2INFO>
  65   <TITLE>
  66   Outputs
  67   </TITLE>
  68   <PARA>
  69   </PARA>
  70   <VARIABLELIST>
  71   <VARLISTENTRY>
  72    <TERM>
  73    </TERM>
  74    <LISTITEM>
  75    <PARA>
  76     <VARIABLELIST>
  77         <VARLISTENTRY>
  78          <TERM>
  79          <ReturnValue>CLUSTER</ReturnValue>
  80          </TERM>
  81          <LISTITEM>
  82          <PARA>
  83           The clustering was done successfully.
  84          </PARA>
  85          </LISTITEM>
  86         </VARLISTENTRY>
  87         <VARLISTENTRY>
  88          <TERM>
  89          <ReturnValue>ERROR: relation &lt;<REPLACEABLE CLASS="PARAMETER">tablerelation_number</REPLACEABLE>&gt; inherits "invoice"</ReturnValue>
  90          </TERM>
  91          <LISTITEM>
  92          <PARA>
  93           ???
  94           <comment>
  95           This is not documented anywhere. It seems not to be possible to
  96           cluster a table that is inherited.
  97           </comment>
  98          </PARA>
  99          </LISTITEM>
 100         </VARLISTENTRY>
 101         <VARLISTENTRY>
 102          <TERM>
 103          <ReturnValue>ERROR: Relation x does not exist!</ReturnValue>
 104          </TERM>
 105          <LISTITEM>
 106          <PARA>
 107           ???
 108           <comment>
 109           The relation complained of was not shown in the error message,
 110           which contained a random string instead of the relation name.
 111           </comment>
 112          </PARA>
 113          </LISTITEM>
 114         </VARLISTENTRY>
 115     </variablelist>
 116    </LISTITEM>
 117   </VARLISTENTRY>
 118   </VARIABLELIST>
 119
 120  </REFSECT2>
 121  </REFSYNOPSISDIV>
 122
 123  <REFSECT1 ID="R1-SQL-CLUSTER-1">
 124  <REFSECT1INFO>
 125   <DATE>1998-04-15</DATE>
 126  </REFSECT1INFO>
 127  <TITLE>
 128   Description
 129  </TITLE>
 130  <PARA>
 131   This command instructs PostgreSQL to cluster the class specified
 132   by <replaceable class="parameter">table</replaceable> approximately
 133   based on the index specified by
 134   <replaceable class="parameter">indexname</replaceable>. The index must
 135   already have been defined on <replaceable class="parameter">table</replaceable>.
 136  </PARA>
 137  <para>
 138   When a class is clustered, it is physically reordered
 139   based on the index information. The clustering is static.
 140   In other words, as the class is updated, the changes are
 141   not clustered. No attempt is made to keep new instances or
 142   updated tuples clustered.  If desired, the user can
 143   recluster manually by issuing the command again.
 144  </para>
 145
 146  <para>
 147   The table is actually copied to a temporary table in index
 148   order, then renamed back to the original name.  For this
 149   reason, all grant permissions and other indexes are lost
 150   when clustering is performed.
 151  </para>
 152
 153  <para>
 154   In cases where you are accessing single rows randomly
 155   within a table, the actual order of the data in the heap
 156   table is unimportant. However, if you tend to access some
 157   data more than others, and there is an index that groups
 158   them together, you will benefit from using the CLUSTER
 159   command.
 160  </para>
 161
 162  <para>
 163   CLUSTER is also useful in cases where you use an
 164   index to pull out several rows from a table. If you are
 165   requesting a range of indexed values from a table, or a
 166   single indexed value that has multiple rows that match,
 167   CLUSTER will help because once the index identifies the
 168   heap page for the first row that matches, all other rows
 169   that match are probably already on the same heap page,
 170   saving disk accesses and speeding up the query.
 171  </para>
 172
 173  <para>
 174   There are two ways to cluster data. The first is with the
 175   CLUSTER command, which reorders the original table with
 176   the ordering of the index you specify. This can be slow
 177   on large tables because the rows are fetched from the heap
 178   in index order, and if the heap table is unordered, the
 179   entries are on random pages, so there is one disk page
 180   retrieved for every row moved. PostgreSQL has a cache,
 181   but the majority of a big table will not fit in the cache.
 182  </para>
 183
 184  <para>
 185   Another way is to use
 186   <programlisting>SELECT ... INTO TABLE temp FROM ... ORDER BY ...</programlisting>
 187   This uses the PostgreSQL sorting code in
 188   ORDER BY to match the index, and is much faster for
 189   unordered data. You then drop the old table, use
 190 <programlisting>ALTER TABLE RENAME</programlisting>
 191  to rename 'temp' to the old name, and
 192   recreate the indexes. The only problem is that oids
 193   will not be preserved. From then on, CLUSTER should be
 194   fast because most of the heap data has already been
 195   ordered, and the existing index is used.
 196  </para>
 197
 198  </REFSECT1>
 199  <REFSECT1 ID="R1-SQL-CLUSTER-2">
 200  <TITLE>
 201   Usage
 202  </TITLE>
 203  <PARA>
 204   Cluster the employees relation on the basis of its salary attribute:
 205  </PARA>
 206  <ProgramListing>
 207   CLUSTER emp_ind ON emp
 208  </ProgramListing>
 209  </REFSECT1>
 210
 211  <REFSECT1 ID="R1-SQL-CLUSTER-3">
 212  <TITLE>
 213   Compatibility
 214  </TITLE>
 215  <PARA>
 216  </PARA>
 217
 218  <REFSECT2 ID="R2-SQL-CLUSTER-4">
 219   <REFSECT2INFO>
 220   <DATE>1998-04-15</DATE>
 221   </REFSECT2INFO>
 222   <TITLE>
 223   SQL92
 224   </TITLE>
 225   <PARA>
 226   There is no CLUSTER statement in SQL92.
 227   </PARA>
 228  </refsect2>
 229  </refsect1>
 230 </REFENTRY>
 231
 232
 233 <!-- Keep this comment at the end of the file
 234 Local variables:
 235 mode: sgml
 236 sgml-omittag:t
 237 sgml-shorttag:t
 238 sgml-minimize-attributes:nil
 239 sgml-always-quote-attributes:t
 240 sgml-indent-step:1
 241 sgml-indent-data:t
 242 sgml-parent-document:nil
 243 sgml-default-dtd-file:"../reference.ced"
 244 sgml-exposed-tags:nil
 245 sgml-local-catalogs:"/usr/lib/sgml/catalog"
 246 sgml-local-ecat-files:nil
 247 End:
 248 -->