granicus.if.org Git - postgresql/blob - doc/src/sgml/intagg.sgml

   1 <!-- $PostgreSQL: pgsql/doc/src/sgml/intagg.sgml,v 1.3 2007/12/10 05:32:51 tgl Exp $ -->
   2
   3 <sect1 id="intagg">
   4  <title>intagg</title>
   5
   6  <indexterm zone="intagg">
   7   <primary>intagg</primary>
   8  </indexterm>
   9
  10  <para>
  11   The <filename>intagg</filename> module provides an integer aggregator and an
  12   enumerator.
  13  </para>
  14
  15  <sect2>
  16   <title>Functions</title>
  17
  18  <para>
  19   The aggregator is an aggregate function
  20   <function>int_array_aggregate(integer)</>
  21   that produces an integer array
  22   containing exactly the integers it is fed.
  23   Here is a not-tremendously-useful example:
  24  </para>
  25
  26  <programlisting>
  27 test=# select int_array_aggregate(i) from
  28 test-#   generate_series(1,10,2) i;
  29  int_array_aggregate
  30 ---------------------
  31  {1,3,5,7,9}
  32 (1 row)
  33  </programlisting>
  34
  35  <para>
  36   The enumerator is a function
  37   <function>int_array_enum(integer[])</>
  38   that returns <type>setof integer</>.  It is essentially the reverse
  39   operation of the aggregator: given an array of integers, expand it
  40   into a set of rows.  For example,
  41  </para>
  42
  43  <programlisting>
  44 test=# select * from int_array_enum(array[1,3,5,7,9]);
  45  int_array_enum
  46 ----------------
  47               1
  48               3
  49               5
  50               7
  51               9
  52 (5 rows)
  53  </programlisting>
  54
  55  </sect2>
  56
  57  <sect2>
  58   <title>Sample Uses</title>
  59
  60   <para>
  61    Many database systems have the notion of a one to many table. Such a table
  62    usually sits between two indexed tables, for example:
  63   </para>
  64
  65  <programlisting>
  66 CREATE TABLE left (id INT PRIMARY KEY, ...);
  67 CREATE TABLE right (id INT PRIMARY KEY, ...);
  68 CREATE TABLE one_to_many(left INT REFERENCES left, right INT REFERENCES right);
  69  </programlisting>
  70
  71  <para>
  72   It is typically used like this:
  73  </para>
  74
  75  <programlisting>
  76   SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right)
  77     WHERE one_to_many.left = <replaceable>item</>;
  78  </programlisting>
  79
  80  <para>
  81   This will return all the items in the right hand table for an entry
  82   in the left hand table. This is a very common construct in SQL.
  83  </para>
  84
  85  <para>
  86   Now, this methodology can be cumbersome with a very large number of
  87   entries in the <structname>one_to_many</> table.  Often,
  88   a join like this would result in an index scan
  89   and a fetch for each right hand entry in the table for a particular
  90   left hand entry. If you have a very dynamic system, there is not much you
  91   can do. However, if you have some data which is fairly static, you can
  92   create a summary table with the aggregator.
  93  </para>
  94
  95  <programlisting>
  96 CREATE TABLE summary as
  97   SELECT left, int_array_aggregate(right) AS right
  98   FROM one_to_many
  99   GROUP BY left;
 100  </programlisting>
 101
 102  <para>
 103   This will create a table with one row per left item, and an array
 104   of right items. Now this is pretty useless without some way of using
 105   the array; that's why there is an array enumerator.  You can do
 106  </para>
 107
 108  <programlisting>
 109 SELECT left, int_array_enum(right) FROM summary WHERE left = <replaceable>item</>;
 110  </programlisting>
 111
 112  <para>
 113   The above query using <function>int_array_enum</> produces the same results
 114   as
 115  </para>
 116
 117  <programlisting>
 118 SELECT left, right FROM one_to_many WHERE left = <replaceable>item</>;
 119  </programlisting>
 120
 121  <para>
 122   The difference is that the query against the summary table has to get
 123   only one row from the table, whereas the direct query against
 124   <structname>one_to_many</> must index scan and fetch a row for each entry.
 125  </para>
 126
 127  <para>
 128   On one system, an <command>EXPLAIN</> showed a query with a cost of 8488 was
 129   reduced to a cost of 329.  The original query was a join involving the
 130   <structname>one_to_many</> table, which was replaced by:
 131  </para>
 132
 133  <programlisting>
 134 SELECT right, count(right) FROM
 135   ( SELECT left, int_array_enum(right) AS right
 136     FROM summary JOIN (SELECT left FROM left_table WHERE left = <replaceable>item</>) AS lefts
 137          ON (summary.left = lefts.left)
 138   ) AS list
 139   GROUP BY right
 140   ORDER BY count DESC;
 141  </programlisting>
 142
 143  </sect2>
 144
 145 </sect1>