Document evaluation-order considerations for aggregate functions.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 14 Nov 2014 22:19:29 +0000 (17:19 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 14 Nov 2014 22:19:56 +0000 (17:19 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Nov 2014 22:19:29 +0000 (17:19 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Nov 2014 22:19:56 +0000 (17:19 -0500)
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml

index 3e5a2f5092d56ce03a4c6446759c6f9c58b35530..01d24a57c8b4341e89c46798ac082aff69a1b3c6 100644 (file)
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -109,9 +109,11 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
  
      <listitem>
       <para>
-      If the <literal>GROUP BY</literal> clause is specified, the
+      If the <literal>GROUP BY</literal> clause is specified,
+      or if there are aggregate function calls, the
        output is combined into groups of rows that match on one or more
-      values.  If the <literal>HAVING</literal> clause is present, it
+      values, and the results of aggregate functions are computed.
+      If the <literal>HAVING</literal> clause is present, it
        eliminates groups that do not satisfy the given condition.  (See
        <xref linkend="sql-groupby" endterm="sql-groupby-title"> and
        <xref linkend="sql-having" endterm="sql-having-title"> below.)
@@ -637,17 +639,22 @@ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...]
  
     <para>
      Aggregate functions, if any are used, are computed across all rows
-    making up each group, producing a separate value for each group
-    (whereas without <literal>GROUP BY</literal>, an aggregate
-    produces a single value computed across all the selected rows).
-    The set of rows fed to the aggregate function can be further filtered by
+    making up each group, producing a separate value for each group.
+    (If there are aggregate functions but no <literal>GROUP BY</literal>
+    clause, the query is treated as having a single group comprising all
+    the selected rows.)
+    The set of rows fed to each aggregate function can be further filtered by
      attaching a <literal>FILTER</literal> clause to the aggregate function
      call; see <xref linkend="syntax-aggregates"> for more information.  When
      a <literal>FILTER</literal> clause is present, only those rows matching it
-    are included.
-    When <literal>GROUP BY</literal> is present, it is not valid for
+    are included in the input to that aggregate function.
+   </para>
+
+   <para>
+    When <literal>GROUP BY</literal> is present,
+    or any aggregate functions are present, it is not valid for
      the <command>SELECT</command> list expressions to refer to
-    ungrouped columns except within aggregate functions or if the
+    ungrouped columns except within aggregate functions or when the
      ungrouped column is functionally dependent on the grouped columns,
      since there would otherwise be more than one possible value to
      return for an ungrouped column.  A functional dependency exists if
@@ -655,6 +662,14 @@ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...]
      the table containing the ungrouped column.
     </para>
  
+   <para>
+    Keep in mind that all aggregate functions are evaluated before
+    evaluating any <quote>scalar</> expressions in the <literal>HAVING</>
+    clause or <literal>SELECT</> list.  This means that, for example,
+    a <literal>CASE</> expression cannot be used to skip evaluation of
+    an aggregate function; see <xref linkend="syntax-express-eval">.
+   </para>
+
     <para>
      Currently, <literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</>,
      <literal>FOR SHARE</> and <literal>FOR KEY SHARE</> cannot be
@@ -683,7 +698,8 @@ HAVING <replaceable class="parameter">condition</replaceable>
      created by <literal>GROUP BY</literal>.  Each column referenced in
      <replaceable class="parameter">condition</replaceable> must
      unambiguously reference a grouping column, unless the reference
-    appears within an aggregate function.
+    appears within an aggregate function or the ungrouped column is
+    functionally dependent on the grouping columns.
     </para>
  
     <para>
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml

index 2f0680fd0bc0a27fb8456a6e5a10154bcd2ef722..399ae07075978dc4f336644ad5ca48e33ff25296 100644 (file)
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -2426,6 +2426,28 @@ SELECT ... WHERE CASE WHEN x &gt; 0 THEN y/x &gt; 1.5 ELSE false END;
      example, it would be better to sidestep the problem by writing
      <literal>y &gt; 1.5*x</> instead.)
     </para>
+
+   <para>
+    A limitation of this technique is that a <literal>CASE</> cannot
+    prevent evaluation of an aggregate expression contained within it,
+    because aggregate expressions are computed before <quote>scalar</>
+    expressions in a <literal>SELECT</> list or <literal>HAVING</> clause
+    are considered.  For example, the following query can cause a
+    division-by-zero error despite seemingly having protected against it:
+<programlisting>
+SELECT CASE WHEN min(employees) > 0
+            THEN avg(expenses / employees)
+       END
+    FROM departments;
+</programlisting>
+    The <function>min()</> and <function>avg()</> aggregates are computed
+    concurrently over all the input rows, so if any row
+    has <structfield>employees</> equal to zero, the division-by-zero error
+    will occur before there is any opportunity to test the result of
+    <function>min()</>.  Instead, use a <literal>WHERE</>
+    or <literal>FILTER</> clause to prevent problematic input rows from
+    reaching an aggregate function in the first place.
+   </para>
    </sect2>
   </sect1>
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 14 Nov 2014 22:19:29 +0000 (17:19 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 14 Nov 2014 22:19:56 +0000 (17:19 -0500)
doc/src/sgml/ref/select.sgml		patch \| blob \| history
doc/src/sgml/syntax.sgml		patch \| blob \| history