SQL2003-standard statistical aggregates, by Sergey Koposov. I've added only

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index c9b474e0893e5879bad088b14e096e2851801d28..678b1cbeca8729179cb3377b7475137a8344de6a 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.327 2006/07/16 23:59:58 neilc Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.328 2006/07/28 18:33:03 tgl Exp $ -->
  
   <chapter id="functions">
    <title>Functions and Operators</title>
@@ -7785,16 +7785,18 @@ SELECT NULLIF(value, '(none)') ...
  
    <para>
     <firstterm>Aggregate functions</firstterm> compute a single result
-   value from a set of input values.  <xref
-   linkend="functions-aggregate-table"> shows the built-in aggregate
-   functions.  The special syntax considerations for aggregate
+   value from a set of input values.  The built-in aggregate functions
+   are listed in
+   <xref linkend="functions-aggregate-table"> and
+   <xref linkend="functions-aggregate-statistics-table">.
+   The special syntax considerations for aggregate
     functions are explained in <xref linkend="syntax-aggregates">.
     Consult <xref linkend="tutorial-agg"> for additional introductory
     information.
    </para>
  
    <table id="functions-aggregate-table">
-   <title>Aggregate Functions</title>
+   <title>General-Purpose Aggregate Functions</title>
  
     <tgroup cols="4">
      <thead>
@@ -7897,7 +7899,7 @@ SELECT NULLIF(value, '(none)') ...
        <entry><function>count(*)</function></entry>
        <entry></entry>
        <entry><type>bigint</type></entry>
-      <entry>number of input values</entry>
+      <entry>number of input rows</entry>
       </row>
  
       <row>
@@ -7905,7 +7907,7 @@ SELECT NULLIF(value, '(none)') ...
        <entry>any</entry>
        <entry><type>bigint</type></entry>
        <entry>
-       number of input values for which the value of <replaceable
+       number of input rows for which the value of <replaceable
         class="parameter">expression</replaceable> is not null
        </entry>
       </row>
@@ -7948,6 +7950,333 @@ SELECT NULLIF(value, '(none)') ...
        </entry>
       </row>
  
+     <row>
+      <entry><function>sum(<replaceable class="parameter">expression</replaceable>)</function></entry>
+      <entry>
+       <type>smallint</type>, <type>int</type>,
+       <type>bigint</type>, <type>real</type>, <type>double
+       precision</type>, <type>numeric</type>, or
+       <type>interval</type>
+      </entry>
+      <entry>
+       <type>bigint</type> for <type>smallint</type> or
+       <type>int</type> arguments, <type>numeric</type> for
+       <type>bigint</type> arguments, <type>double precision</type>
+       for floating-point arguments, otherwise the same as the
+       argument data type
+      </entry>
+      <entry>sum of <replaceable class="parameter">expression</replaceable> across all input values</entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   It should be noted that except for <function>count</function>,
+   these functions return a null value when no rows are selected.  In
+   particular, <function>sum</function> of no rows returns null, not
+   zero as one might expect.  The <function>coalesce</function> function may be
+   used to substitute zero for null when necessary.
+  </para>
+
+  <note>
+    <indexterm>
+      <primary>ANY</primary>
+    </indexterm>
+    <indexterm>
+      <primary>SOME</primary>
+    </indexterm>
+    <para>
+      Boolean aggregates <function>bool_and</function> and 
+      <function>bool_or</function> correspond to standard SQL aggregates
+      <function>every</function> and <function>any</function> or
+      <function>some</function>. 
+      As for <function>any</function> and <function>some</function>, 
+      it seems that there is an ambiguity built into the standard syntax:
+<programlisting>
+SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...;
+</programlisting>
+      Here <function>ANY</function> can be considered both as leading
+      to a subquery or as an aggregate if the select expression returns 1 row.
+      Thus the standard name cannot be given to these aggregates.
+    </para>
+  </note>
+
+  <note>
+   <para>
+    Users accustomed to working with other SQL database management
+    systems may be surprised by the performance of the
+    <function>count</function> aggregate when it is applied to the
+    entire table. A query like:
+<programlisting>
+SELECT count(*) FROM sometable;
+</programlisting>
+    will be executed by <productname>PostgreSQL</productname> using a
+    sequential scan of the entire table.
+   </para>
+  </note>
+
+
+  <para>
+   <xref linkend="functions-aggregate-statistics-table"> shows
+   aggregate functions typically used in statistical analysis.
+   (These are separated out merely to avoid cluttering the listing
+   of more-commonly-used aggregates.)  Where the description mentions
+   <replaceable class="parameter">N</replaceable>, it means the
+   number of input rows for which all the input expressions are non-null.
+   In all cases, null is returned if the computation is meaningless,
+   for example when <replaceable class="parameter">N</replaceable> is zero.
+  </para>
+
+  <indexterm>
+   <primary>statistics</primary>
+  </indexterm>
+  <indexterm>
+   <primary>linear regression</primary>
+  </indexterm>
+
+  <table id="functions-aggregate-statistics-table">
+   <title>Aggregate Functions for Statistics</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Function</entry>
+      <entry>Argument Type</entry>
+      <entry>Return Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>correlation</primary>
+       </indexterm>
+       <function>corr(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sqrt((<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>))^2 / ((<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2) * (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">Y</replaceable>^2) - sum(<replaceable
+      class="parameter">Y</replaceable>)^2)))</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>covariance</primary>
+        <secondary>population</secondary>
+       </indexterm>
+       <function>covar_pop(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable>) / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>covariance</primary>
+        <secondary>sample</secondary>
+       </indexterm>
+       <function>covar_samp(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable>) / (<replaceable
+      class="parameter">N</replaceable> - 1)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_avgx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>) /
+      <replaceable class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_avgy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">Y</replaceable>) /
+      <replaceable class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_count(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>bigint</type>
+      </entry>
+      <entry>number of input rows in which both expressions are non-null</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>regression intercept</primary>
+       </indexterm>
+       <function>regr_intercept(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">Y</replaceable>) *
+      sum(<replaceable class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>)) / (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_r2(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>))^2 / ((<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2) * (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">Y</replaceable>^2) - sum(<replaceable
+      class="parameter">Y</replaceable>)^2))</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>regression slope</primary>
+       </indexterm>
+       <function>regr_slope(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>)) / (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_sxx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>^2) -
+      sum(<replaceable class="parameter">X</replaceable>)^2 / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_sxy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_syy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">Y</replaceable>^2) -
+      sum(<replaceable class="parameter">Y</replaceable>)^2 / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
       <row>
        <entry>
         <indexterm>
@@ -8007,24 +8336,6 @@ SELECT NULLIF(value, '(none)') ...
        <entry>sample standard deviation of the input values</entry>
       </row>
  
-     <row>
-      <entry><function>sum(<replaceable class="parameter">expression</replaceable>)</function></entry>
-      <entry>
-       <type>smallint</type>, <type>int</type>,
-       <type>bigint</type>, <type>real</type>, <type>double
-       precision</type>, <type>numeric</type>, or
-       <type>interval</type>
-      </entry>
-      <entry>
-       <type>bigint</type> for <type>smallint</type> or
-       <type>int</type> arguments, <type>numeric</type> for
-       <type>bigint</type> arguments, <type>double precision</type>
-       for floating-point arguments, otherwise the same as the
-       argument data type
-      </entry>
-      <entry>sum of <replaceable class="parameter">expression</replaceable> across all input values</entry>
-     </row>
-
       <row>
        <entry>
         <indexterm>
@@ -8087,50 +8398,6 @@ SELECT NULLIF(value, '(none)') ...
     </tgroup>
    </table>
  
-  <para>
-   It should be noted that except for <function>count</function>,
-   these functions return a null value when no rows are selected.  In
-   particular, <function>sum</function> of no rows returns null, not
-   zero as one might expect.  The <function>coalesce</function> function may be
-   used to substitute zero for null when necessary.
-  </para>
-
-  <note>
-    <indexterm>
-      <primary>ANY</primary>
-    </indexterm>
-    <indexterm>
-      <primary>SOME</primary>
-    </indexterm>
-    <para>
-      Boolean aggregates <function>bool_and</function> and 
-      <function>bool_or</function> correspond to standard SQL aggregates
-      <function>every</function> and <function>any</function> or
-      <function>some</function>. 
-      As for <function>any</function> and <function>some</function>, 
-      it seems that there is an ambiguity built into the standard syntax:
-<programlisting>
-SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...;
-</programlisting>
-      Here <function>ANY</function> can be considered both as leading
-      to a subquery or as an aggregate if the select expression returns 1 row.
-      Thus the standard name cannot be given to these aggregates.
-    </para>
-  </note>
-
-  <note>
-   <para>
-    Users accustomed to working with other SQL database management
-    systems may be surprised by the performance of the
-    <function>count</function> aggregate when it is applied to the
-    entire table. A query like:
-<programlisting>
-SELECT count(*) FROM sometable;
-</programlisting>
-    will be executed by <productname>PostgreSQL</productname> using a
-    sequential scan of the entire table.
-   </para>
-  </note>
   </sect1>
  
  
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c

index 55e79e85ed81fe1e46840d7873f926209a1bd06b..1f8d081c8bf31b443210ed13bac8bc125046191c 100644 (file)
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.127 2006/07/14 14:52:24 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.128 2006/07/28 18:33:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1878,18 +1878,18 @@ setseed(PG_FUNCTION_ARGS)
   */
  
  static float8 *
-check_float8_array(ArrayType *transarray, const char *caller)
+check_float8_array(ArrayType *transarray, const char *caller, int n)
  {
         /*
-        * We expect the input to be a 3-element float array; verify that. We
+        * We expect the input to be an N-element float array; verify that. We
          * don't need to use deconstruct_array() since the array data is just
-        * going to look like a C array of 3 float8 values.
+        * going to look like a C array of N float8 values.
          */
         if (ARR_NDIM(transarray) != 1 ||
-               ARR_DIMS(transarray)[0] != 3 ||
+               ARR_DIMS(transarray)[0] != n ||
                 ARR_HASNULL(transarray) ||
                 ARR_ELEMTYPE(transarray) != FLOAT8OID)
-               elog(ERROR, "%s: expected 3-element float8 array", caller);
+               elog(ERROR, "%s: expected %d-element float8 array", caller, n);
         return (float8 *) ARR_DATA_PTR(transarray);
  }
  
@@ -1903,7 +1903,7 @@ float8_accum(PG_FUNCTION_ARGS)
                                 sumX,
                                 sumX2;
  
-       transvalues = check_float8_array(transarray, "float8_accum");
+       transvalues = check_float8_array(transarray, "float8_accum", 3);
         N = transvalues[0];
         sumX = transvalues[1];
         sumX2 = transvalues[2];
@@ -1953,7 +1953,7 @@ float4_accum(PG_FUNCTION_ARGS)
                                 sumX2,
                                 newval;
  
-       transvalues = check_float8_array(transarray, "float4_accum");
+       transvalues = check_float8_array(transarray, "float4_accum", 3);
         N = transvalues[0];
         sumX = transvalues[1];
         sumX2 = transvalues[2];
@@ -2003,7 +2003,7 @@ float8_avg(PG_FUNCTION_ARGS)
         float8          N,
                                 sumX;
  
-       transvalues = check_float8_array(transarray, "float8_avg");
+       transvalues = check_float8_array(transarray, "float8_avg", 3);
         N = transvalues[0];
         sumX = transvalues[1];
         /* ignore sumX2 */
@@ -2025,7 +2025,7 @@ float8_var_pop(PG_FUNCTION_ARGS)
                                 sumX2,
                                 numerator;
  
-       transvalues = check_float8_array(transarray, "float8_var_pop");
+       transvalues = check_float8_array(transarray, "float8_var_pop", 3);
         N = transvalues[0];
         sumX = transvalues[1];
         sumX2 = transvalues[2];
@@ -2053,7 +2053,7 @@ float8_var_samp(PG_FUNCTION_ARGS)
                                 sumX2,
                                 numerator;
  
-       transvalues = check_float8_array(transarray, "float8_var_samp");
+       transvalues = check_float8_array(transarray, "float8_var_samp", 3);
         N = transvalues[0];
         sumX = transvalues[1];
         sumX2 = transvalues[2];
@@ -2081,7 +2081,7 @@ float8_stddev_pop(PG_FUNCTION_ARGS)
                                 sumX2,
                                 numerator;
  
-       transvalues = check_float8_array(transarray, "float8_stddev_pop");
+       transvalues = check_float8_array(transarray, "float8_stddev_pop", 3);
         N = transvalues[0];
         sumX = transvalues[1];
         sumX2 = transvalues[2];
@@ -2109,7 +2109,7 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
                                 sumX2,
                                 numerator;
  
-       transvalues = check_float8_array(transarray, "float8_stddev_samp");
+       transvalues = check_float8_array(transarray, "float8_stddev_samp", 3);
         N = transvalues[0];
         sumX = transvalues[1];
         sumX2 = transvalues[2];
@@ -2127,6 +2127,362 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
         PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0))));
  }
  
+/*
+ *             =========================
+ *             SQL2003 BINARY AGGREGATES
+ *             =========================
+ *
+ * The transition datatype for all these aggregates is a 6-element array of
+ * float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y)
+ * in that order.  Note that Y is the first argument to the aggregates!
+ *
+ * It might seem attractive to optimize this by having multiple accumulator
+ * functions that only calculate the sums actually needed.  But on most
+ * modern machines, a couple of extra floating-point multiplies will be
+ * insignificant compared to the other per-tuple overhead, so I've chosen
+ * to minimize code space instead.
+ */
+
+Datum
+float8_regr_accum(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8          newvalY = PG_GETARG_FLOAT8(1);
+       float8          newvalX = PG_GETARG_FLOAT8(2);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumY2, sumXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+       sumXY = transvalues[5];
+
+       N += 1.0;
+       sumX += newvalX;
+       sumX2 += newvalX * newvalX;
+       sumY += newvalY;
+       sumY2 += newvalY * newvalY;
+       sumXY += newvalX * newvalY;
+
+       /*
+        * If we're invoked by nodeAgg, we can cheat and modify our first
+        * parameter in-place to reduce palloc overhead. Otherwise we construct a
+        * new array with the updated transition data and return it.
+        */
+       if (fcinfo->context && IsA(fcinfo->context, AggState))
+       {
+               transvalues[0] = N;
+               transvalues[1] = sumX;
+               transvalues[2] = sumX2;
+               transvalues[3] = sumY;
+               transvalues[4] = sumY2;
+               transvalues[5] = sumXY;
+
+               PG_RETURN_ARRAYTYPE_P(transarray);
+       }
+       else
+       {
+               Datum           transdatums[6];
+               ArrayType  *result;
+
+               transdatums[0] = Float8GetDatumFast(N);
+               transdatums[1] = Float8GetDatumFast(sumX);
+               transdatums[2] = Float8GetDatumFast(sumX2);
+               transdatums[3] = Float8GetDatumFast(sumY);
+               transdatums[4] = Float8GetDatumFast(sumY2);
+               transdatums[5] = Float8GetDatumFast(sumXY);
+
+               result = construct_array(transdatums, 6,
+                                                                FLOAT8OID,
+                                                                sizeof(float8),
+                                                                false /* float8 byval */ , 'd');
+
+               PG_RETURN_ARRAYTYPE_P(result);
+       }
+}
+
+Datum
+float8_regr_sxx(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumX,
+                               sumX2,
+                               numerator;
+
+       transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumX2 - sumX * sumX;
+
+       /* Watch out for roundoff error producing a negative numerator */
+       if (numerator <= 0.0)
+               PG_RETURN_FLOAT8(0.0);
+
+       PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_syy(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumY,
+                               sumY2,
+                               numerator;
+
+       transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+       N = transvalues[0];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumY2 - sumY * sumY;
+
+       /* Watch out for roundoff error producing a negative numerator */
+       if (numerator <= 0.0)
+               PG_RETURN_FLOAT8(0.0);
+
+       PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_sxy(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumY, sumXY, numerator;
+
+       transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumXY - sumX * sumY;
+
+       /* A negative result is valid here */
+
+       PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_avgx(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumX;
+
+       transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       PG_RETURN_FLOAT8(sumX / N);
+}
+
+Datum
+float8_regr_avgy(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+       N = transvalues[0];
+       sumY = transvalues[3];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       PG_RETURN_FLOAT8(sumY / N);
+}
+
+Datum
+float8_covar_pop(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumY, sumXY, numerator;
+
+       transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumXY - sumX * sumY;
+
+       PG_RETURN_FLOAT8(numerator / (N * N));
+}
+
+Datum
+float8_covar_samp(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumY, sumXY, numerator;
+
+       transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is <= 1 we should return NULL */
+       if (N < 2.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumXY - sumX * sumY;
+
+       PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
+}
+
+Datum
+float8_corr(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumY2, sumXY, numeratorX,
+               numeratorY, numeratorXY;
+
+       transvalues = check_float8_array(transarray, "float8_corr", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorY = N * sumY2 - sumY * sumY;
+       numeratorXY = N * sumXY - sumX * sumY;
+       if (numeratorX <= 0 || numeratorY <= 0)
+               PG_RETURN_NULL();
+       
+       PG_RETURN_FLOAT8(sqrt((numeratorXY * numeratorXY) /
+                                                 (numeratorX * numeratorY)));
+}
+
+Datum
+float8_regr_r2(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumY2, sumXY, numeratorX,
+               numeratorY, numeratorXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorY = N * sumY2 - sumY * sumY;
+       numeratorXY = N * sumXY - sumX * sumY;
+       if (numeratorX <= 0)
+               PG_RETURN_NULL();
+       /* per spec, horizontal line produces 1.0 */
+       if (numeratorY <= 0)
+               PG_RETURN_FLOAT8(1.0);
+
+       PG_RETURN_FLOAT8((numeratorXY * numeratorXY) /
+                                        (numeratorX * numeratorY));
+}
+
+Datum
+float8_regr_slope(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumXY, numeratorX,
+               numeratorXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorXY = N * sumXY - sumX * sumY;
+       if (numeratorX <= 0)
+               PG_RETURN_NULL();
+       
+       PG_RETURN_FLOAT8(numeratorXY / numeratorX);
+}
+
+Datum
+float8_regr_intercept(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumXY, numeratorX,
+               numeratorXXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorXXY = sumY * sumX2 - sumX * sumXY;
+       if (numeratorX <= 0)
+               PG_RETURN_NULL();
+       
+       PG_RETURN_FLOAT8(numeratorXXY / numeratorX);
+}
+
  
  /*
   *             ====================================
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c

index 1a6f36c40b02bf6d3368767d966592bf2415c1b2..00432994c5b0b070ce4d9a725fa70128a88a94c1 100644 (file)
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.60 2006/03/05 15:58:42 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.61 2006/07/28 18:33:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -694,6 +694,28 @@ int8inc(PG_FUNCTION_ARGS)
         }
  }
  
+/*
+ * These functions are exactly like int8inc but are used for aggregates that
+ * count only non-null values.  Since the functions are declared strict,
+ * the null checks happen before we ever get here, and all we need do is
+ * increment the state value.  We could actually make these pg_proc entries
+ * point right at int8inc, but then the opr_sanity regression test would
+ * complain about mismatched entries for a built-in function.
+ */
+
+Datum
+int8inc_any(PG_FUNCTION_ARGS)
+{
+       return int8inc(fcinfo);
+}
+
+Datum
+int8inc_float8_float8(PG_FUNCTION_ARGS)
+{
+       return int8inc(fcinfo);
+}
+
+
  Datum
  int8larger(PG_FUNCTION_ARGS)
  {
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 00896bce2e2c299a48bf838eac25954c08706b05..7386e91eaf6af74072872344b0adc99c01bf135f 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.342 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.343 2006/07/28 18:33:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200607271
+#define CATALOG_VERSION_NO     200607281
  
  #endif
diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h

index f77328b9e918c6299d1716c043c35a3d6f756ce7..243648b4aaed799c7b412c1b8187c595aa55f851 100644 (file)
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.56 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.57 2006/07/28 18:33:04 tgl Exp $
   *
   * NOTES
   *       the genbki.sh script reads this file and generates .bki
@@ -192,6 +192,20 @@ DATA(insert ( 2157 float4_accum    float8_stddev_samp      0       1022    "{0,0,0}" ));
  DATA(insert ( 2158     float8_accum    float8_stddev_samp      0       1022    "{0,0,0}" ));
  DATA(insert ( 2159     numeric_accum   numeric_stddev_samp     0       1231    "{0,0,0}" ));
  
+/* SQL2003 binary regression aggregates */
+DATA(insert ( 2818     int8inc_float8_float8           -                               0       20              "0" ));
+DATA(insert ( 2819     float8_regr_accum       float8_regr_sxx                 0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2820     float8_regr_accum       float8_regr_syy                 0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2821     float8_regr_accum       float8_regr_sxy                 0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2822     float8_regr_accum       float8_regr_avgx                0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2823     float8_regr_accum       float8_regr_avgy                0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2824     float8_regr_accum       float8_regr_r2                  0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2825     float8_regr_accum       float8_regr_slope               0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2826     float8_regr_accum       float8_regr_intercept   0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2827     float8_regr_accum       float8_covar_pop                0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2828     float8_regr_accum       float8_covar_samp               0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2829     float8_regr_accum       float8_corr                             0       1022    "{0,0,0,0,0,0}" ));
+
  /* boolean-and and boolean-or */
  DATA(insert ( 2517     booland_statefunc       -                       0       16              _null_ ));
  DATA(insert ( 2518     boolor_statefunc        -                       0       16              _null_ ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index 729f92d1bf47a0be8431ae76ab4b904cb5fa17ba..0a1a6936c53f138cbb144df00b4b9b8abeacd71e 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.418 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.419 2006/07/28 18:33:04 tgl Exp $
   *
   * NOTES
   *       The script catalog/genbki.sh reads this file and generates .bki
@@ -1534,7 +1534,7 @@ DESCR("truncate interval to specified units");
  
  DATA(insert OID = 1219 (  int8inc                 PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_      int8inc - _null_ ));
  DESCR("increment");
-DATA(insert OID = 2804 (  int8inc_any     PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_ int8inc - _null_ ));
+DATA(insert OID = 2804 (  int8inc_any     PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_ int8inc_any - _null_ ));
  DESCR("increment, ignores second argument");
  DATA(insert OID = 1230 (  int8abs                 PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_      int8abs - _null_ ));
  DESCR("absolute value");
@@ -2730,6 +2730,32 @@ DATA(insert OID = 1963 (  int4_avg_accum   PGNSP PGUID 12 f f t f i 2 1016 "1016
  DESCR("AVG(int4) transition function");
  DATA(insert OID = 1964 (  int8_avg                PGNSP PGUID 12 f f t f i 1 1700 "1016" _null_ _null_ _null_  int8_avg - _null_ ));
  DESCR("AVG(int) aggregate final function");
+DATA(insert OID = 2805 (  int8inc_float8_float8                PGNSP PGUID 12 f f t f i 3 20 "20 701 701" _null_ _null_ _null_ int8inc_float8_float8 - _null_ ));
+DESCR("REGR_COUNT(double, double) transition function");
+DATA(insert OID = 2806 (  float8_regr_accum                    PGNSP PGUID 12 f f t f i 3 1022 "1022 701 701" _null_ _null_ _null_ float8_regr_accum - _null_ ));
+DESCR("REGR_...(double, double) transition function");
+DATA(insert OID = 2807 (  float8_regr_sxx                      PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxx - _null_ ));
+DESCR("REGR_SXX(double, double) aggregate final function");
+DATA(insert OID = 2808 (  float8_regr_syy                      PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_syy - _null_ ));
+DESCR("REGR_SYY(double, double) aggregate final function");
+DATA(insert OID = 2809 (  float8_regr_sxy                      PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxy - _null_ ));
+DESCR("REGR_SXY(double, double) aggregate final function");
+DATA(insert OID = 2810 (  float8_regr_avgx                     PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgx - _null_ ));
+DESCR("REGR_AVGX(double, double) aggregate final function");
+DATA(insert OID = 2811 (  float8_regr_avgy                     PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgy - _null_ ));
+DESCR("REGR_AVGY(double, double) aggregate final function");
+DATA(insert OID = 2812 (  float8_regr_r2                       PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_r2 - _null_ ));
+DESCR("REGR_R2(double, double) aggregate final function");
+DATA(insert OID = 2813 (  float8_regr_slope                    PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_slope - _null_ ));
+DESCR("REGR_SLOPE(double, double) aggregate final function");
+DATA(insert OID = 2814 (  float8_regr_intercept                PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_intercept - _null_ ));
+DESCR("REGR_INTERCEPT(double, double) aggregate final function");
+DATA(insert OID = 2815 (  float8_covar_pop                     PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_pop - _null_ ));
+DESCR("COVAR_POP(double, double) aggregate final function");
+DATA(insert OID = 2816 (  float8_covar_samp                    PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_samp - _null_ ));
+DESCR("COVAR_SAMP(double, double) aggregate final function");
+DATA(insert OID = 2817 (  float8_corr                          PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_corr - _null_ ));
+DESCR("CORR(double, double) aggregate final function");
  
  /* To ASCII conversion */
  DATA(insert OID = 1845 ( to_ascii      PGNSP PGUID 12 f f t f i 1      25 "25" _null_ _null_ _null_    to_ascii_default - _null_ ));
@@ -3196,6 +3222,20 @@ DATA(insert OID = 2157 (  stddev                 PGNSP PGUID 12 t f f f i 1 701 "700" _null_ _
  DATA(insert OID = 2158 (  stddev                       PGNSP PGUID 12 t f f f i 1 701 "701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
  DATA(insert OID = 2159 (  stddev                       PGNSP PGUID 12 t f f f i 1 1700 "1700" _null_ _null_ _null_ aggregate_dummy - _null_ ));
  
+DATA(insert OID = 2818 (  regr_count           PGNSP PGUID 12 t f f f i 2 20 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2819 (  regr_sxx                     PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2820 (  regr_syy                     PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2821 (  regr_sxy                     PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2822 (  regr_avgx                    PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2823 (  regr_avgy                    PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2824 (  regr_r2                      PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2825 (  regr_slope           PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2826 (  regr_intercept       PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+
+DATA(insert OID = 2827 (  covar_pop                    PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2828 (  covar_samp           PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2829 (  corr                         PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+
  DATA(insert OID = 2160 ( text_pattern_lt        PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt - _null_ ));
  DATA(insert OID = 2161 ( text_pattern_le        PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le - _null_ ));
  DATA(insert OID = 2162 ( text_pattern_eq        PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_eq - _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h

index 74dee731510556e4184af2eeb041e3cff9d32c06..f0bc0531771d87791fe68671afd05e9a50043c92 100644 (file)
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.280 2006/07/21 20:51:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.281 2006/07/28 18:33:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -348,6 +348,18 @@ extern Datum float8_var_pop(PG_FUNCTION_ARGS);
  extern Datum float8_var_samp(PG_FUNCTION_ARGS);
  extern Datum float8_stddev_pop(PG_FUNCTION_ARGS);
  extern Datum float8_stddev_samp(PG_FUNCTION_ARGS);
+extern Datum float8_regr_accum(PG_FUNCTION_ARGS);
+extern Datum float8_regr_sxx(PG_FUNCTION_ARGS);
+extern Datum float8_regr_syy(PG_FUNCTION_ARGS);
+extern Datum float8_regr_sxy(PG_FUNCTION_ARGS);
+extern Datum float8_regr_avgx(PG_FUNCTION_ARGS);
+extern Datum float8_regr_avgy(PG_FUNCTION_ARGS);
+extern Datum float8_covar_pop(PG_FUNCTION_ARGS);
+extern Datum float8_covar_samp(PG_FUNCTION_ARGS);
+extern Datum float8_corr(PG_FUNCTION_ARGS);
+extern Datum float8_regr_r2(PG_FUNCTION_ARGS);
+extern Datum float8_regr_slope(PG_FUNCTION_ARGS);
+extern Datum float8_regr_intercept(PG_FUNCTION_ARGS);
  extern Datum float48pl(PG_FUNCTION_ARGS);
  extern Datum float48mi(PG_FUNCTION_ARGS);
  extern Datum float48mul(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/int8.h b/src/include/utils/int8.h

index 0fc5eb61a3a4523d13156118e4e959ea26fa791f..36cd9d12737fd8e2938ce73e70f2e8ef8b5c91c0 100644 (file)
--- a/src/include/utils/int8.h
+++ b/src/include/utils/int8.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.44 2006/03/05 15:59:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.45 2006/07/28 18:33:04 tgl Exp $
   *
   * NOTES
   * These data types are supported on all 64-bit architectures, and may
@@ -74,6 +74,8 @@ extern Datum int8div(PG_FUNCTION_ARGS);
  extern Datum int8abs(PG_FUNCTION_ARGS);
  extern Datum int8mod(PG_FUNCTION_ARGS);
  extern Datum int8inc(PG_FUNCTION_ARGS);
+extern Datum int8inc_any(PG_FUNCTION_ARGS);
+extern Datum int8inc_float8_float8(PG_FUNCTION_ARGS);
  extern Datum int8larger(PG_FUNCTION_ARGS);
  extern Datum int8smaller(PG_FUNCTION_ARGS);
  
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 3b0c0f467a9b9f6537c7902621d8d0d7fcd69de5..34b720f0f433974b234fb876a2421f2ea06cc8e8 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -137,6 +137,61 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
            0 |            
  (1 row)
  
+-- SQL2003 binary aggregates
+SELECT regr_count(b, a) FROM aggtest;
+ regr_count 
+------------
+          4
+(1 row)
+
+SELECT regr_sxx(b, a) FROM aggtest;
+ regr_sxx 
+----------
+     5099
+(1 row)
+
+SELECT regr_syy(b, a) FROM aggtest;
+     regr_syy     
+------------------
+ 68756.2156939293
+(1 row)
+
+SELECT regr_sxy(b, a) FROM aggtest;
+     regr_sxy     
+------------------
+ 2614.51582155004
+(1 row)
+
+SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
+ regr_avgx |    regr_avgy     
+-----------+------------------
+      49.5 | 107.943152273074
+(1 row)
+
+SELECT regr_r2(b, a) FROM aggtest;
+      regr_r2       
+--------------------
+ 0.0194977982031803
+(1 row)
+
+SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
+    regr_slope     |  regr_intercept  
+-------------------+------------------
+ 0.512750700441271 | 82.5619926012309
+(1 row)
+
+SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
+    covar_pop    |    covar_samp    
+-----------------+------------------
+ 653.62895538751 | 871.505273850014
+(1 row)
+
+SELECT corr(b, a) FROM aggtest;
+       corr        
+-------------------
+ 0.139634516517873
+(1 row)
+
  SELECT count(four) AS cnt_1000 FROM onek;
   cnt_1000 
  ----------
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 5c905f55a54b3be0bbcd27e878b43bdd9bf6cce0..3ded3cc926cd32b4aab796e971cd5d7f8490af38 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -66,15 +66,15 @@ WHERE p1.oid != p2.oid AND
  -- of the same internal function (ie, matching prosrc fields).  It's OK to
  -- have several entries with different pronames for the same internal function,
  -- but conflicts in the number of arguments and other critical items should
--- be complained of.
--- Ignore aggregates, since they all use "aggregate_dummy".
--- As of 8.2, this finds int8inc and int8inc_any, which are OK.
+-- be complained of.  (We don't check data types here; see next query.)
+-- Note: ignore aggregate functions here, since they all point to the same
+-- dummy built-in function.
  SELECT p1.oid, p1.proname, p2.oid, p2.proname
  FROM pg_proc AS p1, pg_proc AS p2
  WHERE p1.oid < p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
-    p1.proisagg = false AND p2.proisagg = false AND
+    (p1.proisagg = false OR p2.proisagg = false) AND
      (p1.prolang != p2.prolang OR
       p1.proisagg != p2.proisagg OR
       p1.prosecdef != p2.prosecdef OR
@@ -82,10 +82,9 @@ WHERE p1.oid < p2.oid AND
       p1.proretset != p2.proretset OR
       p1.provolatile != p2.provolatile OR
       p1.pronargs != p2.pronargs);
- oid  | proname | oid  |   proname   
-------+---------+------+-------------
- 1219 | int8inc | 2804 | int8inc_any
-(1 row)
+ oid | proname | oid | proname 
+-----+---------+-----+---------
+(0 rows)
  
  -- Look for uses of different type OIDs in the argument/result type fields
  -- for different aliases of the same built-in function.
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 1c2a604450026a9e456f48a8b0c2db75d30d2576..890aa8dea02d583d3112f46f778000091036494b 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -39,6 +39,17 @@ SELECT var_samp(b::numeric) FROM aggtest;
  SELECT var_pop(1.0), var_samp(2.0);
  SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
  
+-- SQL2003 binary aggregates
+SELECT regr_count(b, a) FROM aggtest;
+SELECT regr_sxx(b, a) FROM aggtest;
+SELECT regr_syy(b, a) FROM aggtest;
+SELECT regr_sxy(b, a) FROM aggtest;
+SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
+SELECT regr_r2(b, a) FROM aggtest;
+SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
+SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
+SELECT corr(b, a) FROM aggtest;
+
  SELECT count(four) AS cnt_1000 FROM onek;
  SELECT count(DISTINCT four) AS cnt_4 FROM onek;
  
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql

index 84d0ce93b5c2a54063786a8c0e747eda98841035..10498a5fa1b17460fd2e2ed003704cd997fd8f59 100644 (file)
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -68,17 +68,16 @@ WHERE p1.oid != p2.oid AND
  -- of the same internal function (ie, matching prosrc fields).  It's OK to
  -- have several entries with different pronames for the same internal function,
  -- but conflicts in the number of arguments and other critical items should
--- be complained of.
--- Ignore aggregates, since they all use "aggregate_dummy".
-
--- As of 8.2, this finds int8inc and int8inc_any, which are OK.
+-- be complained of.  (We don't check data types here; see next query.)
+-- Note: ignore aggregate functions here, since they all point to the same
+-- dummy built-in function.
  
  SELECT p1.oid, p1.proname, p2.oid, p2.proname
  FROM pg_proc AS p1, pg_proc AS p2
  WHERE p1.oid < p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
-    p1.proisagg = false AND p2.proisagg = false AND
+    (p1.proisagg = false OR p2.proisagg = false) AND
      (p1.prolang != p2.prolang OR
       p1.proisagg != p2.proisagg OR
       p1.prosecdef != p2.prosecdef OR
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)
doc/src/sgml/func.sgml		patch \| blob \| history
src/backend/utils/adt/float.c		patch \| blob \| history
src/backend/utils/adt/int8.c		patch \| blob \| history
src/include/catalog/catversion.h		patch \| blob \| history
src/include/catalog/pg_aggregate.h		patch \| blob \| history
src/include/catalog/pg_proc.h		patch \| blob \| history
src/include/utils/builtins.h		patch \| blob \| history
src/include/utils/int8.h		patch \| blob \| history
src/test/regress/expected/aggregates.out		patch \| blob \| history
src/test/regress/expected/opr_sanity.out		patch \| blob \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| history
src/test/regress/sql/opr_sanity.sql		patch \| blob \| history