]> granicus.if.org Git - postgresql/commitdiff
SQL2003-standard statistical aggregates, by Sergey Koposov. I've added only
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 28 Jul 2006 18:33:04 +0000 (18:33 +0000)
the float8 versions of the aggregates, which is all that the standard requires.
Sergey's original patch also provided versions using numeric arithmetic,
but given the size and slowness of the code, I doubt we ought to include
those in core.

12 files changed:
doc/src/sgml/func.sgml
src/backend/utils/adt/float.c
src/backend/utils/adt/int8.c
src/include/catalog/catversion.h
src/include/catalog/pg_aggregate.h
src/include/catalog/pg_proc.h
src/include/utils/builtins.h
src/include/utils/int8.h
src/test/regress/expected/aggregates.out
src/test/regress/expected/opr_sanity.out
src/test/regress/sql/aggregates.sql
src/test/regress/sql/opr_sanity.sql

index c9b474e0893e5879bad088b14e096e2851801d28..678b1cbeca8729179cb3377b7475137a8344de6a 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.327 2006/07/16 23:59:58 neilc Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.328 2006/07/28 18:33:03 tgl Exp $ -->
 
  <chapter id="functions">
   <title>Functions and Operators</title>
@@ -7785,16 +7785,18 @@ SELECT NULLIF(value, '(none)') ...
 
   <para>
    <firstterm>Aggregate functions</firstterm> compute a single result
-   value from a set of input values.  <xref
-   linkend="functions-aggregate-table"> shows the built-in aggregate
-   functions.  The special syntax considerations for aggregate
+   value from a set of input values.  The built-in aggregate functions
+   are listed in
+   <xref linkend="functions-aggregate-table"> and
+   <xref linkend="functions-aggregate-statistics-table">.
+   The special syntax considerations for aggregate
    functions are explained in <xref linkend="syntax-aggregates">.
    Consult <xref linkend="tutorial-agg"> for additional introductory
    information.
   </para>
 
   <table id="functions-aggregate-table">
-   <title>Aggregate Functions</title>
+   <title>General-Purpose Aggregate Functions</title>
 
    <tgroup cols="4">
     <thead>
@@ -7897,7 +7899,7 @@ SELECT NULLIF(value, '(none)') ...
       <entry><function>count(*)</function></entry>
       <entry></entry>
       <entry><type>bigint</type></entry>
-      <entry>number of input values</entry>
+      <entry>number of input rows</entry>
      </row>
 
      <row>
@@ -7905,7 +7907,7 @@ SELECT NULLIF(value, '(none)') ...
       <entry>any</entry>
       <entry><type>bigint</type></entry>
       <entry>
-       number of input values for which the value of <replaceable
+       number of input rows for which the value of <replaceable
        class="parameter">expression</replaceable> is not null
       </entry>
      </row>
@@ -7948,6 +7950,333 @@ SELECT NULLIF(value, '(none)') ...
       </entry>
      </row>
 
+     <row>
+      <entry><function>sum(<replaceable class="parameter">expression</replaceable>)</function></entry>
+      <entry>
+       <type>smallint</type>, <type>int</type>,
+       <type>bigint</type>, <type>real</type>, <type>double
+       precision</type>, <type>numeric</type>, or
+       <type>interval</type>
+      </entry>
+      <entry>
+       <type>bigint</type> for <type>smallint</type> or
+       <type>int</type> arguments, <type>numeric</type> for
+       <type>bigint</type> arguments, <type>double precision</type>
+       for floating-point arguments, otherwise the same as the
+       argument data type
+      </entry>
+      <entry>sum of <replaceable class="parameter">expression</replaceable> across all input values</entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   It should be noted that except for <function>count</function>,
+   these functions return a null value when no rows are selected.  In
+   particular, <function>sum</function> of no rows returns null, not
+   zero as one might expect.  The <function>coalesce</function> function may be
+   used to substitute zero for null when necessary.
+  </para>
+
+  <note>
+    <indexterm>
+      <primary>ANY</primary>
+    </indexterm>
+    <indexterm>
+      <primary>SOME</primary>
+    </indexterm>
+    <para>
+      Boolean aggregates <function>bool_and</function> and 
+      <function>bool_or</function> correspond to standard SQL aggregates
+      <function>every</function> and <function>any</function> or
+      <function>some</function>. 
+      As for <function>any</function> and <function>some</function>, 
+      it seems that there is an ambiguity built into the standard syntax:
+<programlisting>
+SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...;
+</programlisting>
+      Here <function>ANY</function> can be considered both as leading
+      to a subquery or as an aggregate if the select expression returns 1 row.
+      Thus the standard name cannot be given to these aggregates.
+    </para>
+  </note>
+
+  <note>
+   <para>
+    Users accustomed to working with other SQL database management
+    systems may be surprised by the performance of the
+    <function>count</function> aggregate when it is applied to the
+    entire table. A query like:
+<programlisting>
+SELECT count(*) FROM sometable;
+</programlisting>
+    will be executed by <productname>PostgreSQL</productname> using a
+    sequential scan of the entire table.
+   </para>
+  </note>
+
+
+  <para>
+   <xref linkend="functions-aggregate-statistics-table"> shows
+   aggregate functions typically used in statistical analysis.
+   (These are separated out merely to avoid cluttering the listing
+   of more-commonly-used aggregates.)  Where the description mentions
+   <replaceable class="parameter">N</replaceable>, it means the
+   number of input rows for which all the input expressions are non-null.
+   In all cases, null is returned if the computation is meaningless,
+   for example when <replaceable class="parameter">N</replaceable> is zero.
+  </para>
+
+  <indexterm>
+   <primary>statistics</primary>
+  </indexterm>
+  <indexterm>
+   <primary>linear regression</primary>
+  </indexterm>
+
+  <table id="functions-aggregate-statistics-table">
+   <title>Aggregate Functions for Statistics</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Function</entry>
+      <entry>Argument Type</entry>
+      <entry>Return Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>correlation</primary>
+       </indexterm>
+       <function>corr(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sqrt((<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>))^2 / ((<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2) * (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">Y</replaceable>^2) - sum(<replaceable
+      class="parameter">Y</replaceable>)^2)))</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>covariance</primary>
+        <secondary>population</secondary>
+       </indexterm>
+       <function>covar_pop(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable>) / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>covariance</primary>
+        <secondary>sample</secondary>
+       </indexterm>
+       <function>covar_samp(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable>) / (<replaceable
+      class="parameter">N</replaceable> - 1)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_avgx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>) /
+      <replaceable class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_avgy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">Y</replaceable>) /
+      <replaceable class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_count(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>bigint</type>
+      </entry>
+      <entry>number of input rows in which both expressions are non-null</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>regression intercept</primary>
+       </indexterm>
+       <function>regr_intercept(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">Y</replaceable>) *
+      sum(<replaceable class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>)) / (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_r2(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>))^2 / ((<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2) * (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">Y</replaceable>^2) - sum(<replaceable
+      class="parameter">Y</replaceable>)^2))</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>regression slope</primary>
+       </indexterm>
+       <function>regr_slope(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>)) / (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_sxx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>^2) -
+      sum(<replaceable class="parameter">X</replaceable>)^2 / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_sxy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_syy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">Y</replaceable>^2) -
+      sum(<replaceable class="parameter">Y</replaceable>)^2 / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
      <row>
       <entry>
        <indexterm>
@@ -8007,24 +8336,6 @@ SELECT NULLIF(value, '(none)') ...
       <entry>sample standard deviation of the input values</entry>
      </row>
 
-     <row>
-      <entry><function>sum(<replaceable class="parameter">expression</replaceable>)</function></entry>
-      <entry>
-       <type>smallint</type>, <type>int</type>,
-       <type>bigint</type>, <type>real</type>, <type>double
-       precision</type>, <type>numeric</type>, or
-       <type>interval</type>
-      </entry>
-      <entry>
-       <type>bigint</type> for <type>smallint</type> or
-       <type>int</type> arguments, <type>numeric</type> for
-       <type>bigint</type> arguments, <type>double precision</type>
-       for floating-point arguments, otherwise the same as the
-       argument data type
-      </entry>
-      <entry>sum of <replaceable class="parameter">expression</replaceable> across all input values</entry>
-     </row>
-
      <row>
       <entry>
        <indexterm>
@@ -8087,50 +8398,6 @@ SELECT NULLIF(value, '(none)') ...
    </tgroup>
   </table>
 
-  <para>
-   It should be noted that except for <function>count</function>,
-   these functions return a null value when no rows are selected.  In
-   particular, <function>sum</function> of no rows returns null, not
-   zero as one might expect.  The <function>coalesce</function> function may be
-   used to substitute zero for null when necessary.
-  </para>
-
-  <note>
-    <indexterm>
-      <primary>ANY</primary>
-    </indexterm>
-    <indexterm>
-      <primary>SOME</primary>
-    </indexterm>
-    <para>
-      Boolean aggregates <function>bool_and</function> and 
-      <function>bool_or</function> correspond to standard SQL aggregates
-      <function>every</function> and <function>any</function> or
-      <function>some</function>. 
-      As for <function>any</function> and <function>some</function>, 
-      it seems that there is an ambiguity built into the standard syntax:
-<programlisting>
-SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...;
-</programlisting>
-      Here <function>ANY</function> can be considered both as leading
-      to a subquery or as an aggregate if the select expression returns 1 row.
-      Thus the standard name cannot be given to these aggregates.
-    </para>
-  </note>
-
-  <note>
-   <para>
-    Users accustomed to working with other SQL database management
-    systems may be surprised by the performance of the
-    <function>count</function> aggregate when it is applied to the
-    entire table. A query like:
-<programlisting>
-SELECT count(*) FROM sometable;
-</programlisting>
-    will be executed by <productname>PostgreSQL</productname> using a
-    sequential scan of the entire table.
-   </para>
-  </note>
  </sect1>
 
 
index 55e79e85ed81fe1e46840d7873f926209a1bd06b..1f8d081c8bf31b443210ed13bac8bc125046191c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.127 2006/07/14 14:52:24 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.128 2006/07/28 18:33:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1878,18 +1878,18 @@ setseed(PG_FUNCTION_ARGS)
  */
 
 static float8 *
-check_float8_array(ArrayType *transarray, const char *caller)
+check_float8_array(ArrayType *transarray, const char *caller, int n)
 {
        /*
-        * We expect the input to be a 3-element float array; verify that. We
+        * We expect the input to be an N-element float array; verify that. We
         * don't need to use deconstruct_array() since the array data is just
-        * going to look like a C array of 3 float8 values.
+        * going to look like a C array of N float8 values.
         */
        if (ARR_NDIM(transarray) != 1 ||
-               ARR_DIMS(transarray)[0] != 3 ||
+               ARR_DIMS(transarray)[0] != n ||
                ARR_HASNULL(transarray) ||
                ARR_ELEMTYPE(transarray) != FLOAT8OID)
-               elog(ERROR, "%s: expected 3-element float8 array", caller);
+               elog(ERROR, "%s: expected %d-element float8 array", caller, n);
        return (float8 *) ARR_DATA_PTR(transarray);
 }
 
@@ -1903,7 +1903,7 @@ float8_accum(PG_FUNCTION_ARGS)
                                sumX,
                                sumX2;
 
-       transvalues = check_float8_array(transarray, "float8_accum");
+       transvalues = check_float8_array(transarray, "float8_accum", 3);
        N = transvalues[0];
        sumX = transvalues[1];
        sumX2 = transvalues[2];
@@ -1953,7 +1953,7 @@ float4_accum(PG_FUNCTION_ARGS)
                                sumX2,
                                newval;
 
-       transvalues = check_float8_array(transarray, "float4_accum");
+       transvalues = check_float8_array(transarray, "float4_accum", 3);
        N = transvalues[0];
        sumX = transvalues[1];
        sumX2 = transvalues[2];
@@ -2003,7 +2003,7 @@ float8_avg(PG_FUNCTION_ARGS)
        float8          N,
                                sumX;
 
-       transvalues = check_float8_array(transarray, "float8_avg");
+       transvalues = check_float8_array(transarray, "float8_avg", 3);
        N = transvalues[0];
        sumX = transvalues[1];
        /* ignore sumX2 */
@@ -2025,7 +2025,7 @@ float8_var_pop(PG_FUNCTION_ARGS)
                                sumX2,
                                numerator;
 
-       transvalues = check_float8_array(transarray, "float8_var_pop");
+       transvalues = check_float8_array(transarray, "float8_var_pop", 3);
        N = transvalues[0];
        sumX = transvalues[1];
        sumX2 = transvalues[2];
@@ -2053,7 +2053,7 @@ float8_var_samp(PG_FUNCTION_ARGS)
                                sumX2,
                                numerator;
 
-       transvalues = check_float8_array(transarray, "float8_var_samp");
+       transvalues = check_float8_array(transarray, "float8_var_samp", 3);
        N = transvalues[0];
        sumX = transvalues[1];
        sumX2 = transvalues[2];
@@ -2081,7 +2081,7 @@ float8_stddev_pop(PG_FUNCTION_ARGS)
                                sumX2,
                                numerator;
 
-       transvalues = check_float8_array(transarray, "float8_stddev_pop");
+       transvalues = check_float8_array(transarray, "float8_stddev_pop", 3);
        N = transvalues[0];
        sumX = transvalues[1];
        sumX2 = transvalues[2];
@@ -2109,7 +2109,7 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
                                sumX2,
                                numerator;
 
-       transvalues = check_float8_array(transarray, "float8_stddev_samp");
+       transvalues = check_float8_array(transarray, "float8_stddev_samp", 3);
        N = transvalues[0];
        sumX = transvalues[1];
        sumX2 = transvalues[2];
@@ -2127,6 +2127,362 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
        PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0))));
 }
 
+/*
+ *             =========================
+ *             SQL2003 BINARY AGGREGATES
+ *             =========================
+ *
+ * The transition datatype for all these aggregates is a 6-element array of
+ * float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y)
+ * in that order.  Note that Y is the first argument to the aggregates!
+ *
+ * It might seem attractive to optimize this by having multiple accumulator
+ * functions that only calculate the sums actually needed.  But on most
+ * modern machines, a couple of extra floating-point multiplies will be
+ * insignificant compared to the other per-tuple overhead, so I've chosen
+ * to minimize code space instead.
+ */
+
+Datum
+float8_regr_accum(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8          newvalY = PG_GETARG_FLOAT8(1);
+       float8          newvalX = PG_GETARG_FLOAT8(2);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumY2, sumXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+       sumXY = transvalues[5];
+
+       N += 1.0;
+       sumX += newvalX;
+       sumX2 += newvalX * newvalX;
+       sumY += newvalY;
+       sumY2 += newvalY * newvalY;
+       sumXY += newvalX * newvalY;
+
+       /*
+        * If we're invoked by nodeAgg, we can cheat and modify our first
+        * parameter in-place to reduce palloc overhead. Otherwise we construct a
+        * new array with the updated transition data and return it.
+        */
+       if (fcinfo->context && IsA(fcinfo->context, AggState))
+       {
+               transvalues[0] = N;
+               transvalues[1] = sumX;
+               transvalues[2] = sumX2;
+               transvalues[3] = sumY;
+               transvalues[4] = sumY2;
+               transvalues[5] = sumXY;
+
+               PG_RETURN_ARRAYTYPE_P(transarray);
+       }
+       else
+       {
+               Datum           transdatums[6];
+               ArrayType  *result;
+
+               transdatums[0] = Float8GetDatumFast(N);
+               transdatums[1] = Float8GetDatumFast(sumX);
+               transdatums[2] = Float8GetDatumFast(sumX2);
+               transdatums[3] = Float8GetDatumFast(sumY);
+               transdatums[4] = Float8GetDatumFast(sumY2);
+               transdatums[5] = Float8GetDatumFast(sumXY);
+
+               result = construct_array(transdatums, 6,
+                                                                FLOAT8OID,
+                                                                sizeof(float8),
+                                                                false /* float8 byval */ , 'd');
+
+               PG_RETURN_ARRAYTYPE_P(result);
+       }
+}
+
+Datum
+float8_regr_sxx(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumX,
+                               sumX2,
+                               numerator;
+
+       transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumX2 - sumX * sumX;
+
+       /* Watch out for roundoff error producing a negative numerator */
+       if (numerator <= 0.0)
+               PG_RETURN_FLOAT8(0.0);
+
+       PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_syy(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumY,
+                               sumY2,
+                               numerator;
+
+       transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+       N = transvalues[0];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumY2 - sumY * sumY;
+
+       /* Watch out for roundoff error producing a negative numerator */
+       if (numerator <= 0.0)
+               PG_RETURN_FLOAT8(0.0);
+
+       PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_sxy(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumY, sumXY, numerator;
+
+       transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumXY - sumX * sumY;
+
+       /* A negative result is valid here */
+
+       PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_avgx(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumX;
+
+       transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       PG_RETURN_FLOAT8(sumX / N);
+}
+
+Datum
+float8_regr_avgy(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N,
+                               sumY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+       N = transvalues[0];
+       sumY = transvalues[3];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       PG_RETURN_FLOAT8(sumY / N);
+}
+
+Datum
+float8_covar_pop(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumY, sumXY, numerator;
+
+       transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumXY - sumX * sumY;
+
+       PG_RETURN_FLOAT8(numerator / (N * N));
+}
+
+Datum
+float8_covar_samp(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumY, sumXY, numerator;
+
+       transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is <= 1 we should return NULL */
+       if (N < 2.0)
+               PG_RETURN_NULL();
+
+       numerator = N * sumXY - sumX * sumY;
+
+       PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
+}
+
+Datum
+float8_corr(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumY2, sumXY, numeratorX,
+               numeratorY, numeratorXY;
+
+       transvalues = check_float8_array(transarray, "float8_corr", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorY = N * sumY2 - sumY * sumY;
+       numeratorXY = N * sumXY - sumX * sumY;
+       if (numeratorX <= 0 || numeratorY <= 0)
+               PG_RETURN_NULL();
+       
+       PG_RETURN_FLOAT8(sqrt((numeratorXY * numeratorXY) /
+                                                 (numeratorX * numeratorY)));
+}
+
+Datum
+float8_regr_r2(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumY2, sumXY, numeratorX,
+               numeratorY, numeratorXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumY2 = transvalues[4];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorY = N * sumY2 - sumY * sumY;
+       numeratorXY = N * sumXY - sumX * sumY;
+       if (numeratorX <= 0)
+               PG_RETURN_NULL();
+       /* per spec, horizontal line produces 1.0 */
+       if (numeratorY <= 0)
+               PG_RETURN_FLOAT8(1.0);
+
+       PG_RETURN_FLOAT8((numeratorXY * numeratorXY) /
+                                        (numeratorX * numeratorY));
+}
+
+Datum
+float8_regr_slope(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumXY, numeratorX,
+               numeratorXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorXY = N * sumXY - sumX * sumY;
+       if (numeratorX <= 0)
+               PG_RETURN_NULL();
+       
+       PG_RETURN_FLOAT8(numeratorXY / numeratorX);
+}
+
+Datum
+float8_regr_intercept(PG_FUNCTION_ARGS)
+{
+       ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+       float8     *transvalues;
+       float8          N, sumX, sumX2, sumY, sumXY, numeratorX,
+               numeratorXXY;
+
+       transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+       N = transvalues[0];
+       sumX = transvalues[1];
+       sumX2 = transvalues[2];
+       sumY = transvalues[3];
+       sumXY = transvalues[5];
+
+       /* if N is 0 we should return NULL */
+       if (N < 1.0)
+               PG_RETURN_NULL();
+
+       numeratorX = N * sumX2 - sumX * sumX;
+       numeratorXXY = sumY * sumX2 - sumX * sumXY;
+       if (numeratorX <= 0)
+               PG_RETURN_NULL();
+       
+       PG_RETURN_FLOAT8(numeratorXXY / numeratorX);
+}
+
 
 /*
  *             ====================================
index 1a6f36c40b02bf6d3368767d966592bf2415c1b2..00432994c5b0b070ce4d9a725fa70128a88a94c1 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.60 2006/03/05 15:58:42 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.61 2006/07/28 18:33:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -694,6 +694,28 @@ int8inc(PG_FUNCTION_ARGS)
        }
 }
 
+/*
+ * These functions are exactly like int8inc but are used for aggregates that
+ * count only non-null values.  Since the functions are declared strict,
+ * the null checks happen before we ever get here, and all we need do is
+ * increment the state value.  We could actually make these pg_proc entries
+ * point right at int8inc, but then the opr_sanity regression test would
+ * complain about mismatched entries for a built-in function.
+ */
+
+Datum
+int8inc_any(PG_FUNCTION_ARGS)
+{
+       return int8inc(fcinfo);
+}
+
+Datum
+int8inc_float8_float8(PG_FUNCTION_ARGS)
+{
+       return int8inc(fcinfo);
+}
+
+
 Datum
 int8larger(PG_FUNCTION_ARGS)
 {
index 00896bce2e2c299a48bf838eac25954c08706b05..7386e91eaf6af74072872344b0adc99c01bf135f 100644 (file)
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.342 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.343 2006/07/28 18:33:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200607271
+#define CATALOG_VERSION_NO     200607281
 
 #endif
index f77328b9e918c6299d1716c043c35a3d6f756ce7..243648b4aaed799c7b412c1b8187c595aa55f851 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.56 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.57 2006/07/28 18:33:04 tgl Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -192,6 +192,20 @@ DATA(insert ( 2157 float4_accum    float8_stddev_samp      0       1022    "{0,0,0}" ));
 DATA(insert ( 2158     float8_accum    float8_stddev_samp      0       1022    "{0,0,0}" ));
 DATA(insert ( 2159     numeric_accum   numeric_stddev_samp     0       1231    "{0,0,0}" ));
 
+/* SQL2003 binary regression aggregates */
+DATA(insert ( 2818     int8inc_float8_float8           -                               0       20              "0" ));
+DATA(insert ( 2819     float8_regr_accum       float8_regr_sxx                 0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2820     float8_regr_accum       float8_regr_syy                 0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2821     float8_regr_accum       float8_regr_sxy                 0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2822     float8_regr_accum       float8_regr_avgx                0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2823     float8_regr_accum       float8_regr_avgy                0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2824     float8_regr_accum       float8_regr_r2                  0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2825     float8_regr_accum       float8_regr_slope               0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2826     float8_regr_accum       float8_regr_intercept   0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2827     float8_regr_accum       float8_covar_pop                0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2828     float8_regr_accum       float8_covar_samp               0       1022    "{0,0,0,0,0,0}" ));
+DATA(insert ( 2829     float8_regr_accum       float8_corr                             0       1022    "{0,0,0,0,0,0}" ));
+
 /* boolean-and and boolean-or */
 DATA(insert ( 2517     booland_statefunc       -                       0       16              _null_ ));
 DATA(insert ( 2518     boolor_statefunc        -                       0       16              _null_ ));
index 729f92d1bf47a0be8431ae76ab4b904cb5fa17ba..0a1a6936c53f138cbb144df00b4b9b8abeacd71e 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.418 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.419 2006/07/28 18:33:04 tgl Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -1534,7 +1534,7 @@ DESCR("truncate interval to specified units");
 
 DATA(insert OID = 1219 (  int8inc                 PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_      int8inc - _null_ ));
 DESCR("increment");
-DATA(insert OID = 2804 (  int8inc_any     PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_ int8inc - _null_ ));
+DATA(insert OID = 2804 (  int8inc_any     PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_ int8inc_any - _null_ ));
 DESCR("increment, ignores second argument");
 DATA(insert OID = 1230 (  int8abs                 PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_      int8abs - _null_ ));
 DESCR("absolute value");
@@ -2730,6 +2730,32 @@ DATA(insert OID = 1963 (  int4_avg_accum   PGNSP PGUID 12 f f t f i 2 1016 "1016
 DESCR("AVG(int4) transition function");
 DATA(insert OID = 1964 (  int8_avg                PGNSP PGUID 12 f f t f i 1 1700 "1016" _null_ _null_ _null_  int8_avg - _null_ ));
 DESCR("AVG(int) aggregate final function");
+DATA(insert OID = 2805 (  int8inc_float8_float8                PGNSP PGUID 12 f f t f i 3 20 "20 701 701" _null_ _null_ _null_ int8inc_float8_float8 - _null_ ));
+DESCR("REGR_COUNT(double, double) transition function");
+DATA(insert OID = 2806 (  float8_regr_accum                    PGNSP PGUID 12 f f t f i 3 1022 "1022 701 701" _null_ _null_ _null_ float8_regr_accum - _null_ ));
+DESCR("REGR_...(double, double) transition function");
+DATA(insert OID = 2807 (  float8_regr_sxx                      PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxx - _null_ ));
+DESCR("REGR_SXX(double, double) aggregate final function");
+DATA(insert OID = 2808 (  float8_regr_syy                      PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_syy - _null_ ));
+DESCR("REGR_SYY(double, double) aggregate final function");
+DATA(insert OID = 2809 (  float8_regr_sxy                      PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxy - _null_ ));
+DESCR("REGR_SXY(double, double) aggregate final function");
+DATA(insert OID = 2810 (  float8_regr_avgx                     PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgx - _null_ ));
+DESCR("REGR_AVGX(double, double) aggregate final function");
+DATA(insert OID = 2811 (  float8_regr_avgy                     PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgy - _null_ ));
+DESCR("REGR_AVGY(double, double) aggregate final function");
+DATA(insert OID = 2812 (  float8_regr_r2                       PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_r2 - _null_ ));
+DESCR("REGR_R2(double, double) aggregate final function");
+DATA(insert OID = 2813 (  float8_regr_slope                    PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_slope - _null_ ));
+DESCR("REGR_SLOPE(double, double) aggregate final function");
+DATA(insert OID = 2814 (  float8_regr_intercept                PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_intercept - _null_ ));
+DESCR("REGR_INTERCEPT(double, double) aggregate final function");
+DATA(insert OID = 2815 (  float8_covar_pop                     PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_pop - _null_ ));
+DESCR("COVAR_POP(double, double) aggregate final function");
+DATA(insert OID = 2816 (  float8_covar_samp                    PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_samp - _null_ ));
+DESCR("COVAR_SAMP(double, double) aggregate final function");
+DATA(insert OID = 2817 (  float8_corr                          PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_corr - _null_ ));
+DESCR("CORR(double, double) aggregate final function");
 
 /* To ASCII conversion */
 DATA(insert OID = 1845 ( to_ascii      PGNSP PGUID 12 f f t f i 1      25 "25" _null_ _null_ _null_    to_ascii_default - _null_ ));
@@ -3196,6 +3222,20 @@ DATA(insert OID = 2157 (  stddev                 PGNSP PGUID 12 t f f f i 1 701 "700" _null_ _
 DATA(insert OID = 2158 (  stddev                       PGNSP PGUID 12 t f f f i 1 701 "701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
 DATA(insert OID = 2159 (  stddev                       PGNSP PGUID 12 t f f f i 1 1700 "1700" _null_ _null_ _null_ aggregate_dummy - _null_ ));
 
+DATA(insert OID = 2818 (  regr_count           PGNSP PGUID 12 t f f f i 2 20 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2819 (  regr_sxx                     PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2820 (  regr_syy                     PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2821 (  regr_sxy                     PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2822 (  regr_avgx                    PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2823 (  regr_avgy                    PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2824 (  regr_r2                      PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2825 (  regr_slope           PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2826 (  regr_intercept       PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+
+DATA(insert OID = 2827 (  covar_pop                    PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2828 (  covar_samp           PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2829 (  corr                         PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+
 DATA(insert OID = 2160 ( text_pattern_lt        PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt - _null_ ));
 DATA(insert OID = 2161 ( text_pattern_le        PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le - _null_ ));
 DATA(insert OID = 2162 ( text_pattern_eq        PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_eq - _null_ ));
index 74dee731510556e4184af2eeb041e3cff9d32c06..f0bc0531771d87791fe68671afd05e9a50043c92 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.280 2006/07/21 20:51:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.281 2006/07/28 18:33:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -348,6 +348,18 @@ extern Datum float8_var_pop(PG_FUNCTION_ARGS);
 extern Datum float8_var_samp(PG_FUNCTION_ARGS);
 extern Datum float8_stddev_pop(PG_FUNCTION_ARGS);
 extern Datum float8_stddev_samp(PG_FUNCTION_ARGS);
+extern Datum float8_regr_accum(PG_FUNCTION_ARGS);
+extern Datum float8_regr_sxx(PG_FUNCTION_ARGS);
+extern Datum float8_regr_syy(PG_FUNCTION_ARGS);
+extern Datum float8_regr_sxy(PG_FUNCTION_ARGS);
+extern Datum float8_regr_avgx(PG_FUNCTION_ARGS);
+extern Datum float8_regr_avgy(PG_FUNCTION_ARGS);
+extern Datum float8_covar_pop(PG_FUNCTION_ARGS);
+extern Datum float8_covar_samp(PG_FUNCTION_ARGS);
+extern Datum float8_corr(PG_FUNCTION_ARGS);
+extern Datum float8_regr_r2(PG_FUNCTION_ARGS);
+extern Datum float8_regr_slope(PG_FUNCTION_ARGS);
+extern Datum float8_regr_intercept(PG_FUNCTION_ARGS);
 extern Datum float48pl(PG_FUNCTION_ARGS);
 extern Datum float48mi(PG_FUNCTION_ARGS);
 extern Datum float48mul(PG_FUNCTION_ARGS);
index 0fc5eb61a3a4523d13156118e4e959ea26fa791f..36cd9d12737fd8e2938ce73e70f2e8ef8b5c91c0 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.44 2006/03/05 15:59:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.45 2006/07/28 18:33:04 tgl Exp $
  *
  * NOTES
  * These data types are supported on all 64-bit architectures, and may
@@ -74,6 +74,8 @@ extern Datum int8div(PG_FUNCTION_ARGS);
 extern Datum int8abs(PG_FUNCTION_ARGS);
 extern Datum int8mod(PG_FUNCTION_ARGS);
 extern Datum int8inc(PG_FUNCTION_ARGS);
+extern Datum int8inc_any(PG_FUNCTION_ARGS);
+extern Datum int8inc_float8_float8(PG_FUNCTION_ARGS);
 extern Datum int8larger(PG_FUNCTION_ARGS);
 extern Datum int8smaller(PG_FUNCTION_ARGS);
 
index 3b0c0f467a9b9f6537c7902621d8d0d7fcd69de5..34b720f0f433974b234fb876a2421f2ea06cc8e8 100644 (file)
@@ -137,6 +137,61 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
           0 |            
 (1 row)
 
+-- SQL2003 binary aggregates
+SELECT regr_count(b, a) FROM aggtest;
+ regr_count 
+------------
+          4
+(1 row)
+
+SELECT regr_sxx(b, a) FROM aggtest;
+ regr_sxx 
+----------
+     5099
+(1 row)
+
+SELECT regr_syy(b, a) FROM aggtest;
+     regr_syy     
+------------------
+ 68756.2156939293
+(1 row)
+
+SELECT regr_sxy(b, a) FROM aggtest;
+     regr_sxy     
+------------------
+ 2614.51582155004
+(1 row)
+
+SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
+ regr_avgx |    regr_avgy     
+-----------+------------------
+      49.5 | 107.943152273074
+(1 row)
+
+SELECT regr_r2(b, a) FROM aggtest;
+      regr_r2       
+--------------------
+ 0.0194977982031803
+(1 row)
+
+SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
+    regr_slope     |  regr_intercept  
+-------------------+------------------
+ 0.512750700441271 | 82.5619926012309
+(1 row)
+
+SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
+    covar_pop    |    covar_samp    
+-----------------+------------------
+ 653.62895538751 | 871.505273850014
+(1 row)
+
+SELECT corr(b, a) FROM aggtest;
+       corr        
+-------------------
+ 0.139634516517873
+(1 row)
+
 SELECT count(four) AS cnt_1000 FROM onek;
  cnt_1000 
 ----------
index 5c905f55a54b3be0bbcd27e878b43bdd9bf6cce0..3ded3cc926cd32b4aab796e971cd5d7f8490af38 100644 (file)
@@ -66,15 +66,15 @@ WHERE p1.oid != p2.oid AND
 -- of the same internal function (ie, matching prosrc fields).  It's OK to
 -- have several entries with different pronames for the same internal function,
 -- but conflicts in the number of arguments and other critical items should
--- be complained of.
--- Ignore aggregates, since they all use "aggregate_dummy".
--- As of 8.2, this finds int8inc and int8inc_any, which are OK.
+-- be complained of.  (We don't check data types here; see next query.)
+-- Note: ignore aggregate functions here, since they all point to the same
+-- dummy built-in function.
 SELECT p1.oid, p1.proname, p2.oid, p2.proname
 FROM pg_proc AS p1, pg_proc AS p2
 WHERE p1.oid < p2.oid AND
     p1.prosrc = p2.prosrc AND
     p1.prolang = 12 AND p2.prolang = 12 AND
-    p1.proisagg = false AND p2.proisagg = false AND
+    (p1.proisagg = false OR p2.proisagg = false) AND
     (p1.prolang != p2.prolang OR
      p1.proisagg != p2.proisagg OR
      p1.prosecdef != p2.prosecdef OR
@@ -82,10 +82,9 @@ WHERE p1.oid < p2.oid AND
      p1.proretset != p2.proretset OR
      p1.provolatile != p2.provolatile OR
      p1.pronargs != p2.pronargs);
- oid  | proname | oid  |   proname   
-------+---------+------+-------------
- 1219 | int8inc | 2804 | int8inc_any
-(1 row)
+ oid | proname | oid | proname 
+-----+---------+-----+---------
+(0 rows)
 
 -- Look for uses of different type OIDs in the argument/result type fields
 -- for different aliases of the same built-in function.
index 1c2a604450026a9e456f48a8b0c2db75d30d2576..890aa8dea02d583d3112f46f778000091036494b 100644 (file)
@@ -39,6 +39,17 @@ SELECT var_samp(b::numeric) FROM aggtest;
 SELECT var_pop(1.0), var_samp(2.0);
 SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
 
+-- SQL2003 binary aggregates
+SELECT regr_count(b, a) FROM aggtest;
+SELECT regr_sxx(b, a) FROM aggtest;
+SELECT regr_syy(b, a) FROM aggtest;
+SELECT regr_sxy(b, a) FROM aggtest;
+SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
+SELECT regr_r2(b, a) FROM aggtest;
+SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
+SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
+SELECT corr(b, a) FROM aggtest;
+
 SELECT count(four) AS cnt_1000 FROM onek;
 SELECT count(DISTINCT four) AS cnt_4 FROM onek;
 
index 84d0ce93b5c2a54063786a8c0e747eda98841035..10498a5fa1b17460fd2e2ed003704cd997fd8f59 100644 (file)
@@ -68,17 +68,16 @@ WHERE p1.oid != p2.oid AND
 -- of the same internal function (ie, matching prosrc fields).  It's OK to
 -- have several entries with different pronames for the same internal function,
 -- but conflicts in the number of arguments and other critical items should
--- be complained of.
--- Ignore aggregates, since they all use "aggregate_dummy".
-
--- As of 8.2, this finds int8inc and int8inc_any, which are OK.
+-- be complained of.  (We don't check data types here; see next query.)
+-- Note: ignore aggregate functions here, since they all point to the same
+-- dummy built-in function.
 
 SELECT p1.oid, p1.proname, p2.oid, p2.proname
 FROM pg_proc AS p1, pg_proc AS p2
 WHERE p1.oid < p2.oid AND
     p1.prosrc = p2.prosrc AND
     p1.prolang = 12 AND p2.prolang = 12 AND
-    p1.proisagg = false AND p2.proisagg = false AND
+    (p1.proisagg = false OR p2.proisagg = false) AND
     (p1.prolang != p2.prolang OR
      p1.proisagg != p2.proisagg OR
      p1.prosecdef != p2.prosecdef OR