Improve the accuracy of floating point statistical aggregates.

author Dean Rasheed <dean.a.rasheed@gmail.com>

Sat, 6 Oct 2018 10:20:09 +0000 (11:20 +0100)

committer Dean Rasheed <dean.a.rasheed@gmail.com>

Sat, 6 Oct 2018 10:20:09 +0000 (11:20 +0100)
author Dean Rasheed <dean.a.rasheed@gmail.com>
Sat, 6 Oct 2018 10:20:09 +0000 (11:20 +0100)
committer Dean Rasheed <dean.a.rasheed@gmail.com>
Sat, 6 Oct 2018 10:20:09 +0000 (11:20 +0100)
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c

index df35557b7379876f6d57a91db872830ccabf0d1b..d1e12c14bedc87b401807b14ddc7a97bb8ab6447 100644 (file)
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -2401,13 +2401,39 @@ setseed(PG_FUNCTION_ARGS)
   *             float8_stddev_samp      - produce final result for float STDDEV_SAMP()
   *             float8_stddev_pop       - produce final result for float STDDEV_POP()
   *
+ * The naive schoolbook implementation of these aggregates works by
+ * accumulating sum(X) and sum(X^2).  However, this approach suffers from
+ * large rounding errors in the final computation of quantities like the
+ * population variance (N*sum(X^2) - sum(X)^2) / N^2, since each of the
+ * intermediate terms is potentially very large, while the difference is often
+ * quite small.
+ *
+ * Instead we use the Youngs-Cramer algorithm [1] which works by accumulating
+ * Sx=sum(X) and Sxx=sum((X-Sx/N)^2), using a numerically stable algorithm to
+ * incrementally update those quantities.  The final computations of each of
+ * the aggregate values is then trivial and gives more accurate results (for
+ * example, the population variance is just Sxx/N).  This algorithm is also
+ * fairly easy to generalize to allow parallel execution without loss of
+ * precision (see, for example, [2]).  For more details, and a comparison of
+ * this with other algorithms, see [3].
+ *
   * The transition datatype for all these aggregates is a 3-element array
- * of float8, holding the values N, sum(X), sum(X*X) in that order.
+ * of float8, holding the values N, Sx, Sxx in that order.
   *
   * Note that we represent N as a float to avoid having to build a special
   * datatype.  Given a reasonable floating-point implementation, there should
   * be no accuracy loss unless N exceeds 2 ^ 52 or so (by which time the
   * user will have doubtless lost interest anyway...)
+ *
+ * [1] Some Results Relevant to Choice of Sum and Sum-of-Product Algorithms,
+ * E. A. Youngs and E. M. Cramer, Technometrics Vol 13, No 3, August 1971.
+ *
+ * [2] Updating Formulae and a Pairwise Algorithm for Computing Sample
+ * Variances, T. F. Chan, G. H. Golub & R. J. LeVeque, COMPSTAT 1982.
+ *
+ * [3] Numerically Stable Parallel Computation of (Co-)Variance, Erich
+ * Schubert and Michael Gertz, Proceedings of the 30th International
+ * Conference on Scientific and Statistical Database Management, 2018.
   */
  
  static float8 *
@@ -2441,18 +2467,90 @@ float8_combine(PG_FUNCTION_ARGS)
         ArrayType  *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
         float8     *transvalues1;
         float8     *transvalues2;
-
-       if (!AggCheckCallContext(fcinfo, NULL))
-               elog(ERROR, "aggregate function called in non-aggregate context");
+       float8          N1,
+                               Sx1,
+                               Sxx1,
+                               N2,
+                               Sx2,
+                               Sxx2,
+                               tmp,
+                               N,
+                               Sx,
+                               Sxx;
  
         transvalues1 = check_float8_array(transarray1, "float8_combine", 3);
         transvalues2 = check_float8_array(transarray2, "float8_combine", 3);
  
-       transvalues1[0] = transvalues1[0] + transvalues2[0];
-       transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]);
-       transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]);
+       N1 = transvalues1[0];
+       Sx1 = transvalues1[1];
+       Sxx1 = transvalues1[2];
+
+       N2 = transvalues2[0];
+       Sx2 = transvalues2[1];
+       Sxx2 = transvalues2[2];
+
+       /*--------------------
+        * The transition values combine using a generalization of the
+        * Youngs-Cramer algorithm as follows:
+        *
+        *      N = N1 + N2
+        *      Sx = Sx1 + Sx2
+        *      Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N;
+        *
+        * It's worth handling the special cases N1 = 0 and N2 = 0 separately
+        * since those cases are trivial, and we then don't need to worry about
+        * division-by-zero errors in the general case.
+        *--------------------
+        */
+       if (N1 == 0.0)
+       {
+               N = N2;
+               Sx = Sx2;
+               Sxx = Sxx2;
+       }
+       else if (N2 == 0.0)
+       {
+               N = N1;
+               Sx = Sx1;
+               Sxx = Sxx1;
+       }
+       else
+       {
+               N = N1 + N2;
+               Sx = float8_pl(Sx1, Sx2);
+               tmp = Sx1 / N1 - Sx2 / N2;
+               Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp * tmp / N;
+               check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true);
+       }
+
+       /*
+        * If we're invoked as an aggregate, we can cheat and modify our first
+        * parameter in-place to reduce palloc overhead. Otherwise we construct a
+        * new array with the updated transition data and return it.
+        */
+       if (AggCheckCallContext(fcinfo, NULL))
+       {
+               transvalues1[0] = N;
+               transvalues1[1] = Sx;
+               transvalues1[2] = Sxx;
+
+               PG_RETURN_ARRAYTYPE_P(transarray1);
+       }
+       else
+       {
+               Datum           transdatums[3];
+               ArrayType  *result;
+
+               transdatums[0] = Float8GetDatumFast(N);
+               transdatums[1] = Float8GetDatumFast(Sx);
+               transdatums[2] = Float8GetDatumFast(Sxx);
+
+               result = construct_array(transdatums, 3,
+                                                                FLOAT8OID,
+                                                                sizeof(float8), FLOAT8PASSBYVAL, 'd');
  
-       PG_RETURN_ARRAYTYPE_P(transarray1);
+               PG_RETURN_ARRAYTYPE_P(result);
+       }
  }
  
  Datum
@@ -2461,8 +2559,43 @@ float8_accum(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8          newval = PG_GETARG_FLOAT8(1);
         float8     *transvalues;
+       float8          N,
+                               Sx,
+                               Sxx,
+                               tmp;
  
         transvalues = check_float8_array(transarray, "float8_accum", 3);
+       N = transvalues[0];
+       Sx = transvalues[1];
+       Sxx = transvalues[2];
+
+       /*
+        * Use the Youngs-Cramer algorithm to incorporate the new value into the
+        * transition values.
+        */
+       N += 1.0;
+       Sx += newval;
+       if (transvalues[0] > 0.0)
+       {
+               tmp = newval * N - Sx;
+               Sxx += tmp * tmp / (N * transvalues[0]);
+
+               /*
+                * Overflow check.  We only report an overflow error when finite
+                * inputs lead to infinite results.  Note also that Sxx should be NaN
+                * if any of the inputs are infinite, so we intentionally prevent Sxx
+                * from becoming infinite.
+                */
+               if (isinf(Sx) || isinf(Sxx))
+               {
+                       if (!isinf(transvalues[1]) && !isinf(newval))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                                errmsg("value out of range: overflow")));
+
+                       Sxx = get_float8_nan();
+               }
+       }
  
         /*
          * If we're invoked as an aggregate, we can cheat and modify our first
@@ -2471,9 +2604,9 @@ float8_accum(PG_FUNCTION_ARGS)
          */
         if (AggCheckCallContext(fcinfo, NULL))
         {
-               transvalues[0] = transvalues[0] + 1.0;
-               transvalues[1] = float8_pl(transvalues[1], newval);
-               transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+               transvalues[0] = N;
+               transvalues[1] = Sx;
+               transvalues[2] = Sxx;
  
                 PG_RETURN_ARRAYTYPE_P(transarray);
         }
@@ -2482,9 +2615,9 @@ float8_accum(PG_FUNCTION_ARGS)
                 Datum           transdatums[3];
                 ArrayType  *result;
  
-               transvalues[0] = transvalues[0] + 1.0;
-               transvalues[1] = float8_pl(transvalues[1], newval);
-               transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+               transdatums[0] = Float8GetDatumFast(N);
+               transdatums[1] = Float8GetDatumFast(Sx);
+               transdatums[2] = Float8GetDatumFast(Sxx);
  
                 result = construct_array(transdatums, 3,
                                                                  FLOAT8OID,
@@ -2502,8 +2635,43 @@ float4_accum(PG_FUNCTION_ARGS)
         /* do computations as float8 */
         float8          newval = PG_GETARG_FLOAT4(1);
         float8     *transvalues;
+       float8          N,
+                               Sx,
+                               Sxx,
+                               tmp;
  
         transvalues = check_float8_array(transarray, "float4_accum", 3);
+       N = transvalues[0];
+       Sx = transvalues[1];
+       Sxx = transvalues[2];
+
+       /*
+        * Use the Youngs-Cramer algorithm to incorporate the new value into the
+        * transition values.
+        */
+       N += 1.0;
+       Sx += newval;
+       if (transvalues[0] > 0.0)
+       {
+               tmp = newval * N - Sx;
+               Sxx += tmp * tmp / (N * transvalues[0]);
+
+               /*
+                * Overflow check.  We only report an overflow error when finite
+                * inputs lead to infinite results.  Note also that Sxx should be NaN
+                * if any of the inputs are infinite, so we intentionally prevent Sxx
+                * from becoming infinite.
+                */
+               if (isinf(Sx) || isinf(Sxx))
+               {
+                       if (!isinf(transvalues[1]) && !isinf(newval))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                                errmsg("value out of range: overflow")));
+
+                       Sxx = get_float8_nan();
+               }
+       }
  
         /*
          * If we're invoked as an aggregate, we can cheat and modify our first
@@ -2512,9 +2680,9 @@ float4_accum(PG_FUNCTION_ARGS)
          */
         if (AggCheckCallContext(fcinfo, NULL))
         {
-               transvalues[0] = transvalues[0] + 1.0;
-               transvalues[1] = float8_pl(transvalues[1], newval);
-               transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+               transvalues[0] = N;
+               transvalues[1] = Sx;
+               transvalues[2] = Sxx;
  
                 PG_RETURN_ARRAYTYPE_P(transarray);
         }
@@ -2523,9 +2691,9 @@ float4_accum(PG_FUNCTION_ARGS)
                 Datum           transdatums[3];
                 ArrayType  *result;
  
-               transvalues[0] = transvalues[0] + 1.0;
-               transvalues[1] = float8_pl(transvalues[1], newval);
-               transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+               transdatums[0] = Float8GetDatumFast(N);
+               transdatums[1] = Float8GetDatumFast(Sx);
+               transdatums[2] = Float8GetDatumFast(Sxx);
  
                 result = construct_array(transdatums, 3,
                                                                  FLOAT8OID,
@@ -2541,18 +2709,18 @@ float8_avg(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX;
+                               Sx;
  
         transvalues = check_float8_array(transarray, "float8_avg", 3);
         N = transvalues[0];
-       sumX = transvalues[1];
-       /* ignore sumX2 */
+       Sx = transvalues[1];
+       /* ignore Sxx */
  
         /* SQL defines AVG of no values to be NULL */
         if (N == 0.0)
                 PG_RETURN_NULL();
  
-       PG_RETURN_FLOAT8(sumX / N);
+       PG_RETURN_FLOAT8(Sx / N);
  }
  
  Datum
@@ -2561,27 +2729,20 @@ float8_var_pop(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               numerator;
+                               Sxx;
  
         transvalues = check_float8_array(transarray, "float8_var_pop", 3);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
+       /* ignore Sx */
+       Sxx = transvalues[2];
  
         /* Population variance is undefined when N is 0, so return NULL */
         if (N == 0.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumX2 - sumX * sumX;
-       check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
-
-       /* Watch out for roundoff error producing a negative numerator */
-       if (numerator <= 0.0)
-               PG_RETURN_FLOAT8(0.0);
+       /* Note that Sxx is guaranteed to be non-negative */
  
-       PG_RETURN_FLOAT8(numerator / (N * N));
+       PG_RETURN_FLOAT8(Sxx / N);
  }
  
  Datum
@@ -2590,27 +2751,20 @@ float8_var_samp(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               numerator;
+                               Sxx;
  
         transvalues = check_float8_array(transarray, "float8_var_samp", 3);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
+       /* ignore Sx */
+       Sxx = transvalues[2];
  
         /* Sample variance is undefined when N is 0 or 1, so return NULL */
         if (N <= 1.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumX2 - sumX * sumX;
-       check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
+       /* Note that Sxx is guaranteed to be non-negative */
  
-       /* Watch out for roundoff error producing a negative numerator */
-       if (numerator <= 0.0)
-               PG_RETURN_FLOAT8(0.0);
-
-       PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
+       PG_RETURN_FLOAT8(Sxx / (N - 1.0));
  }
  
  Datum
@@ -2619,27 +2773,20 @@ float8_stddev_pop(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               numerator;
+                               Sxx;
  
         transvalues = check_float8_array(transarray, "float8_stddev_pop", 3);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
+       /* ignore Sx */
+       Sxx = transvalues[2];
  
         /* Population stddev is undefined when N is 0, so return NULL */
         if (N == 0.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumX2 - sumX * sumX;
-       check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
-
-       /* Watch out for roundoff error producing a negative numerator */
-       if (numerator <= 0.0)
-               PG_RETURN_FLOAT8(0.0);
+       /* Note that Sxx is guaranteed to be non-negative */
  
-       PG_RETURN_FLOAT8(sqrt(numerator / (N * N)));
+       PG_RETURN_FLOAT8(sqrt(Sxx / N));
  }
  
  Datum
@@ -2648,27 +2795,20 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               numerator;
+                               Sxx;
  
         transvalues = check_float8_array(transarray, "float8_stddev_samp", 3);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
+       /* ignore Sx */
+       Sxx = transvalues[2];
  
         /* Sample stddev is undefined when N is 0 or 1, so return NULL */
         if (N <= 1.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumX2 - sumX * sumX;
-       check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
+       /* Note that Sxx is guaranteed to be non-negative */
  
-       /* Watch out for roundoff error producing a negative numerator */
-       if (numerator <= 0.0)
-               PG_RETURN_FLOAT8(0.0);
-
-       PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0))));
+       PG_RETURN_FLOAT8(sqrt(Sxx / (N - 1.0)));
  }
  
  /*
@@ -2676,9 +2816,14 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
   *             SQL2003 BINARY AGGREGATES
   *             =========================
   *
+ * As with the preceding aggregates, we use the Youngs-Cramer algorithm to
+ * reduce rounding errors in the aggregate final functions.
+ *
   * The transition datatype for all these aggregates is a 6-element array of
- * float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y)
- * in that order.  Note that Y is the first argument to the aggregates!
+ * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ *
+ * Note that Y is the first argument to all these aggregates!
   *
   * It might seem attractive to optimize this by having multiple accumulator
   * functions that only calculate the sums actually needed.  But on most
@@ -2695,32 +2840,66 @@ float8_regr_accum(PG_FUNCTION_ARGS)
         float8          newvalX = PG_GETARG_FLOAT8(2);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               sumY,
-                               sumY2,
-                               sumXY;
+                               Sx,
+                               Sxx,
+                               Sy,
+                               Syy,
+                               Sxy,
+                               tmpX,
+                               tmpY,
+                               scale;
  
         transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
-       sumY = transvalues[3];
-       sumY2 = transvalues[4];
-       sumXY = transvalues[5];
+       Sx = transvalues[1];
+       Sxx = transvalues[2];
+       Sy = transvalues[3];
+       Syy = transvalues[4];
+       Sxy = transvalues[5];
  
+       /*
+        * Use the Youngs-Cramer algorithm to incorporate the new values into the
+        * transition values.
+        */
         N += 1.0;
-       sumX += newvalX;
-       check_float8_val(sumX, isinf(transvalues[1]) || isinf(newvalX), true);
-       sumX2 += newvalX * newvalX;
-       check_float8_val(sumX2, isinf(transvalues[2]) || isinf(newvalX), true);
-       sumY += newvalY;
-       check_float8_val(sumY, isinf(transvalues[3]) || isinf(newvalY), true);
-       sumY2 += newvalY * newvalY;
-       check_float8_val(sumY2, isinf(transvalues[4]) || isinf(newvalY), true);
-       sumXY += newvalX * newvalY;
-       check_float8_val(sumXY, isinf(transvalues[5]) || isinf(newvalX) ||
-                                        isinf(newvalY), true);
+       Sx += newvalX;
+       Sy += newvalY;
+       if (transvalues[0] > 0.0)
+       {
+               tmpX = newvalX * N - Sx;
+               tmpY = newvalY * N - Sy;
+               scale = 1.0 / (N * transvalues[0]);
+               Sxx += tmpX * tmpX * scale;
+               Syy += tmpY * tmpY * scale;
+               Sxy += tmpX * tmpY * scale;
+
+               /*
+                * Overflow check.  We only report an overflow error when finite
+                * inputs lead to infinite results.  Note also that Sxx, Syy and Sxy
+                * should be NaN if any of the relevant inputs are infinite, so we
+                * intentionally prevent them from becoming infinite.
+                */
+               if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy))
+               {
+                       if (((isinf(Sx) || isinf(Sxx)) &&
+                                !isinf(transvalues[1]) && !isinf(newvalX)) ||
+                               ((isinf(Sy) || isinf(Syy)) &&
+                                !isinf(transvalues[3]) && !isinf(newvalY)) ||
+                               (isinf(Sxy) &&
+                                !isinf(transvalues[1]) && !isinf(newvalX) &&
+                                !isinf(transvalues[3]) && !isinf(newvalY)))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                                errmsg("value out of range: overflow")));
+
+                       if (isinf(Sxx))
+                               Sxx = get_float8_nan();
+                       if (isinf(Syy))
+                               Syy = get_float8_nan();
+                       if (isinf(Sxy))
+                               Sxy = get_float8_nan();
+               }
+       }
  
         /*
          * If we're invoked as an aggregate, we can cheat and modify our first
@@ -2730,11 +2909,11 @@ float8_regr_accum(PG_FUNCTION_ARGS)
         if (AggCheckCallContext(fcinfo, NULL))
         {
                 transvalues[0] = N;
-               transvalues[1] = sumX;
-               transvalues[2] = sumX2;
-               transvalues[3] = sumY;
-               transvalues[4] = sumY2;
-               transvalues[5] = sumXY;
+               transvalues[1] = Sx;
+               transvalues[2] = Sxx;
+               transvalues[3] = Sy;
+               transvalues[4] = Syy;
+               transvalues[5] = Sxy;
  
                 PG_RETURN_ARRAYTYPE_P(transarray);
         }
@@ -2744,11 +2923,11 @@ float8_regr_accum(PG_FUNCTION_ARGS)
                 ArrayType  *result;
  
                 transdatums[0] = Float8GetDatumFast(N);
-               transdatums[1] = Float8GetDatumFast(sumX);
-               transdatums[2] = Float8GetDatumFast(sumX2);
-               transdatums[3] = Float8GetDatumFast(sumY);
-               transdatums[4] = Float8GetDatumFast(sumY2);
-               transdatums[5] = Float8GetDatumFast(sumXY);
+               transdatums[1] = Float8GetDatumFast(Sx);
+               transdatums[2] = Float8GetDatumFast(Sxx);
+               transdatums[3] = Float8GetDatumFast(Sy);
+               transdatums[4] = Float8GetDatumFast(Syy);
+               transdatums[5] = Float8GetDatumFast(Sxy);
  
                 result = construct_array(transdatums, 6,
                                                                  FLOAT8OID,
@@ -2773,21 +2952,127 @@ float8_regr_combine(PG_FUNCTION_ARGS)
         ArrayType  *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
         float8     *transvalues1;
         float8     *transvalues2;
-
-       if (!AggCheckCallContext(fcinfo, NULL))
-               elog(ERROR, "aggregate function called in non-aggregate context");
+       float8          N1,
+                               Sx1,
+                               Sxx1,
+                               Sy1,
+                               Syy1,
+                               Sxy1,
+                               N2,
+                               Sx2,
+                               Sxx2,
+                               Sy2,
+                               Syy2,
+                               Sxy2,
+                               tmp1,
+                               tmp2,
+                               N,
+                               Sx,
+                               Sxx,
+                               Sy,
+                               Syy,
+                               Sxy;
  
         transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
         transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
  
-       transvalues1[0] = transvalues1[0] + transvalues2[0];
-       transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]);
-       transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]);
-       transvalues1[3] = float8_pl(transvalues1[3], transvalues2[3]);
-       transvalues1[4] = float8_pl(transvalues1[4], transvalues2[4]);
-       transvalues1[5] = float8_pl(transvalues1[5], transvalues2[5]);
+       N1 = transvalues1[0];
+       Sx1 = transvalues1[1];
+       Sxx1 = transvalues1[2];
+       Sy1 = transvalues1[3];
+       Syy1 = transvalues1[4];
+       Sxy1 = transvalues1[5];
+
+       N2 = transvalues2[0];
+       Sx2 = transvalues2[1];
+       Sxx2 = transvalues2[2];
+       Sy2 = transvalues2[3];
+       Syy2 = transvalues2[4];
+       Sxy2 = transvalues2[5];
+
+       /*--------------------
+        * The transition values combine using a generalization of the
+        * Youngs-Cramer algorithm as follows:
+        *
+        *      N = N1 + N2
+        *      Sx = Sx1 + Sx2
+        *      Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N
+        *      Sy = Sy1 + Sy2
+        *      Syy = Syy1 + Syy2 + N1 * N2 * (Sy1/N1 - Sy2/N2)^2 / N
+        *      Sxy = Sxy1 + Sxy2 + N1 * N2 * (Sx1/N1 - Sx2/N2) * (Sy1/N1 - Sy2/N2) / N
+        *
+        * It's worth handling the special cases N1 = 0 and N2 = 0 separately
+        * since those cases are trivial, and we then don't need to worry about
+        * division-by-zero errors in the general case.
+        *--------------------
+        */
+       if (N1 == 0.0)
+       {
+               N = N2;
+               Sx = Sx2;
+               Sxx = Sxx2;
+               Sy = Sy2;
+               Syy = Syy2;
+               Sxy = Sxy2;
+       }
+       else if (N2 == 0.0)
+       {
+               N = N1;
+               Sx = Sx1;
+               Sxx = Sxx1;
+               Sy = Sy1;
+               Syy = Syy1;
+               Sxy = Sxy1;
+       }
+       else
+       {
+               N = N1 + N2;
+               Sx = float8_pl(Sx1, Sx2);
+               tmp1 = Sx1 / N1 - Sx2 / N2;
+               Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp1 * tmp1 / N;
+               check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true);
+               Sy = float8_pl(Sy1, Sy2);
+               tmp2 = Sy1 / N1 - Sy2 / N2;
+               Syy = Syy1 + Syy2 + N1 * N2 * tmp2 * tmp2 / N;
+               check_float8_val(Syy, isinf(Syy1) || isinf(Syy2), true);
+               Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
+               check_float8_val(Sxy, isinf(Sxy1) || isinf(Sxy2), true);
+       }
+
+       /*
+        * If we're invoked as an aggregate, we can cheat and modify our first
+        * parameter in-place to reduce palloc overhead. Otherwise we construct a
+        * new array with the updated transition data and return it.
+        */
+       if (AggCheckCallContext(fcinfo, NULL))
+       {
+               transvalues1[0] = N;
+               transvalues1[1] = Sx;
+               transvalues1[2] = Sxx;
+               transvalues1[3] = Sy;
+               transvalues1[4] = Syy;
+               transvalues1[5] = Sxy;
+
+               PG_RETURN_ARRAYTYPE_P(transarray1);
+       }
+       else
+       {
+               Datum           transdatums[6];
+               ArrayType  *result;
+
+               transdatums[0] = Float8GetDatumFast(N);
+               transdatums[1] = Float8GetDatumFast(Sx);
+               transdatums[2] = Float8GetDatumFast(Sxx);
+               transdatums[3] = Float8GetDatumFast(Sy);
+               transdatums[4] = Float8GetDatumFast(Syy);
+               transdatums[5] = Float8GetDatumFast(Sxy);
  
-       PG_RETURN_ARRAYTYPE_P(transarray1);
+               result = construct_array(transdatums, 6,
+                                                                FLOAT8OID,
+                                                                sizeof(float8), FLOAT8PASSBYVAL, 'd');
+
+               PG_RETURN_ARRAYTYPE_P(result);
+       }
  }
  
  
@@ -2797,27 +3082,19 @@ float8_regr_sxx(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               numerator;
+                               Sxx;
  
         transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
+       Sxx = transvalues[2];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumX2 - sumX * sumX;
-       check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
+       /* Note that Sxx is guaranteed to be non-negative */
  
-       /* Watch out for roundoff error producing a negative numerator */
-       if (numerator <= 0.0)
-               PG_RETURN_FLOAT8(0.0);
-
-       PG_RETURN_FLOAT8(numerator / N);
+       PG_RETURN_FLOAT8(Sxx);
  }
  
  Datum
@@ -2826,27 +3103,19 @@ float8_regr_syy(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumY,
-                               sumY2,
-                               numerator;
+                               Syy;
  
         transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
         N = transvalues[0];
-       sumY = transvalues[3];
-       sumY2 = transvalues[4];
+       Syy = transvalues[4];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumY2 - sumY * sumY;
-       check_float8_val(numerator, isinf(sumY2) || isinf(sumY), true);
-
-       /* Watch out for roundoff error producing a negative numerator */
-       if (numerator <= 0.0)
-               PG_RETURN_FLOAT8(0.0);
+       /* Note that Syy is guaranteed to be non-negative */
  
-       PG_RETURN_FLOAT8(numerator / N);
+       PG_RETURN_FLOAT8(Syy);
  }
  
  Datum
@@ -2855,28 +3124,19 @@ float8_regr_sxy(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumY,
-                               sumXY,
-                               numerator;
+                               Sxy;
  
         transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumY = transvalues[3];
-       sumXY = transvalues[5];
+       Sxy = transvalues[5];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumXY - sumX * sumY;
-       check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
-                                        isinf(sumY), true);
-
         /* A negative result is valid here */
  
-       PG_RETURN_FLOAT8(numerator / N);
+       PG_RETURN_FLOAT8(Sxy);
  }
  
  Datum
@@ -2885,17 +3145,17 @@ float8_regr_avgx(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX;
+                               Sx;
  
         transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
+       Sx = transvalues[1];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       PG_RETURN_FLOAT8(sumX / N);
+       PG_RETURN_FLOAT8(Sx / N);
  }
  
  Datum
@@ -2904,17 +3164,17 @@ float8_regr_avgy(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumY;
+                               Sy;
  
         transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
         N = transvalues[0];
-       sumY = transvalues[3];
+       Sy = transvalues[3];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       PG_RETURN_FLOAT8(sumY / N);
+       PG_RETURN_FLOAT8(Sy / N);
  }
  
  Datum
@@ -2923,26 +3183,17 @@ float8_covar_pop(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumY,
-                               sumXY,
-                               numerator;
+                               Sxy;
  
         transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumY = transvalues[3];
-       sumXY = transvalues[5];
+       Sxy = transvalues[5];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumXY - sumX * sumY;
-       check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
-                                        isinf(sumY), true);
-
-       PG_RETURN_FLOAT8(numerator / (N * N));
+       PG_RETURN_FLOAT8(Sxy / N);
  }
  
  Datum
@@ -2951,26 +3202,17 @@ float8_covar_samp(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumY,
-                               sumXY,
-                               numerator;
+                               Sxy;
  
         transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumY = transvalues[3];
-       sumXY = transvalues[5];
+       Sxy = transvalues[5];
  
         /* if N is <= 1 we should return NULL */
         if (N < 2.0)
                 PG_RETURN_NULL();
  
-       numerator = N * sumXY - sumX * sumY;
-       check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
-                                        isinf(sumY), true);
-
-       PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
+       PG_RETURN_FLOAT8(Sxy / (N - 1.0));
  }
  
  Datum
@@ -2979,38 +3221,27 @@ float8_corr(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               sumY,
-                               sumY2,
-                               sumXY,
-                               numeratorX,
-                               numeratorY,
-                               numeratorXY;
+                               Sxx,
+                               Syy,
+                               Sxy;
  
         transvalues = check_float8_array(transarray, "float8_corr", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
-       sumY = transvalues[3];
-       sumY2 = transvalues[4];
-       sumXY = transvalues[5];
+       Sxx = transvalues[2];
+       Syy = transvalues[4];
+       Sxy = transvalues[5];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numeratorX = N * sumX2 - sumX * sumX;
-       check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
-       numeratorY = N * sumY2 - sumY * sumY;
-       check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true);
-       numeratorXY = N * sumXY - sumX * sumY;
-       check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
-                                        isinf(sumY), true);
-       if (numeratorX <= 0 || numeratorY <= 0)
+       /* Note that Sxx and Syy are guaranteed to be non-negative */
+
+       /* per spec, return NULL for horizontal and vertical lines */
+       if (Sxx == 0 || Syy == 0)
                 PG_RETURN_NULL();
  
-       PG_RETURN_FLOAT8(numeratorXY / sqrt(numeratorX * numeratorY));
+       PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
  }
  
  Datum
@@ -3019,42 +3250,31 @@ float8_regr_r2(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               sumY,
-                               sumY2,
-                               sumXY,
-                               numeratorX,
-                               numeratorY,
-                               numeratorXY;
+                               Sxx,
+                               Syy,
+                               Sxy;
  
         transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
-       sumY = transvalues[3];
-       sumY2 = transvalues[4];
-       sumXY = transvalues[5];
+       Sxx = transvalues[2];
+       Syy = transvalues[4];
+       Sxy = transvalues[5];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numeratorX = N * sumX2 - sumX * sumX;
-       check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
-       numeratorY = N * sumY2 - sumY * sumY;
-       check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true);
-       numeratorXY = N * sumXY - sumX * sumY;
-       check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
-                                        isinf(sumY), true);
-       if (numeratorX <= 0)
+       /* Note that Sxx and Syy are guaranteed to be non-negative */
+
+       /* per spec, return NULL for a vertical line */
+       if (Sxx == 0)
                 PG_RETURN_NULL();
-       /* per spec, horizontal line produces 1.0 */
-       if (numeratorY <= 0)
+
+       /* per spec, return 1.0 for a horizontal line */
+       if (Syy == 0)
                 PG_RETURN_FLOAT8(1.0);
  
-       PG_RETURN_FLOAT8((numeratorXY * numeratorXY) /
-                                        (numeratorX * numeratorY));
+       PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy));
  }
  
  Datum
@@ -3063,33 +3283,25 @@ float8_regr_slope(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               sumY,
-                               sumXY,
-                               numeratorX,
-                               numeratorXY;
+                               Sxx,
+                               Sxy;
  
         transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
-       sumY = transvalues[3];
-       sumXY = transvalues[5];
+       Sxx = transvalues[2];
+       Sxy = transvalues[5];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numeratorX = N * sumX2 - sumX * sumX;
-       check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
-       numeratorXY = N * sumXY - sumX * sumY;
-       check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
-                                        isinf(sumY), true);
-       if (numeratorX <= 0)
+       /* Note that Sxx is guaranteed to be non-negative */
+
+       /* per spec, return NULL for a vertical line */
+       if (Sxx == 0)
                 PG_RETURN_NULL();
  
-       PG_RETURN_FLOAT8(numeratorXY / numeratorX);
+       PG_RETURN_FLOAT8(Sxy / Sxx);
  }
  
  Datum
@@ -3098,33 +3310,29 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
         ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
         float8     *transvalues;
         float8          N,
-                               sumX,
-                               sumX2,
-                               sumY,
-                               sumXY,
-                               numeratorX,
-                               numeratorXXY;
+                               Sx,
+                               Sxx,
+                               Sy,
+                               Sxy;
  
         transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
         N = transvalues[0];
-       sumX = transvalues[1];
-       sumX2 = transvalues[2];
-       sumY = transvalues[3];
-       sumXY = transvalues[5];
+       Sx = transvalues[1];
+       Sxx = transvalues[2];
+       Sy = transvalues[3];
+       Sxy = transvalues[5];
  
         /* if N is 0 we should return NULL */
         if (N < 1.0)
                 PG_RETURN_NULL();
  
-       numeratorX = N * sumX2 - sumX * sumX;
-       check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
-       numeratorXXY = sumY * sumX2 - sumX * sumXY;
-       check_float8_val(numeratorXXY, isinf(sumY) || isinf(sumX2) ||
-                                        isinf(sumX) || isinf(sumXY), true);
-       if (numeratorX <= 0)
+       /* Note that Sxx is guaranteed to be non-negative */
+
+       /* per spec, return NULL for a vertical line */
+       if (Sxx == 0)
                 PG_RETURN_NULL();
  
-       PG_RETURN_FLOAT8(numeratorXXY / numeratorX);
+       PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
  }
  
  
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 5e21622754271f9bf29ea1846fb6d73ec896e770..717e965f306b090bb343f28353c37461e628678a 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -198,6 +198,50 @@ select avg('NaN'::numeric) from generate_series(1,3);
   NaN
  (1 row)
  
+-- verify correct results for infinite inputs
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('1'), ('infinity')) v(x);
+   avg    | var_pop 
+----------+---------
+ Infinity |     NaN
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('infinity'), ('1')) v(x);
+   avg    | var_pop 
+----------+---------
+ Infinity |     NaN
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('infinity'), ('infinity')) v(x);
+   avg    | var_pop 
+----------+---------
+ Infinity |     NaN
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('-infinity'), ('infinity')) v(x);
+ avg | var_pop 
+-----+---------
+ NaN |     NaN
+(1 row)
+
+-- test accuracy with a large input offset
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x);
+    avg    | var_pop 
+-----------+---------
+ 100000005 |     2.5
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES (7000000000005), (7000000000007)) v(x);
+      avg      | var_pop 
+---------------+---------
+ 7000000000006 |       1
+(1 row)
+
  -- SQL2003 binary aggregates
  SELECT regr_count(b, a) FROM aggtest;
   regr_count 
@@ -253,6 +297,90 @@ SELECT corr(b, a) FROM aggtest;
   0.139634516517873
  (1 row)
  
+-- test accum and combine functions directly
+CREATE TABLE regr_test (x float8, y float8);
+INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (10,20,30,80);
+ count | sum | regr_sxx | sum  | regr_syy | regr_sxy 
+-------+-----+----------+------+----------+----------
+     4 | 140 |     2900 | 1290 |    83075 |    15050
+(1 row)
+
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test;
+ count | sum | regr_sxx | sum  | regr_syy | regr_sxy 
+-------+-----+----------+------+----------+----------
+     5 | 240 |     6280 | 1490 |    95080 |     8680
+(1 row)
+
+SELECT float8_accum('{4,140,2900}'::float8[], 100);
+ float8_accum 
+--------------
+ {5,240,6280}
+(1 row)
+
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
+      float8_regr_accum       
+------------------------------
+ {5,240,6280,1490,95080,8680}
+(1 row)
+
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (10,20,30);
+ count | sum | regr_sxx | sum | regr_syy | regr_sxy 
+-------+-----+----------+-----+----------+----------
+     3 |  60 |      200 | 750 |    20000 |     2000
+(1 row)
+
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (80,100);
+ count | sum | regr_sxx | sum | regr_syy | regr_sxy 
+-------+-----+----------+-----+----------+----------
+     2 | 180 |      200 | 740 |    57800 |    -3400
+(1 row)
+
+SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
+ float8_combine 
+----------------
+ {3,60,200}
+(1 row)
+
+SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
+ float8_combine 
+----------------
+ {2,180,200}
+(1 row)
+
+SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
+ float8_combine 
+----------------
+ {5,240,6280}
+(1 row)
+
+SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
+                           '{0,0,0,0,0,0}'::float8[]);
+    float8_regr_combine    
+---------------------------
+ {3,60,200,750,20000,2000}
+(1 row)
+
+SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
+                           '{2,180,200,740,57800,-3400}'::float8[]);
+     float8_regr_combine     
+-----------------------------
+ {2,180,200,740,57800,-3400}
+(1 row)
+
+SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
+                           '{2,180,200,740,57800,-3400}'::float8[]);
+     float8_regr_combine      
+------------------------------
+ {5,240,6280,1490,95080,8680}
+(1 row)
+
+DROP TABLE regr_test;
+-- test count, distinct
  SELECT count(four) AS cnt_1000 FROM onek;
   cnt_1000 
  ----------
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 7e77467ecd47440c004a27310771af3093dddea8..83a5492fdda7caf792a281254b7f9c566c5c357b 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -51,6 +51,22 @@ select avg(null::float8) from generate_series(1,3);
  select sum('NaN'::numeric) from generate_series(1,3);
  select avg('NaN'::numeric) from generate_series(1,3);
  
+-- verify correct results for infinite inputs
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('1'), ('infinity')) v(x);
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('infinity'), ('1')) v(x);
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('infinity'), ('infinity')) v(x);
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('-infinity'), ('infinity')) v(x);
+
+-- test accuracy with a large input offset
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x);
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES (7000000000005), (7000000000007)) v(x);
+
  -- SQL2003 binary aggregates
  SELECT regr_count(b, a) FROM aggtest;
  SELECT regr_sxx(b, a) FROM aggtest;
@@ -62,6 +78,31 @@ SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
  SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
  SELECT corr(b, a) FROM aggtest;
  
+-- test accum and combine functions directly
+CREATE TABLE regr_test (x float8, y float8);
+INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (10,20,30,80);
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test;
+SELECT float8_accum('{4,140,2900}'::float8[], 100);
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (10,20,30);
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (80,100);
+SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
+SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
+SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
+                           '{0,0,0,0,0,0}'::float8[]);
+SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
+                           '{2,180,200,740,57800,-3400}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
+                           '{2,180,200,740,57800,-3400}'::float8[]);
+DROP TABLE regr_test;
+
+-- test count, distinct
  SELECT count(four) AS cnt_1000 FROM onek;
  SELECT count(DISTINCT four) AS cnt_4 FROM onek;
author	Dean Rasheed <dean.a.rasheed@gmail.com>
	Sat, 6 Oct 2018 10:20:09 +0000 (11:20 +0100)
committer	Dean Rasheed <dean.a.rasheed@gmail.com>
	Sat, 6 Oct 2018 10:20:09 +0000 (11:20 +0100)
src/backend/utils/adt/float.c		patch \| blob \| history
src/test/regress/expected/aggregates.out		patch \| blob \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| history