* float8_stddev_samp - produce final result for float STDDEV_SAMP()
* float8_stddev_pop - produce final result for float STDDEV_POP()
*
+ * The naive schoolbook implementation of these aggregates works by
+ * accumulating sum(X) and sum(X^2). However, this approach suffers from
+ * large rounding errors in the final computation of quantities like the
+ * population variance (N*sum(X^2) - sum(X)^2) / N^2, since each of the
+ * intermediate terms is potentially very large, while the difference is often
+ * quite small.
+ *
+ * Instead we use the Youngs-Cramer algorithm [1] which works by accumulating
+ * Sx=sum(X) and Sxx=sum((X-Sx/N)^2), using a numerically stable algorithm to
+ * incrementally update those quantities. The final computations of each of
+ * the aggregate values is then trivial and gives more accurate results (for
+ * example, the population variance is just Sxx/N). This algorithm is also
+ * fairly easy to generalize to allow parallel execution without loss of
+ * precision (see, for example, [2]). For more details, and a comparison of
+ * this with other algorithms, see [3].
+ *
* The transition datatype for all these aggregates is a 3-element array
- * of float8, holding the values N, sum(X), sum(X*X) in that order.
+ * of float8, holding the values N, Sx, Sxx in that order.
*
* Note that we represent N as a float to avoid having to build a special
* datatype. Given a reasonable floating-point implementation, there should
* be no accuracy loss unless N exceeds 2 ^ 52 or so (by which time the
* user will have doubtless lost interest anyway...)
+ *
+ * [1] Some Results Relevant to Choice of Sum and Sum-of-Product Algorithms,
+ * E. A. Youngs and E. M. Cramer, Technometrics Vol 13, No 3, August 1971.
+ *
+ * [2] Updating Formulae and a Pairwise Algorithm for Computing Sample
+ * Variances, T. F. Chan, G. H. Golub & R. J. LeVeque, COMPSTAT 1982.
+ *
+ * [3] Numerically Stable Parallel Computation of (Co-)Variance, Erich
+ * Schubert and Michael Gertz, Proceedings of the 30th International
+ * Conference on Scientific and Statistical Database Management, 2018.
*/
static float8 *
ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
float8 *transvalues1;
float8 *transvalues2;
-
- if (!AggCheckCallContext(fcinfo, NULL))
- elog(ERROR, "aggregate function called in non-aggregate context");
+ float8 N1,
+ Sx1,
+ Sxx1,
+ N2,
+ Sx2,
+ Sxx2,
+ tmp,
+ N,
+ Sx,
+ Sxx;
transvalues1 = check_float8_array(transarray1, "float8_combine", 3);
transvalues2 = check_float8_array(transarray2, "float8_combine", 3);
- transvalues1[0] = transvalues1[0] + transvalues2[0];
- transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]);
- transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]);
+ N1 = transvalues1[0];
+ Sx1 = transvalues1[1];
+ Sxx1 = transvalues1[2];
+
+ N2 = transvalues2[0];
+ Sx2 = transvalues2[1];
+ Sxx2 = transvalues2[2];
+
+ /*--------------------
+ * The transition values combine using a generalization of the
+ * Youngs-Cramer algorithm as follows:
+ *
+ * N = N1 + N2
+ * Sx = Sx1 + Sx2
+ * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N;
+ *
+ * It's worth handling the special cases N1 = 0 and N2 = 0 separately
+ * since those cases are trivial, and we then don't need to worry about
+ * division-by-zero errors in the general case.
+ *--------------------
+ */
+ if (N1 == 0.0)
+ {
+ N = N2;
+ Sx = Sx2;
+ Sxx = Sxx2;
+ }
+ else if (N2 == 0.0)
+ {
+ N = N1;
+ Sx = Sx1;
+ Sxx = Sxx1;
+ }
+ else
+ {
+ N = N1 + N2;
+ Sx = float8_pl(Sx1, Sx2);
+ tmp = Sx1 / N1 - Sx2 / N2;
+ Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp * tmp / N;
+ check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true);
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we construct a
+ * new array with the updated transition data and return it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ transvalues1[0] = N;
+ transvalues1[1] = Sx;
+ transvalues1[2] = Sxx;
+
+ PG_RETURN_ARRAYTYPE_P(transarray1);
+ }
+ else
+ {
+ Datum transdatums[3];
+ ArrayType *result;
+
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+
+ result = construct_array(transdatums, 3,
+ FLOAT8OID,
+ sizeof(float8), FLOAT8PASSBYVAL, 'd');
- PG_RETURN_ARRAYTYPE_P(transarray1);
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 newval = PG_GETARG_FLOAT8(1);
float8 *transvalues;
+ float8 N,
+ Sx,
+ Sxx,
+ tmp;
transvalues = check_float8_array(transarray, "float8_accum", 3);
+ N = transvalues[0];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+
+ /*
+ * Use the Youngs-Cramer algorithm to incorporate the new value into the
+ * transition values.
+ */
+ N += 1.0;
+ Sx += newval;
+ if (transvalues[0] > 0.0)
+ {
+ tmp = newval * N - Sx;
+ Sxx += tmp * tmp / (N * transvalues[0]);
+
+ /*
+ * Overflow check. We only report an overflow error when finite
+ * inputs lead to infinite results. Note also that Sxx should be NaN
+ * if any of the inputs are infinite, so we intentionally prevent Sxx
+ * from becoming infinite.
+ */
+ if (isinf(Sx) || isinf(Sxx))
+ {
+ if (!isinf(transvalues[1]) && !isinf(newval))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value out of range: overflow")));
+
+ Sxx = get_float8_nan();
+ }
+ }
/*
* If we're invoked as an aggregate, we can cheat and modify our first
*/
if (AggCheckCallContext(fcinfo, NULL))
{
- transvalues[0] = transvalues[0] + 1.0;
- transvalues[1] = float8_pl(transvalues[1], newval);
- transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+ transvalues[0] = N;
+ transvalues[1] = Sx;
+ transvalues[2] = Sxx;
PG_RETURN_ARRAYTYPE_P(transarray);
}
Datum transdatums[3];
ArrayType *result;
- transvalues[0] = transvalues[0] + 1.0;
- transvalues[1] = float8_pl(transvalues[1], newval);
- transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
result = construct_array(transdatums, 3,
FLOAT8OID,
/* do computations as float8 */
float8 newval = PG_GETARG_FLOAT4(1);
float8 *transvalues;
+ float8 N,
+ Sx,
+ Sxx,
+ tmp;
transvalues = check_float8_array(transarray, "float4_accum", 3);
+ N = transvalues[0];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+
+ /*
+ * Use the Youngs-Cramer algorithm to incorporate the new value into the
+ * transition values.
+ */
+ N += 1.0;
+ Sx += newval;
+ if (transvalues[0] > 0.0)
+ {
+ tmp = newval * N - Sx;
+ Sxx += tmp * tmp / (N * transvalues[0]);
+
+ /*
+ * Overflow check. We only report an overflow error when finite
+ * inputs lead to infinite results. Note also that Sxx should be NaN
+ * if any of the inputs are infinite, so we intentionally prevent Sxx
+ * from becoming infinite.
+ */
+ if (isinf(Sx) || isinf(Sxx))
+ {
+ if (!isinf(transvalues[1]) && !isinf(newval))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value out of range: overflow")));
+
+ Sxx = get_float8_nan();
+ }
+ }
/*
* If we're invoked as an aggregate, we can cheat and modify our first
*/
if (AggCheckCallContext(fcinfo, NULL))
{
- transvalues[0] = transvalues[0] + 1.0;
- transvalues[1] = float8_pl(transvalues[1], newval);
- transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+ transvalues[0] = N;
+ transvalues[1] = Sx;
+ transvalues[2] = Sxx;
PG_RETURN_ARRAYTYPE_P(transarray);
}
Datum transdatums[3];
ArrayType *result;
- transvalues[0] = transvalues[0] + 1.0;
- transvalues[1] = float8_pl(transvalues[1], newval);
- transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval));
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
result = construct_array(transdatums, 3,
FLOAT8OID,
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX;
+ Sx;
transvalues = check_float8_array(transarray, "float8_avg", 3);
N = transvalues[0];
- sumX = transvalues[1];
- /* ignore sumX2 */
+ Sx = transvalues[1];
+ /* ignore Sxx */
/* SQL defines AVG of no values to be NULL */
if (N == 0.0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(sumX / N);
+ PG_RETURN_FLOAT8(Sx / N);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- numerator;
+ Sxx;
transvalues = check_float8_array(transarray, "float8_var_pop", 3);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
+ /* ignore Sx */
+ Sxx = transvalues[2];
/* Population variance is undefined when N is 0, so return NULL */
if (N == 0.0)
PG_RETURN_NULL();
- numerator = N * sumX2 - sumX * sumX;
- check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
-
- /* Watch out for roundoff error producing a negative numerator */
- if (numerator <= 0.0)
- PG_RETURN_FLOAT8(0.0);
+ /* Note that Sxx is guaranteed to be non-negative */
- PG_RETURN_FLOAT8(numerator / (N * N));
+ PG_RETURN_FLOAT8(Sxx / N);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- numerator;
+ Sxx;
transvalues = check_float8_array(transarray, "float8_var_samp", 3);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
+ /* ignore Sx */
+ Sxx = transvalues[2];
/* Sample variance is undefined when N is 0 or 1, so return NULL */
if (N <= 1.0)
PG_RETURN_NULL();
- numerator = N * sumX2 - sumX * sumX;
- check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
+ /* Note that Sxx is guaranteed to be non-negative */
- /* Watch out for roundoff error producing a negative numerator */
- if (numerator <= 0.0)
- PG_RETURN_FLOAT8(0.0);
-
- PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
+ PG_RETURN_FLOAT8(Sxx / (N - 1.0));
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- numerator;
+ Sxx;
transvalues = check_float8_array(transarray, "float8_stddev_pop", 3);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
+ /* ignore Sx */
+ Sxx = transvalues[2];
/* Population stddev is undefined when N is 0, so return NULL */
if (N == 0.0)
PG_RETURN_NULL();
- numerator = N * sumX2 - sumX * sumX;
- check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
-
- /* Watch out for roundoff error producing a negative numerator */
- if (numerator <= 0.0)
- PG_RETURN_FLOAT8(0.0);
+ /* Note that Sxx is guaranteed to be non-negative */
- PG_RETURN_FLOAT8(sqrt(numerator / (N * N)));
+ PG_RETURN_FLOAT8(sqrt(Sxx / N));
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- numerator;
+ Sxx;
transvalues = check_float8_array(transarray, "float8_stddev_samp", 3);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
+ /* ignore Sx */
+ Sxx = transvalues[2];
/* Sample stddev is undefined when N is 0 or 1, so return NULL */
if (N <= 1.0)
PG_RETURN_NULL();
- numerator = N * sumX2 - sumX * sumX;
- check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
+ /* Note that Sxx is guaranteed to be non-negative */
- /* Watch out for roundoff error producing a negative numerator */
- if (numerator <= 0.0)
- PG_RETURN_FLOAT8(0.0);
-
- PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0))));
+ PG_RETURN_FLOAT8(sqrt(Sxx / (N - 1.0)));
}
/*
* SQL2003 BINARY AGGREGATES
* =========================
*
+ * As with the preceding aggregates, we use the Youngs-Cramer algorithm to
+ * reduce rounding errors in the aggregate final functions.
+ *
* The transition datatype for all these aggregates is a 6-element array of
- * float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y)
- * in that order. Note that Y is the first argument to the aggregates!
+ * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ *
+ * Note that Y is the first argument to all these aggregates!
*
* It might seem attractive to optimize this by having multiple accumulator
* functions that only calculate the sums actually needed. But on most
float8 newvalX = PG_GETARG_FLOAT8(2);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- sumY,
- sumY2,
- sumXY;
+ Sx,
+ Sxx,
+ Sy,
+ Syy,
+ Sxy,
+ tmpX,
+ tmpY,
+ scale;
transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
- sumY = transvalues[3];
- sumY2 = transvalues[4];
- sumXY = transvalues[5];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+ Sy = transvalues[3];
+ Syy = transvalues[4];
+ Sxy = transvalues[5];
+ /*
+ * Use the Youngs-Cramer algorithm to incorporate the new values into the
+ * transition values.
+ */
N += 1.0;
- sumX += newvalX;
- check_float8_val(sumX, isinf(transvalues[1]) || isinf(newvalX), true);
- sumX2 += newvalX * newvalX;
- check_float8_val(sumX2, isinf(transvalues[2]) || isinf(newvalX), true);
- sumY += newvalY;
- check_float8_val(sumY, isinf(transvalues[3]) || isinf(newvalY), true);
- sumY2 += newvalY * newvalY;
- check_float8_val(sumY2, isinf(transvalues[4]) || isinf(newvalY), true);
- sumXY += newvalX * newvalY;
- check_float8_val(sumXY, isinf(transvalues[5]) || isinf(newvalX) ||
- isinf(newvalY), true);
+ Sx += newvalX;
+ Sy += newvalY;
+ if (transvalues[0] > 0.0)
+ {
+ tmpX = newvalX * N - Sx;
+ tmpY = newvalY * N - Sy;
+ scale = 1.0 / (N * transvalues[0]);
+ Sxx += tmpX * tmpX * scale;
+ Syy += tmpY * tmpY * scale;
+ Sxy += tmpX * tmpY * scale;
+
+ /*
+ * Overflow check. We only report an overflow error when finite
+ * inputs lead to infinite results. Note also that Sxx, Syy and Sxy
+ * should be NaN if any of the relevant inputs are infinite, so we
+ * intentionally prevent them from becoming infinite.
+ */
+ if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy))
+ {
+ if (((isinf(Sx) || isinf(Sxx)) &&
+ !isinf(transvalues[1]) && !isinf(newvalX)) ||
+ ((isinf(Sy) || isinf(Syy)) &&
+ !isinf(transvalues[3]) && !isinf(newvalY)) ||
+ (isinf(Sxy) &&
+ !isinf(transvalues[1]) && !isinf(newvalX) &&
+ !isinf(transvalues[3]) && !isinf(newvalY)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value out of range: overflow")));
+
+ if (isinf(Sxx))
+ Sxx = get_float8_nan();
+ if (isinf(Syy))
+ Syy = get_float8_nan();
+ if (isinf(Sxy))
+ Sxy = get_float8_nan();
+ }
+ }
/*
* If we're invoked as an aggregate, we can cheat and modify our first
if (AggCheckCallContext(fcinfo, NULL))
{
transvalues[0] = N;
- transvalues[1] = sumX;
- transvalues[2] = sumX2;
- transvalues[3] = sumY;
- transvalues[4] = sumY2;
- transvalues[5] = sumXY;
+ transvalues[1] = Sx;
+ transvalues[2] = Sxx;
+ transvalues[3] = Sy;
+ transvalues[4] = Syy;
+ transvalues[5] = Sxy;
PG_RETURN_ARRAYTYPE_P(transarray);
}
ArrayType *result;
transdatums[0] = Float8GetDatumFast(N);
- transdatums[1] = Float8GetDatumFast(sumX);
- transdatums[2] = Float8GetDatumFast(sumX2);
- transdatums[3] = Float8GetDatumFast(sumY);
- transdatums[4] = Float8GetDatumFast(sumY2);
- transdatums[5] = Float8GetDatumFast(sumXY);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+ transdatums[3] = Float8GetDatumFast(Sy);
+ transdatums[4] = Float8GetDatumFast(Syy);
+ transdatums[5] = Float8GetDatumFast(Sxy);
result = construct_array(transdatums, 6,
FLOAT8OID,
ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
float8 *transvalues1;
float8 *transvalues2;
-
- if (!AggCheckCallContext(fcinfo, NULL))
- elog(ERROR, "aggregate function called in non-aggregate context");
+ float8 N1,
+ Sx1,
+ Sxx1,
+ Sy1,
+ Syy1,
+ Sxy1,
+ N2,
+ Sx2,
+ Sxx2,
+ Sy2,
+ Syy2,
+ Sxy2,
+ tmp1,
+ tmp2,
+ N,
+ Sx,
+ Sxx,
+ Sy,
+ Syy,
+ Sxy;
transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
- transvalues1[0] = transvalues1[0] + transvalues2[0];
- transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]);
- transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]);
- transvalues1[3] = float8_pl(transvalues1[3], transvalues2[3]);
- transvalues1[4] = float8_pl(transvalues1[4], transvalues2[4]);
- transvalues1[5] = float8_pl(transvalues1[5], transvalues2[5]);
+ N1 = transvalues1[0];
+ Sx1 = transvalues1[1];
+ Sxx1 = transvalues1[2];
+ Sy1 = transvalues1[3];
+ Syy1 = transvalues1[4];
+ Sxy1 = transvalues1[5];
+
+ N2 = transvalues2[0];
+ Sx2 = transvalues2[1];
+ Sxx2 = transvalues2[2];
+ Sy2 = transvalues2[3];
+ Syy2 = transvalues2[4];
+ Sxy2 = transvalues2[5];
+
+ /*--------------------
+ * The transition values combine using a generalization of the
+ * Youngs-Cramer algorithm as follows:
+ *
+ * N = N1 + N2
+ * Sx = Sx1 + Sx2
+ * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N
+ * Sy = Sy1 + Sy2
+ * Syy = Syy1 + Syy2 + N1 * N2 * (Sy1/N1 - Sy2/N2)^2 / N
+ * Sxy = Sxy1 + Sxy2 + N1 * N2 * (Sx1/N1 - Sx2/N2) * (Sy1/N1 - Sy2/N2) / N
+ *
+ * It's worth handling the special cases N1 = 0 and N2 = 0 separately
+ * since those cases are trivial, and we then don't need to worry about
+ * division-by-zero errors in the general case.
+ *--------------------
+ */
+ if (N1 == 0.0)
+ {
+ N = N2;
+ Sx = Sx2;
+ Sxx = Sxx2;
+ Sy = Sy2;
+ Syy = Syy2;
+ Sxy = Sxy2;
+ }
+ else if (N2 == 0.0)
+ {
+ N = N1;
+ Sx = Sx1;
+ Sxx = Sxx1;
+ Sy = Sy1;
+ Syy = Syy1;
+ Sxy = Sxy1;
+ }
+ else
+ {
+ N = N1 + N2;
+ Sx = float8_pl(Sx1, Sx2);
+ tmp1 = Sx1 / N1 - Sx2 / N2;
+ Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp1 * tmp1 / N;
+ check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true);
+ Sy = float8_pl(Sy1, Sy2);
+ tmp2 = Sy1 / N1 - Sy2 / N2;
+ Syy = Syy1 + Syy2 + N1 * N2 * tmp2 * tmp2 / N;
+ check_float8_val(Syy, isinf(Syy1) || isinf(Syy2), true);
+ Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
+ check_float8_val(Sxy, isinf(Sxy1) || isinf(Sxy2), true);
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we construct a
+ * new array with the updated transition data and return it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ transvalues1[0] = N;
+ transvalues1[1] = Sx;
+ transvalues1[2] = Sxx;
+ transvalues1[3] = Sy;
+ transvalues1[4] = Syy;
+ transvalues1[5] = Sxy;
+
+ PG_RETURN_ARRAYTYPE_P(transarray1);
+ }
+ else
+ {
+ Datum transdatums[6];
+ ArrayType *result;
+
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+ transdatums[3] = Float8GetDatumFast(Sy);
+ transdatums[4] = Float8GetDatumFast(Syy);
+ transdatums[5] = Float8GetDatumFast(Sxy);
- PG_RETURN_ARRAYTYPE_P(transarray1);
+ result = construct_array(transdatums, 6,
+ FLOAT8OID,
+ sizeof(float8), FLOAT8PASSBYVAL, 'd');
+
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
}
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- numerator;
+ Sxx;
transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
+ Sxx = transvalues[2];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numerator = N * sumX2 - sumX * sumX;
- check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
+ /* Note that Sxx is guaranteed to be non-negative */
- /* Watch out for roundoff error producing a negative numerator */
- if (numerator <= 0.0)
- PG_RETURN_FLOAT8(0.0);
-
- PG_RETURN_FLOAT8(numerator / N);
+ PG_RETURN_FLOAT8(Sxx);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumY,
- sumY2,
- numerator;
+ Syy;
transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
N = transvalues[0];
- sumY = transvalues[3];
- sumY2 = transvalues[4];
+ Syy = transvalues[4];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numerator = N * sumY2 - sumY * sumY;
- check_float8_val(numerator, isinf(sumY2) || isinf(sumY), true);
-
- /* Watch out for roundoff error producing a negative numerator */
- if (numerator <= 0.0)
- PG_RETURN_FLOAT8(0.0);
+ /* Note that Syy is guaranteed to be non-negative */
- PG_RETURN_FLOAT8(numerator / N);
+ PG_RETURN_FLOAT8(Syy);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumY,
- sumXY,
- numerator;
+ Sxy;
transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumY = transvalues[3];
- sumXY = transvalues[5];
+ Sxy = transvalues[5];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numerator = N * sumXY - sumX * sumY;
- check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
- isinf(sumY), true);
-
/* A negative result is valid here */
- PG_RETURN_FLOAT8(numerator / N);
+ PG_RETURN_FLOAT8(Sxy);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX;
+ Sx;
transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
N = transvalues[0];
- sumX = transvalues[1];
+ Sx = transvalues[1];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(sumX / N);
+ PG_RETURN_FLOAT8(Sx / N);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumY;
+ Sy;
transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
N = transvalues[0];
- sumY = transvalues[3];
+ Sy = transvalues[3];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(sumY / N);
+ PG_RETURN_FLOAT8(Sy / N);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumY,
- sumXY,
- numerator;
+ Sxy;
transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumY = transvalues[3];
- sumXY = transvalues[5];
+ Sxy = transvalues[5];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numerator = N * sumXY - sumX * sumY;
- check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
- isinf(sumY), true);
-
- PG_RETURN_FLOAT8(numerator / (N * N));
+ PG_RETURN_FLOAT8(Sxy / N);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumY,
- sumXY,
- numerator;
+ Sxy;
transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumY = transvalues[3];
- sumXY = transvalues[5];
+ Sxy = transvalues[5];
/* if N is <= 1 we should return NULL */
if (N < 2.0)
PG_RETURN_NULL();
- numerator = N * sumXY - sumX * sumY;
- check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
- isinf(sumY), true);
-
- PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
+ PG_RETURN_FLOAT8(Sxy / (N - 1.0));
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- sumY,
- sumY2,
- sumXY,
- numeratorX,
- numeratorY,
- numeratorXY;
+ Sxx,
+ Syy,
+ Sxy;
transvalues = check_float8_array(transarray, "float8_corr", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
- sumY = transvalues[3];
- sumY2 = transvalues[4];
- sumXY = transvalues[5];
+ Sxx = transvalues[2];
+ Syy = transvalues[4];
+ Sxy = transvalues[5];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numeratorX = N * sumX2 - sumX * sumX;
- check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
- numeratorY = N * sumY2 - sumY * sumY;
- check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true);
- numeratorXY = N * sumXY - sumX * sumY;
- check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
- isinf(sumY), true);
- if (numeratorX <= 0 || numeratorY <= 0)
+ /* Note that Sxx and Syy are guaranteed to be non-negative */
+
+ /* per spec, return NULL for horizontal and vertical lines */
+ if (Sxx == 0 || Syy == 0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(numeratorXY / sqrt(numeratorX * numeratorY));
+ PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- sumY,
- sumY2,
- sumXY,
- numeratorX,
- numeratorY,
- numeratorXY;
+ Sxx,
+ Syy,
+ Sxy;
transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
- sumY = transvalues[3];
- sumY2 = transvalues[4];
- sumXY = transvalues[5];
+ Sxx = transvalues[2];
+ Syy = transvalues[4];
+ Sxy = transvalues[5];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numeratorX = N * sumX2 - sumX * sumX;
- check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
- numeratorY = N * sumY2 - sumY * sumY;
- check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true);
- numeratorXY = N * sumXY - sumX * sumY;
- check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
- isinf(sumY), true);
- if (numeratorX <= 0)
+ /* Note that Sxx and Syy are guaranteed to be non-negative */
+
+ /* per spec, return NULL for a vertical line */
+ if (Sxx == 0)
PG_RETURN_NULL();
- /* per spec, horizontal line produces 1.0 */
- if (numeratorY <= 0)
+
+ /* per spec, return 1.0 for a horizontal line */
+ if (Syy == 0)
PG_RETURN_FLOAT8(1.0);
- PG_RETURN_FLOAT8((numeratorXY * numeratorXY) /
- (numeratorX * numeratorY));
+ PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy));
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- sumY,
- sumXY,
- numeratorX,
- numeratorXY;
+ Sxx,
+ Sxy;
transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
- sumY = transvalues[3];
- sumXY = transvalues[5];
+ Sxx = transvalues[2];
+ Sxy = transvalues[5];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numeratorX = N * sumX2 - sumX * sumX;
- check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
- numeratorXY = N * sumXY - sumX * sumY;
- check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
- isinf(sumY), true);
- if (numeratorX <= 0)
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ /* per spec, return NULL for a vertical line */
+ if (Sxx == 0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(numeratorXY / numeratorX);
+ PG_RETURN_FLOAT8(Sxy / Sxx);
}
Datum
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- sumX,
- sumX2,
- sumY,
- sumXY,
- numeratorX,
- numeratorXXY;
+ Sx,
+ Sxx,
+ Sy,
+ Sxy;
transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
N = transvalues[0];
- sumX = transvalues[1];
- sumX2 = transvalues[2];
- sumY = transvalues[3];
- sumXY = transvalues[5];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+ Sy = transvalues[3];
+ Sxy = transvalues[5];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
- numeratorX = N * sumX2 - sumX * sumX;
- check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
- numeratorXXY = sumY * sumX2 - sumX * sumXY;
- check_float8_val(numeratorXXY, isinf(sumY) || isinf(sumX2) ||
- isinf(sumX) || isinf(sumXY), true);
- if (numeratorX <= 0)
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ /* per spec, return NULL for a vertical line */
+ if (Sxx == 0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(numeratorXXY / numeratorX);
+ PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
}
NaN
(1 row)
+-- verify correct results for infinite inputs
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('1'), ('infinity')) v(x);
+ avg | var_pop
+----------+---------
+ Infinity | NaN
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('infinity'), ('1')) v(x);
+ avg | var_pop
+----------+---------
+ Infinity | NaN
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('infinity'), ('infinity')) v(x);
+ avg | var_pop
+----------+---------
+ Infinity | NaN
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES ('-infinity'), ('infinity')) v(x);
+ avg | var_pop
+-----+---------
+ NaN | NaN
+(1 row)
+
+-- test accuracy with a large input offset
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x);
+ avg | var_pop
+-----------+---------
+ 100000005 | 2.5
+(1 row)
+
+SELECT avg(x::float8), var_pop(x::float8)
+FROM (VALUES (7000000000005), (7000000000007)) v(x);
+ avg | var_pop
+---------------+---------
+ 7000000000006 | 1
+(1 row)
+
-- SQL2003 binary aggregates
SELECT regr_count(b, a) FROM aggtest;
regr_count
0.139634516517873
(1 row)
+-- test accum and combine functions directly
+CREATE TABLE regr_test (x float8, y float8);
+INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (10,20,30,80);
+ count | sum | regr_sxx | sum | regr_syy | regr_sxy
+-------+-----+----------+------+----------+----------
+ 4 | 140 | 2900 | 1290 | 83075 | 15050
+(1 row)
+
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test;
+ count | sum | regr_sxx | sum | regr_syy | regr_sxy
+-------+-----+----------+------+----------+----------
+ 5 | 240 | 6280 | 1490 | 95080 | 8680
+(1 row)
+
+SELECT float8_accum('{4,140,2900}'::float8[], 100);
+ float8_accum
+--------------
+ {5,240,6280}
+(1 row)
+
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
+ float8_regr_accum
+------------------------------
+ {5,240,6280,1490,95080,8680}
+(1 row)
+
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (10,20,30);
+ count | sum | regr_sxx | sum | regr_syy | regr_sxy
+-------+-----+----------+-----+----------+----------
+ 3 | 60 | 200 | 750 | 20000 | 2000
+(1 row)
+
+SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
+FROM regr_test WHERE x IN (80,100);
+ count | sum | regr_sxx | sum | regr_syy | regr_sxy
+-------+-----+----------+-----+----------+----------
+ 2 | 180 | 200 | 740 | 57800 | -3400
+(1 row)
+
+SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
+ float8_combine
+----------------
+ {3,60,200}
+(1 row)
+
+SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
+ float8_combine
+----------------
+ {2,180,200}
+(1 row)
+
+SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
+ float8_combine
+----------------
+ {5,240,6280}
+(1 row)
+
+SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
+ '{0,0,0,0,0,0}'::float8[]);
+ float8_regr_combine
+---------------------------
+ {3,60,200,750,20000,2000}
+(1 row)
+
+SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
+ '{2,180,200,740,57800,-3400}'::float8[]);
+ float8_regr_combine
+-----------------------------
+ {2,180,200,740,57800,-3400}
+(1 row)
+
+SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
+ '{2,180,200,740,57800,-3400}'::float8[]);
+ float8_regr_combine
+------------------------------
+ {5,240,6280,1490,95080,8680}
+(1 row)
+
+DROP TABLE regr_test;
+-- test count, distinct
SELECT count(four) AS cnt_1000 FROM onek;
cnt_1000
----------