Commit e954a727 authored by Dean Rasheed's avatar Dean Rasheed

Improve the accuracy of floating point statistical aggregates.

When computing statistical aggregates like variance, the common
schoolbook algorithm which computes the sum of the squares of the
values and subtracts the square of the mean can lead to a large loss
of precision when using floating point arithmetic, because the
difference between the two terms is often very small relative to the
terms themselves.

To avoid this, re-work these aggregates to use the Youngs-Cramer
algorithm, which is a proven, numerically stable algorithm that
directly aggregates the sum of the squares of the differences of the
values from the mean in a single pass over the data.

While at it, improve the test coverage to test the aggregate combine
functions used during parallel aggregation.

Per report and suggested algorithm from Erich Schubert.

Patch by me, reviewed by Madeleine Thompson.

Discussion: https://postgr.es/m/153313051300.1397.9594490737341194671@wrigleys.postgresql.org
parent 38921d14
...@@ -2401,13 +2401,39 @@ setseed(PG_FUNCTION_ARGS) ...@@ -2401,13 +2401,39 @@ setseed(PG_FUNCTION_ARGS)
* float8_stddev_samp - produce final result for float STDDEV_SAMP() * float8_stddev_samp - produce final result for float STDDEV_SAMP()
* float8_stddev_pop - produce final result for float STDDEV_POP() * float8_stddev_pop - produce final result for float STDDEV_POP()
* *
* The naive schoolbook implementation of these aggregates works by
* accumulating sum(X) and sum(X^2). However, this approach suffers from
* large rounding errors in the final computation of quantities like the
* population variance (N*sum(X^2) - sum(X)^2) / N^2, since each of the
* intermediate terms is potentially very large, while the difference is often
* quite small.
*
* Instead we use the Youngs-Cramer algorithm [1] which works by accumulating
* Sx=sum(X) and Sxx=sum((X-Sx/N)^2), using a numerically stable algorithm to
* incrementally update those quantities. The final computations of each of
* the aggregate values is then trivial and gives more accurate results (for
* example, the population variance is just Sxx/N). This algorithm is also
* fairly easy to generalize to allow parallel execution without loss of
* precision (see, for example, [2]). For more details, and a comparison of
* this with other algorithms, see [3].
*
* The transition datatype for all these aggregates is a 3-element array * The transition datatype for all these aggregates is a 3-element array
* of float8, holding the values N, sum(X), sum(X*X) in that order. * of float8, holding the values N, Sx, Sxx in that order.
* *
* Note that we represent N as a float to avoid having to build a special * Note that we represent N as a float to avoid having to build a special
* datatype. Given a reasonable floating-point implementation, there should * datatype. Given a reasonable floating-point implementation, there should
* be no accuracy loss unless N exceeds 2 ^ 52 or so (by which time the * be no accuracy loss unless N exceeds 2 ^ 52 or so (by which time the
* user will have doubtless lost interest anyway...) * user will have doubtless lost interest anyway...)
*
* [1] Some Results Relevant to Choice of Sum and Sum-of-Product Algorithms,
* E. A. Youngs and E. M. Cramer, Technometrics Vol 13, No 3, August 1971.
*
* [2] Updating Formulae and a Pairwise Algorithm for Computing Sample
* Variances, T. F. Chan, G. H. Golub & R. J. LeVeque, COMPSTAT 1982.
*
* [3] Numerically Stable Parallel Computation of (Co-)Variance, Erich
* Schubert and Michael Gertz, Proceedings of the 30th International
* Conference on Scientific and Statistical Database Management, 2018.
*/ */
static float8 * static float8 *
...@@ -2441,18 +2467,90 @@ float8_combine(PG_FUNCTION_ARGS) ...@@ -2441,18 +2467,90 @@ float8_combine(PG_FUNCTION_ARGS)
ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
float8 *transvalues1; float8 *transvalues1;
float8 *transvalues2; float8 *transvalues2;
float8 N1,
if (!AggCheckCallContext(fcinfo, NULL)) Sx1,
elog(ERROR, "aggregate function called in non-aggregate context"); Sxx1,
N2,
Sx2,
Sxx2,
tmp,
N,
Sx,
Sxx;
transvalues1 = check_float8_array(transarray1, "float8_combine", 3); transvalues1 = check_float8_array(transarray1, "float8_combine", 3);
transvalues2 = check_float8_array(transarray2, "float8_combine", 3); transvalues2 = check_float8_array(transarray2, "float8_combine", 3);
transvalues1[0] = transvalues1[0] + transvalues2[0]; N1 = transvalues1[0];
transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]); Sx1 = transvalues1[1];
transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]); Sxx1 = transvalues1[2];
N2 = transvalues2[0];
Sx2 = transvalues2[1];
Sxx2 = transvalues2[2];
/*--------------------
* The transition values combine using a generalization of the
* Youngs-Cramer algorithm as follows:
*
* N = N1 + N2
* Sx = Sx1 + Sx2
* Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N;
*
* It's worth handling the special cases N1 = 0 and N2 = 0 separately
* since those cases are trivial, and we then don't need to worry about
* division-by-zero errors in the general case.
*--------------------
*/
if (N1 == 0.0)
{
N = N2;
Sx = Sx2;
Sxx = Sxx2;
}
else if (N2 == 0.0)
{
N = N1;
Sx = Sx1;
Sxx = Sxx1;
}
else
{
N = N1 + N2;
Sx = float8_pl(Sx1, Sx2);
tmp = Sx1 / N1 - Sx2 / N2;
Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp * tmp / N;
check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true);
}
/*
* If we're invoked as an aggregate, we can cheat and modify our first
* parameter in-place to reduce palloc overhead. Otherwise we construct a
* new array with the updated transition data and return it.
*/
if (AggCheckCallContext(fcinfo, NULL))
{
transvalues1[0] = N;
transvalues1[1] = Sx;
transvalues1[2] = Sxx;
PG_RETURN_ARRAYTYPE_P(transarray1);
}
else
{
Datum transdatums[3];
ArrayType *result;
transdatums[0] = Float8GetDatumFast(N);
transdatums[1] = Float8GetDatumFast(Sx);
transdatums[2] = Float8GetDatumFast(Sxx);
result = construct_array(transdatums, 3,
FLOAT8OID,
sizeof(float8), FLOAT8PASSBYVAL, 'd');
PG_RETURN_ARRAYTYPE_P(transarray1); PG_RETURN_ARRAYTYPE_P(result);
}
} }
Datum Datum
...@@ -2461,8 +2559,43 @@ float8_accum(PG_FUNCTION_ARGS) ...@@ -2461,8 +2559,43 @@ float8_accum(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 newval = PG_GETARG_FLOAT8(1); float8 newval = PG_GETARG_FLOAT8(1);
float8 *transvalues; float8 *transvalues;
float8 N,
Sx,
Sxx,
tmp;
transvalues = check_float8_array(transarray, "float8_accum", 3); transvalues = check_float8_array(transarray, "float8_accum", 3);
N = transvalues[0];
Sx = transvalues[1];
Sxx = transvalues[2];
/*
* Use the Youngs-Cramer algorithm to incorporate the new value into the
* transition values.
*/
N += 1.0;
Sx += newval;
if (transvalues[0] > 0.0)
{
tmp = newval * N - Sx;
Sxx += tmp * tmp / (N * transvalues[0]);
/*
* Overflow check. We only report an overflow error when finite
* inputs lead to infinite results. Note also that Sxx should be NaN
* if any of the inputs are infinite, so we intentionally prevent Sxx
* from becoming infinite.
*/
if (isinf(Sx) || isinf(Sxx))
{
if (!isinf(transvalues[1]) && !isinf(newval))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value out of range: overflow")));
Sxx = get_float8_nan();
}
}
/* /*
* If we're invoked as an aggregate, we can cheat and modify our first * If we're invoked as an aggregate, we can cheat and modify our first
...@@ -2471,9 +2604,9 @@ float8_accum(PG_FUNCTION_ARGS) ...@@ -2471,9 +2604,9 @@ float8_accum(PG_FUNCTION_ARGS)
*/ */
if (AggCheckCallContext(fcinfo, NULL)) if (AggCheckCallContext(fcinfo, NULL))
{ {
transvalues[0] = transvalues[0] + 1.0; transvalues[0] = N;
transvalues[1] = float8_pl(transvalues[1], newval); transvalues[1] = Sx;
transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); transvalues[2] = Sxx;
PG_RETURN_ARRAYTYPE_P(transarray); PG_RETURN_ARRAYTYPE_P(transarray);
} }
...@@ -2482,9 +2615,9 @@ float8_accum(PG_FUNCTION_ARGS) ...@@ -2482,9 +2615,9 @@ float8_accum(PG_FUNCTION_ARGS)
Datum transdatums[3]; Datum transdatums[3];
ArrayType *result; ArrayType *result;
transvalues[0] = transvalues[0] + 1.0; transdatums[0] = Float8GetDatumFast(N);
transvalues[1] = float8_pl(transvalues[1], newval); transdatums[1] = Float8GetDatumFast(Sx);
transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); transdatums[2] = Float8GetDatumFast(Sxx);
result = construct_array(transdatums, 3, result = construct_array(transdatums, 3,
FLOAT8OID, FLOAT8OID,
...@@ -2502,8 +2635,43 @@ float4_accum(PG_FUNCTION_ARGS) ...@@ -2502,8 +2635,43 @@ float4_accum(PG_FUNCTION_ARGS)
/* do computations as float8 */ /* do computations as float8 */
float8 newval = PG_GETARG_FLOAT4(1); float8 newval = PG_GETARG_FLOAT4(1);
float8 *transvalues; float8 *transvalues;
float8 N,
Sx,
Sxx,
tmp;
transvalues = check_float8_array(transarray, "float4_accum", 3); transvalues = check_float8_array(transarray, "float4_accum", 3);
N = transvalues[0];
Sx = transvalues[1];
Sxx = transvalues[2];
/*
* Use the Youngs-Cramer algorithm to incorporate the new value into the
* transition values.
*/
N += 1.0;
Sx += newval;
if (transvalues[0] > 0.0)
{
tmp = newval * N - Sx;
Sxx += tmp * tmp / (N * transvalues[0]);
/*
* Overflow check. We only report an overflow error when finite
* inputs lead to infinite results. Note also that Sxx should be NaN
* if any of the inputs are infinite, so we intentionally prevent Sxx
* from becoming infinite.
*/
if (isinf(Sx) || isinf(Sxx))
{
if (!isinf(transvalues[1]) && !isinf(newval))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value out of range: overflow")));
Sxx = get_float8_nan();
}
}
/* /*
* If we're invoked as an aggregate, we can cheat and modify our first * If we're invoked as an aggregate, we can cheat and modify our first
...@@ -2512,9 +2680,9 @@ float4_accum(PG_FUNCTION_ARGS) ...@@ -2512,9 +2680,9 @@ float4_accum(PG_FUNCTION_ARGS)
*/ */
if (AggCheckCallContext(fcinfo, NULL)) if (AggCheckCallContext(fcinfo, NULL))
{ {
transvalues[0] = transvalues[0] + 1.0; transvalues[0] = N;
transvalues[1] = float8_pl(transvalues[1], newval); transvalues[1] = Sx;
transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); transvalues[2] = Sxx;
PG_RETURN_ARRAYTYPE_P(transarray); PG_RETURN_ARRAYTYPE_P(transarray);
} }
...@@ -2523,9 +2691,9 @@ float4_accum(PG_FUNCTION_ARGS) ...@@ -2523,9 +2691,9 @@ float4_accum(PG_FUNCTION_ARGS)
Datum transdatums[3]; Datum transdatums[3];
ArrayType *result; ArrayType *result;
transvalues[0] = transvalues[0] + 1.0; transdatums[0] = Float8GetDatumFast(N);
transvalues[1] = float8_pl(transvalues[1], newval); transdatums[1] = Float8GetDatumFast(Sx);
transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); transdatums[2] = Float8GetDatumFast(Sxx);
result = construct_array(transdatums, 3, result = construct_array(transdatums, 3,
FLOAT8OID, FLOAT8OID,
...@@ -2541,18 +2709,18 @@ float8_avg(PG_FUNCTION_ARGS) ...@@ -2541,18 +2709,18 @@ float8_avg(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX; Sx;
transvalues = check_float8_array(transarray, "float8_avg", 3); transvalues = check_float8_array(transarray, "float8_avg", 3);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sx = transvalues[1];
/* ignore sumX2 */ /* ignore Sxx */
/* SQL defines AVG of no values to be NULL */ /* SQL defines AVG of no values to be NULL */
if (N == 0.0) if (N == 0.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
PG_RETURN_FLOAT8(sumX / N); PG_RETURN_FLOAT8(Sx / N);
} }
Datum Datum
...@@ -2561,27 +2729,20 @@ float8_var_pop(PG_FUNCTION_ARGS) ...@@ -2561,27 +2729,20 @@ float8_var_pop(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx;
sumX2,
numerator;
transvalues = check_float8_array(transarray, "float8_var_pop", 3); transvalues = check_float8_array(transarray, "float8_var_pop", 3);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; /* ignore Sx */
sumX2 = transvalues[2]; Sxx = transvalues[2];
/* Population variance is undefined when N is 0, so return NULL */ /* Population variance is undefined when N is 0, so return NULL */
if (N == 0.0) if (N == 0.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumX2 - sumX * sumX; /* Note that Sxx is guaranteed to be non-negative */
check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
/* Watch out for roundoff error producing a negative numerator */
if (numerator <= 0.0)
PG_RETURN_FLOAT8(0.0);
PG_RETURN_FLOAT8(numerator / (N * N)); PG_RETURN_FLOAT8(Sxx / N);
} }
Datum Datum
...@@ -2590,27 +2751,20 @@ float8_var_samp(PG_FUNCTION_ARGS) ...@@ -2590,27 +2751,20 @@ float8_var_samp(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx;
sumX2,
numerator;
transvalues = check_float8_array(transarray, "float8_var_samp", 3); transvalues = check_float8_array(transarray, "float8_var_samp", 3);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; /* ignore Sx */
sumX2 = transvalues[2]; Sxx = transvalues[2];
/* Sample variance is undefined when N is 0 or 1, so return NULL */ /* Sample variance is undefined when N is 0 or 1, so return NULL */
if (N <= 1.0) if (N <= 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumX2 - sumX * sumX; /* Note that Sxx is guaranteed to be non-negative */
check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
/* Watch out for roundoff error producing a negative numerator */ PG_RETURN_FLOAT8(Sxx / (N - 1.0));
if (numerator <= 0.0)
PG_RETURN_FLOAT8(0.0);
PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
} }
Datum Datum
...@@ -2619,27 +2773,20 @@ float8_stddev_pop(PG_FUNCTION_ARGS) ...@@ -2619,27 +2773,20 @@ float8_stddev_pop(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx;
sumX2,
numerator;
transvalues = check_float8_array(transarray, "float8_stddev_pop", 3); transvalues = check_float8_array(transarray, "float8_stddev_pop", 3);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; /* ignore Sx */
sumX2 = transvalues[2]; Sxx = transvalues[2];
/* Population stddev is undefined when N is 0, so return NULL */ /* Population stddev is undefined when N is 0, so return NULL */
if (N == 0.0) if (N == 0.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumX2 - sumX * sumX; /* Note that Sxx is guaranteed to be non-negative */
check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
/* Watch out for roundoff error producing a negative numerator */
if (numerator <= 0.0)
PG_RETURN_FLOAT8(0.0);
PG_RETURN_FLOAT8(sqrt(numerator / (N * N))); PG_RETURN_FLOAT8(sqrt(Sxx / N));
} }
Datum Datum
...@@ -2648,27 +2795,20 @@ float8_stddev_samp(PG_FUNCTION_ARGS) ...@@ -2648,27 +2795,20 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx;
sumX2,
numerator;
transvalues = check_float8_array(transarray, "float8_stddev_samp", 3); transvalues = check_float8_array(transarray, "float8_stddev_samp", 3);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; /* ignore Sx */
sumX2 = transvalues[2]; Sxx = transvalues[2];
/* Sample stddev is undefined when N is 0 or 1, so return NULL */ /* Sample stddev is undefined when N is 0 or 1, so return NULL */
if (N <= 1.0) if (N <= 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumX2 - sumX * sumX; /* Note that Sxx is guaranteed to be non-negative */
check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
/* Watch out for roundoff error producing a negative numerator */ PG_RETURN_FLOAT8(sqrt(Sxx / (N - 1.0)));
if (numerator <= 0.0)
PG_RETURN_FLOAT8(0.0);
PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0))));
} }
/* /*
...@@ -2676,9 +2816,14 @@ float8_stddev_samp(PG_FUNCTION_ARGS) ...@@ -2676,9 +2816,14 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
* SQL2003 BINARY AGGREGATES * SQL2003 BINARY AGGREGATES
* ========================= * =========================
* *
* As with the preceding aggregates, we use the Youngs-Cramer algorithm to
* reduce rounding errors in the aggregate final functions.
*
* The transition datatype for all these aggregates is a 6-element array of * The transition datatype for all these aggregates is a 6-element array of
* float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y) * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
* in that order. Note that Y is the first argument to the aggregates! * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
*
* Note that Y is the first argument to all these aggregates!
* *
* It might seem attractive to optimize this by having multiple accumulator * It might seem attractive to optimize this by having multiple accumulator
* functions that only calculate the sums actually needed. But on most * functions that only calculate the sums actually needed. But on most
...@@ -2695,32 +2840,66 @@ float8_regr_accum(PG_FUNCTION_ARGS) ...@@ -2695,32 +2840,66 @@ float8_regr_accum(PG_FUNCTION_ARGS)
float8 newvalX = PG_GETARG_FLOAT8(2); float8 newvalX = PG_GETARG_FLOAT8(2);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sx,
sumX2, Sxx,
sumY, Sy,
sumY2, Syy,
sumXY; Sxy,
tmpX,
tmpY,
scale;
transvalues = check_float8_array(transarray, "float8_regr_accum", 6); transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sx = transvalues[1];
sumX2 = transvalues[2]; Sxx = transvalues[2];
sumY = transvalues[3]; Sy = transvalues[3];
sumY2 = transvalues[4]; Syy = transvalues[4];
sumXY = transvalues[5]; Sxy = transvalues[5];
/*
* Use the Youngs-Cramer algorithm to incorporate the new values into the
* transition values.
*/
N += 1.0; N += 1.0;
sumX += newvalX; Sx += newvalX;
check_float8_val(sumX, isinf(transvalues[1]) || isinf(newvalX), true); Sy += newvalY;
sumX2 += newvalX * newvalX; if (transvalues[0] > 0.0)
check_float8_val(sumX2, isinf(transvalues[2]) || isinf(newvalX), true); {
sumY += newvalY; tmpX = newvalX * N - Sx;
check_float8_val(sumY, isinf(transvalues[3]) || isinf(newvalY), true); tmpY = newvalY * N - Sy;
sumY2 += newvalY * newvalY; scale = 1.0 / (N * transvalues[0]);
check_float8_val(sumY2, isinf(transvalues[4]) || isinf(newvalY), true); Sxx += tmpX * tmpX * scale;
sumXY += newvalX * newvalY; Syy += tmpY * tmpY * scale;
check_float8_val(sumXY, isinf(transvalues[5]) || isinf(newvalX) || Sxy += tmpX * tmpY * scale;
isinf(newvalY), true);
/*
* Overflow check. We only report an overflow error when finite
* inputs lead to infinite results. Note also that Sxx, Syy and Sxy
* should be NaN if any of the relevant inputs are infinite, so we
* intentionally prevent them from becoming infinite.
*/
if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy))
{
if (((isinf(Sx) || isinf(Sxx)) &&
!isinf(transvalues[1]) && !isinf(newvalX)) ||
((isinf(Sy) || isinf(Syy)) &&
!isinf(transvalues[3]) && !isinf(newvalY)) ||
(isinf(Sxy) &&
!isinf(transvalues[1]) && !isinf(newvalX) &&
!isinf(transvalues[3]) && !isinf(newvalY)))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value out of range: overflow")));
if (isinf(Sxx))
Sxx = get_float8_nan();
if (isinf(Syy))
Syy = get_float8_nan();
if (isinf(Sxy))
Sxy = get_float8_nan();
}
}
/* /*
* If we're invoked as an aggregate, we can cheat and modify our first * If we're invoked as an aggregate, we can cheat and modify our first
...@@ -2730,11 +2909,11 @@ float8_regr_accum(PG_FUNCTION_ARGS) ...@@ -2730,11 +2909,11 @@ float8_regr_accum(PG_FUNCTION_ARGS)
if (AggCheckCallContext(fcinfo, NULL)) if (AggCheckCallContext(fcinfo, NULL))
{ {
transvalues[0] = N; transvalues[0] = N;
transvalues[1] = sumX; transvalues[1] = Sx;
transvalues[2] = sumX2; transvalues[2] = Sxx;
transvalues[3] = sumY; transvalues[3] = Sy;
transvalues[4] = sumY2; transvalues[4] = Syy;
transvalues[5] = sumXY; transvalues[5] = Sxy;
PG_RETURN_ARRAYTYPE_P(transarray); PG_RETURN_ARRAYTYPE_P(transarray);
} }
...@@ -2744,11 +2923,11 @@ float8_regr_accum(PG_FUNCTION_ARGS) ...@@ -2744,11 +2923,11 @@ float8_regr_accum(PG_FUNCTION_ARGS)
ArrayType *result; ArrayType *result;
transdatums[0] = Float8GetDatumFast(N); transdatums[0] = Float8GetDatumFast(N);
transdatums[1] = Float8GetDatumFast(sumX); transdatums[1] = Float8GetDatumFast(Sx);
transdatums[2] = Float8GetDatumFast(sumX2); transdatums[2] = Float8GetDatumFast(Sxx);
transdatums[3] = Float8GetDatumFast(sumY); transdatums[3] = Float8GetDatumFast(Sy);
transdatums[4] = Float8GetDatumFast(sumY2); transdatums[4] = Float8GetDatumFast(Syy);
transdatums[5] = Float8GetDatumFast(sumXY); transdatums[5] = Float8GetDatumFast(Sxy);
result = construct_array(transdatums, 6, result = construct_array(transdatums, 6,
FLOAT8OID, FLOAT8OID,
...@@ -2773,21 +2952,127 @@ float8_regr_combine(PG_FUNCTION_ARGS) ...@@ -2773,21 +2952,127 @@ float8_regr_combine(PG_FUNCTION_ARGS)
ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
float8 *transvalues1; float8 *transvalues1;
float8 *transvalues2; float8 *transvalues2;
float8 N1,
if (!AggCheckCallContext(fcinfo, NULL)) Sx1,
elog(ERROR, "aggregate function called in non-aggregate context"); Sxx1,
Sy1,
Syy1,
Sxy1,
N2,
Sx2,
Sxx2,
Sy2,
Syy2,
Sxy2,
tmp1,
tmp2,
N,
Sx,
Sxx,
Sy,
Syy,
Sxy;
transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6); transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6); transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
transvalues1[0] = transvalues1[0] + transvalues2[0]; N1 = transvalues1[0];
transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]); Sx1 = transvalues1[1];
transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]); Sxx1 = transvalues1[2];
transvalues1[3] = float8_pl(transvalues1[3], transvalues2[3]); Sy1 = transvalues1[3];
transvalues1[4] = float8_pl(transvalues1[4], transvalues2[4]); Syy1 = transvalues1[4];
transvalues1[5] = float8_pl(transvalues1[5], transvalues2[5]); Sxy1 = transvalues1[5];
N2 = transvalues2[0];
Sx2 = transvalues2[1];
Sxx2 = transvalues2[2];
Sy2 = transvalues2[3];
Syy2 = transvalues2[4];
Sxy2 = transvalues2[5];
/*--------------------
* The transition values combine using a generalization of the
* Youngs-Cramer algorithm as follows:
*
* N = N1 + N2
* Sx = Sx1 + Sx2
* Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N
* Sy = Sy1 + Sy2
* Syy = Syy1 + Syy2 + N1 * N2 * (Sy1/N1 - Sy2/N2)^2 / N
* Sxy = Sxy1 + Sxy2 + N1 * N2 * (Sx1/N1 - Sx2/N2) * (Sy1/N1 - Sy2/N2) / N
*
* It's worth handling the special cases N1 = 0 and N2 = 0 separately
* since those cases are trivial, and we then don't need to worry about
* division-by-zero errors in the general case.
*--------------------
*/
if (N1 == 0.0)
{
N = N2;
Sx = Sx2;
Sxx = Sxx2;
Sy = Sy2;
Syy = Syy2;
Sxy = Sxy2;
}
else if (N2 == 0.0)
{
N = N1;
Sx = Sx1;
Sxx = Sxx1;
Sy = Sy1;
Syy = Syy1;
Sxy = Sxy1;
}
else
{
N = N1 + N2;
Sx = float8_pl(Sx1, Sx2);
tmp1 = Sx1 / N1 - Sx2 / N2;
Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp1 * tmp1 / N;
check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true);
Sy = float8_pl(Sy1, Sy2);
tmp2 = Sy1 / N1 - Sy2 / N2;
Syy = Syy1 + Syy2 + N1 * N2 * tmp2 * tmp2 / N;
check_float8_val(Syy, isinf(Syy1) || isinf(Syy2), true);
Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
check_float8_val(Sxy, isinf(Sxy1) || isinf(Sxy2), true);
}
/*
* If we're invoked as an aggregate, we can cheat and modify our first
* parameter in-place to reduce palloc overhead. Otherwise we construct a
* new array with the updated transition data and return it.
*/
if (AggCheckCallContext(fcinfo, NULL))
{
transvalues1[0] = N;
transvalues1[1] = Sx;
transvalues1[2] = Sxx;
transvalues1[3] = Sy;
transvalues1[4] = Syy;
transvalues1[5] = Sxy;
PG_RETURN_ARRAYTYPE_P(transarray1);
}
else
{
Datum transdatums[6];
ArrayType *result;
transdatums[0] = Float8GetDatumFast(N);
transdatums[1] = Float8GetDatumFast(Sx);
transdatums[2] = Float8GetDatumFast(Sxx);
transdatums[3] = Float8GetDatumFast(Sy);
transdatums[4] = Float8GetDatumFast(Syy);
transdatums[5] = Float8GetDatumFast(Sxy);
PG_RETURN_ARRAYTYPE_P(transarray1); result = construct_array(transdatums, 6,
FLOAT8OID,
sizeof(float8), FLOAT8PASSBYVAL, 'd');
PG_RETURN_ARRAYTYPE_P(result);
}
} }
...@@ -2797,27 +3082,19 @@ float8_regr_sxx(PG_FUNCTION_ARGS) ...@@ -2797,27 +3082,19 @@ float8_regr_sxx(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx;
sumX2,
numerator;
transvalues = check_float8_array(transarray, "float8_regr_sxx", 6); transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sxx = transvalues[2];
sumX2 = transvalues[2];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumX2 - sumX * sumX; /* Note that Sxx is guaranteed to be non-negative */
check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true);
/* Watch out for roundoff error producing a negative numerator */ PG_RETURN_FLOAT8(Sxx);
if (numerator <= 0.0)
PG_RETURN_FLOAT8(0.0);
PG_RETURN_FLOAT8(numerator / N);
} }
Datum Datum
...@@ -2826,27 +3103,19 @@ float8_regr_syy(PG_FUNCTION_ARGS) ...@@ -2826,27 +3103,19 @@ float8_regr_syy(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumY, Syy;
sumY2,
numerator;
transvalues = check_float8_array(transarray, "float8_regr_syy", 6); transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
N = transvalues[0]; N = transvalues[0];
sumY = transvalues[3]; Syy = transvalues[4];
sumY2 = transvalues[4];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumY2 - sumY * sumY; /* Note that Syy is guaranteed to be non-negative */
check_float8_val(numerator, isinf(sumY2) || isinf(sumY), true);
/* Watch out for roundoff error producing a negative numerator */
if (numerator <= 0.0)
PG_RETURN_FLOAT8(0.0);
PG_RETURN_FLOAT8(numerator / N); PG_RETURN_FLOAT8(Syy);
} }
Datum Datum
...@@ -2855,28 +3124,19 @@ float8_regr_sxy(PG_FUNCTION_ARGS) ...@@ -2855,28 +3124,19 @@ float8_regr_sxy(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxy;
sumY,
sumXY,
numerator;
transvalues = check_float8_array(transarray, "float8_regr_sxy", 6); transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sxy = transvalues[5];
sumY = transvalues[3];
sumXY = transvalues[5];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumXY - sumX * sumY;
check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
isinf(sumY), true);
/* A negative result is valid here */ /* A negative result is valid here */
PG_RETURN_FLOAT8(numerator / N); PG_RETURN_FLOAT8(Sxy);
} }
Datum Datum
...@@ -2885,17 +3145,17 @@ float8_regr_avgx(PG_FUNCTION_ARGS) ...@@ -2885,17 +3145,17 @@ float8_regr_avgx(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX; Sx;
transvalues = check_float8_array(transarray, "float8_regr_avgx", 6); transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sx = transvalues[1];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
PG_RETURN_FLOAT8(sumX / N); PG_RETURN_FLOAT8(Sx / N);
} }
Datum Datum
...@@ -2904,17 +3164,17 @@ float8_regr_avgy(PG_FUNCTION_ARGS) ...@@ -2904,17 +3164,17 @@ float8_regr_avgy(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumY; Sy;
transvalues = check_float8_array(transarray, "float8_regr_avgy", 6); transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
N = transvalues[0]; N = transvalues[0];
sumY = transvalues[3]; Sy = transvalues[3];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
PG_RETURN_FLOAT8(sumY / N); PG_RETURN_FLOAT8(Sy / N);
} }
Datum Datum
...@@ -2923,26 +3183,17 @@ float8_covar_pop(PG_FUNCTION_ARGS) ...@@ -2923,26 +3183,17 @@ float8_covar_pop(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxy;
sumY,
sumXY,
numerator;
transvalues = check_float8_array(transarray, "float8_covar_pop", 6); transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sxy = transvalues[5];
sumY = transvalues[3];
sumXY = transvalues[5];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumXY - sumX * sumY; PG_RETURN_FLOAT8(Sxy / N);
check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
isinf(sumY), true);
PG_RETURN_FLOAT8(numerator / (N * N));
} }
Datum Datum
...@@ -2951,26 +3202,17 @@ float8_covar_samp(PG_FUNCTION_ARGS) ...@@ -2951,26 +3202,17 @@ float8_covar_samp(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxy;
sumY,
sumXY,
numerator;
transvalues = check_float8_array(transarray, "float8_covar_samp", 6); transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sxy = transvalues[5];
sumY = transvalues[3];
sumXY = transvalues[5];
/* if N is <= 1 we should return NULL */ /* if N is <= 1 we should return NULL */
if (N < 2.0) if (N < 2.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numerator = N * sumXY - sumX * sumY; PG_RETURN_FLOAT8(Sxy / (N - 1.0));
check_float8_val(numerator, isinf(sumXY) || isinf(sumX) ||
isinf(sumY), true);
PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
} }
Datum Datum
...@@ -2979,38 +3221,27 @@ float8_corr(PG_FUNCTION_ARGS) ...@@ -2979,38 +3221,27 @@ float8_corr(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx,
sumX2, Syy,
sumY, Sxy;
sumY2,
sumXY,
numeratorX,
numeratorY,
numeratorXY;
transvalues = check_float8_array(transarray, "float8_corr", 6); transvalues = check_float8_array(transarray, "float8_corr", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sxx = transvalues[2];
sumX2 = transvalues[2]; Syy = transvalues[4];
sumY = transvalues[3]; Sxy = transvalues[5];
sumY2 = transvalues[4];
sumXY = transvalues[5];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numeratorX = N * sumX2 - sumX * sumX; /* Note that Sxx and Syy are guaranteed to be non-negative */
check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
numeratorY = N * sumY2 - sumY * sumY; /* per spec, return NULL for horizontal and vertical lines */
check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true); if (Sxx == 0 || Syy == 0)
numeratorXY = N * sumXY - sumX * sumY;
check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
isinf(sumY), true);
if (numeratorX <= 0 || numeratorY <= 0)
PG_RETURN_NULL(); PG_RETURN_NULL();
PG_RETURN_FLOAT8(numeratorXY / sqrt(numeratorX * numeratorY)); PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
} }
Datum Datum
...@@ -3019,42 +3250,31 @@ float8_regr_r2(PG_FUNCTION_ARGS) ...@@ -3019,42 +3250,31 @@ float8_regr_r2(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx,
sumX2, Syy,
sumY, Sxy;
sumY2,
sumXY,
numeratorX,
numeratorY,
numeratorXY;
transvalues = check_float8_array(transarray, "float8_regr_r2", 6); transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sxx = transvalues[2];
sumX2 = transvalues[2]; Syy = transvalues[4];
sumY = transvalues[3]; Sxy = transvalues[5];
sumY2 = transvalues[4];
sumXY = transvalues[5];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numeratorX = N * sumX2 - sumX * sumX; /* Note that Sxx and Syy are guaranteed to be non-negative */
check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
numeratorY = N * sumY2 - sumY * sumY; /* per spec, return NULL for a vertical line */
check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true); if (Sxx == 0)
numeratorXY = N * sumXY - sumX * sumY;
check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) ||
isinf(sumY), true);
if (numeratorX <= 0)
PG_RETURN_NULL(); PG_RETURN_NULL();
/* per spec, horizontal line produces 1.0 */
if (numeratorY <= 0) /* per spec, return 1.0 for a horizontal line */
if (Syy == 0)
PG_RETURN_FLOAT8(1.0); PG_RETURN_FLOAT8(1.0);
PG_RETURN_FLOAT8((numeratorXY * numeratorXY) / PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy));
(numeratorX * numeratorY));
} }
Datum Datum
...@@ -3063,33 +3283,25 @@ float8_regr_slope(PG_FUNCTION_ARGS) ...@@ -3063,33 +3283,25 @@ float8_regr_slope(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sxx,
sumX2, Sxy;
sumY,
sumXY,
numeratorX,
numeratorXY;
transvalues = check_float8_array(transarray, "float8_regr_slope", 6); transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sxx = transvalues[2];
sumX2 = transvalues[2]; Sxy = transvalues[5];
sumY = transvalues[3];
sumXY = transvalues[5];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numeratorX = N * sumX2 - sumX * sumX; /* Note that Sxx is guaranteed to be non-negative */
check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
numeratorXY = N * sumXY - sumX * sumY; /* per spec, return NULL for a vertical line */
check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) || if (Sxx == 0)
isinf(sumY), true);
if (numeratorX <= 0)
PG_RETURN_NULL(); PG_RETURN_NULL();
PG_RETURN_FLOAT8(numeratorXY / numeratorX); PG_RETURN_FLOAT8(Sxy / Sxx);
} }
Datum Datum
...@@ -3098,33 +3310,29 @@ float8_regr_intercept(PG_FUNCTION_ARGS) ...@@ -3098,33 +3310,29 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues; float8 *transvalues;
float8 N, float8 N,
sumX, Sx,
sumX2, Sxx,
sumY, Sy,
sumXY, Sxy;
numeratorX,
numeratorXXY;
transvalues = check_float8_array(transarray, "float8_regr_intercept", 6); transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
N = transvalues[0]; N = transvalues[0];
sumX = transvalues[1]; Sx = transvalues[1];
sumX2 = transvalues[2]; Sxx = transvalues[2];
sumY = transvalues[3]; Sy = transvalues[3];
sumXY = transvalues[5]; Sxy = transvalues[5];
/* if N is 0 we should return NULL */ /* if N is 0 we should return NULL */
if (N < 1.0) if (N < 1.0)
PG_RETURN_NULL(); PG_RETURN_NULL();
numeratorX = N * sumX2 - sumX * sumX; /* Note that Sxx is guaranteed to be non-negative */
check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true);
numeratorXXY = sumY * sumX2 - sumX * sumXY; /* per spec, return NULL for a vertical line */
check_float8_val(numeratorXXY, isinf(sumY) || isinf(sumX2) || if (Sxx == 0)
isinf(sumX) || isinf(sumXY), true);
if (numeratorX <= 0)
PG_RETURN_NULL(); PG_RETURN_NULL();
PG_RETURN_FLOAT8(numeratorXXY / numeratorX); PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
} }
......
...@@ -198,6 +198,50 @@ select avg('NaN'::numeric) from generate_series(1,3); ...@@ -198,6 +198,50 @@ select avg('NaN'::numeric) from generate_series(1,3);
NaN NaN
(1 row) (1 row)
-- verify correct results for infinite inputs
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('1'), ('infinity')) v(x);
avg | var_pop
----------+---------
Infinity | NaN
(1 row)
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('infinity'), ('1')) v(x);
avg | var_pop
----------+---------
Infinity | NaN
(1 row)
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('infinity'), ('infinity')) v(x);
avg | var_pop
----------+---------
Infinity | NaN
(1 row)
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('-infinity'), ('infinity')) v(x);
avg | var_pop
-----+---------
NaN | NaN
(1 row)
-- test accuracy with a large input offset
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x);
avg | var_pop
-----------+---------
100000005 | 2.5
(1 row)
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES (7000000000005), (7000000000007)) v(x);
avg | var_pop
---------------+---------
7000000000006 | 1
(1 row)
-- SQL2003 binary aggregates -- SQL2003 binary aggregates
SELECT regr_count(b, a) FROM aggtest; SELECT regr_count(b, a) FROM aggtest;
regr_count regr_count
...@@ -253,6 +297,90 @@ SELECT corr(b, a) FROM aggtest; ...@@ -253,6 +297,90 @@ SELECT corr(b, a) FROM aggtest;
0.139634516517873 0.139634516517873
(1 row) (1 row)
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (10,20,30,80);
count | sum | regr_sxx | sum | regr_syy | regr_sxy
-------+-----+----------+------+----------+----------
4 | 140 | 2900 | 1290 | 83075 | 15050
(1 row)
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test;
count | sum | regr_sxx | sum | regr_syy | regr_sxy
-------+-----+----------+------+----------+----------
5 | 240 | 6280 | 1490 | 95080 | 8680
(1 row)
SELECT float8_accum('{4,140,2900}'::float8[], 100);
float8_accum
--------------
{5,240,6280}
(1 row)
SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
float8_regr_accum
------------------------------
{5,240,6280,1490,95080,8680}
(1 row)
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (10,20,30);
count | sum | regr_sxx | sum | regr_syy | regr_sxy
-------+-----+----------+-----+----------+----------
3 | 60 | 200 | 750 | 20000 | 2000
(1 row)
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (80,100);
count | sum | regr_sxx | sum | regr_syy | regr_sxy
-------+-----+----------+-----+----------+----------
2 | 180 | 200 | 740 | 57800 | -3400
(1 row)
SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
float8_combine
----------------
{3,60,200}
(1 row)
SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
float8_combine
----------------
{2,180,200}
(1 row)
SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
float8_combine
----------------
{5,240,6280}
(1 row)
SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
'{0,0,0,0,0,0}'::float8[]);
float8_regr_combine
---------------------------
{3,60,200,750,20000,2000}
(1 row)
SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
'{2,180,200,740,57800,-3400}'::float8[]);
float8_regr_combine
-----------------------------
{2,180,200,740,57800,-3400}
(1 row)
SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
'{2,180,200,740,57800,-3400}'::float8[]);
float8_regr_combine
------------------------------
{5,240,6280,1490,95080,8680}
(1 row)
DROP TABLE regr_test;
-- test count, distinct
SELECT count(four) AS cnt_1000 FROM onek; SELECT count(four) AS cnt_1000 FROM onek;
cnt_1000 cnt_1000
---------- ----------
......
...@@ -51,6 +51,22 @@ select avg(null::float8) from generate_series(1,3); ...@@ -51,6 +51,22 @@ select avg(null::float8) from generate_series(1,3);
select sum('NaN'::numeric) from generate_series(1,3); select sum('NaN'::numeric) from generate_series(1,3);
select avg('NaN'::numeric) from generate_series(1,3); select avg('NaN'::numeric) from generate_series(1,3);
-- verify correct results for infinite inputs
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('1'), ('infinity')) v(x);
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('infinity'), ('1')) v(x);
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('infinity'), ('infinity')) v(x);
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES ('-infinity'), ('infinity')) v(x);
-- test accuracy with a large input offset
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x);
SELECT avg(x::float8), var_pop(x::float8)
FROM (VALUES (7000000000005), (7000000000007)) v(x);
-- SQL2003 binary aggregates -- SQL2003 binary aggregates
SELECT regr_count(b, a) FROM aggtest; SELECT regr_count(b, a) FROM aggtest;
SELECT regr_sxx(b, a) FROM aggtest; SELECT regr_sxx(b, a) FROM aggtest;
...@@ -62,6 +78,31 @@ SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest; ...@@ -62,6 +78,31 @@ SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest; SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
SELECT corr(b, a) FROM aggtest; SELECT corr(b, a) FROM aggtest;
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (10,20,30,80);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test;
SELECT float8_accum('{4,140,2900}'::float8[], 100);
SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (10,20,30);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (80,100);
SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
'{0,0,0,0,0,0}'::float8[]);
SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
'{2,180,200,740,57800,-3400}'::float8[]);
SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
'{2,180,200,740,57800,-3400}'::float8[]);
DROP TABLE regr_test;
-- test count, distinct
SELECT count(four) AS cnt_1000 FROM onek; SELECT count(four) AS cnt_1000 FROM onek;
SELECT count(DISTINCT four) AS cnt_4 FROM onek; SELECT count(DISTINCT four) AS cnt_4 FROM onek;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment