Commit 1249cf8f authored by Tom Lane's avatar Tom Lane

SQL2003-standard statistical aggregates, by Sergey Koposov. I've added only

the float8 versions of the aggregates, which is all that the standard requires.
Sergey's original patch also provided versions using numeric arithmetic,
but given the size and slowness of the code, I doubt we ought to include
those in core.
parent 0fd087af
This diff is collapsed.
This diff is collapsed.
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.60 2006/03/05 15:58:42 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.61 2006/07/28 18:33:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -694,6 +694,28 @@ int8inc(PG_FUNCTION_ARGS)
}
}
/*
* These functions are exactly like int8inc but are used for aggregates that
* count only non-null values. Since the functions are declared strict,
* the null checks happen before we ever get here, and all we need do is
* increment the state value. We could actually make these pg_proc entries
* point right at int8inc, but then the opr_sanity regression test would
* complain about mismatched entries for a built-in function.
*/
Datum
int8inc_any(PG_FUNCTION_ARGS)
{
return int8inc(fcinfo);
}
Datum
int8inc_float8_float8(PG_FUNCTION_ARGS)
{
return int8inc(fcinfo);
}
Datum
int8larger(PG_FUNCTION_ARGS)
{
......
......@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.342 2006/07/27 19:52:06 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.343 2006/07/28 18:33:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200607271
#define CATALOG_VERSION_NO 200607281
#endif
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.56 2006/07/27 19:52:06 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.57 2006/07/28 18:33:04 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
......@@ -192,6 +192,20 @@ DATA(insert ( 2157 float4_accum float8_stddev_samp 0 1022 "{0,0,0}" ));
DATA(insert ( 2158 float8_accum float8_stddev_samp 0 1022 "{0,0,0}" ));
DATA(insert ( 2159 numeric_accum numeric_stddev_samp 0 1231 "{0,0,0}" ));
/* SQL2003 binary regression aggregates */
DATA(insert ( 2818 int8inc_float8_float8 - 0 20 "0" ));
DATA(insert ( 2819 float8_regr_accum float8_regr_sxx 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2820 float8_regr_accum float8_regr_syy 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2821 float8_regr_accum float8_regr_sxy 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2822 float8_regr_accum float8_regr_avgx 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2823 float8_regr_accum float8_regr_avgy 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2824 float8_regr_accum float8_regr_r2 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2825 float8_regr_accum float8_regr_slope 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2826 float8_regr_accum float8_regr_intercept 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2827 float8_regr_accum float8_covar_pop 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2828 float8_regr_accum float8_covar_samp 0 1022 "{0,0,0,0,0,0}" ));
DATA(insert ( 2829 float8_regr_accum float8_corr 0 1022 "{0,0,0,0,0,0}" ));
/* boolean-and and boolean-or */
DATA(insert ( 2517 booland_statefunc - 0 16 _null_ ));
DATA(insert ( 2518 boolor_statefunc - 0 16 _null_ ));
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.418 2006/07/27 19:52:06 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.419 2006/07/28 18:33:04 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
......@@ -1534,7 +1534,7 @@ DESCR("truncate interval to specified units");
DATA(insert OID = 1219 ( int8inc PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_ int8inc - _null_ ));
DESCR("increment");
DATA(insert OID = 2804 ( int8inc_any PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_ int8inc - _null_ ));
DATA(insert OID = 2804 ( int8inc_any PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_ int8inc_any - _null_ ));
DESCR("increment, ignores second argument");
DATA(insert OID = 1230 ( int8abs PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_ int8abs - _null_ ));
DESCR("absolute value");
......@@ -2730,6 +2730,32 @@ DATA(insert OID = 1963 ( int4_avg_accum PGNSP PGUID 12 f f t f i 2 1016 "1016
DESCR("AVG(int4) transition function");
DATA(insert OID = 1964 ( int8_avg PGNSP PGUID 12 f f t f i 1 1700 "1016" _null_ _null_ _null_ int8_avg - _null_ ));
DESCR("AVG(int) aggregate final function");
DATA(insert OID = 2805 ( int8inc_float8_float8 PGNSP PGUID 12 f f t f i 3 20 "20 701 701" _null_ _null_ _null_ int8inc_float8_float8 - _null_ ));
DESCR("REGR_COUNT(double, double) transition function");
DATA(insert OID = 2806 ( float8_regr_accum PGNSP PGUID 12 f f t f i 3 1022 "1022 701 701" _null_ _null_ _null_ float8_regr_accum - _null_ ));
DESCR("REGR_...(double, double) transition function");
DATA(insert OID = 2807 ( float8_regr_sxx PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxx - _null_ ));
DESCR("REGR_SXX(double, double) aggregate final function");
DATA(insert OID = 2808 ( float8_regr_syy PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_syy - _null_ ));
DESCR("REGR_SYY(double, double) aggregate final function");
DATA(insert OID = 2809 ( float8_regr_sxy PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxy - _null_ ));
DESCR("REGR_SXY(double, double) aggregate final function");
DATA(insert OID = 2810 ( float8_regr_avgx PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgx - _null_ ));
DESCR("REGR_AVGX(double, double) aggregate final function");
DATA(insert OID = 2811 ( float8_regr_avgy PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgy - _null_ ));
DESCR("REGR_AVGY(double, double) aggregate final function");
DATA(insert OID = 2812 ( float8_regr_r2 PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_r2 - _null_ ));
DESCR("REGR_R2(double, double) aggregate final function");
DATA(insert OID = 2813 ( float8_regr_slope PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_slope - _null_ ));
DESCR("REGR_SLOPE(double, double) aggregate final function");
DATA(insert OID = 2814 ( float8_regr_intercept PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_intercept - _null_ ));
DESCR("REGR_INTERCEPT(double, double) aggregate final function");
DATA(insert OID = 2815 ( float8_covar_pop PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_pop - _null_ ));
DESCR("COVAR_POP(double, double) aggregate final function");
DATA(insert OID = 2816 ( float8_covar_samp PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_samp - _null_ ));
DESCR("COVAR_SAMP(double, double) aggregate final function");
DATA(insert OID = 2817 ( float8_corr PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_corr - _null_ ));
DESCR("CORR(double, double) aggregate final function");
/* To ASCII conversion */
DATA(insert OID = 1845 ( to_ascii PGNSP PGUID 12 f f t f i 1 25 "25" _null_ _null_ _null_ to_ascii_default - _null_ ));
......@@ -3196,6 +3222,20 @@ DATA(insert OID = 2157 ( stddev PGNSP PGUID 12 t f f f i 1 701 "700" _null_ _
DATA(insert OID = 2158 ( stddev PGNSP PGUID 12 t f f f i 1 701 "701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2159 ( stddev PGNSP PGUID 12 t f f f i 1 1700 "1700" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2818 ( regr_count PGNSP PGUID 12 t f f f i 2 20 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2819 ( regr_sxx PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2820 ( regr_syy PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2821 ( regr_sxy PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2822 ( regr_avgx PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2823 ( regr_avgy PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2824 ( regr_r2 PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2825 ( regr_slope PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2826 ( regr_intercept PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2827 ( covar_pop PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2828 ( covar_samp PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2829 ( corr PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2160 ( text_pattern_lt PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt - _null_ ));
DATA(insert OID = 2161 ( text_pattern_le PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le - _null_ ));
DATA(insert OID = 2162 ( text_pattern_eq PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_eq - _null_ ));
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.280 2006/07/21 20:51:33 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.281 2006/07/28 18:33:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -348,6 +348,18 @@ extern Datum float8_var_pop(PG_FUNCTION_ARGS);
extern Datum float8_var_samp(PG_FUNCTION_ARGS);
extern Datum float8_stddev_pop(PG_FUNCTION_ARGS);
extern Datum float8_stddev_samp(PG_FUNCTION_ARGS);
extern Datum float8_regr_accum(PG_FUNCTION_ARGS);
extern Datum float8_regr_sxx(PG_FUNCTION_ARGS);
extern Datum float8_regr_syy(PG_FUNCTION_ARGS);
extern Datum float8_regr_sxy(PG_FUNCTION_ARGS);
extern Datum float8_regr_avgx(PG_FUNCTION_ARGS);
extern Datum float8_regr_avgy(PG_FUNCTION_ARGS);
extern Datum float8_covar_pop(PG_FUNCTION_ARGS);
extern Datum float8_covar_samp(PG_FUNCTION_ARGS);
extern Datum float8_corr(PG_FUNCTION_ARGS);
extern Datum float8_regr_r2(PG_FUNCTION_ARGS);
extern Datum float8_regr_slope(PG_FUNCTION_ARGS);
extern Datum float8_regr_intercept(PG_FUNCTION_ARGS);
extern Datum float48pl(PG_FUNCTION_ARGS);
extern Datum float48mi(PG_FUNCTION_ARGS);
extern Datum float48mul(PG_FUNCTION_ARGS);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.44 2006/03/05 15:59:07 momjian Exp $
* $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.45 2006/07/28 18:33:04 tgl Exp $
*
* NOTES
* These data types are supported on all 64-bit architectures, and may
......@@ -74,6 +74,8 @@ extern Datum int8div(PG_FUNCTION_ARGS);
extern Datum int8abs(PG_FUNCTION_ARGS);
extern Datum int8mod(PG_FUNCTION_ARGS);
extern Datum int8inc(PG_FUNCTION_ARGS);
extern Datum int8inc_any(PG_FUNCTION_ARGS);
extern Datum int8inc_float8_float8(PG_FUNCTION_ARGS);
extern Datum int8larger(PG_FUNCTION_ARGS);
extern Datum int8smaller(PG_FUNCTION_ARGS);
......
......@@ -137,6 +137,61 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
0 |
(1 row)
-- SQL2003 binary aggregates
SELECT regr_count(b, a) FROM aggtest;
regr_count
------------
4
(1 row)
SELECT regr_sxx(b, a) FROM aggtest;
regr_sxx
----------
5099
(1 row)
SELECT regr_syy(b, a) FROM aggtest;
regr_syy
------------------
68756.2156939293
(1 row)
SELECT regr_sxy(b, a) FROM aggtest;
regr_sxy
------------------
2614.51582155004
(1 row)
SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
regr_avgx | regr_avgy
-----------+------------------
49.5 | 107.943152273074
(1 row)
SELECT regr_r2(b, a) FROM aggtest;
regr_r2
--------------------
0.0194977982031803
(1 row)
SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
regr_slope | regr_intercept
-------------------+------------------
0.512750700441271 | 82.5619926012309
(1 row)
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
covar_pop | covar_samp
-----------------+------------------
653.62895538751 | 871.505273850014
(1 row)
SELECT corr(b, a) FROM aggtest;
corr
-------------------
0.139634516517873
(1 row)
SELECT count(four) AS cnt_1000 FROM onek;
cnt_1000
----------
......
......@@ -66,15 +66,15 @@ WHERE p1.oid != p2.oid AND
-- of the same internal function (ie, matching prosrc fields). It's OK to
-- have several entries with different pronames for the same internal function,
-- but conflicts in the number of arguments and other critical items should
-- be complained of.
-- Ignore aggregates, since they all use "aggregate_dummy".
-- As of 8.2, this finds int8inc and int8inc_any, which are OK.
-- be complained of. (We don't check data types here; see next query.)
-- Note: ignore aggregate functions here, since they all point to the same
-- dummy built-in function.
SELECT p1.oid, p1.proname, p2.oid, p2.proname
FROM pg_proc AS p1, pg_proc AS p2
WHERE p1.oid < p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
p1.proisagg = false AND p2.proisagg = false AND
(p1.proisagg = false OR p2.proisagg = false) AND
(p1.prolang != p2.prolang OR
p1.proisagg != p2.proisagg OR
p1.prosecdef != p2.prosecdef OR
......@@ -83,9 +83,8 @@ WHERE p1.oid < p2.oid AND
p1.provolatile != p2.provolatile OR
p1.pronargs != p2.pronargs);
oid | proname | oid | proname
------+---------+------+-------------
1219 | int8inc | 2804 | int8inc_any
(1 row)
-----+---------+-----+---------
(0 rows)
-- Look for uses of different type OIDs in the argument/result type fields
-- for different aliases of the same built-in function.
......
......@@ -39,6 +39,17 @@ SELECT var_samp(b::numeric) FROM aggtest;
SELECT var_pop(1.0), var_samp(2.0);
SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
-- SQL2003 binary aggregates
SELECT regr_count(b, a) FROM aggtest;
SELECT regr_sxx(b, a) FROM aggtest;
SELECT regr_syy(b, a) FROM aggtest;
SELECT regr_sxy(b, a) FROM aggtest;
SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
SELECT regr_r2(b, a) FROM aggtest;
SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
SELECT corr(b, a) FROM aggtest;
SELECT count(four) AS cnt_1000 FROM onek;
SELECT count(DISTINCT four) AS cnt_4 FROM onek;
......
......@@ -68,17 +68,16 @@ WHERE p1.oid != p2.oid AND
-- of the same internal function (ie, matching prosrc fields). It's OK to
-- have several entries with different pronames for the same internal function,
-- but conflicts in the number of arguments and other critical items should
-- be complained of.
-- Ignore aggregates, since they all use "aggregate_dummy".
-- As of 8.2, this finds int8inc and int8inc_any, which are OK.
-- be complained of. (We don't check data types here; see next query.)
-- Note: ignore aggregate functions here, since they all point to the same
-- dummy built-in function.
SELECT p1.oid, p1.proname, p2.oid, p2.proname
FROM pg_proc AS p1, pg_proc AS p2
WHERE p1.oid < p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
p1.proisagg = false AND p2.proisagg = false AND
(p1.proisagg = false OR p2.proisagg = false) AND
(p1.prolang != p2.prolang OR
p1.proisagg != p2.proisagg OR
p1.prosecdef != p2.prosecdef OR
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment