From cf57ef4e506cd87195baa76213326b1981644452 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Tue, 16 Jan 2007 21:41:14 +0000 Subject: [PATCH] Implement width_bucket() for the float8 data type. The implementation is somewhat ugly logic-wise, but I don't see an easy way to make it more concise. When writing this, I noticed that my previous implementation of width_bucket() doesn't handle NaN correctly: postgres=# select width_bucket('NaN', 1, 5, 5); width_bucket -------------- 6 (1 row) AFAICS SQL:2003 does not define a NaN value, so it doesn't address how width_bucket() should behave here. The patch changes width_bucket() so that ereport(ERROR) is raised if NaN is specified for the operand or the lower or upper bounds to width_bucket(). For float8, NaN is disallowed for any of the floating-point inputs, and +/- infinity is disallowed for the histogram bounds (but allowed for the operand). Update docs and regression tests, bump the catversion. --- doc/src/sgml/func.sgml | 12 ++- src/backend/utils/adt/float.c | 84 ++++++++++++++++++++- src/backend/utils/adt/numeric.c | 19 +++-- src/include/catalog/catversion.h | 4 +- src/include/catalog/pg_proc.h | 4 +- src/include/utils/builtins.h | 3 +- src/test/regress/expected/numeric.out | 104 ++++++++++++++++---------- src/test/regress/sql/numeric.sql | 49 ++++++++---- 8 files changed, 210 insertions(+), 69 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index b9cd00320b..ea64410673 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -854,6 +854,16 @@ width_bucket(5.35, 0.024, 10.06, 5) 3 + + + width_bucket(op dp, b1 dp, b2 dp, count int) + int + return the bucket to which operand would + be assigned in an equidepth histogram with count + buckets, in the range b1 to b2 + width_bucket(5.35, 0.024, 10.06, 5) + 3 + diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 44fe89296e..fe3cc8215e 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.146 2007/01/06 20:21:29 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.147 2007/01/16 21:41:13 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -2745,6 +2745,88 @@ float84ge(PG_FUNCTION_ARGS) PG_RETURN_BOOL(float8_cmp_internal(arg1, arg2) >= 0); } +/* + * Implements the float8 version of the width_bucket() function + * defined by SQL2003. See also width_bucket_numeric(). + * + * 'bound1' and 'bound2' are the lower and upper bounds of the + * histogram's range, respectively. 'count' is the number of buckets + * in the histogram. width_bucket() returns an integer indicating the + * bucket number that 'operand' belongs to in an equiwidth histogram + * with the specified characteristics. An operand smaller than the + * lower bound is assigned to bucket 0. An operand greater than the + * upper bound is assigned to an additional bucket (with number + * count+1). We don't allow "NaN" for any of the float8 inputs, and we + * don't allow either of the histogram bounds to be +/- infinity. + */ +Datum +width_bucket_float8(PG_FUNCTION_ARGS) +{ + float8 operand = PG_GETARG_FLOAT8(0); + float8 bound1 = PG_GETARG_FLOAT8(1); + float8 bound2 = PG_GETARG_FLOAT8(2); + int32 count = PG_GETARG_INT32(3); + int32 result; + + if (count <= 0.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("count must be greater than zero"))); + + if (isnan(operand) || isnan(bound1) || isnan(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("operand, lower bound and upper bound cannot be NaN"))); + + /* Note that we allow "operand" to be infinite */ + if (is_infinite(bound1) || is_infinite(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower and upper bounds must be finite"))); + + if (bound1 < bound2) + { + if (operand < bound1) + result = 0; + else if (operand >= bound2) + { + result = count + 1; + /* check for overflow */ + if (result < count) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else + result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1; + } + else if (bound1 > bound2) + { + if (operand > bound1) + result = 0; + else if (operand <= bound2) + { + result = count + 1; + /* check for overflow */ + if (result < count) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else + result = ((float8) count * (bound1 - operand) / (bound1 - bound2)) + 1; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower bound cannot equal upper bound"))); + result = 0; /* keep the compiler quiet */ + } + + PG_RETURN_INT32(result); +} + /* ========== PRIVATE ROUTINES ========== */ #ifndef HAVE_CBRT diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 14a576c15b..7e51873fd1 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -14,7 +14,7 @@ * Copyright (c) 1998-2007, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.98 2007/01/05 22:19:41 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.99 2007/01/16 21:41:13 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -874,16 +874,17 @@ numeric_floor(PG_FUNCTION_ARGS) } /* - * width_bucket_numeric() - + * Implements the numeric version of the width_bucket() function + * defined by SQL2003. See also width_bucket_float8(). * * 'bound1' and 'bound2' are the lower and upper bounds of the * histogram's range, respectively. 'count' is the number of buckets * in the histogram. width_bucket() returns an integer indicating the - * bucket number that 'operand' belongs in for an equiwidth histogram + * bucket number that 'operand' belongs to in an equiwidth histogram * with the specified characteristics. An operand smaller than the * lower bound is assigned to bucket 0. An operand greater than the * upper bound is assigned to an additional bucket (with number - * count+1). + * count+1). We don't allow "NaN" for any of the numeric arguments. */ Datum width_bucket_numeric(PG_FUNCTION_ARGS) @@ -901,6 +902,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); + if (NUMERIC_IS_NAN(operand) || + NUMERIC_IS_NAN(bound1) || + NUMERIC_IS_NAN(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("operand, lower bound and upper bound cannot be NaN"))); + init_var(&result_var); init_var(&count_var); @@ -937,6 +945,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS) break; } + /* if result exceeds the range of a legal int4, we ereport here */ result = numericvar_to_int4(&result_var); free_var(&count_var); @@ -946,8 +955,6 @@ width_bucket_numeric(PG_FUNCTION_ARGS) } /* - * compute_bucket() - - * * If 'operand' is not outside the bucket range, determine the correct * bucket for it to go. The calculations performed by this function * are derived directly from the SQL2003 spec. diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 98a625cfbf..c8892d24f2 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.371 2007/01/09 02:14:15 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.372 2007/01/16 21:41:13 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200701081 +#define CATALOG_VERSION_NO 200701161 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index e2c67ea86d..24a287e3d6 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.436 2007/01/05 22:19:53 momjian Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.437 2007/01/16 21:41:13 neilc Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -636,6 +636,8 @@ DATA(insert OID = 309 ( float84gt PGNSP PGUID 12 f f t f i 2 16 "701 700" _ DESCR("greater-than"); DATA(insert OID = 310 ( float84ge PGNSP PGUID 12 f f t f i 2 16 "701 700" _null_ _null_ _null_ float84ge - _null_ )); DESCR("greater-than-or-equal"); +DATA(insert OID = 320 ( width_bucket PGNSP PGUID 12 f f t f i 4 23 "701 701 701 23" _null_ _null_ _null_ width_bucket_float8 - _null_ )); +DESCR("bucket number of operand in equidepth histogram"); DATA(insert OID = 311 ( float8 PGNSP PGUID 12 f f t f i 1 701 "700" _null_ _null_ _null_ ftod - _null_ )); DESCR("convert float4 to float8"); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 396bac922d..3368e2c024 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.285 2007/01/05 22:19:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.286 2007/01/16 21:41:14 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -380,6 +380,7 @@ extern Datum float84lt(PG_FUNCTION_ARGS); extern Datum float84le(PG_FUNCTION_ARGS); extern Datum float84gt(PG_FUNCTION_ARGS); extern Datum float84ge(PG_FUNCTION_ARGS); +extern Datum width_bucket_float8(PG_FUNCTION_ARGS); /* dbsize.c */ extern Datum pg_tablespace_size_oid(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index 96c70a8a09..3780c17e5a 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -730,55 +730,77 @@ SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; (7 rows) DROP TABLE ceil_floor_round; --- Testing for width_bucket() --- NULL result -SELECT width_bucket(NULL, NULL, NULL, NULL); - width_bucket --------------- - -(1 row) - +-- Testing for width_bucket(). For convenience, we test both the +-- numeric and float8 versions of the function in this file. -- errors SELECT width_bucket(5.0, 3.0, 4.0, 0); ERROR: count must be greater than zero SELECT width_bucket(5.0, 3.0, 4.0, -5); ERROR: count must be greater than zero -SELECT width_bucket(3.0, 3.0, 3.0, 888); +SELECT width_bucket(3.5, 3.0, 3.0, 888); +ERROR: lower bound cannot equal upper bound +SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0); +ERROR: count must be greater than zero +SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5); +ERROR: count must be greater than zero +SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888); ERROR: lower bound cannot equal upper bound +SELECT width_bucket('NaN', 3.0, 4.0, 888); +ERROR: operand, lower bound and upper bound cannot be NaN +SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888); +ERROR: operand, lower bound and upper bound cannot be NaN -- normal operation -CREATE TABLE width_bucket_test (operand numeric); -COPY width_bucket_test FROM stdin; +CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8); +COPY width_bucket_test (operand_num) FROM stdin; +UPDATE width_bucket_test SET operand_f8 = operand_num::float8; SELECT - operand, - width_bucket(operand, 0, 10, 5) AS wb_1, - width_bucket(operand, 10, 0, 5) AS wb_2, - width_bucket(operand, 2, 8, 4) AS wb_3, - width_bucket(operand, 5.0, 5.5, 20) AS wb_4, - width_bucket(operand, -25, 25, 10) AS wb_5 + operand_num, operand_f8, + width_bucket(operand_num, 0, 10, 5) AS wb_1, + width_bucket(operand_f8, 0, 10, 5) AS wb_1f, + width_bucket(operand_num, 10, 0, 5) AS wb_2, + width_bucket(operand_f8, 10, 0, 5) AS wb_2f, + width_bucket(operand_num, 2, 8, 4) AS wb_3, + width_bucket(operand_f8, 2, 8, 4) AS wb_3f, + width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4, + width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f, + width_bucket(operand_num, -25, 25, 10) AS wb_5, + width_bucket(operand_f8, -25, 25, 10) AS wb_5f FROM width_bucket_test; - operand | wb_1 | wb_2 | wb_3 | wb_4 | wb_5 -------------------+------+------+------+------+------ - -5.2 | 0 | 6 | 0 | 0 | 4 - -0.0000000000001 | 0 | 6 | 0 | 0 | 5 - 0.0000000000001 | 1 | 5 | 0 | 0 | 6 - 1 | 1 | 5 | 0 | 0 | 6 - 1.99999999999999 | 1 | 5 | 0 | 0 | 6 - 2 | 2 | 5 | 1 | 0 | 6 - 2.00000000000001 | 2 | 4 | 1 | 0 | 6 - 3 | 2 | 4 | 1 | 0 | 6 - 4 | 3 | 4 | 2 | 0 | 6 - 4.5 | 3 | 3 | 2 | 0 | 6 - 5 | 3 | 3 | 3 | 1 | 7 - 5.5 | 3 | 3 | 3 | 21 | 7 - 6 | 4 | 3 | 3 | 21 | 7 - 7 | 4 | 2 | 4 | 21 | 7 - 8 | 5 | 2 | 5 | 21 | 7 - 9 | 5 | 1 | 5 | 21 | 7 - 9.99999999999999 | 5 | 1 | 5 | 21 | 7 - 10 | 6 | 1 | 5 | 21 | 8 - 10.0000000000001 | 6 | 0 | 5 | 21 | 8 - NaN | 6 | 0 | 5 | 21 | 11 -(20 rows) + operand_num | operand_f8 | wb_1 | wb_1f | wb_2 | wb_2f | wb_3 | wb_3f | wb_4 | wb_4f | wb_5 | wb_5f +------------------+------------------+------+-------+------+-------+------+-------+------+-------+------+------- + -5.2 | -5.2 | 0 | 0 | 6 | 6 | 0 | 0 | 0 | 0 | 4 | 4 + -0.0000000001 | -1e-10 | 0 | 0 | 6 | 6 | 0 | 0 | 0 | 0 | 5 | 5 + 0.000000000001 | 1e-12 | 1 | 1 | 5 | 5 | 0 | 0 | 0 | 0 | 6 | 6 + 1 | 1 | 1 | 1 | 5 | 5 | 0 | 0 | 0 | 0 | 6 | 6 + 1.99999999999999 | 1.99999999999999 | 1 | 1 | 5 | 5 | 0 | 0 | 0 | 0 | 6 | 6 + 2 | 2 | 2 | 2 | 5 | 5 | 1 | 1 | 0 | 0 | 6 | 6 + 2.00000000000001 | 2.00000000000001 | 2 | 2 | 4 | 4 | 1 | 1 | 0 | 0 | 6 | 6 + 3 | 3 | 2 | 2 | 4 | 4 | 1 | 1 | 0 | 0 | 6 | 6 + 4 | 4 | 3 | 3 | 4 | 4 | 2 | 2 | 0 | 0 | 6 | 6 + 4.5 | 4.5 | 3 | 3 | 3 | 3 | 2 | 2 | 0 | 0 | 6 | 6 + 5 | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 1 | 1 | 7 | 7 + 5.5 | 5.5 | 3 | 3 | 3 | 3 | 3 | 3 | 21 | 21 | 7 | 7 + 6 | 6 | 4 | 4 | 3 | 3 | 3 | 3 | 21 | 21 | 7 | 7 + 7 | 7 | 4 | 4 | 2 | 2 | 4 | 4 | 21 | 21 | 7 | 7 + 8 | 8 | 5 | 5 | 2 | 2 | 5 | 5 | 21 | 21 | 7 | 7 + 9 | 9 | 5 | 5 | 1 | 1 | 5 | 5 | 21 | 21 | 7 | 7 + 9.99999999999999 | 9.99999999999999 | 5 | 5 | 1 | 1 | 5 | 5 | 21 | 21 | 7 | 7 + 10 | 10 | 6 | 6 | 1 | 1 | 5 | 5 | 21 | 21 | 8 | 8 + 10.0000000000001 | 10.0000000000001 | 6 | 6 | 0 | 0 | 5 | 5 | 21 | 21 | 8 | 8 +(19 rows) + +-- for float8 only, check positive and negative infinity: we require +-- finite bucket bounds, but allow an infinite operand +SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error +ERROR: lower and upper bounds must be finite +SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error +ERROR: lower and upper bounds must be finite +SELECT width_bucket('Infinity'::float8, 1, 10, 10), + width_bucket('-Infinity'::float8, 1, 10, 10); + width_bucket | width_bucket +--------------+-------------- + 11 | 0 +(1 row) DROP TABLE width_bucket_test; -- TO_CHAR() @@ -800,7 +822,7 @@ SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999') (10 rows) SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999') - FROM num_data; + FROM num_data; to_char_2 | to_char -----------+-------------------------------------------- | .000,000,000,000,000 diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index b6f69b6f7d..eacc4b5b73 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -667,22 +667,26 @@ INSERT INTO ceil_floor_round VALUES ('-0.000001'); SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; DROP TABLE ceil_floor_round; --- Testing for width_bucket() --- NULL result -SELECT width_bucket(NULL, NULL, NULL, NULL); +-- Testing for width_bucket(). For convenience, we test both the +-- numeric and float8 versions of the function in this file. -- errors SELECT width_bucket(5.0, 3.0, 4.0, 0); SELECT width_bucket(5.0, 3.0, 4.0, -5); -SELECT width_bucket(3.0, 3.0, 3.0, 888); +SELECT width_bucket(3.5, 3.0, 3.0, 888); +SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0); +SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5); +SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888); +SELECT width_bucket('NaN', 3.0, 4.0, 888); +SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888); -- normal operation -CREATE TABLE width_bucket_test (operand numeric); +CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8); -COPY width_bucket_test FROM stdin; +COPY width_bucket_test (operand_num) FROM stdin; -5.2 --0.0000000000001 -0.0000000000001 +-0.0000000001 +0.000000000001 1 1.99999999999999 2 @@ -699,18 +703,31 @@ COPY width_bucket_test FROM stdin; 9.99999999999999 10 10.0000000000001 -NaN \. +UPDATE width_bucket_test SET operand_f8 = operand_num::float8; + SELECT - operand, - width_bucket(operand, 0, 10, 5) AS wb_1, - width_bucket(operand, 10, 0, 5) AS wb_2, - width_bucket(operand, 2, 8, 4) AS wb_3, - width_bucket(operand, 5.0, 5.5, 20) AS wb_4, - width_bucket(operand, -25, 25, 10) AS wb_5 + operand_num, operand_f8, + width_bucket(operand_num, 0, 10, 5) AS wb_1, + width_bucket(operand_f8, 0, 10, 5) AS wb_1f, + width_bucket(operand_num, 10, 0, 5) AS wb_2, + width_bucket(operand_f8, 10, 0, 5) AS wb_2f, + width_bucket(operand_num, 2, 8, 4) AS wb_3, + width_bucket(operand_f8, 2, 8, 4) AS wb_3f, + width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4, + width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f, + width_bucket(operand_num, -25, 25, 10) AS wb_5, + width_bucket(operand_f8, -25, 25, 10) AS wb_5f FROM width_bucket_test; +-- for float8 only, check positive and negative infinity: we require +-- finite bucket bounds, but allow an infinite operand +SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error +SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error +SELECT width_bucket('Infinity'::float8, 1, 10, 10), + width_bucket('-Infinity'::float8, 1, 10, 10); + DROP TABLE width_bucket_test; -- TO_CHAR() @@ -719,7 +736,7 @@ SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999') FROM num_data; SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999') - FROM num_data; + FROM num_data; SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR') FROM num_data; -- 2.40.0