From: Tom Lane Date: Thu, 7 Dec 2000 18:38:59 +0000 (+0000) Subject: Make OVERLAPS operators conform to SQL92 spec regarding NULL handling. X-Git-Tag: REL7_1_BETA2~235 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=821f4673ffa53ddfb792cbf646783037dd6796ca;p=postgresql Make OVERLAPS operators conform to SQL92 spec regarding NULL handling. As I read it, the spec requires a non-null result in some cases where one of the inputs is NULL: specifically, if the other endpoint of that interval is between the endpoints of the other interval, then the result is known TRUE despite the missing endpoint. The spec could've been a lot simpler if they did not intend this behavior. I did not force an initdb for this change, but if you don't do one you'll still see the old strict-function behavior. --- diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 9ccbaa1f48..2203357aa0 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.53 2000/12/03 14:51:01 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.54 2000/12/07 18:38:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -529,37 +529,122 @@ time_smaller(PG_FUNCTION_ARGS) PG_RETURN_TIMEADT((time1 < time2) ? time1 : time2); } -/* overlaps_time() - * Implements the SQL92 OVERLAPS operator. - * Algorithm from Date and Darwen, 1997 +/* overlaps_time() --- implements the SQL92 OVERLAPS operator. + * + * Algorithm is per SQL92 spec. This is much harder than you'd think + * because the spec requires us to deliver a non-null answer in some cases + * where some of the inputs are null. */ Datum overlaps_time(PG_FUNCTION_ARGS) { - TimeADT ts1 = PG_GETARG_TIMEADT(0); - TimeADT te1 = PG_GETARG_TIMEADT(1); - TimeADT ts2 = PG_GETARG_TIMEADT(2); - TimeADT te2 = PG_GETARG_TIMEADT(3); + /* The arguments are TimeADT, but we leave them as generic Datums + * to avoid dereferencing nulls (TimeADT is pass-by-reference!) + */ + Datum ts1 = PG_GETARG_DATUM(0); + Datum te1 = PG_GETARG_DATUM(1); + Datum ts2 = PG_GETARG_DATUM(2); + Datum te2 = PG_GETARG_DATUM(3); + bool ts1IsNull = PG_ARGISNULL(0); + bool te1IsNull = PG_ARGISNULL(1); + bool ts2IsNull = PG_ARGISNULL(2); + bool te2IsNull = PG_ARGISNULL(3); - /* Make sure we have ordered pairs... */ - if (ts1 > te1) - { - TimeADT tt = ts1; +#define TIMEADT_GT(t1,t2) \ + (DatumGetTimeADT(t1) > DatumGetTimeADT(t2)) +#define TIMEADT_LT(t1,t2) \ + (DatumGetTimeADT(t1) < DatumGetTimeADT(t2)) + /* + * If both endpoints of interval 1 are null, the result is null (unknown). + * If just one endpoint is null, take ts1 as the non-null one. + * Otherwise, take ts1 as the lesser endpoint. + */ + if (ts1IsNull) + { + if (te1IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ ts1 = te1; - te1 = tt; + te1IsNull = true; } - if (ts2 > te2) + else if (!te1IsNull) { - TimeADT tt = ts2; + if (TIMEADT_GT(ts1, te1)) + { + Datum tt = ts1; + + ts1 = te1; + te1 = tt; + } + } + /* Likewise for interval 2. */ + if (ts2IsNull) + { + if (te2IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ ts2 = te2; - te2 = tt; + te2IsNull = true; + } + else if (!te2IsNull) + { + if (TIMEADT_GT(ts2, te2)) + { + Datum tt = ts2; + + ts2 = te2; + te2 = tt; + } } - PG_RETURN_BOOL((ts1 > ts2 && (ts1 < te2 || te1 < te2)) || - (ts1 < ts2 && (ts2 < te1 || te2 < te1)) || - (ts1 == ts2)); + /* + * At this point neither ts1 nor ts2 is null, so we can consider three + * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2 + */ + if (TIMEADT_GT(ts1, ts2)) + { + /* This case is ts1 < te2 OR te1 < te2, which may look redundant + * but in the presence of nulls it's not quite completely so. + */ + if (te2IsNull) + PG_RETURN_NULL(); + if (TIMEADT_LT(ts1, te2)) + PG_RETURN_BOOL(true); + if (te1IsNull) + PG_RETURN_NULL(); + /* If te1 is not null then we had ts1 <= te1 above, and we just + * found ts1 >= te2, hence te1 >= te2. + */ + PG_RETURN_BOOL(false); + } + else if (TIMEADT_LT(ts1, ts2)) + { + /* This case is ts2 < te1 OR te2 < te1 */ + if (te1IsNull) + PG_RETURN_NULL(); + if (TIMEADT_LT(ts2, te1)) + PG_RETURN_BOOL(true); + if (te2IsNull) + PG_RETURN_NULL(); + /* If te2 is not null then we had ts2 <= te2 above, and we just + * found ts2 >= te1, hence te2 >= te1. + */ + PG_RETURN_BOOL(false); + } + else + { + /* For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a + * rather silly way of saying "true if both are nonnull, else null". + */ + if (te1IsNull || te2IsNull) + PG_RETURN_NULL(); + PG_RETURN_BOOL(true); + } + +#undef TIMEADT_GT +#undef TIMEADT_LT } /* timestamp_time() @@ -964,53 +1049,122 @@ timetz_mi_interval(PG_FUNCTION_ARGS) PG_RETURN_TIMETZADT_P(result); } -/* overlaps_timetz() - * Implements the SQL92 OVERLAPS operator. - * Algorithm from Date and Darwen, 1997 +/* overlaps_timetz() --- implements the SQL92 OVERLAPS operator. + * + * Algorithm is per SQL92 spec. This is much harder than you'd think + * because the spec requires us to deliver a non-null answer in some cases + * where some of the inputs are null. */ Datum overlaps_timetz(PG_FUNCTION_ARGS) { /* The arguments are TimeTzADT *, but we leave them as generic Datums - * for convenience of notation. + * for convenience of notation --- and to avoid dereferencing nulls. */ Datum ts1 = PG_GETARG_DATUM(0); Datum te1 = PG_GETARG_DATUM(1); Datum ts2 = PG_GETARG_DATUM(2); Datum te2 = PG_GETARG_DATUM(3); + bool ts1IsNull = PG_ARGISNULL(0); + bool te1IsNull = PG_ARGISNULL(1); + bool ts2IsNull = PG_ARGISNULL(2); + bool te2IsNull = PG_ARGISNULL(3); #define TIMETZ_GT(t1,t2) \ DatumGetBool(DirectFunctionCall2(timetz_gt,t1,t2)) #define TIMETZ_LT(t1,t2) \ DatumGetBool(DirectFunctionCall2(timetz_lt,t1,t2)) -#define TIMETZ_EQ(t1,t2) \ - DatumGetBool(DirectFunctionCall2(timetz_eq,t1,t2)) - /* Make sure we have ordered pairs... */ - if (TIMETZ_GT(ts1, te1)) + /* + * If both endpoints of interval 1 are null, the result is null (unknown). + * If just one endpoint is null, take ts1 as the non-null one. + * Otherwise, take ts1 as the lesser endpoint. + */ + if (ts1IsNull) { - Datum tt = ts1; - + if (te1IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ ts1 = te1; - te1 = tt; + te1IsNull = true; } - if (TIMETZ_GT(ts2, te2)) + else if (!te1IsNull) { - Datum tt = ts2; + if (TIMETZ_GT(ts1, te1)) + { + Datum tt = ts1; + + ts1 = te1; + te1 = tt; + } + } + /* Likewise for interval 2. */ + if (ts2IsNull) + { + if (te2IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ ts2 = te2; - te2 = tt; + te2IsNull = true; + } + else if (!te2IsNull) + { + if (TIMETZ_GT(ts2, te2)) + { + Datum tt = ts2; + + ts2 = te2; + te2 = tt; + } } - PG_RETURN_BOOL((TIMETZ_GT(ts1, ts2) && - (TIMETZ_LT(ts1, te2) || TIMETZ_LT(te1, te2))) || - (TIMETZ_GT(ts2, ts1) && - (TIMETZ_LT(ts2, te1) || TIMETZ_LT(te2, te1))) || - TIMETZ_EQ(ts1, ts2)); + /* + * At this point neither ts1 nor ts2 is null, so we can consider three + * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2 + */ + if (TIMETZ_GT(ts1, ts2)) + { + /* This case is ts1 < te2 OR te1 < te2, which may look redundant + * but in the presence of nulls it's not quite completely so. + */ + if (te2IsNull) + PG_RETURN_NULL(); + if (TIMETZ_LT(ts1, te2)) + PG_RETURN_BOOL(true); + if (te1IsNull) + PG_RETURN_NULL(); + /* If te1 is not null then we had ts1 <= te1 above, and we just + * found ts1 >= te2, hence te1 >= te2. + */ + PG_RETURN_BOOL(false); + } + else if (TIMETZ_LT(ts1, ts2)) + { + /* This case is ts2 < te1 OR te2 < te1 */ + if (te1IsNull) + PG_RETURN_NULL(); + if (TIMETZ_LT(ts2, te1)) + PG_RETURN_BOOL(true); + if (te2IsNull) + PG_RETURN_NULL(); + /* If te2 is not null then we had ts2 <= te2 above, and we just + * found ts2 >= te1, hence te2 >= te1. + */ + PG_RETURN_BOOL(false); + } + else + { + /* For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a + * rather silly way of saying "true if both are nonnull, else null". + */ + if (te1IsNull || te2IsNull) + PG_RETURN_NULL(); + PG_RETURN_BOOL(true); + } #undef TIMETZ_GT #undef TIMETZ_LT -#undef TIMETZ_EQ } /* timestamp_timetz() diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 6930205b15..7ec213baab 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.39 2000/12/03 20:45:36 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.40 2000/12/07 18:38:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -825,53 +825,123 @@ interval_hash(PG_FUNCTION_ARGS) return hash_any((char *) key, sizeof(double) + sizeof(int4)); } -/* overlaps_timestamp() - * Implements the SQL92 OVERLAPS operator. - * Algorithm from Date and Darwen, 1997 +/* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator. + * + * Algorithm is per SQL92 spec. This is much harder than you'd think + * because the spec requires us to deliver a non-null answer in some cases + * where some of the inputs are null. */ Datum overlaps_timestamp(PG_FUNCTION_ARGS) { /* The arguments are Timestamps, but we leave them as generic Datums - * to avoid unnecessary conversions between value and reference forms... + * to avoid unnecessary conversions between value and reference forms + * --- not to mention possible dereferences of null pointers. */ Datum ts1 = PG_GETARG_DATUM(0); Datum te1 = PG_GETARG_DATUM(1); Datum ts2 = PG_GETARG_DATUM(2); Datum te2 = PG_GETARG_DATUM(3); + bool ts1IsNull = PG_ARGISNULL(0); + bool te1IsNull = PG_ARGISNULL(1); + bool ts2IsNull = PG_ARGISNULL(2); + bool te2IsNull = PG_ARGISNULL(3); #define TIMESTAMP_GT(t1,t2) \ DatumGetBool(DirectFunctionCall2(timestamp_gt,t1,t2)) #define TIMESTAMP_LT(t1,t2) \ DatumGetBool(DirectFunctionCall2(timestamp_lt,t1,t2)) -#define TIMESTAMP_EQ(t1,t2) \ - DatumGetBool(DirectFunctionCall2(timestamp_eq,t1,t2)) - /* Make sure we have ordered pairs... */ - if (TIMESTAMP_GT(ts1, te1)) + /* + * If both endpoints of interval 1 are null, the result is null (unknown). + * If just one endpoint is null, take ts1 as the non-null one. + * Otherwise, take ts1 as the lesser endpoint. + */ + if (ts1IsNull) { - Datum tt = ts1; - + if (te1IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ ts1 = te1; - te1 = tt; + te1IsNull = true; } - if (TIMESTAMP_GT(ts2, te2)) + else if (!te1IsNull) { - Datum tt = ts2; + if (TIMESTAMP_GT(ts1, te1)) + { + Datum tt = ts1; + + ts1 = te1; + te1 = tt; + } + } + /* Likewise for interval 2. */ + if (ts2IsNull) + { + if (te2IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ ts2 = te2; - te2 = tt; + te2IsNull = true; + } + else if (!te2IsNull) + { + if (TIMESTAMP_GT(ts2, te2)) + { + Datum tt = ts2; + + ts2 = te2; + te2 = tt; + } } - PG_RETURN_BOOL((TIMESTAMP_GT(ts1, ts2) && - (TIMESTAMP_LT(ts1, te2) || TIMESTAMP_LT(te1, te2))) || - (TIMESTAMP_GT(ts2, ts1) && - (TIMESTAMP_LT(ts2, te1) || TIMESTAMP_LT(te2, te1))) || - TIMESTAMP_EQ(ts1, ts2)); + /* + * At this point neither ts1 nor ts2 is null, so we can consider three + * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2 + */ + if (TIMESTAMP_GT(ts1, ts2)) + { + /* This case is ts1 < te2 OR te1 < te2, which may look redundant + * but in the presence of nulls it's not quite completely so. + */ + if (te2IsNull) + PG_RETURN_NULL(); + if (TIMESTAMP_LT(ts1, te2)) + PG_RETURN_BOOL(true); + if (te1IsNull) + PG_RETURN_NULL(); + /* If te1 is not null then we had ts1 <= te1 above, and we just + * found ts1 >= te2, hence te1 >= te2. + */ + PG_RETURN_BOOL(false); + } + else if (TIMESTAMP_LT(ts1, ts2)) + { + /* This case is ts2 < te1 OR te2 < te1 */ + if (te1IsNull) + PG_RETURN_NULL(); + if (TIMESTAMP_LT(ts2, te1)) + PG_RETURN_BOOL(true); + if (te2IsNull) + PG_RETURN_NULL(); + /* If te2 is not null then we had ts2 <= te2 above, and we just + * found ts2 >= te1, hence te2 >= te1. + */ + PG_RETURN_BOOL(false); + } + else + { + /* For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a + * rather silly way of saying "true if both are nonnull, else null". + */ + if (te1IsNull || te2IsNull) + PG_RETURN_NULL(); + PG_RETURN_BOOL(true); + } #undef TIMESTAMP_GT #undef TIMESTAMP_LT -#undef TIMESTAMP_EQ } diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 390cf2f357..0807ade9ef 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.176 2000/12/03 14:51:09 thomas Exp $ + * $Id: pg_proc.h,v 1.177 2000/12/07 18:38:58 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -1510,7 +1510,7 @@ DESCR("absolute value"); DATA(insert OID = 1263 ( interval PGUID 12 f t f t 1 f 1186 "25" 100 0 0 100 text_interval - )); DESCR("convert text to interval"); -DATA(insert OID = 1271 ( overlaps PGUID 12 f t t t 4 f 16 "1266 1266 1266 1266" 100 0 0 100 overlaps_timetz - )); +DATA(insert OID = 1271 ( overlaps PGUID 12 f t t f 4 f 16 "1266 1266 1266 1266" 100 0 0 100 overlaps_timetz - )); DESCR("SQL92 interval comparison"); DATA(insert OID = 1272 ( datetime_pl PGUID 12 f t f t 2 f 1184 "1082 1083" 100 0 0 100 datetime_timestamp - )); DESCR("convert date and time to timestamp"); @@ -1569,22 +1569,22 @@ DESCR("restriction selectivity for containment comparison operators"); DATA(insert OID = 1303 ( contjoinsel PGUID 12 f t f t 5 f 701 "26 26 21 26 21" 100 0 0 100 contjoinsel - )); DESCR("join selectivity for containment comparison operators"); -DATA(insert OID = 1304 ( overlaps PGUID 12 f t t t 4 f 16 "1184 1184 1184 1184" 100 0 0 100 overlaps_timestamp - )); +DATA(insert OID = 1304 ( overlaps PGUID 12 f t t f 4 f 16 "1184 1184 1184 1184" 100 0 0 100 overlaps_timestamp - )); DESCR("SQL92 interval comparison"); -DATA(insert OID = 1305 ( overlaps PGUID 14 f t t t 4 f 16 "1184 1186 1184 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - )); +DATA(insert OID = 1305 ( overlaps PGUID 14 f t t f 4 f 16 "1184 1186 1184 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - )); DESCR("SQL92 interval comparison"); -DATA(insert OID = 1306 ( overlaps PGUID 14 f t t t 4 f 16 "1184 1184 1184 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - )); +DATA(insert OID = 1306 ( overlaps PGUID 14 f t t f 4 f 16 "1184 1184 1184 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - )); DESCR("SQL92 interval comparison"); -DATA(insert OID = 1307 ( overlaps PGUID 14 f t t t 4 f 16 "1184 1186 1184 1184" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - )); +DATA(insert OID = 1307 ( overlaps PGUID 14 f t t f 4 f 16 "1184 1186 1184 1184" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - )); DESCR("SQL92 interval comparison"); -DATA(insert OID = 1308 ( overlaps PGUID 12 f t t t 4 f 16 "1083 1083 1083 1083" 100 0 0 100 overlaps_time - )); +DATA(insert OID = 1308 ( overlaps PGUID 12 f t t f 4 f 16 "1083 1083 1083 1083" 100 0 0 100 overlaps_time - )); DESCR("SQL92 interval comparison"); -DATA(insert OID = 1309 ( overlaps PGUID 14 f t t t 4 f 16 "1083 1186 1083 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - )); +DATA(insert OID = 1309 ( overlaps PGUID 14 f t t f 4 f 16 "1083 1186 1083 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - )); DESCR("SQL92 interval comparison"); -DATA(insert OID = 1310 ( overlaps PGUID 14 f t t t 4 f 16 "1083 1083 1083 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - )); +DATA(insert OID = 1310 ( overlaps PGUID 14 f t t f 4 f 16 "1083 1083 1083 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - )); DESCR("SQL92 interval comparison"); -DATA(insert OID = 1311 ( overlaps PGUID 14 f t t t 4 f 16 "1083 1186 1083 1083" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - )); +DATA(insert OID = 1311 ( overlaps PGUID 14 f t t f 4 f 16 "1083 1186 1083 1083" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - )); DESCR("SQL92 interval comparison"); DATA(insert OID = 1314 ( timestamp_cmp PGUID 12 f t f t 2 f 23 "1184 1184" 100 0 0 100 timestamp_cmp - ));