From 0862aeaeec46153337df89c9c0d4d25e6d5c7713 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 22 Dec 2005 22:50:22 +0000 Subject: [PATCH] Adjust string comparison so that only bitwise-equal strings are considered equal: if strcoll claims two strings are equal, check it with strcmp, and sort according to strcmp if not identical. This fixes inconsistent behavior under glibc's hu_HU locale, and probably under some other locales as well. Also, take advantage of the now-well-defined behavior to speed up texteq, textne, bpchareq, bpcharne: they may as well just do a bitwise comparison and not bother with strcoll at all. NOTE: affected databases may need to REINDEX indexes on text columns to be sure they are self-consistent. --- src/backend/utils/adt/varchar.c | 20 ++++++++++++++------ src/backend/utils/adt/varlena.c | 27 ++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 0982fb95d4..0fe3443917 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.102 2003/08/04 04:03:10 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.102.4.1 2005/12/22 22:50:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -549,11 +549,14 @@ bpchareq(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - /* fast path for different-length inputs */ + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. + */ if (len1 != len2) result = false; else - result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) == 0); + result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) == 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -573,11 +576,14 @@ bpcharne(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - /* fast path for different-length inputs */ + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. + */ if (len1 != len2) result = true; else - result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) != 0); + result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) != 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -690,7 +696,9 @@ bpcharcmp(PG_FUNCTION_ARGS) * bpchar needs a specialized hash function because we want to ignore * trailing blanks in comparisons. * - * XXX is there any need for locale-specific behavior here? + * Note: currently there is no need for locale-specific behavior here, + * but if we ever change the semantics of bpchar comparison to trust + * strcoll() completely, we'd need to do something different in non-C locales. */ Datum hashbpchar(PG_FUNCTION_ARGS) diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 7b17b50aec..02721aece8 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.4 2004/02/21 00:35:13 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.5 2005/12/22 22:50:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -868,6 +868,15 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2) result = strcoll(a1p, a2p); + /* + * In some locales strcoll() can claim that nonidentical strings are + * equal. Believing that would be bad news for a number of reasons, + * so we follow Perl's lead and sort "equal" strings according to + * strcmp(). + */ + if (result == 0) + result = strcmp(a1p, a2p); + if (len1 >= STACKBUFLEN) pfree(a1p); if (len2 >= STACKBUFLEN) @@ -920,11 +929,15 @@ texteq(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_P(1); bool result; - /* fast path for different-length inputs */ + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. + */ if (VARSIZE(arg1) != VARSIZE(arg2)) result = false; else - result = (text_cmp(arg1, arg2) == 0); + result = (strncmp(VARDATA(arg1), VARDATA(arg2), + VARSIZE(arg1) - VARHDRSZ) == 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -939,11 +952,15 @@ textne(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_P(1); bool result; - /* fast path for different-length inputs */ + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. + */ if (VARSIZE(arg1) != VARSIZE(arg2)) result = true; else - result = (text_cmp(arg1, arg2) != 0); + result = (strncmp(VARDATA(arg1), VARDATA(arg2), + VARSIZE(arg1) - VARHDRSZ) != 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); -- 2.50.0