From 77b9bac3be0f4ca81f8daef108890fb993f1cab6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 7 Feb 2013 19:14:22 -0500 Subject: [PATCH] Make contrib/btree_gist's GiST penalty function a bit saner. The previous coding supposed that the first differing bytes in two varlena datums must have the same sign difference as their overall comparison result. This is obviously bogus for text strings in non-C locales, and probably wrong for numeric, and even for bytea I think it was wrong on machines where char is signed. When the assumption failed, the function could deliver a zero or negative penalty in situations where such a result is quite ridiculous, leading the core GiST code to make very bad page-split decisions. To fix, take the absolute values of the byte-level differences. Also, switch the code to using unsigned char not just char, so that the behavior will be consistent whether char is signed or not. Per investigation of a trouble report from Tomas Vondra. Back-patch to all supported branches. --- contrib/btree_gist/btree_utils_var.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c index 7e6a3c3508..30886cb788 100644 --- a/contrib/btree_gist/btree_utils_var.c +++ b/contrib/btree_gist/btree_utils_var.c @@ -92,14 +92,12 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo) static int32 gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo) { - GBT_VARKEY_R r = gbt_var_key_readable(node); int32 i = 0; int32 l = 0; int32 t1len = VARSIZE(r.lower) - VARHDRSZ; int32 t2len = VARSIZE(r.upper) - VARHDRSZ; int32 ml = Min(t1len, t2len); - char *p1 = VARDATA(r.lower); char *p2 = VARDATA(r.upper); @@ -110,7 +108,6 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo) { if (tinfo->eml > 1 && l == 0) { - if ((l = pg_mblen(p1)) != pg_mblen(p2)) { return i; @@ -378,13 +375,14 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, const gbtree GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key); GBT_VARKEY_R ok, nk; - GBT_VARKEY *tmp = NULL; *res = 0.0; nk = gbt_var_key_readable(newe); if (nk.lower == nk.upper) /* leaf */ { + GBT_VARKEY *tmp; + tmp = gbt_var_leaf2node(newe, tinfo); if (tmp != newe) nk = gbt_var_key_readable(tmp); @@ -401,7 +399,7 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, const gbtree )) { Datum d = PointerGetDatum(0); - double dres = 0.0; + double dres; int32 ol, ul; @@ -412,20 +410,18 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, const gbtree if (ul < ol) { - dres = (ol - ul); /* lost of common prefix len */ + dres = (ol - ul); /* reduction of common prefix len */ } else { GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d)); + unsigned char tmp[4]; - char tmp[4]; - - tmp[0] = ((VARSIZE(ok.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.lower)[ul]); - tmp[1] = ((VARSIZE(uk.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.lower)[ul]); - tmp[2] = ((VARSIZE(ok.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.upper)[ul]); - tmp[3] = ((VARSIZE(uk.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.upper)[ul]); - dres = (tmp[0] - tmp[1]) + - (tmp[3] - tmp[2]); + tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul])); + tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul])); + tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul])); + tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul])); + dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]); dres /= 256.0; } -- 2.40.0