]> granicus.if.org Git - postgresql/commitdiff
More fixes for abbreviated keys infrastructure.
authorRobert Haas <rhaas@postgresql.org>
Thu, 22 Jan 2015 16:58:58 +0000 (11:58 -0500)
committerRobert Haas <rhaas@postgresql.org>
Thu, 22 Jan 2015 16:58:58 +0000 (11:58 -0500)
First, when LC_COLLATE = C, bttext_abbrev_convert should use memcpy()
rather than strxfrm() to construct the abbreviated key, because the
authoritative comparator uses memcpy().  If we do anything else here,
we might get inconsistent answers, and the buildfarm says this risk
is not theoretical.  It should be faster this way, too.

Second, while I'm looking at bttext_abbrev_convert, convert a needless
use of goto into the loop it's trying to implement into an actual
loop.

Both of the above problems date to the original commit of abbreviated
keys, commit 4ea51cdfe85ceef8afabceb03c446574daa0ac23.

Third, fix a bogus assignment to tss->locale before tss is set up.
That's a new goof in commit b529b65d1bf8537ca7fa024760a9782d7c8b66e5.

src/backend/utils/adt/varlena.c

index dba650c34d530cb048ccf0cd6093f76124c91170..f26a77cf2936c413e87398614c3939b195d5313e 100644 (file)
@@ -63,6 +63,7 @@ typedef struct
        char                       *buf2;               /* 2nd string, or abbreviation strxfrm() buf */
        int                                     buflen1;
        int                                     buflen2;
+       bool                            collate_c;
        hyperLogLogState        abbr_card;      /* Abbreviated key cardinality state */
        hyperLogLogState        full_card;      /* Full key cardinality state */
 #ifdef HAVE_LOCALE_T
@@ -1744,7 +1745,7 @@ static void
 btsortsupport_worker(SortSupport ssup, Oid collid)
 {
        bool                            abbreviate = ssup->abbreviate;
-       bool                            locale_aware = false;
+       bool                            collate_c = false;
        TextSortSupport    *tss;
 
 #ifdef HAVE_LOCALE_T
@@ -1769,7 +1770,10 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
         * bttextcmp() via the fmgr trampoline.
         */
        if (lc_collate_is_c(collid))
+       {
                ssup->comparator = bttextfastcmp_c;
+               collate_c = true;
+       }
 #ifdef WIN32
        else if (GetDatabaseEncoding() == PG_UTF8)
                return;
@@ -1777,7 +1781,6 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
        else
        {
                ssup->comparator = bttextfastcmp_locale;
-               locale_aware = true;
 
                /*
                 * We need a collation-sensitive comparison.  To make things faster,
@@ -1798,7 +1801,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
                                                 errhint("Use the COLLATE clause to set the collation explicitly.")));
                        }
 #ifdef HAVE_LOCALE_T
-                       tss->locale = pg_newlocale_from_collation(collid);
+                       locale = pg_newlocale_from_collation(collid);
 #endif
                }
        }
@@ -1828,7 +1831,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
         * will make use of the temporary buffers we initialize here for scratch
         * space, and the abbreviation case requires additional state.
         */
-       if (abbreviate || locale_aware)
+       if (abbreviate || !collate_c)
        {
                tss = palloc(sizeof(TextSortSupport));
                tss->buf1 = palloc(TEXTBUFLEN);
@@ -1838,6 +1841,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
 #ifdef HAVE_LOCALE_T
                tss->locale = locale;
 #endif
+               tss->collate_c = collate_c;
                ssup->ssup_extra = tss;
 
                /*
@@ -2011,45 +2015,58 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
        memset(pres, 0, sizeof(Datum));
        len = VARSIZE_ANY_EXHDR(authoritative);
 
-       /* By convention, we use buffer 1 to store and NUL-terminate text */
-       if (len >= tss->buflen1)
+       /*
+        * If we're using the C collation, use memcmp(), rather than strxfrm(),
+        * to abbreviated keys.  The full comparator for the C locale is always
+        * memcmp(), and we can't risk having this give a different answer.
+        * Besides, this should be faster, too.
+        */
+       if (tss->collate_c)
+               memcpy(pres, VARDATA_ANY(authoritative), Min(len, sizeof(Datum)));
+       else
        {
-               pfree(tss->buf1);
-               tss->buflen1 = Max(len + 1, Min(tss->buflen1 * 2, MaxAllocSize));
-               tss->buf1 = palloc(tss->buflen1);
-       }
+               /*
+                * We're not using the C collation, so fall back on strxfrm.
+                */
 
-       /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
-       memcpy(tss->buf1, VARDATA_ANY(authoritative), len);
-       tss->buf1[len] = '\0';
+               /* By convention, we use buffer 1 to store and NUL-terminate text */
+               if (len >= tss->buflen1)
+               {
+                       pfree(tss->buf1);
+                       tss->buflen1 = Max(len + 1, Min(tss->buflen1 * 2, MaxAllocSize));
+                       tss->buf1 = palloc(tss->buflen1);
+               }
 
-       /* Don't leak memory here */
-       if (PointerGetDatum(authoritative) != original)
-               pfree(authoritative);
+               /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
+               memcpy(tss->buf1, VARDATA_ANY(authoritative), len);
+               tss->buf1[len] = '\0';
 
-retry:
+               /* Don't leak memory here */
+               if (PointerGetDatum(authoritative) != original)
+                       pfree(authoritative);
 
-       /*
-        * There is no special handling of the C locale here, unlike with
-        * varstr_cmp().  strxfrm() is used indifferently.
-        */
+               for (;;)
+               {
 #ifdef HAVE_LOCALE_T
-       if (tss->locale)
-               bsize = strxfrm_l(tss->buf2, tss->buf1, tss->buflen2, tss->locale);
-       else
+                       if (tss->locale)
+                               bsize = strxfrm_l(tss->buf2, tss->buf1,
+                                                                 tss->buflen2, tss->locale);
+                       else
 #endif
-               bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
+                               bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
 
-       if (bsize >= tss->buflen2)
-       {
-               /*
-                * The C standard states that the contents of the buffer is now
-                * unspecified.  Grow buffer, and retry.
-                */
-               pfree(tss->buf2);
-               tss->buflen2 = Max(bsize + 1, Min(tss->buflen2 * 2, MaxAllocSize));
-               tss->buf2 = palloc(tss->buflen2);
-               goto retry;
+                       if (bsize < tss->buflen2)
+                               break;
+
+                       /*
+                        * The C standard states that the contents of the buffer is now
+                        * unspecified.  Grow buffer, and retry.
+                        */
+                       pfree(tss->buf2);
+                       tss->buflen2 = Max(bsize + 1,
+                                                          Min(tss->buflen2 * 2, MaxAllocSize));
+                       tss->buf2 = palloc(tss->buflen2);
+               }
        }
 
        /*