char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
int buflen1;
int buflen2;
+ int last_len1; /* Length of last buf1 string/strxfrm() blob */
+ int last_len2; /* Length of last buf2 string/strxfrm() blob */
+ int last_returned; /* Last comparison result (cache) */
bool collate_c;
hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
hyperLogLogState full_card; /* Full key cardinality state */
tss->buflen1 = TEXTBUFLEN;
tss->buf2 = palloc(TEXTBUFLEN);
tss->buflen2 = TEXTBUFLEN;
+ /* Start with invalid values */
+ tss->last_len1 = -1;
+ tss->last_len2 = -1;
#ifdef HAVE_LOCALE_T
tss->locale = locale;
#endif
+ /*
+ * Initialize the last-returned cache to a sentinel value, so that a
+ * strxfrm() blob left over in a buffer cannot somehow be confused with
+ * an original string within bttextfastcmp_locale().  An actual
+ * strcoll() return value of INT_MIN seems unlikely on any platform,
+ * but even if one occurs, the only cost is an extra strcoll() call,
+ * never a wrong answer.
+ */
+ tss->last_returned = INT_MIN;
tss->collate_c = collate_c;
ssup->ssup_extra = tss;
{
text *arg1 = DatumGetTextPP(x);
text *arg2 = DatumGetTextPP(y);
+ bool arg1_match;
TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
/* working state */
/* Fast pre-check for equality, as discussed in varstr_cmp() */
if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
{
+ /*
+ * buf1 and buf2 contents are unchanged, so leave last_len1 and
+ * last_len2 alone; the existing buffer contents may still be usable
+ * by the next call.
+ */
result = 0;
goto done;
}
tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
}
- memcpy(tss->buf1, a1p, len1);
- tss->buf1[len1] = '\0';
- memcpy(tss->buf2, a2p, len2);
- tss->buf2[len2] = '\0';
+ /*
+ * We're likely to be asked to compare the same strings repeatedly, and
+ * memcmp() is so much cheaper than strcoll() that it pays to try to cache
+ * comparisons, even though in general there is no reason to think that
+ * that will work out (every text datum may be unique). Caching does not
+ * slow things down measurably when it doesn't work out, and can speed
+ * things up by rather a lot when it does. In part, this is because the
+ * memcmp() compares data from cachelines that are needed in L1 cache even
+ * when the last comparison's result cannot be reused.
+ */
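+ /* Track whether arg1 already matches buf1, for the cached-result check below */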
+ arg1_match = true;
+ if (len1 != tss->last_len1 || memcmp(tss->buf1, a1p, len1) != 0)
+ {
+ arg1_match = false;
+ memcpy(tss->buf1, a1p, len1);
+ tss->buf1[len1] = '\0';
+ tss->last_len1 = len1;
+ }
+
+ /*
+ * If we're comparing the same two strings as last time, we can return the
+ * same answer without calling strcoll() again. This is more likely than
+ * it seems (at least with moderate to low cardinality sets), because
+ * quicksort compares the same pivot against many values.
+ */
+ if (len2 != tss->last_len2 || memcmp(tss->buf2, a2p, len2) != 0)
+ {
+ memcpy(tss->buf2, a2p, len2);
+ tss->buf2[len2] = '\0';
+ tss->last_len2 = len2;
+ }
+ else if (arg1_match && tss->last_returned != INT_MIN)
+ {
+ /* Use result cached following last actual strcoll() call */
+ result = tss->last_returned;
+ goto done;
+ }
#ifdef HAVE_LOCALE_T
if (tss->locale)
if (result == 0)
result = strcmp(tss->buf1, tss->buf2);
+ /* Cache result, perhaps saving an expensive strcoll() call next time */
+ tss->last_returned = result;
done:
/* We can't afford to leak memory here. */
if (PointerGetDatum(arg1) != x)
tss->buf1 = palloc(tss->buflen1);
}
+ /* Might be able to reuse strxfrm() blob from last call */
+ if (tss->last_len1 == len &&
+ memcmp(tss->buf1, authoritative_data, len) == 0)
+ {
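+ /*
+ * Copy the cached blob prefix; the abbreviated key holds at most
+ * sizeof(Datum) bytes, and the blob itself may be shorter still.
+ */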
+ memcpy(pres, tss->buf2, Min(sizeof(Datum), tss->last_len2));
+ /* No change affecting cardinality, so no hashing required */
+ goto done;
+ }
+
/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
memcpy(tss->buf1, authoritative_data, len);
tss->buf1[len] = '\0';
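+ /* Cache string length for the reuse check on the next conversion */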
+ tss->last_len1 = len;
for (;;)
{
#endif
bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
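+ /* Cache the resulting blob length for possible reuse next time */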
+ tss->last_len2 = bsize;
if (bsize < tss->buflen2)
break;
addHyperLogLog(&tss->abbr_card, hash);
+done:
/*
* Byteswap on little-endian machines.
*