Callers of icu_to_uchar() neglected to pfree the result string when done
with it. This results in catastrophic memory leaks in varstr_cmp(),
because of our prevailing assumption that btree comparison functions don't
leak memory. For safety, make all the call sites clean up leaks, though
I suspect that we could get away without it in formatting.c. I audited
callers of icu_from_uchar() as well, but found no places that seemed to
have a comparable issue.
Add function API specifications for icu_to_uchar() and icu_from_uchar();
the lack of any thought-through specification is perhaps not unrelated
to the existence of this bug in the first place. Fix icu_to_uchar()
to guarantee a nul-terminated result; although no existing caller appears
to care, the fact that it would have been nul-terminated except in
extreme corner cases seems ideally designed to bite someone on the rear
someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst
case, that could perhaps have led to a non-nul-terminated result, too.
Fix icu_from_uchar() to have a more reasonable definition of the function
result --- no callers are actually paying attention, so this isn't a live
bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s
input string for consistency.
That is not the end of what needs to be done to these functions, but
it's as much as I have the patience for right now.
Discussion: https://postgr.es/m/1955.
1498181798@sss.pgh.pa.us
#ifdef USE_ICU
+/*
+ * Get the ICU language tag for a locale name.
+ * The result is a palloc'd string.
+ */
static char *
get_icu_language_tag(const char *localename)
{
return pstrdup(buf);
}
-
+/*
+ * Get a comment (specifically, the display name) for an ICU locale.
+ * The result is a palloc'd string.
+ */
static char *
get_icu_locale_comment(const char *localename)
{
char *result;
status = U_ZERO_ERROR;
- len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status);
+ len_uchar = uloc_getDisplayName(localename, "en",
+ &displayname[0], sizeof(displayname),
+ &status);
if (U_FAILURE(status))
ereport(ERROR,
- (errmsg("could get display name for locale \"%s\": %s",
+ (errmsg("could not get display name for locale \"%s\": %s",
localename, u_errorName(status))));
icu_from_uchar(&result, displayname, len_uchar);
len_conv = icu_convert_case(u_strToLower, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
+ pfree(buff_uchar);
}
else
#endif
len_conv = icu_convert_case(u_strToUpper, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
+ pfree(buff_uchar);
}
else
#endif
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
+ pfree(buff_uchar);
}
else
#endif
icu_converter = conv;
}
+/*
+ * Convert a string in the database encoding into a string of UChars.
+ *
+ * The source string at buff is of length nbytes
+ * (it needn't be nul-terminated)
+ *
+ * *buff_uchar receives a pointer to the palloc'd result string, and
+ * the function's result is the number of UChars generated.
+ *
+ * The result string is nul-terminated, though most callers rely on the
+ * result length instead.
+ */
int32_t
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
{
init_icu_converter();
- len_uchar = 2 * nbytes; /* max length per docs */
+ len_uchar = 2 * nbytes + 1; /* max length per docs */
*buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
status = U_ZERO_ERROR;
- len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status);
+ len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar,
+ buff, nbytes, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
return len_uchar;
}
+/*
+ * Convert a string of UChars into the database encoding.
+ *
+ * The source string at buff_uchar is of length len_uchar
+ * (it needn't be nul-terminated)
+ *
+ * *result receives a pointer to the palloc'd result string, and the
+ * function's result is the number of bytes generated (not counting nul).
+ *
+ * The result string is nul-terminated.
+ */
int32_t
-icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
+icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
{
UErrorCode status;
int32_t len_result;
len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
*result = palloc(len_result + 1);
status = U_ZERO_ERROR;
- ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status);
+ len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+ buff_uchar, len_uchar, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
return len_result;
}
-#endif
+#endif /* USE_ICU */
/*
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
result = ucol_strcoll(mylocale->info.icu.ucol,
uchar1, ulen1,
uchar2, ulen2);
+
+ pfree(uchar1);
+ pfree(uchar2);
}
#else /* not USE_ICU */
/* shouldn't happen */
result = ucol_strcoll(sss->locale->info.icu.ucol,
uchar1, ulen1,
uchar2, ulen2);
+
+ pfree(uchar1);
+ pfree(uchar2);
}
#else /* not USE_ICU */
/* shouldn't happen */
Size bsize;
#ifdef USE_ICU
int32_t ulen = -1;
- UChar *uchar;
+ UChar *uchar = NULL;
#endif
/*
&status);
if (U_FAILURE(status))
ereport(ERROR,
- (errmsg("sort key generation failed: %s", u_errorName(status))));
+ (errmsg("sort key generation failed: %s",
+ u_errorName(status))));
}
else
bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
* okay. See remarks on bytea case above.)
*/
memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
+
+#ifdef USE_ICU
+ if (uchar)
+ pfree(uchar);
+#endif
}
/*
#ifdef USE_ICU
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
-extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar);
+extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
#endif
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */