{
char *result;
int i;
+ bool enc_is_single_byte;
result = palloc(len + 1);
+ enc_is_single_byte = pg_database_encoding_max_length() == 1;
/*
* SQL99 specifies Unicode-aware case normalization, which we don't yet
* have the infrastructure for. Instead we use tolower() to provide a
* locale-aware translation. However, there are some locales where this
* is not right either (eg, Turkish may do strange things with 'i' and
* 'I'). Our current compromise is to use tolower() for characters with
- * the high bit set, and use an ASCII-only downcasing for 7-bit
- * characters.
+ * the high bit set, as long as they aren't part of a multi-byte character,
+ * and use an ASCII-only downcasing for 7-bit characters.
*/
for (i = 0; i < len; i++)
{
if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch) && isupper(ch))
+ else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
ch = tolower(ch);
result[i] = (char) ch;
}