From db29620d4d16e08241f965ccd70d0f65883ff0de Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 24 Oct 2014 21:10:13 +0300 Subject: [PATCH] Work around Windows locale name with non-ASCII character. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Windows has one locale whose name contains a non-ASCII character: "Norwegian (Bokmål)" (that's an 'a' with a ring on top). That causes trouble; when passing it to setlocale(), it's not clear what encoding the argument should be in. Another problem is that the locale name is stored in the pg_database catalog table, and the encoding used there depends on what server encoding happens to be in use when the database is created. For example, if you issue the CREATE DATABASE when connected to a UTF-8 database, the locale name is stored in pg_database in UTF-8. As long as all locale names are pure ASCII, that's not a problem. To work around that, map the troublesome locale name to a pure-ASCII alias of the same locale, "norwegian-bokmal". Now, this doesn't change the existing values that are already in pg_database and in postgresql.conf. Old clusters will need to be fixed manually. Instructions for that need to be put in the release notes. This fixes bug #11431 reported by Alon Siman-Tov. Backpatch to 9.2; backpatching further would require more work than seems worth it. --- src/port/win32setlocale.c | 158 +++++++++++++++++++++++++++----------- 1 file changed, 114 insertions(+), 44 deletions(-) diff --git a/src/port/win32setlocale.c b/src/port/win32setlocale.c index b1172ecfbb..379049b1bc 100644 --- a/src/port/win32setlocale.c +++ b/src/port/win32setlocale.c @@ -9,15 +9,26 @@ * src/port/win32setlocale.c * * - * Windows has a problem with locale names that have a dot in the country - * name. For example: + * The setlocale() function in Windows is broken in two ways. First, it + * has a problem with locale names that have a dot in the country name. 
For + * example: * * "Chinese (Traditional)_Hong Kong S.A.R..950" * - * For some reason, setlocale() doesn't accept that. Fortunately, Windows' - * setlocale() accepts various alternative names for such countries, so we - * provide a wrapper setlocale() function that maps the troublemaking locale - * names to accepted aliases. + * For some reason, setlocale() doesn't accept that as argument, even though + * setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts + * various alternative names for such countries, so to work around the broken + * setlocale() function, we map the troublemaking locale names to accepted + * aliases, before calling setlocale(). + * + * The second problem is that the locale name for "Norwegian (Bokmål)" + * contains a non-ASCII character. That's problematic, because it's not clear + * what encoding the locale name itself is supposed to be in, when you + * haven't yet set a locale. Also, it causes problems when the cluster + * contains databases with different encodings, as the locale name is stored + * in the pg_database system catalog. To work around that, when setlocale() + * returns that locale name, map it to a pure-ASCII alias for the same + * locale. *------------------------------------------------------------------------- */ @@ -27,11 +38,23 @@ struct locale_map { - const char *locale_name_part; /* string in locale name to replace */ - const char *replacement; /* string to replace it with */ + /* + * String in locale name to replace. Can be a single string (end is NULL), + * or separate start and end strings. If two strings are given, the + * locale name must contain both of them, and everything between them + * is replaced. This is used for a poor-man's regexp search, allowing + * replacement of "start.*end". 
+ */ + const char *locale_name_start; + const char *locale_name_end; + + const char *replacement; /* string to replace the match with */ }; -static const struct locale_map locale_map_list[] = { +/* + * Mappings applied before calling setlocale(), to the argument. + */ +static const struct locale_map locale_map_argument[] = { /* * "HKG" is listed here: * http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx @@ -40,8 +63,8 @@ static const struct locale_map locale_map_list[] = { * "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the * above list, but seems to work anyway. */ - {"Hong Kong S.A.R.", "HKG"}, - {"U.A.E.", "ARE"}, + {"Hong Kong S.A.R.", NULL, "HKG"}, + {"U.A.E.", NULL, "ARE"}, /* * The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't @@ -56,60 +79,107 @@ static const struct locale_map locale_map_list[] = { * * Some versions of Windows spell it "Macau", others "Macao". */ - {"Chinese (Traditional)_Macau S.A.R..950", "ZHM"}, - {"Chinese_Macau S.A.R..950", "ZHM"}, - {"Chinese (Traditional)_Macao S.A.R..950", "ZHM"}, - {"Chinese_Macao S.A.R..950", "ZHM"} + {"Chinese (Traditional)_Macau S.A.R..950", NULL, "ZHM"}, + {"Chinese_Macau S.A.R..950", NULL, "ZHM"}, + {"Chinese (Traditional)_Macao S.A.R..950", NULL, "ZHM"}, + {"Chinese_Macao S.A.R..950", NULL, "ZHM"}, + {NULL, NULL, NULL} }; -char * -pgwin32_setlocale(int category, const char *locale) +/* + * Mappings applied after calling setlocale(), to its return value. + */ +static const struct locale_map locale_map_result[] = { + /* + * "Norwegian (Bokmål)" locale name contains the a-ring character. + * Map it to a pure-ASCII alias. + * + * It's not clear what encoding setlocale() uses when it returns the + * locale name, so to play it safe, we search for "Norwegian (Bok*l)". 
+ */ + {"Norwegian (Bokm", "l)", "norwegian-bokmal"}, + {NULL, NULL, NULL} +}; + +#define MAX_LOCALE_NAME_LEN 100 + +static char * +map_locale(struct locale_map *map, char *locale) { - char *result; - char *alias; + static char aliasbuf[MAX_LOCALE_NAME_LEN]; int i; - if (locale == NULL) - return setlocale(category, locale); - /* Check if the locale name matches any of the problematic ones. */ - alias = NULL; - for (i = 0; i < lengthof(locale_map_list); i++) + for (i = 0; map[i].locale_name_start != NULL; i++) { - const char *needle = locale_map_list[i].locale_name_part; - const char *replacement = locale_map_list[i].replacement; + const char *needle_start = map[i].locale_name_start; + const char *needle_end = map[i].locale_name_end; + const char *replacement = map[i].replacement; char *match; + char *match_start = NULL; + char *match_end = NULL; - match = strstr(locale, needle); - if (match != NULL) + match = strstr(locale, needle_start); + if (match) + { + /* + * Found a match for the first part. If this was a two-part + * replacement, find the second part. + */ + match_start = match; + if (needle_end) + { + match = strstr(match_start + strlen(needle_start), needle_end); + if (match) + match_end = match + strlen(needle_end); + else + match_start = NULL; + } + else + match_end = match_start + strlen(needle_start); + } + + if (match_start) { /* Found a match. Replace the matched string. 
*/ - int matchpos = match - locale; + int matchpos = match_start - locale; int replacementlen = strlen(replacement); - char *rest = match + strlen(needle); + char *rest = match_end; int restlen = strlen(rest); - alias = malloc(matchpos + replacementlen + restlen + 1); - if (!alias) + /* check that the result fits in the static buffer */ + if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN) return NULL; - memcpy(&alias[0], &locale[0], matchpos); - memcpy(&alias[matchpos], replacement, replacementlen); - memcpy(&alias[matchpos + replacementlen], rest, restlen + 1); /* includes null - * terminator */ + memcpy(&aliasbuf[0], &locale[0], matchpos); + memcpy(&aliasbuf[matchpos], replacement, replacementlen); + /* includes null terminator */ + memcpy(&aliasbuf[matchpos + replacementlen], rest, restlen + 1); - break; + return aliasbuf; } } - /* Call the real setlocale() function */ - if (alias) - { - result = setlocale(category, alias); - free(alias); - } + /* no match, just return the original string */ + return locale; +} + +char * +pgwin32_setlocale(int category, const char *locale) +{ + char *argument; + char *result; + + if (locale == NULL) + argument = NULL; else - result = setlocale(category, locale); + argument = map_locale(locale_map_argument, locale); + + /* Call the real setlocale() function */ + result = setlocale(category, argument); + + if (result) + result = map_locale(locale_map_result, result); return result; } -- 2.40.0