From a88b6e4cfbff9802906dd400ef334ffa49e7f286 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 1 Sep 2011 11:08:32 +0300 Subject: [PATCH] setlocale() on Windows doesn't work correctly if the locale name contains dots. I previously worked around this in initdb, mapping the known problematic locale names to aliases that work, but Hiroshi Inoue pointed out that that's not enough because even if you use one of the aliases, like "Chinese_HKG", setlocale(LC_CTYPE, NULL) returns the long form, i.e. "Chinese_Hong Kong S.A.R.". When we try to restore an old locale value by passing that value back to setlocale(), it fails. Note that you are affected by this bug also if you use one of those short-form names manually, so just reverting the hack in initdb won't fix it. To work around that, move the locale name mapping from initdb to a wrapper around setlocale(), so that the mapping is invoked on every setlocale() call. Also, add a few checks for failed setlocale() calls in the backend. These calls shouldn't fail, and if they do there isn't much we can do about it, but at least you'll get a warning. Backpatch to 9.1, where the initdb hack was introduced. The Windows bug affects older versions too if you set locale manually to one of the aliases, but given the lack of complaints from the field, I'm hesitant to backpatch. --- src/backend/utils/adt/pg_locale.c | 19 ++++-- src/bin/initdb/initdb.c | 70 ++------------------ src/include/port.h | 13 +++- src/port/chklocale.c | 106 ++++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 70 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 4a2fd28f47..7112dea0e1 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -239,7 +239,9 @@ check_locale(int category, const char *value) /* set the locale with setlocale, to see if it accepts it. 
*/ ret = (setlocale(category, value) != NULL); - setlocale(category, save); /* assume this won't fail */ + /* restore old value. */ + if (!setlocale(category, save)) + elog(WARNING, "failed to restore old locale"); pfree(save); return ret; @@ -499,13 +501,15 @@ PGLC_localeconv(void) /* Try to restore internal settings */ if (save_lc_monetary) { - setlocale(LC_MONETARY, save_lc_monetary); + if (!setlocale(LC_MONETARY, save_lc_monetary)) + elog(WARNING, "failed to restore old locale"); pfree(save_lc_monetary); } if (save_lc_numeric) { - setlocale(LC_NUMERIC, save_lc_numeric); + if (!setlocale(LC_NUMERIC, save_lc_numeric)) + elog(WARNING, "failed to restore old locale"); pfree(save_lc_numeric); } @@ -513,7 +517,8 @@ PGLC_localeconv(void) /* Try to restore internal ctype settings */ if (save_lc_ctype) { - setlocale(LC_CTYPE, save_lc_ctype); + if (!setlocale(LC_CTYPE, save_lc_ctype)) + elog(WARNING, "failed to restore old locale"); pfree(save_lc_ctype); } #endif @@ -674,7 +679,8 @@ cache_locale_time(void) /* try to restore internal settings */ if (save_lc_time) { - setlocale(LC_TIME, save_lc_time); + if (!setlocale(LC_TIME, save_lc_time)) + elog(WARNING, "failed to restore old locale"); pfree(save_lc_time); } @@ -682,7 +688,8 @@ cache_locale_time(void) /* try to restore internal ctype settings */ if (save_lc_ctype) { - setlocale(LC_CTYPE, save_lc_ctype); + if (!setlocale(LC_CTYPE, save_lc_ctype)) + elog(WARNING, "failed to restore old locale"); pfree(save_lc_ctype); } #endif diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index c317355894..1dbd6f603a 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -185,7 +185,6 @@ static int locale_date_order(const char *locale); static bool check_locale_name(const char *locale); static bool check_locale_encoding(const char *locale, int encoding); static void setlocales(void); -static char *localemap(char *locale); static void usage(const char *progname); #ifdef WIN32 @@ -2286,61 +2285,6 @@ 
strreplace(char *str, char *needle, char *replacement) } #endif /* WIN32 */ -/* - * Windows has a problem with locale names that have a dot in the country - * name. For example: - * - * "Chinese (Traditional)_Hong Kong S.A.R..950" - * - * For some reason, setlocale() doesn't accept that. Fortunately, Windows' - * setlocale() accepts various alternative names for such countries, so we - * map the full country names to accepted aliases. - * - * The returned string is always malloc'd - if no mapping is done it is - * just a malloc'd copy of the original. - */ -static char * -localemap(char *locale) -{ - locale = xstrdup(locale); - -#ifdef WIN32 - - /* - * Map the full country name to an abbreviation that setlocale() accepts. - * - * "HKG" is listed here: - * http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx - * (Country/Region Strings). - * - * "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the - * above list, but seems to work anyway. - */ - strreplace(locale, "Hong Kong S.A.R.", "HKG"); - strreplace(locale, "U.A.E.", "ARE"); - - /* - * The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't - * seem to recognize that. And Macau isn't listed in the table of accepted - * abbreviations linked above. - * - * Fortunately, "ZHM" seems to be accepted as an alias for "Chinese - * (Traditional)_Macau S.A.R..950", so we use that. Note that it's unlike - * HKG and ARE, ZHM is an alias for the whole locale name, not just the - * country part. I'm not sure where that "ZHM" comes from, must be some - * legacy naming scheme. But hey, it works. - * - * Some versions of Windows spell it "Macau", others "Macao". 
- */ - strreplace(locale, "Chinese (Traditional)_Macau S.A.R..950", "ZHM"); - strreplace(locale, "Chinese_Macau S.A.R..950", "ZHM"); - strreplace(locale, "Chinese (Traditional)_Macao S.A.R..950", "ZHM"); - strreplace(locale, "Chinese_Macao S.A.R..950", "ZHM"); -#endif /* WIN32 */ - - return locale; -} - /* * set up the locale variables * @@ -2372,25 +2316,25 @@ setlocales(void) */ if (strlen(lc_ctype) == 0 || !check_locale_name(lc_ctype)) - lc_ctype = localemap(setlocale(LC_CTYPE, NULL)); + lc_ctype = xstrdup(setlocale(LC_CTYPE, NULL)); if (strlen(lc_collate) == 0 || !check_locale_name(lc_collate)) - lc_collate = localemap(setlocale(LC_COLLATE, NULL)); + lc_collate = xstrdup(setlocale(LC_COLLATE, NULL)); if (strlen(lc_numeric) == 0 || !check_locale_name(lc_numeric)) - lc_numeric = localemap(setlocale(LC_NUMERIC, NULL)); + lc_numeric = xstrdup(setlocale(LC_NUMERIC, NULL)); if (strlen(lc_time) == 0 || !check_locale_name(lc_time)) - lc_time = localemap(setlocale(LC_TIME, NULL)); + lc_time = xstrdup(setlocale(LC_TIME, NULL)); if (strlen(lc_monetary) == 0 || !check_locale_name(lc_monetary)) - lc_monetary = localemap(setlocale(LC_MONETARY, NULL)); + lc_monetary = xstrdup(setlocale(LC_MONETARY, NULL)); if (strlen(lc_messages) == 0 || !check_locale_name(lc_messages)) #if defined(LC_MESSAGES) && !defined(WIN32) { /* when available get the current locale setting */ - lc_messages = localemap(setlocale(LC_MESSAGES, NULL)); + lc_messages = xstrdup(setlocale(LC_MESSAGES, NULL)); } #else { /* when not available, get the CTYPE setting */ - lc_messages = localemap(setlocale(LC_CTYPE, NULL)); + lc_messages = xstrdup(setlocale(LC_CTYPE, NULL)); } #endif diff --git a/src/include/port.h b/src/include/port.h index 9d742ace0f..eceb4bfcd3 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -226,16 +226,27 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2))); #endif #endif /* USE_REPL_SNPRINTF */ +#if defined(WIN32) /* * Versions of libintl >= 0.18? 
try to replace setlocale() with a macro * to their own versions. Remove the macro, if it exists, because it * ends up calling the wrong version when the backend and libintl use * different versions of msvcrt. */ -#if defined(setlocale) && defined(WIN32) +#if defined(setlocale) #undef setlocale #endif +/* + * Define our own wrapper macro around setlocale() to work around bugs in + * Windows' native setlocale() function. + */ +extern char *pgwin32_setlocale(int category, const char *locale); + +#define setlocale(a,b) pgwin32_setlocale(a,b) + +#endif /* WIN32 */ + /* Portable prompt handling */ extern char *simple_prompt(const char *prompt, int maxlen, bool echo); diff --git a/src/port/chklocale.c b/src/port/chklocale.c index e4f3dc99e0..cd911b84ce 100644 --- a/src/port/chklocale.c +++ b/src/port/chklocale.c @@ -356,3 +356,109 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) } #endif /* (HAVE_LANGINFO_H && CODESET) || WIN32 */ + +#ifdef WIN32 +/* + * Windows has a problem with locale names that have a dot in the country + * name. For example: + * + * "Chinese (Traditional)_Hong Kong S.A.R..950" + * + * For some reason, setlocale() doesn't accept that. Fortunately, Windows' + * setlocale() accepts various alternative names for such countries, so we + * provide a wrapper setlocale() function that maps the troublemaking locale + * names to accepted aliases. + */ + +#undef setlocale + +struct locale_map +{ + const char *locale_name_part; /* string in locale name to replace */ + const char *replacement; /* string to replace it with */ +}; + +static const struct locale_map locale_map_list[] = { + + /* + * "HKG" is listed here: + * http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx + * (Country/Region Strings). + * + * "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the + * above list, but seems to work anyway. 
+ */ + { "Hong Kong S.A.R.", "HKG" }, + { "U.A.E.", "ARE" }, + + /* + * The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't + * seem to recognize that. And Macau isn't listed in the table of + * accepted abbreviations linked above. Fortunately, "ZHM" seems to be + * accepted as an alias for "Chinese (Traditional)_Macau S.A.R..950". I'm + * not sure where "ZHM" comes from, must be some legacy naming scheme. But + * hey, it works. + * + * Note that unlike HKG and ARE, ZHM is an alias for the *whole* locale + * name, not just the country part. + * + * Some versions of Windows spell it "Macau", others "Macao". + */ + { "Chinese (Traditional)_Macau S.A.R..950", "ZHM" }, + { "Chinese_Macau S.A.R..950", "ZHM" }, + { "Chinese (Traditional)_Macao S.A.R..950", "ZHM" }, + { "Chinese_Macao S.A.R..950", "ZHM" } +}; + +char * +pgwin32_setlocale(int category, const char *locale) +{ + char *result; + char *alias; + int i; + + if (locale == NULL) + return setlocale(category, locale); + + /* Check if the locale name matches any of the problematic ones. */ + alias = NULL; + for (i = 0; i < lengthof(locale_map_list); i++) + { + const char *needle = locale_map_list[i].locale_name_part; + const char *replacement = locale_map_list[i].replacement; + char *match; + + match = strstr(locale, needle); + if (match != NULL) + { + /* Found a match. Replace the matched string. 
*/ + int matchpos = match - locale; + int replacementlen = strlen(replacement); + char *rest = match + strlen(needle); + int restlen = strlen(rest); + + alias = malloc(matchpos + replacementlen + restlen + 1); + if (!alias) + return NULL; + + memcpy(&alias[0], &locale[0], matchpos); + memcpy(&alias[matchpos], replacement, replacementlen); + memcpy(&alias[matchpos + replacementlen], rest, restlen + 1); /* includes null terminator */ + + break; + } + } + + /* Call the real setlocale() function */ + if (alias) + { + result = setlocale(category, alias); + free(alias); + } + else + result = setlocale(category, locale); + + return result; +} + +#endif /* WIN32 */ -- 2.40.0