Fix whitespace

[postgresql] / src / port / chklocale.c
diff --git a/src/port/chklocale.c b/src/port/chklocale.c

index 02251f7fb0ebcefc490df55b4df4df5562b02d60..3c9d7abcbd7fd580b5efa33ab602a89521c327a5 100644 (file)
--- a/src/port/chklocale.c
+++ b/src/port/chklocale.c
@@ -4,11 +4,11 @@
   *             Functions for handling locale-related info
   *
   *
- * Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ * Copyright (c) 1996-2013, PostgreSQL Global Development Group
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/port/chklocale.c,v 1.5 2007/10/10 11:50:07 mha Exp $
+ *       src/port/chklocale.c
   *
   *-------------------------------------------------------------------------
   */
@@ -62,7 +62,7 @@ static const struct encoding_match encoding_match_list[] = {
         {PG_EUC_KR, "IBM-eucKR"},
         {PG_EUC_KR, "deckorean"},
         {PG_EUC_KR, "5601"},
-       {PG_EUC_KR, "CP51949"}, /* or 20949 ? */
+       {PG_EUC_KR, "CP51949"},
  
         {PG_EUC_TW, "EUC-TW"},
         {PG_EUC_TW, "eucTW"},
@@ -123,6 +123,14 @@ static const struct encoding_match encoding_match_list[] = {
         {PG_KOI8R, "KOI8-R"},
         {PG_KOI8R, "CP20866"},
  
+       {PG_KOI8U, "KOI8-U"},
+       {PG_KOI8U, "CP21866"},
+
+       {PG_WIN866, "CP866"},
+       {PG_WIN874, "CP874"},
+       {PG_WIN1250, "CP1250"},
+       {PG_WIN1251, "CP1251"},
+       {PG_WIN1251, "ansi-1251"},
         {PG_WIN1252, "CP1252"},
         {PG_WIN1253, "CP1253"},
         {PG_WIN1254, "CP1254"},
@@ -130,11 +138,6 @@ static const struct encoding_match encoding_match_list[] = {
         {PG_WIN1256, "CP1256"},
         {PG_WIN1257, "CP1257"},
         {PG_WIN1258, "CP1258"},
-#ifdef NOT_VERIFIED
-       {PG_WIN874, "???"},
-#endif
-       {PG_WIN1251, "CP1251"},
-       {PG_WIN866, "CP866"},
  
         {PG_ISO_8859_5, "ISO-8859-5"},
         {PG_ISO_8859_5, "ISO8859-5"},
@@ -154,20 +157,23 @@ static const struct encoding_match encoding_match_list[] = {
         {PG_ISO_8859_8, "ISO-8859-8"},
         {PG_ISO_8859_8, "ISO8859-8"},
         {PG_ISO_8859_8, "iso88598"},
-    {PG_ISO_8859_8, "CP28598"},
+       {PG_ISO_8859_8, "CP28598"},
  
         {PG_SJIS, "SJIS"},
         {PG_SJIS, "PCK"},
         {PG_SJIS, "CP932"},
+       {PG_SJIS, "SHIFT_JIS"},
  
         {PG_BIG5, "BIG5"},
         {PG_BIG5, "BIG5HKSCS"},
+       {PG_BIG5, "Big5-HKSCS"},
         {PG_BIG5, "CP950"},
  
         {PG_GBK, "GBK"},
         {PG_GBK, "CP936"},
  
         {PG_UHC, "UHC"},
+       {PG_UHC, "CP949"},
  
         {PG_JOHAB, "JOHAB"},
         {PG_JOHAB, "CP1361"},
@@ -177,50 +183,104 @@ static const struct encoding_match encoding_match_list[] = {
  
         {PG_SHIFT_JIS_2004, "SJIS_2004"},
  
+       {PG_SQL_ASCII, "US-ASCII"},
+
         {PG_SQL_ASCII, NULL}            /* end marker */
  };
  
  #ifdef WIN32
  /*
- * On Windows, use CP<codepage number> instead of the nl_langinfo() result
+ * On Windows, use CP<code page number> instead of the nl_langinfo() result
+ *
+ * Visual Studio 2012 expanded the set of valid LC_CTYPE values, so have its
+ * locale machinery determine the code page.  See comments at IsoLocaleName().
+ * For other compilers, follow the locale's predictable format.
+ *
+ * Returns a malloc()'d string for the caller to free, or NULL on failure.
   */
  static char *
  win32_langinfo(const char *ctype)
  {
-       char       *r;
+       char       *r = NULL;
+
+#if (_MSC_VER >= 1700)
+       _locale_t       loct = NULL;
+
+       loct = _create_locale(LC_CTYPE, ctype);
+       if (loct != NULL)
+       {
+               r = malloc(16);                 /* excess */
+               if (r != NULL)
+                       sprintf(r, "CP%u", loct->locinfo->lc_codepage);
+               _free_locale(loct);
+       }
+#else
         char       *codepage;
-       int                     ln;
  
         /*
-        * Locale format on Win32 is <Language>_<Country>.<CodePage> .
-        * For example, English_USA.1252.
+        * Locale format on Win32 is <Language>_<Country>.<CodePage> . For
+        * example, English_United States.1252.
          */
         codepage = strrchr(ctype, '.');
-       if (!codepage)
-               return NULL;
-       codepage++;
-       ln = strlen(codepage);
-       r = malloc(ln + 3);
-       sprintf(r, "CP%s", codepage);
+       if (codepage != NULL)
+       {
+               int                     ln;
+
+               codepage++;
+               ln = strlen(codepage);
+               r = malloc(ln + 3);
+               if (r != NULL)
+                       sprintf(r, "CP%s", codepage);
+       }
+#endif
  
         return r;
  }
-#endif /* WIN32 */
+
+#ifndef FRONTEND
+/*
+ * Given a Windows code page identifier, find the corresponding PostgreSQL
+ * encoding.  Issue a warning and return -1 if none found.
+ */
+int
+pg_codepage_to_encoding(UINT cp)
+{
+       char            sys[16];
+       int                     i;
+
+       sprintf(sys, "CP%u", cp);
+
+       /* Check the table */
+       for (i = 0; encoding_match_list[i].system_enc_name; i++)
+               if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
+                       return encoding_match_list[i].pg_enc_code;
+
+       ereport(WARNING,
+                       (errmsg("could not determine encoding for codeset \"%s\"", sys),
+                  errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
+
+       return -1;
+}
+#endif
+#endif   /* WIN32 */
  
  #if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
  
  /*
   * Given a setting for LC_CTYPE, return the Postgres ID of the associated
- * encoding, if we can determine it.
+ * encoding, if we can determine it.  Return -1 if we can't determine it.
   *
   * Pass in NULL to get the encoding for the current locale setting.
+ * Pass "" to get the encoding selected by the server's environment.
   *
   * If the result is PG_SQL_ASCII, callers should treat it as being compatible
- * with any desired encoding.  We return this if the locale is C/POSIX or we
- * can't determine the encoding.
+ * with any desired encoding.
+ *
+ * If running in the backend and write_message is false, this function must
+ * cope with the possibility that elog() and palloc() are not yet usable.
   */
  int
-pg_get_encoding_from_locale(const char *ctype)
+pg_get_encoding_from_locale(const char *ctype, bool write_message)
  {
         char       *sys;
         int                     i;
@@ -231,19 +291,24 @@ pg_get_encoding_from_locale(const char *ctype)
                 char       *save;
                 char       *name;
  
+               /* If locale is C or POSIX, we can allow all encodings */
+               if (pg_strcasecmp(ctype, "C") == 0 ||
+                       pg_strcasecmp(ctype, "POSIX") == 0)
+                       return PG_SQL_ASCII;
+
                 save = setlocale(LC_CTYPE, NULL);
                 if (!save)
-                       return PG_SQL_ASCII;            /* setlocale() broken? */
+                       return -1;                      /* setlocale() broken? */
                 /* must copy result, or it might change after setlocale */
                 save = strdup(save);
                 if (!save)
-                       return PG_SQL_ASCII;            /* out of memory; unlikely */
+                       return -1;                      /* out of memory; unlikely */
  
                 name = setlocale(LC_CTYPE, ctype);
                 if (!name)
                 {
                         free(save);
-                       return PG_SQL_ASCII;            /* bogus ctype passed in? */
+                       return -1;                      /* bogus ctype passed in? */
                 }
  
  #ifndef WIN32
@@ -262,7 +327,13 @@ pg_get_encoding_from_locale(const char *ctype)
                 /* much easier... */
                 ctype = setlocale(LC_CTYPE, NULL);
                 if (!ctype)
-                       return PG_SQL_ASCII;            /* setlocale() broken? */
+                       return -1;                      /* setlocale() broken? */
+
+               /* If locale is C or POSIX, we can allow all encodings */
+               if (pg_strcasecmp(ctype, "C") == 0 ||
+                       pg_strcasecmp(ctype, "POSIX") == 0)
+                       return PG_SQL_ASCII;
+
  #ifndef WIN32
                 sys = nl_langinfo(CODESET);
                 if (sys)
@@ -273,14 +344,7 @@ pg_get_encoding_from_locale(const char *ctype)
         }
  
         if (!sys)
-               return PG_SQL_ASCII;            /* out of memory; unlikely */
-
-       /* If locale is C or POSIX, we can allow all encodings */
-       if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
-       {
-               free(sys);
-               return PG_SQL_ASCII;
-       }
+               return -1;                              /* no codeset found, or out of memory */
  
         /* Check the table */
         for (i = 0; encoding_match_list[i].system_enc_name; i++)
@@ -295,6 +359,7 @@ pg_get_encoding_from_locale(const char *ctype)
         /* Special-case kluges for particular platforms go here */
  
  #ifdef __darwin__
+
         /*
          * Current OS X has many locales that report an empty string for CODESET,
          * but they all seem to actually use UTF-8.
@@ -308,33 +373,39 @@ pg_get_encoding_from_locale(const char *ctype)
  
         /*
          * We print a warning if we got a CODESET string but couldn't recognize
-        * it.  This means we need another entry in the table.
+        * it.  This means we need another entry in the table.
          */
+       if (write_message)
+       {
  #ifdef FRONTEND
-       fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
-                       ctype, sys);
-       /* keep newline separate so there's only one translatable string */
-       fputc('\n', stderr);
+               fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
+                               ctype, sys);
+               /* keep newline separate so there's only one translatable string */
+               fputc('\n', stderr);
  #else
-       ereport(WARNING,
-                       (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
-                                       ctype, sys),
-                        errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
+               ereport(WARNING,
+                               (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
+                                               ctype, sys),
+                  errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
  #endif
+       }
  
         free(sys);
-       return PG_SQL_ASCII;
+       return -1;
  }
-
-#else /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
+#else                                                  /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
  
  /*
- * stub if no platform support
+ * stub if no multi-language platform support
+ *
+ * Note: we could return -1 here, but that would have the effect of
+ * forcing users to specify an encoding to initdb on such platforms.
+ * It seems better to silently default to SQL_ASCII.
   */
  int
-pg_get_encoding_from_locale(const char *ctype)
+pg_get_encoding_from_locale(const char *ctype, bool write_message)
  {
         return PG_SQL_ASCII;
  }
  
-#endif /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
+#endif   /* (HAVE_LANGINFO_H && CODESET) || WIN32 */