granicus.if.org Git - postgresql/blob - src/port/chklocale.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * chklocale.c
   4  *              Functions for handling locale-related info
   5  *
   6  *
   7  * Copyright (c) 1996-2007, PostgreSQL Global Development Group
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/port/chklocale.c,v 1.1 2007/09/28 22:25:49 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #ifndef FRONTEND
  17 #include "postgres.h"
  18 #else
  19 #include "postgres_fe.h"
  20 #endif
  21
  22 #include <locale.h>
  23 #ifdef HAVE_LANGINFO_H
  24 #include <langinfo.h>
  25 #endif
  26
  27 #include "mb/pg_wchar.h"
  28
  29
  30 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
  31
  32 /*
  33  * This table needs to recognize all the CODESET spellings for supported
  34  * backend encodings.  We don't need to handle frontend-only encodings.
  35  * Note that we search the table with pg_strcasecmp(), so variant
  36  * capitalizations don't need their own entries.
  37  */
  38 struct encoding_match                /* one row of the codeset-name lookup table */
  39 {
  40         enum pg_enc pg_enc_code;        /* Postgres encoding ID returned on a match */
  41         const char *system_enc_name;    /* codeset spelling compared against nl_langinfo(CODESET); NULL in the end marker */
  42 };
  43
  44 static const struct encoding_match encoding_match_list[] = {        /* scanned front to back; the first matching name wins */
  45         {PG_EUC_JP, "EUC-JP"},
  46         {PG_EUC_JP, "eucJP"},
  47         {PG_EUC_JP, "IBM-eucJP"},
  48         {PG_EUC_JP, "sdeckanji"},
  49
  50         {PG_EUC_CN, "EUC-CN"},
  51         {PG_EUC_CN, "eucCN"},
  52         {PG_EUC_CN, "IBM-eucCN"},
  53         {PG_EUC_CN, "GB2312"},
  54         {PG_EUC_CN, "dechanzi"},
  55
  56         {PG_EUC_KR, "EUC-KR"},
  57         {PG_EUC_KR, "eucKR"},
  58         {PG_EUC_KR, "IBM-eucKR"},
  59         {PG_EUC_KR, "deckorean"},
  60         {PG_EUC_KR, "5601"},
  61
  62         {PG_EUC_TW, "EUC-TW"},
  63         {PG_EUC_TW, "eucTW"},
  64         {PG_EUC_TW, "IBM-eucTW"},
  65         {PG_EUC_TW, "cns11643"},
  66
  67         {PG_UTF8, "UTF-8"},
  68         {PG_UTF8, "utf8"},
  69
  70         {PG_LATIN1, "ISO-8859-1"},
  71         {PG_LATIN1, "ISO8859-1"},
  72         {PG_LATIN1, "iso88591"},
  73
  74         {PG_LATIN2, "ISO-8859-2"},
  75         {PG_LATIN2, "ISO8859-2"},
  76         {PG_LATIN2, "iso88592"},
  77
  78         {PG_LATIN3, "ISO-8859-3"},
  79         {PG_LATIN3, "ISO8859-3"},
  80         {PG_LATIN3, "iso88593"},
  81
  82         {PG_LATIN4, "ISO-8859-4"},
  83         {PG_LATIN4, "ISO8859-4"},
  84         {PG_LATIN4, "iso88594"},
  85
  86         {PG_LATIN5, "ISO-8859-9"},
  87         {PG_LATIN5, "ISO8859-9"},
  88         {PG_LATIN5, "iso88599"},
  89
  90         {PG_LATIN6, "ISO-8859-10"},
  91         {PG_LATIN6, "ISO8859-10"},
  92         {PG_LATIN6, "iso885910"},
  93
  94         {PG_LATIN7, "ISO-8859-13"},
  95         {PG_LATIN7, "ISO8859-13"},
  96         {PG_LATIN7, "iso885913"},
  97
  98         {PG_LATIN8, "ISO-8859-14"},
  99         {PG_LATIN8, "ISO8859-14"},
 100         {PG_LATIN8, "iso885914"},
 101
 102         {PG_LATIN9, "ISO-8859-15"},
 103         {PG_LATIN9, "ISO8859-15"},
 104         {PG_LATIN9, "iso885915"},
 105
 106         {PG_LATIN10, "ISO-8859-16"},
 107         {PG_LATIN10, "ISO8859-16"},
 108         {PG_LATIN10, "iso885916"},
 109
 110         {PG_KOI8R, "KOI8-R"},
 111
 112         {PG_WIN1252, "CP1252"},
 113         {PG_WIN1253, "CP1253"},
 114         {PG_WIN1254, "CP1254"},
 115         {PG_WIN1255, "CP1255"},
 116         {PG_WIN1256, "CP1256"},
 117         {PG_WIN1257, "CP1257"},
 118         {PG_WIN1258, "CP1258"},
 119 #ifdef NOT_VERIFIED        /* the entry below is compiled only when NOT_VERIFIED is defined */
 120         {PG_WIN874, "???"},        /* "???" looks like a placeholder: no codeset spelling is recorded here for WIN874 */
 121 #endif
 122         {PG_WIN1251, "CP1251"},
 123         {PG_WIN866, "CP866"},
 124
 125         {PG_ISO_8859_5, "ISO-8859-5"},
 126         {PG_ISO_8859_5, "ISO8859-5"},
 127         {PG_ISO_8859_5, "iso88595"},
 128
 129         {PG_ISO_8859_6, "ISO-8859-6"},
 130         {PG_ISO_8859_6, "ISO8859-6"},
 131         {PG_ISO_8859_6, "iso88596"},
 132
 133         {PG_ISO_8859_7, "ISO-8859-7"},
 134         {PG_ISO_8859_7, "ISO8859-7"},
 135         {PG_ISO_8859_7, "iso88597"},
 136
 137         {PG_ISO_8859_8, "ISO-8859-8"},
 138         {PG_ISO_8859_8, "ISO8859-8"},
 139         {PG_ISO_8859_8, "iso88598"},
 140
 141         {PG_SQL_ASCII, NULL}            /* end marker: the NULL name stops the lookup loop */
 142 };
 143
 144
 145 /*
 146  * Given a setting for LC_CTYPE, return the Postgres ID of the associated
 147  * encoding, if we can determine it.
 148  *
 149  * Pass in NULL to get the encoding for the current locale setting.
 150  *
 151  * If the result is PG_SQL_ASCII, callers should treat it as being compatible
 152  * with any desired encoding.  We return this if the locale is C/POSIX or we
 153  * can't determine the encoding.
 154  */
 155 int
 156 pg_get_encoding_from_locale(const char *ctype)
 157 {
 158         char       *sys;
 159         int                     i;
 160
 161         if (ctype)
 162         {
 163                 char       *save;
 164
 165                 save = setlocale(LC_CTYPE, NULL);
 166                 if (!save)
 167                         return PG_SQL_ASCII;            /* setlocale() broken? */
 168                 /* must copy result, or it might change after setlocale */
 169                 save = strdup(save);
 170                 if (!save)
 171                         return PG_SQL_ASCII;            /* out of memory; unlikely */
 172
 173                 if (!setlocale(LC_CTYPE, ctype))
 174                 {
 175                         free(save);
 176                         return PG_SQL_ASCII;            /* bogus ctype passed in? */
 177                 }
 178
 179                 sys = nl_langinfo(CODESET);
 180                 if (sys)
 181                         sys = strdup(sys);
 182
 183                 setlocale(LC_CTYPE, save);
 184                 free(save);
 185         }
 186         else
 187         {
 188                 /* much easier... */
 189                 ctype = setlocale(LC_CTYPE, NULL);
 190                 if (!ctype)
 191                         return PG_SQL_ASCII;            /* setlocale() broken? */
 192                 sys = nl_langinfo(CODESET);
 193                 if (sys)
 194                         sys = strdup(sys);
 195         }
 196
 197         if (!sys)
 198                 return PG_SQL_ASCII;            /* out of memory; unlikely */
 199
 200         if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
 201         {
 202                 free(sys);
 203                 return PG_SQL_ASCII;
 204         }
 205
 206         for (i = 0; encoding_match_list[i].system_enc_name; i++)
 207         {
 208                 if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
 209                 {
 210                         free(sys);
 211                         return encoding_match_list[i].pg_enc_code;
 212                 }
 213         }
 214
 215         /*
 216          * We print a warning if we got a CODESET string but couldn't recognize
 217          * it.  This means we need another entry in the table.
 218          */
 219 #ifdef FRONTEND
 220         fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
 221                         ctype, sys);
 222         /* keep newline separate so there's only one translatable string */
 223         fputc('\n', stderr);
 224 #else
 225         ereport(WARNING,
 226                         (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
 227                                         ctype, sys),
 228                          errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
 229 #endif
 230
 231         free(sys);
 232         return PG_SQL_ASCII;
 233 }
 234
 235 #else /* !(HAVE_LANGINFO_H && CODESET) */
 236
 237 /*
 238  * stub if no platform support
 239  */
 240 int
 241 pg_get_encoding_from_locale(const char *ctype)
 242 {
 243         return PG_SQL_ASCII;
 244 }
 245
 246 #endif /* HAVE_LANGINFO_H && CODESET */