1 /*-------------------------------------------------------------------------
4 * This file contains functions for encoding conversion.
6 * The string-conversion functions in this file share some API quirks.
9 * The functions return a palloc'd, null-terminated string if conversion
10 * is required. However, if no conversion is performed, the given source
11 * string pointer is returned as-is.
13 * Although the presence of a length argument means that callers can pass
14 * non-null-terminated strings, care is required because the same string
15 * will be passed back if no conversion occurs. Such callers *must* check
16 * whether result == src and handle that case differently.
18 * If the source and destination encodings are the same, the source string
19 * is returned without any verification; it's assumed to be valid data.
20 * If that might not be the case, the caller is responsible for validating
21 * the string using a separate call to pg_verify_mbstr(). Whenever the
22 * source and destination encodings are different, the functions ensure that
23 * the result is validly encoded according to the destination encoding.
26 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
31 * src/backend/utils/mb/mbutils.c
33 *-------------------------------------------------------------------------
37 #include "access/xact.h"
38 #include "catalog/namespace.h"
39 #include "mb/pg_wchar.h"
40 #include "utils/builtins.h"
41 #include "utils/memutils.h"
42 #include "utils/syscache.h"
45 * When converting strings between different encodings, we assume that space
46 * for converted result is 4-to-1 growth in the worst case. The rate for
47 * currently supported encoding pairs is within 3 (SJIS JIS X0201 half-width
48 * kana -> UTF8 is the worst case). So "4" should be enough for the moment.
50 * Note that this is not the same as the maximum character width in any
51 * particular encoding.
/* Multiplier applied to the source length when sizing a conversion result buffer. */
53 #define MAX_CONVERSION_GROWTH 4
56 * We maintain a simple linked list caching the fmgr lookup info for the
57 * currently selected conversion functions, as well as any that have been
58 * selected previously in the current session. (We remember previous
59 * settings because we must be able to restore a previous setting during
60 * transaction rollback, without doing any fresh catalog accesses.)
62 * Since we'll never release this data, we just keep it in TopMemoryContext.
/* One cache entry: fmgr lookup info for the two conversion procs of an encoding pair. */
64 typedef struct ConvProcInfo
66 int s_encoding; /* server and client encoding IDs */
/* ToServerConvProc points at this member while the entry is the active one. */
68 FmgrInfo to_server_info; /* lookup info for conversion procs */
/* ToClientConvProc points at this member while the entry is the active one. */
69 FmgrInfo to_client_info;
/* New entries are pushed at the head (see the lcons() call in PrepareClientEncoding). */
72 static List *ConvProcList = NIL; /* List of ConvProcInfo */
75 * These variables point to the currently active conversion functions,
76 * or are NULL when no conversion is needed.
/* Both pointers are assigned together by SetClientEncoding(). */
78 static FmgrInfo *ToServerConvProc = NULL;
79 static FmgrInfo *ToClientConvProc = NULL;
82 * These variables track the currently-selected encodings.
/*
 * Each pointer refers to a pg_enc2name_tbl entry and starts out as SQL_ASCII.
 * They are changed by SetClientEncoding(), SetDatabaseEncoding() and
 * SetMessageEncoding() respectively.
 */
84 static const pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
85 static const pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
86 static const pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
89 * During backend startup we can't set client encoding because we (a)
90 * can't look up the conversion functions, and (b) may not know the database
91 * encoding yet either. So SetClientEncoding() just accepts anything and
92 * remembers it for InitializeClientEncoding() to apply later.
/* Set to true by InitializeClientEncoding(). */
94 static bool backend_startup_complete = false;
/* Encoding stored by SetClientEncoding() while startup is not yet complete. */
95 static int pending_client_encoding = PG_SQL_ASCII;
98 /* Internal functions */
/* Conversion between client and database encodings using the cached FmgrInfo. */
99 static char *perform_default_encoding_conversion(const char *src,
100 int len, bool is_client_to_server);
/* Clipping helper used on the single-byte-encoding fast paths. */
101 static int cliplen(const char *str, int len, int limit);
105 * Prepare for a future call to SetClientEncoding. Success should mean
106 * that SetClientEncoding is guaranteed to succeed for this encoding request.
108 * (But note that success before backend_startup_complete does not guarantee success after.)
111 * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
/*
 * PrepareClientEncoding
 *
 * Checks the requested client encoding and, inside a transaction, looks up
 * the default conversion procs for the (database, client) encoding pair and
 * pushes a new ConvProcInfo onto ConvProcList. Outside a transaction only
 * an already-cached entry can satisfy the request.
 */
114 PrepareClientEncoding(int encoding)
116 int current_server_encoding;
/* Only IDs accepted by PG_VALID_FE_ENCODING() can be prepared. */
119 if (!PG_VALID_FE_ENCODING(encoding))
122 /* Can't do anything during startup, per notes above */
123 if (!backend_startup_complete)
126 current_server_encoding = GetDatabaseEncoding();
129 * Check for cases that require no conversion function.
/* That is: both encodings are equal, or either side is SQL_ASCII. */
131 if (current_server_encoding == encoding ||
132 current_server_encoding == PG_SQL_ASCII ||
133 encoding == PG_SQL_ASCII)
136 if (IsTransactionState())
139 * If we're in a live transaction, it's safe to access the catalogs,
140 * so look up the functions. We repeat the lookup even if the info is
141 * already cached, so that we can react to changes in the contents of
146 ConvProcInfo *convinfo;
147 MemoryContext oldcontext;
/* A default conversion is looked up for each direction and its OID is checked. */
149 to_server_proc = FindDefaultConversionProc(encoding,
150 current_server_encoding);
151 if (!OidIsValid(to_server_proc))
153 to_client_proc = FindDefaultConversionProc(current_server_encoding,
155 if (!OidIsValid(to_client_proc))
159 * Load the fmgr info into TopMemoryContext (could still fail here)
161 convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
162 sizeof(ConvProcInfo));
/* Record which encoding pair this cache entry belongs to. */
163 convinfo->s_encoding = current_server_encoding;
164 convinfo->c_encoding = encoding;
165 fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
167 fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
170 /* Attach new info to head of list */
/* lcons() runs with TopMemoryContext current; the previous context is restored afterwards. */
171 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
172 ConvProcList = lcons(convinfo, ConvProcList);
173 MemoryContextSwitchTo(oldcontext);
176 * We cannot yet remove any older entry for the same encoding pair,
177 * since it could still be in use. SetClientEncoding will clean up.
180 return 0; /* success */
185 * If we're not in a live transaction, the only thing we can do is
186 * restore a previous setting using the cache. This covers all
187 * transaction-rollback cases. The only case it might not work for is
188 * trying to change client_encoding on the fly by editing
189 * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
190 * thing to do anyway.
/* Search the cache for an entry with the same server/client encoding pair. */
192 foreach(lc, ConvProcList)
194 ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
196 if (oldinfo->s_encoding == current_server_encoding &&
197 oldinfo->c_encoding == encoding)
201 return -1; /* it's not cached, so fail */
206 * Set the active client encoding and set up the conversion-function pointers.
207 * PrepareClientEncoding should have been called previously for this encoding.
209 * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
/*
 * SetClientEncoding
 *
 * Makes "encoding" the active client encoding. During startup the request
 * is only stored in pending_client_encoding. Afterwards ClientEncoding and
 * the two conversion-proc pointers are updated, using the ConvProcList
 * entry for the encoding pair when a conversion is required.
 */
212 SetClientEncoding(int encoding)
214 int current_server_encoding;
/* Only IDs accepted by PG_VALID_FE_ENCODING() are considered. */
220 if (!PG_VALID_FE_ENCODING(encoding))
223 /* Can't do anything during startup, per notes above */
224 if (!backend_startup_complete)
/* Stored here so that InitializeClientEncoding() can apply it later. */
226 pending_client_encoding = encoding;
230 current_server_encoding = GetDatabaseEncoding();
233 * Check for cases that require no conversion function.
235 if (current_server_encoding == encoding ||
236 current_server_encoding == PG_SQL_ASCII ||
237 encoding == PG_SQL_ASCII)
/* No conversion procs in this case: record the encoding and clear both pointers. */
239 ClientEncoding = &pg_enc2name_tbl[encoding];
240 ToServerConvProc = NULL;
241 ToClientConvProc = NULL;
246 * Search the cache for the entry previously prepared by
247 * PrepareClientEncoding; if there isn't one, we lose. While at it,
248 * release any duplicate entries so that repeated Prepare/Set cycles don't
/* Hand-written iteration with a separate "next" pointer, since cells can be deleted during the walk. */
253 for (lc = list_head(ConvProcList); lc; lc = next)
255 ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
259 if (convinfo->s_encoding == current_server_encoding &&
260 convinfo->c_encoding == encoding)
264 /* Found newest entry, so set up */
/* The active pointers refer directly to the FmgrInfo members of this list entry. */
265 ClientEncoding = &pg_enc2name_tbl[encoding];
266 ToServerConvProc = &convinfo->to_server_info;
267 ToClientConvProc = &convinfo->to_client_info;
272 /* Duplicate entry, release it */
273 ConvProcList = list_delete_cell(ConvProcList, lc, prev);
275 continue; /* prev mustn't advance */
283 return 0; /* success */
285 return -1; /* it's not cached, so fail */
289 * Initialize client encoding conversions.
290 * Called from InitPostgres() once during backend startup.
/*
 * InitializeClientEncoding
 *
 * Marks backend startup as complete and then applies the client encoding
 * stored in pending_client_encoding, reporting an error if the conversion
 * to or from the database encoding is unavailable.
 */
293 InitializeClientEncoding(void)
/* The assertion documents that this is expected to run only once. */
295 Assert(!backend_startup_complete);
296 backend_startup_complete = true;
/* With the flag set, Prepare/Set no longer take their startup shortcuts. */
298 if (PrepareClientEncoding(pending_client_encoding) < 0 ||
299 SetClientEncoding(pending_client_encoding) < 0)
302 * Oops, the requested conversion is not available. We couldn't fail
303 * before, but we can now.
/* The message names the pending client encoding and the database encoding. */
306 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
307 errmsg("conversion between %s and %s is not supported",
308 pg_enc2name_tbl[pending_client_encoding].name,
309 GetDatabaseEncodingName())));
314 * returns the current client encoding
/* Returns the encoding field of the pg_enc2name entry that ClientEncoding points to. */
317 pg_get_client_encoding(void)
319 return ClientEncoding->encoding;
323 * returns the current client encoding name
/* Returns the name field of the pg_enc2name entry that ClientEncoding points to. */
326 pg_get_client_encoding_name(void)
328 return ClientEncoding->name;
332 * Convert src string to another encoding (general case).
334 * See the notes about string conversion functions at the top of this file.
/*
 * pg_do_encoding_conversion
 *
 * Converts "len" bytes at "src" from src_encoding to dest_encoding using the
 * default conversion proc found by FindDefaultConversionProc(). In the
 * shortcut cases below the source pointer itself is returned.
 */
337 pg_do_encoding_conversion(unsigned char *src, int len,
338 int src_encoding, int dest_encoding)
340 unsigned char *result;
344 return src; /* empty string is always valid */
346 if (src_encoding == dest_encoding)
347 return src; /* no conversion required, assume valid */
349 if (dest_encoding == PG_SQL_ASCII)
350 return src; /* any string is valid in SQL_ASCII */
352 if (src_encoding == PG_SQL_ASCII)
354 /* No conversion is possible, but we must validate the result */
/* The bytes are checked against the destination encoding; the return value is discarded. */
355 (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
/* The conversion-proc lookup below is only attempted inside a transaction. */
359 if (!IsTransactionState()) /* shouldn't happen */
360 elog(ERROR, "cannot perform encoding conversion outside a transaction");
362 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
363 if (!OidIsValid(proc))
365 (errcode(ERRCODE_UNDEFINED_FUNCTION),
366 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
367 pg_encoding_to_char(src_encoding),
368 pg_encoding_to_char(dest_encoding))));
371 * Allocate space for conversion result, being wary of integer overflow
/* Lengths at or above MaxAllocSize / MAX_CONVERSION_GROWTH are rejected before multiplying. */
373 if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
375 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
376 errmsg("out of memory"),
377 errdetail("String of %d bytes is too long for encoding conversion.",
/* Worst-case output size plus one extra byte (presumably for the terminating null). */
380 result = palloc(len * MAX_CONVERSION_GROWTH + 1);
/* Arguments shown here: source encoding, destination encoding, source buffer, result buffer. */
382 OidFunctionCall5(proc,
383 Int32GetDatum(src_encoding),
384 Int32GetDatum(dest_encoding),
385 CStringGetDatum(src),
386 CStringGetDatum(result),
392 * Convert string to encoding encoding_name. The source
393 * encoding is the DB encoding.
395 * BYTEA convert_to(TEXT string, NAME encoding_name) */
/*
 * pg_convert_to
 *
 * SQL-callable wrapper: passes argument 0 to pg_convert() with the database
 * encoding name as the source encoding and argument 1 as the destination.
 */
397 pg_convert_to(PG_FUNCTION_ARGS)
399 Datum string = PG_GETARG_DATUM(0);
400 Datum dest_encoding_name = PG_GETARG_DATUM(1);
/* Build the source-encoding argument from the database encoding's name via namein. */
401 Datum src_encoding_name = DirectFunctionCall1(namein,
402 CStringGetDatum(DatabaseEncoding->name));
406 * pg_convert expects a bytea as its first argument. We're passing it a
407 * text argument here, relying on the fact that they are both in fact
408 * varlena types, and thus structurally identical.
410 result = DirectFunctionCall3(pg_convert, string,
411 src_encoding_name, dest_encoding_name);
413 PG_RETURN_DATUM(result);
417 * Convert string from encoding encoding_name. The destination
418 * encoding is the DB encoding.
420 * TEXT convert_from(BYTEA string, NAME encoding_name) */
/*
 * pg_convert_from
 *
 * SQL-callable wrapper: passes argument 0 to pg_convert() with argument 1 as
 * the source encoding and the database encoding name as the destination.
 */
422 pg_convert_from(PG_FUNCTION_ARGS)
424 Datum string = PG_GETARG_DATUM(0);
425 Datum src_encoding_name = PG_GETARG_DATUM(1);
/* Build the destination-encoding argument from the database encoding's name via namein. */
426 Datum dest_encoding_name = DirectFunctionCall1(namein,
427 CStringGetDatum(DatabaseEncoding->name));
430 result = DirectFunctionCall3(pg_convert, string,
431 src_encoding_name, dest_encoding_name);
434 * pg_convert returns a bytea, which we in turn return as text, relying on
435 * the fact that they are both in fact varlena types, and thus
436 * structurally identical. Although not all bytea values are valid text,
437 * in this case it will be because we've told pg_convert to return one
438 * that is valid as text in the current database encoding.
440 PG_RETURN_DATUM(result);
444 * Convert string between two arbitrary encodings.
446 * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
/*
 * pg_convert
 *
 * SQL-callable: verifies the bytea argument as text in the named source
 * encoding, converts it with pg_do_encoding_conversion(), and returns the
 * resulting bytes as a newly palloc'd bytea.
 */
449 pg_convert(PG_FUNCTION_ARGS)
451 bytea *string = PG_GETARG_BYTEA_PP(0);
/* Both encoding names are resolved to IDs up front; negative IDs are rejected below. */
452 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
453 int src_encoding = pg_char_to_encoding(src_encoding_name);
454 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
455 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
461 if (src_encoding < 0)
463 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
464 errmsg("invalid source encoding name \"%s\"",
465 src_encoding_name)));
466 if (dest_encoding < 0)
468 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
469 errmsg("invalid destination encoding name \"%s\"",
470 dest_encoding_name)));
472 /* make sure that source string is valid */
/* len and src_str describe the payload only, excluding the varlena header. */
473 len = VARSIZE_ANY_EXHDR(string);
474 src_str = VARDATA_ANY(string);
475 pg_verify_mbstr_len(src_encoding, src_str, len, false);
477 /* perform conversion */
478 dest_str = (char *) pg_do_encoding_conversion((unsigned char *) src_str,
483 /* update len if conversion actually happened */
/* A result pointer different from the input means a new, null-terminated buffer was returned. */
484 if (dest_str != src_str)
485 len = strlen(dest_str);
488 * build bytea data type structure.
/* The result holds a VARHDRSZ header followed by len data bytes. */
490 retval = (bytea *) palloc(len + VARHDRSZ);
491 SET_VARSIZE(retval, len + VARHDRSZ);
492 memcpy(VARDATA(retval), dest_str, len);
/* NOTE(review): the statement guarded by this test is missing from this extract; presumably it frees dest_str - confirm. */
494 if (dest_str != src_str)
497 /* free memory if allocated by the toaster */
498 PG_FREE_IF_COPY(string, 0);
500 PG_RETURN_BYTEA_P(retval);
504 * get the length of the string considered as text in the specified
505 * encoding. Raises an error if the data is not valid in that encoding.
508 * INT4 length (BYTEA string, NAME src_encoding_name)
/*
 * length_in_encoding
 *
 * SQL-callable: returns the value pg_verify_mbstr_len() reports for the
 * bytea argument when it is interpreted in the named encoding.
 */
511 length_in_encoding(PG_FUNCTION_ARGS)
513 bytea *string = PG_GETARG_BYTEA_PP(0);
514 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
515 int src_encoding = pg_char_to_encoding(src_encoding_name);
/* A negative ID from pg_char_to_encoding() is reported as an invalid encoding name. */
520 if (src_encoding < 0)
522 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
523 errmsg("invalid encoding name \"%s\"",
524 src_encoding_name)));
/* Payload length and pointer, excluding the varlena header. */
526 len = VARSIZE_ANY_EXHDR(string);
527 src_str = VARDATA_ANY(string);
529 retval = pg_verify_mbstr_len(src_encoding, src_str, len, false);
531 PG_RETURN_INT32(retval);
535 * Get maximum multibyte character length in the specified encoding.
537 * Note encoding is specified numerically, not by name as above.
/*
 * pg_encoding_max_length_sql
 *
 * SQL-callable: for a valid encoding ID, returns that encoding's maxmblen
 * entry from pg_wchar_table. The handling of an invalid ID is not visible
 * in this extract.
 */
540 pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
542 int encoding = PG_GETARG_INT32(0);
/* The table is only indexed after the ID has passed PG_VALID_ENCODING(). */
544 if (PG_VALID_ENCODING(encoding))
545 PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
551 * Convert client encoding to server encoding.
553 * See the notes about string conversion functions at the top of this file.
/* Thin wrapper: calls pg_any_to_server() with the current client encoding as the source. */
556 pg_client_to_server(const char *s, int len)
558 return pg_any_to_server(s, len, ClientEncoding->encoding);
562 * Convert any encoding to server encoding.
564 * See the notes about string conversion functions at the top of this file.
566 * Unlike the other string conversion functions, this will apply validation
567 * even if encoding == DatabaseEncoding->encoding. This is because this is
568 * used to process data coming in from outside the database, and we never
569 * want to just assume validity.
/*
 * pg_any_to_server
 *
 * Converts "len" bytes at "s" from "encoding" to the database encoding.
 * Input that needs no conversion is still checked with pg_verify_mbstr()
 * or, for the SQL_ASCII-database fallback, with a byte-by-byte scan.
 */
572 pg_any_to_server(const char *s, int len, int encoding)
575 return (char *) s; /* empty string is always valid */
/* Source equals the database encoding, or is SQL_ASCII: verify only. */
577 if (encoding == DatabaseEncoding->encoding ||
578 encoding == PG_SQL_ASCII)
581 * No conversion is needed, but we must still validate the data.
583 (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
587 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
590 * No conversion is possible, but we must still validate the data,
591 * because the client-side code might have done string escaping using
592 * the selected client_encoding. If the client encoding is ASCII-safe
593 * then we just do a straight validation under that encoding. For an
594 * ASCII-unsafe encoding we have a problem: we dare not pass such data
595 * to the parser but we have no way to convert it. We compromise by
596 * rejecting the data if it contains any non-ASCII characters.
/* PG_VALID_BE_ENCODING() is the test used here for the straight-validation case. */
598 if (PG_VALID_BE_ENCODING(encoding))
599 (void) pg_verify_mbstr(encoding, s, len, false);
/* Byte scan: a zero byte or a byte with the high bit set triggers the error below. */
604 for (i = 0; i < len; i++)
606 if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
608 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
609 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
610 pg_enc2name_tbl[PG_SQL_ASCII].name,
611 (unsigned char) s[i])));
617 /* Fast path if we can use cached conversion function */
/* true selects the client-to-server direction. */
618 if (encoding == ClientEncoding->encoding)
619 return perform_default_encoding_conversion(s, len, true);
621 /* General case ... will not work outside transactions */
622 return (char *) pg_do_encoding_conversion((unsigned char *) s,
625 DatabaseEncoding->encoding);
629 * Convert server encoding to client encoding.
631 * See the notes about string conversion functions at the top of this file.
/* Thin wrapper: calls pg_server_to_any() with the current client encoding as the destination. */
634 pg_server_to_client(const char *s, int len)
636 return pg_server_to_any(s, len, ClientEncoding->encoding);
640 * Convert server encoding to any encoding.
642 * See the notes about string conversion functions at the top of this file.
/*
 * pg_server_to_any
 *
 * Converts "len" bytes at "s" from the database encoding to "encoding".
 * When the target equals the database encoding or is SQL_ASCII, the input
 * pointer is returned unchanged and unverified.
 */
645 pg_server_to_any(const char *s, int len, int encoding)
648 return (char *) s; /* empty string is always valid */
650 if (encoding == DatabaseEncoding->encoding ||
651 encoding == PG_SQL_ASCII)
652 return (char *) s; /* assume data is valid */
654 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
656 /* No conversion is possible, but we must validate the result */
/* The bytes are checked against the target encoding; the return value is discarded. */
657 (void) pg_verify_mbstr(encoding, s, len, false);
661 /* Fast path if we can use cached conversion function */
/* false selects the server-to-client direction. */
662 if (encoding == ClientEncoding->encoding)
663 return perform_default_encoding_conversion(s, len, false);
665 /* General case ... will not work outside transactions */
666 return (char *) pg_do_encoding_conversion((unsigned char *) s,
668 DatabaseEncoding->encoding,
673 * Perform default encoding conversion using cached FmgrInfo. Since
674 * this function does not access database at all, it is safe to call
675 * outside transactions. If the conversion has not been set up by
676 * SetClientEncoding(), no conversion is performed.
/*
 * perform_default_encoding_conversion
 *
 * Converts between the client and database encodings by calling the
 * conversion proc cached in ToServerConvProc or ToClientConvProc.
 * is_client_to_server selects the direction.
 */
679 perform_default_encoding_conversion(const char *src, int len,
680 bool is_client_to_server)
/* Choose source/destination encodings and the matching cached FmgrInfo. */
687 if (is_client_to_server)
689 src_encoding = ClientEncoding->encoding;
690 dest_encoding = DatabaseEncoding->encoding;
691 flinfo = ToServerConvProc;
695 src_encoding = DatabaseEncoding->encoding;
696 dest_encoding = ClientEncoding->encoding;
697 flinfo = ToClientConvProc;
704 * Allocate space for conversion result, being wary of integer overflow
/* Lengths at or above MaxAllocSize / MAX_CONVERSION_GROWTH are rejected before multiplying. */
706 if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
708 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
709 errmsg("out of memory"),
710 errdetail("String of %d bytes is too long for encoding conversion.",
/* Worst-case output size plus one extra byte (presumably for the terminating null). */
713 result = palloc(len * MAX_CONVERSION_GROWTH + 1);
/* Arguments shown here: source encoding, destination encoding, source buffer, result buffer. */
715 FunctionCall5(flinfo,
716 Int32GetDatum(src_encoding),
717 Int32GetDatum(dest_encoding),
718 CStringGetDatum(src),
719 CStringGetDatum(result),
725 /* convert a multibyte string to a wchar */
/* Uses the database encoding's mb2wchar_with_len routine, with strlen(from) as the input length. */
727 pg_mb2wchar(const char *from, pg_wchar *to)
729 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, strlen(from));
732 /* convert a multibyte string to a wchar with a limited length */
/* Uses the database encoding's mb2wchar_with_len routine with the caller-supplied length. */
734 pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
736 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
739 /* same, with any encoding */
/* Dispatches to mb2wchar_with_len of the given encoding; "encoding" indexes pg_wchar_table without a check here. */
741 pg_encoding_mb2wchar_with_len(int encoding,
742 const char *from, pg_wchar *to, int len)
744 return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
747 /* convert a wchar string to a multibyte */
/* Uses the database encoding's wchar2mb_with_len routine, with pg_wchar_strlen(from) as the input length. */
749 pg_wchar2mb(const pg_wchar *from, char *to)
751 return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *) to, pg_wchar_strlen(from));
754 /* convert a wchar string to a multibyte with a limited length */
/* Uses the database encoding's wchar2mb_with_len routine with the caller-supplied length. */
756 pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
758 return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *) to, len);
761 /* same, with any encoding */
/* Dispatches to wchar2mb_with_len of the given encoding; "encoding" indexes pg_wchar_table without a check here. */
763 pg_encoding_wchar2mb_with_len(int encoding,
764 const pg_wchar *from, char *to, int len)
766 return (*pg_wchar_table[encoding].wchar2mb_with_len) (from, (unsigned char *) to, len);
769 /* returns the byte length of a multibyte character */
/* Calls the database encoding's mblen routine on the character starting at mbstr. */
771 pg_mblen(const char *mbstr)
773 return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
776 /* returns the display length of a multibyte character */
/* Calls the database encoding's dsplen routine on the character starting at mbstr. */
778 pg_dsplen(const char *mbstr)
780 return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) ((const unsigned char *) mbstr));
783 /* returns the length (counted in wchars) of a multibyte string */
/*
 * pg_mbstrlen
 *
 * Length of a null-terminated string in the database encoding. For
 * single-byte encodings this is simply strlen().
 */
785 pg_mbstrlen(const char *mbstr)
789 /* optimization for single byte encoding */
790 if (pg_database_encoding_max_length() == 1)
791 return strlen(mbstr);
/* Multibyte case: the pointer advances by pg_mblen() bytes per step. */
795 mbstr += pg_mblen(mbstr);
801 /* returns the length (counted in wchars) of a multibyte string
802 * (not necessarily NULL terminated)
/*
 * pg_mbstrlen_with_len
 *
 * Like pg_mbstrlen(), but for a string bounded by "limit" bytes that need
 * not be null-terminated.
 */
805 pg_mbstrlen_with_len(const char *mbstr, int limit)
809 /* optimization for single byte encoding */
810 if (pg_database_encoding_max_length() == 1)
/* The scan ends once "limit" is used up or a zero byte is reached. */
813 while (limit > 0 && *mbstr)
815 int l = pg_mblen(mbstr);
825 * returns the byte length of a multibyte string
826 * (not necessarily NULL terminated)
827 * that is no longer than limit.
828 * this function does not break multibyte character boundary.
/* Wrapper around pg_encoding_mbcliplen() that supplies the database encoding. */
831 pg_mbcliplen(const char *mbstr, int len, int limit)
833 return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
838 * pg_mbcliplen with specified encoding
/*
 * pg_encoding_mbcliplen
 *
 * Clips a string in the given encoding to a byte limit, measuring each
 * character with that encoding's mblen routine. Single-byte encodings are
 * handled by cliplen().
 */
841 pg_encoding_mbcliplen(int encoding, const char *mbstr,
844 mblen_converter mblen_fn;
848 /* optimization for single byte encoding */
849 if (pg_encoding_max_length(encoding) == 1)
850 return cliplen(mbstr, len, limit);
/* The length routine is fetched once, before the loop. */
852 mblen_fn = pg_wchar_table[encoding].mblen;
854 while (len > 0 && *mbstr)
/* l is the byte length of the next character; the test checks whether adding it would exceed limit. */
856 l = (*mblen_fn) ((const unsigned char *) mbstr);
857 if ((clen + l) > limit)
869 * Similar to pg_mbcliplen except the limit parameter specifies the
870 * character length, not the byte length.
/*
 * pg_mbcharcliplen
 *
 * Clipping variant for the database encoding. For single-byte encodings it
 * defers to cliplen(); otherwise it walks the string until "len" is used up
 * or a zero byte is reached.
 */
873 pg_mbcharcliplen(const char *mbstr, int len, int limit)
879 /* optimization for single byte encoding */
880 if (pg_database_encoding_max_length() == 1)
881 return cliplen(mbstr, len, limit);
883 while (len > 0 && *mbstr)
896 /* mbcliplen for any single-byte encoding */
/* Byte-wise clipping helper for single-byte encodings. */
898 cliplen(const char *str, int len, int limit)
/* The scan is bounded by the smaller of len and limit and also stops at a zero byte. */
902 len = Min(len, limit);
903 while (l < len && str[l])
/*
 * SetDatabaseEncoding
 *
 * Points DatabaseEncoding at the pg_enc2name_tbl entry for "encoding".
 * Raises an error if the ID fails PG_VALID_BE_ENCODING().
 */
909 SetDatabaseEncoding(int encoding)
911 if (!PG_VALID_BE_ENCODING(encoding))
912 elog(ERROR, "invalid database encoding: %d", encoding);
914 DatabaseEncoding = &pg_enc2name_tbl[encoding];
/* Sanity check that the table entry at this index carries the same encoding ID. */
915 Assert(DatabaseEncoding->encoding == encoding);
/*
 * SetMessageEncoding
 *
 * Points MessageEncoding at the pg_enc2name_tbl entry for "encoding".
 * The ID is checked only with an assertion.
 */
919 SetMessageEncoding(int encoding)
921 /* Some calls happen before we can elog()! */
922 Assert(PG_VALID_ENCODING(encoding));
924 MessageEncoding = &pg_enc2name_tbl[encoding];
/* Sanity check that the table entry at this index carries the same encoding ID. */
925 Assert(MessageEncoding->encoding == encoding);
930 * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
931 * codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
932 * fail for gettext-internal causes like out-of-memory.
/*
 * raw_pg_bind_textdomain_codeset
 *
 * Looks up the gettext codeset name for "encoding" in pg_enc2gettext_tbl
 * and passes it to bind_textdomain_codeset() for the given message domain.
 */
935 raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
/* elog() is used only when a memory context exists; otherwise the failure goes to stderr. */
937 bool elog_ok = (CurrentMemoryContext != NULL);
/* The table is scanned until the entry whose name is NULL. */
940 for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
942 if (pg_enc2gettext_tbl[i].encoding == encoding)
/* A non-NULL result from bind_textdomain_codeset() is the condition tested here. */
944 if (bind_textdomain_codeset(domainname,
945 pg_enc2gettext_tbl[i].name) != NULL)
949 elog(LOG, "bind_textdomain_codeset failed");
951 write_stderr("bind_textdomain_codeset failed");
961 * Bind a gettext message domain to the codeset corresponding to the database
962 * encoding. For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
963 * Return the MessageEncoding implied by the new settings.
965 * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
966 * When that matches the database encoding, we don't need to do anything. In
967 * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
968 * database encoding, except for the C locale. (On Windows, we also permit a
969 * discrepancy under the UTF8 encoding.) For the C locale, explicitly bind
970 * gettext to the right codeset.
972 * On Windows, gettext defaults to the Windows ANSI code page. This is a
973 * convenient departure for software that passes the strings to Windows ANSI
974 * APIs, but we don't do that. Compel gettext to use database encoding or,
975 * failing that, the LC_CTYPE encoding as it would on other platforms.
977 * This function is called before elog() and palloc() are usable.
/*
 * pg_bind_textdomain_codeset
 *
 * Chooses a codeset for the gettext domain from the database encoding and
 * the current LC_CTYPE setting, and binds it with
 * raw_pg_bind_textdomain_codeset().
 */
980 pg_bind_textdomain_codeset(const char *domainname)
/* elog_ok is passed on to pg_get_encoding_from_locale() below. */
982 bool elog_ok = (CurrentMemoryContext != NULL);
983 int encoding = GetDatabaseEncoding();
/* A NULL second argument makes setlocale() report the current LC_CTYPE without changing it. */
987 const char *ctype = setlocale(LC_CTYPE, NULL);
/* The locale name is compared case-insensitively against "C" and "POSIX". */
989 if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
/* A binding to the database encoding is attempted only when it is not SQL_ASCII. */
991 if (encoding != PG_SQL_ASCII &&
992 raw_pg_bind_textdomain_codeset(domainname, encoding))
/* The message encoding is derived from the locale here. */
995 new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
997 new_msgenc = PG_SQL_ASCII;
1000 if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
1001 /* On failure, the old message encoding remains valid. */
1002 return GetMessageEncoding();
1010 * The database encoding, also called the server encoding, represents the
1011 * encoding of data stored in text-like data types. Affected types include
1012 * cstring, text, varchar, name, xml, and json.
/* Returns the encoding field of the pg_enc2name entry that DatabaseEncoding points to. */
1015 GetDatabaseEncoding(void)
1017 return DatabaseEncoding->encoding;
/* Returns the name field of the pg_enc2name entry that DatabaseEncoding points to. */
1021 GetDatabaseEncodingName(void)
1023 return DatabaseEncoding->name;
/* SQL-callable: returns the database encoding's name, converted to a Datum via namein. */
1027 getdatabaseencoding(PG_FUNCTION_ARGS)
1029 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
/* SQL-callable: returns the client encoding's name, converted to a Datum via namein. */
1033 pg_client_encoding(PG_FUNCTION_ARGS)
1035 return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
1039 * gettext() returns messages in this encoding. This often matches the
1040 * database encoding, but it differs for SQL_ASCII databases, for processes
1041 * not attached to a database, and under a database encoding lacking iconv
1042 * support (MULE_INTERNAL).
/* Returns the encoding field of the pg_enc2name entry that MessageEncoding points to. */
1045 GetMessageEncoding(void)
1047 return MessageEncoding->encoding;
1052 * Result is palloc'ed null-terminated utf16 string. The character length
1053 * is also passed to utf16len if not null. Returns NULL iff failed.
/*
 * pgwin32_message_to_UTF16
 *
 * Converts "len" bytes of "str", interpreted in the message encoding, into
 * a palloc'd UTF-16 buffer using MultiByteToWideChar().
 * NOTE(review): this function appears to continue beyond the visible end
 * of this extract.
 */
1056 pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
/* Code page recorded in pg_enc2name_tbl for the current message encoding. */
1062 codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage;
1065 * Use MultiByteToWideChar directly if there is a corresponding codepage,
1066 * or double conversion through UTF8 if not.
/* Direct path: room for len WCHARs plus one terminator slot. */
1070 utf16 = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
1071 dstlen = MultiByteToWideChar(codepage, 0, str, len, utf16, len);
1072 utf16[dstlen] = (WCHAR) 0;
/* Indirect path: convert out of the message encoding first, then decode the result as CP_UTF8. */
1078 utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
1080 GetMessageEncoding(),
1085 utf16 = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
1086 dstlen = MultiByteToWideChar(CP_UTF8, 0, utf8, len, utf16, len);
1087 utf16[dstlen] = (WCHAR) 0;
/* Zero characters produced from non-empty input is treated as a failure. */
1093 if (dstlen == 0 && len > 0)
1096 return NULL; /* error */