2 * This file contains public functions for conversion between
3 * client encoding and server internal encoding.
4 * (currently mule internal code (mic) is used)
7 * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.48 2004/10/13 01:25:12 neilc Exp $
11 #include "access/xact.h"
12 #include "miscadmin.h"
13 #include "mb/pg_wchar.h"
14 #include "utils/builtins.h"
15 #include "utils/memutils.h"
16 #include "utils/syscache.h"
17 #include "catalog/namespace.h"
20 * For the current FE and BE encodings we keep both the encoding identifier
21 * and the encoding name. This avoids searching for the encoding and converting
22 * it to its name in getdatabaseencoding() and other routines.
/*
 * Both pointers start at the PG_SQL_ASCII entry of pg_enc2name_tbl.
 * ClientEncoding is repointed by SetClientEncoding() and
 * SetDefaultClientEncoding(); DatabaseEncoding by SetDatabaseEncoding().
 */
24 static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
25 static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
28 * Caches for conversion function info. Note that these values are
29 * allocated in TopMemoryContext so that they survive across
30 * transactions. See SetClientEncoding() for more details.
/*
 * NULL means no conversion function is cached for that direction.
 * SetClientEncoding() installs or clears both pointers, and
 * perform_default_encoding_conversion() reads them.
 */
32 static FmgrInfo *ToServerConvProc = NULL;
33 static FmgrInfo *ToClientConvProc = NULL;
36 * During backend startup we can't set client encoding because we (a)
37 * can't look up the conversion functions, and (b) may not know the database
38 * encoding yet either. So SetClientEncoding() just accepts anything and
39 * remembers it for InitializeClientEncoding() to apply later.
/*
 * backend_startup_complete is set to true by InitializeClientEncoding().
 * pending_client_encoding holds the encoding stored by SetClientEncoding()
 * while that flag is still false; it starts out as PG_SQL_ASCII.
 */
41 static bool backend_startup_complete = false;
42 static int pending_client_encoding = PG_SQL_ASCII;
45 /* Internal functions */
/* Converts using the cached FmgrInfo for the requested direction; defined later in this file. */
46 static unsigned char *perform_default_encoding_conversion(unsigned char *src,
47 int len, bool is_client_to_server);
/* Used by the single-byte shortcut in pg_mbcliplen() and pg_mbcharcliplen(); defined later in this file. */
48 static int cliplen(const unsigned char *str, int len, int limit);
52 * Set the client encoding and save fmgrinfo for the conversion
53 * function if necessary. Returns 0 if okay, -1 if not (bad encoding
54 * or can't support conversion)
/*
 * Parameters:
 *   encoding - requested client encoding id
 *   doit     - NOTE(review): the "Done if not wanting to actually apply
 *              setting" remark below suggests that false means "check
 *              only"; the statements that test it are not visible in this
 *              listing -- confirm against the full file.
 *
 * NOTE(review): the numbers embedded in this listing are not contiguous
 * (e.g. 59 -> 64, 66 -> 69, 70 -> 73), so some declarations, return
 * statements and braces of this function are not visible. The comments
 * added here describe only the statements that are visible.
 */
57 SetClientEncoding(int encoding, bool doit)
59 int current_server_encoding;
64 MemoryContext oldcontext;
/* Only encodings accepted by PG_VALID_FE_ENCODING are considered. */
66 if (!PG_VALID_FE_ENCODING(encoding))
69 /* Can't do anything during startup, per notes above */
70 if (!backend_startup_complete)
/* Remembered so that InitializeClientEncoding() can pass it back in later. */
73 pending_client_encoding = encoding;
77 current_server_encoding = GetDatabaseEncoding();
80 * Check for cases that require no conversion function.
/* That is: identical encodings, or PG_SQL_ASCII on either side. */
82 if (current_server_encoding == encoding ||
83 (current_server_encoding == PG_SQL_ASCII ||
84 encoding == PG_SQL_ASCII))
/* Switch encodings and release any cached FmgrInfo together with its fn_extra. */
88 ClientEncoding = &pg_enc2name_tbl[encoding];
90 if (ToServerConvProc != NULL)
92 if (ToServerConvProc->fn_extra)
93 pfree(ToServerConvProc->fn_extra);
94 pfree(ToServerConvProc);
/* No conversion function is cached in this case. */
96 ToServerConvProc = NULL;
98 if (ToClientConvProc != NULL)
100 if (ToClientConvProc->fn_extra)
101 pfree(ToClientConvProc->fn_extra);
102 pfree(ToClientConvProc);
104 ToClientConvProc = NULL;
110 * If we're not inside a transaction then we can't do catalog lookups,
111 * so fail. After backend startup, this could only happen if we are
112 * re-reading postgresql.conf due to SIGHUP --- so basically this just
113 * constrains the ability to change client_encoding on the fly from
114 * postgresql.conf. Which would probably be a stupid thing to do
117 if (!IsTransactionState())
121 * Look up the conversion functions.
/* One lookup per direction: client -> server, then server -> client. */
123 to_server_proc = FindDefaultConversionProc(encoding,
124 current_server_encoding);
125 if (!OidIsValid(to_server_proc))
127 to_client_proc = FindDefaultConversionProc(current_server_encoding,
129 if (!OidIsValid(to_client_proc))
133 * Done if not wanting to actually apply setting.
139 * load the fmgr info into TopMemoryContext so that it survives
140 * outside transaction.
/* Both FmgrInfo structs are allocated and filled while TopMemoryContext is current. */
142 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
143 to_server = palloc(sizeof(FmgrInfo));
144 to_client = palloc(sizeof(FmgrInfo));
145 fmgr_info(to_server_proc, to_server);
146 fmgr_info(to_client_proc, to_client);
147 MemoryContextSwitchTo(oldcontext);
149 ClientEncoding = &pg_enc2name_tbl[encoding];
/* Free the previously cached entries before installing the new ones. */
151 if (ToServerConvProc != NULL)
153 if (ToServerConvProc->fn_extra)
154 pfree(ToServerConvProc->fn_extra);
155 pfree(ToServerConvProc);
157 ToServerConvProc = to_server;
159 if (ToClientConvProc != NULL)
161 if (ToClientConvProc->fn_extra)
162 pfree(ToClientConvProc->fn_extra);
163 pfree(ToClientConvProc);
165 ToClientConvProc = to_client;
171 * Initialize client encoding if necessary.
172 * called from InitPostgres() once during backend starting up.
/*
 * Marks backend startup as complete and then applies the encoding that
 * SetClientEncoding() stored in pending_client_encoding.
 *
 * NOTE(review): the listing number jumps from 184 to 187, so the start of
 * the error-report call whose arguments appear below (and therefore its
 * severity level) is not visible here -- confirm against the full file.
 */
175 InitializeClientEncoding(void)
/* The flag must still be false on entry. */
177 Assert(!backend_startup_complete);
178 backend_startup_complete = true;
/* With the flag set, SetClientEncoding() no longer takes its startup branch. */
180 if (SetClientEncoding(pending_client_encoding, true) < 0)
183 * Oops, the requested conversion is not available. We couldn't
184 * fail before, but we can now.
187 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
188 errmsg("conversion between %s and %s is not supported",
189 pg_enc2name_tbl[pending_client_encoding].name,
190 GetDatabaseEncodingName())));
195 * returns the current client encoding */
197 pg_get_client_encoding(void)
199 Assert(ClientEncoding);
200 return (ClientEncoding->encoding);
204 * returns the current client encoding name
207 pg_get_client_encoding_name(void)
209 Assert(ClientEncoding);
210 return (ClientEncoding->name);
214 * Apply encoding conversion to src and return the result. The encoding
215 * conversion function is the one marked as "default" in the pg_conversion
216 * system catalog. If it is not found in the schema search path,
217 * it is taken from the pg_catalog schema. If it is not found there either,
218 * log a message and return src. We cannot raise an error, since that would
219 * cause an infinite loop in error message sending.
221 * If no conversion is performed, src itself is returned.
223 * XXX We assume that the converted result needs at most 4-to-1 growth in
224 * the worst case. The growth for the currently supported encoding pairs is
225 * within 3 (SJIS JIS X0201 half-width kana -> UTF-8 is the worst case),
226 * so "4" should be enough for the moment.
/*
 * Parameters:
 *   src           - input bytes
 *   len           - length of src in bytes
 *   src_encoding  - encoding id of src
 *   dest_encoding - encoding id wanted for the result
 *
 * NOTE(review): the numbers embedded in this listing are not contiguous
 * (e.g. 235 -> 238, 241 -> 247, 277 -> end), so the statements guarded by
 * the conditions below, the final argument(s) of the conversion call and
 * the return statement are not visible here.
 */
229 pg_do_encoding_conversion(unsigned char *src, int len,
230 int src_encoding, int dest_encoding)
232 unsigned char *result;
/*
 * NOTE(review): presumably checked because the conversion lookup below needs
 * catalog access (compare the comment in SetClientEncoding()) -- confirm.
 */
235 if (!IsTransactionState())
/* Same encoding on both sides, or PG_SQL_ASCII on either side. */
238 if (src_encoding == dest_encoding)
241 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
247 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
248 if (!OidIsValid(proc))
251 (errcode(ERRCODE_UNDEFINED_FUNCTION),
252 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
253 pg_encoding_to_char(src_encoding),
254 pg_encoding_to_char(dest_encoding))));
259 * XXX we should avoid throwing errors in OidFunctionCall. Otherwise
260 * we are going into infinite loop! So we have to make sure that the
261 * function exists before calling OidFunctionCall.
263 if (!SearchSysCacheExists(PROCOID,
264 ObjectIdGetDatum(proc),
/* A missing function is only logged at LOG level here. */
267 elog(LOG, "cache lookup failed for function %u", proc);
/*
 * Output buffer: four bytes per input byte plus one extra byte.
 * NOTE(review): len * 4 + 1 is evaluated in int and no upper bound on len
 * is visible in this listing -- check for overflow with very large inputs.
 */
271 result = palloc(len * 4 + 1);
273 OidFunctionCall5(proc,
274 Int32GetDatum(src_encoding),
275 Int32GetDatum(dest_encoding),
276 CStringGetDatum(src),
277 CStringGetDatum(result),
283 * Convert string using encoding_nanme. We assume that string's
284 * encoding is same as DB encoding.
286 * TEXT convert(TEXT string, NAME encoding_name) */
288 pg_convert(PG_FUNCTION_ARGS)
290 Datum string = PG_GETARG_DATUM(0);
291 Datum dest_encoding_name = PG_GETARG_DATUM(1);
292 Datum src_encoding_name = DirectFunctionCall1(
293 namein, CStringGetDatum(DatabaseEncoding->name));
296 result = DirectFunctionCall3(
297 pg_convert2, string, src_encoding_name, dest_encoding_name);
299 /* free memory allocated by namein */
300 pfree((void *) src_encoding_name);
302 PG_RETURN_TEXT_P(result);
306 * Convert string using the given source and destination encoding names.
308 * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
/*
 * Arguments (fmgr): 0 = text string, 1 = source encoding name,
 * 2 = destination encoding name. Returns a text value.
 *
 * NOTE(review): the numbers embedded in this listing are not contiguous
 * (e.g. 318 -> 323, 337 -> 340, 340 -> 342, 352 -> 358), so the
 * declarations of str, len and retval, the error-report call heads, and
 * some statements are not visible here.
 */
311 pg_convert2(PG_FUNCTION_ARGS)
313 text *string = PG_GETARG_TEXT_P(0);
314 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
315 int src_encoding = pg_char_to_encoding(src_encoding_name);
316 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
317 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
318 unsigned char *result;
/* A negative id from pg_char_to_encoding() is reported as an invalid name. */
323 if (src_encoding < 0)
325 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
326 errmsg("invalid source encoding name \"%s\"",
327 src_encoding_name)));
328 if (dest_encoding < 0)
330 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
331 errmsg("invalid destination encoding name \"%s\"",
332 dest_encoding_name)));
334 /* make sure that source string is null terminated */
/*
 * The copy is one byte larger than the payload.
 * NOTE(review): the statement that stores the terminator is not visible in
 * this listing (numbers 338-339 are absent).
 */
335 len = VARSIZE(string) - VARHDRSZ;
336 str = palloc(len + 1);
337 memcpy(str, VARDATA(string), len);
340 result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding);
/* NOTE(review): the condition guarding this elog is not visible in this listing. */
342 elog(ERROR, "encoding conversion failed");
345 * build text data type structure. we cannot use textin() here, since
346 * textin assumes that input string encoding is same as database
/* len is reused here: converted string length plus the varlena header. */
349 len = strlen(result) + VARHDRSZ;
350 retval = palloc(len);
351 VARATT_SIZEP(retval) = len;
352 memcpy(VARDATA(retval), result, len - VARHDRSZ);
358 /* free memory if allocated by the toaster */
359 PG_FREE_IF_COPY(string, 0);
361 PG_RETURN_TEXT_P(retval);
365 * convert client encoding to server encoding.
/*
 * s/len: input bytes and their length.
 *
 * NOTE(review): the listing number jumps from 373 to 376, so the statement
 * controlled by the equality test below is not visible here; presumably the
 * input is handed back untouched when both encodings match -- confirm.
 */
368 pg_client_to_server(unsigned char *s, int len)
370 Assert(DatabaseEncoding);
371 Assert(ClientEncoding);
373 if (ClientEncoding->encoding == DatabaseEncoding->encoding)
/* true selects the client -> server direction */
376 return perform_default_encoding_conversion(s, len, true);
380 * convert server encoding to client encoding.
/*
 * s/len: input bytes and their length.
 *
 * NOTE(review): the listing number jumps from 388 to 391, so the statement
 * controlled by the equality test below is not visible here; presumably the
 * input is handed back untouched when both encodings match -- confirm.
 */
383 pg_server_to_client(unsigned char *s, int len)
385 Assert(DatabaseEncoding);
386 Assert(ClientEncoding);
388 if (ClientEncoding->encoding == DatabaseEncoding->encoding)
/* false selects the server -> client direction */
391 return perform_default_encoding_conversion(s, len, false);
395 * Perform default encoding conversion using cached FmgrInfo. Since
396 * this function does not access database at all, it is safe to call
397 * outside transactions. Explicit setting client encoding required
398 * before calling this function. Otherwise no conversion is
/*
 * Parameters:
 *   src                 - input bytes
 *   len                 - length of src in bytes
 *   is_client_to_server - true for client -> server, otherwise
 *                         server -> client
 *
 * NOTE(review): the numbers embedded in this listing are not contiguous
 * (e.g. 404 -> 412, 416 -> 420, 422 -> 428, 440 -> end), so the local
 * declarations, the statements guarded by the conditions below, the final
 * argument(s) of the call and the return statement are not visible here.
 */
401 static unsigned char *
402 perform_default_encoding_conversion(unsigned char *src, int len, bool is_client_to_server)
404 unsigned char *result;
/* Pick source/destination ids and the cached FmgrInfo for the direction. */
412 if (is_client_to_server)
414 src_encoding = ClientEncoding->encoding;
415 dest_encoding = DatabaseEncoding->encoding;
416 flinfo = ToServerConvProc;
420 src_encoding = DatabaseEncoding->encoding;
421 dest_encoding = ClientEncoding->encoding;
422 flinfo = ToClientConvProc;
/* Same encoding on both sides, or PG_SQL_ASCII on either side. */
428 if (src_encoding == dest_encoding)
431 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
/*
 * Output buffer: four bytes per input byte plus one extra byte.
 * NOTE(review): len * 4 + 1 is evaluated in int and no upper bound on len
 * is visible in this listing -- check for overflow with very large inputs.
 */
434 result = palloc(len * 4 + 1);
436 FunctionCall5(flinfo,
437 Int32GetDatum(src_encoding),
438 Int32GetDatum(dest_encoding),
439 CStringGetDatum(src),
440 CStringGetDatum(result),
445 /* convert a multibyte string to a wchar */
447 pg_mb2wchar(const unsigned char *from, pg_wchar *to)
449 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) (from, to, strlen(from));
452 /* convert a multibyte string to a wchar with a limited length */
454 pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
456 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) (from, to, len);
459 /* returns the byte length of a multibyte word */
461 pg_mblen(const unsigned char *mbstr)
463 return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) (mbstr));
466 /* returns the display length of a multibyte word */
468 pg_dsplen(const unsigned char *mbstr)
470 return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) (mbstr));
473 /* returns the length (counted as a wchar) of a multibyte string */
/*
 * mbstr: NUL-terminated input (the shortcut below calls strlen() on it).
 *
 * NOTE(review): the numbers embedded in this listing are not contiguous
 * (475 -> 479, 481 -> 485, 485 -> end), so the counter, the loop header
 * and the return statement are not visible here.
 */
475 pg_mbstrlen(const unsigned char *mbstr)
479 /* optimization for single byte encoding */
/* When every character is one byte, the character count equals strlen(). */
480 if (pg_database_encoding_max_length() == 1)
481 return strlen((char *) mbstr);
/* Step over one whole character, i.e. pg_mblen() bytes. */
485 mbstr += pg_mblen(mbstr);
491 /* returns the length (counted as a wchar) of a multibyte string
492 (not necessarily NULL terminated) */
/*
 * mbstr: input bytes; limit: number of bytes that may be examined.
 *
 * NOTE(review): only the loop header is visible in this listing; the loop
 * body, the counter and the return statement are on lines that are absent.
 */
494 pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
/* Stops when limit is used up or a zero byte is reached, whichever is first. */
499 while (limit > 0 && *mbstr)
510 * returns the byte length of a multibyte string
511 * (not necessarily NULL terminated)
512 * that is no longer than limit.
513 * this function does not break multibyte word boundary.
/*
 * mbstr/len: input bytes and their length; limit: maximum byte length of
 * the clipped result.
 *
 * NOTE(review): the numbers embedded in this listing are not contiguous
 * (516 -> 521, 525 -> 528, 528 -> end), so the declarations of clen and l,
 * most of the loop body and the return statement are not visible here.
 */
516 pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
521 /* optimization for single byte encoding */
522 if (pg_database_encoding_max_length() == 1)
523 return cliplen(mbstr, len, limit);
/* Stops when len is used up or a zero byte is reached, whichever is first. */
525 while (len > 0 && *mbstr)
/*
 * NOTE(review): presumably clen is the length accepted so far and l the byte
 * length of the current character, so this refuses a character that would
 * cross the limit -- confirm against the full file.
 */
528 if ((clen + l) > limit)
540 * Similar to pg_mbcliplen except the limit parameter specifies the
541 * character length, not the byte length. */
/*
 * mbstr/len: input bytes and their length; limit: maximum number of
 * characters in the clipped result.
 *
 * NOTE(review): only the single-byte shortcut and the loop header are
 * visible in this listing; the loop body and the return statement are on
 * lines that are absent.
 */
543 pg_mbcharcliplen(const unsigned char *mbstr, int len, int limit)
549 /* optimization for single byte encoding */
/* With one byte per character, a character limit and a byte limit coincide. */
550 if (pg_database_encoding_max_length() == 1)
551 return cliplen(mbstr, len, limit);
/* Stops when len is used up or a zero byte is reached, whichever is first. */
553 while (len > 0 && *mbstr)
567 SetDatabaseEncoding(int encoding)
569 if (!PG_VALID_BE_ENCODING(encoding))
570 elog(ERROR, "invalid database encoding");
572 DatabaseEncoding = &pg_enc2name_tbl[encoding];
573 Assert(DatabaseEncoding->encoding == encoding);
577 SetDefaultClientEncoding(void)
579 ClientEncoding = &pg_enc2name_tbl[GetDatabaseEncoding()];
583 GetDatabaseEncoding(void)
585 Assert(DatabaseEncoding);
586 return (DatabaseEncoding->encoding);
590 GetDatabaseEncodingName(void)
592 Assert(DatabaseEncoding);
593 return (DatabaseEncoding->name);
597 getdatabaseencoding(PG_FUNCTION_ARGS)
599 Assert(DatabaseEncoding);
600 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
604 pg_client_encoding(PG_FUNCTION_ARGS)
606 Assert(ClientEncoding);
607 return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
611 cliplen(const unsigned char *str, int len, int limit)
614 const unsigned char *s;
616 for (s = str; *s; s++, l++)
618 if (l >= len || l >= limit)