2 * This file contains public functions for conversion between
3 * client encoding and server internal encoding.
4 * (currently mule internal code (mic) is used)
7 * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.52 2005/10/15 02:49:33 momjian Exp $
11 #include "access/xact.h"
12 #include "miscadmin.h"
13 #include "mb/pg_wchar.h"
14 #include "utils/builtins.h"
15 #include "utils/memutils.h"
16 #include "utils/syscache.h"
17 #include "catalog/namespace.h"
20 * For the current FE and BE encodings we keep both the encoding identifier
21 * and the encoding name. This avoids having to look up the encoding name
22 * from the encoding identifier in getdatabaseencoding() and other routines.
24 static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
25 static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
28 * Caches for conversion function info. Note that these values are
29 * allocated in TopMemoryContext so that they survive across
30 * transactions. See SetClientEncoding() for more details.
32 static FmgrInfo *ToServerConvProc = NULL;
33 static FmgrInfo *ToClientConvProc = NULL;
36 * During backend startup we can't set client encoding because we (a)
37 * can't look up the conversion functions, and (b) may not know the database
38 * encoding yet either. So SetClientEncoding() just accepts anything and
39 * remembers it for InitializeClientEncoding() to apply later.
41 static bool backend_startup_complete = false;
42 static int pending_client_encoding = PG_SQL_ASCII;
45 /* Internal functions */
46 static char *perform_default_encoding_conversion(const char *src,
47 int len, bool is_client_to_server);
48 static int cliplen(const char *str, int len, int limit);
52 * Set the client encoding and save fmgrinfo for the conversion
53 * function if necessary. Returns 0 if okay, -1 if not (bad encoding
54 * or can't support conversion)
57 SetClientEncoding(int encoding, bool doit)
59 int current_server_encoding;
64 MemoryContext oldcontext;
66 if (!PG_VALID_FE_ENCODING(encoding))
69 /* Can't do anything during startup, per notes above */
70 if (!backend_startup_complete)
73 pending_client_encoding = encoding;
77 current_server_encoding = GetDatabaseEncoding();
80 * Check for cases that require no conversion function.
82 if (current_server_encoding == encoding ||
83 (current_server_encoding == PG_SQL_ASCII ||
84 encoding == PG_SQL_ASCII))
88 ClientEncoding = &pg_enc2name_tbl[encoding];
90 if (ToServerConvProc != NULL)
92 if (ToServerConvProc->fn_extra)
93 pfree(ToServerConvProc->fn_extra);
94 pfree(ToServerConvProc);
96 ToServerConvProc = NULL;
98 if (ToClientConvProc != NULL)
100 if (ToClientConvProc->fn_extra)
101 pfree(ToClientConvProc->fn_extra);
102 pfree(ToClientConvProc);
104 ToClientConvProc = NULL;
110 * If we're not inside a transaction then we can't do catalog lookups, so
111 * fail. After backend startup, this could only happen if we are
112 * re-reading postgresql.conf due to SIGHUP --- so basically this just
113 * constrains the ability to change client_encoding on the fly from
114 * postgresql.conf. Which would probably be a stupid thing to do anyway.
116 if (!IsTransactionState())
120 * Look up the conversion functions.
122 to_server_proc = FindDefaultConversionProc(encoding,
123 current_server_encoding);
124 if (!OidIsValid(to_server_proc))
126 to_client_proc = FindDefaultConversionProc(current_server_encoding,
128 if (!OidIsValid(to_client_proc))
132 * Done if not wanting to actually apply setting.
138 * load the fmgr info into TopMemoryContext so that it survives outside
141 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
142 to_server = palloc(sizeof(FmgrInfo));
143 to_client = palloc(sizeof(FmgrInfo));
144 fmgr_info(to_server_proc, to_server);
145 fmgr_info(to_client_proc, to_client);
146 MemoryContextSwitchTo(oldcontext);
148 ClientEncoding = &pg_enc2name_tbl[encoding];
150 if (ToServerConvProc != NULL)
152 if (ToServerConvProc->fn_extra)
153 pfree(ToServerConvProc->fn_extra);
154 pfree(ToServerConvProc);
156 ToServerConvProc = to_server;
158 if (ToClientConvProc != NULL)
160 if (ToClientConvProc->fn_extra)
161 pfree(ToClientConvProc->fn_extra);
162 pfree(ToClientConvProc);
164 ToClientConvProc = to_client;
170 * Initialize client encoding if necessary.
171 * Called from InitPostgres() once during backend startup.
174 InitializeClientEncoding(void)
176 Assert(!backend_startup_complete);
177 backend_startup_complete = true;
179 if (SetClientEncoding(pending_client_encoding, true) < 0)
182 * Oops, the requested conversion is not available. We couldn't fail
183 * before, but we can now.
186 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
187 errmsg("conversion between %s and %s is not supported",
188 pg_enc2name_tbl[pending_client_encoding].name,
189 GetDatabaseEncodingName())));
194 * returns the current client encoding */
196 pg_get_client_encoding(void)
198 Assert(ClientEncoding);
199 return (ClientEncoding->encoding);
203 * returns the current client encoding name
206 pg_get_client_encoding_name(void)
208 Assert(ClientEncoding);
209 return (ClientEncoding->name);
213 * Apply encoding conversion on src and return it. The encoding
214 * conversion function is chosen from the pg_conversion system catalog
215 * marked as "default". If it is not found in the schema search path,
216 * it's taken from the pg_catalog schema. If it is not even in that schema,
217 * warn and return src. We cannot raise an error, since it would cause
218 * an infinite loop in error message sending.
220 * In the case of no conversion, src is returned.
222 * XXX We assume that storage for converted result is 4-to-1 growth in
223 * the worst case. The rate for currently supported encoding pairs is within 3
224 * (SJIS JIS X0201 half width kana -> UTF8 is the worst case).
225 * So "4" should be enough for the moment.
228 pg_do_encoding_conversion(unsigned char *src, int len,
229 int src_encoding, int dest_encoding)
231 unsigned char *result;
234 if (!IsTransactionState())
237 if (src_encoding == dest_encoding)
240 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
246 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
247 if (!OidIsValid(proc))
250 (errcode(ERRCODE_UNDEFINED_FUNCTION),
251 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
252 pg_encoding_to_char(src_encoding),
253 pg_encoding_to_char(dest_encoding))));
258 * XXX we should avoid throwing errors in OidFunctionCall. Otherwise we
259 * would go into an infinite loop! So we have to make sure that the
260 * function exists before calling OidFunctionCall.
262 if (!SearchSysCacheExists(PROCOID,
263 ObjectIdGetDatum(proc),
266 elog(LOG, "cache lookup failed for function %u", proc);
270 result = palloc(len * 4 + 1);
272 OidFunctionCall5(proc,
273 Int32GetDatum(src_encoding),
274 Int32GetDatum(dest_encoding),
275 CStringGetDatum(src),
276 CStringGetDatum(result),
282 * Convert string using encoding_name. We assume that the string's
283 * encoding is the same as the DB encoding.
285 * TEXT convert(TEXT string, NAME encoding_name) */
287 pg_convert(PG_FUNCTION_ARGS)
289 Datum string = PG_GETARG_DATUM(0);
290 Datum dest_encoding_name = PG_GETARG_DATUM(1);
291 Datum src_encoding_name = DirectFunctionCall1(
292 namein, CStringGetDatum(DatabaseEncoding->name));
295 result = DirectFunctionCall3(
296 pg_convert2, string, src_encoding_name, dest_encoding_name);
298 /* free memory allocated by namein */
299 pfree((void *) src_encoding_name);
301 PG_RETURN_TEXT_P(result);
305 * Convert string using encoding_name.
307 * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
310 pg_convert2(PG_FUNCTION_ARGS)
312 text *string = PG_GETARG_TEXT_P(0);
313 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
314 int src_encoding = pg_char_to_encoding(src_encoding_name);
315 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
316 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
317 unsigned char *result;
322 if (src_encoding < 0)
324 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
325 errmsg("invalid source encoding name \"%s\"",
326 src_encoding_name)));
327 if (dest_encoding < 0)
329 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
330 errmsg("invalid destination encoding name \"%s\"",
331 dest_encoding_name)));
333 /* make sure that source string is null terminated */
334 len = VARSIZE(string) - VARHDRSZ;
335 str = palloc(len + 1);
336 memcpy(str, VARDATA(string), len);
339 result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding);
341 elog(ERROR, "encoding conversion failed");
344 * Build the text data type structure. We cannot use textin() here, since
345 * textin assumes that the input string encoding is the same as the database encoding.
347 len = strlen((char *) result) + VARHDRSZ;
348 retval = palloc(len);
349 VARATT_SIZEP(retval) = len;
350 memcpy(VARDATA(retval), result, len - VARHDRSZ);
356 /* free memory if allocated by the toaster */
357 PG_FREE_IF_COPY(string, 0);
359 PG_RETURN_TEXT_P(retval);
363 * convert client encoding to server encoding.
366 pg_client_to_server(const char *s, int len)
368 Assert(DatabaseEncoding);
369 Assert(ClientEncoding);
371 if (ClientEncoding->encoding == DatabaseEncoding->encoding)
374 return perform_default_encoding_conversion(s, len, true);
378 * convert server encoding to client encoding.
381 pg_server_to_client(const char *s, int len)
383 Assert(DatabaseEncoding);
384 Assert(ClientEncoding);
386 if (ClientEncoding->encoding == DatabaseEncoding->encoding)
389 return perform_default_encoding_conversion(s, len, false);
393 * Perform default encoding conversion using cached FmgrInfo. Since
394 * this function does not access the database at all, it is safe to call
395 * outside transactions. The client encoding must be set explicitly
396 * before calling this function. Otherwise no conversion is
400 perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
410 if (is_client_to_server)
412 src_encoding = ClientEncoding->encoding;
413 dest_encoding = DatabaseEncoding->encoding;
414 flinfo = ToServerConvProc;
418 src_encoding = DatabaseEncoding->encoding;
419 dest_encoding = ClientEncoding->encoding;
420 flinfo = ToClientConvProc;
426 if (src_encoding == dest_encoding)
429 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
432 result = palloc(len * 4 + 1);
434 FunctionCall5(flinfo,
435 Int32GetDatum(src_encoding),
436 Int32GetDatum(dest_encoding),
437 CStringGetDatum(src),
438 CStringGetDatum(result),
443 /* convert a multibyte string to a wchar */
445 pg_mb2wchar(const char *from, pg_wchar *to)
447 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, strlen(from));
450 /* convert a multibyte string to a wchar with a limited length */
452 pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
454 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
457 /* returns the byte length of a multibyte word */
459 pg_mblen(const char *mbstr)
461 return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
464 /* returns the display length of a multibyte word */
466 pg_dsplen(const char *mbstr)
468 return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) ((const unsigned char *) mbstr));
471 /* returns the length (counted in wchars) of a multibyte string */
473 pg_mbstrlen(const char *mbstr)
477 /* optimization for single byte encoding */
478 if (pg_database_encoding_max_length() == 1)
479 return strlen(mbstr);
483 mbstr += pg_mblen(mbstr);
489 /* returns the length (counted in wchars) of a multibyte string
490 * (not necessarily NUL-terminated)
493 pg_mbstrlen_with_len(const char *mbstr, int limit)
497 /* optimization for single byte encoding */
498 if (pg_database_encoding_max_length() == 1)
501 while (limit > 0 && *mbstr)
503 int l = pg_mblen(mbstr);
513 * returns the byte length of a multibyte string
514 * (not necessarily NUL-terminated)
515 * that is no longer than limit.
516 * this function does not break multibyte word boundary.
519 pg_mbcliplen(const char *mbstr, int len, int limit)
524 /* optimization for single byte encoding */
525 if (pg_database_encoding_max_length() == 1)
526 return cliplen(mbstr, len, limit);
528 while (len > 0 && *mbstr)
531 if ((clen + l) > limit)
543 * Similar to pg_mbcliplen except the limit parameter specifies the
544 * character length, not the byte length. */
546 pg_mbcharcliplen(const char *mbstr, int len, int limit)
552 /* optimization for single byte encoding */
553 if (pg_database_encoding_max_length() == 1)
554 return cliplen(mbstr, len, limit);
556 while (len > 0 && *mbstr)
570 SetDatabaseEncoding(int encoding)
572 if (!PG_VALID_BE_ENCODING(encoding))
573 elog(ERROR, "invalid database encoding");
575 DatabaseEncoding = &pg_enc2name_tbl[encoding];
576 Assert(DatabaseEncoding->encoding == encoding);
580 SetDefaultClientEncoding(void)
582 ClientEncoding = &pg_enc2name_tbl[GetDatabaseEncoding()];
586 GetDatabaseEncoding(void)
588 Assert(DatabaseEncoding);
589 return (DatabaseEncoding->encoding);
593 GetDatabaseEncodingName(void)
595 Assert(DatabaseEncoding);
596 return (DatabaseEncoding->name);
600 getdatabaseencoding(PG_FUNCTION_ARGS)
602 Assert(DatabaseEncoding);
603 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
607 pg_client_encoding(PG_FUNCTION_ARGS)
609 Assert(ClientEncoding);
610 return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
614 cliplen(const char *str, int len, int limit)
619 for (s = str; *s; s++, l++)
621 if (l >= len || l >= limit)