2 * This file contains public functions for conversion between
3 * client encoding and server internal encoding.
4 * (currently mule internal code (mic) is used)
6 * $Id: mbutils.c,v 1.32 2002/08/19 04:08:08 ishii Exp $
9 #include "access/xact.h"
10 #include "miscadmin.h"
11 #include "mb/pg_wchar.h"
12 #include "utils/builtins.h"
13 #include "utils/memutils.h"
14 #include "utils/syscache.h"
15 #include "catalog/namespace.h"
18 * For the current FE and BE encodings we keep both the encoding identifier
19 * and the encoding name. This avoids searching for and converting from the
20 * encoding id to the encoding name in getdatabaseencoding() and other routines.
// Descriptors (encoding id plus name) for the current client and database
// encodings. Both initially point at the PG_SQL_ASCII entry of pg_enc2name_tbl.
22 static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
23 static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
26 * Caches for conversion function info. Note that Fcinfo.flinfo is
27 * allocated in TopMemoryContext so that it survives outside
28 * transactions. See SetClientEncoding() for more details.
// Cached FmgrInfo for the default client-to-server and server-to-client
// conversion functions. Both are NULL until SetClientEncoding() stores a
// lookup result here; perform_default_encoding_conversion() reads them.
// NOTE(review): Porc in these names looks like a misspelling of Proc. It is
// left alone because renaming would touch every use in the file.
30 static FmgrInfo *ToServerConvPorc = NULL;
31 static FmgrInfo *ToClientConvPorc = NULL;
33 /* Internal functions */
// Forward declaration of the file-local conversion helper that is defined
// further down in this file.
34 static unsigned char *
35 perform_default_encoding_conversion(unsigned char *src, int len, bool is_client_to_server);
38 * Set the client encoding and save fmgrinfo for the conversion
39 * function if necessary. If encoding conversion between client/server
40 * encoding is not supported, returns -1.
// SetClientEncoding: check the requested client encoding and, inside a
// transaction, look up and cache the default conversion functions for both
// directions.
//   encoding - id of the requested client (frontend) encoding
//   doit     - not referenced on any line visible in this listing
// NOTE(review): this listing has lost its short source lines (braces, bare
// return statements, the return type line), so the return values and the
// role of doit cannot be confirmed from here.
43 SetClientEncoding(int encoding, bool doit)
45 int current_server_encoding;
46 Oid to_server_proc, to_client_proc;
47 FmgrInfo *to_server = NULL;
48 FmgrInfo *to_client = NULL;
49 MemoryContext oldcontext;
51 current_server_encoding = GetDatabaseEncoding();
// The id must be a valid frontend encoding.
53 if (!PG_VALID_FE_ENCODING(encoding))
// Same encoding on both sides, or SQL_ASCII on either side: the only visible
// action is to point ClientEncoding at the new entry.
56 if (current_server_encoding == encoding ||
57 (current_server_encoding == PG_SQL_ASCII || encoding == PG_SQL_ASCII))
59 ClientEncoding = &pg_enc2name_tbl[encoding];
63 /* XXX We cannot use FindDefaultConversionProc() while in
64 * bootstrap or initprocessing mode since namespace functions will
67 if (IsTransactionState())
// One catalog lookup per direction.
69 to_server_proc = FindDefaultConversionProc(encoding, current_server_encoding);
70 to_client_proc = FindDefaultConversionProc(current_server_encoding, encoding);
// Both directions must have yielded a valid function OID.
72 if (!OidIsValid(to_server_proc) || !OidIsValid(to_client_proc))
76 * load the fmgr info into TopMemoryContext so that it
77 * survives outside transaction.
79 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
80 to_server = palloc(sizeof(FmgrInfo));
81 to_client = palloc(sizeof(FmgrInfo));
82 fmgr_info(to_server_proc, to_server);
83 fmgr_info(to_client_proc, to_client);
// Switch back to the context that was current before the allocations.
84 MemoryContextSwitchTo(oldcontext);
// Second transaction-state check; the lines below install the new settings.
90 if (IsTransactionState())
92 ClientEncoding = &pg_enc2name_tbl[encoding];
// Free the previously cached client-to-server FmgrInfo (and its fn_extra,
// when set) before replacing the pointer.
94 if(ToServerConvPorc != NULL)
96 if (ToServerConvPorc->fn_extra)
97 pfree(ToServerConvPorc->fn_extra);
98 pfree(ToServerConvPorc);
100 ToServerConvPorc = to_server;
// Same replacement for the server-to-client direction.
102 if(ToClientConvPorc != NULL)
104 if (ToClientConvPorc->fn_extra)
105 pfree(ToClientConvPorc->fn_extra);
106 pfree(ToClientConvPorc);
108 ToClientConvPorc = to_client;
114 * returns the current client encoding */
// Returns the encoding id held in the ClientEncoding descriptor, after
// asserting that the descriptor pointer is set.
116 pg_get_client_encoding(void)
118 Assert(ClientEncoding);
119 return (ClientEncoding->encoding);
123 * returns the current client encoding name
// Returns the name field of the ClientEncoding descriptor, after asserting
// that the descriptor pointer is set. The returned pointer refers to the
// descriptor itself; nothing is copied here.
126 pg_get_client_encoding_name(void)
128 Assert(ClientEncoding);
129 return (ClientEncoding->name);
133 * Apply encoding conversion on src and return it. The encoding
134 * conversion function is chosen from the pg_conversion system catalog
135 * marked as "default". If it is not found in the schema search path,
136 * it's taken from pg_catalog schema. If it is not in that schema either,
137 * warn and return src. We cannot raise an error, since it would cause
138 * an infinite loop in error message sending.
140 * In the case of no conversion, src is returned.
142 * XXX We assume that storage for converted result is 4-to-1 growth in
143 * the worst case. The rate for currently supported encoding pairs is within 3
144 * (SJIS JIS X0201 half width kana -> UTF-8 is the worst case).
145 * So "4" should be enough for the moment.
// pg_do_encoding_conversion: convert src from src_encoding to dest_encoding
// with the default conversion function found in the catalog.
//   src           - input bytes, handed to the conversion function as a C string
//   len           - input length in bytes; also used to size the output buffer
//   src_encoding  - encoding id of src
//   dest_encoding - encoding id wanted for the result
// NOTE(review): short lines are missing from this listing (braces, return
// statements, the declaration of proc, trailing call arguments), so what each
// guard below returns cannot be confirmed from the visible code.
148 pg_do_encoding_conversion(unsigned char *src, int len,
149 int src_encoding, int dest_encoding)
151 unsigned char *result;
// Guard: not inside a transaction.
154 if (!IsTransactionState())
// Guard: source and destination encodings are the same.
157 if (src_encoding == dest_encoding)
// Guard: SQL_ASCII on either side.
160 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
// Look up the default conversion function for this encoding pair; a missing
// one is reported at LOG level rather than ERROR.
163 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
164 if (!OidIsValid(proc))
166 elog(LOG, "default conversion proc for %s to %s not found",
167 pg_encoding_to_char(src_encoding), pg_encoding_to_char(dest_encoding));
171 /* XXX we shoud avoid throwing errors in OidFuctionCall. Otherwise
172 * we are going into inifinite loop! So we have to make sure that
173 * the function exists before calling OidFunctionCall.
175 if (!SearchSysCacheExists(PROCOID,
176 ObjectIdGetDatum(proc),
179 elog(LOG, "default conversion proc %u for %s to %s not found in pg_proc",
181 pg_encoding_to_char(src_encoding), pg_encoding_to_char(dest_encoding));
// Output buffer: four bytes per input byte plus one extra byte.
185 result = palloc(len * 4 + 1);
// Call the conversion function; the remaining arguments and the closing of
// this call are not visible in this listing.
187 OidFunctionCall5(proc,
188 Int32GetDatum(src_encoding),
189 Int32GetDatum(dest_encoding),
190 CStringGetDatum(src),
191 CStringGetDatum(result),
197 * Convert string using encoding_name. We assume that string's
198 * encoding is same as DB encoding.
200 * TEXT convert(TEXT string, NAME encoding_name) */
// pg_convert: fmgr-callable wrapper around pg_convert2 that uses the database
// encoding as the source encoding.
//   argument 0 - the string to convert, passed through unchanged as a Datum
//   argument 1 - destination encoding name
// Returns the Datum produced by pg_convert2.
// NOTE(review): the declaration of result is not visible in this listing.
202 pg_convert(PG_FUNCTION_ARGS)
204 Datum string = PG_GETARG_DATUM(0);
205 Datum dest_encoding_name = PG_GETARG_DATUM(1);
// Build a NAME datum from the database encoding name to act as the source.
206 Datum src_encoding_name = DirectFunctionCall1(
207 namein, CStringGetDatum(DatabaseEncoding->name));
210 result = DirectFunctionCall3(
211 pg_convert2, string, src_encoding_name, dest_encoding_name);
213 /* free memory allocated by namein */
214 pfree((void *)src_encoding_name);
216 PG_RETURN_TEXT_P(result);
220 * Convert string using encoding names.
222 * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
// pg_convert2: fmgr-callable conversion of a text value between two named
// encodings.
//   argument 0 - text value to convert
//   argument 1 - source encoding name
//   argument 2 - destination encoding name
// Returns a newly palloc-ed text value holding the converted bytes.
// Raises ERROR when either name does not map to an encoding id
// (pg_char_to_encoding gives a negative result).
// NOTE(review): short lines are missing from this listing: the declarations
// of str, len and retval, the statement after the memcpy into str, the test
// guarding the conversion-failed elog, and original lines 259-263.
225 pg_convert2(PG_FUNCTION_ARGS)
227 text *string = PG_GETARG_TEXT_P(0);
228 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
229 int src_encoding = pg_char_to_encoding(src_encoding_name);
230 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
231 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
232 unsigned char *result;
237 if (src_encoding < 0)
238 elog(ERROR, "Invalid source encoding name %s", src_encoding_name);
239 if (dest_encoding < 0)
240 elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name);
242 /* make sure that source string is null terminated */
// len is the payload size of the text value without its varlena header; the
// copy gets one extra byte of room.
243 len = VARSIZE(string) - VARHDRSZ;
244 str = palloc(len + 1);
245 memcpy(str, VARDATA(string), len);
248 result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding);
250 elog(ERROR, "Encoding conversion failed");
252 /* build text data type structure. we cannot use textin() here,
253 since textin assumes that input string encoding is same as
254 database encoding. */
// From here on len is the total size of the result: converted bytes (measured
// with strlen) plus the varlena header.
255 len = strlen(result) + VARHDRSZ;
256 retval = palloc(len);
257 VARATT_SIZEP(retval) = len;
258 memcpy(VARDATA(retval), result, len - VARHDRSZ);
264 /* free memory if allocated by the toaster */
265 PG_FREE_IF_COPY(string, 0);
267 PG_RETURN_TEXT_P(retval);
271 * convert client encoding to server encoding.
// pg_client_to_server: convert len bytes at s from the client encoding to the
// database encoding through perform_default_encoding_conversion().
// NOTE(review): the statement guarded by the equal-encoding test is missing
// from this listing; presumably it returns s unchanged - confirm.
274 pg_client_to_server(unsigned char *s, int len)
276 Assert(DatabaseEncoding);
277 Assert(ClientEncoding);
279 if (ClientEncoding->encoding == DatabaseEncoding->encoding)
// true selects the client-to-server direction.
282 return perform_default_encoding_conversion(s, len, true);
286 * convert server encoding to client encoding.
// pg_server_to_client: convert len bytes at s from the database encoding to
// the client encoding through perform_default_encoding_conversion().
// NOTE(review): the statement guarded by the equal-encoding test is missing
// from this listing; presumably it returns s unchanged - confirm.
289 pg_server_to_client(unsigned char *s, int len)
291 Assert(DatabaseEncoding);
292 Assert(ClientEncoding);
294 if (ClientEncoding->encoding == DatabaseEncoding->encoding)
// false selects the server-to-client direction.
297 return perform_default_encoding_conversion(s, len, false);
301 * Perform default encoding conversion using cached FmgrInfo. Since
302 * this function does not access database at all, it is safe to call
303 * outside transactions. The client encoding must be set explicitly
304 * before calling this function; otherwise no conversion is performed.
// perform_default_encoding_conversion: convert src with one of the FmgrInfo
// pointers cached in ToServerConvPorc / ToClientConvPorc.
//   src                 - input bytes, handed to the conversion function as a C string
//   len                 - input length in bytes; also sizes the output buffer
//   is_client_to_server - selects the direction, see below
// NOTE(review): short lines are missing from this listing (braces, the else
// line, return statements, the declaration of flinfo and any check on it,
// trailing call arguments), so the return values cannot be confirmed here.
307 static unsigned char *
308 perform_default_encoding_conversion(unsigned char *src, int len, bool is_client_to_server)
310 unsigned char *result;
311 int src_encoding, dest_encoding;
// Client to database direction.
314 if (is_client_to_server)
316 src_encoding = ClientEncoding->encoding;
317 dest_encoding = DatabaseEncoding->encoding;
318 flinfo = ToServerConvPorc;
// Database to client direction (presumably the else branch; the else line is
// not shown).
322 src_encoding = DatabaseEncoding->encoding;
323 dest_encoding = ClientEncoding->encoding;
324 flinfo = ToClientConvPorc;
// Guards: identical encodings, or SQL_ASCII on either side.
330 if (src_encoding == dest_encoding)
333 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
// Output buffer: four bytes per input byte plus one extra byte.
336 result = palloc(len * 4 + 1);
338 FunctionCall5(flinfo,
339 Int32GetDatum(src_encoding),
340 Int32GetDatum(dest_encoding),
341 CStringGetDatum(src),
342 CStringGetDatum(result),
347 /* convert a multi-byte string to a wchar */
// pg_mb2wchar: convert the NUL-terminated multibyte string from into to,
// using the mb2wchar_with_len routine registered for the database encoding.
// The input length is taken with strlen. Returns whatever that routine returns.
349 pg_mb2wchar(const unsigned char *from, pg_wchar *to)
351 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) (from, to, strlen(from));
354 /* convert a multi-byte string to a wchar with a limited length */
// pg_mb2wchar_with_len: like pg_mb2wchar, but the caller supplies the input
// length len instead of relying on a NUL terminator. Dispatches to the
// mb2wchar_with_len routine registered for the database encoding.
356 pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
358 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) (from, to, len);
361 /* returns the byte length of a multi-byte word */
// pg_mblen: return the result of the mblen routine registered for the
// database encoding, applied to the character starting at mbstr.
363 pg_mblen(const unsigned char *mbstr)
365 return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) (mbstr));
368 /* returns the length (counted as a wchar) of a multi-byte string */
// pg_mbstrlen: walk mbstr one multibyte character at a time.
// NOTE(review): the loop header, the counter and the return statement are
// missing from this listing; only the pointer advance is visible.
370 pg_mbstrlen(const unsigned char *mbstr)
// Step over one whole character, whose byte length comes from pg_mblen.
376 mbstr += pg_mblen(mbstr);
382 /* returns the length (counted as a wchar) of a multi-byte string
383 (not necessarily NULL terminated) */
// pg_mbstrlen_with_len: bounded variant of pg_mbstrlen.
// NOTE(review): the loop body and the return statement are missing from this
// listing; only the loop condition is visible.
385 pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
// Continue while limit is still positive and the current byte is not NUL.
390 while (limit > 0 && *mbstr)
401 * returns the byte length of a multi-byte string
402 * (not necessarily NULL terminated)
403 * that is no longer than limit.
404 * this function does not break multi-byte word boundary.
// pg_mbcliplen: scan up to len bytes of mbstr and clip at a byte limit.
// NOTE(review): the declarations, most of the loop body and the return
// statement are missing from this listing.
407 pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
// Continue while input remains and the current byte is not NUL.
412 while (len > 0 && *mbstr)
// Presumably l is the byte length of the current character and clen the
// running total, so this test detects the character that would exceed limit -
// confirm against the full source.
415 if ((clen + l) > limit)
427 * Similar to pg_mbcliplen but the limit parameter specifies the
428 * character length, not the byte length. */
// pg_mbcharcliplen: scan up to len bytes of mbstr.
// NOTE(review): the loop body and the return statement are missing from this
// listing, so how limit is applied cannot be confirmed from the visible code.
430 pg_mbcharcliplen(const unsigned char *mbstr, int len, int limit)
// Continue while input remains and the current byte is not NUL.
436 while (len > 0 && *mbstr)
// SetDatabaseEncoding: point DatabaseEncoding at the pg_enc2name_tbl entry
// for the given encoding id.
//   encoding - encoding id; must satisfy PG_VALID_BE_ENCODING, otherwise an
//              ERROR is raised
450 SetDatabaseEncoding(int encoding)
452 if (!PG_VALID_BE_ENCODING(encoding))
453 elog(ERROR, "SetDatabaseEncoding(): invalid database encoding");
455 DatabaseEncoding = &pg_enc2name_tbl[encoding];
// Sanity check: the table entry at this index must carry the same id.
456 Assert(DatabaseEncoding->encoding == encoding);
// SetDefaultClientEncoding: make the client encoding the same as the current
// database encoding. The cached conversion FmgrInfo pointers are not touched
// on the line visible here.
460 SetDefaultClientEncoding()
462 ClientEncoding = &pg_enc2name_tbl[GetDatabaseEncoding()];
// GetDatabaseEncoding: return the encoding id held in the DatabaseEncoding
// descriptor, after asserting that the descriptor pointer is set.
466 GetDatabaseEncoding(void)
468 Assert(DatabaseEncoding);
469 return (DatabaseEncoding->encoding);
// GetDatabaseEncodingName: return the name field of the DatabaseEncoding
// descriptor, after asserting that the descriptor pointer is set. The pointer
// refers to the descriptor itself; nothing is copied here.
473 GetDatabaseEncodingName(void)
475 Assert(DatabaseEncoding);
476 return (DatabaseEncoding->name);
// getdatabaseencoding: fmgr-callable function that returns the database
// encoding name as a Datum built by namein. Takes no SQL arguments that are
// read here.
480 getdatabaseencoding(PG_FUNCTION_ARGS)
482 Assert(DatabaseEncoding);
483 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
// pg_client_encoding: fmgr-callable function that returns the client encoding
// name as a Datum built by namein. Takes no SQL arguments that are read here.
487 pg_client_encoding(PG_FUNCTION_ARGS)
489 Assert(ClientEncoding);
490 return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));