* client encoding and server internal encoding.
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
- * $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $
+ * $Id: mbutils.c,v 1.19 2001/08/15 07:07:40 ishii Exp $
*/
#include "postgres.h"
}
/*
- * set the client encoding. if encoding conversion between
- * client/server encoding is not supported, returns -1
+ * Find appropriate encoding conversion functions. If no such
+ * functions found, returns -1.
+ *
+ * Arguments:
+ *
+ * src, dest (in): source and destination encoding ids
+ *
+ * src_to_mic (out): pointer to a function which converts src to
+ * mic/unicode according to dest. if src == mic/unicode or no
+ * appropriate function found, set to 0.
+ *
+ * dest_from_mic (out): pointer to a function which converts
+ * mic/unicode to dest according to src. if dest == mic/unicode or no
+ * appropriate function found, set to 0.
*/
int
-pg_set_client_encoding(int encoding)
+pg_find_encoding_converters(int src, int dest, void (**src_to_mic)(), void (**dest_from_mic)())
{
- int current_server_encoding = GetDatabaseEncoding();
-
- client_encoding = encoding;
-
- if (client_encoding == current_server_encoding)
- { /* server == client? */
- client_to_mic = client_from_mic = 0;
- server_to_mic = server_from_mic = 0;
+ if (src == dest)
+ { /* src == dest? */
+ *src_to_mic = *dest_from_mic = 0;
}
- else if (current_server_encoding == MULE_INTERNAL)
- { /* server == MULE_INETRNAL? */
- client_to_mic = pg_get_enc_ent(encoding)->to_mic;
- client_from_mic = pg_get_enc_ent(encoding)->from_mic;
- server_to_mic = server_from_mic = 0;
- if (client_to_mic == 0 || client_from_mic == 0)
+ else if (src == MULE_INTERNAL)
+ { /* src == MULE_INETRNAL? */
+ *dest_from_mic = pg_get_enc_ent(dest)->from_mic;
+ if (*dest_from_mic == 0)
return (-1);
+ *src_to_mic = 0;
}
- else if (encoding == MULE_INTERNAL)
- { /* client == MULE_INETRNAL? */
- client_to_mic = client_from_mic = 0;
- server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
- server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
- if (server_to_mic == 0 || server_from_mic == 0)
+ else if (dest == MULE_INTERNAL)
+ { /* dest == MULE_INETRNAL? */
+ *src_to_mic = pg_get_enc_ent(src)->to_mic;
+ if (*src_to_mic == 0)
return (-1);
+ *dest_from_mic = 0;
}
- else if (current_server_encoding == UNICODE)
- { /* server == UNICODE? */
- client_to_mic = pg_get_enc_ent(encoding)->to_unicode;
- client_from_mic = pg_get_enc_ent(encoding)->from_unicode;
- server_to_mic = server_from_mic = 0;
- if (client_to_mic == 0 || client_from_mic == 0)
+ else if (src == UNICODE)
+ { /* src == UNICODE? */
+ *dest_from_mic = pg_get_enc_ent(dest)->from_unicode;
+ if (*dest_from_mic == 0)
return (-1);
+ *src_to_mic = 0;
}
- else if (encoding == UNICODE)
- { /* client == UNICODE? */
- client_to_mic = client_from_mic = 0;
- server_to_mic = pg_get_enc_ent(current_server_encoding)->to_unicode;
- server_from_mic = pg_get_enc_ent(current_server_encoding)->from_unicode;
- if (server_to_mic == 0 || server_from_mic == 0)
+ else if (dest == UNICODE)
+ { /* dest == UNICODE? */
+ *src_to_mic = pg_get_enc_ent(src)->to_unicode;
+ if (*src_to_mic == 0)
return (-1);
+ *dest_from_mic = 0;
}
else
{
- client_to_mic = pg_get_enc_ent(encoding)->to_mic;
- client_from_mic = pg_get_enc_ent(encoding)->from_mic;
- server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
- server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
- if (client_to_mic == 0 || client_from_mic == 0)
- return (-1);
- if (server_to_mic == 0 || server_from_mic == 0)
+ *src_to_mic = pg_get_enc_ent(src)->to_mic;
+ *dest_from_mic = pg_get_enc_ent(dest)->from_mic;
+ if (*src_to_mic == 0 || *dest_from_mic == 0)
return (-1);
}
return (0);
}
+/*
+ * set the client encoding. if encoding conversion between
+ * client/server encoding is not supported, returns -1
+ */
+int
+pg_set_client_encoding(int encoding)
+{
+ int current_server_encoding = GetDatabaseEncoding();
+
+ if (pg_find_encoding_converters(encoding, current_server_encoding, &client_to_mic, &server_from_mic) < 0)
+ return (-1);
+ client_encoding = encoding;
+
+ if (pg_find_encoding_converters(current_server_encoding, encoding, &server_to_mic, &client_from_mic) < 0)
+ return (-1);
+ return 0;
+}
+
/*
* returns the current client encoding
*/
}
/*
- * convert client encoding to server encoding.
+ * Convert src encoding and returns it. Actual conversion is done by
+ * src_to_mic and dest_from_mic, which can be obtained by
+ * pg_find_encoding_converters(). The reason we require two conversion
+ * functions is that we have an intermediate encoding: MULE_INTERNAL
+ * Using intermediate encodings will reduce the number of functions
+ * doing encoding conversions. Special case is either src or dest is
+ * the intermediate encoding itself. In this case, you don't need src
+ * or dest (setting 0 will indicate there's no conversion
+ * function). Another case is you have direct-conversion function from
+ * src to dest. In this case either src_to_mic or dest_from_mic could
+ * be set to 0 also.
+ *
+ * Note that If src or dest is UNICODE, we have to do
+ * direct-conversion, since we don't support conversion bwteen UNICODE
+ * and MULE_INTERNAL, we cannot go through MULE_INTERNAL.
*
* CASE 1: if no conversion is required, then the given pointer s is returned.
*
* to determine whether to pfree the result or not!
*
* Note: we assume that conversion cannot cause more than a 4-to-1 growth
- * in the length of the string --- is this enough?
- */
+ * in the length of the string --- is this enough? */
+
unsigned char *
-pg_client_to_server(unsigned char *s, int len)
+pg_do_encoding_conversion(unsigned char *src, int len, void (*src_to_mic)(), void (*dest_from_mic)())
{
- unsigned char *result = s;
+ unsigned char *result = src;
unsigned char *buf;
- if (client_encoding == GetDatabaseEncoding())
- return result;
- if (client_to_mic)
+ if (src_to_mic)
{
buf = (unsigned char *) palloc(len * 4 + 1);
- (*client_to_mic) (result, buf, len);
+ (*src_to_mic) (result, buf, len);
result = buf;
len = strlen(result);
}
- if (server_from_mic)
+ if (dest_from_mic)
{
buf = (unsigned char *) palloc(len * 4 + 1);
- (*server_from_mic) (result, buf, len);
- if (result != s)
+ (*dest_from_mic) (result, buf, len);
+ if (result != src)
pfree(result); /* release first buffer */
result = buf;
}
return result;
}
+/*
+ * Convert string using encoding_nanme. We assume that string's
+ * encoding is same as DB encoding.
+ *
+ * TEXT convert(TEXT string, NAME encoding_name)
+ */
+Datum
+pg_convert(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_P(0);
+ Name s = PG_GETARG_NAME(1);
+ int encoding = pg_char_to_encoding(NameStr(*s));
+ int db_encoding = GetDatabaseEncoding();
+ void (*src)(), (*dest)();
+ unsigned char *result;
+ text *retval;
+
+ if (encoding < 0)
+ elog(ERROR, "Invalid encoding name %s", NameStr(*s));
+
+ if (pg_find_encoding_converters(db_encoding, encoding, &src, &dest) < 0)
+ {
+ char *encoding_name = (char *)pg_encoding_to_char(db_encoding);
+ elog(ERROR, "Conversion from %s to %s is not possible", NameStr(*s), encoding_name);
+ }
+
+ result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ,
+ src, dest);
+ if (result == NULL)
+ elog(ERROR, "Encoding conversion failed");
+
+ retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
+ if (result != (unsigned char *)VARDATA(string))
+ pfree(result);
+
+ /* free memory if allocated by the toaster */
+ PG_FREE_IF_COPY(string, 0);
+
+ PG_RETURN_TEXT_P(retval);
+}
+
+/*
+ * Convert string using encoding_nanme.
+ *
+ * TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
+ */
+Datum
+pg_convert2(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_P(0);
+ char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
+ int src_encoding = pg_char_to_encoding(src_encoding_name);
+ char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
+ int dest_encoding = pg_char_to_encoding(dest_encoding_name);
+ void (*src)(), (*dest)();
+ unsigned char *result;
+ text *retval;
+
+ if (src_encoding < 0)
+ elog(ERROR, "Invalid source encoding name %s", src_encoding_name);
+ if (dest_encoding < 0)
+ elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name);
+
+ if (pg_find_encoding_converters(src_encoding, dest_encoding, &src, &dest) < 0)
+ {
+ elog(ERROR, "Conversion from %s to %s is not possible",
+ src_encoding_name, dest_encoding_name);
+ }
+
+ result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ,
+ src, dest);
+ if (result == NULL)
+ elog(ERROR, "Encoding conversion failed");
+
+ retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
+ if (result != (unsigned char *)VARDATA(string))
+ pfree(result);
+
+ /* free memory if allocated by the toaster */
+ PG_FREE_IF_COPY(string, 0);
+
+ PG_RETURN_TEXT_P(retval);
+}
+
+/*
+ * convert client encoding to server encoding.
+ *
+ * CASE 1: if no conversion is required, then the given pointer s is returned.
+ *
+ * CASE 2: if conversion is required, a palloc'd string is returned.
+ *
+ * Callers must check whether return value differs from passed value
+ * to determine whether to pfree the result or not!
+ *
+ * Note: we assume that conversion cannot cause more than a 4-to-1 growth
+ * in the length of the string --- is this enough?
+ */
+unsigned char *
+pg_client_to_server(unsigned char *s, int len)
+{
+ if (client_encoding == GetDatabaseEncoding())
+ return s;
+
+ return pg_do_encoding_conversion(s, len, client_to_mic, server_from_mic);
+}
+
/*
* convert server encoding to client encoding.
*
unsigned char *
pg_server_to_client(unsigned char *s, int len)
{
- unsigned char *result = s;
- unsigned char *buf;
-
if (client_encoding == GetDatabaseEncoding())
- return result;
- if (server_to_mic)
- {
- buf = (unsigned char *) palloc(len * 4 + 1);
- (*server_to_mic) (result, buf, len);
- result = buf;
- len = strlen(result);
- }
- if (client_from_mic)
- {
- buf = (unsigned char *) palloc(len * 4 + 1);
- (*client_from_mic) (result, buf, len);
- if (result != s)
- pfree(result); /* release first buffer */
- result = buf;
- }
- return result;
+ return s;
+
+ return pg_do_encoding_conversion(s, len, server_to_mic, client_from_mic);
}
/* convert a multi-byte string to a wchar */