From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 23 Feb 2014 21:59:05 +0000 (-0500)
Subject: Prefer pg_any_to_server/pg_server_to_any over pg_do_encoding_conversion.
X-Git-Tag: REL9_4_BETA1~432
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=769065c1b2471f484bb48bb58a8bdcf1d12a419c;p=postgresql

Prefer pg_any_to_server/pg_server_to_any over pg_do_encoding_conversion.

A large majority of the callers of pg_do_encoding_conversion were
specifying the database encoding as either source or target of the
conversion, meaning that we can use the less general functions
pg_any_to_server/pg_server_to_any instead.

The main advantage of using pg_any_to_server/pg_server_to_any is that they
can make use of a cached conversion-function lookup in the common case that
the other encoding is the current client_encoding.  They are notationally
cleaner too in most cases, not least because, as a historical artifact,
their APIs use "char *" whereas pg_do_encoding_conversion uses
"unsigned char *", which forced casts at nearly every call site.

Note that pg_any_to_server will apply an encoding verification step in
some cases where pg_do_encoding_conversion would have just done nothing.
This seems to me to be a good idea at most of these call sites, though
it partially negates the performance benefit.

Per discussion of bug #9210.
---

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 97f171d68e..0bd0d046c7 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -1458,11 +1458,9 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 				{
 					char	   *enc;
 
-					enc = (char *)
-						pg_do_encoding_conversion((unsigned char *) qstr,
-												  entry->query_len,
-												  entry->encoding,
-												  GetDatabaseEncoding());
+					enc = pg_any_to_server(qstr,
+										   entry->query_len,
+										   entry->encoding);
 
 					values[i++] = CStringGetTextDatum(enc);
 
diff --git a/contrib/sslinfo/sslinfo.c b/contrib/sslinfo/sslinfo.c
index d724fe91ff..90c6b57c69 100644
--- a/contrib/sslinfo/sslinfo.c
+++ b/contrib/sslinfo/sslinfo.c
@@ -158,10 +158,7 @@ ASN1_STRING_to_text(ASN1_STRING *str)
 	nullterm = '\0';
 	BIO_write(membuf, &nullterm, 1);
 	size = BIO_get_mem_data(membuf, &sp);
-	dp = (char *) pg_do_encoding_conversion((unsigned char *) sp,
-											size - 1,
-											PG_UTF8,
-											GetDatabaseEncoding());
+	dp = pg_any_to_server(sp, size - 1, PG_UTF8);
 	result = cstring_to_text(dp);
 	if (dp != sp)
 		pfree(dp);
@@ -323,10 +320,7 @@ X509_NAME_to_text(X509_NAME *name)
 	nullterm = '\0';
 	BIO_write(membuf, &nullterm, 1);
 	size = BIO_get_mem_data(membuf, &sp);
-	dp = (char *) pg_do_encoding_conversion((unsigned char *) sp,
-											size - 1,
-											PG_UTF8,
-											GetDatabaseEncoding());
+	dp = pg_any_to_server(sp, size - 1, PG_UTF8);
 	result = cstring_to_text(dp);
 	if (dp != sp)
 		pfree(dp);
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index ce5aed301b..06bd90b9aa 100644
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c
@@ -635,7 +635,6 @@ read_extension_script_file(const ExtensionControlFile *control,
 						   const char *filename)
 {
 	int			src_encoding;
-	int			dest_encoding = GetDatabaseEncoding();
 	bytea	   *content;
 	char	   *src_str;
 	char	   *dest_str;
@@ -645,7 +644,7 @@ read_extension_script_file(const ExtensionControlFile *control,
 
 	/* use database encoding if not given */
 	if (control->encoding < 0)
-		src_encoding = dest_encoding;
+		src_encoding = GetDatabaseEncoding();
 	else
 		src_encoding = control->encoding;
 
@@ -655,10 +654,7 @@ read_extension_script_file(const ExtensionControlFile *control,
 	pg_verify_mbstr_len(src_encoding, src_str, len, false);
 
 	/* convert the encoding to the database encoding */
-	dest_str = (char *) pg_do_encoding_conversion((unsigned char *) src_str,
-												  len,
-												  src_encoding,
-												  dest_encoding);
+	dest_str = pg_any_to_server(src_str, len, src_encoding);
 
 	/* if no conversion happened, we have to arrange for null termination */
 	if (dest_str == src_str)
diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c
index a585c7206b..37d2966359 100644
--- a/src/backend/snowball/dict_snowball.c
+++ b/src/backend/snowball/dict_snowball.c
@@ -255,10 +255,7 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
 		{
 			char	   *recoded;
 
-			recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
-														 strlen(txt),
-													   GetDatabaseEncoding(),
-														 PG_UTF8);
+			recoded = pg_server_to_any(txt, strlen(txt), PG_UTF8);
 			if (recoded != txt)
 			{
 				pfree(txt);
@@ -284,10 +281,7 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
 		{
 			char	   *recoded;
 
-			recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
-														 strlen(txt),
-														 PG_UTF8,
-													  GetDatabaseEncoding());
+			recoded = pg_any_to_server(txt, strlen(txt), PG_UTF8);
 			if (recoded != txt)
 			{
 				pfree(txt);
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index d73687af9e..f9490c835d 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -209,10 +209,7 @@ t_readline(FILE *fp)
 	(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
 
 	/* And convert */
-	recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
-												 len,
-												 PG_UTF8,
-												 GetDatabaseEncoding());
+	recoded = pg_any_to_server(buf, len, PG_UTF8);
 	if (recoded == buf)
 	{
 		/*
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 0bf350adea..f34446329f 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -418,9 +418,7 @@ db_encoding_strdup(int encoding, const char *str)
 	char	   *mstr;
 
 	/* convert the string to the database encoding */
-	pstr = (char *) pg_do_encoding_conversion(
-										  (unsigned char *) str, strlen(str),
-											encoding, GetDatabaseEncoding());
+	pstr = pg_any_to_server(str, strlen(str), encoding);
 	mstr = strdup(pstr);
 	if (pstr != str)
 		pfree(pstr);
@@ -581,35 +579,32 @@ strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm
 {
 	size_t		len;
 	wchar_t		wbuf[MAX_L10N_DATA];
-	int			encoding;
-
-	encoding = GetDatabaseEncoding();
 
 	len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
 	if (len == 0)
-
+	{
 		/*
 		 * strftime call failed - return 0 with the contents of dst
 		 * unspecified
 		 */
 		return 0;
+	}
 
 	len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
 	if (len == 0)
-		elog(ERROR,
-		"could not convert string to UTF-8: error code %lu", GetLastError());
+		elog(ERROR, "could not convert string to UTF-8: error code %lu",
+			 GetLastError());
 
 	dst[len] = '\0';
-	if (encoding != PG_UTF8)
+	if (GetDatabaseEncoding() != PG_UTF8)
 	{
-		char	   *convstr =
-		(char *) pg_do_encoding_conversion((unsigned char *) dst,
-										   len, PG_UTF8, encoding);
+		char	   *convstr = pg_any_to_server(dst, len, PG_UTF8);
 
-		if (dst != convstr)
+		if (convstr != dst)
 		{
 			strlcpy(dst, convstr, dstlen);
 			len = strlen(dst);
+			pfree(convstr);
 		}
 	}
 
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index d36751855b..765469c623 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -345,10 +345,7 @@ xml_recv(PG_FUNCTION_ARGS)
 	xmlFreeDoc(doc);
 
 	/* Now that we know what we're dealing with, convert to server encoding */
-	newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
-												nbytes,
-												encoding,
-												GetDatabaseEncoding());
+	newstr = pg_any_to_server(str, nbytes, encoding);
 
 	if (newstr != str)
 	{
@@ -1793,10 +1790,8 @@ sqlchar_to_unicode(char *s)
 	char	   *utf8string;
 	pg_wchar	ret[2];			/* need space for trailing zero */
 
-	utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
-													pg_mblen(s),
-													GetDatabaseEncoding(),
-													PG_UTF8);
+	/* note we're not assuming s is null-terminated */
+	utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
 
 	pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
 								  pg_encoding_mblen(PG_UTF8, utf8string));
@@ -1892,19 +1887,15 @@ map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
 static char *
 unicode_to_sqlchar(pg_wchar c)
 {
-	unsigned char utf8string[5];	/* need room for trailing zero */
+	char		utf8string[8];			/* need room for trailing zero */
 	char	   *result;
 
 	memset(utf8string, 0, sizeof(utf8string));
-	unicode_to_utf8(c, utf8string);
-
-	result = (char *) pg_do_encoding_conversion(utf8string,
-												pg_encoding_mblen(PG_UTF8,
-														(char *) utf8string),
-												PG_UTF8,
-												GetDatabaseEncoding());
-	/* if pg_do_encoding_conversion didn't strdup, we must */
-	if (result == (char *) utf8string)
+	unicode_to_utf8(c, (unsigned char *) utf8string);
+
+	result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
+	/* if pg_any_to_server didn't strdup, we must */
+	if (result == utf8string)
 		result = pstrdup(result);
 	return result;
 }
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 7f43cae69e..15cf0d806b 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -1077,7 +1077,9 @@ pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
 		char	   *utf8;
 
 		utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
-										 len, GetMessageEncoding(), PG_UTF8);
+												  len,
+												  GetMessageEncoding(),
+												  PG_UTF8);
 		if (utf8 != str)
 			len = strlen(utf8);
 
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index ed6884e863..f8ccaa59e8 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -3811,9 +3811,7 @@ hv_store_string(HV *hv, const char *key, SV *val)
 	char	   *hkey;
 	SV		  **ret;
 
-	hkey = (char *)
-		pg_do_encoding_conversion((unsigned char *) key, strlen(key),
-								  GetDatabaseEncoding(), PG_UTF8);
+	hkey = pg_server_to_any(key, strlen(key), PG_UTF8);
 
 	/*
 	 * This seems nowhere documented, but under Perl 5.8.0 and up, hv_store()
@@ -3841,9 +3839,7 @@ hv_fetch_string(HV *hv, const char *key)
 	char	   *hkey;
 	SV		  **ret;
 
-	hkey = (char *)
-		pg_do_encoding_conversion((unsigned char *) key, strlen(key),
-								  GetDatabaseEncoding(), PG_UTF8);
+	hkey = pg_server_to_any(key, strlen(key), PG_UTF8);
 
 	/* See notes in hv_store_string */
 	hlen = -(int) strlen(hkey);
diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h
index 3e8aa7c4a2..c1c7c297cc 100644
--- a/src/pl/plperl/plperl_helpers.h
+++ b/src/pl/plperl/plperl_helpers.h
@@ -9,24 +9,11 @@
 static inline char *
 utf_u2e(char *utf8_str, size_t len)
 {
-	int			enc = GetDatabaseEncoding();
 	char	   *ret;
 
-	/*
-	 * When we are in a PG_UTF8 or SQL_ASCII database
-	 * pg_do_encoding_conversion() will not do any conversion (which is good)
-	 * or verification (not so much), so we need to run the verification step
-	 * separately.
-	 */
-	if (enc == PG_UTF8 || enc == PG_SQL_ASCII)
-	{
-		pg_verify_mbstr_len(enc, utf8_str, len, false);
-		ret = utf8_str;
-	}
-	else
-		ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str,
-												 len, PG_UTF8, enc);
+	ret = pg_any_to_server(utf8_str, len, PG_UTF8);
 
+	/* ensure we have a copy even if no conversion happened */
 	if (ret == utf8_str)
 		ret = pstrdup(ret);
 
@@ -41,12 +28,14 @@ utf_u2e(char *utf8_str, size_t len)
 static inline char *
 utf_e2u(const char *str)
 {
-	char	   *ret =
-	(char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
-									   GetDatabaseEncoding(), PG_UTF8);
+	char	   *ret;
 
+	ret = pg_server_to_any(str, strlen(str), PG_UTF8);
+
+	/* ensure we have a copy even if no conversion happened */
 	if (ret == str)
 		ret = pstrdup(ret);
+
 	return ret;
 }
 
diff --git a/src/pl/plpython/plpy_util.c b/src/pl/plpython/plpy_util.c
index 95cbba5cdc..88670e66d0 100644
--- a/src/pl/plpython/plpy_util.c
+++ b/src/pl/plpython/plpy_util.c
@@ -90,11 +90,9 @@ PLyUnicode_Bytes(PyObject *unicode)
 	{
 		PG_TRY();
 		{
-			encoded = (char *) pg_do_encoding_conversion(
-												(unsigned char *) utf8string,
-														 strlen(utf8string),
-														 PG_UTF8,
-													  GetDatabaseEncoding());
+			encoded = pg_any_to_server(utf8string,
+									   strlen(utf8string),
+									   PG_UTF8);
 		}
 		PG_CATCH();
 		{
@@ -109,7 +107,7 @@ PLyUnicode_Bytes(PyObject *unicode)
 	/* finally, build a bytes object in the server encoding */
 	rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
 
-	/* if pg_do_encoding_conversion allocated memory, free it now */
+	/* if pg_any_to_server allocated memory, free it now */
 	if (utf8string != encoded)
 		pfree(encoded);
 
@@ -149,10 +147,7 @@ PLyUnicode_FromString(const char *s)
 	char	   *utf8string;
 	PyObject   *o;
 
-	utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
-													strlen(s),
-													GetDatabaseEncoding(),
-													PG_UTF8);
+	utf8string = pg_server_to_any(s, strlen(s), PG_UTF8);
 
 	o = PyUnicode_FromString(utf8string);
 
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 0538038982..b3bf65ec88 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -63,13 +63,17 @@
 static unsigned char *
 utf_u2e(unsigned char *src)
 {
-	return pg_do_encoding_conversion(src, strlen(src), PG_UTF8, GetDatabaseEncoding());
+	return (unsigned char *) pg_any_to_server((char *) src,
+											  strlen(src),
+											  PG_UTF8);
 }
 
 static unsigned char *
 utf_e2u(unsigned char *src)
 {
-	return pg_do_encoding_conversion(src, strlen(src), GetDatabaseEncoding(), PG_UTF8);
+	return (unsigned char *) pg_server_to_any((char *) src,
+											  strlen(src),
+											  PG_UTF8);
 }
 
 #define PLTCL_UTF