From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 4 Jan 2009 18:37:36 +0000 (+0000)
Subject: Add a pg_encoding_mbcliplen() function that is just like pg_mbcliplen()
X-Git-Tag: REL8_4_BETA1~483
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1efd5ff89b8bf841324245e75739800414cf47aa;p=postgresql

Add a pg_encoding_mbcliplen() function that is just like pg_mbcliplen()
except the caller can specify the encoding to work in; this will be needed
for pg_stat_statements.  In passing, do some marginal efficiency hacking
and clean up some comments.  Also, prevent the single-byte-encoding code
path from fetching one byte past the stated length of the string (this
last is a bug fix that might need to be back-patched at some point).
---

diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 6465afa104..4831e4da48 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -4,7 +4,7 @@
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
  *
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.75 2008/11/11 03:01:20 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.76 2009/01/04 18:37:35 tgl Exp $
  */
 #include "postgres.h"
 
@@ -710,14 +710,14 @@ pg_encoding_mb2wchar_with_len(int encoding,
 	return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
 }
 
-/* returns the byte length of a multibyte word */
+/* returns the byte length of a multibyte character */
 int
 pg_mblen(const char *mbstr)
 {
 	return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
 }
 
-/* returns the display length of a multibyte word */
+/* returns the display length of a multibyte character */
 int
 pg_dsplen(const char *mbstr)
 {
@@ -767,23 +767,37 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
 
 /*
  * returns the byte length of a multibyte string
- * (not necessarily  NULL terminated)
+ * (not necessarily NULL terminated)
  * that is no longer than limit.
- * this function does not break multibyte word boundary.
+ * this function does not break multibyte character boundary.
  */
 int
 pg_mbcliplen(const char *mbstr, int len, int limit)
 {
+	return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
+								 len, limit);
+}
+
+/*
+ * pg_mbcliplen with specified encoding
+ */
+int
+pg_encoding_mbcliplen(int encoding, const char *mbstr,
+					  int len, int limit)
+{
+	mblen_converter mblen_fn;
 	int			clen = 0;
 	int			l;
 
 	/* optimization for single byte encoding */
-	if (pg_database_encoding_max_length() == 1)
+	if (pg_encoding_max_length(encoding) == 1)
 		return cliplen(mbstr, len, limit);
 
+	mblen_fn = pg_wchar_table[encoding].mblen;
+
 	while (len > 0 && *mbstr)
 	{
-		l = pg_mblen(mbstr);
+		l = (*mblen_fn) ((const unsigned char *) mbstr);
 		if ((clen + l) > limit)
 			break;
 		clen += l;
@@ -797,7 +811,8 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
 
 /*
  * Similar to pg_mbcliplen except the limit parameter specifies the
- * character length, not the byte length.  */
+ * character length, not the byte length.
+ */
 int
 pg_mbcharcliplen(const char *mbstr, int len, int limit)
 {
@@ -822,6 +837,18 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
 	return clen;
 }
 
+/* mbcliplen for any single-byte encoding */
+static int
+cliplen(const char *str, int len, int limit)
+{
+	int			l = 0;
+
+	len = Min(len, limit);
+	while (l < len && str[l])
+		l++;
+	return l;
+}
+
 void
 SetDatabaseEncoding(int encoding)
 {
@@ -884,17 +911,3 @@ pg_client_encoding(PG_FUNCTION_ARGS)
 	Assert(ClientEncoding);
 	return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
 }
-
-static int
-cliplen(const char *str, int len, int limit)
-{
-	int			l = 0;
-	const char *s;
-
-	for (s = str; *s; s++, l++)
-	{
-		if (l >= len || l >= limit)
-			return l;
-	}
-	return (s - str);
-}
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index e9a26d4843..dc89af9d50 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.81 2009/01/01 17:23:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.82 2009/01/04 18:37:36 tgl Exp $
  *
  *	NOTES
  *		This is used both by the backend and by libpq, but should not be
@@ -358,6 +358,8 @@ extern int	pg_mic_mblen(const unsigned char *mbstr);
 extern int	pg_mbstrlen(const char *mbstr);
 extern int	pg_mbstrlen_with_len(const char *mbstr, int len);
 extern int	pg_mbcliplen(const char *mbstr, int len, int limit);
+extern int	pg_encoding_mbcliplen(int encoding, const char *mbstr,
+								  int len, int limit);
 extern int	pg_mbcharcliplen(const char *mbstr, int len, int imit);
 extern int	pg_encoding_max_length(int encoding);
 extern int	pg_database_encoding_max_length(void);