From 1efd5ff89b8bf841324245e75739800414cf47aa Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 4 Jan 2009 18:37:36 +0000 Subject: [PATCH] Add a pg_encoding_mbcliplen() function that is just like pg_mbcliplen() except the caller can specify the encoding to work in; this will be needed for pg_stat_statements. In passing, do some marginal efficiency hacking and clean up some comments. Also, prevent the single-byte-encoding code path from fetching one byte past the stated length of the string (this last is a bug that might need to be back-patched at some point). --- src/backend/utils/mb/mbutils.c | 57 +++++++++++++++++++++------------- src/include/mb/pg_wchar.h | 4 ++- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 6465afa104..4831e4da48 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -4,7 +4,7 @@ * (currently mule internal code (mic) is used) * Tatsuo Ishii * - * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.75 2008/11/11 03:01:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.76 2009/01/04 18:37:35 tgl Exp $ */ #include "postgres.h" @@ -710,14 +710,14 @@ pg_encoding_mb2wchar_with_len(int encoding, return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len); } -/* returns the byte length of a multibyte word */ +/* returns the byte length of a multibyte character */ int pg_mblen(const char *mbstr) { return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr)); } -/* returns the display length of a multibyte word */ +/* returns the display length of a multibyte character */ int pg_dsplen(const char *mbstr) { @@ -767,23 +767,37 @@ pg_mbstrlen_with_len(const char *mbstr, int limit) /* * returns the byte length of a multibyte string - * (not necessarily NULL terminated) + * (not necessarily NULL terminated) * that is no longer than limit. - * this function does not break multibyte word boundary. + * this function does not break multibyte character boundary. */ int pg_mbcliplen(const char *mbstr, int len, int limit) { + return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr, + len, limit); +} + +/* + * pg_mbcliplen with specified encoding + */ +int +pg_encoding_mbcliplen(int encoding, const char *mbstr, + int len, int limit) +{ + mblen_converter mblen_fn; int clen = 0; int l; /* optimization for single byte encoding */ - if (pg_database_encoding_max_length() == 1) + if (pg_encoding_max_length(encoding) == 1) return cliplen(mbstr, len, limit); + mblen_fn = pg_wchar_table[encoding].mblen; + while (len > 0 && *mbstr) { - l = pg_mblen(mbstr); + l = (*mblen_fn) ((const unsigned char *) mbstr); if ((clen + l) > limit) break; clen += l; @@ -797,7 +811,8 @@ pg_mbcliplen(const char *mbstr, int len, int limit) /* * Similar to pg_mbcliplen except the limit parameter specifies the - * character length, not the byte length. */ + * character length, not the byte length. + */ int pg_mbcharcliplen(const char *mbstr, int len, int limit) { @@ -822,6 +837,18 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit) return clen; } +/* mbcliplen for any single-byte encoding */ +static int +cliplen(const char *str, int len, int limit) +{ + int l = 0; + + len = Min(len, limit); + while (l < len && str[l]) + l++; + return l; +} + void SetDatabaseEncoding(int encoding) { @@ -884,17 +911,3 @@ pg_client_encoding(PG_FUNCTION_ARGS) Assert(ClientEncoding); return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name)); } - -static int -cliplen(const char *str, int len, int limit) -{ - int l = 0; - const char *s; - - for (s = str; *s; s++, l++) - { - if (l >= len || l >= limit) - return l; - } - return (s - str); -} diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index e9a26d4843..dc89af9d50 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.81 2009/01/01 17:23:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.82 2009/01/04 18:37:36 tgl Exp $ * * NOTES * This is used both by the backend and by libpq, but should not be @@ -358,6 +358,8 @@ extern int pg_mic_mblen(const unsigned char *mbstr); extern int pg_mbstrlen(const char *mbstr); extern int pg_mbstrlen_with_len(const char *mbstr, int len); extern int pg_mbcliplen(const char *mbstr, int len, int limit); +extern int pg_encoding_mbcliplen(int encoding, const char *mbstr, + int len, int limit); extern int pg_mbcharcliplen(const char *mbstr, int len, int imit); extern int pg_encoding_max_length(int encoding); extern int pg_database_encoding_max_length(void); -- 2.40.0