elsewhere.
Similarly rename the version in mbprint.c, not because this affects anything
but just to keep the two copies in exact sync. There was some discussion of
having only one copy in src/port/ instead, but this function is so small
and unlikely to change that that seems like overkill.
Slightly editorialized version of a patch by Joseph Adams. (The bug-fix
aspect of his patch was applied separately, and back-patched.)
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.74 2010/01/04 20:38:31 adunstan Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.75 2010/08/18 19:54:01 tgl Exp $
*
*/
/* can be used in either frontend or backend */
* We return "1" for any leading byte that is either flat-out illegal or
* indicates a length larger than we support.
*
- * pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps
+ * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
* other places would need to be fixed to change this.
*/
int
(ucs >= 0x20000 && ucs <= 0x2ffff)));
}
-static pg_wchar
-utf2ucs(const unsigned char *c)
+/*
+ * Convert a UTF-8 character to a Unicode code point.
+ * This is a one-character version of pg_utf2wchar_with_len.
+ *
+ * No error checks here, c must point to a long-enough string.
+ */
+pg_wchar
+utf8_to_unicode(const unsigned char *c)
{
- /*
- * one char version of pg_utf2wchar_with_len. no control here, c must
- * point to a large enough string
- */
if ((*c & 0x80) == 0)
return (pg_wchar) c[0];
else if ((*c & 0xe0) == 0xc0)
static int
pg_utf_dsplen(const unsigned char *s)
{
- return ucs_wcwidth(utf2ucs(s));
+ return ucs_wcwidth(utf8_to_unicode(s));
}
/*
*
* Copyright (c) 2000-2010, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.39 2010/08/16 00:06:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.40 2010/08/18 19:54:01 tgl Exp $
*
* XXX this file does not really belong in psql/. Perhaps move to libpq?
* It also seems that the mbvalidate function is redundant with existing
#define PG_UTF8 pg_get_utf8_id()
+/*
+ * Convert a UTF-8 character to a Unicode code point.
+ * This is a one-character version of pg_utf2wchar_with_len.
+ *
+ * No error checks here, c must point to a long-enough string.
+ */
static pg_wchar
-utf2ucs(const unsigned char *c)
+utf8_to_unicode(const unsigned char *c)
{
- /*
- * one char version of pg_utf2wchar_with_len. no control here, c must
- * point to a large enough string
- */
if ((*c & 0x80) == 0)
return (pg_wchar) c[0];
else if ((*c & 0xe0) == 0xc0)
else if (w < 0) /* Non-ascii control char */
{
if (encoding == PG_UTF8)
- sprintf((char *) ptr, "\\u%04X", utf2ucs(pwcs));
+ sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs));
else
{
/*
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.94 2010/02/26 02:01:25 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.95 2010/08/18 19:54:01 tgl Exp $
*
* NOTES
* This is used both by the backend and by libpq, but should not be
extern int pg_valid_server_encoding(const char *name);
extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
+extern pg_wchar utf8_to_unicode(const unsigned char *c);
extern int pg_utf_mblen(const unsigned char *);
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding,