From e8c32050371d06c0f61fed02acc554fe7a7e8c66 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Mon, 15 Mar 2004 10:41:26 +0000 Subject: [PATCH] Add PQdsplen() which returns the "display length" of a character. Some work is still needed: - UTF-8 and MULE_INTERNAL always return 1 --- src/backend/utils/mb/mbutils.c | 9 +- src/backend/utils/mb/wchar.c | 267 +++++++++++++++++++++++++++----- src/bin/psql/common.c | 6 +- src/include/mb/pg_wchar.h | 7 +- src/interfaces/libpq/fe-misc.c | 12 +- src/interfaces/libpq/libpq-fe.h | 5 +- 6 files changed, 261 insertions(+), 45 deletions(-) diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 3838a0a097..96dd563862 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -4,7 +4,7 @@ * (currently mule internal code (mic) is used) * Tatsuo Ishii * - * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.45 2003/11/29 19:52:02 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.46 2004/03/15 10:41:25 ishii Exp $ */ #include "postgres.h" @@ -463,6 +463,13 @@ pg_mblen(const unsigned char *mbstr) return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) (mbstr)); } +/* returns the display length of a multibyte word */ +int +pg_dsplen(const unsigned char *mbstr) +{ + return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) (mbstr)); +} + /* returns the length (counted as a wchar) of a multibyte string */ int pg_mbstrlen(const unsigned char *mbstr) diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index b2d48c9d8c..f08cffa9f4 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. 
* Tatsuo Ishii - * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.35 2003/11/29 22:39:59 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.36 2004/03/15 10:41:25 ishii Exp $ * * WIN1250 client encoding updated by Pavel Behal * @@ -49,6 +49,12 @@ pg_ascii_mblen(const unsigned char *s) return (1); } +static int +pg_ascii_dsplen(const unsigned char *s) +{ + return (1); +} + /* * EUC */ @@ -107,6 +113,22 @@ pg_euc_mblen(const unsigned char *s) return (len); } +static int +pg_euc_dsplen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 2; + else if (*s == SS3) + len = 2; + else if (*s & 0x80) + len = 2; + else + len = 1; + return (len); +} + /* * EUC_JP */ @@ -122,6 +144,22 @@ pg_eucjp_mblen(const unsigned char *s) return (pg_euc_mblen(s)); } +static int +pg_eucjp_dsplen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 1; + else if (*s == SS3) + len = 2; + else if (*s & 0x80) + len = 2; + else + len = 1; + return (len); +} + /* * EUC_KR */ @@ -137,6 +175,12 @@ pg_euckr_mblen(const unsigned char *s) return (pg_euc_mblen(s)); } +static int +pg_euckr_dsplen(const unsigned char *s) +{ + return (pg_euc_dsplen(s)); +} + /* * EUC_CN */ @@ -191,6 +235,18 @@ pg_euccn_mblen(const unsigned char *s) return (len); } +static int +pg_euccn_dsplen(const unsigned char *s) +{ + int len; + + if (*s & 0x80) + len = 2; + else + len = 1; + return (len); +} + /* * EUC_TW */ @@ -250,6 +306,22 @@ pg_euctw_mblen(const unsigned char *s) return (len); } +static int +pg_euctw_dsplen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 2; + else if (*s == SS3) + len = 2; + else if (*s & 0x80) + len = 2; + else + len = 1; + return (len); +} + /* * JOHAB */ @@ -265,6 +337,12 @@ pg_johab_mblen(const unsigned char *s) return (pg_euc_mblen(s)); } +static int +pg_johab_dsplen(const unsigned char *s) +{ + return (pg_euc_dsplen(s)); +} + /* * convert UTF-8 string to pg_wchar (UCS-2) * caller should allocate enough space for "to" @@ -333,6 
+411,12 @@ pg_utf_mblen(const unsigned char *s) return (len); } +static int +pg_utf_dsplen(const unsigned char *s) +{ + return 1; /* XXX fix me! */ +} + /* * convert mule internal code to pg_wchar * caller should allocate enough space for "to" @@ -406,6 +490,12 @@ pg_mule_mblen(const unsigned char *s) return (len); } +static int +pg_mule_dsplen(const unsigned char *s) +{ + return 1; /* XXX fix me! */ +} + /* * ISO8859-1 */ @@ -430,6 +520,12 @@ pg_latin1_mblen(const unsigned char *s) return (1); } +static int +pg_latin1_dsplen(const unsigned char *s) +{ + return (1); +} + /* * SJIS */ @@ -453,6 +549,26 @@ pg_sjis_mblen(const unsigned char *s) return (len); } +static int +pg_sjis_dsplen(const unsigned char *s) +{ + int len; + + if (*s >= 0xa1 && *s <= 0xdf) + { /* 1 byte kana? */ + len = 1; + } + else if (*s > 0x7f) + { /* kanji? */ + len = 2; + } + else + { /* should be ASCII */ + len = 1; + } + return (len); +} + /* * Big5 */ @@ -472,6 +588,22 @@ pg_big5_mblen(const unsigned char *s) return (len); } +static int +pg_big5_dsplen(const unsigned char *s) +{ + int len; + + if (*s > 0x7f) + { /* kanji? */ + len = 2; + } + else + { /* should be ASCII */ + len = 1; + } + return (len); +} + /* * GBK */ @@ -491,6 +623,22 @@ pg_gbk_mblen(const unsigned char *s) return (len); } +static int +pg_gbk_dsplen(const unsigned char *s) +{ + int len; + + if (*s > 0x7f) + { /* kanji? */ + len = 2; + } + else + { /* should be ASCII */ + len = 1; + } + return (len); +} + /* * UHC */ @@ -510,6 +658,22 @@ pg_uhc_mblen(const unsigned char *s) return (len); } +static int +pg_uhc_dsplen(const unsigned char *s) +{ + int len; + + if (*s > 0x7f) + { /* 2byte? 
*/ + len = 2; + } + else + { /* should be ASCII */ + len = 1; + } + return (len); +} + /* * * GB18030 * * Added by Bill Huang , @@ -535,42 +699,58 @@ pg_gb18030_mblen(const unsigned char *s) return (len); } +static int +pg_gb18030_dsplen(const unsigned char *s) +{ + int len; + + if (*s <= 0x7f) + { /* ASCII */ + len = 1; + } + else + { + len = 2; + } + return (len); +} + pg_wchar_tbl pg_wchar_table[] = { - {pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII */ - {pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */ - {pg_euccn2wchar_with_len, pg_euccn_mblen, 3}, /* 2; PG_EUC_CN */ - {pg_euckr2wchar_with_len, pg_euckr_mblen, 3}, /* 3; PG_EUC_KR */ - {pg_euctw2wchar_with_len, pg_euctw_mblen, 3}, /* 4; PG_EUC_TW */ - {pg_johab2wchar_with_len, pg_johab_mblen, 3}, /* 5; PG_JOHAB */ - {pg_utf2wchar_with_len, pg_utf_mblen, 3}, /* 6; PG_UNICODE */ - {pg_mule2wchar_with_len, pg_mule_mblen, 3}, /* 7; PG_MULE_INTERNAL */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 8; PG_LATIN1 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 9; PG_LATIN2 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 10; PG_LATIN3 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 11; PG_LATIN4 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 12; PG_LATIN5 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 13; PG_LATIN6 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 14; PG_LATIN7 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 15; PG_LATIN8 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 16; PG_LATIN9 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 17; PG_LATIN10 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 18; PG_WIN1256 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 19; PG_TCVN */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 20; PG_WIN874 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 21; PG_KOI8 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 22; PG_WIN1251 */ - 
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 23; PG_ALT */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 24; ISO-8859-5 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 25; ISO-8859-6 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 26; ISO-8859-7 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 27; ISO-8859-8 */ - {0, pg_sjis_mblen, 2}, /* 28; PG_SJIS */ - {0, pg_big5_mblen, 2}, /* 29; PG_BIG5 */ - {0, pg_gbk_mblen, 2}, /* 30; PG_GBK */ - {0, pg_uhc_mblen, 2}, /* 31; PG_UHC */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 32; PG_WIN1250 */ - {0, pg_gb18030_mblen, 2} /* 33; PG_GB18030 */ + {pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, 1}, /* 0; PG_SQL_ASCII */ + {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, 3}, /* 1; PG_EUC_JP */ + {pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, 3}, /* 2; PG_EUC_CN */ + {pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, 3}, /* 3; PG_EUC_KR */ + {pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, 3}, /* 4; PG_EUC_TW */ + {pg_johab2wchar_with_len, pg_johab_mblen, pg_johab_dsplen, 3}, /* 5; PG_JOHAB */ + {pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, 3}, /* 6; PG_UNICODE */ + {pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, 3}, /* 7; PG_MULE_INTERNAL */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 8; PG_LATIN1 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 9; PG_LATIN2 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 10; PG_LATIN3 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 11; PG_LATIN4 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 12; PG_LATIN5 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 13; PG_LATIN6 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 14; PG_LATIN7 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 15; 
PG_LATIN8 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 16; PG_LATIN9 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 17; PG_LATIN10 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 18; PG_WIN1256 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 19; PG_TCVN */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 20; PG_WIN874 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 21; PG_KOI8 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 22; PG_WIN1251 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 23; PG_ALT */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 24; ISO-8859-5 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 25; ISO-8859-6 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 26; ISO-8859-7 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 27; ISO-8859-8 */ + {0, pg_sjis_mblen, pg_sjis_dsplen, 2}, /* 28; PG_SJIS */ + {0, pg_big5_mblen, pg_big5_dsplen,2}, /* 29; PG_BIG5 */ + {0, pg_gbk_mblen, pg_gbk_dsplen, 2}, /* 30; PG_GBK */ + {0, pg_uhc_mblen, pg_uhc_dsplen, 2}, /* 31; PG_UHC */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 32; PG_WIN1250 */ + {0, pg_gb18030_mblen, pg_gb18030_dsplen, 2} /* 33; PG_GB18030 */ }; /* returns the byte length of a word for mule internal code */ @@ -594,6 +774,20 @@ pg_encoding_mblen(int encoding, const unsigned char *mbstr) ((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr))); } +/* + * Returns the display length of a multibyte word. + */ +int +pg_encoding_dsplen(int encoding, const unsigned char *mbstr) +{ + Assert(PG_VALID_ENCODING(encoding)); + + return ((encoding >= 0 && + encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ? 
+ ((*pg_wchar_table[encoding].dsplen) (mbstr)) : + ((*pg_wchar_table[PG_SQL_ASCII].dsplen) (mbstr))); +} + /* * fetch maximum length of a char encoding */ @@ -688,6 +882,3 @@ pg_database_encoding_max_length(void) } #endif - - - diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 511107b575..2a6be545ca 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.83 2004/03/14 04:25:17 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.84 2004/03/15 10:41:26 ishii Exp $ */ #include "postgres_fe.h" #include "common.h" @@ -410,7 +410,7 @@ ReportSyntaxErrorPosition(const PGresult *result, const char *query) { qidx[i] = qoffset; scridx[i] = scroffset; - scroffset += 1; /* XXX fix me when we have screen width info */ + scroffset += PQdsplen(&query[qoffset], pset.encoding); qoffset += PQmblen(&query[qoffset], pset.encoding); } qidx[i] = qoffset; @@ -526,7 +526,7 @@ ReportSyntaxErrorPosition(const PGresult *result, const char *query) scroffset = 0; for (i = 0; i < msg.len; i += PQmblen(&msg.data[i], pset.encoding)) { - scroffset += 1; /* XXX fix me when we have screen width info */ + scroffset += PQdsplen(&msg.data[i], pset.encoding); } /* Finish and emit the message. 
*/ diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 4273da946d..c4fd6f5687 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.49 2003/11/29 22:41:04 pgsql Exp $ */ +/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.50 2004/03/15 10:41:26 ishii Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -248,11 +248,14 @@ typedef int (*mb2wchar_with_len_converter) (const unsigned char *from, int len); typedef int (*mblen_converter) (const unsigned char *mbstr); +typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr); + typedef struct { mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte * string to a wchar */ mblen_converter mblen; /* returns the length of a multibyte char */ + mbdisplaylen_converter dsplen; /* returns the display length of a multibyte char */ int maxmblen; /* max bytes for a char in this charset */ } pg_wchar_tbl; @@ -283,7 +286,9 @@ extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n); extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n); extern size_t pg_wchar_strlen(const pg_wchar *wstr); extern int pg_mblen(const unsigned char *mbstr); +extern int pg_dsplen(const unsigned char *mbstr); extern int pg_encoding_mblen(int encoding, const unsigned char *mbstr); +extern int pg_encoding_dsplen(int encoding, const unsigned char *mbstr); extern int pg_mule_mblen(const unsigned char *mbstr); extern int pg_mic_mblen(const unsigned char *mbstr); extern int pg_mbstrlen(const unsigned char *mbstr); diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index 3717998a6b..d484747b9c 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -23,7 +23,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.104 2003/11/29 19:52:12 pgsql Exp 
$ + * $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.105 2004/03/15 10:41:26 ishii Exp $ * *------------------------------------------------------------------------- */ @@ -1095,6 +1095,16 @@ PQmblen(const unsigned char *s, int encoding) return (pg_encoding_mblen(encoding, s)); } +/* + * returns the display length of the word beginning s, using the + * specified encoding. + */ +int +PQdsplen(const unsigned char *s, int encoding) +{ + return (pg_encoding_dsplen(encoding, s)); +} + /* * Get encoding id from environment variable PGCLIENTENCODING. */ diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h index f6ea1f4ba7..293d50e690 100644 --- a/src/interfaces/libpq/libpq-fe.h +++ b/src/interfaces/libpq/libpq-fe.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/interfaces/libpq/libpq-fe.h,v 1.102 2004/01/09 02:02:43 momjian Exp $ + * $PostgreSQL: pgsql/src/interfaces/libpq/libpq-fe.h,v 1.103 2004/03/15 10:41:26 ishii Exp $ * *------------------------------------------------------------------------- */ @@ -447,6 +447,9 @@ extern int lo_export(PGconn *conn, Oid lobjId, const char *filename); /* Determine length of multibyte encoded char at *s */ extern int PQmblen(const unsigned char *s, int encoding); +/* Determine display length of multibyte encoded char at *s */ +extern int PQdsplen(const unsigned char *s, int encoding); + /* Get encoding id from environment variable PGCLIENTENCODING */ extern int PQenv2encoding(void); -- 2.40.0