2 * conversion functions between pg_wchar and multi-byte streams.
4 * $Id: wchar.c,v 1.14 2001/02/10 02:31:27 tgl Exp $
6 * WIN1250 client encoding updated by Pavel Behal
9 /* can be used in either frontend or backend */
10 #include "postgres_fe.h"
12 #include "mb/pg_wchar.h"
15 * Conversion to pg_wchar is table driven.
16 * To add support for an encoding, define mb2wchar_with_len() and mblen()
17 * for that particular encoding. Note that if the encoding is only
18 * supported in the client, you don't need to define the
19 * mb2wchar_with_len() function (SJIS is such a case).
25 static int pg_ascii2wchar_with_len
26 (const unsigned char *from, pg_wchar * to, int len)
30 while (*from && len > 0)
41 pg_ascii_mblen(const unsigned char *s)
50 static int pg_euc2wchar_with_len
51 (const unsigned char *from, pg_wchar * to, int len)
55 while (*from && len > 0)
64 else if (*from == SS3)
68 *to |= 0x3f & *from++;
71 else if (*from & 0x80)
90 pg_euc_mblen(const unsigned char *s)
108 static int pg_eucjp2wchar_with_len
109 (const unsigned char *from, pg_wchar * to, int len)
111 return(pg_euc2wchar_with_len(from, to, len));
115 pg_eucjp_mblen(const unsigned char *s)
117 return (pg_euc_mblen(s));
123 static int pg_euckr2wchar_with_len
124 (const unsigned char *from, pg_wchar * to, int len)
126 return(pg_euc2wchar_with_len(from, to, len));
130 pg_euckr_mblen(const unsigned char *s)
132 return (pg_euc_mblen(s));
138 static int pg_euccn2wchar_with_len
139 (const unsigned char *from, pg_wchar * to, int len)
143 while (*from && len > 0)
149 *to = 0x3f00 & (*from++ << 8);
153 else if (*from == SS3)
157 *to |= 0x3f & *from++;
160 else if (*from & 0x80)
179 pg_euccn_mblen(const unsigned char *s)
193 static int pg_euctw2wchar_with_len
194 (const unsigned char *from, pg_wchar * to, int len)
198 while (*from && len > 0)
209 else if (*from == SS3)
213 *to |= 0x3f & *from++;
216 else if (*from & 0x80)
235 pg_euctw_mblen(const unsigned char *s)
251 * convert a UTF-8 string to pg_wchar (UCS-2)
252 * The caller should allocate enough space for "to".
253 * len: length of "from".
254 * "from" is not necessarily null terminated.
257 pg_utf2wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
264 while (*from && len > 0)
266 if ((*from & 0x80) == 0)
271 else if ((*from & 0xe0) == 0xc0)
279 else if ((*from & 0xe0) == 0xe0)
302 * returns the byte length of the UTF-8 character pointed to by s
305 pg_utf_mblen(const unsigned char *s)
309 if ((*s & 0x80) == 0)
311 else if ((*s & 0xe0) == 0xc0)
313 else if ((*s & 0xe0) == 0xe0)
319 * convert mule internal code to pg_wchar
320 * The caller should allocate enough space for "to".
321 * len: length of "from".
322 * "from" is not necessarily null terminated.
325 pg_mule2wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
329 while (*from && len > 0)
337 else if (IS_LCPRV1(*from))
344 else if (IS_LC2(*from))
351 else if (IS_LCPRV2(*from))
361 *to = (unsigned char) *from++;
372 pg_mule_mblen(const unsigned char *s)
378 else if (IS_LCPRV1(*s))
382 else if (IS_LCPRV2(*s))
395 pg_latin12wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
399 while (*from && len-- > 0)
409 pg_latin1_mblen(const unsigned char *s)
418 pg_sjis_mblen(const unsigned char *s)
422 if (*s >= 0xa1 && *s <= 0xdf)
431 { /* should be ASCII */
441 pg_big5_mblen(const unsigned char *s)
450 { /* should be ASCII */
456 pg_wchar_tbl pg_wchar_table[] = {
457 {pg_ascii2wchar_with_len, pg_ascii_mblen}, /* 0 */
458 {pg_eucjp2wchar_with_len, pg_eucjp_mblen}, /* 1 */
459 {pg_euccn2wchar_with_len, pg_euccn_mblen}, /* 2 */
460 {pg_euckr2wchar_with_len, pg_euckr_mblen}, /* 3 */
461 {pg_euctw2wchar_with_len, pg_euctw_mblen}, /* 4 */
462 {pg_utf2wchar_with_len, pg_utf_mblen}, /* 5 */
463 {pg_mule2wchar_with_len, pg_mule_mblen}, /* 6 */
464 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 7 */
465 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 8 */
466 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 9 */
467 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 10 */
468 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 11 */
469 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 12 */
470 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 13 */
471 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 14 */
472 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 15 */
473 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 16 */
474 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 17 */
475 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 18 */
476 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 19 */
477 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 20 */
478 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 21 */
479 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 22 */
480 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 23 */
481 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 24 */
482 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 25 */
483 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 26 */
484 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 27 */
485 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 28 */
486 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 29 */
487 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 30 */
488 {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 31 */
489 {0, pg_sjis_mblen}, /* 32 */
490 {0, pg_big5_mblen}, /* 33 */
491 {pg_latin12wchar_with_len, pg_latin1_mblen} /* 34 */
494 /* returns the byte length of a character in mule internal code */
496 pg_mic_mblen(const unsigned char *mbstr)
498 return (pg_mule_mblen(mbstr));