*to |= *from++;
len -= 2;
}
- else
- /* must be ASCII */
+ else /* must be ASCII */
{
*to = *from++;
len--;
return len;
}
+/*
+ * Convert pg_wchar to EUC_* encoding.
+ * caller must allocate enough space for "to", including a trailing zero!
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ unsigned char c;
+
+ if ((c = (*from >> 24)))
+ {
+ *to++ = c;
+ *to++ = (*from >> 16) & 0xff;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else if ((c = (*from >> 16)))
+ {
+ *to++ = c;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if ((c = (*from >> 8)))
+ {
+ *to++ = c;
+ *to++ = *from & 0xff;
+ cnt += 2;
+ }
+ else
+ {
+ *to++ = *from;
+ cnt++;
+ }
+ from++;
+ len--;
+ }
+ *to = 0;
+ return cnt;
+}
+
+
/*
* JOHAB
*/
return cnt;
}
+
+/*
+ * Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
+ * space allocated.
+ */
+static unsigned char *
+unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+{
+ if (c <= 0x7F)
+ {
+ utf8string[0] = c;
+ }
+ else if (c <= 0x7FF)
+ {
+ utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
+ utf8string[1] = 0x80 | (c & 0x3F);
+ }
+ else if (c <= 0xFFFF)
+ {
+ utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
+ utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
+ utf8string[2] = 0x80 | (c & 0x3F);
+ }
+ else
+ {
+ utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
+ utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
+ utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
+ utf8string[3] = 0x80 | (c & 0x3F);
+ }
+
+ return utf8string;
+}
+
+/*
+ * Trivial conversion from pg_wchar to UTF-8.
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ int char_len;
+
+ unicode_to_utf8(*from, to);
+ char_len = pg_utf_mblen(to);
+ cnt += char_len;
+ to += char_len;
+ from++;
+ len--;
+ }
+ *to = 0;
+ return cnt;
+}
+
/*
* Return the byte length of a UTF8 character pointed to by s
*
return cnt;
}
+/*
+ * convert pg_wchar to mule internal code
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ unsigned char lb;
+
+ lb = (*from >> 16) & 0xff;
+ if (IS_LC1(lb))
+ {
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 2;
+ }
+ else if (IS_LC2(lb))
+ {
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV1_A_RANGE(lb))
+ {
+ *to++ = LCPRV1_A;
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV1_B_RANGE(lb))
+ {
+ *to++ = LCPRV1_B;
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV2_A_RANGE(lb))
+ {
+ *to++ = LCPRV2_A;
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else if (IS_LCPRV2_B_RANGE(lb))
+ {
+ *to++ = LCPRV2_B;
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else
+ {
+ *to++ = *from & 0xff;
+ cnt += 1;
+ }
+ from++;
+ len--;
+ }
+ *to = 0;
+ return cnt;
+}
+
int
pg_mule_mblen(const unsigned char *s)
{
return cnt;
}
+/*
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
+ * high bits.
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ *to++ = *from++;
+ len--;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
static int
pg_latin1_mblen(const unsigned char *s)
{
*-------------------------------------------------------------------
*/
pg_wchar_tbl pg_wchar_table[] = {
- {pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* 0; PG_SQL_ASCII */
- {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* 1; PG_EUC_JP */
- {pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* 2; PG_EUC_CN */
- {pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* 3; PG_EUC_KR */
- {pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* 4; PG_EUC_TW */
- {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* 5; PG_EUC_JIS_2004 */
- {pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* 6; PG_UTF8 */
- {pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* 7; PG_MULE_INTERNAL */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 8; PG_LATIN1 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 9; PG_LATIN2 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 10; PG_LATIN3 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 11; PG_LATIN4 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 12; PG_LATIN5 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 13; PG_LATIN6 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 14; PG_LATIN7 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 15; PG_LATIN8 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 16; PG_LATIN9 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 17; PG_LATIN10 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 18; PG_WIN1256 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 19; PG_WIN1258 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 20; PG_WIN866 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 21; PG_WIN874 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 22; PG_KOI8R */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 23; PG_WIN1251 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 24; PG_WIN1252 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 25; ISO-8859-5 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 26; ISO-8859-6 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 27; ISO-8859-7 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 28; ISO-8859-8 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 29; PG_WIN1250 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 30; PG_WIN1253 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 31; PG_WIN1254 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 32; PG_WIN1255 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 33; PG_WIN1257 */
- {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* 34; PG_SJIS */
- {0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* 35; PG_BIG5 */
- {0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* 36; PG_GBK */
- {0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* 37; PG_UHC */
- {0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* 38; PG_GB18030 */
- {0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* 39; PG_JOHAB */
- {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* 40; PG_SHIFT_JIS_2004 */
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
};
/* returns the byte length of a word for mule internal code */