Back-patch addition of pg_wchar-to-multibyte conversion functionality.
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Jul 2012 20:52:55 +0000 (16:52 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Jul 2012 20:52:55 +0000 (16:52 -0400)
Back-patch of commits 72dd6291f216440f6bb61a8733729a37c7e3b2d2,
f6a05fd973a102f7e66c491d3f854864b8d24844, and
60e9c224a197aa37abb1aa3aefa3aad42da61f7f.

This is needed to support fixing the regex prefix extraction bug in
back branches.

src/backend/utils/mb/mbutils.c
src/backend/utils/mb/wchar.c
src/include/mb/pg_wchar.h

index 91977512b7f372e2a2e9b62be72892fb7595d0ac..066cf77d3fdf31f62bbea835f8c03640765cc539 100644 (file)
@@ -577,6 +577,28 @@ pg_encoding_mb2wchar_with_len(int encoding,
        return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
 }
 
+/* convert a wchar string, whose length is taken with pg_wchar_strlen(), to multibyte using the database encoding's converter */
+int
+pg_wchar2mb(const pg_wchar *from, char *to)
+{
+       return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *)to, pg_wchar_strlen(from));
+}
+
+/* convert a wchar string to a multibyte using the database encoding's converter; len (in wchars) is passed through as the length of "from" */
+int
+pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
+{
+       return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *)to, len);
+}
+
+/* same, with any encoding; the pg_wchar_table entry's wchar2mb_with_len is called unchecked, so it must be non-zero for "encoding" */
+int
+pg_encoding_wchar2mb_with_len(int encoding,
+                                                         const pg_wchar *from, char *to, int len)
+{
+       return (*pg_wchar_table[encoding].wchar2mb_with_len) (from, (unsigned char *)to, len);
+}
+
 /* returns the byte length of a multibyte word */
 int
 pg_mblen(const char *mbstr)
index 7d69d044669ec043082f2c85ea00e0479284f442..65768e224be03a7f731850582cc545cd55df6162 100644 (file)
@@ -99,8 +99,7 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
                        *to |= *from++;
                        len -= 2;
                }
-               else
-                       /* must be ASCII */
+               else                                                    /* must be ASCII */
                {
                        *to = *from++;
                        len--;
@@ -339,6 +338,55 @@ pg_euctw_dsplen(const unsigned char *s)
        return len;
 }
 
+/*
+ * Convert pg_wchar to EUC_* encoding.  Each wchar is written most significant
+ * byte first, starting at its highest non-zero byte (1 to 4 bytes per wchar).
+ * Stops after len wchars or at a zero wchar; returns bytes written, not
+ * counting the trailing zero the caller must also leave room for in "to".
+ */
+static int
+pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+       int                     cnt = 0;
+
+       while (len > 0 && *from)
+       {
+               unsigned char c;
+
+               if ((c = (*from >> 24)))                /* bits 24-31 set: 4 bytes */
+               {
+                       *to++ = c;
+                       *to++ = (*from >> 16) & 0xff;
+                       *to++ = (*from >> 8) & 0xff;
+                       *to++ = *from & 0xff;
+                       cnt += 4;
+               }
+               else if ((c = (*from >> 16)))   /* bits 16-23 set: 3 bytes */
+               {
+                       *to++ = c;
+                       *to++ = (*from >> 8) & 0xff;
+                       *to++ = *from & 0xff;
+                       cnt += 3;
+               }
+               else if ((c = (*from >> 8)))    /* bits 8-15 set: 2 bytes */
+               {
+                       *to++ = c;
+                       *to++ = *from & 0xff;
+                       cnt += 2;
+               }
+               else                                    /* only the low byte is set: 1 byte */
+               {
+                       *to++ = *from;
+                       cnt++;
+               }
+               from++;
+               len--;
+       }
+       *to = 0;                        /* terminator, not included in cnt */
+       return cnt;
+}
+
+
 /*
  * JOHAB
  */
@@ -419,6 +467,66 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
        return cnt;
 }
 
+
+/*
+ * Map a Unicode code point to UTF-8 (1 to 4 bytes, no terminator written).
+ * utf8string must have 4 bytes of space allocated; it is also the return value.
+ */
+static unsigned char *
+unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+{
+       if (c <= 0x7F)                  /* 1 byte, stored as is */
+       {
+               utf8string[0] = c;
+       }
+       else if (c <= 0x7FF)            /* 2 bytes: 5 + 6 payload bits */
+       {
+               utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
+               utf8string[1] = 0x80 | (c & 0x3F);
+       }
+       else if (c <= 0xFFFF)           /* 3 bytes: 4 + 6 + 6 payload bits */
+       {
+               utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
+               utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
+               utf8string[2] = 0x80 | (c & 0x3F);
+       }
+       else                            /* 4 bytes: 3 + 6 + 6 + 6 payload bits; larger c is masked, not rejected */
+       {
+               utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
+               utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
+               utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
+               utf8string[3] = 0x80 | (c & 0x3F);
+       }
+
+       return utf8string;
+}
+
+/*
+ * Trivial conversion from pg_wchar to UTF-8.  Stops after len wchars or at a
+ * zero wchar, appends a trailing zero, and returns the byte count without it.
+ * caller should allocate enough space for "to": unicode_to_utf8 writes each
+ * character straight into "to", and one more byte holds the trailing zero.
+ */
+static int
+pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+       int                     cnt = 0;
+
+       while (len > 0 && *from)
+       {
+               int char_len;
+
+               unicode_to_utf8(*from, to);
+               char_len = pg_utf_mblen(to);    /* byte length of the character just written */
+               cnt += char_len;
+               to += char_len;
+               from++;
+               len--;
+       }
+       *to = 0;                        /* terminator, not included in cnt */
+       return cnt;
+}
+
 /*
  * Return the byte length of a UTF8 character pointed to by s
  *
@@ -682,6 +790,77 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
        return cnt;
 }
 
+/*
+ * convert pg_wchar to mule internal code; the output form of each wchar is
+ * chosen from its bits 16-23 (lb below).  Stops after len wchars or at a zero
+ * wchar, appends a trailing zero, and returns the byte count without it.
+ * caller should allocate enough space for "to", including the trailing zero
+ */
+static int
+pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+       int                     cnt = 0;
+
+       while (len > 0 && *from)
+       {
+               unsigned char lb;
+
+               lb = (*from >> 16) & 0xff;
+               if (IS_LC1(lb))                         /* lb + low byte */
+               {
+                       *to++ = lb;
+                       *to++ = *from & 0xff;
+                       cnt += 2;
+               }
+               else if (IS_LC2(lb))                    /* lb + two low bytes */
+               {
+                       *to++ = lb;
+                       *to++ = (*from >> 8) & 0xff;
+                       *to++ = *from & 0xff;
+                       cnt += 3;
+               }
+               else if (IS_LCPRV1_A_RANGE(lb))         /* LCPRV1_A prefix + lb + low byte */
+               {
+                       *to++ = LCPRV1_A;
+                       *to++ = lb;
+                       *to++ = *from & 0xff;
+                       cnt += 3;
+               }
+               else if (IS_LCPRV1_B_RANGE(lb))         /* LCPRV1_B prefix + lb + low byte */
+               {
+                       *to++ = LCPRV1_B;
+                       *to++ = lb;
+                       *to++ = *from & 0xff;
+                       cnt += 3;
+               }
+               else if (IS_LCPRV2_A_RANGE(lb))         /* LCPRV2_A prefix + lb + two low bytes */
+               {
+                       *to++ = LCPRV2_A;
+                       *to++ = lb;
+                       *to++ = (*from >> 8) & 0xff;
+                       *to++ = *from & 0xff;
+                       cnt += 4;
+               }
+               else if (IS_LCPRV2_B_RANGE(lb))         /* LCPRV2_B prefix + lb + two low bytes */
+               {
+                       *to++ = LCPRV2_B;
+                       *to++ = lb;
+                       *to++ = (*from >> 8) & 0xff;
+                       *to++ = *from & 0xff;
+                       cnt += 4;
+               }
+               else                                    /* lb matched none of the above: low byte only */
+               {
+                       *to++ = *from & 0xff;
+                       cnt += 1;
+               }
+               from++;
+               len--;
+       }
+       *to = 0;                        /* terminator, not included in cnt */
+       return cnt;
+}
+
 int
 pg_mule_mblen(const unsigned char *s)
 {
@@ -737,6 +916,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
        return cnt;
 }
 
+/*
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
+ * high bits: each wchar is stored into one unsigned char of "to".
+ * Stops after len wchars or at a zero wchar ("from" not necessarily null
+ * terminated), appends a trailing zero, and returns the byte count without
+ * it.  caller should allocate enough space for "to", trailing zero included.
+ */
+static int
+pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+       int                     cnt = 0;
+
+       while (len > 0 && *from)
+       {
+               *to++ = *from++;        /* implicit narrowing drops everything above the low byte */
+               len--;
+               cnt++;
+       }
+       *to = 0;                        /* terminator, not included in cnt */
+       return cnt;
+}
+
 static int
 pg_latin1_mblen(const unsigned char *s)
 {
@@ -1304,47 +1505,47 @@ pg_utf8_islegal(const unsigned char *source, int length)
  *-------------------------------------------------------------------
  */
 pg_wchar_tbl pg_wchar_table[] = {
-       {pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1},       /* 0; PG_SQL_ASCII      */
-       {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},       /* 1; PG_EUC_JP */
-       {pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2},       /* 2; PG_EUC_CN */
-       {pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3},       /* 3; PG_EUC_KR */
-       {pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4},       /* 4; PG_EUC_TW */
-       {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},       /* 5; PG_EUC_JIS_2004 */
-       {pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4},      /* 6; PG_UTF8 */
-       {pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4},           /* 7; PG_MULE_INTERNAL */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 8; PG_LATIN1 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 9; PG_LATIN2 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 10; PG_LATIN3 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 11; PG_LATIN4 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 12; PG_LATIN5 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 13; PG_LATIN6 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 14; PG_LATIN7 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 15; PG_LATIN8 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 16; PG_LATIN9 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 17; PG_LATIN10 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 18; PG_WIN1256 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 19; PG_WIN1258 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 20; PG_WIN866 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 21; PG_WIN874 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 22; PG_KOI8R */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 23; PG_WIN1251 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 24; PG_WIN1252 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 25; ISO-8859-5 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 26; ISO-8859-6 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 27; ISO-8859-7 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 28; ISO-8859-8 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 29; PG_WIN1250 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 30; PG_WIN1253 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 31; PG_WIN1254 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 32; PG_WIN1255 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* 33; PG_WIN1257 */
-       {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2},        /* 34; PG_SJIS */
-       {0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2},        /* 35; PG_BIG5 */
-       {0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2},           /* 36; PG_GBK */
-       {0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2},           /* 37; PG_UHC */
-       {0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4},       /* 38; PG_GB18030 */
-       {0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* 39; PG_JOHAB */
-       {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}         /* 40; PG_SHIFT_JIS_2004 */
+       {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1},     /* PG_SQL_ASCII */
+       {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},        /* PG_EUC_JP */
+       {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2},        /* PG_EUC_CN */
+       {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3},        /* PG_EUC_KR */
+       {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4},        /* PG_EUC_TW */
+       {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},        /* PG_EUC_JIS_2004 */
+       {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4},       /* PG_UTF8 */
+       {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4},           /* PG_MULE_INTERNAL */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN1 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN2 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN3 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN4 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN5 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN6 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN7 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN8 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN9 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_LATIN10 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1256 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1258 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN866 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN874 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_KOI8R */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1251 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1252 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* ISO-8859-5 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* ISO-8859-6 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* ISO-8859-7 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* ISO-8859-8 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1250 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1253 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1254 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1255 */
+       {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},         /* PG_WIN1257 */
+       {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2},     /* PG_SJIS */
+       {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2},     /* PG_BIG5 */
+       {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2},                /* PG_GBK */
+       {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2},                /* PG_UHC */
+       {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4},    /* PG_GB18030 */
+       {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
+       {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}              /* PG_SHIFT_JIS_2004 */
 };
 
 /* returns the byte length of a word for mule internal code */
index 4755aa17fe72d6dc4dcbd7ba7f5155735b8d0a59..d9d0ab48ac6a3204f870d3f47dc6ce2f27b2cb8f 100644 (file)
@@ -51,7 +51,13 @@ typedef unsigned int pg_wchar;
 /*
  * Is a prefix byte for "private" single byte encodings?
  */
-#define IS_LCPRV1(c)   ((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b)
+#define LCPRV1_A               0x9a    /* prefix byte written before a leading byte in IS_LCPRV1_A_RANGE */
+#define LCPRV1_B               0x9b    /* prefix byte written before a leading byte in IS_LCPRV1_B_RANGE */
+#define IS_LCPRV1(c)   ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
+#define IS_LCPRV1_A_RANGE(c)   \
+       ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)      /* leading bytes paired with LCPRV1_A */
+#define IS_LCPRV1_B_RANGE(c)   \
+       ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)      /* leading bytes paired with LCPRV1_B */
 /*
  * Is a leading byte for "official" multibyte encodings?
  */
@@ -59,7 +65,13 @@ typedef unsigned int pg_wchar;
 /*
  * Is a prefix byte for "private" multibyte encodings?
  */
-#define IS_LCPRV2(c)   ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)
+#define LCPRV2_A               0x9c    /* prefix byte written before a leading byte in IS_LCPRV2_A_RANGE */
+#define LCPRV2_B               0x9d    /* prefix byte written before a leading byte in IS_LCPRV2_B_RANGE */
+#define IS_LCPRV2(c)   ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
+#define IS_LCPRV2_A_RANGE(c)   \
+       ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)      /* leading bytes paired with LCPRV2_A */
+#define IS_LCPRV2_B_RANGE(c)   \
+       ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)      /* leading bytes paired with LCPRV2_B */
 
 /*----------------------------------------------------
  * leading characters
@@ -264,7 +276,11 @@ extern pg_enc2name pg_enc2name_tbl[];
  * pg_wchar stuff
  */
 typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
-                                                                                                               pg_wchar *to,
+                                                                                                       pg_wchar *to,
+                                                                                                       int len);
+
+typedef int (*wchar2mb_with_len_converter) (const pg_wchar *from,
+                                                                                                               unsigned char *to,
                                                                                                                int len);
 
 typedef int (*mblen_converter) (const unsigned char *mbstr);
@@ -275,8 +291,10 @@ typedef int (*mbverifier) (const unsigned char *mbstr, int len);
 
 typedef struct
 {
-       mb2wchar_with_len_converter mb2wchar_with_len;          /* convert a multibyte
-                                                                                                                * string to a wchar */
+       mb2wchar_with_len_converter mb2wchar_with_len;  /* convert a multibyte
+                                                                                                        * string to a wchar */
+       wchar2mb_with_len_converter wchar2mb_with_len;  /* convert a wchar
+                                                                                                        * string to a multibyte */
        mblen_converter mblen;          /* get byte length of a char */
        mbdisplaylen_converter dsplen;          /* get display width of a char */
        mbverifier      mbverify;               /* verify multibyte sequence */
@@ -357,6 +375,10 @@ extern int pg_mb2wchar(const char *from, pg_wchar *to);
 extern int     pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
 extern int pg_encoding_mb2wchar_with_len(int encoding,
                                                          const char *from, pg_wchar *to, int len);
+extern int     pg_wchar2mb(const pg_wchar *from, char *to);
+extern int     pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len);
+extern int pg_encoding_wchar2mb_with_len(int encoding,
+                                                         const pg_wchar *from, char *to, int len);
 extern int     pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
 extern int     pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
 extern int     pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);