* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.10 2005/06/10 16:43:56 ishii Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.11 2005/06/24 13:56:39 ishii Exp $
*
*-------------------------------------------------------------------------
*/
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
+/* direct converters; parameter names match the definitions below */
+static void euc_jp2sjis(unsigned char *euc, unsigned char *p, int len);
+static void sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len);
Datum
euc_jp_to_sjis(PG_FUNCTION_ARGS)
unsigned char *src = PG_GETARG_CSTRING(2);
unsigned char *dest = PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
- unsigned char *buf;
Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
Assert(PG_GETARG_INT32(1) == PG_SJIS);
Assert(len >= 0);
- buf = palloc(len * ENCODING_GROWTH_RATE);
- euc_jp2mic(src, buf, len);
- mic2sjis(buf, dest, strlen(buf));
- pfree(buf);
+ euc_jp2sjis(src, dest, len);
PG_RETURN_VOID();
}
unsigned char *src = PG_GETARG_CSTRING(2);
unsigned char *dest = PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
- unsigned char *buf;
Assert(PG_GETARG_INT32(0) == PG_SJIS);
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
Assert(len >= 0);
- buf = palloc(len * ENCODING_GROWTH_RATE);
- sjis2mic(src, buf, len);
- mic2euc_jp(buf, dest, strlen(buf));
- pfree(buf);
+ sjis2euc_jp(src, dest, len);
PG_RETURN_VOID();
}
}
*p = '\0';
}
+
+/*
+ * EUC_JP -> SJIS
+ *
+ * Convert the EUC_JP byte string at "euc" directly to SJIS, writing the
+ * result to "p" followed by a terminating NUL.
+ *
+ *	euc - input bytes
+ *	p   - output buffer; the caller must make it large enough, no bound
+ *		  is checked here
+ *	len - number of input bytes to consider
+ *
+ * Conversion stops at the first of: "len" bytes consumed, a NUL byte in
+ * lead-byte position, or a multibyte sequence whose trailing bytes would
+ * lie beyond "len" (such a truncated sequence is dropped rather than
+ * read past the end of the input).
+ */
+static void
+euc_jp2sjis(unsigned char *euc, unsigned char *p, int len)
+{
+	int			c1,
+				c2,
+				k;
+	unsigned char *euc_end = euc + len;
+
+	/* strictly less-than: euc[len] is not part of the input */
+	while (euc < euc_end && (c1 = *euc++))
+	{
+		if (c1 < 0x80)
+		{
+			/* should be ASCII */
+			*p++ = c1;
+		}
+		else if (c1 == SS2)
+		{
+			/* hankaku kana? the byte after SS2 is copied unchanged */
+			if (euc >= euc_end)
+				break;			/* truncated sequence */
+			*p++ = *euc++;
+		}
+		else if (c1 == SS3)
+		{
+			/* JIS X0212 kanji? two more bytes follow SS3 */
+			if (euc_end - euc < 2)
+				break;			/* truncated sequence */
+			c1 = *euc++;
+			c2 = *euc++;
+			k = c1 << 8 | c2;
+			if (k >= 0xf5a1)
+			{
+				/* UDC2 */
+				c1 -= 0x54;
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
+				*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
+			}
+			else
+			{
+				int			i,
+							k2;
+
+				/*
+				 * IBM kanji: look the code up in ibmkanji[], comparing
+				 * against the low 16 bits of each entry's euc field.  An
+				 * entry whose low 16 bits are 0xffff ends the table; if
+				 * it is reached without a match, PGSJISALTCODE is
+				 * emitted instead.
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].euc & 0xffff;
+					if (k2 == 0xffff)
+					{
+						*p++ = PGSJISALTCODE >> 8;
+						*p++ = PGSJISALTCODE & 0xff;
+						break;
+					}
+					if (k2 == k)
+					{
+						k = ibmkanji[i].sjis;
+						*p++ = k >> 8;
+						*p++ = k & 0xff;
+						break;
+					}
+				}
+			}
+		}
+		else
+		{
+			/* JIS X0208 kanji? one more byte follows the lead byte */
+			if (euc >= euc_end)
+				break;			/* truncated sequence */
+			c2 = *euc++;
+			k = (c1 << 8) | (c2 & 0xff);
+			if (k >= 0xf5a1)
+			{
+				/* UDC1 */
+				c1 -= 0x54;
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
+			}
+			else
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
+			/* for UDC1 the second byte uses the already-adjusted c1 */
+			*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
+		}
+	}
+	*p = '\0';
+}
+
+/*
+ * SJIS ---> EUC_JP
+ *
+ * Convert the SJIS byte string at "sjis" directly to EUC_JP, writing the
+ * result to "p" followed by a terminating NUL.
+ *
+ *	sjis - input bytes
+ *	p    - output buffer; the caller must make it large enough, no bound
+ *		   is checked here
+ *	len  - number of input bytes to consider
+ *
+ * Conversion stops at the first of: "len" bytes consumed, a NUL byte in
+ * lead-byte position, or a two-byte sequence whose second byte would lie
+ * beyond "len" (such a truncated sequence is dropped rather than read
+ * past the end of the input).
+ */
+static void
+sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len)
+{
+	int			c1,
+				c2,
+				i,
+				k,
+				k2;
+	unsigned char *sjis_end = sjis + len;
+
+	/* strictly less-than: sjis[len] is not part of the input */
+	while (sjis < sjis_end && (c1 = *sjis++))
+	{
+		if (c1 < 0x80)
+		{
+			/* should be ASCII */
+			*p++ = c1;
+		}
+		else if (c1 >= 0xa1 && c1 <= 0xdf)
+		{
+			/* JIS X0201 (1 byte kana): emitted as SS2 + the same byte */
+			*p++ = SS2;
+			*p++ = c1;
+		}
+		else
+		{
+			/*
+			 * JIS X0208, X0212, user defined extended characters
+			 */
+			if (sjis >= sjis_end)
+				break;			/* truncated sequence */
+			c2 = *sjis++;
+			k = (c1 << 8) + c2;
+			if (k >= 0xed40 && k < 0xf040)
+			{
+				/*
+				 * NEC selection IBM kanji: if ibmkanji[] has a matching
+				 * nec entry, replace the code by that entry's sjis value
+				 * and let the range tests below handle the result.  An
+				 * unmatched code is left unchanged.
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].nec;
+					if (k2 == 0xffff)
+						break;
+					if (k2 == k)
+					{
+						k = ibmkanji[i].sjis;
+						c1 = (k >> 8) & 0xff;
+						c2 = k & 0xff;
+						/* k has been replaced; stop scanning */
+						break;
+					}
+				}
+			}
+
+			if (k < 0xeb3f)
+			{
+				/* JIS X0208 */
+				*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
+			{
+				/*
+				 * Codes in these ranges (including NEC selection IBM
+				 * kanji not remapped above) get no individual mapping;
+				 * the substitute code PGEUCALTCODE is emitted instead.
+				 */
+				*p++ = PGEUCALTCODE >> 8;
+				*p++ = PGEUCALTCODE & 0xff;
+			}
+			else if (k >= 0xf040 && k < 0xf540)
+			{
+				/*
+				 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
+				 * 0x7e7e EUC 0xf5a1 - 0xfefe
+				 */
+				c1 -= 0x6f;
+				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if (k >= 0xf540 && k < 0xfa40)
+			{
+				/*
+				 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
+				 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
+				 */
+				*p++ = SS3;
+				c1 -= 0x74;
+				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if (k >= 0xfa40)
+			{
+				/*
+				 * mapping IBM kanji to X0208 and X0212
+				 *
+				 * Nothing is emitted when the code is not in ibmkanji[].
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].sjis;
+					if (k2 == 0xffff)
+						break;
+					if (k2 == k)
+					{
+						k = ibmkanji[i].euc;
+						if (k >= 0x8f0000)
+						{
+							/* three-byte form, introduced by SS3 */
+							*p++ = SS3;
+							*p++ = 0x80 | ((k & 0xff00) >> 8);
+							*p++ = 0x80 | (k & 0xff);
+						}
+						else
+						{
+							*p++ = 0x80 | (k >> 8);
+							*p++ = 0x80 | (k & 0xff);
+						}
+						/* character emitted; k is no longer the search key */
+						break;
+					}
+				}
+			}
+		}
+	}
+	*p = '\0';
+}
+