1 /*-------------------------------------------------------------------------
3 * EUC_JIS_2004, SHIFT_JIS_2004
5 * Copyright (c) 2007-2008, PostgreSQL Global Development Group
8 * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.3 2008/01/01 20:31:21 tgl Exp $
10 *-------------------------------------------------------------------------
15 #include "mb/pg_wchar.h"
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
22 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
23 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
25 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
26 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
30 * INTEGER, -- source encoding id
31 * INTEGER, -- destination encoding id
32 * CSTRING, -- source string (null terminated C string)
33 * CSTRING, -- destination string (null terminated C string)
34 * INTEGER -- source string length
/*
 * fmgr entry point for the EUC_JIS_2004 -> SHIFT_JIS_2004 conversion.
 * Argument layout: 0 = source encoding id, 1 = destination encoding id,
 * 2 = source string, 3 = destination string, 4 = source string length.
 */
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
/* fetch the source buffer, the destination buffer and the source length */
42 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44 int len = PG_GETARG_INT32(4);
/* the encoding ids are only sanity-checked; they must name exactly this pair */
46 Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
47 Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);
/* the static worker performs the actual byte-level conversion */
50 euc_jis_20042shift_jis_2004(src, dest, len);
/*
 * fmgr entry point for the SHIFT_JIS_2004 -> EUC_JIS_2004 conversion.
 * Argument layout: 0 = source encoding id, 1 = destination encoding id,
 * 2 = source string, 3 = destination string, 4 = source string length.
 */
56 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
/* fetch the source buffer, the destination buffer and the source length */
58 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
59 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
60 int len = PG_GETARG_INT32(4);
/* the encoding ids are only sanity-checked; they must name exactly this pair */
62 Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
63 Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);
/* the static worker performs the actual byte-level conversion */
66 shift_jis_20042euc_jis_2004(src, dest, len);
72 * EUC_JIS_2004 -> SHIFT_JIS_2004
/*
 * Worker for the EUC_JIS_2004 -> SHIFT_JIS_2004 direction.  Reads the
 * input at euc (len is passed along when reporting errors) and writes the
 * converted bytes through p.  Input that fails one of the checks below is
 * handed to report_invalid_encoding().
 */
75 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
/* bytes without the high bit set take their own path, which includes an
 * invalid-encoding report */
85 if (!IS_HIGHBIT_SET(c1))
89 report_invalid_encoding(PG_EUC_JIS_2004,
90 (const char *) euc, len);
/* l is the result of verifying the sequence at euc; the branches below
 * compare it against the expected sequence length (2 or 3) */
97 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
100 report_invalid_encoding(PG_EUC_JIS_2004,
101 (const char *) euc, len);
/* dispatch on the lead byte together with the sequence length */
103 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
107 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
/* first output byte, computed from ku alone */
123 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
/* ku 78..94 yield first output bytes 0xf4..0xfc via the formula below */
126 if (ku >= 78 && ku <= 94)
128 *p++ = (ku + 0x19b) >> 1;
131 report_invalid_encoding(PG_EUC_JIS_2004,
132 (const char *) euc, len);
/* ten is split into the sub-ranges 1..63 and 64..94; a report follows for
 * values in neither */
137 if (ten >= 1 && ten <= 63)
139 else if (ten >= 64 && ten <= 94)
142 report_invalid_encoding(PG_EUC_JIS_2004,
143 (const char *) euc, len);
149 else if (l == 2) /* JIS X 0213 plane 1? */
/* ku 1..62 yield first output bytes 0x81..0x9f, ku 63..94 yield 0xe0..0xef */
154 if (ku >= 1 && ku <= 62)
155 *p++ = (ku + 0x101) >> 1;
156 else if (ku >= 63 && ku <= 94)
157 *p++ = (ku + 0x181) >> 1;
159 report_invalid_encoding(PG_EUC_JIS_2004,
160 (const char *) euc, len);
/* same 1..63 / 64..94 split of ten as in the plane 2 branch */
164 if (ten >= 1 && ten <= 63)
166 else if (ten >= 64 && ten <= 94)
169 report_invalid_encoding(PG_EUC_JIS_2004,
170 (const char *) euc, len);
/* last report in the dispatch chain above */
176 report_invalid_encoding(PG_EUC_JIS_2004,
177 (const char *) euc, len);
186 * returns SHIFT_JIS_2004 "ten" code indicated by second byte
187 * *ku = 0: "ku" = even
188 * *ku = 1: "ku" = odd
/*
 * Classifies byte b.  Callers in this file pass the second byte of a
 * SHIFT_JIS_2004 character, store the result in "ten" and pass &kubun
 * as the ku argument.
 */
191 get_ten(int b, int *ku)
/* three accepted ranges of b, each handled separately */
195 if (b >= 0x40 && b <= 0x7e)
200 else if (b >= 0x80 && b <= 0x9e)
205 else if (b >= 0x9f && b <= 0xfc)
/* -1 is the error value for ten */
212 ten = -1; /* error */
218 * SHIFT_JIS_2004 ---> EUC_JIS_2004
/*
 * Worker for the SHIFT_JIS_2004 -> EUC_JIS_2004 direction.  sjis is the
 * read-only input, p the non-const output argument, and len is passed
 * along when reporting errors.  Input that fails one of the checks below
 * is handed to report_invalid_encoding().
 */
222 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
/* bytes without the high bit set take their own path, which includes an
 * invalid-encoding report */
237 if (!IS_HIGHBIT_SET(c1))
241 report_invalid_encoding(PG_SHIFT_JIS_2004,
242 (const char *) sjis, len);
/* l is the result of verifying the sequence at sjis; the kana branch
 * below requires it to be 1 */
249 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
252 report_invalid_encoding(PG_SHIFT_JIS_2004,
253 (const char *) sjis, len);
255 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
257 /* JIS X0201 (1 byte kana) */
/* dispatch on the lead byte c1; each branch decodes c2 with get_ten(),
 * which also sets kubun */
270 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
/* ku from the lead byte alone; this expression always yields an even value */
272 ku = (c1 << 1) - 0x100;
273 ten = get_ten(c2, &kubun);
275 report_invalid_encoding(PG_SHIFT_JIS_2004,
276 (const char *) sjis, len);
279 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 63ku-94ku */
/* same scheme as above with a different offset */
281 ku = (c1 << 1) - 0x180;
282 ten = get_ten(c2, &kubun);
284 report_invalid_encoding(PG_SHIFT_JIS_2004,
286 (const char *) sjis, len);
289 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
290 * 1,3,4,5,8,12,13,14,15 ku */
293 ten = get_ten(c2, &kubun);
295 report_invalid_encoding(PG_SHIFT_JIS_2004,
296 (const char *) sjis, len);
/* two ku values are paired per assignment; kubun from get_ten() selects
 * between them */
300 ku = kubun == 0 ? 8 : 1;
303 ku = kubun == 0 ? 4 : 3;
306 ku = kubun == 0 ? 12 : 5;
309 ku = kubun == 0 ? 14 : 13;
313 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
316 ten = get_ten(c2, &kubun);
318 report_invalid_encoding(PG_SHIFT_JIS_2004,
319 (const char *) sjis, len);
/* lead byte 0xf4 with kubun == 1 is tested separately from the general
 * formula */
320 if (c1 == 0xf4 && kubun == 1)
/* general case: 0xf4..0xfc with kubun 0 or 1 gives ku in 78..94 once the
 * 0xf4/kubun==1 combination is excluded */
323 ku = (c1 << 1) - 0x19a - kubun;
/* last report in the dispatch chain above */
326 report_invalid_encoding(PG_SHIFT_JIS_2004,
327 (const char *) sjis, len);