1 /*-------------------------------------------------------------------------
3 * EUC_JIS_2004, SHIFT_JIS_2004
5 * Copyright (c) 2007-2016, PostgreSQL Global Development Group
8 * src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
10 *-------------------------------------------------------------------------
15 #include "mb/pg_wchar.h"
/* Declare the two conversion entry points defined in this file via PG_FUNCTION_INFO_V1. */
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
/* Prototypes of the file-local routines that carry out each direction of the conversion. */
22 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
23 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
27 * INTEGER, -- source encoding id
28 * INTEGER, -- destination encoding id
29 * CSTRING, -- source string (null terminated C string)
30 * CSTRING, -- destination string (null terminated C string)
31 * INTEGER -- source string length
/*
 * Entry point for the EUC_JIS_2004 -> SHIFT_JIS_2004 conversion.
 * Reads the source string (argument index 2), the destination buffer
 * (argument index 3) and the source length (argument index 4), then hands
 * the work to euc_jis_20042shift_jis_2004().
 */
37 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
39 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
40 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
41 int len = PG_GETARG_INT32(4);
/* NOTE(review): presumably validates that the call is for exactly this encoding pair; the macro is defined elsewhere -- confirm */
43 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
45 euc_jis_20042shift_jis_2004(src, dest, len);
/*
 * Entry point for the SHIFT_JIS_2004 -> EUC_JIS_2004 conversion.
 * Reads the source string (argument index 2), the destination buffer
 * (argument index 3) and the source length (argument index 4), then hands
 * the work to shift_jis_20042euc_jis_2004().
 */
51 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
53 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
54 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
55 int len = PG_GETARG_INT32(4);
/* NOTE(review): presumably validates that the call is for exactly this encoding pair; the macro is defined elsewhere -- confirm */
57 CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
59 shift_jis_20042euc_jis_2004(src, dest, len);
65 * EUC_JIS_2004 -> SHIFT_JIS_2004
/*
 * Convert the EUC_JIS_2004 text at euc (len bytes) to SHIFT_JIS_2004,
 * writing output bytes through p.  Input that fails verification, or whose
 * ku/ten values fall outside the ranges handled below, is reported with
 * report_invalid_encoding().
 */
68 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
/* Bytes without the high bit set are handled separately from multibyte input. */
78 if (!IS_HIGHBIT_SET(c1))
82 report_invalid_encoding(PG_EUC_JIS_2004,
83 (const char *) euc, len);
/* l is the result of verifying the character at euc; it is compared with 2 and 3 below as a byte length. */
90 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
93 report_invalid_encoding(PG_EUC_JIS_2004,
94 (const char *) euc, len);
/* Dispatch on the lead byte and the verified length. */
96 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
100 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
/* Plane 2 first-byte formula; the condition selecting it is not visible in this excerpt. */
116 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
119 if (ku >= 78 && ku <= 94)
/* ku 78..94 yields first byte 0xf4..0xfc. */
121 *p++ = (ku + 0x19b) >> 1;
124 report_invalid_encoding(PG_EUC_JIS_2004,
125 (const char *) euc, len);
/* ten is split into 1..63 and 64..94; any other value is rejected. */
130 if (ten >= 1 && ten <= 63)
132 else if (ten >= 64 && ten <= 94)
135 report_invalid_encoding(PG_EUC_JIS_2004,
136 (const char *) euc, len);
142 else if (l == 2) /* JIS X 0213 plane 1? */
/* ku 1..62 yields first byte 0x81..0x9f; ku 63..94 yields 0xe0..0xef. */
147 if (ku >= 1 && ku <= 62)
148 *p++ = (ku + 0x101) >> 1;
149 else if (ku >= 63 && ku <= 94)
150 *p++ = (ku + 0x181) >> 1;
152 report_invalid_encoding(PG_EUC_JIS_2004,
153 (const char *) euc, len);
/* Same 1..63 / 64..94 split of ten as in the plane 2 branch. */
157 if (ten >= 1 && ten <= 63)
159 else if (ten >= 64 && ten <= 94)
162 report_invalid_encoding(PG_EUC_JIS_2004,
163 (const char *) euc, len);
/* No branch above matched the lead byte / length combination. */
169 report_invalid_encoding(PG_EUC_JIS_2004,
170 (const char *) euc, len);
179 * returns SHIFT_JIS_2004 "ten" code indicated by second byte (-1 on error)
180 * *ku = 0: "ku" = even
181 * *ku = 1: "ku" = odd
/* Classify second byte b into one of three accepted ranges; the result is consumed by callers as "ten". */
184 get_ten(int b, int *ku)
188 if (b >= 0x40 && b <= 0x7e)
193 else if (b >= 0x80 && b <= 0x9e)
198 else if (b >= 0x9f && b <= 0xfc)
/* b lies outside every accepted range (0x7f and anything below 0x40 or above 0xfc). */
205 ten = -1; /* error */
206 *ku = 0; /* keep compiler quiet */
212 * SHIFT_JIS_2004 ---> EUC_JIS_2004
/*
 * Convert the SHIFT_JIS_2004 text at sjis (len bytes) to EUC_JIS_2004,
 * with p as the output pointer.  Input that fails verification, has an
 * invalid second byte, or has an unhandled first byte is reported with
 * report_invalid_encoding().
 */
216 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
/* Bytes without the high bit set are handled separately from multibyte input. */
229 if (!IS_HIGHBIT_SET(c1))
233 report_invalid_encoding(PG_SHIFT_JIS_2004,
234 (const char *) sjis, len);
241 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
/* Reject a failed verification or a character that would run past the remaining input. */
243 if (l < 0 || l > len)
244 report_invalid_encoding(PG_SHIFT_JIS_2004,
245 (const char *) sjis, len);
247 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
249 /* JIS X0201 (1 byte kana) */
/* Two-byte characters: the first byte selects the plane and ku range, get_ten() decodes the second byte. */
264 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
266 ku = (c1 << 1) - 0x100;
267 ten = get_ten(c2, &kubun);
269 report_invalid_encoding(PG_SHIFT_JIS_2004,
270 (const char *) sjis, len);
273 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 63ku-94ku */
275 ku = (c1 << 1) - 0x180;
276 ten = get_ten(c2, &kubun);
278 report_invalid_encoding(PG_SHIFT_JIS_2004,
280 (const char *) sjis, len);
283 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
284 * 1,3,4,5,8,12,13,14,15 ku */
287 ten = get_ten(c2, &kubun);
289 report_invalid_encoding(PG_SHIFT_JIS_2004,
290 (const char *) sjis, len);
/* kubun chooses between two ku values in each assignment; the conditions selecting each assignment are not visible in this excerpt. */
294 ku = kubun == 0 ? 8 : 1;
297 ku = kubun == 0 ? 4 : 3;
300 ku = kubun == 0 ? 12 : 5;
303 ku = kubun == 0 ? 14 : 13;
307 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
310 ten = get_ten(c2, &kubun);
312 report_invalid_encoding(PG_SHIFT_JIS_2004,
313 (const char *) sjis, len);
/* 0xf4 with kubun == 1 is special-cased: the general formula below would give 77, outside 78-94. */
314 if (c1 == 0xf4 && kubun == 1)
317 ku = (c1 << 1) - 0x19a - kubun;
/* The first byte matched none of the ranges above. */
320 report_invalid_encoding(PG_SHIFT_JIS_2004,
321 (const char *) sjis, len);