]> granicus.if.org Git - postgresql/blob - src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
Update copyright for 2016
[postgresql] / src / backend / utils / mb / conversion_procs / euc2004_sjis2004 / euc2004_sjis2004.c
1 /*-------------------------------------------------------------------------
2  *
3  *        EUC_JIS_2004, SHIFT_JIS_2004
4  *
5  * Copyright (c) 2007-2016, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *        src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
9  *
10  *-------------------------------------------------------------------------
11  */
12
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16
17 PG_MODULE_MAGIC;
18
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
21
22 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
23 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
24
25 /* ----------
26  * conv_proc(
27  *              INTEGER,        -- source encoding id
28  *              INTEGER,        -- destination encoding id
29  *              CSTRING,        -- source string (null terminated C string)
30  *              CSTRING,        -- destination string (null terminated C string)
31  *              INTEGER         -- source string length
32  * ) returns VOID;
33  * ----------
34  */
35
36 Datum
37 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
38 {
39         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
40         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
41         int                     len = PG_GETARG_INT32(4);
42
43         CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
44
45         euc_jis_20042shift_jis_2004(src, dest, len);
46
47         PG_RETURN_VOID();
48 }
49
50 Datum
51 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
52 {
53         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
54         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
55         int                     len = PG_GETARG_INT32(4);
56
57         CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
58
59         shift_jis_20042euc_jis_2004(src, dest, len);
60
61         PG_RETURN_VOID();
62 }
63
64 /*
65  * EUC_JIS_2004 -> SHIFT_JIS_2004
66  */
67 static void
68 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
69 {
70         int                     c1,
71                                 ku,
72                                 ten;
73         int                     l;
74
75         while (len > 0)
76         {
77                 c1 = *euc;
78                 if (!IS_HIGHBIT_SET(c1))
79                 {
80                         /* ASCII */
81                         if (c1 == 0)
82                                 report_invalid_encoding(PG_EUC_JIS_2004,
83                                                                                 (const char *) euc, len);
84                         *p++ = c1;
85                         euc++;
86                         len--;
87                         continue;
88                 }
89
90                 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
91
92                 if (l < 0)
93                         report_invalid_encoding(PG_EUC_JIS_2004,
94                                                                         (const char *) euc, len);
95
96                 if (c1 == SS2 && l == 2)        /* JIS X 0201 kana? */
97                 {
98                         *p++ = euc[1];
99                 }
100                 else if (c1 == SS3 && l == 3)   /* JIS X 0213 plane 2? */
101                 {
102                         ku = euc[1] - 0xa0;
103                         ten = euc[2] - 0xa0;
104
105                         switch (ku)
106                         {
107                                 case 1:
108                                 case 3:
109                                 case 4:
110                                 case 5:
111                                 case 8:
112                                 case 12:
113                                 case 13:
114                                 case 14:
115                                 case 15:
116                                         *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
117                                         break;
118                                 default:
119                                         if (ku >= 78 && ku <= 94)
120                                         {
121                                                 *p++ = (ku + 0x19b) >> 1;
122                                         }
123                                         else
124                                                 report_invalid_encoding(PG_EUC_JIS_2004,
125                                                                                                 (const char *) euc, len);
126                         }
127
128                         if (ku % 2)
129                         {
130                                 if (ten >= 1 && ten <= 63)
131                                         *p++ = ten + 0x3f;
132                                 else if (ten >= 64 && ten <= 94)
133                                         *p++ = ten + 0x40;
134                                 else
135                                         report_invalid_encoding(PG_EUC_JIS_2004,
136                                                                                         (const char *) euc, len);
137                         }
138                         else
139                                 *p++ = ten + 0x9e;
140                 }
141
142                 else if (l == 2)                /* JIS X 0213 plane 1? */
143                 {
144                         ku = c1 - 0xa0;
145                         ten = euc[1] - 0xa0;
146
147                         if (ku >= 1 && ku <= 62)
148                                 *p++ = (ku + 0x101) >> 1;
149                         else if (ku >= 63 && ku <= 94)
150                                 *p++ = (ku + 0x181) >> 1;
151                         else
152                                 report_invalid_encoding(PG_EUC_JIS_2004,
153                                                                                 (const char *) euc, len);
154
155                         if (ku % 2)
156                         {
157                                 if (ten >= 1 && ten <= 63)
158                                         *p++ = ten + 0x3f;
159                                 else if (ten >= 64 && ten <= 94)
160                                         *p++ = ten + 0x40;
161                                 else
162                                         report_invalid_encoding(PG_EUC_JIS_2004,
163                                                                                         (const char *) euc, len);
164                         }
165                         else
166                                 *p++ = ten + 0x9e;
167                 }
168                 else
169                         report_invalid_encoding(PG_EUC_JIS_2004,
170                                                                         (const char *) euc, len);
171
172                 euc += l;
173                 len -= l;
174         }
175         *p = '\0';
176 }
177
/*
 * Decodes the second byte "b" of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" (cell) number, 1..94, or -1 if "b" is not a valid
 * second byte.  *ku reports the parity of the row the character belongs to:
 *	*ku = 0: "ku" is even
 *	*ku = 1: "ku" is odd
 * On error *ku is set to 0.
 */
static int
get_ten(int b, int *ku)
{
	/* outside 0x40..0xfc, or the 0x7f gap: not a valid second byte */
	if (b < 0x40 || b == 0x7f || b > 0xfc)
	{
		*ku = 0;
		return -1;
	}

	/* 0x9f..0xfc: cells of an even row */
	if (b >= 0x9f)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* 0x40..0x7e and 0x80..0x9e: cells of an odd row */
	*ku = 1;
	return (b <= 0x7e) ? b - 0x3f : b - 0x40;
}
210
211 /*
212  * SHIFT_JIS_2004 ---> EUC_JIS_2004
213  */
214
215 static void
216 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
217 {
218         int                     c1;
219         int                     ku,
220                                 ten,
221                                 kubun;
222         int                     plane;
223         int                     l;
224
225         while (len > 0)
226         {
227                 c1 = *sjis;
228
229                 if (!IS_HIGHBIT_SET(c1))
230                 {
231                         /* ASCII */
232                         if (c1 == 0)
233                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
234                                                                                 (const char *) sjis, len);
235                         *p++ = c1;
236                         sjis++;
237                         len--;
238                         continue;
239                 }
240
241                 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
242
243                 if (l < 0 || l > len)
244                         report_invalid_encoding(PG_SHIFT_JIS_2004,
245                                                                         (const char *) sjis, len);
246
247                 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
248                 {
249                         /* JIS X0201 (1 byte kana) */
250                         *p++ = SS2;
251                         *p++ = c1;
252                 }
253                 else if (l == 2)
254                 {
255                         int                     c2 = sjis[1];
256
257                         plane = 1;
258                         ku = 1;
259                         ten = 1;
260
261                         /*
262                          * JIS X 0213
263                          */
264                         if (c1 >= 0x81 && c1 <= 0x9f)           /* plane 1 1ku-62ku */
265                         {
266                                 ku = (c1 << 1) - 0x100;
267                                 ten = get_ten(c2, &kubun);
268                                 if (ten < 0)
269                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
270                                                                                         (const char *) sjis, len);
271                                 ku -= kubun;
272                         }
273                         else if (c1 >= 0xe0 && c1 <= 0xef)      /* plane 1 62ku-94ku */
274                         {
275                                 ku = (c1 << 1) - 0x180;
276                                 ten = get_ten(c2, &kubun);
277                                 if (ten < 0)
278                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
279
280                                                                                         (const char *) sjis, len);
281                                 ku -= kubun;
282                         }
283                         else if (c1 >= 0xf0 && c1 <= 0xf3)      /* plane 2
284                                                                                                  * 1,3,4,5,8,12,13,14,15 ku */
285                         {
286                                 plane = 2;
287                                 ten = get_ten(c2, &kubun);
288                                 if (ten < 0)
289                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
290                                                                                         (const char *) sjis, len);
291                                 switch (c1)
292                                 {
293                                         case 0xf0:
294                                                 ku = kubun == 0 ? 8 : 1;
295                                                 break;
296                                         case 0xf1:
297                                                 ku = kubun == 0 ? 4 : 3;
298                                                 break;
299                                         case 0xf2:
300                                                 ku = kubun == 0 ? 12 : 5;
301                                                 break;
302                                         default:
303                                                 ku = kubun == 0 ? 14 : 13;
304                                                 break;
305                                 }
306                         }
307                         else if (c1 >= 0xf4 && c1 <= 0xfc)      /* plane 2 78-94ku */
308                         {
309                                 plane = 2;
310                                 ten = get_ten(c2, &kubun);
311                                 if (ten < 0)
312                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
313                                                                                         (const char *) sjis, len);
314                                 if (c1 == 0xf4 && kubun == 1)
315                                         ku = 15;
316                                 else
317                                         ku = (c1 << 1) - 0x19a - kubun;
318                         }
319                         else
320                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
321                                                                                 (const char *) sjis, len);
322
323                         if (plane == 2)
324                                 *p++ = SS3;
325
326                         *p++ = ku + 0xa0;
327                         *p++ = ten + 0xa0;
328                 }
329                 sjis += l;
330                 len -= l;
331         }
332         *p = '\0';
333 }