]> granicus.if.org Git - postgresql/blob
e65d755368fc81388af7fd1ac5dd72bd4e435feb
[postgresql] /
1 /*-------------------------------------------------------------------------
2  *
3  *        EUC_JIS_2004, SHIFT_JIS_2004
4  *
5  * Copyright (c) 2007-2009, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *        $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.6 2009/01/29 19:23:39 tgl Exp $
9  *
10  *-------------------------------------------------------------------------
11  */
12
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16
17 PG_MODULE_MAGIC;
18
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
21
22 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
23 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
24
25 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
26 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
27
28 /* ----------
29  * conv_proc(
30  *              INTEGER,        -- source encoding id
31  *              INTEGER,        -- destination encoding id
32  *              CSTRING,        -- source string (null terminated C string)
33  *              CSTRING,        -- destination string (null terminated C string)
34  *              INTEGER         -- source string length
35  * ) returns VOID;
36  * ----------
37  */
38
39 Datum
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
41 {
42         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44         int                     len = PG_GETARG_INT32(4);
45
46         CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
47
48         euc_jis_20042shift_jis_2004(src, dest, len);
49
50         PG_RETURN_VOID();
51 }
52
53 Datum
54 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
55 {
56         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
57         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
58         int                     len = PG_GETARG_INT32(4);
59
60         CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
61
62         shift_jis_20042euc_jis_2004(src, dest, len);
63
64         PG_RETURN_VOID();
65 }
66
67 /*
68  * EUC_JIS_2004 -> SHIFT_JIS_2004
69  */
70 static void
71 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
72 {
73         int                     c1,
74                                 ku,
75                                 ten;
76         int                     l;
77
78         while (len > 0)
79         {
80                 c1 = *euc;
81                 if (!IS_HIGHBIT_SET(c1))
82                 {
83                         /* ASCII */
84                         if (c1 == 0)
85                                 report_invalid_encoding(PG_EUC_JIS_2004,
86                                                                                 (const char *) euc, len);
87                         *p++ = c1;
88                         euc++;
89                         len--;
90                         continue;
91                 }
92
93                 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
94
95                 if (l < 0)
96                         report_invalid_encoding(PG_EUC_JIS_2004,
97                                                                         (const char *) euc, len);
98
99                 if (c1 == SS2 && l == 2)        /* JIS X 0201 kana? */
100                 {
101                         *p++ = euc[1];
102                 }
103                 else if (c1 == SS3 && l == 3)   /* JIS X 0213 plane 2? */
104                 {
105                         ku = euc[1] - 0xa0;
106                         ten = euc[2] - 0xa0;
107
108                         switch (ku)
109                         {
110                                 case 1:
111                                 case 3:
112                                 case 4:
113                                 case 5:
114                                 case 8:
115                                 case 12:
116                                 case 13:
117                                 case 14:
118                                 case 15:
119                                         *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
120                                         break;
121                                 default:
122                                         if (ku >= 78 && ku <= 94)
123                                         {
124                                                 *p++ = (ku + 0x19b) >> 1;
125                                         }
126                                         else
127                                                 report_invalid_encoding(PG_EUC_JIS_2004,
128                                                                                                 (const char *) euc, len);
129                         }
130
131                         if (ku % 2)
132                         {
133                                 if (ten >= 1 && ten <= 63)
134                                         *p++ = ten + 0x3f;
135                                 else if (ten >= 64 && ten <= 94)
136                                         *p++ = ten + 0x40;
137                                 else
138                                         report_invalid_encoding(PG_EUC_JIS_2004,
139                                                                                         (const char *) euc, len);
140                         }
141                         else
142                                 *p++ = ten + 0x9e;
143                 }
144
145                 else if (l == 2)                /* JIS X 0213 plane 1? */
146                 {
147                         ku = c1 - 0xa0;
148                         ten = euc[1] - 0xa0;
149
150                         if (ku >= 1 && ku <= 62)
151                                 *p++ = (ku + 0x101) >> 1;
152                         else if (ku >= 63 && ku <= 94)
153                                 *p++ = (ku + 0x181) >> 1;
154                         else
155                                 report_invalid_encoding(PG_EUC_JIS_2004,
156                                                                                 (const char *) euc, len);
157
158                         if (ku % 2)
159                         {
160                                 if (ten >= 1 && ten <= 63)
161                                         *p++ = ten + 0x3f;
162                                 else if (ten >= 64 && ten <= 94)
163                                         *p++ = ten + 0x40;
164                                 else
165                                         report_invalid_encoding(PG_EUC_JIS_2004,
166                                                                                         (const char *) euc, len);
167                         }
168                         else
169                                 *p++ = ten + 0x9e;
170                 }
171                 else
172                         report_invalid_encoding(PG_EUC_JIS_2004,
173                                                                         (const char *) euc, len);
174
175                 euc += l;
176                 len -= l;
177         }
178         *p = '\0';
179 }
180
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" value (1..94) encoded by b, or -1 if b is not a
 * valid second byte.  *ku is set to the parity of the "ku" the byte
 * belongs to:
 *	*ku = 0: "ku" is even
 *	*ku = 1: "ku" is odd
 * On error *ku is set to 0 so that it is never left uninitialized.
 */
static int
get_ten(int b, int *ku)
{
	/* 0x40..0x7e: odd ku, ten 1..63 */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* 0x80..0x9e: odd ku, ten 64..94 (0x7f is skipped) */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	/* 0x9f..0xfc: even ku, ten 1..94 */
	if (b >= 0x9f && b <= 0xfc)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* anything else is not a legal second byte */
	*ku = 0;					/* keep compiler quiet */
	return -1;
}
213
214 /*
215  * SHIFT_JIS_2004 ---> EUC_JIS_2004
216  */
217
218 static void
219 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
220 {
221         int                     c1,
222                                 c2;
223         int                     ku,
224                                 ten,
225                                 kubun;
226         int                     plane;
227         int                     l;
228
229         while (len > 0)
230         {
231                 c1 = *sjis;
232                 c2 = sjis[1];
233
234                 if (!IS_HIGHBIT_SET(c1))
235                 {
236                         /* ASCII */
237                         if (c1 == 0)
238                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
239                                                                                 (const char *) sjis, len);
240                         *p++ = c1;
241                         sjis++;
242                         len--;
243                         continue;
244                 }
245
246                 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
247
248                 if (l < 0)
249                         report_invalid_encoding(PG_SHIFT_JIS_2004,
250                                                                         (const char *) sjis, len);
251
252                 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
253                 {
254                         /* JIS X0201 (1 byte kana) */
255                         *p++ = SS2;
256                         *p++ = c1;
257                 }
258                 else if (l == 2)
259                 {
260                         plane = 1;
261                         ku = 1;
262                         ten = 1;
263
264                         /*
265                          * JIS X 0213
266                          */
267                         if (c1 >= 0x81 && c1 <= 0x9f)           /* plane 1 1ku-62ku */
268                         {
269                                 ku = (c1 << 1) - 0x100;
270                                 ten = get_ten(c2, &kubun);
271                                 if (ten < 0)
272                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
273                                                                                         (const char *) sjis, len);
274                                 ku -= kubun;
275                         }
276                         else if (c1 >= 0xe0 && c1 <= 0xef)      /* plane 1 62ku-94ku */
277                         {
278                                 ku = (c1 << 1) - 0x180;
279                                 ten = get_ten(c2, &kubun);
280                                 if (ten < 0)
281                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
282
283                                                                                         (const char *) sjis, len);
284                                 ku -= kubun;
285                         }
286                         else if (c1 >= 0xf0 && c1 <= 0xf3)      /* plane 2
287                                                                                                  * 1,3,4,5,8,12,13,14,15 ku */
288                         {
289                                 plane = 2;
290                                 ten = get_ten(c2, &kubun);
291                                 if (ten < 0)
292                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
293                                                                                         (const char *) sjis, len);
294                                 switch (c1)
295                                 {
296                                         case 0xf0:
297                                                 ku = kubun == 0 ? 8 : 1;
298                                                 break;
299                                         case 0xf1:
300                                                 ku = kubun == 0 ? 4 : 3;
301                                                 break;
302                                         case 0xf2:
303                                                 ku = kubun == 0 ? 12 : 5;
304                                                 break;
305                                         default:
306                                                 ku = kubun == 0 ? 14 : 13;
307                                                 break;
308                                 }
309                         }
310                         else if (c1 >= 0xf4 && c1 <= 0xfc)      /* plane 2 78-94ku */
311                         {
312                                 plane = 2;
313                                 ten = get_ten(c2, &kubun);
314                                 if (ten < 0)
315                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
316                                                                                         (const char *) sjis, len);
317                                 if (c1 == 0xf4 && kubun == 1)
318                                         ku = 15;
319                                 else
320                                         ku = (c1 << 1) - 0x19a - kubun;
321                         }
322                         else
323                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
324                                                                                 (const char *) sjis, len);
325
326                         if (plane == 2)
327                                 *p++ = SS3;
328
329                         *p++ = ku + 0xa0;
330                         *p++ = ten + 0xa0;
331                 }
332                 sjis += l;
333                 len -= l;
334         }
335         *p = '\0';
336 }