]> granicus.if.org Git - postgresql/blob
a6d8490bdea173b71ee5e9e1d6274d68245f8a00
[postgresql] /
1 /*-------------------------------------------------------------------------
2  *
3  *        EUC_JIS_2004, SHIFT_JIS_2004
4  *
5  * Copyright (c) 2007-2008, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *        $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.3.2.1 2009/01/29 19:23:56 tgl Exp $
9  *
10  *-------------------------------------------------------------------------
11  */
12
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16
17 PG_MODULE_MAGIC;
18
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
21
22 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
23 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
24
25 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
26 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
27
28 /* ----------
29  * conv_proc(
30  *              INTEGER,        -- source encoding id
31  *              INTEGER,        -- destination encoding id
32  *              CSTRING,        -- source string (null terminated C string)
33  *              CSTRING,        -- destination string (null terminated C string)
34  *              INTEGER         -- source string length
35  * ) returns VOID;
36  * ----------
37  */
38
39 Datum
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
41 {
42         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44         int                     len = PG_GETARG_INT32(4);
45
46         CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
47
48         euc_jis_20042shift_jis_2004(src, dest, len);
49
50         PG_RETURN_VOID();
51 }
52
53 Datum
54 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
55 {
56         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
57         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
58         int                     len = PG_GETARG_INT32(4);
59
60         CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
61
62         shift_jis_20042euc_jis_2004(src, dest, len);
63
64         PG_RETURN_VOID();
65 }
66
67 /*
68  * EUC_JIS_2004 -> SHIFT_JIS_2004
69  */
70 static void
71 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
72 {
73         int                     c1,
74                                 ku,
75                                 ten;
76         int                     l;
77
78         while (len > 0)
79         {
80                 c1 = *euc;
81                 if (!IS_HIGHBIT_SET(c1))
82                 {
83                         /* ASCII */
84                         if (c1 == 0)
85                                 report_invalid_encoding(PG_EUC_JIS_2004,
86                                                                                 (const char *) euc, len);
87                         *p++ = c1;
88                         euc++;
89                         len--;
90                         continue;
91                 }
92
93                 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
94
95                 if (l < 0)
96                         report_invalid_encoding(PG_EUC_JIS_2004,
97                                                                         (const char *) euc, len);
98
99                 if (c1 == SS2 && l == 2)        /* JIS X 0201 kana? */
100                 {
101                         *p++ = euc[1];
102                 }
103                 else if (c1 == SS3 && l == 3)   /* JIS X 0213 plane 2? */
104                 {
105                         ku = euc[1] - 0xa0;
106                         ten = euc[2] - 0xa0;
107
108                         switch (ku)
109                         {
110                                 case 1:
111                                 case 3:
112                                 case 4:
113                                 case 5:
114                                 case 8:
115                                 case 12:
116                                 case 13:
117                                 case 14:
118                                 case 15:
119                                         *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
120                                         break;
121                                 default:
122                                         if (ku >= 78 && ku <= 94)
123                                         {
124                                                 *p++ = (ku + 0x19b) >> 1;
125                                         }
126                                         else
127                                                 report_invalid_encoding(PG_EUC_JIS_2004,
128                                                                                                 (const char *) euc, len);
129                         }
130
131                         if (ku % 2)
132                         {
133                                 if (ten >= 1 && ten <= 63)
134                                         *p++ = ten + 0x3f;
135                                 else if (ten >= 64 && ten <= 94)
136                                         *p++ = ten + 0x40;
137                                 else
138                                         report_invalid_encoding(PG_EUC_JIS_2004,
139                                                                                         (const char *) euc, len);
140                         }
141                         else
142                                 *p++ = ten + 0x9e;
143                 }
144
145                 else if (l == 2)                /* JIS X 0213 plane 1? */
146                 {
147                         ku = c1 - 0xa0;
148                         ten = euc[1] - 0xa0;
149
150                         if (ku >= 1 && ku <= 62)
151                                 *p++ = (ku + 0x101) >> 1;
152                         else if (ku >= 63 && ku <= 94)
153                                 *p++ = (ku + 0x181) >> 1;
154                         else
155                                 report_invalid_encoding(PG_EUC_JIS_2004,
156                                                                                 (const char *) euc, len);
157
158                         if (ku % 2)
159                         {
160                                 if (ten >= 1 && ten <= 63)
161                                         *p++ = ten + 0x3f;
162                                 else if (ten >= 64 && ten <= 94)
163                                         *p++ = ten + 0x40;
164                                 else
165                                         report_invalid_encoding(PG_EUC_JIS_2004,
166                                                                                         (const char *) euc, len);
167                         }
168                         else
169                                 *p++ = ten + 0x9e;
170                 }
171                 else
172                         report_invalid_encoding(PG_EUC_JIS_2004,
173                                                                         (const char *) euc, len);
174
175                 euc += l;
176                 len -= l;
177         }
178         *p = '\0';
179 }
180
/*
 * Decode the second byte "b" of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" code (1..94) selected by that byte and reports the
 * parity of the "ku" it belongs to through *ku:
 *		*ku = 0: "ku" is even
 *		*ku = 1: "ku" is odd
 *
 * If "b" is not a valid second byte (below 0x40, equal to 0x7f, or above
 * 0xfc) the result is -1 and *ku is left untouched.
 */
static int
get_ten(int b, int *ku)
{
	/* 0x40..0x7e: odd ku, ten 1..63 */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* 0x80..0x9e: odd ku, ten 64..94 */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	/* 0x9f..0xfc: even ku, ten 1..94 */
	if (b >= 0x9f && b <= 0xfc)
	{
		*ku = 0;
		return b - 0x9e;
	}

	return -1;					/* error */
}
212
213 /*
214  * SHIFT_JIS_2004 ---> EUC_JIS_2004
215  */
216
217 static void
218 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
219 {
220         int                     c1,
221                                 c2;
222         int                     ku,
223                                 ten,
224                                 kubun;
225         int                     plane;
226         int                     l;
227
228         while (len > 0)
229         {
230                 c1 = *sjis;
231                 c2 = sjis[1];
232
233                 if (!IS_HIGHBIT_SET(c1))
234                 {
235                         /* ASCII */
236                         if (c1 == 0)
237                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
238                                                                                 (const char *) sjis, len);
239                         *p++ = c1;
240                         sjis++;
241                         len--;
242                         continue;
243                 }
244
245                 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
246
247                 if (l < 0)
248                         report_invalid_encoding(PG_SHIFT_JIS_2004,
249                                                                         (const char *) sjis, len);
250
251                 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
252                 {
253                         /* JIS X0201 (1 byte kana) */
254                         *p++ = SS2;
255                         *p++ = c1;
256                 }
257                 else if (l == 2)
258                 {
259                         plane = 1;
260                         ku = 1;
261                         ten = 1;
262
263                         /*
264                          * JIS X 0213
265                          */
266                         if (c1 >= 0x81 && c1 <= 0x9f)           /* plane 1 1ku-62ku */
267                         {
268                                 ku = (c1 << 1) - 0x100;
269                                 ten = get_ten(c2, &kubun);
270                                 if (ten < 0)
271                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
272                                                                                         (const char *) sjis, len);
273                                 ku -= kubun;
274                         }
275                         else if (c1 >= 0xe0 && c1 <= 0xef)      /* plane 1 62ku-94ku */
276                         {
277                                 ku = (c1 << 1) - 0x180;
278                                 ten = get_ten(c2, &kubun);
279                                 if (ten < 0)
280                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
281
282                                                                                         (const char *) sjis, len);
283                                 ku -= kubun;
284                         }
285                         else if (c1 >= 0xf0 && c1 <= 0xf3)      /* plane 2
286                                                                                                  * 1,3,4,5,8,12,13,14,15 ku */
287                         {
288                                 plane = 2;
289                                 ten = get_ten(c2, &kubun);
290                                 if (ten < 0)
291                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
292                                                                                         (const char *) sjis, len);
293                                 switch (c1)
294                                 {
295                                         case 0xf0:
296                                                 ku = kubun == 0 ? 8 : 1;
297                                                 break;
298                                         case 0xf1:
299                                                 ku = kubun == 0 ? 4 : 3;
300                                                 break;
301                                         case 0xf2:
302                                                 ku = kubun == 0 ? 12 : 5;
303                                                 break;
304                                         default:
305                                                 ku = kubun == 0 ? 14 : 13;
306                                                 break;
307                                 }
308                         }
309                         else if (c1 >= 0xf4 && c1 <= 0xfc)      /* plane 2 78-94ku */
310                         {
311                                 plane = 2;
312                                 ten = get_ten(c2, &kubun);
313                                 if (ten < 0)
314                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
315                                                                                         (const char *) sjis, len);
316                                 if (c1 == 0xf4 && kubun == 1)
317                                         ku = 15;
318                                 else
319                                         ku = (c1 << 1) - 0x19a - kubun;
320                         }
321                         else
322                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
323                                                                                 (const char *) sjis, len);
324
325                         if (plane == 2)
326                                 *p++ = SS3;
327
328                         *p++ = ku + 0xa0;
329                         *p++ = ten + 0xa0;
330                 }
331                 sjis += l;
332                 len -= l;
333         }
334         *p = '\0';
335 }