]> granicus.if.org Git - postgresql/blob - src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
Fix initialization of fake LSN for unlogged relations
[postgresql] / src / backend / utils / mb / Unicode / UCS_to_EUC_JP.pl
1 #! /usr/bin/perl
2 #
3 # Copyright (c) 2001-2019, PostgreSQL Global Development Group
4 #
5 # src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
6 #
7 # Generate UTF-8 <--> EUC_JP code conversion tables from
8 # map files provided by Unicode organization.
9 # Unfortunately it is prohibited by the organization
10 # to distribute the map files. So if you try to use this script,
11 # you have to obtain CP932.TXT and JIS0212.TXT from the
12 # organization's ftp site.
13
14 use strict;
15 use convutils;
16
17 my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl';
18
# Read the JIS0212.TXT source map; its entries seed the combined mapping list.
my $jis0212 = read_source("JIS0212.TXT");

my @mapping;

for my $entry (@$jis0212)
{
	my $jis = $entry->{code};

	# We have a different mapping for this in the EUC_JP to UTF-8 direction,
	# so 0x2243 is restricted to the conversion from Unicode.
	$entry->{direction} = FROM_UNICODE if $jis == 0x2243;

	# 0x2271 is restricted to the conversion to Unicode.
	$entry->{direction} = TO_UNICODE if $jis == 0x2271;

	# Entries whose UCS value is below 0x80 are not added to the list.
	next unless $entry->{ucs} >= 0x080;

	# OR 0x8f8080 into the source code to form the value stored in the list.
	$entry->{code} = $jis | 0x8f8080;

	push @mapping, $entry;
}
49
# Read the CP932.TXT source map and append its converted entries to @mapping.
my $ct932 = read_source("CP932.TXT");

for my $entry (@$ct932)
{
	my $sjis = $entry->{code};

	# We have a different mapping for these in the EUC_JP to UTF-8 direction.
	next if $sjis == 0xeefa || $sjis == 0xeefb || $sjis == 0xeefc;

	# Only codes from 0xa1 upwards are carried over.
	next unless $sjis >= 0xa1;

	my $jis = sjis2jis($sjis);

	# Choose the bits to OR into the JIS value: 0x8e00 for results below
	# 0x100, 0x8f8080 for SJIS codes from 0xeffd upwards, 0x8080 otherwise.
	my $prefix_bits;
	if ($jis < 0x100)
	{
		$prefix_bits = 0x8e00;
	}
	elsif ($sjis >= 0xeffd)
	{
		$prefix_bits = 0x8f8080;
	}
	else
	{
		$prefix_bits = 0x8080;
	}

	$entry->{code} = $jis | $prefix_bits;

	# Remember the SJIS code for later.
	$entry->{sjis} = $sjis;

	push @mapping, $entry;
}
80
# Walk the combined list and adjust the conversion direction of entries
# according to the Shift-JIS code remembered under the "sjis" key.
foreach my $i (@mapping)
{
	my $sjis = $i->{sjis};

	# Entries that carry no SJIS code cannot match any of the tests below.
	# Skip them explicitly instead of comparing undef numerically, which only
	# works because undef numifies to 0 and warns when warnings are enabled.
	next unless defined $sjis;

	if (   ($sjis >= 0xed00 && $sjis <= 0xeef9)
		|| ($sjis >= 0xfa54 && $sjis <= 0xfa56)
		|| ($sjis >= 0xfa58 && $sjis <= 0xfc4b))
	{
		# These SJIS characters are excluded completely.
		$i->{direction} = NONE;
	}
	elsif ($sjis == 0xeefa || $sjis == 0xeefb || $sjis == 0xeefc)
	{
		# These SJIS characters are only in the UTF-8 to EUC_JP table.
		# NOTE(review): the CP932 loading loop skips these three codes before
		# it records an sjis value, so this branch looks unreachable as
		# written -- confirm before relying on it.
		$i->{direction} = FROM_UNICODE;
	}
	elsif ($sjis == 0x8790
		|| $sjis == 0x8791
		|| $sjis == 0x8792
		|| $sjis == 0x8795
		|| $sjis == 0x8796
		|| $sjis == 0x8797
		|| $sjis == 0x879a
		|| $sjis == 0x879b
		|| $sjis == 0x879c
		|| ($sjis >= 0xfa4a && $sjis <= 0xfa53))
	{
		# These SJIS characters are only converted in the to-Unicode direction.
		$i->{direction} = TO_UNICODE;
	}
}
116
# Hand-maintained mappings appended after the entries loaded from the source
# files.  Each row is [ direction, UCS code point, EUC_JP code, comment ] and
# is expanded into the same hash layout used by the other @mapping entries.
push @mapping, map {
	my ($direction, $ucs, $code, $comment) = @$_;
	+{
		direction => $direction,
		ucs       => $ucs,
		code      => $code,
		comment   => $comment
	};
} (
	[ BOTH, 0x4efc, 0x8ff4af, '# CJK(4EFC)' ],
	[ BOTH, 0x50f4, 0x8ff4b0, '# CJK(50F4)' ],
	[ BOTH, 0x51EC, 0x8ff4b1, '# CJK(51EC)' ],
	[ BOTH, 0x5307, 0x8ff4b2, '# CJK(5307)' ],
	[ BOTH, 0x5324, 0x8ff4b3, '# CJK(5324)' ],
	[ BOTH, 0x548A, 0x8ff4b5, '# CJK(548A)' ],
	[ BOTH, 0x5759, 0x8ff4b6, '# CJK(5759)' ],
	[ BOTH, 0x589E, 0x8ff4b9, '# CJK(589E)' ],
	[ BOTH, 0x5BEC, 0x8ff4ba, '# CJK(5BEC)' ],
	[ BOTH, 0x5CF5, 0x8ff4bb, '# CJK(5CF5)' ],
	[ BOTH, 0x5D53, 0x8ff4bc, '# CJK(5D53)' ],
	[ BOTH, 0x5FB7, 0x8ff4be, '# CJK(5FB7)' ],
	[ BOTH, 0x6085, 0x8ff4bf, '# CJK(6085)' ],
	[ BOTH, 0x6120, 0x8ff4c0, '# CJK(6120)' ],
	[ BOTH, 0x654E, 0x8ff4c1, '# CJK(654E)' ],
	[ BOTH, 0x663B, 0x8ff4c2, '# CJK(663B)' ],
	[ BOTH, 0x6665, 0x8ff4c3, '# CJK(6665)' ],
	[ BOTH, 0x6801, 0x8ff4c6, '# CJK(6801)' ],
	[ BOTH, 0x6A6B, 0x8ff4c9, '# CJK(6A6B)' ],
	[ BOTH, 0x6AE2, 0x8ff4ca, '# CJK(6AE2)' ],
	[ BOTH, 0x6DF2, 0x8ff4cc, '# CJK(6DF2)' ],
	[ BOTH, 0x6DF8, 0x8ff4cb, '# CJK(6DF8)' ],
	[ BOTH, 0x7028, 0x8ff4cd, '# CJK(7028)' ],
	[ BOTH, 0x70BB, 0x8ff4ae, '# CJK(70BB)' ],
	[ BOTH, 0x7501, 0x8ff4d0, '# CJK(7501)' ],
	[ BOTH, 0x7682, 0x8ff4d1, '# CJK(7682)' ],
	[ BOTH, 0x769E, 0x8ff4d2, '# CJK(769E)' ],
	[ BOTH, 0x7930, 0x8ff4d4, '# CJK(7930)' ],
	[ BOTH, 0x7AE7, 0x8ff4d9, '# CJK(7AE7)' ],
	[ BOTH, 0x7DA0, 0x8ff4dc, '# CJK(7DA0)' ],
	[ BOTH, 0x7DD6, 0x8ff4dd, '# CJK(7DD6)' ],
	[ BOTH, 0x8362, 0x8ff4df, '# CJK(8362)' ],
	[ BOTH, 0x85B0, 0x8ff4e1, '# CJK(85B0)' ],
	[ BOTH, 0x8807, 0x8ff4e4, '# CJK(8807)' ],
	[ BOTH, 0x8B7F, 0x8ff4e6, '# CJK(8B7F)' ],
	[ BOTH, 0x8CF4, 0x8ff4e7, '# CJK(8CF4)' ],
	[ BOTH, 0x8D76, 0x8ff4e8, '# CJK(8D76)' ],
	[ BOTH, 0x90DE, 0x8ff4ec, '# CJK(90DE)' ],
	[ BOTH, 0x9115, 0x8ff4ee, '# CJK(9115)' ],
	[ BOTH, 0x9592, 0x8ff4f1, '# CJK(9592)' ],
	[ BOTH, 0x973B, 0x8ff4f4, '# CJK(973B)' ],
	[ BOTH, 0x974D, 0x8ff4f5, '# CJK(974D)' ],
	[ BOTH, 0x9751, 0x8ff4f6, '# CJK(9751)' ],
	[ BOTH, 0x999E, 0x8ff4fa, '# CJK(999E)' ],
	[ BOTH, 0x9AD9, 0x8ff4fb, '# CJK(9AD9)' ],
	[ BOTH, 0x9B72, 0x8ff4fc, '# CJK(9B72)' ],
	[ BOTH, 0x9ED1, 0x8ff4fe, '# CJK(9ED1)' ],
	[ BOTH, 0xF929, 0x8ff4c5, '# CJK COMPATIBILITY IDEOGRAPH-F929' ],
	[ BOTH, 0xF9DC, 0x8ff4f2, '# CJK COMPATIBILITY IDEOGRAPH-F9DC' ],
	[ BOTH, 0xFA0E, 0x8ff4b4, '# CJK COMPATIBILITY IDEOGRAPH-FA0E' ],
	[ BOTH, 0xFA0F, 0x8ff4b7, '# CJK COMPATIBILITY IDEOGRAPH-FA0F' ],
	[ BOTH, 0xFA10, 0x8ff4b8, '# CJK COMPATIBILITY IDEOGRAPH-FA10' ],
	[ BOTH, 0xFA11, 0x8ff4bd, '# CJK COMPATIBILITY IDEOGRAPH-FA11' ],
	[ BOTH, 0xFA12, 0x8ff4c4, '# CJK COMPATIBILITY IDEOGRAPH-FA12' ],
	[ BOTH, 0xFA13, 0x8ff4c7, '# CJK COMPATIBILITY IDEOGRAPH-FA13' ],
	[ BOTH, 0xFA14, 0x8ff4c8, '# CJK COMPATIBILITY IDEOGRAPH-FA14' ],
	[ BOTH, 0xFA15, 0x8ff4ce, '# CJK COMPATIBILITY IDEOGRAPH-FA15' ],
	[ BOTH, 0xFA16, 0x8ff4cf, '# CJK COMPATIBILITY IDEOGRAPH-FA16' ],
	[ BOTH, 0xFA17, 0x8ff4d3, '# CJK COMPATIBILITY IDEOGRAPH-FA17' ],
	[ BOTH, 0xFA18, 0x8ff4d5, '# CJK COMPATIBILITY IDEOGRAPH-FA18' ],
	[ BOTH, 0xFA19, 0x8ff4d6, '# CJK COMPATIBILITY IDEOGRAPH-FA19' ],
	[ BOTH, 0xFA1A, 0x8ff4d7, '# CJK COMPATIBILITY IDEOGRAPH-FA1A' ],
	[ BOTH, 0xFA1B, 0x8ff4d8, '# CJK COMPATIBILITY IDEOGRAPH-FA1B' ],
	[ BOTH, 0xFA1C, 0x8ff4da, '# CJK COMPATIBILITY IDEOGRAPH-FA1C' ],
	[ BOTH, 0xFA1D, 0x8ff4db, '# CJK COMPATIBILITY IDEOGRAPH-FA1D' ],
	[ BOTH, 0xFA1E, 0x8ff4de, '# CJK COMPATIBILITY IDEOGRAPH-FA1E' ],
	[ BOTH, 0xFA1F, 0x8ff4e0, '# CJK COMPATIBILITY IDEOGRAPH-FA1F' ],
	[ BOTH, 0xFA20, 0x8ff4e2, '# CJK COMPATIBILITY IDEOGRAPH-FA20' ],
	[ BOTH, 0xFA21, 0x8ff4e3, '# CJK COMPATIBILITY IDEOGRAPH-FA21' ],
	[ BOTH, 0xFA22, 0x8ff4e5, '# CJK COMPATIBILITY IDEOGRAPH-FA22' ],
	[ BOTH, 0xFA23, 0x8ff4e9, '# CJK COMPATIBILITY IDEOGRAPH-FA23' ],
	[ BOTH, 0xFA24, 0x8ff4ea, '# CJK COMPATIBILITY IDEOGRAPH-FA24' ],
	[ BOTH, 0xFA25, 0x8ff4eb, '# CJK COMPATIBILITY IDEOGRAPH-FA25' ],
	[ BOTH, 0xFA26, 0x8ff4ed, '# CJK COMPATIBILITY IDEOGRAPH-FA26' ],
	[ BOTH, 0xFA27, 0x8ff4ef, '# CJK COMPATIBILITY IDEOGRAPH-FA27' ],
	[ BOTH, 0xFA28, 0x8ff4f0, '# CJK COMPATIBILITY IDEOGRAPH-FA28' ],
	[ BOTH, 0xFA29, 0x8ff4f3, '# CJK COMPATIBILITY IDEOGRAPH-FA29' ],
	[ BOTH, 0xFA2A, 0x8ff4f7, '# CJK COMPATIBILITY IDEOGRAPH-FA2A' ],
	[ BOTH, 0xFA2B, 0x8ff4f8, '# CJK COMPATIBILITY IDEOGRAPH-FA2B' ],
	[ BOTH, 0xFA2C, 0x8ff4f9, '# CJK COMPATIBILITY IDEOGRAPH-FA2C' ],
	[ BOTH, 0xFA2D, 0x8ff4fd, '# CJK COMPATIBILITY IDEOGRAPH-FA2D' ],
	[ BOTH, 0xFF07, 0x8ff4a9, '# FULLWIDTH APOSTROPHE' ],
	[ BOTH, 0xFFE4, 0x8fa2c3, '# FULLWIDTH BROKEN BAR' ],

	# additional conversions for EUC_JP -> UTF-8 conversion
	[ TO_UNICODE, 0x2116, 0x8ff4ac, '# NUMERO SIGN' ],
	[ TO_UNICODE, 0x2121, 0x8ff4ad, '# TELEPHONE SIGN' ],
	[ TO_UNICODE, 0x3231, 0x8ff4ab, '# PARENTHESIZED IDEOGRAPH STOCK' ]);

# Hand the completed mapping list to print_conversion_tables for EUC_JP.
print_conversion_tables($this_script, "EUC_JP", \@mapping);
638
639
#######################################################################
# sjis2jis ; SJIS => JIS conversion
#
# Takes one Shift-JIS code as an integer and returns the corresponding JIS
# code as an integer.  Values up to 0x100 are returned unchanged.
sub sjis2jis
{
	my ($sjis) = @_;

	return $sjis if ($sjis <= 0x100);

	my $hi = $sjis >> 8;
	my $lo = $sjis & 0xff;

	# Turn both bytes into zero-based offsets: low bytes from 0x80 upwards
	# are shifted down by one, high bytes from 0xe0 upwards by 0x40.
	$lo-- if ($lo >= 0x80);
	$lo -= 0x40;
	$hi -= 0x40 if ($hi >= 0xe0);
	$hi -= 0x81;

	# Each high-byte step spans 0xbc positions, i.e. two rows of 0x5e.
	my $pos = $lo + $hi * 0xbc;

	if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
	{

		# This region (115-ku) is out of range of JIS code but for
		# convenient to generate code in EUC CODESET 3, move this to
		# seemingly duplicate region (83-84-ku).
		$pos = $pos - ((31 * 0x5e) + 12);

		# after 85-ku 82-ten needs to be moved 2 codepoints
		# NOTE(review): the multiplier here is 0x5c while rows are 0x5e wide
		# everywhere else in this function -- left as is, confirm it is
		# intended.
		$pos = $pos - 2 if ($pos >= 84 * 0x5c + 82);
	}

	# Split the linear position into row and column bytes, each offset by
	# 0x21.  "/" is floating-point division in Perl, so truncate explicitly
	# with int() instead of relying on "<<" to discard the fraction.
	my $row = int($pos / 0x5e + 0x21);
	my $col = ($pos % 0x5e) + 0x21;

	return $col + ($row << 8);
}