3 # Copyright (c) 2001-2019, PostgreSQL Global Development Group
5 # src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
7 # Generate UTF-8 <--> EUC_JP code conversion tables from
8 # map files provided by the Unicode organization.
9 # Unfortunately, the organization prohibits redistribution
10 # of the map files, so if you want to use this script,
11 # you have to obtain CP932.TXT and JIS0212.TXT from the
12 # organization's ftp site.
17 my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl';
# Load the JIS0212.TXT map. read_source is defined elsewhere; its result is
# used below as a reference to an array of hash refs whose {code} and {ucs}
# fields are read and whose {direction} field is assigned.
20 my $jis0212 = &read_source("JIS0212.TXT");
# Adjust each JIS0212 entry in place.
24 foreach my $i (@$jis0212)
27 # We have a different mapping for this in the EUC_JP to UTF-8 direction.
# Code 0x2243 is therefore restricted to the FROM_UNICODE direction.
28 if ($i->{code} == 0x2243)
30 $i->{direction} = FROM_UNICODE;
# Code 0x2271 is restricted to the TO_UNICODE direction.
33 if ($i->{code} == 0x2271)
35 $i->{direction} = TO_UNICODE;
# For entries whose Unicode code point is 0x80 or above, OR 0x8f8080 into the
# code. NOTE(review): presumably this adds the EUC_JP three-byte prefix 0x8f
# and sets the high bit of both trailing bytes -- confirm.
38 if ($i->{ucs} >= 0x080)
40 $i->{code} = $i->{code} | 0x8f8080;
# Load the CP932.TXT map; like the JIS0212 data it is iterated as a reference
# to an array of hash refs.
51 my $ct932 = &read_source("CP932.TXT");
53 foreach my $i (@$ct932)
# The {code} field of a CP932 entry is treated as a Shift-JIS code.
55 my $sjis = $i->{code};
57 # We have a different mapping for this in the EUC_JP to UTF-8 direction.
# Translate the Shift-JIS code with sjis2jis.
67 my $jis = &sjis2jis($sjis);
# Final arm of a conditional expression: 0x8f8080 is chosen when the
# Shift-JIS code is 0xeffd or above, 0x8080 otherwise.
72 : ($sjis >= 0xeffd ? 0x8f8080 : 0x8080));
74 # Remember the SJIS code for later.
# Walk @mapping and set the conversion direction of entries according to the
# Shift-JIS code stored in their {sjis} field.
81 foreach my $i (@mapping)
83 my $sjis = $i->{sjis};
85 # These SJIS characters are excluded completely.
# (&& binds tighter than ||, so this is an OR of three inclusive ranges.)
86 if ( $sjis >= 0xed00 && $sjis <= 0xeef9
87 || $sjis >= 0xfa54 && $sjis <= 0xfa56
88 || $sjis >= 0xfa58 && $sjis <= 0xfc4b)
90 $i->{direction} = NONE;
94 # These SJIS characters are only in the UTF-8 to EUC_JP table
95 if ($sjis == 0xeefa || $sjis == 0xeefb || $sjis == 0xeefc)
97 $i->{direction} = FROM_UNICODE;
# Closing range (0xfa4a..0xfa53, inclusive) of a condition that marks
# entries as TO_UNICODE.
110 || ($sjis >= 0xfa4a && $sjis <= 0xfa53))
112 $i->{direction} = TO_UNICODE;
# Each entry in this list carries a 'comment' string naming the character
# it maps.
122 comment => '# CJK(4EFC)'
128 comment => '# CJK(50F4)'
134 comment => '# CJK(51EC)'
140 comment => '# CJK(5307)'
146 comment => '# CJK(5324)'
152 comment => '# CJK(548A)'
158 comment => '# CJK(5759)'
164 comment => '# CJK(589E)'
170 comment => '# CJK(5BEC)'
176 comment => '# CJK(5CF5)'
182 comment => '# CJK(5D53)'
188 comment => '# CJK(5FB7)'
194 comment => '# CJK(6085)'
200 comment => '# CJK(6120)'
206 comment => '# CJK(654E)'
212 comment => '# CJK(663B)'
218 comment => '# CJK(6665)'
224 comment => '# CJK(6801)'
230 comment => '# CJK(6A6B)'
236 comment => '# CJK(6AE2)'
242 comment => '# CJK(6DF2)'
248 comment => '# CJK(6DF8)'
254 comment => '# CJK(7028)'
260 comment => '# CJK(70BB)'
266 comment => '# CJK(7501)'
272 comment => '# CJK(7682)'
278 comment => '# CJK(769E)'
284 comment => '# CJK(7930)'
290 comment => '# CJK(7AE7)'
296 comment => '# CJK(7DA0)'
302 comment => '# CJK(7DD6)'
308 comment => '# CJK(8362)'
314 comment => '# CJK(85B0)'
320 comment => '# CJK(8807)'
326 comment => '# CJK(8B7F)'
332 comment => '# CJK(8CF4)'
338 comment => '# CJK(8D76)'
344 comment => '# CJK(90DE)'
350 comment => '# CJK(9115)'
356 comment => '# CJK(9592)'
362 comment => '# CJK(973B)'
368 comment => '# CJK(974D)'
374 comment => '# CJK(9751)'
380 comment => '# CJK(999E)'
386 comment => '# CJK(9AD9)'
392 comment => '# CJK(9B72)'
398 comment => '# CJK(9ED1)'
404 comment => '# CJK COMPATIBILITY IDEOGRAPH-F929'
410 comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC'
416 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E'
422 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F'
428 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10'
434 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11'
440 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12'
446 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13'
452 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14'
458 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15'
464 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16'
470 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17'
476 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18'
482 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19'
488 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A'
494 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B'
500 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C'
506 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D'
512 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E'
518 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F'
524 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20'
530 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21'
536 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22'
542 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23'
548 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24'
554 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25'
560 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26'
566 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27'
572 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28'
578 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29'
584 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A'
590 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B'
596 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C'
602 comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D'
608 comment => '# FULLWIDTH APOSTROPHE'
614 comment => '# FULLWIDTH BROKEN BAR'
617 # additional conversions for EUC_JP -> UTF-8 conversion
# The three entries below are marked TO_UNICODE.
619 direction => TO_UNICODE,
622 comment => '# NUMERO SIGN'
625 direction => TO_UNICODE,
628 comment => '# TELEPHONE SIGN'
631 direction => TO_UNICODE,
634 comment => '# PARENTHESIZED IDEOGRAPH STOCK'
# Hand the completed @mapping to print_conversion_tables (defined elsewhere),
# together with this script's path and the encoding name "EUC_JP".
637 print_conversion_tables($this_script, "EUC_JP", \@mapping);
640 #######################################################################
641 # sjis2jis ; SJIS => JIS conversion
# Codes up to and including 0x100 are returned unchanged.
646 return $sjis if ($sjis <= 0x100);
# Low byte of the Shift-JIS code.
649 my $lo = $sjis & 0xff;
# NOTE(review): presumably closes the gap at 0x7f, which Shift-JIS does not
# use as a trail byte -- confirm.
651 if ($lo >= 0x80) { $lo--; }
# NOTE(review): presumably closes the gap between the two Shift-JIS
# lead-byte ranges -- confirm.
653 if ($hi >= 0xe0) { $hi -= 0x40; }
# Linear position of the character, with 0xbc (188) positions per lead byte.
655 my $pos = $lo + $hi * 0xbc;
657 if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
660 # This region (115-ku) is outside the range of JIS codes, but to make it
661 # convenient to generate codes in EUC CODESET 3, move it to the
662 # seemingly duplicate region (83-84-ku).
663 $pos = $pos - ((31 * 0x5e) + 12);
665 # after 85-ku 82-ten needs to be moved 2 codepoints
# NOTE(review): the row multiplier here is 0x5c, while every other row
# computation in this routine uses 0x5e -- confirm this is intentional.
666 $pos = $pos - 2 if ($pos >= 84 * 0x5c + 82);
# Split the position into a row and a column of 0x5e (94) cells each.
# Perl's / is not integer division (no "use integer" is visible here), so
# $hi2 may carry a fraction; the << below converts its left operand to an
# integer, which discards that fraction.
669 my $hi2 = $pos / 0x5e;
670 my $lo2 = ($pos % 0x5e);
# Offset row and column by 0x21 and pack them as high and low byte.
672 my $ret = $lo2 + 0x21 + (($hi2 + 0x21) << 8);