3 # Copyright (c) 2001-2010, PostgreSQL Global Development Group
5 # src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
7 # Generate UTF-8 <--> EUC_JP code conversion tables from
8 # map files provided by the Unicode organization.
9 # Unfortunately, the organization prohibits redistribution
10 # of the map files. So if you want to use this script,
11 # you have to obtain JIS0201.TXT, JIS0208.TXT, and JIS0212.TXT from
12 # the organization's FTP site.
17 # # and Unicode name (not used in this script)
20 # JIS0208 shift-JIS code in hex
23 # # and Unicode name (not used in this script)
28 # # and Unicode name (not used in this script)
32 # first, generate the UTF-8 --> EUC_JP table
# JIS0201 section: read JIS0201.TXT and record one %array entry per
# accepted mapping, keyed by the value ucs2utf() returns.
37 $in_file = "JIS0201.TXT";
# Abort the whole script if the map file cannot be opened.
39 open( FILE, $in_file ) || die( "cannot open $in_file" );
# Split the current record ($_) on whitespace into three fields.
# NOTE(review): $code and $ucs are assigned on lines not visible in this
# excerpt -- presumably from $c and $u; confirm.
48 ( $c, $u, $rest ) = split;
# Only entries with a code of at least 0x80 and a UCS value of at least
# 0x0080 are stored.
51 if( $code >= 0x80 && $ucs >= 0x0080 ){
# ucs2utf() is defined outside this excerpt.
52 $utf = &ucs2utf($ucs);
# A non-empty entry means this key has already been stored; warn about it.
53 if( $array{ $utf } ne "" ){
54 printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
# Store the code with 0x8e00 OR'ed in.
# NOTE(review): 0x8e is presumably the EUC_JP single-shift lead byte used
# for JIS X 0201 characters -- confirm.
60 $array{ $utf } = ($code | 0x8e00);
# JIS0208 section of the UTF-8 --> EUC_JP pass: read JIS0208.TXT and add
# its mappings to %array, keyed by the value ucs2utf() returns.
68 $in_file = "JIS0208.TXT";
# Abort the whole script if the map file cannot be opened.
70 open( FILE, $in_file ) || die( "cannot open $in_file" );
# Split the current record ($_) on whitespace. This file has one more
# leading field ($s) than the other two map files.
# NOTE(review): $s is presumably the shift-JIS column, and $code / $ucs are
# assigned on lines not visible in this excerpt -- confirm.
77 ( $s, $c, $u, $rest ) = split;
# Only entries with a code of at least 0x80 and a UCS value of at least
# 0x0080 are stored.
80 if( $code >= 0x80 && $ucs >= 0x0080 ){
# ucs2utf() is defined outside this excerpt.
81 $utf = &ucs2utf($ucs);
# A non-empty entry means this key has already been stored; warn about it.
82 if( $array{ $utf } ne "" ){
83 printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
# Store the code with bit 7 set in each of its two low bytes (| 0x8080).
88 $array{ $utf } = ($code | 0x8080);
# JIS0212 section of the UTF-8 --> EUC_JP pass: read JIS0212.TXT and add
# its mappings to %array, keyed by the value ucs2utf() returns.
96 $in_file = "JIS0212.TXT";
# Abort the whole script if the map file cannot be opened.
98 open( FILE, $in_file ) || die( "cannot open $in_file" );
# Split the current record ($_) on whitespace into three fields.
# NOTE(review): $code and $ucs are assigned on lines not visible in this
# excerpt -- presumably from $c and $u; confirm.
105 ( $c, $u, $rest ) = split;
# Only entries with a code of at least 0x80 and a UCS value of at least
# 0x0080 are stored.
108 if( $code >= 0x80 && $ucs >= 0x0080 ){
# ucs2utf() is defined outside this excerpt.
109 $utf = &ucs2utf($ucs);
# A non-empty entry means this key has already been stored; warn about it.
110 if( $array{ $utf } ne "" ){
111 printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
# Store the code with 0x8f8080 OR'ed in: bit 7 set in each of the two low
# bytes plus a 0x8f third byte.
# NOTE(review): 0x8f is presumably the EUC_JP single-shift lead byte used
# for JIS X 0212 characters -- confirm.
116 $array{ $utf } = ($code | 0x8f8080);
122 # write the collected UTF8 --> EUC_JP table to its map file
125 $file = "utf8_to_euc_jp.map";
# Open the output file for writing (truncating it); abort on failure.
126 open( FILE, "> $file" ) || die( "cannot open $file" );
# Emit the opening of a C array definition; $count is interpolated as the
# array size.
# NOTE(review): $count is set on a line not visible in this excerpt --
# presumably the number of keys in %array; confirm.
127 print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
# Walk the keys in ascending numeric order.
129 for $index ( sort {$a <=> $b} keys( %array ) ){
130 $code = $array{ $index };
# Two output forms: without and with a trailing comma.
# NOTE(review): the condition choosing between them is not visible here --
# presumably the final entry is written without the comma; confirm.
133 printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
135 printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
143 # then generate EUC_JP --> UTF8 table
# JIS0201 section: read JIS0201.TXT again and record one %array entry per
# accepted mapping, this time keyed by $code with the ucs2utf() result as
# the value.
# NOTE(review): %array is the same hash the earlier pass filled; confirm it
# is cleared on a line not visible in this excerpt.
149 $in_file = "JIS0201.TXT";
# Abort the whole script if the map file cannot be opened.
151 open( FILE, $in_file ) || die( "cannot open $in_file" );
# Split the current record ($_) on whitespace into three fields.
# NOTE(review): $code and $ucs are assigned on lines not visible in this
# excerpt -- presumably from $c and $u; confirm.
160 ( $c, $u, $rest ) = split;
# Only entries with a code of at least 0x80 and a UCS value of at least
# 0x0080 are stored.
163 if( $code >= 0x80 && $ucs >= 0x0080 ){
# ucs2utf() is defined outside this excerpt.
164 $utf = &ucs2utf($ucs);
# A non-empty entry means this code has already been stored.
165 if( $array{ $code } ne "" ){
# Report the duplicated key itself ($code) so the value printed matches
# the "duplicate code" label and the check above.
166 printf STDERR "Warning: duplicate code: %04x\n",$code;
173 $array{ $code } = $utf;
# JIS0208 section of the EUC_JP --> UTF8 pass: read JIS0208.TXT and add its
# mappings to %array, keyed by $code with the ucs2utf() result as the value.
181 $in_file = "JIS0208.TXT";
# Abort the whole script if the map file cannot be opened.
183 open( FILE, $in_file ) || die( "cannot open $in_file" );
# Split the current record ($_) on whitespace. This file has one more
# leading field ($s) than the other two map files.
# NOTE(review): $s is presumably the shift-JIS column, and $code / $ucs are
# assigned on lines not visible in this excerpt -- confirm.
190 ( $s, $c, $u, $rest ) = split;
# Only entries with a code of at least 0x80 and a UCS value of at least
# 0x0080 are stored.
193 if( $code >= 0x80 && $ucs >= 0x0080 ){
# ucs2utf() is defined outside this excerpt.
194 $utf = &ucs2utf($ucs);
# A non-empty entry means this code has already been stored.
195 if( $array{ $code } ne "" ){
# Report the duplicated key itself ($code) so the value printed matches
# the "duplicate code" label and the check above.
196 printf STDERR "Warning: duplicate code: %04x\n",$code;
202 $array{ $code } = $utf;
# JIS0212 section of the EUC_JP --> UTF8 pass: read JIS0212.TXT and add its
# mappings to %array, keyed by $code with the ucs2utf() result as the value.
210 $in_file = "JIS0212.TXT";
# Abort the whole script if the map file cannot be opened.
212 open( FILE, $in_file ) || die( "cannot open $in_file" );
# Split the current record ($_) on whitespace into three fields.
# NOTE(review): $code and $ucs are assigned on lines not visible in this
# excerpt -- presumably from $c and $u; confirm.
219 ( $c, $u, $rest ) = split;
# Only entries with a code of at least 0x80 and a UCS value of at least
# 0x0080 are stored.
222 if( $code >= 0x80 && $ucs >= 0x0080 ){
# ucs2utf() is defined outside this excerpt.
223 $utf = &ucs2utf($ucs);
# A non-empty entry means this code has already been stored.
224 if( $array{ $code } ne "" ){
# Report the duplicated key itself ($code) so the value printed matches
# the "duplicate code" label and the check above.
225 printf STDERR "Warning: duplicate code: %04x\n",$code;
231 $array{ $code } = $utf;
# Write the collected EUC_JP --> UTF8 table to its map file.
236 $file = "euc_jp_to_utf8.map";
# Open the output file for writing (truncating it); abort on failure.
237 open( FILE, "> $file" ) || die( "cannot open $file" );
# Emit the opening of a C array definition; $count is interpolated as the
# array size.
# NOTE(review): $count is set on a line not visible in this excerpt --
# presumably the number of keys in %array; confirm.
238 print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
# Walk the keys in ascending numeric order.
239 for $index ( sort {$a <=> $b} keys( %array ) ){
240 $utf = $array{ $index };
# Two output forms: without and with a trailing comma.
# NOTE(review): the condition choosing between them is not visible here --
# presumably the final entry is written without the comma; confirm.
243 printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
245 printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;