3 # Copyright (c) 2007-2015, PostgreSQL Global Development Group
5 # src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
7 # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
8 # "sjis-0213-2004-std.txt" (http://x0213.org)
12 # first generate UTF-8 --> SHIFT_JIS_2004 table
# Pass 1: read the mapping file line by line and collect entries keyed by
# UTF-8 value.
#   %array1 / %comment1  two-code-point ("combined") entries
#   %array  / %comment   single-code-point entries
# NOTE(review): the assignments of $code, $ucs, $u, $u1 and $u2 are not
# visible in this view; presumably they come from the regex captures via
# hex() -- confirm.
14 $in_file = "sjis-0213-2004-std.txt";
16 open(FILE, $in_file) || die("cannot open $in_file");
23 while ($line = <FILE>)
# Combined form: "0x<code> U+<u1>+<u2> # <comment>".
25 if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
# $rest is the text stored as the comment for this entry.
30 $rest = "U+" . $u1 . "+" . $u2 . $4;
# NOTE(review): both calls pass $ucs; presumably $ucs is reassigned between
# them on a line not shown here -- confirm.
33 $utf1 = &ucs2utf($ucs);
35 $utf2 = &ucs2utf($ucs);
# The key is the two UTF-8 values, each as 8 hex digits, concatenated.
36 $str = sprintf "%08x%08x", $utf1, $utf2;
37 $array1{$str} = $code;
38 $comment1{$str} = $rest;
# Single form: "0x<code> U+<u> # <comment>".
42 elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
46 $rest = "U+" . $u . $3;
55 $utf = &ucs2utf($ucs);
# A non-empty entry means this UTF-8 value was already seen; warn on STDERR.
56 if ($array{$utf} ne "")
59 "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
# Comments for single entries are keyed by the Shift JIS code.
66 $comment{$code} = $rest;
# Write the single-entry UTF-8 --> SHIFT_JIS_2004 table as a C array
# initializer into utf8_to_shift_jis_2004.map.
70 $file = "utf8_to_shift_jis_2004.map";
71 open(FILE, "> $file") || die("cannot open $file");
73 print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
75 print FILE "static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
# Rows are emitted in ascending numeric order of the %array key.
77 for $index (sort { $a <=> $b } keys(%array))
79 $code = $array{$index};
# Two row formats follow: one without and one with a trailing comma.
# NOTE(review): the condition selecting between them is not visible here;
# presumably the comma-less form is used for the last row -- confirm.
83 printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code,
88 printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code,
# Write the combined (two UTF-8 values per entry) UTF-8 --> SHIFT_JIS_2004
# table into utf8_to_shift_jis_2004_combined.map.
96 $file = "utf8_to_shift_jis_2004_combined.map";
97 open(FILE, "> $file") || die("cannot open $file");
99 print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
102 "static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
# Keys of %array1 are sorted as strings (cmp), not numerically.
104 for $index (sort { $a cmp $b } keys(%array1))
106 $code = $array1{$index};
# The key is split back into two 8-character halves for printing.
# Two row formats follow: one without and one with a trailing comma.
# NOTE(review): the condition selecting between them is not visible here;
# presumably the comma-less form is used for the last row -- confirm.
110 printf FILE " {0x%s, 0x%s, 0x%04x} /* %s */\n", substr($index, 0, 8),
111 substr($index, 8, 8), $code, $comment1{$index};
115 printf FILE " {0x%s, 0x%s, 0x%04x}, /* %s */\n",
116 substr($index, 0, 8), substr($index, 8, 8), $code,
124 # then generate SHIFT_JIS_2004 --> UTF-8 table
# Pass 2: read the mapping file again and collect entries keyed by the
# Shift JIS code.
#   $array1{$code} = two UTF-8 values as one "%08x%08x" string
#   $comment1{$code} = comment text for the combined entry
#   $array{$code}  = UTF-8 value of a single entry
#   $comment{$utf} = comment text for the single entry
# NOTE(review): the assignments of $code, $ucs, $u, $u1 and $u2 are not
# visible in this view; presumably they come from the regex captures via
# hex() -- confirm.
126 $in_file = "sjis-0213-2004-std.txt";
128 open(FILE, $in_file) || die("cannot open $in_file");
135 while ($line = <FILE>)
# Combined form: "0x<code> U+<u1>+<u2> # <comment>".
137 if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
142 $rest = "U+" . $u1 . "+" . $u2 . $4;
# NOTE(review): both calls pass $ucs; presumably $ucs is reassigned between
# them on a line not shown here -- confirm.
145 $utf1 = &ucs2utf($ucs);
147 $utf2 = &ucs2utf($ucs);
148 $str = sprintf "%08x%08x", $utf1, $utf2;
149 $array1{$code} = $str;
150 $comment1{$code} = $rest;
# Single form: "0x<code> U+<u> # <comment>".
154 elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
158 $rest = "U+" . $u . $3;
167 $utf = &ucs2utf($ucs);
# A non-empty entry means this Shift JIS code was already seen.
168 if ($array{$code} ne "")
171 "Warning: duplicate UTF-8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
# %array is keyed by $code in this pass, so the previously stored UTF-8
# value must be looked up by $code; looking it up by $utf reads an
# unrelated (usually missing) entry.
173 printf STDERR "Previous value: UTF-8: %08x\n", $array{$code};
178 $array{$code} = $utf;
179 $comment{$utf} = $rest;
# Write the single-entry SHIFT_JIS_2004 --> UTF-8 table as a C array
# initializer into shift_jis_2004_to_utf8.map.
183 $file = "shift_jis_2004_to_utf8.map";
184 open(FILE, "> $file") || die("cannot open $file");
# The generated-by notice names this script as it is spelled in the file
# header (UCS_to_SHIFT_JIS_2004.pl).
186 print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
188 print FILE "static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
# Rows are emitted in ascending numeric order of the %array key.
190 for $index (sort { $a <=> $b } keys(%array))
192 $code = $array{$index};
# Two row formats follow: one without and one with a trailing comma.
# NOTE(review): the condition selecting between them is not visible here;
# presumably the comma-less form is used for the last row -- confirm.
196 printf FILE " {0x%04x, 0x%08x} /* %s */\n", $index, $code,
201 printf FILE " {0x%04x, 0x%08x}, /* %s */\n", $index, $code,
# Write the combined (two UTF-8 values per entry) SHIFT_JIS_2004 --> UTF-8
# table into shift_jis_2004_to_utf8_combined.map.
209 $file = "shift_jis_2004_to_utf8_combined.map";
210 open(FILE, "> $file") || die("cannot open $file");
212 print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
215 "static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
# Rows are emitted in ascending numeric order of the %array1 key.
217 for $index (sort { $a <=> $b } keys(%array1))
219 $code = $array1{$index};
# The stored value is split into two 8-character halves for printing.
# The two printf calls differ only in the trailing comma after the row.
# NOTE(review): the condition selecting between them is not visible here;
# presumably the comma-less form is used for the last row -- confirm.
223 printf FILE " {0x%04x, 0x%s, 0x%s} /* %s */\n", $index,
224 substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
228 printf FILE " {0x%04x, 0x%s, 0x%s}, /* %s */\n", $index,
229 substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};