granicus.if.org Git - postgresql/blob - src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
pgindent run for 9.5
[postgresql] / src / backend / utils / mb / Unicode / UCS_to_SHIFT_JIS_2004.pl
1 #! /usr/bin/perl
2 #
3 # Copyright (c) 2007-2015, PostgreSQL Global Development Group
4 #
5 # src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
6 #
7 # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
8 # "sjis-0213-2004-std.txt" (http://x0213.org)
9
10 require "ucs2utf.pl";
11
12 # first generate UTF-8 --> SHIFT_JIS_2004 table
13
# Pass 1: read the mapping file and collect the UTF-8 --> SHIFT_JIS_2004
# direction.
#
# A line of the form "0x<sjis> U+<ucs1>+<ucs2> # <text>" names two code
# points joined by '+'.  It is stored in %array1 and %comment1 under a key
# made of both UTF-8 values printed as 8 hex digits each.
#
# A line of the form "0x<sjis> U+<ucs> # <text>" is stored in %array
# (UTF-8 value => Shift JIS code) and %comment (Shift JIS code => text).
#
# Every other line is skipped.  $count and $count1 end up holding the number
# of entries stored in %array and %array1 respectively.

$in_file = "sjis-0213-2004-std.txt";

# Three-argument open: the file name can never be taken for a mode or a pipe,
# and $! tells the user why the open failed.
open(FILE, '<', $in_file) || die("cannot open $in_file: $!");

# Clear the tables and counters by name.  "reset 'array'" does not do that:
# reset reads its argument as a list of first letters and wipes every package
# variable starting with one of them, which only happened to cover these.
%array    = ();
%array1   = ();
%comment  = ();
%comment1 = ();
$count    = 0;
$count1   = 0;

while (my $line = <FILE>)
{
    # The two-code-point form must be tried first: the single form's
    # pattern would match these lines as well.
    if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
    {
        my ($sjis_hex, $first_hex, $second_hex, $text) = ($1, $2, $3, $4);

        my $sjis       = hex($sjis_hex);
        my $utf_first  = ucs2utf(hex($first_hex));
        my $utf_second = ucs2utf(hex($second_hex));
        my $key        = sprintf "%08x%08x", $utf_first, $utf_second;

        $array1{$key}   = $sjis;
        $comment1{$key} = "U+" . $first_hex . "+" . $second_hex . $text;
        $count1++;
        next;
    }

    next unless $line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/;

    my ($sjis_hex, $ucs_hex, $text) = ($1, $2, $3);

    my $ucs  = hex($ucs_hex);
    my $sjis = hex($sjis_hex);
    my $utf  = ucs2utf($ucs);

    # Only the first Shift JIS code seen for a given UTF-8 value is kept.
    if (exists $array{$utf})
    {
        printf STDERR
          "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
          $ucs, $sjis;
        next;
    }
    $count++;

    $array{$utf}    = $sjis;
    $comment{$sjis} = "U+" . $ucs_hex . $text;
}
close(FILE);
69
# Write utf8_to_shift_jis_2004.map: a C array with one
# {UTF-8 value, Shift JIS code} initializer per %array entry, in ascending
# numeric order of the UTF-8 value, each followed by its comment text.
$file = "utf8_to_shift_jis_2004.map";
open(FILE, "> $file") || die("cannot open $file");

print FILE "/*\n"
  . " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n"
  . " */\n"
  . "static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";

foreach my $utf8 (sort { $a <=> $b } keys %array)
{
    my $sjis = $array{$utf8};

    # $count was incremented once per stored entry while reading, so the
    # entry that brings it down to zero is the last one; that one is written
    # without a trailing comma.
    my $format =
      (--$count == 0)
      ? "  {0x%08x, 0x%06x} /* %s */\n"
      : "  {0x%08x, 0x%06x},        /* %s */\n";

    printf FILE $format, $utf8, $sjis, $comment{$sjis};
}

print FILE "};\n";
close(FILE);
95
# Write utf8_to_shift_jis_2004_combined.map: a C array with one
# {first UTF-8 value, second UTF-8 value, Shift JIS code} initializer per
# %array1 entry, ordered by string comparison of the 16-hex-digit key.
$file = "utf8_to_shift_jis_2004_combined.map";
open(FILE, "> $file") || die("cannot open $file");

print FILE "/*\n"
  . " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n"
  . " */\n"
  . "static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";

foreach my $key (sort { $a cmp $b } keys %array1)
{
    # The key is two 8-digit hex strings back to back; split it again.
    my $utf_first  = substr($key, 0, 8);
    my $utf_second = substr($key, 8, 8);
    my $sjis       = $array1{$key};

    # $count1 was incremented once per stored entry while reading; the entry
    # that brings it to zero is the last and is written without a comma.
    my $format =
      (--$count1 == 0)
      ? "  {0x%s, 0x%s, 0x%04x}     /* %s */\n"
      : "  {0x%s, 0x%s, 0x%04x},    /* %s */\n";

    printf FILE $format, $utf_first, $utf_second, $sjis, $comment1{$key};
}

print FILE "};\n";
close(FILE);
123
124 # then generate SHIFT_JIS_2004 --> UTF-8 table
125
# Pass 2: read the mapping file again and collect the
# SHIFT_JIS_2004 --> UTF-8 direction.
#
# A line of the form "0x<sjis> U+<ucs1>+<ucs2> # <text>" names two code
# points joined by '+'.  It is stored in %array1 (Shift JIS code => both
# UTF-8 values printed as 8 hex digits each) and %comment1.
#
# A line of the form "0x<sjis> U+<ucs> # <text>" is stored in %array
# (Shift JIS code => UTF-8 value) and %comment (UTF-8 value => text).
#
# Every other line is skipped.  $count and $count1 end up holding the number
# of entries stored in %array and %array1 respectively.

$in_file = "sjis-0213-2004-std.txt";

# Three-argument open: the file name can never be taken for a mode or a pipe,
# and $! tells the user why the open failed.
open(FILE, '<', $in_file) || die("cannot open $in_file: $!");

# Drop whatever an earlier pass left in the tables and counters.  This is
# done by name because "reset 'array'" does not clear %array as such: reset
# reads its argument as a list of first letters and wipes every package
# variable starting with one of them.
%array    = ();
%array1   = ();
%comment  = ();
%comment1 = ();
$count    = 0;
$count1   = 0;

while (my $line = <FILE>)
{
    # The two-code-point form must be tried first: the single form's
    # pattern would match these lines as well.
    if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
    {
        my ($sjis_hex, $first_hex, $second_hex, $text) = ($1, $2, $3, $4);

        my $sjis       = hex($sjis_hex);
        my $utf_first  = ucs2utf(hex($first_hex));
        my $utf_second = ucs2utf(hex($second_hex));

        $array1{$sjis}   = sprintf "%08x%08x", $utf_first, $utf_second;
        $comment1{$sjis} = "U+" . $first_hex . "+" . $second_hex . $text;
        $count1++;
        next;
    }

    next unless $line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/;

    my ($sjis_hex, $ucs_hex, $text) = ($1, $2, $3);

    my $ucs  = hex($ucs_hex);
    my $sjis = hex($sjis_hex);
    my $utf  = ucs2utf($ucs);

    # Only the first UTF-8 value seen for a given Shift JIS code is kept.
    if (exists $array{$sjis})
    {
        printf STDERR
          "Warning: duplicate UTF-8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
          $ucs, $sjis;

        # %array is keyed by the Shift JIS code in this pass, so the value
        # already recorded must be looked up by that code, not by $utf.
        printf STDERR "Previous value: UTF-8: %08x\n", $array{$sjis};
        next;
    }
    $count++;

    $array{$sjis}  = $utf;
    $comment{$utf} = "U+" . $ucs_hex . $text;
}
close(FILE);
182
# Write shift_jis_2004_to_utf8.map: a C array with one
# {Shift JIS code, UTF-8 value} initializer per %array entry, in ascending
# numeric order of the Shift JIS code, each followed by its comment text.
$file = "shift_jis_2004_to_utf8.map";

# Three-argument open keeps the mode separate from the file name, and $!
# reports why the open failed.
open(FILE, '>', $file) || die("cannot open $file: $!");

# The header names this script, spelled the same way as in the other
# generated map files.
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";

for my $sjis (sort { $a <=> $b } keys(%array))
{
    my $utf = $array{$sjis};

    # $count was incremented once per stored entry while reading, so it
    # reaches zero on the last entry, which is written without a comma.
    $count--;
    if ($count == 0)
    {
        printf FILE "  {0x%04x, 0x%08x} /* %s */\n", $sjis, $utf,
          $comment{$utf};
    }
    else
    {
        printf FILE "  {0x%04x, 0x%08x},        /* %s */\n", $sjis, $utf,
          $comment{$utf};
    }
}

print FILE "};\n";

# Buffered write errors (a full disk, for instance) only surface at close.
close(FILE) || die("cannot close $file: $!");
208
# Write shift_jis_2004_to_utf8_combined.map: a C array with one
# {Shift JIS code, first UTF-8 value, second UTF-8 value} initializer per
# %array1 entry, in ascending numeric order of the Shift JIS code.
$file = "shift_jis_2004_to_utf8_combined.map";
open(FILE, "> $file") || die("cannot open $file");

print FILE "/*\n"
  . " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n"
  . " */\n"
  . "static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";

foreach my $sjis (sort { $a <=> $b } keys %array1)
{
    # The stored value is two 8-digit hex strings back to back.
    my $pair       = $array1{$sjis};
    my $utf_first  = substr($pair, 0, 8);
    my $utf_second = substr($pair, 8, 8);

    # $count1 was incremented once per stored entry while reading; the entry
    # that brings it to zero is the last and is written without a comma.
    my $format =
      (--$count1 == 0)
      ? "  {0x%04x, 0x%s, 0x%s}     /* %s */\n"
      : "  {0x%04x, 0x%s, 0x%s},    /* %s */\n";

    printf FILE $format, $sjis, $utf_first, $utf_second, $comment1{$sjis};
}

print FILE "};\n";
close(FILE);