Rewrite the perl scripts to produce our Unicode conversion tables.

author Heikki Linnakangas <heikki.linnakangas@iki.fi>

Wed, 30 Nov 2016 12:54:02 +0000 (14:54 +0200)

committer Heikki Linnakangas <heikki.linnakangas@iki.fi>

Wed, 30 Nov 2016 12:54:52 +0000 (14:54 +0200)
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 30 Nov 2016 12:54:02 +0000 (14:54 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 30 Nov 2016 12:54:52 +0000 (14:54 +0200)
diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile

index 9d2ef5e3d22e7eaa6b4779cfbc958582c0a3e76a..ea21f4a8527084a45ef5557fb201211d04b1fda3 100644 (file)
--- a/src/backend/utils/mb/Unicode/Makefile
+++ b/src/backend/utils/mb/Unicode/Makefile
@@ -39,8 +39,6 @@ WINMAPS = win866_to_utf8.map utf8_to_win866.map \
         win1258_to_utf8.map utf8_to_win1258.map
  
  GENERICMAPS = $(ISO8859MAPS) $(WINMAPS) \
-       johab_to_utf8.map utf8_to_johab.map \
-       uhc_to_utf8.map utf8_to_uhc.map \
         gbk_to_utf8.map utf8_to_gbk.map \
         koi8r_to_utf8.map utf8_to_koi8r.map
  
@@ -51,6 +49,8 @@ SPECIALMAPS = euc_cn_to_utf8.map utf8_to_euc_cn.map \
         sjis_to_utf8.map utf8_to_sjis.map \
         gb18030_to_utf8.map utf8_to_gb18030.map \
         big5_to_utf8.map utf8_to_big5.map \
+       johab_to_utf8.map utf8_to_johab.map \
+       uhc_to_utf8.map utf8_to_uhc.map \
         euc_jis_2004_to_utf8.map euc_jis_2004_to_utf8_combined.map \
         utf8_to_euc_jis_2004.map utf8_to_euc_jis_2004_combined.map \
         shift_jis_2004_to_utf8.map shift_jis_2004_to_utf8_combined.map \
@@ -63,23 +63,29 @@ ISO8859TEXTS = 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT \
         8859-10.TXT 8859-13.TXT 8859-14.TXT 8859-15.TXT \
         8859-16.TXT
  
-WINTEXTS = CP866.TXT CP874.TXT CP936.TXT CP949.TXT \
+WINTEXTS = CP866.TXT CP874.TXT CP936.TXT \
         CP1250.TXT CP1251.TXT \
         CP1252.TXT CP1253.TXT CP1254.TXT CP1255.TXT \
         CP1256.TXT CP1257.TXT CP1258.TXT
  
  GENERICTEXTS = $(ISO8859TEXTS) $(WINTEXTS) \
-       KOI8-R.TXT KOI8-U.TXT JOHAB.TXT
+       KOI8-R.TXT KOI8-U.TXT
  
  all: $(MAPS)
  
  $(GENERICMAPS): UCS_to_most.pl $(GENERICTEXTS)
         $(PERL) $<
  
-euc_jp_to_utf8.map utf8_to_euc_jp.map: UCS_to_EUC_JP.pl JIS0201.TXT JIS0208.TXT JIS0212.TXT
+johab_to_utf8.map utf8_to_johab.map: UCS_to_JOHAB.pl JOHAB.TXT
+       $(PERL) $<
+
+uhc_to_utf8.map utf8_to_uhc.map: UCS_to_UHC.pl windows-949-2000.xml
+       $(PERL) $<
+
+euc_jp_to_utf8.map utf8_to_euc_jp.map: UCS_to_EUC_JP.pl CP932.TXT JIS0212.TXT
         $(PERL) $<
  
-euc_cn_to_utf8.map utf8_to_euc_cn.map: UCS_to_EUC_CN.pl GB2312.TXT
+euc_cn_to_utf8.map utf8_to_euc_cn.map: UCS_to_EUC_CN.pl gb-18030-2000.xml
         $(PERL) $<
  
  euc_kr_to_utf8.map utf8_to_euc_kr.map: UCS_to_EUC_KR.pl KSX1001.TXT
@@ -119,7 +125,7 @@ BIG5.TXT CNS11643.TXT:
  euc-jis-2004-std.txt sjis-0213-2004-std.txt:
         $(DOWNLOAD) http://x0213.org/codetable/$(@F)
  
-gb-18030-2000.xml:
+gb-18030-2000.xml windows-949-2000.xml:
         $(DOWNLOAD) https://ssl.icu-project.org/repos/icu/data/trunk/charset/data/xml/$(@F)
  
  GB2312.TXT:
@@ -137,7 +143,7 @@ KOI8-R.TXT KOI8-U.TXT:
  $(ISO8859TEXTS):
         $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/ISO8859/$(@F)
  
-$(filter-out CP8%,$(WINTEXTS)):
+$(filter-out CP8%,$(WINTEXTS)) CP932.TXT CP950.TXT:
         $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)
  
  $(filter CP8%,$(WINTEXTS)):
diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl

index 127fd157b07db238c913ee606ad6b9395c783a4e..6a1321bab84eed95dd6d2059fbd70b6d09779ded 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
@@ -25,56 +25,17 @@
  #               # and Unicode name (not used in this script)
  
  
-require "ucs2utf.pl";
+require "convutils.pm";
  
+# Load BIG5.TXT
+my $all = &read_source("BIG5.TXT");
  
-#
-# first, generate UTF8 --> BIG5 table
-#
-$in_file = "BIG5.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
+# Load CP950.TXT
+my $cp950txt = &read_source("CP950.TXT");
  
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-               $array{$utf} = $code;
-       }
-}
-close(FILE);
-
-$in_file = "CP950.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
+foreach my $i (@$cp950txt) {
+       my $code = $i->{code};
+       my $ucs = $i->{ucs};
  
         # Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
         # from CP950.TXT
@@ -83,126 +44,25 @@ while (<FILE>)
                 && $code >= 0xf9d6
                 && $code <= 0xf9dc)
         {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-               $array{$utf} = $code;
+               push @$all, {code => $code,
+                                        ucs => $ucs,
+                                        comment => $i->{comment},
+                                        direction => "both"};
         }
  }
-close(FILE);
-
-$file = lc("utf8_to_big5.map");
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapBIG5[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-       }
-}
-
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate BIG5 --> UTF8 table
-#
-$in_file = "BIG5.TXT";
  
-open(FILE, $in_file) || die("cannot open $in_file");
+foreach my $i (@$all) {
+       my $code = $i->{code};
+       my $ucs = $i->{ucs};
  
-reset 'array';
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-               $array{$code} = $utf;
-       }
-}
-close(FILE);
-
-$in_file = "CP950.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-
-       # Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
-       # from CP950.TXT
-       if (   $code >= 0x80
-               && $ucs >= 0x0080
-               && $code >= 0xf9d6
-               && $code <= 0xf9dc)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-               $array{$code} = $utf;
-       }
-}
-close(FILE);
-
-$file = lc("big5_to_utf8.map");
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapBIG5[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $utf = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-       }
-       else
+       # BIG5.TXT maps several BIG5 characters to U+FFFD. The UTF-8 to BIG5 mapping can
+       # contain only one of them. XXX: Doesn't really make sense to include any of them,
+       # but for historical reasons, we map the first one of them.
+       if ($i->{ucs} == 0xFFFD && $i->{code} != 0xA15A)
         {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
+               $i->{direction} = "to_unicode";
         }
  }
  
-print FILE "};\n";
-close(FILE);
+# Output
+print_tables("BIG5", $all);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl

index 53f44773c938fa63d7bc22d98dd6d834420b2b35..8df23f8be65fae3ff3b90c32328b2dc49c592b32 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
@@ -1,128 +1,76 @@
  #! /usr/bin/perl
  #
-# Copyright (c) 2001-2016, PostgreSQL Global Development Group
+# Copyright (c) 2007-2016, PostgreSQL Global Development Group
  #
-# src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+# src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
  #
-# Generate UTF-8 <--> EUC_CN code conversion tables from
-# map files provided by Unicode organization.
-# Unfortunately it is prohibited by the organization
-# to distribute the map files. So if you try to use this script,
-# you have to obtain GB2312.TXT from
-# the organization's ftp site.
+# Generate UTF-8 <--> EUC_CN code conversion tables from
+# "gb-18030-2000.xml", obtained from
+# http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/
  #
-# GB2312.TXT format:
-#               GB2312 code in hex
-#               UCS-2 code in hex
-#               # and Unicode name (not used in this script)
+# The lines we care about in the source file look like
+#    <a u="009A" b="81 30 83 36"/>
+# where the "u" field is the Unicode code point in hex,
+# and the "b" field is the hex byte sequence for GB18030
  
-require "ucs2utf.pl";
+require "convutils.pm";
  
-# first generate UTF-8 --> EUC_CN table
+# Read the input
  
-$in_file = "GB2312.TXT";
+$in_file = "gb-18030-2000.xml";
  
  open(FILE, $in_file) || die("cannot open $in_file");
  
+my @mapping;
+
  while (<FILE>)
  {
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
+       next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
+       $u = $1;
+       $c = $2;
+       $c =~ s/ //g;
         $ucs  = hex($u);
         $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-
-               $array{$utf} = ($code | 0x8080);
-       }
-}
-close(FILE);
-
-$file = "utf8_to_euc_cn.map";
-open(FILE, "> $file") || die("cannot open $file");
  
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
+       # The GB-18030 character set, which we use as the source, contains
+       # a lot of extra characters on top of the GB2312 character set that
+       # EUC_CN encodes. Filter out those extra characters.
+       next if (($code & 0xFF) < 0xA1);
+       next if (!($code >= 0xA100 && $code <= 0xA9FF ||
+                          $code >= 0xB000 && $code <= 0xF7FF));
+
+       next if ($code >= 0xA2A1 && $code <= 0xA2B0);
+       next if ($code >= 0xA2E3 && $code <= 0xA2E4);
+       next if ($code >= 0xA2EF && $code <= 0xA2F0);
+       next if ($code >= 0xA2FD && $code <= 0xA2FE);
+       next if ($code >= 0xA4F4 && $code <= 0xA4FE);
+       next if ($code >= 0xA5F7 && $code <= 0xA5FE);
+       next if ($code >= 0xA6B9 && $code <= 0xA6C0);
+       next if ($code >= 0xA6D9 && $code <= 0xA6FE);
+       next if ($code >= 0xA7C2 && $code <= 0xA7D0);
+       next if ($code >= 0xA7F2 && $code <= 0xA7FE);
+       next if ($code >= 0xA8BB && $code <= 0xA8C4);
+       next if ($code >= 0xA8EA && $code <= 0xA8FE);
+       next if ($code >= 0xA9A1 && $code <= 0xA9A3);
+       next if ($code >= 0xA9F0 && $code <= 0xA9FE);
+       next if ($code >= 0xD7FA && $code <= 0xD7FE);
+
+       # A couple of characters are mapped differently from GB-2312 or GB-18030
+       if ($code == 0xA1A4)
         {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
+               $ucs = 0x30FB;
         }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-       }
-}
-
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_CN --> UTF8 table
-#
-reset 'array';
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
+       if ($code == 0xA1AA)
         {
-               next;
+               $ucs = 0x2015;
         }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$code} ne "")
-               {
-                       printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
  
-               $code |= 0x8080;
-               $array{$code} = $utf;
+       push @mapping, {
+               ucs => $ucs,
+               code => $code,
+               direction => 'both'
         }
  }
  close(FILE);
  
-$file = "euc_cn_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $utf = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-       }
-}
-
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_CN", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl

index d2f1b757cb30ac3c1fb86c7b78dacca1682ebf35..b4e140b657c993be291dfafa3ea66d13181341af 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -7,9 +7,7 @@
  # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
  # "euc-jis-2004-std.txt" (http://x0213.org)
  
-require "ucs2utf.pl";
-
-$TEST = 0;
+require "convutils.pm";
  
  # first generate UTF-8 --> EUC_JIS_2004 table
  
@@ -17,10 +15,7 @@ $in_file = "euc-jis-2004-std.txt";
  
  open(FILE, $in_file) || die("cannot open $in_file");
  
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
+my @all;
  
  while ($line = <FILE>)
  {
@@ -31,14 +26,14 @@ while ($line = <FILE>)
                 $u2             = $3;
                 $rest           = "U+" . $u1 . "+" . $u2 . $4;
                 $code           = hex($c);
-               $ucs            = hex($u1);
-               $utf1           = &ucs2utf($ucs);
-               $ucs            = hex($u2);
-               $utf2           = &ucs2utf($ucs);
-               $str            = sprintf "%08x%08x", $utf1, $utf2;
-               $array1{$str}   = $code;
-               $comment1{$str} = $rest;
-               $count1++;
+               $ucs1           = hex($u1);
+               $ucs2           = hex($u2);
+
+               push @all, { direction => 'both',
+                                        ucs => $ucs1,
+                                        ucs_second => $ucs2,
+                                        code => $code,
+                                        comment => $rest };
                 next;
         }
         elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
@@ -54,252 +49,11 @@ while ($line = <FILE>)
  
         $ucs  = hex($u);
         $code = hex($c);
-       $utf  = &ucs2utf($ucs);
-       if ($array{$utf} ne "")
-       {
-               printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-               next;
-       }
-       $count++;
-
-       $array{$utf}    = $code;
-       $comment{$code} = $rest;
-}
-close(FILE);
-
-$file = "utf8_to_euc_jis_2004.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%08x, 0x%06x} /* %s */\n", $index, $code,
-                 $comment{$code};
-       }
-       else
-       {
-               printf FILE "  {0x%08x, 0x%06x},        /* %s */\n", $index, $code,
-                 $comment{$code};
-       }
-}
-
-print FILE "};\n";
-close(FILE);
-
-if ($TEST == 1)
-{
-       $file1 = "utf8.data";
-       $file2 = "euc_jis_2004.data";
-       open(FILE1, "> $file1") || die("cannot open $file1");
-       open(FILE2, "> $file2") || die("cannot open $file2");
-
-       for $index (sort { $a <=> $b } keys(%array))
-       {
-               $code = $array{$index};
-               if (   $code > 0x00
-                       && $code != 0x09
-                       && $code != 0x0a
-                       && $code != 0x0d
-                       && $code != 0x5c
-                       && (   $code < 0x80
-                               || ($code >= 0x8ea1   && $code <= 0x8efe)
-                               || ($code >= 0x8fa1a1 && $code <= 0x8ffefe)
-                               || ($code >= 0xa1a1   && $code <= 0x8fefe)))
-               {
-                       for ($i = 3; $i >= 0; $i--)
-                       {
-                               $s    = $i * 8;
-                               $mask = 0xff << $s;
-                               print FILE1 pack("C", ($index & $mask) >> $s)
-                                 if $index & $mask;
-                               print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
-                       }
-                       print FILE1 "\n";
-                       print FILE2 "\n";
-               }
-       }
-}
  
-$file = "utf8_to_euc_jis_2004_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-  "static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
+       next if ($code < 0x80 && $ucs < 0x80);
  
-for $index (sort { $a cmp $b } keys(%array1))
-{
-       $code = $array1{$index};
-       $count1--;
-       if ($count1 == 0)
-       {
-               printf FILE "  {0x%s, 0x%s, 0x%06x}     /* %s */\n", substr($index, 0, 8),
-                 substr($index, 8, 8), $code, $comment1{$index};
-       }
-       else
-       {
-               printf FILE "  {0x%s, 0x%s, 0x%06x},    /* %s */\n",
-                 substr($index, 0, 8), substr($index, 8, 8), $code,
-                 $comment1{$index};
-       }
+       push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
  }
-
-print FILE "};\n";
  close(FILE);
  
-if ($TEST == 1)
-{
-       for $index (sort { $a cmp $b } keys(%array1))
-       {
-               $code = $array1{$index};
-               if (   $code > 0x00
-                       && $code != 0x09
-                       && $code != 0x0a
-                       && $code != 0x0d
-                       && $code != 0x5c
-                       && (   $code < 0x80
-                               || ($code >= 0x8ea1   && $code <= 0x8efe)
-                               || ($code >= 0x8fa1a1 && $code <= 0x8ffefe)
-                               || ($code >= 0xa1a1   && $code <= 0x8fefe)))
-               {
-
-                       $v1 = hex(substr($index, 0, 8));
-                       $v2 = hex(substr($index, 8, 8));
-
-                       for ($i = 3; $i >= 0; $i--)
-                       {
-                               $s    = $i * 8;
-                               $mask = 0xff << $s;
-                               print FILE1 pack("C", ($v1 & $mask) >> $s)   if $v1 & $mask;
-                               print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
-                       }
-                       for ($i = 3; $i >= 0; $i--)
-                       {
-                               $s    = $i * 8;
-                               $mask = 0xff << $s;
-                               print FILE1 pack("C", ($v2 & $mask) >> $s) if $v2 & $mask;
-                       }
-                       print FILE1 "\n";
-                       print FILE2 "\n";
-               }
-       }
-       close(FILE1);
-       close(FILE2);
-}
-
-# then generate EUC_JIS_2004 --> UTF-8 table
-
-$in_file = "euc-jis-2004-std.txt";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
-
-while ($line = <FILE>)
-{
-       if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
-       {
-               $c               = $1;
-               $u1              = $2;
-               $u2              = $3;
-               $rest            = "U+" . $u1 . "+" . $u2 . $4;
-               $code            = hex($c);
-               $ucs             = hex($u1);
-               $utf1            = &ucs2utf($ucs);
-               $ucs             = hex($u2);
-               $utf2            = &ucs2utf($ucs);
-               $str             = sprintf "%08x%08x", $utf1, $utf2;
-               $array1{$code}   = $str;
-               $comment1{$code} = $rest;
-               $count1++;
-               next;
-       }
-       elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
-       {
-               $c    = $1;
-               $u    = $2;
-               $rest = "U+" . $u . $3;
-       }
-       else
-       {
-               next;
-       }
-
-       $ucs  = hex($u);
-       $code = hex($c);
-       $utf  = &ucs2utf($ucs);
-       if ($array{$code} ne "")
-       {
-               printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-               next;
-       }
-       $count++;
-
-       $array{$code}  = $utf;
-       $comment{$utf} = $rest;
-}
-close(FILE);
-
-$file = "euc_jis_2004_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%06x, 0x%08x} /* %s */\n", $index, $code,
-                 $comment{$code};
-       }
-       else
-       {
-               printf FILE "  {0x%06x, 0x%08x},        /* %s */\n", $index, $code,
-                 $comment{$code};
-       }
-}
-
-print FILE "};\n";
-close(FILE);
-
-$file = "euc_jis_2004_to_utf8_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-  "static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array1))
-{
-       $code = $array1{$index};
-       $count1--;
-       if ($count1 == 0)
-       {
-               printf FILE "  {0x%06x, 0x%s, 0x%s}     /* %s */\n", $index,
-                 substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-       }
-       else
-       {
-               printf FILE "  {0x%06x, 0x%s, 0x%s},    /* %s */\n", $index,
-                 substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-       }
-}
-
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_JIS_2004", \@all, 1);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl

index 055fc849bae4ee70933eb1e52ca5ca7bb5ebda25..0e9dd292bff11c58df0000cd563cbc59c5b76229 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
@@ -8,275 +8,223 @@
  # map files provided by Unicode organization.
  # Unfortunately it is prohibited by the organization
  # to distribute the map files. So if you try to use this script,
-# you have to obtain JIS0201.TXT, JIS0208.TXT, JIS0212.TXT from
-# the organization's ftp site.
-#
-# JIS0201.TXT format:
-#               JIS0201 code in hex
-#               UCS-2 code in hex
-#               # and Unicode name (not used in this script)
-#
-# JIS0208.TXT format:
-#               JIS0208 shift-JIS code in hex
-#               JIS0208 code in hex
-#               UCS-2 code in hex
-#               # and Unicode name (not used in this script)
-#
-# JIS0212.TXT format:
-#               JIS0212 code in hex
-#               UCS-2 code in hex
-#               # and Unicode name (not used in this script)
-
-require "ucs2utf.pl";
-
-# first generate UTF-8 --> EUC_JP table
+# you have to obtain CP932.TXT and JIS0212.TXT from the
+# organization's ftp site.
  
-#
-# JIS0201
-#
-$in_file = "JIS0201.TXT";
+use strict;
+require "convutils.pm";
  
-open(FILE, $in_file) || die("cannot open $in_file");
+# Load JIS0212.TXT
+my $jis0212 = &read_source("JIS0212.TXT");
  
-reset 'array';
+my @mapping;
  
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
+foreach my $i (@$jis0212) {
+       # We have a different mapping for this in the EUC_JP to UTF-8 direction.
+       if ($i->{code} == 0x2243)
         {
-               next;
+               $i->{direction} = "from_unicode";
         }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
  
-               # add single shift 2
-               $array{$utf} = ($code | 0x8e00);
+       if ($i->{code} == 0x2271)
+       {
+               $i->{direction} = "to_unicode";
         }
-}
-close(FILE);
-
-#
-# JIS0208
-#
-$in_file = "JIS0208.TXT";
  
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
+       if ($i->{ucs} >= 0x080)
         {
-               next;
+               $i->{code} = $i->{code} | 0x8f8080;
         }
-       ($s, $c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
+       else
         {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-
-               $array{$utf} = ($code | 0x8080);
+               next;
         }
+
+       push @mapping, $i;
  }
-close(FILE);
  
-#
-# JIS0212
-#
-$in_file = "JIS0212.TXT";
+# Load CP932.TXT.
+my $ct932 = &read_source("CP932.TXT");
  
-open(FILE, $in_file) || die("cannot open $in_file");
+foreach my $i (@$ct932) {
+       my $sjis = $i->{code};
  
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
+       # We have a different mapping for this in the EUC_JP to UTF-8 direction.
+       if ($sjis == 0xeefa ||
+               $sjis == 0xeefb ||
+               $sjis == 0xeefc)
         {
                 next;
         }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
  
-               $array{$utf} = ($code | 0x8f8080);
-       }
-}
-close(FILE);
+       if ($sjis >= 0xa1)
+       {
+               my $jis = &sjis2jis($sjis);
  
-$file = "utf8_to_euc_jp.map";
-open(FILE, "> $file") || die("cannot open $file");
+               $i->{code} = $jis | ($jis < 0x100 ? 0x8e00 :
+                                                        ($sjis >= 0xeffd  ? 0x8f8080 : 0x8080));
  
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
+               # Remember the SJIS code for later.
+               $i->{sjis} = $sjis;
  
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
+               push @mapping, $i;
         }
  }
  
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_JP --> UTF8 table
-#
+foreach my $i (@mapping) {
+       my $sjis = $i->{sjis};
  
-#
-# JIS0201
-#
-$in_file = "JIS0201.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
+       # These SJIS characters are excluded completely.
+       if ($sjis >= 0xed00 && $sjis <= 0xeef9 ||
+               $sjis >= 0xfa54 && $sjis <= 0xfa56 ||
+               $sjis >= 0xfa58 && $sjis <= 0xfc4b)
         {
+               $i->{direction} = "none";
                 next;
         }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$code} ne "")
-               {
-                       printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-
-               # add single shift 2
-               $code |= 0x8e00;
-               $array{$code} = $utf;
-       }
-}
-close(FILE);
-
-#
-# JIS0208
-#
-$in_file = "JIS0208.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
  
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
+       # These SJIS characters are only in the UTF-8 to EUC_JP table
+       if ($sjis == 0xeefa || $sjis == 0xeefb || $sjis == 0xeefc)
         {
+               $i->{direction} = "from_unicode";
                 next;
         }
-       ($s, $c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$code} ne "")
-               {
-                       printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
  
-               $code |= 0x8080;
-               $array{$code} = $utf;
+       if ($sjis == 0x8790 || $sjis == 0x8791 || $sjis == 0x8792 ||
+               $sjis == 0x8795 || $sjis == 0x8796 || $sjis == 0x8797 ||
+               $sjis == 0x879a || $sjis == 0x879b || $sjis == 0x879c ||
+               ($sjis >= 0xfa4a && $sjis <= 0xfa53))
+       {
+               $i->{direction} = "to_unicode";
+               next;
         }
  }
-close(FILE);
  
-#
-# JIS0212
-#
-$in_file = "JIS0212.TXT";
+push @mapping, (
+        {direction => 'both', ucs => 0x4efc, code => 0x8ff4af, comment => '# CJK(4EFC)'},
+        {direction => 'both', ucs => 0x50f4, code => 0x8ff4b0, comment => '# CJK(50F4)'},
+        {direction => 'both', ucs => 0x51EC, code => 0x8ff4b1, comment => '# CJK(51EC)'},
+        {direction => 'both', ucs => 0x5307, code => 0x8ff4b2, comment => '# CJK(5307)'},
+        {direction => 'both', ucs => 0x5324, code => 0x8ff4b3, comment => '# CJK(5324)'},
+        {direction => 'both', ucs => 0x548A, code => 0x8ff4b5, comment => '# CJK(548A)'},
+        {direction => 'both', ucs => 0x5759, code => 0x8ff4b6, comment => '# CJK(5759)'},
+        {direction => 'both', ucs => 0x589E, code => 0x8ff4b9, comment => '# CJK(589E)'},
+        {direction => 'both', ucs => 0x5BEC, code => 0x8ff4ba, comment => '# CJK(5BEC)'},
+        {direction => 'both', ucs => 0x5CF5, code => 0x8ff4bb, comment => '# CJK(5CF5)'},
+        {direction => 'both', ucs => 0x5D53, code => 0x8ff4bc, comment => '# CJK(5D53)'},
+        {direction => 'both', ucs => 0x5FB7, code => 0x8ff4be, comment => '# CJK(5FB7)'},
+        {direction => 'both', ucs => 0x6085, code => 0x8ff4bf, comment => '# CJK(6085)'},
+        {direction => 'both', ucs => 0x6120, code => 0x8ff4c0, comment => '# CJK(6120)'},
+        {direction => 'both', ucs => 0x654E, code => 0x8ff4c1, comment => '# CJK(654E)'},
+        {direction => 'both', ucs => 0x663B, code => 0x8ff4c2, comment => '# CJK(663B)'},
+        {direction => 'both', ucs => 0x6665, code => 0x8ff4c3, comment => '# CJK(6665)'},
+        {direction => 'both', ucs => 0x6801, code => 0x8ff4c6, comment => '# CJK(6801)'},
+        {direction => 'both', ucs => 0x6A6B, code => 0x8ff4c9, comment => '# CJK(6A6B)'},
+        {direction => 'both', ucs => 0x6AE2, code => 0x8ff4ca, comment => '# CJK(6AE2)'},
+        {direction => 'both', ucs => 0x6DF2, code => 0x8ff4cc, comment => '# CJK(6DF2)'},
+        {direction => 'both', ucs => 0x6DF8, code => 0x8ff4cb, comment => '# CJK(6DF8)'},
+        {direction => 'both', ucs => 0x7028, code => 0x8ff4cd, comment => '# CJK(7028)'},
+        {direction => 'both', ucs => 0x70BB, code => 0x8ff4ae, comment => '# CJK(70BB)'},
+        {direction => 'both', ucs => 0x7501, code => 0x8ff4d0, comment => '# CJK(7501)'},
+        {direction => 'both', ucs => 0x7682, code => 0x8ff4d1, comment => '# CJK(7682)'},
+        {direction => 'both', ucs => 0x769E, code => 0x8ff4d2, comment => '# CJK(769E)'},
+        {direction => 'both', ucs => 0x7930, code => 0x8ff4d4, comment => '# CJK(7930)'},
+        {direction => 'both', ucs => 0x7AE7, code => 0x8ff4d9, comment => '# CJK(7AE7)'},
+        {direction => 'both', ucs => 0x7DA0, code => 0x8ff4dc, comment => '# CJK(7DA0)'},
+        {direction => 'both', ucs => 0x7DD6, code => 0x8ff4dd, comment => '# CJK(7DD6)'},
+        {direction => 'both', ucs => 0x8362, code => 0x8ff4df, comment => '# CJK(8362)'},
+        {direction => 'both', ucs => 0x85B0, code => 0x8ff4e1, comment => '# CJK(85B0)'},
+        {direction => 'both', ucs => 0x8807, code => 0x8ff4e4, comment => '# CJK(8807)'},
+        {direction => 'both', ucs => 0x8B7F, code => 0x8ff4e6, comment => '# CJK(8B7F)'},
+        {direction => 'both', ucs => 0x8CF4, code => 0x8ff4e7, comment => '# CJK(8CF4)'},
+        {direction => 'both', ucs => 0x8D76, code => 0x8ff4e8, comment => '# CJK(8D76)'},
+        {direction => 'both', ucs => 0x90DE, code => 0x8ff4ec, comment => '# CJK(90DE)'},
+        {direction => 'both', ucs => 0x9115, code => 0x8ff4ee, comment => '# CJK(9115)'},
+        {direction => 'both', ucs => 0x9592, code => 0x8ff4f1, comment => '# CJK(9592)'},
+        {direction => 'both', ucs => 0x973B, code => 0x8ff4f4, comment => '# CJK(973B)'},
+        {direction => 'both', ucs => 0x974D, code => 0x8ff4f5, comment => '# CJK(974D)'},
+        {direction => 'both', ucs => 0x9751, code => 0x8ff4f6, comment => '# CJK(9751)'},
+        {direction => 'both', ucs => 0x999E, code => 0x8ff4fa, comment => '# CJK(999E)'},
+        {direction => 'both', ucs => 0x9AD9, code => 0x8ff4fb, comment => '# CJK(9AD9)'},
+        {direction => 'both', ucs => 0x9B72, code => 0x8ff4fc, comment => '# CJK(9B72)'},
+        {direction => 'both', ucs => 0x9ED1, code => 0x8ff4fe, comment => '# CJK(9ED1)'},
+        {direction => 'both', ucs => 0xF929, code => 0x8ff4c5, comment => '# CJK COMPATIBILITY IDEOGRAPH-F929'},
+        {direction => 'both', ucs => 0xF9DC, code => 0x8ff4f2, comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC'},
+        {direction => 'both', ucs => 0xFA0E, code => 0x8ff4b4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E'},
+        {direction => 'both', ucs => 0xFA0F, code => 0x8ff4b7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F'},
+        {direction => 'both', ucs => 0xFA10, code => 0x8ff4b8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10'},
+        {direction => 'both', ucs => 0xFA11, code => 0x8ff4bd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11'},
+        {direction => 'both', ucs => 0xFA12, code => 0x8ff4c4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12'},
+        {direction => 'both', ucs => 0xFA13, code => 0x8ff4c7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13'},
+        {direction => 'both', ucs => 0xFA14, code => 0x8ff4c8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14'},
+        {direction => 'both', ucs => 0xFA15, code => 0x8ff4ce, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15'},
+        {direction => 'both', ucs => 0xFA16, code => 0x8ff4cf, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16'},
+        {direction => 'both', ucs => 0xFA17, code => 0x8ff4d3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17'},
+        {direction => 'both', ucs => 0xFA18, code => 0x8ff4d5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18'},
+        {direction => 'both', ucs => 0xFA19, code => 0x8ff4d6, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19'},
+        {direction => 'both', ucs => 0xFA1A, code => 0x8ff4d7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A'},
+        {direction => 'both', ucs => 0xFA1B, code => 0x8ff4d8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B'},
+        {direction => 'both', ucs => 0xFA1C, code => 0x8ff4da, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C'},
+        {direction => 'both', ucs => 0xFA1D, code => 0x8ff4db, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D'},
+        {direction => 'both', ucs => 0xFA1E, code => 0x8ff4de, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E'},
+        {direction => 'both', ucs => 0xFA1F, code => 0x8ff4e0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F'},
+        {direction => 'both', ucs => 0xFA20, code => 0x8ff4e2, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20'},
+        {direction => 'both', ucs => 0xFA21, code => 0x8ff4e3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21'},
+        {direction => 'both', ucs => 0xFA22, code => 0x8ff4e5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22'},
+        {direction => 'both', ucs => 0xFA23, code => 0x8ff4e9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23'},
+        {direction => 'both', ucs => 0xFA24, code => 0x8ff4ea, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24'},
+        {direction => 'both', ucs => 0xFA25, code => 0x8ff4eb, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25'},
+        {direction => 'both', ucs => 0xFA26, code => 0x8ff4ed, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26'},
+        {direction => 'both', ucs => 0xFA27, code => 0x8ff4ef, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27'},
+        {direction => 'both', ucs => 0xFA28, code => 0x8ff4f0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28'},
+        {direction => 'both', ucs => 0xFA29, code => 0x8ff4f3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29'},
+        {direction => 'both', ucs => 0xFA2A, code => 0x8ff4f7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A'},
+        {direction => 'both', ucs => 0xFA2B, code => 0x8ff4f8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B'},
+        {direction => 'both', ucs => 0xFA2C, code => 0x8ff4f9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C'},
+        {direction => 'both', ucs => 0xFA2D, code => 0x8ff4fd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D'},
+        {direction => 'both', ucs => 0xFF07, code => 0x8ff4a9, comment => '# FULLWIDTH APOSTROPHE'},
+        {direction => 'both', ucs => 0xFFE4, code => 0x8fa2c3, comment => '# FULLWIDTH BROKEN BAR'},
+
+        # additional conversions for EUC_JP -> UTF-8 conversion
+        {direction => 'to_unicode', ucs => 0x2116, code => 0x8ff4ac, comment => '# NUMERO SIGN'},
+        {direction => 'to_unicode', ucs => 0x2121, code => 0x8ff4ad, comment => '# TELEPHONE SIGN'},
+        {direction => 'to_unicode', ucs => 0x3231, code => 0x8ff4ab, comment => '# PARENTHESIZED IDEOGRAPH STOCK'}
+       );
+
+print_tables("EUC_JP", \@mapping);
+
+#######################################################################
+# sjis2jis ; SJIS => JIS conversion
+sub sjis2jis
+{
+       my ($sjis) = @_;
  
-open(FILE, $in_file) || die("cannot open $in_file");
+       return $sjis if ($sjis <= 0x100);
  
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
+       my $hi = $sjis >> 8;
+       my $lo = $sjis & 0xff;
+
+       if ($lo >= 0x80) { $lo--; }
+       $lo -= 0x40;
+       if ($hi >= 0xe0) { $hi -= 0x40; }
+       $hi -= 0x81;
+       my $pos = $lo + $hi * 0xbc;
+
+       if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
         {
-               $utf = &ucs2utf($ucs);
-               if ($array{$code} ne "")
-               {
-                       printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
+               # This region (115-ku) is outside the range of JIS codes, but to
+               # make it convenient to generate EUC CODESET 3 codes, move it to
+               # the seemingly duplicate region (83-84-ku).
+               $pos = $pos - ((31 * 0x5e) + 12);
  
-               $code |= 0x8f8080;
-               $array{$code} = $utf;
+               # after 85-ku 82-ten needs to be moved 2 codepoints
+               $pos = $pos - 2 if ($pos >= 84 * 0x5c + 82)
         }
-}
-close(FILE);
  
-$file = "euc_jp_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
+       my $hi2 = $pos / 0x5e;
+       my $lo2 = ($pos % 0x5e);
  
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $utf = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-       }
-}
+       my $ret = $lo2 + 0x21 + (($hi2 + 0x21) << 8);
  
-print FILE "};\n";
-close(FILE);
+       return $ret;
+}
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl

index a7c94bca915c91279be0bfafafe8343c2d346e54..a917d067172a25c53fad45a840b2f1f792e68474 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
@@ -16,113 +16,22 @@
  #               UCS-2 code in hex
  #               # and Unicode name (not used in this script)
  
-require "ucs2utf.pl";
+require "convutils.pm";
  
-# first generate UTF-8 --> EUC_KR table
+# Load the source file.
  
-$in_file = "KSX1001.TXT";
+my $mapping = &read_source("KSX1001.TXT");
  
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-
-               $array{$utf} = ($code | 0x8080);
-       }
-}
-close(FILE);
-
-$file = "utf8_to_euc_kr.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
+foreach my $i (@$mapping)
  {
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-       }
+       $i->{code} = $i->{code} | 0x8080;
  }
  
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_KR --> UTF8 table
-#
-reset 'array';
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$code} ne "")
-               {
-                       printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-
-               $code |= 0x8080;
-               $array{$code} = $utf;
-       }
-}
-close(FILE);
-
-$file = "euc_kr_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $utf = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-       }
-}
+# Some extra characters that are not in KSX1001.TXT
+push @$mapping, (
+       {direction => 'both', ucs => 0x20AC, code => 0xa2e6, comment => '# EURO SIGN'},
+       {direction => 'both', ucs => 0x00AE, code => 0xa2e7, comment => '# REGISTERED SIGN'},
+       {direction => 'both', ucs => 0x327E, code => 0xa2e8, comment => '# CIRCLED HANGUL IEUNG U'}
+       );
  
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_KR", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl

index e4fc535b1800031bd42c06357eef1c2d812942f5..aceef5433c28bd59590c164ec58019893957cbc1 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
@@ -17,141 +17,47 @@
  #               UCS-2 code in hex
  #               # and Unicode name (not used in this script)
  
-require "ucs2utf.pl";
+require "convutils.pm";
  
-# first generate UTF-8 --> EUC_TW table
+my $mapping = &read_source("CNS11643.TXT");
  
-$in_file = "CNS11643.TXT";
+my @extras;
  
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
+foreach my $i (@$mapping)
  {
-       chop;
-       if (/^#/)
+       my $ucs = $i->{ucs};
+       my $code = $i->{code};
+       my $origcode = $i->{code};
+
+       my $plane = ($code & 0x1f0000) >> 16;
+       if ($plane > 16)
         {
+               printf STDERR "Warning: invalid plane No.$plane. ignored\n";
                 next;
         }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-
-               $plane = ($code & 0x1f0000) >> 16;
-               if ($plane > 16)
-               {
-                       printf STDERR "Warning: invalid plane No.$plane. ignored\n";
-                       next;
-               }
-
-               if ($plane == 1)
-               {
-                       $array{$utf} = (($code & 0xffff) | 0x8080);
-               }
-               else
-               {
-                       $array{$utf} =
-                         (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
-               }
-       }
-}
-close(FILE);
-
-$file = "utf8_to_euc_tw.map";
-open(FILE, "> $file") || die("cannot open $file");
  
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
+       if ($plane == 1)
         {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
+               $code = ($code & 0xffff) | 0x8080;
         }
         else
         {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
+               $code = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
         }
-}
-
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_TW --> UTF8 table
-#
-reset 'array';
-
-open(FILE, $in_file) || die("cannot open $in_file");
+       $i->{code} = $code;
  
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
+       # Some codes are mapped twice in the EUC_TW to UTF-8 table.
+       if ($origcode >= 0x12121 && $origcode <= 0x20000)
         {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ($array{$code} ne "")
-               {
-                       printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-                       next;
-               }
-               $count++;
-
-               $plane = ($code & 0x1f0000) >> 16;
-               if ($plane > 16)
-               {
-                       printf STDERR "Warning: invalid plane No.$plane. ignored\n";
-                       next;
-               }
-
-               if ($plane == 1)
-               {
-                       $c = (($code & 0xffff) | 0x8080);
-                       $array{$c} = $utf;
-                       $count++;
+               push @extras, {
+                       ucs => $i->{ucs},
+                       code => ($i->{code} + 0x8ea10000),
+                       rest => $i->{rest},
+                       direction => 'to_unicode'
                 }
-               $c = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
-               $array{$c} = $utf;
         }
  }
-close(FILE);
-
-$file = "euc_tw_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
  
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $utf = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-       }
-}
+push @$mapping, @extras;
  
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_TW", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl

index 043c1c27ec8f6bccc85c4d759a47449784172221..f58361024e4c67e375b25a11cf90728fb36cbdb8 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -13,8 +13,7 @@
  # where the "u" field is the Unicode code point in hex,
  # and the "b" field is the hex byte sequence for GB18030
  
-require "ucs2utf.pl";
-
+require "convutils.pm";
  
  # Read the input
  
@@ -22,6 +21,8 @@ $in_file = "gb-18030-2000.xml";
  
  open(FILE, $in_file) || die("cannot open $in_file");
  
+my @mapping;
+
  while (<FILE>)
  {
         next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
@@ -32,78 +33,13 @@ while (<FILE>)
         $code = hex($c);
         if ($code >= 0x80 && $ucs >= 0x0080)
         {
-               $utf = &ucs2utf($ucs);
-               if ($arrayu{$utf} ne "")
-               {
-                       printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                       next;
+               push @mapping, {
+                       ucs => $ucs,
+                       code => $code,
+                       direction => 'both'
                 }
-               if ($arrayc{$code} ne "")
-               {
-                       printf STDERR "Warning: duplicate GB18030: %08x\n", $code;
-                       next;
-               }
-               $arrayu{$utf}  = $code;
-               $arrayc{$code} = $utf;
-               $count++;
-       }
-}
-close(FILE);
-
-
-#
-# first, generate UTF8 --> GB18030 table
-#
-
-$file = "utf8_to_gb18030.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapGB18030[ $count ] = {\n";
-
-$cc = $count;
-for $index (sort { $a <=> $b } keys(%arrayu))
-{
-       $code = $arrayu{$index};
-       $cc--;
-       if ($cc == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
         }
  }
-
-print FILE "};\n";
  close(FILE);
  
-
-#
-# then generate GB18030 --> UTF8 table
-#
-
-$file = "gb18030_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapGB18030[ $count ] = {\n";
-
-$cc = $count;
-for $index (sort { $a <=> $b } keys(%arrayc))
-{
-       $utf = $arrayc{$index};
-       $cc--;
-       if ($cc == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-       }
-}
-
-print FILE "};\n";
-close(FILE);
+print_tables("GB18030", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl

new file mode 100755 (executable)

index 0000000..b98f9a7
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -0,0 +1,31 @@
+#! /usr/bin/perl
+#
+# Copyright (c) 2001-2016, PostgreSQL Global Development Group
+#
+# src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+#
+# Generate UTF-8 <--> JOHAB conversion tables from
+# map files provided by the Unicode organization.
+# Unfortunately, the organization prohibits redistribution
+# of the map files, so to use this script you have to
+# obtain the map files from the organization's ftp site:
+# ftp://www.unicode.org/Public/MAPPINGS/
+# We assume the file includes three tab-separated columns:
+#               JOHAB code in hex
+#               UCS-2 code in hex
+#               # and Unicode name (not used in this script)
+
+require "convutils.pm";
+
+# Load the source file.
+
+my $mapping = &read_source("JOHAB.TXT");
+
+# Some extra characters that are not in JOHAB.TXT
+push @$mapping, (
+       {direction => 'both', ucs => 0x20AC, code => 0xd9e6, comment => '# EURO SIGN'},
+       {direction => 'both', ucs => 0x00AE, code => 0xd9e7, comment => '# REGISTERED SIGN'},
+       {direction => 'both', ucs => 0x327E, code => 0xd9e8, comment => '# CIRCLED HANGUL IEUNG U'}
+       );
+
+print_tables("JOHAB", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl

index 51ffd86b2c96a0047a2ff043e3c8615c1d8373a9..16a53ad1d9fa74d46f80f61de5c178db13efa775 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -7,7 +7,7 @@
  # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
  # "sjis-0213-2004-std.txt" (http://x0213.org)
  
-require "ucs2utf.pl";
+require "convutils.pm";
  
  # first generate UTF-8 --> SHIFT_JIS_2004 table
  
@@ -15,10 +15,7 @@ $in_file = "sjis-0213-2004-std.txt";
  
  open(FILE, $in_file) || die("cannot open $in_file");
  
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
+my @mapping;
  
  while ($line = <FILE>)
  {
@@ -29,14 +26,16 @@ while ($line = <FILE>)
                 $u2             = $3;
                 $rest           = "U+" . $u1 . "+" . $u2 . $4;
                 $code           = hex($c);
-               $ucs            = hex($u1);
-               $utf1           = &ucs2utf($ucs);
-               $ucs            = hex($u2);
-               $utf2           = &ucs2utf($ucs);
-               $str            = sprintf "%08x%08x", $utf1, $utf2;
-               $array1{$str}   = $code;
-               $comment1{$str} = $rest;
-               $count1++;
+               $ucs1           = hex($u1);
+               $ucs2           = hex($u2);
+
+               push @mapping, {
+                       code => $code,
+                       ucs => $ucs1,
+                       ucs_second => $ucs2,
+                       comment => $rest,
+                       direction => 'both'
+               };
                 next;
         }
         elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
@@ -52,183 +51,31 @@ while ($line = <FILE>)
  
         $ucs  = hex($u);
         $code = hex($c);
-       $utf  = &ucs2utf($ucs);
-       if ($array{$utf} ne "")
-       {
-               printf STDERR
-                 "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
-                 $ucs, $code;
-               next;
-       }
-       $count++;
  
-       $array{$utf}    = $code;
-       $comment{$code} = $rest;
-}
-close(FILE);
-
-$file = "utf8_to_shift_jis_2004.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%08x, 0x%06x} /* %s */\n", $index, $code,
-                 $comment{$code};
-       }
-       else
+       if ($code < 0x80 && $ucs < 0x80)
         {
-               printf FILE "  {0x%08x, 0x%06x},        /* %s */\n", $index, $code,
-                 $comment{$code};
-       }
-}
-
-print FILE "};\n";
-close(FILE);
-
-$file = "utf8_to_shift_jis_2004_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-"static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
-
-for $index (sort { $a cmp $b } keys(%array1))
-{
-       $code = $array1{$index};
-       $count1--;
-       if ($count1 == 0)
-       {
-               printf FILE "  {0x%s, 0x%s, 0x%04x}     /* %s */\n", substr($index, 0, 8),
-                 substr($index, 8, 8), $code, $comment1{$index};
-       }
-       else
-       {
-               printf FILE "  {0x%s, 0x%s, 0x%04x},    /* %s */\n",
-                 substr($index, 0, 8), substr($index, 8, 8), $code,
-                 $comment1{$index};
-       }
-}
-
-print FILE "};\n";
-close(FILE);
-
-# then generate SHIFT_JIS_2004 --> UTF-8 table
-
-$in_file = "sjis-0213-2004-std.txt";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
-
-while ($line = <FILE>)
-{
-       if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
-       {
-               $c               = $1;
-               $u1              = $2;
-               $u2              = $3;
-               $rest            = "U+" . $u1 . "+" . $u2 . $4;
-               $code            = hex($c);
-               $ucs             = hex($u1);
-               $utf1            = &ucs2utf($ucs);
-               $ucs             = hex($u2);
-               $utf2            = &ucs2utf($ucs);
-               $str             = sprintf "%08x%08x", $utf1, $utf2;
-               $array1{$code}   = $str;
-               $comment1{$code} = $rest;
-               $count1++;
                 next;
         }
-       elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
+       elsif ($code < 0x80)
         {
-               $c    = $1;
-               $u    = $2;
-               $rest = "U+" . $u . $3;
+               $direction = 'from_unicode';
         }
-       else
-       {
-               next;
-       }
-
-       $ucs  = hex($u);
-       $code = hex($c);
-       $utf  = &ucs2utf($ucs);
-       if ($array{$code} ne "")
-       {
-               printf STDERR
-                 "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
-                 $ucs, $code;
-               printf STDERR "Previous value: UTF8: %08x\n", $array{$utf};
-               next;
-       }
-       $count++;
-
-       $array{$code}  = $utf;
-       $comment{$utf} = $rest;
-}
-close(FILE);
-
-$file = "shift_jis_2004_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFTJIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
+       elsif ($ucs < 0x80)
         {
-               printf FILE "  {0x%04x, 0x%08x} /* %s */\n", $index, $code,
-                 $comment{$code};
+               $direction = 'to_unicode';
         }
         else
         {
-               printf FILE "  {0x%04x, 0x%08x},        /* %s */\n", $index, $code,
-                 $comment{$code};
+               $direction = 'both';
         }
-}
-
-print FILE "};\n";
-close(FILE);
-
-$file = "shift_jis_2004_to_utf8_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-"static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
  
-for $index (sort { $a <=> $b } keys(%array1))
-{
-       $code = $array1{$index};
-       $count1--;
-       if ($count1 == 0)
-       {
-               printf FILE "  {0x%04x, 0x%s, 0x%s}     /* %s */\n", $index,
-                 substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%s, 0x%s},    /* %s */\n", $index,
-                 substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-       }
+       push @mapping, {
+               code => $code,
+               ucs => $ucs,
+               comment => $rest,
+               direction => $direction
+       };
  }
-
-print FILE "};\n";
  close(FILE);
+
+print_tables("SHIFT_JIS_2004", \@mapping, 1);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl

index 10e54b157d266199cd8b2a9ebfdcdaa712ae3217..c8ff712af8fd279349da855752bb848ee6101a6e 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
@@ -4,138 +4,45 @@
  #
  # src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
  #
-# Generate UTF-8 <--> SJIS code conversion tables from
-# map files provided by Unicode organization.
-# Unfortunately it is prohibited by the organization
-# to distribute the map files. So if you try to use this script,
-# you have to obtain SHIFTJIS.TXT from
-# the organization's ftp site.
-#
-# SHIFTJIS.TXT format:
-#               SHIFTJIS code in hex
-#               UCS-2 code in hex
-#               # and Unicode name (not used in this script)
-# Warning: SHIFTJIS.TXT contains only JIS0201 and JIS0208. no JIS0212.
-
-require "ucs2utf.pl";
-
-# first generate UTF-8 --> SJIS table
-
-$in_file = "CP932.TXT";
-$count   = 0;
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               if ((($code >= 0xed40) && ($code <= 0xeefc))
-                       || (   ($code >= 0x8754)
-                               && ($code <= 0x875d))
-                       || ($code == 0x878a)
-                       || ($code == 0x8782)
-                       || ($code == 0x8784)
-                       || ($code == 0xfa5b)
-                       || ($code == 0xfa54)
-                       || (   ($code >= 0x8790)
-                               && ($code <= 0x8792))
-                       || (   ($code >= 0x8795)
-                               && ($code <= 0x8797))
-                       || (   ($code >= 0x879a)
-                               && ($code <= 0x879c)))
-               {
-                       printf STDERR "Warning: duplicate UTF8: UCS=0x%04x SJIS=0x%04x\n",
-                         $ucs,
-                         $code;
-                       next;
-               }
-               $count++;
-               $array{$utf} = $code;
-       }
-}
+# Generate UTF-8 <=> SJIS code conversion tables from CP932.TXT, a
+# map file provided by the Unicode organization. Unfortunately it is
+# prohibited by the organization to distribute the map files. So if
+# you try to use this script, you have to obtain CP932.TXT from the
+# organization's ftp site.
  
-close(FILE);
+use strict;
+require "convutils.pm";
  
-$file = "utf8_to_sjis.map";
-open(FILE, "> $file") || die("cannot open $file");
+my $charset = read_source("CP932.TXT");
  
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapSJIS[ $count ] = {\n";
+# Drop these SJIS codes from the source for UTF8=>SJIS conversion
+my @reject_sjis =(
+       0xed40..0xeefc, 0x8754..0x875d, 0x878a, 0x8782,
+       0x8784, 0xfa5b, 0xfa54, 0x8790..0x8792, 0x8795..0x8797,
+       0x879a..0x879c
+);
  
-for $index (sort { $a <=> $b } keys(%array))
+foreach my $i (@$charset)
  {
-       $code = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-       }
-       else
-       {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-       }
-}
-
-print FILE "};\n";
-close(FILE);
+       my $code = $i->{code};
+       my $ucs = $i->{ucs};
  
-#
-# then generate SJIS --> UTF8 table
-#
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-$count = 0;
-
-while (<FILE>)
-{
-       chop;
-       if (/^#/)
-       {
-               next;
-       }
-       ($c, $u, $rest) = split;
-       $ucs  = hex($u);
-       $code = hex($c);
-       if ($code >= 0x80 && $ucs >= 0x0080)
-       {
-               $utf = &ucs2utf($ucs);
-               $count++;
-
-               $array{$code} = $utf;
-       }
-}
-close(FILE);
-
-$file = "sjis_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapSJIS[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-       $utf = $array{$index};
-       $count--;
-       if ($count == 0)
-       {
-               printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-       }
-       else
+       if (grep {$code == $_} @reject_sjis)
         {
-               printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
+               $i->{direction} = "to_unicode";
         }
  }
  
-print FILE "};\n";
-close(FILE);
+# Add these UTF8->SJIS pairs to the table.
+push @$charset, (
+       {direction => "from_unicode", ucs => 0x00a2,   code => 0x8191, comment => '# CENT SIGN'},
+       {direction => "from_unicode", ucs => 0x00a3,   code => 0x8192, comment => '# POUND SIGN'},
+       {direction => "from_unicode", ucs => 0x00a5,   code => 0x5c,   comment => '# YEN SIGN'},
+       {direction => "from_unicode", ucs => 0x00ac,   code => 0x81ca, comment => '# NOT SIGN'},
+       {direction => "from_unicode", ucs => 0x2016, code => 0x8161, comment => '# DOUBLE VERTICAL LINE'},
+       {direction => "from_unicode", ucs => 0x203e, code => 0x7e,   comment => '# OVERLINE'},
+       {direction => "from_unicode", ucs => 0x2212, code => 0x817c, comment => '# MINUS SIGN'},
+       {direction => "from_unicode", ucs => 0x301c, code => 0x8160, comment => '# WAVE DASH'}
+);
+
+print_tables("SJIS", $charset);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl

new file mode 100755 (executable)

index 0000000..b6bf3bd
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
@@ -0,0 +1,51 @@
+#! /usr/bin/perl
+#
+# Copyright (c) 2007-2016, PostgreSQL Global Development Group
+#
+# src/backend/utils/mb/Unicode/UCS_to_UHC.pl
+#
+# Generate UTF-8 <--> UHC code conversion tables from
+# "windows-949-2000.xml", obtained from
+# http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/
+#
+# The lines we care about in the source file look like
+#    <a u="009A" b="81 30 83 36"/>
+# where the "u" field is the Unicode code point in hex,
+# and the "b" field is the hex byte sequence for UHC
+
+require "convutils.pm";
+
+# Read the input
+
+$in_file = "windows-949-2000.xml";
+
+open(FILE, $in_file) || die("cannot open $in_file");
+
+my @mapping;
+
+while (<FILE>)
+{
+       next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
+       $u = $1;
+       $c = $2;
+       $c =~ s/ //g;
+       $ucs  = hex($u);
+       $code = hex($c);
+
+       next if ($code == 0x0080 || $code == 0x00FF);
+
+       if ($code >= 0x80 && $ucs >= 0x0080)
+       {
+               push @mapping, {
+                       ucs => $ucs,
+                       code => $code,
+                       direction => 'both'
+               }
+       }
+}
+close(FILE);
+
+# One extra character that's not in the source file.
+push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };
+
+print_tables("UHC", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl

index 125378f149ac0ae3c3862be4d58e0318e689f12d..a3cf436eefd56708788d8ac18bb64fef67c85b9e 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -15,7 +15,7 @@
  #               UCS-2 code in hex
  #               # and Unicode name (not used in this script)
  
-require "ucs2utf.pl";
+require "convutils.pm";
  
  %filename = (
         'WIN866'     => 'CP866.TXT',
@@ -44,121 +44,13 @@ require "ucs2utf.pl";
         'ISO8859_16' => '8859-16.TXT',
         'KOI8R'      => 'KOI8-R.TXT',
         'KOI8U'      => 'KOI8-U.TXT',
-       'GBK'        => 'CP936.TXT',
-       'UHC'        => 'CP949.TXT',
-       'JOHAB'      => 'JOHAB.TXT',);
+       'GBK'        => 'CP936.TXT');
  
  @charsets = keys(%filename);
  @charsets = @ARGV if scalar(@ARGV);
  foreach $charset (@charsets)
  {
+       my $mapping = &read_source($filename{$charset});
  
-       #
-       # first, generate UTF8-> charset table
-       #
-       $in_file = $filename{$charset};
-
-       open(FILE, $in_file) || die("cannot open $in_file");
-
-       reset 'array';
-
-       while (<FILE>)
-       {
-               chop;
-               if (/^#/)
-               {
-                       next;
-               }
-               ($c, $u, $rest) = split;
-               $ucs  = hex($u);
-               $code = hex($c);
-               if ($code >= 0x80 && $ucs >= 0x0080)
-               {
-                       $utf = &ucs2utf($ucs);
-                       if ($array{$utf} ne "")
-                       {
-                               printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                               next;
-                       }
-                       $count++;
-                       $array{$utf} = $code;
-               }
-       }
-       close(FILE);
-
-       $file = lc("utf8_to_${charset}.map");
-       open(FILE, "> $file") || die("cannot open $file");
-
-       print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-       print FILE "static const pg_utf_to_local ULmap${charset}[ $count ] = {\n";
-
-       for $index (sort { $a <=> $b } keys(%array))
-       {
-               $code = $array{$index};
-               $count--;
-               if ($count == 0)
-               {
-                       printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-               }
-               else
-               {
-                       printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-               }
-       }
-
-       print FILE "};\n";
-       close(FILE);
-
-       #
-       # then generate character set code ->UTF8 table
-       #
-       open(FILE, $in_file) || die("cannot open $in_file");
-
-       reset 'array';
-
-       while (<FILE>)
-       {
-               chop;
-               if (/^#/)
-               {
-                       next;
-               }
-               ($c, $u, $rest) = split;
-               $ucs  = hex($u);
-               $code = hex($c);
-               if ($code >= 0x80 && $ucs >= 0x0080)
-               {
-                       $utf = &ucs2utf($ucs);
-                       if ($array{$code} ne "")
-                       {
-                               printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-                               next;
-                       }
-                       $count++;
-                       $array{$code} = $utf;
-               }
-       }
-       close(FILE);
-
-       $file = lc("${charset}_to_utf8.map");
-       open(FILE, "> $file") || die("cannot open $file");
-
-       print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-       print FILE "static const pg_local_to_utf LUmap${charset}[ $count ] = {\n";
-       for $index (sort { $a <=> $b } keys(%array))
-       {
-               $utf = $array{$index};
-               $count--;
-               if ($count == 0)
-               {
-                       printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-               }
-               else
-               {
-                       printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-               }
-       }
-
-       print FILE "};\n";
-       close(FILE);
+       print_tables($charset, $mapping);
  }
diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm

new file mode 100644 (file)

index 0000000..d6a13e8
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/convutils.pm
@@ -0,0 +1,282 @@
+#
+# Copyright (c) 2001-2016, PostgreSQL Global Development Group
+#
+# src/backend/utils/mb/Unicode/convutils.pm
+
+use strict;
+
+#######################################################################
+# convert UCS-4 to UTF-8
+#
+sub ucs2utf
+{
+       my ($ucs) = @_;
+       my $utf;
+
+       if ($ucs <= 0x007f)
+       {
+               $utf = $ucs;
+       }
+       elsif ($ucs > 0x007f && $ucs <= 0x07ff)
+       {
+               $utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8);
+       }
+       elsif ($ucs > 0x07ff && $ucs <= 0xffff)
+       {
+               $utf =
+                 ((($ucs >> 12) | 0xe0) << 16) |
+                 (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
+       }
+       else
+       {
+               $utf =
+                 ((($ucs >> 18) | 0xf0) << 24) |
+                 (((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
+                 (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
+       }
+       return ($utf);
+}
+
+#######################################################################
+# read_source - common routine to read source file
+#
+# fname ; input file name
+sub read_source
+{
+       my ($fname) = @_;
+       my @r;
+
+       open(my $in, '<', $fname) || die("cannot open $fname");
+
+       while (<$in>)
+       {
+               next if (/^#/);
+               chop;
+
+               next if (/^$/); # Ignore empty lines
+
+               next if (/^0x([0-9A-F]+)\s+(#.*)$/);
+
+               # Skip the first column for JIS0208.TXT
+               if (!/^0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+(?:0x([0-9A-Fa-f]+)\s+)?(#.*)$/)
+               {
+                       print STDERR "READ ERROR at line $. in $fname: $_\n";
+                       exit;
+               }
+               my $out = {f => $fname, l => $.,
+                                  code => hex($1),
+                                  ucs => hex($2),
+                                  comment => $4,
+                                  direction => "both"
+                               };
+
+               # Ignore pure ASCII mappings. PostgreSQL character conversion code
+               # never even passes these to the conversion code.
+               next if ($out->{code} < 0x80 || $out->{ucs} < 0x80);
+
+               push(@r, $out);
+       }
+       close($in);
+
+       return \@r;
+}
+
+##################################################################
+# print_tables : output mapping tables
+#
+# Arguments:
+#  charset - string name of the character set.
+#  table   - mapping table (see format below)
+#  verbose - if 1, output comment on each line,
+#            if 2, also output source file name and line number
+#
+#
+#
+# Mapping table format:
+#
+# Mapping table is a list of hashes. Each hash has the following fields:
+#   direction  - Direction: 'both', 'from_unicode' or 'to_unicode'
+#   ucs        - Unicode code point
+#   ucs_second - Second Unicode code point, if this is a "combined" character.
+#   code       - Byte sequence in the "other" character set, as an integer
+#   comment    - Text representation of the character
+#   f          - Source filename
+#   l          - Line number in source file
+#
+#
+sub print_tables
+{
+       my ($charset, $table, $verbose) = @_;
+
+       # Split the table by direction, keeping combined characters separate
+       my @to_unicode;
+       my @to_unicode_combined;
+       my @from_unicode;
+       my @from_unicode_combined;
+
+       foreach my $i (@$table)
+       {
+               if (defined $i->{ucs_second})
+               {
+                       my $entry = {utf8 => ucs2utf($i->{ucs}),
+                                                utf8_second => ucs2utf($i->{ucs_second}),
+                                                code => $i->{code},
+                                                comment => $i->{comment},
+                                                f => $i->{f}, l => $i->{l}};
+                       if ($i->{direction} eq "both" || $i->{direction} eq "to_unicode")
+                       {
+                               push @to_unicode_combined, $entry;
+                       }
+                       if ($i->{direction} eq "both" || $i->{direction} eq "from_unicode")
+                       {
+                               push @from_unicode_combined, $entry;
+                       }
+               }
+               else
+               {
+                       my $entry = {utf8 => ucs2utf($i->{ucs}),
+                                                code => $i->{code},
+                                                comment => $i->{comment},
+                                                f => $i->{f}, l => $i->{l}};
+                       if ($i->{direction} eq "both" || $i->{direction} eq "to_unicode")
+                       {
+                               push @to_unicode, $entry;
+                       }
+                       if ($i->{direction} eq "both" || $i->{direction} eq "from_unicode")
+                       {
+                               push @from_unicode, $entry;
+                       }
+               }
+       }
+
+       print_to_utf8_map($charset, \@to_unicode, $verbose);
+       print_to_utf8_combined_map($charset, \@to_unicode_combined, $verbose) if (scalar @to_unicode_combined > 0);
+       print_from_utf8_map($charset, \@from_unicode, $verbose);
+       print_from_utf8_combined_map($charset, \@from_unicode_combined, $verbose) if (scalar @from_unicode_combined > 0);
+}
+
+sub print_from_utf8_map
+{
+       my ($charset, $table, $verbose) = @_;
+
+       my $last_comment = "";
+
+       my $fname = lc("utf8_to_${charset}.map");
+       print "- Writing UTF8=>${charset} conversion table: $fname\n";
+       open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+       printf($out "/* src/backend/utils/mb/Unicode/$fname */\n\n".
+                  "static const pg_utf_to_local ULmap${charset}[ %d ] = {",
+                  scalar(@$table));
+       my $first = 1;
+       foreach my $i (sort {$$a{utf8} <=> $$b{utf8}} @$table)
+    {
+               print($out ",") if (!$first);
+               $first = 0;
+               print($out "\t/* $last_comment */") if ($verbose);
+
+               printf($out "\n  {0x%04x, 0x%04x}", $$i{utf8}, $$i{code});
+               if ($verbose >= 2)
+               {
+                       $last_comment = "$$i{f}:$$i{l} $$i{comment}";
+               }
+               else
+               {
+                       $last_comment = $$i{comment};
+               }
+       }
+       print($out "\t/* $last_comment */") if ($verbose);
+       print $out "\n};\n";
+       close($out);
+}
+
+sub print_from_utf8_combined_map
+{
+       my ($charset, $table, $verbose) = @_;
+
+       my $last_comment = "";
+
+       my $fname = lc("utf8_to_${charset}_combined.map");
+       print "- Writing UTF8=>${charset} conversion table: $fname\n";
+       open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+       printf($out "/* src/backend/utils/mb/Unicode/$fname */\n\n".
+                  "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
+                  scalar(@$table));
+       my $first = 1;
+       foreach my $i (sort {$$a{utf8} <=> $$b{utf8}} @$table)
+    {
+               print($out ",") if (!$first);
+               $first = 0;
+               print($out "\t/* $last_comment */") if ($verbose);
+
+               printf($out "\n  {0x%08x, 0x%08x, 0x%04x}", $$i{utf8}, $$i{utf8_second}, $$i{code});
+               $last_comment = "$$i{comment}";
+       }
+       print($out "\t/* $last_comment */") if ($verbose);
+       print $out "\n};\n";
+       close($out);
+}
+
+sub print_to_utf8_map
+{
+       my ($charset, $table, $verbose) = @_;
+
+       my $last_comment = "";
+
+       my $fname = lc("${charset}_to_utf8.map");
+
+       print "- Writing ${charset}=>UTF8 conversion table: $fname\n";
+       open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+       printf($out "/* src/backend/utils/mb/Unicode/${fname} */\n\n".
+                  "static const pg_local_to_utf LUmap${charset}[ %d ] = {",
+                  scalar(@$table));
+       my $first = 1;
+       foreach my $i (sort {$$a{code} <=> $$b{code}} @$table)
+    {
+               print($out ",") if (!$first);
+               $first = 0;
+               print($out "\t/* $last_comment */") if ($verbose);
+
+               printf($out "\n  {0x%04x, 0x%x}", $$i{code}, $$i{utf8});
+               if ($verbose >= 2)
+               {
+                       $last_comment = "$$i{f}:$$i{l} $$i{comment}";
+               }
+               else
+               {
+                       $last_comment = $$i{comment};
+               }
+       }
+       print($out "\t/* $last_comment */") if ($verbose);
+       print $out "\n};\n";
+       close($out);
+}
+
+sub print_to_utf8_combined_map
+{
+       my ($charset, $table, $verbose) = @_;
+
+       my $last_comment = "";
+
+       my $fname = lc("${charset}_to_utf8_combined.map");
+
+       print "- Writing ${charset}=>UTF8 conversion table: $fname\n";
+       open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+       printf($out "/* src/backend/utils/mb/Unicode/${fname} */\n\n".
+                  "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
+                  scalar(@$table));
+       my $first = 1;
+       foreach my $i (sort {$$a{code} <=> $$b{code}} @$table)
+    {
+               print($out ",") if (!$first);
+               $first = 0;
+               print($out "\t/* $last_comment */") if ($verbose);
+
+               printf($out "\n  {0x%04x, 0x%08x, 0x%08x}", $$i{code}, $$i{utf8}, $$i{utf8_second});
+               $last_comment = "$$i{comment}";
+       }
+       print($out "\t/* $last_comment */") if ($verbose);
+       print $out "\n};\n";
+       close($out);
+}
+
+1;
diff --git a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map

index 2c3a607bf86e43cff6d466162df09303c088d8bb..33fd42ac4647390a1ee38fb97e92e57cd6388527 100644 (file)
--- a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_local_to_utf LUmapEUC_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map */
+
+static const pg_local_to_utf LUmapEUC_JIS_2004[ 11303 ] = {    /*  */
    {0x0080, 0xc280},    /* U+0080        <control> */
    {0x0081, 0xc281},    /* U+0081        <control> */
    {0x0082, 0xc282},    /* U+0082        <control> */
@@ -205,7 +204,7 @@ static const pg_local_to_utf LUmapEUC_JIS_2004[] = {
    {0xa2ac, 0xe28691},  /* U+2191        UPWARDS ARROW */
    {0xa2ad, 0xe28693},  /* U+2193        DOWNWARDS ARROW */
    {0xa2ae, 0xe38093},  /* U+3013        GETA MARK */
-  {0xa2af, 0xefbc87},  /* U+FF07        FULLWIDTH APOSTROPHE   [2000] */
+  {0xa2af, 0xefbc87},  /* U+FF07        FULLWIDTH APOSTROPHE */
    {0xa2b0, 0xefbc82},  /* U+FF02        FULLWIDTH QUOTATION MARK       [2000] */
    {0xa2b1, 0xefbc8d},  /* U+FF0D        FULLWIDTH HYPHEN-MINUS [2000] */
    {0xa2b2, 0xefbd9e},  /* U+FF5E        FULLWIDTH TILDE        [2000] */
diff --git a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map

index 7a7f85b105d3e9936aae7a4727d3e4128d473277..2d8987b990814b72b456ff2b7e8592fb74cdfa62 100644 (file)
--- a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map
+++ b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map */
+
+static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[ 25 ] = {     /*  */
    {0xa4f7, 0x00e3818b, 0x00e3829a},    /* U+304B+309A          [2000] */
    {0xa4f8, 0x00e3818d, 0x00e3829a},    /* U+304D+309A          [2000] */
    {0xa4f9, 0x00e3818f, 0x00e3829a},    /* U+304F+309A          [2000] */
diff --git a/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map b/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map

index db427cbb24cdc94617ed026b3fe50638e42d8f9e..eb17f9829c54f6d19488e99c22a3d6b0b0271571 100644 (file)
--- a/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map
@@ -1,6 +1,6 @@
  /* src/backend/utils/mb/Unicode/euc_jp_to_utf8.map */
  
-static const pg_local_to_utf LUmapEUC_JP[] = {
+static const pg_local_to_utf LUmapEUC_JP[ 13197 ] = {
    {0x8ea1, 0xefbda1},
    {0x8ea2, 0xefbda2},
    {0x8ea3, 0xefbda3},
@@ -13197,5 +13197,5 @@ static const pg_local_to_utf LUmapEUC_JP[] = {
    {0x8ff4fb, 0xe9ab99},
    {0x8ff4fc, 0xe9adb2},
    {0x8ff4fd, 0xefa8ad},
-  {0x8ff4fe, 0xe9bb91},
+  {0x8ff4fe, 0xe9bb91}
  };
diff --git a/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map b/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map

index e37152137d6b93bc95466e9135fb5df2d7d71784..701a7a476ffed95008722289708a648c95ee9cf2 100644 (file)
--- a/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/euc_kr_to_utf8.map */
+
  static const pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
    {0xa1a1, 0xe38080},
    {0xa1a2, 0xe38081},
diff --git a/src/backend/utils/mb/Unicode/johab_to_utf8.map b/src/backend/utils/mb/Unicode/johab_to_utf8.map

index 8110f6e8531c7aeebd298ac368475d272640c76d..e31d24184c15a792a4cbbf21add0cbdc697f3b7b 100644 (file)
--- a/src/backend/utils/mb/Unicode/johab_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/johab_to_utf8.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/johab_to_utf8.map */
+
  static const pg_local_to_utf LUmapJOHAB[ 17049 ] = {
    {0x8444, 0xe384b3},
    {0x8446, 0xe384b5},
diff --git a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map

index 81c898c6be487ade03ba9208a5fa9927f427343d..958dde7b83d6c90a06732de6ec256df3a376d599 100644 (file)
--- a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFTJIS_2004.pl
- */
-static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map */
+
+static const pg_local_to_utf LUmapSHIFT_JIS_2004[ 11271 ] = {  /*  */
    {0x00a1, 0xefbda1},  /* U+FF61        HALFWIDTH IDEOGRAPHIC FULL STOP */
    {0x00a2, 0xefbda2},  /* U+FF62        HALFWIDTH LEFT CORNER BRACKET */
    {0x00a3, 0xefbda3},  /* U+FF63        HALFWIDTH RIGHT CORNER BRACKET */
@@ -173,7 +172,7 @@ static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
    {0x81aa, 0xe28691},  /* U+2191        UPWARDS ARROW */
    {0x81ab, 0xe28693},  /* U+2193        DOWNWARDS ARROW */
    {0x81ac, 0xe38093},  /* U+3013        GETA MARK */
-  {0x81ad, 0xefbc87},  /* U+FF07        FULLWIDTH APOSTROPHE   [2000] */
+  {0x81ad, 0xefbc87},  /* U+FF07        FULLWIDTH APOSTROPHE */
    {0x81ae, 0xefbc82},  /* U+FF02        FULLWIDTH QUOTATION MARK       [2000] */
    {0x81af, 0xefbc8d},  /* U+FF0D        FULLWIDTH HYPHEN-MINUS [2000] */
    {0x81b0, 0x7e},      /* U+007E        TILDE  [2000]  Fullwidth: U+FF5E */
diff --git a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map

index b1c7bced5fd605816fd02bc2185c7d59f83ddabc..414e59dc404348d3ef10889d7d1b4121f932230e 100644 (file)
--- a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map
+++ b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFT_JIS_2004.pl
- */
-static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map */
+
+static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[ 25 ] = {   /*  */
    {0x82f5, 0x00e3818b, 0x00e3829a},    /* U+304B+309A          [2000] */
    {0x82f6, 0x00e3818d, 0x00e3829a},    /* U+304D+309A          [2000] */
    {0x82f7, 0x00e3818f, 0x00e3829a},    /* U+304F+309A          [2000] */
diff --git a/src/backend/utils/mb/Unicode/ucs2utf.pl b/src/backend/utils/mb/Unicode/ucs2utf.pl

deleted file mode 100644 (file)

index e0f1fb2..0000000
--- a/src/backend/utils/mb/Unicode/ucs2utf.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Copyright (c) 2001-2016, PostgreSQL Global Development Group
-#
-# src/backend/utils/mb/Unicode/ucs2utf.pl
-# convert UCS-4 to UTF-8
-#
-sub ucs2utf
-{
-       local ($ucs) = @_;
-       local $utf;
-
-       if ($ucs <= 0x007f)
-       {
-               $utf = $ucs;
-       }
-       elsif ($ucs > 0x007f && $ucs <= 0x07ff)
-       {
-               $utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8);
-       }
-       elsif ($ucs > 0x07ff && $ucs <= 0xffff)
-       {
-               $utf =
-                 ((($ucs >> 12) | 0xe0) << 16) |
-                 (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
-       }
-       else
-       {
-               $utf =
-                 ((($ucs >> 18) | 0xf0) << 24) |
-                 (((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
-                 (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
-       }
-       return ($utf);
-}
-1;
diff --git a/src/backend/utils/mb/Unicode/uhc_to_utf8.map b/src/backend/utils/mb/Unicode/uhc_to_utf8.map

index 26a7b18f658672ef404d3894d2e9d64fb316305c..65c7e114a3a9019580609b2b3db14d7ab197c30b 100644 (file)
--- a/src/backend/utils/mb/Unicode/uhc_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/uhc_to_utf8.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/uhc_to_utf8.map */
+
  static const pg_local_to_utf LUmapUHC[ 17237 ] = {
    {0x8141, 0xeab082},
    {0x8142, 0xeab083},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map b/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map

index b28eb9cc0c7ba0cb7f6d2ebc14f5a275c005a09a..3d64cd1a604dc9676b86a39f1db4838c261692b5 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_euc_cn.map */
+
  static const pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
    {0xc2a4, 0xa1e8},
    {0xc2a7, 0xa1ec},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map

index 513720121768b7adeafce100f802eac3facfa05f..b50e232b6ce1ce6630b7c1895186cd2a91a8c9ce 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_utf_to_local ULmapEUC_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map */
+
+static const pg_utf_to_local ULmapEUC_JIS_2004[ 11303 ] = {    /*  */
    {0xc280, 0x0080},    /* U+0080        <control> */
    {0xc281, 0x0081},    /* U+0081        <control> */
    {0xc282, 0x0082},    /* U+0082        <control> */
@@ -10849,7 +10848,7 @@ static const pg_utf_to_local ULmapEUC_JIS_2004[] = {
    {0xefbc84, 0xa1f0},  /* U+FF04        FULLWIDTH DOLLAR SIGN */
    {0xefbc85, 0xa1f3},  /* U+FF05        FULLWIDTH PERCENT SIGN */
    {0xefbc86, 0xa1f5},  /* U+FF06        FULLWIDTH AMPERSAND */
-  {0xefbc87, 0xa2af},  /* U+FF07        FULLWIDTH APOSTROPHE   [2000] */
+  {0xefbc87, 0xa2af},  /* U+FF07        FULLWIDTH APOSTROPHE */
    {0xefbc88, 0xa1ca},  /* U+FF08        FULLWIDTH LEFT PARENTHESIS */
    {0xefbc89, 0xa1cb},  /* U+FF09        FULLWIDTH RIGHT PARENTHESIS */
    {0xefbc8a, 0xa1f6},  /* U+FF0A        FULLWIDTH ASTERISK */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map

index d8ff5c05868fc584104d6c83edf19f2932f39b4f..0d57667a558fdbe3553e49dca70fc9ee3fe0110d 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map */
+
+static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[ 25 ] = {     /*  */
    {0x0000c3a6, 0x0000cc80, 0xabc4},    /* U+00E6+0300          [2000] */
    {0x0000c994, 0x0000cc80, 0xabc8},    /* U+0254+0300          [2000] */
    {0x0000c994, 0x0000cc81, 0xabc9},    /* U+0254+0301          [2000] */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map b/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map

index 137d4fdef614e574fa53ba34ea3872ac34e2a83d..eef6db65b3440f693ad98227917663336734896e 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_euc_jp.map */
+
  static const pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
    {0xc2a1, 0x8fa2c2},
    {0xc2a4, 0x8fa2f0},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map b/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map

index 4a78b260ea45759058d2762b3a0a247fd819de97..a642b2154f29d0da387871a70e873bcd9b40f034 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_euc_kr.map */
+
  static const pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
    {0xc2a1, 0xa2ae},
    {0xc2a4, 0xa2b4},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_johab.map b/src/backend/utils/mb/Unicode/utf8_to_johab.map

index 869f8213d214bb23d05ce309c844ce58e7b2825b..78997d82d04e54602485c323030dbb21fce2edb1 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_johab.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_johab.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_johab.map */
+
  static const pg_utf_to_local ULmapJOHAB[ 17049 ] = {
    {0xc2a1, 0xd9ae},
    {0xc2a4, 0xd9b4},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map

index 4fab64fc95662bf54bac224c51bc7c636e458f22..e9f9e638c66826906cb70004d9f4a02a1e7755ed 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFT_JIS_2004.pl
- */
-static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map */
+
+static const pg_utf_to_local ULmapSHIFT_JIS_2004[ 11271 ] = {  /*  */
    {0xc2a0, 0x8541},    /* U+00A0        NO-BREAK SPACE [2000] */
    {0xc2a1, 0x8542},    /* U+00A1        INVERTED EXCLAMATION MARK      [2000] */
    {0xc2a2, 0x8191},    /* U+00A2        CENT SIGN      Windows: U+FFE0 */
@@ -10817,7 +10816,7 @@ static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
    {0xefbc84, 0x8190},  /* U+FF04        FULLWIDTH DOLLAR SIGN */
    {0xefbc85, 0x8193},  /* U+FF05        FULLWIDTH PERCENT SIGN */
    {0xefbc86, 0x8195},  /* U+FF06        FULLWIDTH AMPERSAND */
-  {0xefbc87, 0x81ad},  /* U+FF07        FULLWIDTH APOSTROPHE   [2000] */
+  {0xefbc87, 0x81ad},  /* U+FF07        FULLWIDTH APOSTROPHE */
    {0xefbc88, 0x8169},  /* U+FF08        FULLWIDTH LEFT PARENTHESIS */
    {0xefbc89, 0x816a},  /* U+FF09        FULLWIDTH RIGHT PARENTHESIS */
    {0xefbc8a, 0x8196},  /* U+FF0A        FULLWIDTH ASTERISK */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map

index e55d4a2a6cfd53eca7e96653dc4f38c32b605ebf..3642851fd6aa672e743358a5a66a8ca7c347d59c 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFT_JIS_2004.pl
- */
-static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map */
+
+static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[ 25 ] = {   /*  */
    {0x0000c3a6, 0x0000cc80, 0x8663},    /* U+00E6+0300          [2000] */
    {0x0000c994, 0x0000cc80, 0x8667},    /* U+0254+0300          [2000] */
    {0x0000c994, 0x0000cc81, 0x8668},    /* U+0254+0301          [2000] */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_sjis.map b/src/backend/utils/mb/Unicode/utf8_to_sjis.map

index fb0566a1db0e0e8aaf7489a46ee82b05a703dc30..cd6ea48ffc320e26dbbef3cff8f0c93bdb1ff94b 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_sjis.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_sjis.map
@@ -3,7 +3,7 @@
  static const pg_utf_to_local ULmapSJIS[ 7397 ] = {
    {0xc2a2, 0x8191},
    {0xc2a3, 0x8192},
-  {0xc2a5, 0x5c},
+  {0xc2a5, 0x005c},
    {0xc2a7, 0x8198},
    {0xc2a8, 0x814e},
    {0xc2ac, 0x81ca},
@@ -142,7 +142,7 @@ static const pg_utf_to_local ULmapSJIS[ 7397 ] = {
    {0xe280b2, 0x818c},
    {0xe280b3, 0x818d},
    {0xe280bb, 0x81a6},
-  {0xe280be, 0x7e},
+  {0xe280be, 0x007e},
    {0xe28483, 0x818e},
    {0xe28496, 0xfa59},
    {0xe284a1, 0xfa5a},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_uhc.map b/src/backend/utils/mb/Unicode/utf8_to_uhc.map

index 15dfb56a09958393bc1d7bbea776ab45a5f179f2..dc04726364a83adf9e894d91608307822e1118fd 100644 (file)
--- a/src/backend/utils/mb/Unicode/utf8_to_uhc.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_uhc.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_uhc.map */
+
  static const pg_utf_to_local ULmapUHC[ 17237 ] = {
    {0xc2a1, 0xa2ae},
    {0xc2a4, 0xa2b4},
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Wed, 30 Nov 2016 12:54:02 +0000 (14:54 +0200)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Wed, 30 Nov 2016 12:54:52 +0000 (14:54 +0200)
src/backend/utils/mb/Unicode/Makefile		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_BIG5.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_GB18030.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl	[new file with mode: 0755]	patch \| blob
src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_SJIS.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/UCS_to_UHC.pl	[new file with mode: 0755]	patch \| blob
src/backend/utils/mb/Unicode/UCS_to_most.pl		patch \| blob \| history
src/backend/utils/mb/Unicode/convutils.pm	[new file with mode: 0644]	patch \| blob
src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map		patch \| blob \| history
src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map		patch \| blob \| history
src/backend/utils/mb/Unicode/euc_jp_to_utf8.map		patch \| blob \| history
src/backend/utils/mb/Unicode/euc_kr_to_utf8.map		patch \| blob \| history
src/backend/utils/mb/Unicode/johab_to_utf8.map		patch \| blob \| history
src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map		patch \| blob \| history
src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map		patch \| blob \| history
src/backend/utils/mb/Unicode/ucs2utf.pl	[deleted file]	patch \| blob \| history
src/backend/utils/mb/Unicode/uhc_to_utf8.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_euc_cn.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_euc_jp.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_euc_kr.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_johab.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_sjis.map		patch \| blob \| history
src/backend/utils/mb/Unicode/utf8_to_uhc.map		patch \| blob \| history