]> granicus.if.org Git - postgresql/commitdiff
Add missing Unicode support for Cyrillic encodings.
author Tatsuo Ishii <ishii@postgresql.org>
Sun, 29 Apr 2001 07:27:38 +0000 (07:27 +0000)
committer Tatsuo Ishii <ishii@postgresql.org>
Sun, 29 Apr 2001 07:27:38 +0000 (07:27 +0000)
Patches contributed by Victor Wagner.

src/backend/utils/mb/Unicode/ALT_to_utf8.map [new file with mode: 0644]
src/backend/utils/mb/Unicode/KOI8_to_utf8.map [new file with mode: 0644]
src/backend/utils/mb/Unicode/Makefile
src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl [new file with mode: 0644]
src/backend/utils/mb/Unicode/WIN_to_utf8.map [new file with mode: 0644]
src/backend/utils/mb/Unicode/utf8_to_ALT.map [new file with mode: 0644]
src/backend/utils/mb/Unicode/utf8_to_KOI8.map [new file with mode: 0644]
src/backend/utils/mb/Unicode/utf8_to_WIN.map [new file with mode: 0644]
src/backend/utils/mb/conv.c

diff --git a/src/backend/utils/mb/Unicode/ALT_to_utf8.map b/src/backend/utils/mb/Unicode/ALT_to_utf8.map
new file mode 100644 (file)
index 0000000..7bc6982
--- /dev/null
@@ -0,0 +1,130 @@
+static pg_local_to_utf LUmapALT[ 128 ] = {  /* ALT (CP866) byte -> UTF-8 sequence, ascending by byte; written by UCS_to_cyrillic.pl */
+  {0x0080, 0xd090},
+  {0x0081, 0xd091},
+  {0x0082, 0xd092},
+  {0x0083, 0xd093},
+  {0x0084, 0xd094},
+  {0x0085, 0xd095},
+  {0x0086, 0xd096},
+  {0x0087, 0xd097},
+  {0x0088, 0xd098},
+  {0x0089, 0xd099},
+  {0x008a, 0xd09a},
+  {0x008b, 0xd09b},
+  {0x008c, 0xd09c},
+  {0x008d, 0xd09d},
+  {0x008e, 0xd09e},
+  {0x008f, 0xd09f},
+  {0x0090, 0xd0a0},
+  {0x0091, 0xd0a1},
+  {0x0092, 0xd0a2},
+  {0x0093, 0xd0a3},
+  {0x0094, 0xd0a4},
+  {0x0095, 0xd0a5},
+  {0x0096, 0xd0a6},
+  {0x0097, 0xd0a7},
+  {0x0098, 0xd0a8},
+  {0x0099, 0xd0a9},
+  {0x009a, 0xd0aa},
+  {0x009b, 0xd0ab},
+  {0x009c, 0xd0ac},
+  {0x009d, 0xd0ad},
+  {0x009e, 0xd0ae},
+  {0x009f, 0xd0af},
+  {0x00a0, 0xd0b0},
+  {0x00a1, 0xd0b1},
+  {0x00a2, 0xd0b2},
+  {0x00a3, 0xd0b3},
+  {0x00a4, 0xd0b4},
+  {0x00a5, 0xd0b5},
+  {0x00a6, 0xd0b6},
+  {0x00a7, 0xd0b7},
+  {0x00a8, 0xd0b8},
+  {0x00a9, 0xd0b9},
+  {0x00aa, 0xd0ba},
+  {0x00ab, 0xd0bb},
+  {0x00ac, 0xd0bc},
+  {0x00ad, 0xd0bd},
+  {0x00ae, 0xd0be},
+  {0x00af, 0xd0bf},
+  {0x00b0, 0xe29691},
+  {0x00b1, 0xe29692},
+  {0x00b2, 0xe29693},
+  {0x00b3, 0xe29482},
+  {0x00b4, 0xe294a4},
+  {0x00b5, 0xe295a1},
+  {0x00b6, 0xe295a2},
+  {0x00b7, 0xe29596},
+  {0x00b8, 0xe29595},
+  {0x00b9, 0xe295a3},
+  {0x00ba, 0xe29591},
+  {0x00bb, 0xe29597},
+  {0x00bc, 0xe2959d},
+  {0x00bd, 0xe2959c},
+  {0x00be, 0xe2959b},
+  {0x00bf, 0xe29490},
+  {0x00c0, 0xe29494},
+  {0x00c1, 0xe294b4},
+  {0x00c2, 0xe294ac},
+  {0x00c3, 0xe2949c},
+  {0x00c4, 0xe29480},
+  {0x00c5, 0xe294bc},
+  {0x00c6, 0xe2959e},
+  {0x00c7, 0xe2959f},
+  {0x00c8, 0xe2959a},
+  {0x00c9, 0xe29594},
+  {0x00ca, 0xe295a9},
+  {0x00cb, 0xe295a6},
+  {0x00cc, 0xe295a0},
+  {0x00cd, 0xe29590},
+  {0x00ce, 0xe295ac},
+  {0x00cf, 0xe295a7},
+  {0x00d0, 0xe295a8},
+  {0x00d1, 0xe295a4},
+  {0x00d2, 0xe295a5},
+  {0x00d3, 0xe29599},
+  {0x00d4, 0xe29598},
+  {0x00d5, 0xe29592},
+  {0x00d6, 0xe29593},
+  {0x00d7, 0xe295ab},
+  {0x00d8, 0xe295aa},
+  {0x00d9, 0xe29498},
+  {0x00da, 0xe2948c},
+  {0x00db, 0xe29688},
+  {0x00dc, 0xe29684},
+  {0x00dd, 0xe2968c},
+  {0x00de, 0xe29690},
+  {0x00df, 0xe29680},
+  {0x00e0, 0xd180},
+  {0x00e1, 0xd181},
+  {0x00e2, 0xd182},
+  {0x00e3, 0xd183},
+  {0x00e4, 0xd184},
+  {0x00e5, 0xd185},
+  {0x00e6, 0xd186},
+  {0x00e7, 0xd187},
+  {0x00e8, 0xd188},
+  {0x00e9, 0xd189},
+  {0x00ea, 0xd18a},
+  {0x00eb, 0xd18b},
+  {0x00ec, 0xd18c},
+  {0x00ed, 0xd18d},
+  {0x00ee, 0xd18e},
+  {0x00ef, 0xd18f},
+  {0x00f0, 0xd081},
+  {0x00f1, 0xd191},
+  {0x00f2, 0xd084},
+  {0x00f3, 0xd194},
+  {0x00f4, 0xd087},
+  {0x00f5, 0xd197},
+  {0x00f6, 0xd08e},
+  {0x00f7, 0xd19e},
+  {0x00f8, 0xc2b0},
+  {0x00f9, 0xe28899},
+  {0x00fa, 0xc2b7},
+  {0x00fb, 0xe2889a},
+  {0x00fc, 0xe28496},
+  {0x00fd, 0xc2a4},
+  {0x00fe, 0xe296a0},
+  {0x00ff, 0xc2a0}
+};
diff --git a/src/backend/utils/mb/Unicode/KOI8_to_utf8.map b/src/backend/utils/mb/Unicode/KOI8_to_utf8.map
new file mode 100644 (file)
index 0000000..54a84df
--- /dev/null
@@ -0,0 +1,130 @@
+static pg_local_to_utf LUmapKOI8[ 128 ] = {  /* KOI8 (KOI8-R) byte -> UTF-8 sequence, ascending by byte; written by UCS_to_cyrillic.pl */
+  {0x0080, 0xe29480},
+  {0x0081, 0xe29482},
+  {0x0082, 0xe2948c},
+  {0x0083, 0xe29490},
+  {0x0084, 0xe29494},
+  {0x0085, 0xe29498},
+  {0x0086, 0xe2949c},
+  {0x0087, 0xe294a4},
+  {0x0088, 0xe294ac},
+  {0x0089, 0xe294b4},
+  {0x008a, 0xe294bc},
+  {0x008b, 0xe29680},
+  {0x008c, 0xe29684},
+  {0x008d, 0xe29688},
+  {0x008e, 0xe2968c},
+  {0x008f, 0xe29690},
+  {0x0090, 0xe29691},
+  {0x0091, 0xe29692},
+  {0x0092, 0xe29693},
+  {0x0093, 0xe28ca0},
+  {0x0094, 0xe296a0},
+  {0x0095, 0xe28899},
+  {0x0096, 0xe2889a},
+  {0x0097, 0xe28988},
+  {0x0098, 0xe289a4},
+  {0x0099, 0xe289a5},
+  {0x009a, 0xc2a0},
+  {0x009b, 0xe28ca1},
+  {0x009c, 0xc2b0},
+  {0x009d, 0xc2b2},
+  {0x009e, 0xc2b7},
+  {0x009f, 0xc3b7},
+  {0x00a0, 0xe29590},
+  {0x00a1, 0xe29591},
+  {0x00a2, 0xe29592},
+  {0x00a3, 0xd191},
+  {0x00a4, 0xe29593},
+  {0x00a5, 0xe29594},
+  {0x00a6, 0xe29595},
+  {0x00a7, 0xe29596},
+  {0x00a8, 0xe29597},
+  {0x00a9, 0xe29598},
+  {0x00aa, 0xe29599},
+  {0x00ab, 0xe2959a},
+  {0x00ac, 0xe2959b},
+  {0x00ad, 0xe2959c},
+  {0x00ae, 0xe2959d},
+  {0x00af, 0xe2959e},
+  {0x00b0, 0xe2959f},
+  {0x00b1, 0xe295a0},
+  {0x00b2, 0xe295a1},
+  {0x00b3, 0xd081},
+  {0x00b4, 0xe295a2},
+  {0x00b5, 0xe295a3},
+  {0x00b6, 0xe295a4},
+  {0x00b7, 0xe295a5},
+  {0x00b8, 0xe295a6},
+  {0x00b9, 0xe295a7},
+  {0x00ba, 0xe295a8},
+  {0x00bb, 0xe295a9},
+  {0x00bc, 0xe295aa},
+  {0x00bd, 0xe295ab},
+  {0x00be, 0xe295ac},
+  {0x00bf, 0xc2a9},
+  {0x00c0, 0xd18e},
+  {0x00c1, 0xd0b0},
+  {0x00c2, 0xd0b1},
+  {0x00c3, 0xd186},
+  {0x00c4, 0xd0b4},
+  {0x00c5, 0xd0b5},
+  {0x00c6, 0xd184},
+  {0x00c7, 0xd0b3},
+  {0x00c8, 0xd185},
+  {0x00c9, 0xd0b8},
+  {0x00ca, 0xd0b9},
+  {0x00cb, 0xd0ba},
+  {0x00cc, 0xd0bb},
+  {0x00cd, 0xd0bc},
+  {0x00ce, 0xd0bd},
+  {0x00cf, 0xd0be},
+  {0x00d0, 0xd0bf},
+  {0x00d1, 0xd18f},
+  {0x00d2, 0xd180},
+  {0x00d3, 0xd181},
+  {0x00d4, 0xd182},
+  {0x00d5, 0xd183},
+  {0x00d6, 0xd0b6},
+  {0x00d7, 0xd0b2},
+  {0x00d8, 0xd18c},
+  {0x00d9, 0xd18b},
+  {0x00da, 0xd0b7},
+  {0x00db, 0xd188},
+  {0x00dc, 0xd18d},
+  {0x00dd, 0xd189},
+  {0x00de, 0xd187},
+  {0x00df, 0xd18a},
+  {0x00e0, 0xd0ae},
+  {0x00e1, 0xd090},
+  {0x00e2, 0xd091},
+  {0x00e3, 0xd0a6},
+  {0x00e4, 0xd094},
+  {0x00e5, 0xd095},
+  {0x00e6, 0xd0a4},
+  {0x00e7, 0xd093},
+  {0x00e8, 0xd0a5},
+  {0x00e9, 0xd098},
+  {0x00ea, 0xd099},
+  {0x00eb, 0xd09a},
+  {0x00ec, 0xd09b},
+  {0x00ed, 0xd09c},
+  {0x00ee, 0xd09d},
+  {0x00ef, 0xd09e},
+  {0x00f0, 0xd09f},
+  {0x00f1, 0xd0af},
+  {0x00f2, 0xd0a0},
+  {0x00f3, 0xd0a1},
+  {0x00f4, 0xd0a2},
+  {0x00f5, 0xd0a3},
+  {0x00f6, 0xd096},
+  {0x00f7, 0xd092},
+  {0x00f8, 0xd0ac},
+  {0x00f9, 0xd0ab},
+  {0x00fa, 0xd097},
+  {0x00fb, 0xd0a8},
+  {0x00fc, 0xd0ad},
+  {0x00fd, 0xd0a9},
+  {0x00fe, 0xd0a7},
+  {0x00ff, 0xd0aa}
+};
index 9cd0c95f1a92b26f3b4924160dad9fd83bf7cd78..68a7750d1f0d3ef988afb680d46b74fbd027b2df 100644 (file)
@@ -4,7 +4,7 @@
 #
 # Copyright 2001 by PostgreSQL Global Development Group
 #
-# $Header: /cvsroot/pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.1 2000/10/30 10:40:29 ishii Exp $
+# $Header: /cvsroot/pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.2 2001/04/29 07:27:38 ishii Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -17,7 +17,11 @@ ISO8859MAPS=iso8859_2_to_utf8.map iso8859_3_to_utf8.map \
        utf8_to_iso8859_2.map utf8_to_iso8859_3.map \
        utf8_to_iso8859_4.map utf8_to_iso8859_5.map 
 
-MAPS= $(ISO8859MAPS) \
+# Cyrillic (KOI8, WIN, ALT) <--> UTF-8 maps, built by UCS_to_cyrillic.pl
+CYRILLICMAPS=KOI8_to_utf8.map WIN_to_utf8.map ALT_to_utf8.map\
+             utf8_to_KOI8.map  utf8_to_WIN.map utf8_to_ALT.map
+
+MAPS= $(ISO8859MAPS) $(CYRILLICMAPS)\
        big5_to_utf8.map euc_cn_to_utf8.map euc_jp_to_utf8.map \
        euc_kr_to_utf8.map euc_tw_to_utf8.map sjis_to_utf8.map \
        utf8_to_big5.map utf8_to_euc_cn.map utf8_to_euc_jp.map \
@@ -25,8 +29,9 @@ MAPS= $(ISO8859MAPS) \
        utf8_to_sjis.map
 
 ISO8859TEXTS= 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT
+CYRILLICTEXTS=cp866.txt cp1251.txt koi8-r.txt
 
-TEXTS=$(ISO8859TEXTS) \
+TEXTS=$(ISO8859TEXTS) $(CYRILLICTEXTS) \
        BIG5.TXT CNS11643.TXT GB2312.TXT \
        JIS0201.TXT JIS0208.TXT JIS0212.TXT \
        OLD5601.TXT SHIFTJIS.TXT
@@ -35,6 +40,8 @@ all: $(MAPS)
 
 $(ISO8859MAPS) : $(ISO8859TEXTS)
        ./UCS_to_8859.pl
+$(CYRILLICMAPS) : $(CYRILLICTEXTS)
+       ./UCS_to_cyrillic.pl
 
 euc_jp_to_utf8.map utf8_to_euc_jp.map : JIS0201.TXT JIS0208.TXT JIS0212.TXT
        ./UCS_to_EUC_JP.pl
diff --git a/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl b/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl
new file mode 100644 (file)
index 0000000..cc4fb90
--- /dev/null
@@ -0,0 +1,112 @@
+#! /usr/bin/perl
+#
+# Copyright 2001 by PostgreSQL Global Development Group
+#
+# $Id: UCS_to_cyrillic.pl,v 1.1 2001/04/29 07:27:38 ishii Exp $
+#
+# Generate UTF-8 <--> Cyrillic (KOI8, WIN, ALT) code conversion tables
+# from map files provided by Unicode organization.
+# Unfortunately it is prohibited by the organization
+# to distribute the map files. So if you try to use this script,
+# you have to obtain the files named in %filename below yourself.
+# We assume each file includes three whitespace-separated columns:
+#               local charset code in hex
+#               UCS-2 code in hex
+#               # and Unicode name (not used in this script)
+
+require "ucs2utf.pl";
+%filename = ('KOI8'=>'koi8-r.txt',
+             'WIN'=>'cp1251.txt',
+             'ALT'=>'cp866.txt');
+@charsets = ('KOI8','ALT','WIN');
+foreach $charset (@charsets) {
+
+#
+# first, generate UTF8->local charset table (keyed and sorted by UTF-8 value)
+#
+    $in_file = $filename{$charset};
+
+    open( FILE, $in_file ) || die( "cannot open $in_file" );
+
+    %array = ();
+    $count = 0;
+    while( <FILE> ){
+        chomp;
+        next if /^#/;
+        ( $c, $u, $rest ) = split;
+        # undefined codes have no UCS-2 column; do not map them to 0x0000
+        next unless $u =~ /^(0x)?[0-9a-fA-F]+$/;
+        $ucs = hex($u);
+        $code = hex($c);
+        if( $code >= 0x80 ){
+            $utf = &ucs2utf($ucs);
+            if( $array{ $utf } ne "" ){
+                printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
+                next;
+            }
+            $count++;
+            $array{ $utf } = $code;
+        }
+    }
+    close( FILE );
+
+    $file = "utf8_to_${charset}.map";
+    open( FILE, "> $file" ) || die( "cannot open $file" );
+    print FILE "static pg_utf_to_local ULmap_${charset}[ $count ] = {\n";
+
+    for $index ( sort {$a <=> $b} keys( %array ) ){
+        $code = $array{ $index };
+        $count--;
+        if( $count == 0 ){
+            printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
+        } else {
+            printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
+        }
+    }
+
+    print FILE "};\n";
+    close(FILE);
+
+#
+# then generate local charset->UTF8 table (keyed and sorted by local code)
+#
+    open( FILE, $in_file ) || die( "cannot open $in_file" );
+
+    %array = ();
+    $count = 0;
+    while( <FILE> ){
+        chomp;
+        next if /^#/;
+        ( $c, $u, $rest ) = split;
+        # undefined codes have no UCS-2 column; do not map them to 0x0000
+        next unless $u =~ /^(0x)?[0-9a-fA-F]+$/;
+        $ucs = hex($u);
+        $code = hex($c);
+        if( $code >= 0x80 ){
+            $utf = &ucs2utf($ucs);
+            if( $array{ $code } ne "" ){
+                printf STDERR "Warning: duplicate code: %04x\n",$code;
+                next;
+            }
+            $count++;
+            $array{ $code } = $utf;
+        }
+    }
+    close( FILE );
+
+    $file = "${charset}_to_utf8.map";
+    open( FILE, "> $file" ) || die( "cannot open $file" );
+    print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n";
+    for $index ( sort {$a <=> $b} keys( %array ) ){
+        $utf = $array{ $index };
+        $count--;
+        if( $count == 0 ){
+            printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
+        } else {
+            printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
+        }
+    }
+
+    print FILE "};\n";
+    close(FILE);
+}
diff --git a/src/backend/utils/mb/Unicode/WIN_to_utf8.map b/src/backend/utils/mb/Unicode/WIN_to_utf8.map
new file mode 100644 (file)
index 0000000..d5a7707
--- /dev/null
@@ -0,0 +1,130 @@
+static pg_local_to_utf LUmapWIN[ 127 ] = {  /* WIN (CP1251) byte -> UTF-8 sequence, ascending by byte; written by UCS_to_cyrillic.pl */
+  {0x0080, 0xd082},
+  {0x0081, 0xd083},
+  {0x0082, 0xe2809a},
+  {0x0083, 0xd193},
+  {0x0084, 0xe2809e},
+  {0x0085, 0xe280a6},
+  {0x0086, 0xe280a0},
+  {0x0087, 0xe280a1},
+  {0x0088, 0xe282ac},
+  {0x0089, 0xe280b0},
+  {0x008a, 0xd089},
+  {0x008b, 0xe280b9},
+  {0x008c, 0xd08a},
+  {0x008d, 0xd08c},
+  {0x008e, 0xd08b},
+  {0x008f, 0xd08f},
+  {0x0090, 0xd192},
+  {0x0091, 0xe28098},
+  {0x0092, 0xe28099},
+  {0x0093, 0xe2809c},
+  {0x0094, 0xe2809d},
+  {0x0095, 0xe280a2},
+  {0x0096, 0xe28093},
+  {0x0097, 0xe28094},
+  /* 0x0098 is undefined in CP1251, so it has no entry (was a bogus 0x0000) */
+  {0x0099, 0xe284a2},
+  {0x009a, 0xd199},
+  {0x009b, 0xe280ba},
+  {0x009c, 0xd19a},
+  {0x009d, 0xd19c},
+  {0x009e, 0xd19b},
+  {0x009f, 0xd19f},
+  {0x00a0, 0xc2a0},
+  {0x00a1, 0xd08e},
+  {0x00a2, 0xd19e},
+  {0x00a3, 0xd088},
+  {0x00a4, 0xc2a4},
+  {0x00a5, 0xd290},
+  {0x00a6, 0xc2a6},
+  {0x00a7, 0xc2a7},
+  {0x00a8, 0xd081},
+  {0x00a9, 0xc2a9},
+  {0x00aa, 0xd084},
+  {0x00ab, 0xc2ab},
+  {0x00ac, 0xc2ac},
+  {0x00ad, 0xc2ad},
+  {0x00ae, 0xc2ae},
+  {0x00af, 0xd087},
+  {0x00b0, 0xc2b0},
+  {0x00b1, 0xc2b1},
+  {0x00b2, 0xd086},
+  {0x00b3, 0xd196},
+  {0x00b4, 0xd291},
+  {0x00b5, 0xc2b5},
+  {0x00b6, 0xc2b6},
+  {0x00b7, 0xc2b7},
+  {0x00b8, 0xd191},
+  {0x00b9, 0xe28496},
+  {0x00ba, 0xd194},
+  {0x00bb, 0xc2bb},
+  {0x00bc, 0xd198},
+  {0x00bd, 0xd085},
+  {0x00be, 0xd195},
+  {0x00bf, 0xd197},
+  {0x00c0, 0xd090},
+  {0x00c1, 0xd091},
+  {0x00c2, 0xd092},
+  {0x00c3, 0xd093},
+  {0x00c4, 0xd094},
+  {0x00c5, 0xd095},
+  {0x00c6, 0xd096},
+  {0x00c7, 0xd097},
+  {0x00c8, 0xd098},
+  {0x00c9, 0xd099},
+  {0x00ca, 0xd09a},
+  {0x00cb, 0xd09b},
+  {0x00cc, 0xd09c},
+  {0x00cd, 0xd09d},
+  {0x00ce, 0xd09e},
+  {0x00cf, 0xd09f},
+  {0x00d0, 0xd0a0},
+  {0x00d1, 0xd0a1},
+  {0x00d2, 0xd0a2},
+  {0x00d3, 0xd0a3},
+  {0x00d4, 0xd0a4},
+  {0x00d5, 0xd0a5},
+  {0x00d6, 0xd0a6},
+  {0x00d7, 0xd0a7},
+  {0x00d8, 0xd0a8},
+  {0x00d9, 0xd0a9},
+  {0x00da, 0xd0aa},
+  {0x00db, 0xd0ab},
+  {0x00dc, 0xd0ac},
+  {0x00dd, 0xd0ad},
+  {0x00de, 0xd0ae},
+  {0x00df, 0xd0af},
+  {0x00e0, 0xd0b0},
+  {0x00e1, 0xd0b1},
+  {0x00e2, 0xd0b2},
+  {0x00e3, 0xd0b3},
+  {0x00e4, 0xd0b4},
+  {0x00e5, 0xd0b5},
+  {0x00e6, 0xd0b6},
+  {0x00e7, 0xd0b7},
+  {0x00e8, 0xd0b8},
+  {0x00e9, 0xd0b9},
+  {0x00ea, 0xd0ba},
+  {0x00eb, 0xd0bb},
+  {0x00ec, 0xd0bc},
+  {0x00ed, 0xd0bd},
+  {0x00ee, 0xd0be},
+  {0x00ef, 0xd0bf},
+  {0x00f0, 0xd180},
+  {0x00f1, 0xd181},
+  {0x00f2, 0xd182},
+  {0x00f3, 0xd183},
+  {0x00f4, 0xd184},
+  {0x00f5, 0xd185},
+  {0x00f6, 0xd186},
+  {0x00f7, 0xd187},
+  {0x00f8, 0xd188},
+  {0x00f9, 0xd189},
+  {0x00fa, 0xd18a},
+  {0x00fb, 0xd18b},
+  {0x00fc, 0xd18c},
+  {0x00fd, 0xd18d},
+  {0x00fe, 0xd18e},
+  {0x00ff, 0xd18f}
+};
diff --git a/src/backend/utils/mb/Unicode/utf8_to_ALT.map b/src/backend/utils/mb/Unicode/utf8_to_ALT.map
new file mode 100644 (file)
index 0000000..2aba0c5
--- /dev/null
@@ -0,0 +1,130 @@
+static pg_utf_to_local ULmap_ALT[ 128 ] = {  /* UTF-8 sequence -> ALT (CP866) byte, ascending by UTF-8 value; written by UCS_to_cyrillic.pl */
+  {0xc2a0, 0x00ff},
+  {0xc2a4, 0x00fd},
+  {0xc2b0, 0x00f8},
+  {0xc2b7, 0x00fa},
+  {0xd081, 0x00f0},
+  {0xd084, 0x00f2},
+  {0xd087, 0x00f4},
+  {0xd08e, 0x00f6},
+  {0xd090, 0x0080},
+  {0xd091, 0x0081},
+  {0xd092, 0x0082},
+  {0xd093, 0x0083},
+  {0xd094, 0x0084},
+  {0xd095, 0x0085},
+  {0xd096, 0x0086},
+  {0xd097, 0x0087},
+  {0xd098, 0x0088},
+  {0xd099, 0x0089},
+  {0xd09a, 0x008a},
+  {0xd09b, 0x008b},
+  {0xd09c, 0x008c},
+  {0xd09d, 0x008d},
+  {0xd09e, 0x008e},
+  {0xd09f, 0x008f},
+  {0xd0a0, 0x0090},
+  {0xd0a1, 0x0091},
+  {0xd0a2, 0x0092},
+  {0xd0a3, 0x0093},
+  {0xd0a4, 0x0094},
+  {0xd0a5, 0x0095},
+  {0xd0a6, 0x0096},
+  {0xd0a7, 0x0097},
+  {0xd0a8, 0x0098},
+  {0xd0a9, 0x0099},
+  {0xd0aa, 0x009a},
+  {0xd0ab, 0x009b},
+  {0xd0ac, 0x009c},
+  {0xd0ad, 0x009d},
+  {0xd0ae, 0x009e},
+  {0xd0af, 0x009f},
+  {0xd0b0, 0x00a0},
+  {0xd0b1, 0x00a1},
+  {0xd0b2, 0x00a2},
+  {0xd0b3, 0x00a3},
+  {0xd0b4, 0x00a4},
+  {0xd0b5, 0x00a5},
+  {0xd0b6, 0x00a6},
+  {0xd0b7, 0x00a7},
+  {0xd0b8, 0x00a8},
+  {0xd0b9, 0x00a9},
+  {0xd0ba, 0x00aa},
+  {0xd0bb, 0x00ab},
+  {0xd0bc, 0x00ac},
+  {0xd0bd, 0x00ad},
+  {0xd0be, 0x00ae},
+  {0xd0bf, 0x00af},
+  {0xd180, 0x00e0},
+  {0xd181, 0x00e1},
+  {0xd182, 0x00e2},
+  {0xd183, 0x00e3},
+  {0xd184, 0x00e4},
+  {0xd185, 0x00e5},
+  {0xd186, 0x00e6},
+  {0xd187, 0x00e7},
+  {0xd188, 0x00e8},
+  {0xd189, 0x00e9},
+  {0xd18a, 0x00ea},
+  {0xd18b, 0x00eb},
+  {0xd18c, 0x00ec},
+  {0xd18d, 0x00ed},
+  {0xd18e, 0x00ee},
+  {0xd18f, 0x00ef},
+  {0xd191, 0x00f1},
+  {0xd194, 0x00f3},
+  {0xd197, 0x00f5},
+  {0xd19e, 0x00f7},
+  {0xe28496, 0x00fc},
+  {0xe28899, 0x00f9},
+  {0xe2889a, 0x00fb},
+  {0xe29480, 0x00c4},
+  {0xe29482, 0x00b3},
+  {0xe2948c, 0x00da},
+  {0xe29490, 0x00bf},
+  {0xe29494, 0x00c0},
+  {0xe29498, 0x00d9},
+  {0xe2949c, 0x00c3},
+  {0xe294a4, 0x00b4},
+  {0xe294ac, 0x00c2},
+  {0xe294b4, 0x00c1},
+  {0xe294bc, 0x00c5},
+  {0xe29590, 0x00cd},
+  {0xe29591, 0x00ba},
+  {0xe29592, 0x00d5},
+  {0xe29593, 0x00d6},
+  {0xe29594, 0x00c9},
+  {0xe29595, 0x00b8},
+  {0xe29596, 0x00b7},
+  {0xe29597, 0x00bb},
+  {0xe29598, 0x00d4},
+  {0xe29599, 0x00d3},
+  {0xe2959a, 0x00c8},
+  {0xe2959b, 0x00be},
+  {0xe2959c, 0x00bd},
+  {0xe2959d, 0x00bc},
+  {0xe2959e, 0x00c6},
+  {0xe2959f, 0x00c7},
+  {0xe295a0, 0x00cc},
+  {0xe295a1, 0x00b5},
+  {0xe295a2, 0x00b6},
+  {0xe295a3, 0x00b9},
+  {0xe295a4, 0x00d1},
+  {0xe295a5, 0x00d2},
+  {0xe295a6, 0x00cb},
+  {0xe295a7, 0x00cf},
+  {0xe295a8, 0x00d0},
+  {0xe295a9, 0x00ca},
+  {0xe295aa, 0x00d8},
+  {0xe295ab, 0x00d7},
+  {0xe295ac, 0x00ce},
+  {0xe29680, 0x00df},
+  {0xe29684, 0x00dc},
+  {0xe29688, 0x00db},
+  {0xe2968c, 0x00dd},
+  {0xe29690, 0x00de},
+  {0xe29691, 0x00b0},
+  {0xe29692, 0x00b1},
+  {0xe29693, 0x00b2},
+  {0xe296a0, 0x00fe}
+};
diff --git a/src/backend/utils/mb/Unicode/utf8_to_KOI8.map b/src/backend/utils/mb/Unicode/utf8_to_KOI8.map
new file mode 100644 (file)
index 0000000..6ad89e1
--- /dev/null
@@ -0,0 +1,130 @@
+static pg_utf_to_local ULmap_KOI8[ 128 ] = {  /* UTF-8 sequence -> KOI8 (KOI8-R) byte, ascending by UTF-8 value; written by UCS_to_cyrillic.pl */
+  {0xc2a0, 0x009a},
+  {0xc2a9, 0x00bf},
+  {0xc2b0, 0x009c},
+  {0xc2b2, 0x009d},
+  {0xc2b7, 0x009e},
+  {0xc3b7, 0x009f},
+  {0xd081, 0x00b3},
+  {0xd090, 0x00e1},
+  {0xd091, 0x00e2},
+  {0xd092, 0x00f7},
+  {0xd093, 0x00e7},
+  {0xd094, 0x00e4},
+  {0xd095, 0x00e5},
+  {0xd096, 0x00f6},
+  {0xd097, 0x00fa},
+  {0xd098, 0x00e9},
+  {0xd099, 0x00ea},
+  {0xd09a, 0x00eb},
+  {0xd09b, 0x00ec},
+  {0xd09c, 0x00ed},
+  {0xd09d, 0x00ee},
+  {0xd09e, 0x00ef},
+  {0xd09f, 0x00f0},
+  {0xd0a0, 0x00f2},
+  {0xd0a1, 0x00f3},
+  {0xd0a2, 0x00f4},
+  {0xd0a3, 0x00f5},
+  {0xd0a4, 0x00e6},
+  {0xd0a5, 0x00e8},
+  {0xd0a6, 0x00e3},
+  {0xd0a7, 0x00fe},
+  {0xd0a8, 0x00fb},
+  {0xd0a9, 0x00fd},
+  {0xd0aa, 0x00ff},
+  {0xd0ab, 0x00f9},
+  {0xd0ac, 0x00f8},
+  {0xd0ad, 0x00fc},
+  {0xd0ae, 0x00e0},
+  {0xd0af, 0x00f1},
+  {0xd0b0, 0x00c1},
+  {0xd0b1, 0x00c2},
+  {0xd0b2, 0x00d7},
+  {0xd0b3, 0x00c7},
+  {0xd0b4, 0x00c4},
+  {0xd0b5, 0x00c5},
+  {0xd0b6, 0x00d6},
+  {0xd0b7, 0x00da},
+  {0xd0b8, 0x00c9},
+  {0xd0b9, 0x00ca},
+  {0xd0ba, 0x00cb},
+  {0xd0bb, 0x00cc},
+  {0xd0bc, 0x00cd},
+  {0xd0bd, 0x00ce},
+  {0xd0be, 0x00cf},
+  {0xd0bf, 0x00d0},
+  {0xd180, 0x00d2},
+  {0xd181, 0x00d3},
+  {0xd182, 0x00d4},
+  {0xd183, 0x00d5},
+  {0xd184, 0x00c6},
+  {0xd185, 0x00c8},
+  {0xd186, 0x00c3},
+  {0xd187, 0x00de},
+  {0xd188, 0x00db},
+  {0xd189, 0x00dd},
+  {0xd18a, 0x00df},
+  {0xd18b, 0x00d9},
+  {0xd18c, 0x00d8},
+  {0xd18d, 0x00dc},
+  {0xd18e, 0x00c0},
+  {0xd18f, 0x00d1},
+  {0xd191, 0x00a3},
+  {0xe28899, 0x0095},
+  {0xe2889a, 0x0096},
+  {0xe28988, 0x0097},
+  {0xe289a4, 0x0098},
+  {0xe289a5, 0x0099},
+  {0xe28ca0, 0x0093},
+  {0xe28ca1, 0x009b},
+  {0xe29480, 0x0080},
+  {0xe29482, 0x0081},
+  {0xe2948c, 0x0082},
+  {0xe29490, 0x0083},
+  {0xe29494, 0x0084},
+  {0xe29498, 0x0085},
+  {0xe2949c, 0x0086},
+  {0xe294a4, 0x0087},
+  {0xe294ac, 0x0088},
+  {0xe294b4, 0x0089},
+  {0xe294bc, 0x008a},
+  {0xe29590, 0x00a0},
+  {0xe29591, 0x00a1},
+  {0xe29592, 0x00a2},
+  {0xe29593, 0x00a4},
+  {0xe29594, 0x00a5},
+  {0xe29595, 0x00a6},
+  {0xe29596, 0x00a7},
+  {0xe29597, 0x00a8},
+  {0xe29598, 0x00a9},
+  {0xe29599, 0x00aa},
+  {0xe2959a, 0x00ab},
+  {0xe2959b, 0x00ac},
+  {0xe2959c, 0x00ad},
+  {0xe2959d, 0x00ae},
+  {0xe2959e, 0x00af},
+  {0xe2959f, 0x00b0},
+  {0xe295a0, 0x00b1},
+  {0xe295a1, 0x00b2},
+  {0xe295a2, 0x00b4},
+  {0xe295a3, 0x00b5},
+  {0xe295a4, 0x00b6},
+  {0xe295a5, 0x00b7},
+  {0xe295a6, 0x00b8},
+  {0xe295a7, 0x00b9},
+  {0xe295a8, 0x00ba},
+  {0xe295a9, 0x00bb},
+  {0xe295aa, 0x00bc},
+  {0xe295ab, 0x00bd},
+  {0xe295ac, 0x00be},
+  {0xe29680, 0x008b},
+  {0xe29684, 0x008c},
+  {0xe29688, 0x008d},
+  {0xe2968c, 0x008e},
+  {0xe29690, 0x008f},
+  {0xe29691, 0x0090},
+  {0xe29692, 0x0091},
+  {0xe29693, 0x0092},
+  {0xe296a0, 0x0094}
+};
diff --git a/src/backend/utils/mb/Unicode/utf8_to_WIN.map b/src/backend/utils/mb/Unicode/utf8_to_WIN.map
new file mode 100644 (file)
index 0000000..16d4413
--- /dev/null
@@ -0,0 +1,129 @@
+static pg_utf_to_local ULmap_WIN[ 127 ] = {  /* UTF-8 sequence -> WIN (CP1251) byte, ascending by UTF-8 value; bogus 0x0000 entry replaced by EURO SIGN */
+  {0xc2a0, 0x00a0},
+  {0xc2a4, 0x00a4},
+  {0xc2a6, 0x00a6},
+  {0xc2a7, 0x00a7},
+  {0xc2a9, 0x00a9},
+  {0xc2ab, 0x00ab},
+  {0xc2ac, 0x00ac},
+  {0xc2ad, 0x00ad},
+  {0xc2ae, 0x00ae},
+  {0xc2b0, 0x00b0},
+  {0xc2b1, 0x00b1},
+  {0xc2b5, 0x00b5},
+  {0xc2b6, 0x00b6},
+  {0xc2b7, 0x00b7},
+  {0xc2bb, 0x00bb},
+  {0xd081, 0x00a8},
+  {0xd082, 0x0080},
+  {0xd083, 0x0081},
+  {0xd084, 0x00aa},
+  {0xd085, 0x00bd},
+  {0xd086, 0x00b2},
+  {0xd087, 0x00af},
+  {0xd088, 0x00a3},
+  {0xd089, 0x008a},
+  {0xd08a, 0x008c},
+  {0xd08b, 0x008e},
+  {0xd08c, 0x008d},
+  {0xd08e, 0x00a1},
+  {0xd08f, 0x008f},
+  {0xd090, 0x00c0},
+  {0xd091, 0x00c1},
+  {0xd092, 0x00c2},
+  {0xd093, 0x00c3},
+  {0xd094, 0x00c4},
+  {0xd095, 0x00c5},
+  {0xd096, 0x00c6},
+  {0xd097, 0x00c7},
+  {0xd098, 0x00c8},
+  {0xd099, 0x00c9},
+  {0xd09a, 0x00ca},
+  {0xd09b, 0x00cb},
+  {0xd09c, 0x00cc},
+  {0xd09d, 0x00cd},
+  {0xd09e, 0x00ce},
+  {0xd09f, 0x00cf},
+  {0xd0a0, 0x00d0},
+  {0xd0a1, 0x00d1},
+  {0xd0a2, 0x00d2},
+  {0xd0a3, 0x00d3},
+  {0xd0a4, 0x00d4},
+  {0xd0a5, 0x00d5},
+  {0xd0a6, 0x00d6},
+  {0xd0a7, 0x00d7},
+  {0xd0a8, 0x00d8},
+  {0xd0a9, 0x00d9},
+  {0xd0aa, 0x00da},
+  {0xd0ab, 0x00db},
+  {0xd0ac, 0x00dc},
+  {0xd0ad, 0x00dd},
+  {0xd0ae, 0x00de},
+  {0xd0af, 0x00df},
+  {0xd0b0, 0x00e0},
+  {0xd0b1, 0x00e1},
+  {0xd0b2, 0x00e2},
+  {0xd0b3, 0x00e3},
+  {0xd0b4, 0x00e4},
+  {0xd0b5, 0x00e5},
+  {0xd0b6, 0x00e6},
+  {0xd0b7, 0x00e7},
+  {0xd0b8, 0x00e8},
+  {0xd0b9, 0x00e9},
+  {0xd0ba, 0x00ea},
+  {0xd0bb, 0x00eb},
+  {0xd0bc, 0x00ec},
+  {0xd0bd, 0x00ed},
+  {0xd0be, 0x00ee},
+  {0xd0bf, 0x00ef},
+  {0xd180, 0x00f0},
+  {0xd181, 0x00f1},
+  {0xd182, 0x00f2},
+  {0xd183, 0x00f3},
+  {0xd184, 0x00f4},
+  {0xd185, 0x00f5},
+  {0xd186, 0x00f6},
+  {0xd187, 0x00f7},
+  {0xd188, 0x00f8},
+  {0xd189, 0x00f9},
+  {0xd18a, 0x00fa},
+  {0xd18b, 0x00fb},
+  {0xd18c, 0x00fc},
+  {0xd18d, 0x00fd},
+  {0xd18e, 0x00fe},
+  {0xd18f, 0x00ff},
+  {0xd191, 0x00b8},
+  {0xd192, 0x0090},
+  {0xd193, 0x0083},
+  {0xd194, 0x00ba},
+  {0xd195, 0x00be},
+  {0xd196, 0x00b3},
+  {0xd197, 0x00bf},
+  {0xd198, 0x00bc},
+  {0xd199, 0x009a},
+  {0xd19a, 0x009c},
+  {0xd19b, 0x009e},
+  {0xd19c, 0x009d},
+  {0xd19e, 0x00a2},
+  {0xd19f, 0x009f},
+  {0xd290, 0x00a5},
+  {0xd291, 0x00b4},
+  {0xe28093, 0x0096},
+  {0xe28094, 0x0097},
+  {0xe28098, 0x0091},
+  {0xe28099, 0x0092},
+  {0xe2809a, 0x0082},
+  {0xe2809c, 0x0093},
+  {0xe2809d, 0x0094},
+  {0xe2809e, 0x0084},
+  {0xe280a0, 0x0086},
+  {0xe280a1, 0x0087},
+  {0xe280a2, 0x0095},
+  {0xe280a6, 0x0085},
+  {0xe280b0, 0x0089},
+  {0xe280b9, 0x008b},
+  {0xe280ba, 0x009b},
+  {0xe282ac, 0x0088},
+  {0xe28496, 0x00b9},
+  {0xe284a2, 0x0099}
+};
index 30b1a0d92a610a48dc01d4273c4c2998ca8a43a2..99e7eafbc7c2d877cd05b9be613ca47849ffa542 100644 (file)
@@ -6,7 +6,7 @@
  * WIN1250 client encoding support contributed by Pavel Behal
  * SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
  *
- * $Id: conv.c,v 1.24 2001/03/22 04:00:01 momjian Exp $
+ * $Id: conv.c,v 1.25 2001/04/29 07:27:38 ishii Exp $
  *
  *
  */
 #include "Unicode/sjis_to_utf8.map"
 #include "Unicode/utf8_to_big5.map"
 #include "Unicode/big5_to_utf8.map"
+/* Cyrillic charset conversion */
+#include "Unicode/ALT_to_utf8.map"
+#include "Unicode/KOI8_to_utf8.map"
+#include "Unicode/WIN_to_utf8.map"
+#include "Unicode/utf8_to_ALT.map"
+#include "Unicode/utf8_to_KOI8.map"
+#include "Unicode/utf8_to_WIN.map"
+
 
 #endif  /* UNICODE_CONVERSION */
 
@@ -1330,6 +1338,39 @@ utf_to_latin5(unsigned char *utf, unsigned char *iso, int len)
 {
        utf_to_local(utf, iso, ULmapISO8859_5, sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local), len);
 }
+/*
+ * Cyrillic charsets
+ */
+
+/*
+ * UTF-8 ---> KOI8 (KOI8-R), translated through the ULmap_KOI8 table
+ */
+static void
+utf_to_KOI8(unsigned char *utf, unsigned char *iso, int len)
+{
+       utf_to_local(utf, iso, ULmap_KOI8,
+                    sizeof(ULmap_KOI8) / sizeof(ULmap_KOI8[0]), len);
+}
+
+/*
+ * UTF-8 ---> WIN (CP1251), translated through the ULmap_WIN table
+ */
+static void
+utf_to_WIN(unsigned char *utf, unsigned char *iso, int len)
+{
+       utf_to_local(utf, iso, ULmap_WIN,
+                    sizeof(ULmap_WIN) / sizeof(ULmap_WIN[0]), len);
+}
+
+/*
+ * UTF-8 ---> ALT (CP866), translated through the ULmap_ALT table
+ */
+static void
+utf_to_ALT(unsigned char *utf, unsigned char *iso, int len)
+{
+       utf_to_local(utf, iso, ULmap_ALT,
+                    sizeof(ULmap_ALT) / sizeof(ULmap_ALT[0]), len);
+}
 
 /*
  * local code ---> UTF-8
@@ -1434,6 +1475,32 @@ latin5_to_utf(unsigned char *iso, unsigned char *utf, int len)
        local_to_utf(iso, utf, LUmapISO8859_5, sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf), LATIN5, len);
 }
 
+/*
+ * KOI8 (KOI8-R) ---> UTF-8, translated through the LUmapKOI8 table
+ */
+static void
+KOI8_to_utf(unsigned char *iso, unsigned char *utf, int len)
+{
+       local_to_utf(iso, utf, LUmapKOI8, sizeof(LUmapKOI8) / sizeof(LUmapKOI8[0]), KOI8, len);
+}
+
+/*
+ * WIN (CP1251) ---> UTF-8, translated through the LUmapWIN table
+ */
+static void
+WIN_to_utf(unsigned char *iso, unsigned char *utf, int len)
+{
+       local_to_utf(iso, utf, LUmapWIN, sizeof(LUmapWIN) / sizeof(LUmapWIN[0]), WIN, len);
+}
+
+/*
+ * ALT (CP866) ---> UTF-8, translated through the LUmapALT table
+ */
+static void
+ALT_to_utf(unsigned char *iso, unsigned char *utf, int len)
+{
+       local_to_utf(iso, utf, LUmapALT, sizeof(LUmapALT) / sizeof(LUmapALT[0]), ALT, len);
+}
 /*
  * UTF-8 ---> EUC_JP
  */
@@ -1586,11 +1653,11 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
        {LATIN5, "LATIN5", 0, iso2mic, mic2iso,
        latin5_to_utf, utf_to_latin5},          /* ISO 8859 Latin 5 */
        {KOI8, "KOI8", 0, koi2mic, mic2koi,
-       0, 0},                                          /* KOI8-R */
+       KOI8_to_utf, utf_to_KOI8},                                              /* KOI8-R */
        {WIN, "WIN", 0, win2mic, mic2win,
-       0, 0},                                          /* CP1251 */
+       WIN_to_utf , utf_to_WIN},                                               /* CP1251 */
        {ALT, "ALT", 0, alt2mic, mic2alt,
-       0, 0},                                          /* CP866 */
+       ALT_to_utf, utf_to_ALT},                                                /* CP866 */
        {SJIS, "SJIS", 1, sjis2mic, mic2sjis,
        sjis_to_utf, utf_to_sjis},      /* SJIS */
        {BIG5, "BIG5", 1, big52mic, mic2big5,