From: Tatsuo Ishii Date: Sun, 29 Apr 2001 07:27:38 +0000 (+0000) Subject: Add missing Unicode support for Cyrillic encodings. X-Git-Tag: REL7_1_1~46 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c527366b60286c668d4c37ab3293225e5365fd98;p=postgresql Add missing Unicode support for Cyrillic encodings. Patches contributed by Victor Wagner. --- diff --git a/src/backend/utils/mb/Unicode/ALT_to_utf8.map b/src/backend/utils/mb/Unicode/ALT_to_utf8.map new file mode 100644 index 0000000000..7bc6982224 --- /dev/null +++ b/src/backend/utils/mb/Unicode/ALT_to_utf8.map @@ -0,0 +1,130 @@ +static pg_local_to_utf LUmapALT[ 128 ] = { + {0x0080, 0xd090}, + {0x0081, 0xd091}, + {0x0082, 0xd092}, + {0x0083, 0xd093}, + {0x0084, 0xd094}, + {0x0085, 0xd095}, + {0x0086, 0xd096}, + {0x0087, 0xd097}, + {0x0088, 0xd098}, + {0x0089, 0xd099}, + {0x008a, 0xd09a}, + {0x008b, 0xd09b}, + {0x008c, 0xd09c}, + {0x008d, 0xd09d}, + {0x008e, 0xd09e}, + {0x008f, 0xd09f}, + {0x0090, 0xd0a0}, + {0x0091, 0xd0a1}, + {0x0092, 0xd0a2}, + {0x0093, 0xd0a3}, + {0x0094, 0xd0a4}, + {0x0095, 0xd0a5}, + {0x0096, 0xd0a6}, + {0x0097, 0xd0a7}, + {0x0098, 0xd0a8}, + {0x0099, 0xd0a9}, + {0x009a, 0xd0aa}, + {0x009b, 0xd0ab}, + {0x009c, 0xd0ac}, + {0x009d, 0xd0ad}, + {0x009e, 0xd0ae}, + {0x009f, 0xd0af}, + {0x00a0, 0xd0b0}, + {0x00a1, 0xd0b1}, + {0x00a2, 0xd0b2}, + {0x00a3, 0xd0b3}, + {0x00a4, 0xd0b4}, + {0x00a5, 0xd0b5}, + {0x00a6, 0xd0b6}, + {0x00a7, 0xd0b7}, + {0x00a8, 0xd0b8}, + {0x00a9, 0xd0b9}, + {0x00aa, 0xd0ba}, + {0x00ab, 0xd0bb}, + {0x00ac, 0xd0bc}, + {0x00ad, 0xd0bd}, + {0x00ae, 0xd0be}, + {0x00af, 0xd0bf}, + {0x00b0, 0xe29691}, + {0x00b1, 0xe29692}, + {0x00b2, 0xe29693}, + {0x00b3, 0xe29482}, + {0x00b4, 0xe294a4}, + {0x00b5, 0xe295a1}, + {0x00b6, 0xe295a2}, + {0x00b7, 0xe29596}, + {0x00b8, 0xe29595}, + {0x00b9, 0xe295a3}, + {0x00ba, 0xe29591}, + {0x00bb, 0xe29597}, + {0x00bc, 0xe2959d}, + {0x00bd, 0xe2959c}, + {0x00be, 0xe2959b}, + {0x00bf, 0xe29490}, + {0x00c0, 0xe29494}, + {0x00c1, 
0xe294b4}, + {0x00c2, 0xe294ac}, + {0x00c3, 0xe2949c}, + {0x00c4, 0xe29480}, + {0x00c5, 0xe294bc}, + {0x00c6, 0xe2959e}, + {0x00c7, 0xe2959f}, + {0x00c8, 0xe2959a}, + {0x00c9, 0xe29594}, + {0x00ca, 0xe295a9}, + {0x00cb, 0xe295a6}, + {0x00cc, 0xe295a0}, + {0x00cd, 0xe29590}, + {0x00ce, 0xe295ac}, + {0x00cf, 0xe295a7}, + {0x00d0, 0xe295a8}, + {0x00d1, 0xe295a4}, + {0x00d2, 0xe295a5}, + {0x00d3, 0xe29599}, + {0x00d4, 0xe29598}, + {0x00d5, 0xe29592}, + {0x00d6, 0xe29593}, + {0x00d7, 0xe295ab}, + {0x00d8, 0xe295aa}, + {0x00d9, 0xe29498}, + {0x00da, 0xe2948c}, + {0x00db, 0xe29688}, + {0x00dc, 0xe29684}, + {0x00dd, 0xe2968c}, + {0x00de, 0xe29690}, + {0x00df, 0xe29680}, + {0x00e0, 0xd180}, + {0x00e1, 0xd181}, + {0x00e2, 0xd182}, + {0x00e3, 0xd183}, + {0x00e4, 0xd184}, + {0x00e5, 0xd185}, + {0x00e6, 0xd186}, + {0x00e7, 0xd187}, + {0x00e8, 0xd188}, + {0x00e9, 0xd189}, + {0x00ea, 0xd18a}, + {0x00eb, 0xd18b}, + {0x00ec, 0xd18c}, + {0x00ed, 0xd18d}, + {0x00ee, 0xd18e}, + {0x00ef, 0xd18f}, + {0x00f0, 0xd081}, + {0x00f1, 0xd191}, + {0x00f2, 0xd084}, + {0x00f3, 0xd194}, + {0x00f4, 0xd087}, + {0x00f5, 0xd197}, + {0x00f6, 0xd08e}, + {0x00f7, 0xd19e}, + {0x00f8, 0xc2b0}, + {0x00f9, 0xe28899}, + {0x00fa, 0xc2b7}, + {0x00fb, 0xe2889a}, + {0x00fc, 0xe28496}, + {0x00fd, 0xc2a4}, + {0x00fe, 0xe296a0}, + {0x00ff, 0xc2a0} +}; diff --git a/src/backend/utils/mb/Unicode/KOI8_to_utf8.map b/src/backend/utils/mb/Unicode/KOI8_to_utf8.map new file mode 100644 index 0000000000..54a84df52b --- /dev/null +++ b/src/backend/utils/mb/Unicode/KOI8_to_utf8.map @@ -0,0 +1,130 @@ +static pg_local_to_utf LUmapKOI8[ 128 ] = { + {0x0080, 0xe29480}, + {0x0081, 0xe29482}, + {0x0082, 0xe2948c}, + {0x0083, 0xe29490}, + {0x0084, 0xe29494}, + {0x0085, 0xe29498}, + {0x0086, 0xe2949c}, + {0x0087, 0xe294a4}, + {0x0088, 0xe294ac}, + {0x0089, 0xe294b4}, + {0x008a, 0xe294bc}, + {0x008b, 0xe29680}, + {0x008c, 0xe29684}, + {0x008d, 0xe29688}, + {0x008e, 0xe2968c}, + {0x008f, 0xe29690}, + {0x0090, 0xe29691}, + {0x0091, 
0xe29692}, + {0x0092, 0xe29693}, + {0x0093, 0xe28ca0}, + {0x0094, 0xe296a0}, + {0x0095, 0xe28899}, + {0x0096, 0xe2889a}, + {0x0097, 0xe28988}, + {0x0098, 0xe289a4}, + {0x0099, 0xe289a5}, + {0x009a, 0xc2a0}, + {0x009b, 0xe28ca1}, + {0x009c, 0xc2b0}, + {0x009d, 0xc2b2}, + {0x009e, 0xc2b7}, + {0x009f, 0xc3b7}, + {0x00a0, 0xe29590}, + {0x00a1, 0xe29591}, + {0x00a2, 0xe29592}, + {0x00a3, 0xd191}, + {0x00a4, 0xe29593}, + {0x00a5, 0xe29594}, + {0x00a6, 0xe29595}, + {0x00a7, 0xe29596}, + {0x00a8, 0xe29597}, + {0x00a9, 0xe29598}, + {0x00aa, 0xe29599}, + {0x00ab, 0xe2959a}, + {0x00ac, 0xe2959b}, + {0x00ad, 0xe2959c}, + {0x00ae, 0xe2959d}, + {0x00af, 0xe2959e}, + {0x00b0, 0xe2959f}, + {0x00b1, 0xe295a0}, + {0x00b2, 0xe295a1}, + {0x00b3, 0xd081}, + {0x00b4, 0xe295a2}, + {0x00b5, 0xe295a3}, + {0x00b6, 0xe295a4}, + {0x00b7, 0xe295a5}, + {0x00b8, 0xe295a6}, + {0x00b9, 0xe295a7}, + {0x00ba, 0xe295a8}, + {0x00bb, 0xe295a9}, + {0x00bc, 0xe295aa}, + {0x00bd, 0xe295ab}, + {0x00be, 0xe295ac}, + {0x00bf, 0xc2a9}, + {0x00c0, 0xd18e}, + {0x00c1, 0xd0b0}, + {0x00c2, 0xd0b1}, + {0x00c3, 0xd186}, + {0x00c4, 0xd0b4}, + {0x00c5, 0xd0b5}, + {0x00c6, 0xd184}, + {0x00c7, 0xd0b3}, + {0x00c8, 0xd185}, + {0x00c9, 0xd0b8}, + {0x00ca, 0xd0b9}, + {0x00cb, 0xd0ba}, + {0x00cc, 0xd0bb}, + {0x00cd, 0xd0bc}, + {0x00ce, 0xd0bd}, + {0x00cf, 0xd0be}, + {0x00d0, 0xd0bf}, + {0x00d1, 0xd18f}, + {0x00d2, 0xd180}, + {0x00d3, 0xd181}, + {0x00d4, 0xd182}, + {0x00d5, 0xd183}, + {0x00d6, 0xd0b6}, + {0x00d7, 0xd0b2}, + {0x00d8, 0xd18c}, + {0x00d9, 0xd18b}, + {0x00da, 0xd0b7}, + {0x00db, 0xd188}, + {0x00dc, 0xd18d}, + {0x00dd, 0xd189}, + {0x00de, 0xd187}, + {0x00df, 0xd18a}, + {0x00e0, 0xd0ae}, + {0x00e1, 0xd090}, + {0x00e2, 0xd091}, + {0x00e3, 0xd0a6}, + {0x00e4, 0xd094}, + {0x00e5, 0xd095}, + {0x00e6, 0xd0a4}, + {0x00e7, 0xd093}, + {0x00e8, 0xd0a5}, + {0x00e9, 0xd098}, + {0x00ea, 0xd099}, + {0x00eb, 0xd09a}, + {0x00ec, 0xd09b}, + {0x00ed, 0xd09c}, + {0x00ee, 0xd09d}, + {0x00ef, 0xd09e}, + {0x00f0, 0xd09f}, + {0x00f1, 
0xd0af}, + {0x00f2, 0xd0a0}, + {0x00f3, 0xd0a1}, + {0x00f4, 0xd0a2}, + {0x00f5, 0xd0a3}, + {0x00f6, 0xd096}, + {0x00f7, 0xd092}, + {0x00f8, 0xd0ac}, + {0x00f9, 0xd0ab}, + {0x00fa, 0xd097}, + {0x00fb, 0xd0a8}, + {0x00fc, 0xd0ad}, + {0x00fd, 0xd0a9}, + {0x00fe, 0xd0a7}, + {0x00ff, 0xd0aa} +}; diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile index 9cd0c95f1a..68a7750d1f 100644 --- a/src/backend/utils/mb/Unicode/Makefile +++ b/src/backend/utils/mb/Unicode/Makefile @@ -4,7 +4,7 @@ # # Copyright 2001 by PostgreSQL Global Development Group # -# $Header: /cvsroot/pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.1 2000/10/30 10:40:29 ishii Exp $ +# $Header: /cvsroot/pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.2 2001/04/29 07:27:38 ishii Exp $ # #------------------------------------------------------------------------- @@ -17,7 +17,11 @@ ISO8859MAPS=iso8859_2_to_utf8.map iso8859_3_to_utf8.map \ utf8_to_iso8859_2.map utf8_to_iso8859_3.map \ utf8_to_iso8859_4.map utf8_to_iso8859_5.map -MAPS= $(ISO8859MAPS) \ + +CYRILLICMAPS=KOI8_to_utf8.map WIN_to_utf8.map ALT_to_utf8.map\ + utf8_to_KOI8.map utf8_to_WIN.map utf8_to_ALT.map + +MAPS= $(ISO8859MAPS) $(CYRILLICMAPS)\ big5_to_utf8.map euc_cn_to_utf8.map euc_jp_to_utf8.map \ euc_kr_to_utf8.map euc_tw_to_utf8.map sjis_to_utf8.map \ utf8_to_big5.map utf8_to_euc_cn.map utf8_to_euc_jp.map \ @@ -25,8 +29,9 @@ MAPS= $(ISO8859MAPS) \ utf8_to_sjis.map ISO8859TEXTS= 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT +CYRILLICTEXTS=cp866.txt cp1251.txt koi8-r.txt -TEXTS=$(ISO8859TEXTS) \ +TEXTS=$(ISO8859TEXTS) $(CYRILLICTEXTS) \ BIG5.TXT CNS11643.TXT GB2312.TXT \ JIS0201.TXT JIS0208.TXT JIS0212.TXT \ OLD5601.TXT SHIFTJIS.TXT @@ -35,6 +40,8 @@ all: $(MAPS) $(ISO8859MAPS) : $(ISO8859TEXTS) ./UCS_to_8859.pl +$(CYRILLICMAPS) : $(CYRILLICTEXTS) + ./UCS_to_cyrillic.pl euc_jp_to_utf8.map utf8_to_euc_jp.map : JIS0201.TXT JIS0208.TXT JIS0212.TXT ./UCS_to_EUC_JP.pl diff --git 
a/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl b/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl
new file mode 100644
index 0000000000..cc4fb90ac0
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl
@@ -0,0 +1,128 @@
+#! /usr/bin/perl
+#
+# Copyright 2001 by PostgreSQL Global Development Group
+#
+# $Id: UCS_to_cyrillic.pl,v 1.1 2001/04/29 07:27:38 ishii Exp $
+#
+# Generate UTF-8 <--> Cyrillic (KOI8, WIN, ALT) code conversion tables
+# from map files provided by Unicode organization.
+# Unfortunately it is prohibited by the organization
+# to distribute the map files. So if you try to use this script,
+# you have to obtain the files named in %filename below ("koi8-r.txt",
+# "cp1251.txt" and "cp866.txt") from the organization's ftp site.
+# We assume each file includes three whitespace-separated columns:
+#     local charset code in hex
+#     UCS-2 code in hex
+#     # and Unicode name (not used in this script)
+# Only codes >= 0x80 are written to the tables.
+#
+# NOTE(review): hex() yields 0 when the second column is not a hex number,
+# so such a row ends up mapped to/from 0x0000 instead of being skipped
+# (presumably rows for undefined codes -- confirm against the input files).
+
+require "ucs2utf.pl";
+
+# charset name (used in the generated file and array names) => input file
+%filename = ('KOI8'=>'koi8-r.txt',
+             'WIN'=>'cp1251.txt',
+             'ALT'=>'cp866.txt');
+@charsets = ('KOI8','ALT','WIN');
+foreach $charset (@charsets) {
+
+#
+# first, generate UTF8->local table (utf8_to_<charset>.map)
+#
+    $in_file = $filename{$charset};
+
+    open( FILE, $in_file ) || die( "cannot open $in_file" );
+
+    # start each pass with an empty table and an explicit zero count
+    %array = ();
+    $count = 0;
+
+    while( <FILE> ){
+        chomp;
+        if( /^#/ ){
+            next;
+        }
+        ( $c, $u ) = split;
+        $ucs = hex($u);
+        $code = hex($c);
+        if( $code >= 0x80){
+            $utf = &ucs2utf($ucs);
+            # keep only the first code seen for a given UTF-8 value
+            if( $array{ $utf } ne "" ){
+                printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
+                next;
+            }
+            $count++;
+            $array{ $utf } = $code;
+        }
+    }
+    close( FILE );
+
+    $file = "utf8_to_${charset}.map";
+    open( FILE, "> $file" ) || die( "cannot open $file" );
+    print FILE "static pg_utf_to_local ULmap_${charset}[ $count ] = {\n";
+
+    # entries are written in ascending order of the UTF-8 value;
+    # $count runs down to 0 so that the last entry gets no trailing comma
+    for $index ( sort {$a <=> $b} keys( %array ) ){
+        $code = $array{ $index };
+        $count--;
+        if( $count == 0 ){
+            printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
+        } else {
+            printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
+        }
+    }
+
+    print FILE "};\n";
+    close(FILE);
+
+#
+# then generate local->UTF8 table (<charset>_to_utf8.map)
+#
+    open( FILE, $in_file ) || die( "cannot open $in_file" );
+
+    %array = ();
+    $count = 0;
+
+    while( <FILE> ){
+        chomp;
+        if( /^#/ ){
+            next;
+        }
+        ( $c, $u ) = split;
+        $ucs = hex($u);
+        $code = hex($c);
+        if($code >= 0x80){
+            $utf = &ucs2utf($ucs);
+            # this table is keyed by the local code, so that is what has
+            # to be checked for duplicates
+            if( $array{ $code } ne "" ){
+                printf STDERR "Warning: duplicate code: %02x\n",$code;
+                next;
+            }
+            $count++;
+            $array{ $code } = $utf;
+        }
+    }
+    close( FILE );
+
+    $file = "${charset}_to_utf8.map";
+    open( FILE, "> $file" ) || die( "cannot open $file" );
+    print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n";
+    for $index ( sort {$a <=> $b} keys( %array ) ){
+        $utf = $array{ $index };
+        $count--;
+        if( $count == 0 ){
+            printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
+        } else {
+            printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
+        }
+    }
+
+    print FILE "};\n";
+    close(FILE);
+}
diff --git a/src/backend/utils/mb/Unicode/WIN_to_utf8.map b/src/backend/utils/mb/Unicode/WIN_to_utf8.map new file mode 100644 index 0000000000..d5a7707dff --- /dev/null +++ b/src/backend/utils/mb/Unicode/WIN_to_utf8.map @@ -0,0 +1,130 @@ +static pg_local_to_utf LUmapWIN[ 128 ] = { + {0x0080, 0xd082}, + {0x0081, 0xd083}, + {0x0082, 0xe2809a}, + {0x0083, 0xd193}, + {0x0084, 0xe2809e}, + {0x0085, 0xe280a6}, + {0x0086, 0xe280a0}, + {0x0087, 0xe280a1}, + {0x0088, 0x0000}, + {0x0089, 0xe280b0}, + {0x008a, 0xd089}, + {0x008b, 0xe280b9}, + {0x008c, 0xd08a}, + {0x008d, 0xd08c}, + {0x008e, 0xd08b}, + {0x008f, 0xd08f}, + {0x0090, 0xd192}, + {0x0091, 0xe28098}, + {0x0092, 0xe28099}, + {0x0093, 0xe2809c}, + {0x0094, 0xe2809d}, + {0x0095, 0xe280a2}, + {0x0096, 0xe28093}, + {0x0097, 0xe28094}, + {0x0098, 0x0000}, + {0x0099, 0xe284a2}, + {0x009a, 0xd199}, + {0x009b, 0xe280ba}, + {0x009c, 0xd19a}, + {0x009d, 0xd19c}, + {0x009e, 0xd19b}, + {0x009f, 0xd19f}, + {0x00a0, 0xc2a0}, + {0x00a1, 0xd08e}, + {0x00a2, 0xd19e}, + {0x00a3, 0xd088}, + {0x00a4, 0xc2a4}, + {0x00a5, 0xd290}, + {0x00a6, 0xc2a6}, + {0x00a7, 0xc2a7}, + {0x00a8, 0xd081}, + {0x00a9, 0xc2a9}, + {0x00aa, 
0xd084}, + {0x00ab, 0xc2ab}, + {0x00ac, 0xc2ac}, + {0x00ad, 0xc2ad}, + {0x00ae, 0xc2ae}, + {0x00af, 0xd087}, + {0x00b0, 0xc2b0}, + {0x00b1, 0xc2b1}, + {0x00b2, 0xd086}, + {0x00b3, 0xd196}, + {0x00b4, 0xd291}, + {0x00b5, 0xc2b5}, + {0x00b6, 0xc2b6}, + {0x00b7, 0xc2b7}, + {0x00b8, 0xd191}, + {0x00b9, 0xe28496}, + {0x00ba, 0xd194}, + {0x00bb, 0xc2bb}, + {0x00bc, 0xd198}, + {0x00bd, 0xd085}, + {0x00be, 0xd195}, + {0x00bf, 0xd197}, + {0x00c0, 0xd090}, + {0x00c1, 0xd091}, + {0x00c2, 0xd092}, + {0x00c3, 0xd093}, + {0x00c4, 0xd094}, + {0x00c5, 0xd095}, + {0x00c6, 0xd096}, + {0x00c7, 0xd097}, + {0x00c8, 0xd098}, + {0x00c9, 0xd099}, + {0x00ca, 0xd09a}, + {0x00cb, 0xd09b}, + {0x00cc, 0xd09c}, + {0x00cd, 0xd09d}, + {0x00ce, 0xd09e}, + {0x00cf, 0xd09f}, + {0x00d0, 0xd0a0}, + {0x00d1, 0xd0a1}, + {0x00d2, 0xd0a2}, + {0x00d3, 0xd0a3}, + {0x00d4, 0xd0a4}, + {0x00d5, 0xd0a5}, + {0x00d6, 0xd0a6}, + {0x00d7, 0xd0a7}, + {0x00d8, 0xd0a8}, + {0x00d9, 0xd0a9}, + {0x00da, 0xd0aa}, + {0x00db, 0xd0ab}, + {0x00dc, 0xd0ac}, + {0x00dd, 0xd0ad}, + {0x00de, 0xd0ae}, + {0x00df, 0xd0af}, + {0x00e0, 0xd0b0}, + {0x00e1, 0xd0b1}, + {0x00e2, 0xd0b2}, + {0x00e3, 0xd0b3}, + {0x00e4, 0xd0b4}, + {0x00e5, 0xd0b5}, + {0x00e6, 0xd0b6}, + {0x00e7, 0xd0b7}, + {0x00e8, 0xd0b8}, + {0x00e9, 0xd0b9}, + {0x00ea, 0xd0ba}, + {0x00eb, 0xd0bb}, + {0x00ec, 0xd0bc}, + {0x00ed, 0xd0bd}, + {0x00ee, 0xd0be}, + {0x00ef, 0xd0bf}, + {0x00f0, 0xd180}, + {0x00f1, 0xd181}, + {0x00f2, 0xd182}, + {0x00f3, 0xd183}, + {0x00f4, 0xd184}, + {0x00f5, 0xd185}, + {0x00f6, 0xd186}, + {0x00f7, 0xd187}, + {0x00f8, 0xd188}, + {0x00f9, 0xd189}, + {0x00fa, 0xd18a}, + {0x00fb, 0xd18b}, + {0x00fc, 0xd18c}, + {0x00fd, 0xd18d}, + {0x00fe, 0xd18e}, + {0x00ff, 0xd18f} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_ALT.map b/src/backend/utils/mb/Unicode/utf8_to_ALT.map new file mode 100644 index 0000000000..2aba0c5a9b --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_ALT.map @@ -0,0 +1,130 @@ +static pg_utf_to_local ULmap_ALT[ 128 ] = { + 
{0xc2a0, 0x00ff}, + {0xc2a4, 0x00fd}, + {0xc2b0, 0x00f8}, + {0xc2b7, 0x00fa}, + {0xd081, 0x00f0}, + {0xd084, 0x00f2}, + {0xd087, 0x00f4}, + {0xd08e, 0x00f6}, + {0xd090, 0x0080}, + {0xd091, 0x0081}, + {0xd092, 0x0082}, + {0xd093, 0x0083}, + {0xd094, 0x0084}, + {0xd095, 0x0085}, + {0xd096, 0x0086}, + {0xd097, 0x0087}, + {0xd098, 0x0088}, + {0xd099, 0x0089}, + {0xd09a, 0x008a}, + {0xd09b, 0x008b}, + {0xd09c, 0x008c}, + {0xd09d, 0x008d}, + {0xd09e, 0x008e}, + {0xd09f, 0x008f}, + {0xd0a0, 0x0090}, + {0xd0a1, 0x0091}, + {0xd0a2, 0x0092}, + {0xd0a3, 0x0093}, + {0xd0a4, 0x0094}, + {0xd0a5, 0x0095}, + {0xd0a6, 0x0096}, + {0xd0a7, 0x0097}, + {0xd0a8, 0x0098}, + {0xd0a9, 0x0099}, + {0xd0aa, 0x009a}, + {0xd0ab, 0x009b}, + {0xd0ac, 0x009c}, + {0xd0ad, 0x009d}, + {0xd0ae, 0x009e}, + {0xd0af, 0x009f}, + {0xd0b0, 0x00a0}, + {0xd0b1, 0x00a1}, + {0xd0b2, 0x00a2}, + {0xd0b3, 0x00a3}, + {0xd0b4, 0x00a4}, + {0xd0b5, 0x00a5}, + {0xd0b6, 0x00a6}, + {0xd0b7, 0x00a7}, + {0xd0b8, 0x00a8}, + {0xd0b9, 0x00a9}, + {0xd0ba, 0x00aa}, + {0xd0bb, 0x00ab}, + {0xd0bc, 0x00ac}, + {0xd0bd, 0x00ad}, + {0xd0be, 0x00ae}, + {0xd0bf, 0x00af}, + {0xd180, 0x00e0}, + {0xd181, 0x00e1}, + {0xd182, 0x00e2}, + {0xd183, 0x00e3}, + {0xd184, 0x00e4}, + {0xd185, 0x00e5}, + {0xd186, 0x00e6}, + {0xd187, 0x00e7}, + {0xd188, 0x00e8}, + {0xd189, 0x00e9}, + {0xd18a, 0x00ea}, + {0xd18b, 0x00eb}, + {0xd18c, 0x00ec}, + {0xd18d, 0x00ed}, + {0xd18e, 0x00ee}, + {0xd18f, 0x00ef}, + {0xd191, 0x00f1}, + {0xd194, 0x00f3}, + {0xd197, 0x00f5}, + {0xd19e, 0x00f7}, + {0xe28496, 0x00fc}, + {0xe28899, 0x00f9}, + {0xe2889a, 0x00fb}, + {0xe29480, 0x00c4}, + {0xe29482, 0x00b3}, + {0xe2948c, 0x00da}, + {0xe29490, 0x00bf}, + {0xe29494, 0x00c0}, + {0xe29498, 0x00d9}, + {0xe2949c, 0x00c3}, + {0xe294a4, 0x00b4}, + {0xe294ac, 0x00c2}, + {0xe294b4, 0x00c1}, + {0xe294bc, 0x00c5}, + {0xe29590, 0x00cd}, + {0xe29591, 0x00ba}, + {0xe29592, 0x00d5}, + {0xe29593, 0x00d6}, + {0xe29594, 0x00c9}, + {0xe29595, 0x00b8}, + {0xe29596, 0x00b7}, + {0xe29597, 
0x00bb}, + {0xe29598, 0x00d4}, + {0xe29599, 0x00d3}, + {0xe2959a, 0x00c8}, + {0xe2959b, 0x00be}, + {0xe2959c, 0x00bd}, + {0xe2959d, 0x00bc}, + {0xe2959e, 0x00c6}, + {0xe2959f, 0x00c7}, + {0xe295a0, 0x00cc}, + {0xe295a1, 0x00b5}, + {0xe295a2, 0x00b6}, + {0xe295a3, 0x00b9}, + {0xe295a4, 0x00d1}, + {0xe295a5, 0x00d2}, + {0xe295a6, 0x00cb}, + {0xe295a7, 0x00cf}, + {0xe295a8, 0x00d0}, + {0xe295a9, 0x00ca}, + {0xe295aa, 0x00d8}, + {0xe295ab, 0x00d7}, + {0xe295ac, 0x00ce}, + {0xe29680, 0x00df}, + {0xe29684, 0x00dc}, + {0xe29688, 0x00db}, + {0xe2968c, 0x00dd}, + {0xe29690, 0x00de}, + {0xe29691, 0x00b0}, + {0xe29692, 0x00b1}, + {0xe29693, 0x00b2}, + {0xe296a0, 0x00fe} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_KOI8.map b/src/backend/utils/mb/Unicode/utf8_to_KOI8.map new file mode 100644 index 0000000000..6ad89e1605 --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_KOI8.map @@ -0,0 +1,130 @@ +static pg_utf_to_local ULmap_KOI8[ 128 ] = { + {0xc2a0, 0x009a}, + {0xc2a9, 0x00bf}, + {0xc2b0, 0x009c}, + {0xc2b2, 0x009d}, + {0xc2b7, 0x009e}, + {0xc3b7, 0x009f}, + {0xd081, 0x00b3}, + {0xd090, 0x00e1}, + {0xd091, 0x00e2}, + {0xd092, 0x00f7}, + {0xd093, 0x00e7}, + {0xd094, 0x00e4}, + {0xd095, 0x00e5}, + {0xd096, 0x00f6}, + {0xd097, 0x00fa}, + {0xd098, 0x00e9}, + {0xd099, 0x00ea}, + {0xd09a, 0x00eb}, + {0xd09b, 0x00ec}, + {0xd09c, 0x00ed}, + {0xd09d, 0x00ee}, + {0xd09e, 0x00ef}, + {0xd09f, 0x00f0}, + {0xd0a0, 0x00f2}, + {0xd0a1, 0x00f3}, + {0xd0a2, 0x00f4}, + {0xd0a3, 0x00f5}, + {0xd0a4, 0x00e6}, + {0xd0a5, 0x00e8}, + {0xd0a6, 0x00e3}, + {0xd0a7, 0x00fe}, + {0xd0a8, 0x00fb}, + {0xd0a9, 0x00fd}, + {0xd0aa, 0x00ff}, + {0xd0ab, 0x00f9}, + {0xd0ac, 0x00f8}, + {0xd0ad, 0x00fc}, + {0xd0ae, 0x00e0}, + {0xd0af, 0x00f1}, + {0xd0b0, 0x00c1}, + {0xd0b1, 0x00c2}, + {0xd0b2, 0x00d7}, + {0xd0b3, 0x00c7}, + {0xd0b4, 0x00c4}, + {0xd0b5, 0x00c5}, + {0xd0b6, 0x00d6}, + {0xd0b7, 0x00da}, + {0xd0b8, 0x00c9}, + {0xd0b9, 0x00ca}, + {0xd0ba, 0x00cb}, + {0xd0bb, 0x00cc}, + {0xd0bc, 0x00cd}, 
+ {0xd0bd, 0x00ce}, + {0xd0be, 0x00cf}, + {0xd0bf, 0x00d0}, + {0xd180, 0x00d2}, + {0xd181, 0x00d3}, + {0xd182, 0x00d4}, + {0xd183, 0x00d5}, + {0xd184, 0x00c6}, + {0xd185, 0x00c8}, + {0xd186, 0x00c3}, + {0xd187, 0x00de}, + {0xd188, 0x00db}, + {0xd189, 0x00dd}, + {0xd18a, 0x00df}, + {0xd18b, 0x00d9}, + {0xd18c, 0x00d8}, + {0xd18d, 0x00dc}, + {0xd18e, 0x00c0}, + {0xd18f, 0x00d1}, + {0xd191, 0x00a3}, + {0xe28899, 0x0095}, + {0xe2889a, 0x0096}, + {0xe28988, 0x0097}, + {0xe289a4, 0x0098}, + {0xe289a5, 0x0099}, + {0xe28ca0, 0x0093}, + {0xe28ca1, 0x009b}, + {0xe29480, 0x0080}, + {0xe29482, 0x0081}, + {0xe2948c, 0x0082}, + {0xe29490, 0x0083}, + {0xe29494, 0x0084}, + {0xe29498, 0x0085}, + {0xe2949c, 0x0086}, + {0xe294a4, 0x0087}, + {0xe294ac, 0x0088}, + {0xe294b4, 0x0089}, + {0xe294bc, 0x008a}, + {0xe29590, 0x00a0}, + {0xe29591, 0x00a1}, + {0xe29592, 0x00a2}, + {0xe29593, 0x00a4}, + {0xe29594, 0x00a5}, + {0xe29595, 0x00a6}, + {0xe29596, 0x00a7}, + {0xe29597, 0x00a8}, + {0xe29598, 0x00a9}, + {0xe29599, 0x00aa}, + {0xe2959a, 0x00ab}, + {0xe2959b, 0x00ac}, + {0xe2959c, 0x00ad}, + {0xe2959d, 0x00ae}, + {0xe2959e, 0x00af}, + {0xe2959f, 0x00b0}, + {0xe295a0, 0x00b1}, + {0xe295a1, 0x00b2}, + {0xe295a2, 0x00b4}, + {0xe295a3, 0x00b5}, + {0xe295a4, 0x00b6}, + {0xe295a5, 0x00b7}, + {0xe295a6, 0x00b8}, + {0xe295a7, 0x00b9}, + {0xe295a8, 0x00ba}, + {0xe295a9, 0x00bb}, + {0xe295aa, 0x00bc}, + {0xe295ab, 0x00bd}, + {0xe295ac, 0x00be}, + {0xe29680, 0x008b}, + {0xe29684, 0x008c}, + {0xe29688, 0x008d}, + {0xe2968c, 0x008e}, + {0xe29690, 0x008f}, + {0xe29691, 0x0090}, + {0xe29692, 0x0091}, + {0xe29693, 0x0092}, + {0xe296a0, 0x0094} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_WIN.map b/src/backend/utils/mb/Unicode/utf8_to_WIN.map new file mode 100644 index 0000000000..16d441316d --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_WIN.map @@ -0,0 +1,129 @@ +static pg_utf_to_local ULmap_WIN[ 127 ] = { + {0x0000, 0x0088}, + {0xc2a0, 0x00a0}, + {0xc2a4, 0x00a4}, + {0xc2a6, 0x00a6}, + 
{0xc2a7, 0x00a7}, + {0xc2a9, 0x00a9}, + {0xc2ab, 0x00ab}, + {0xc2ac, 0x00ac}, + {0xc2ad, 0x00ad}, + {0xc2ae, 0x00ae}, + {0xc2b0, 0x00b0}, + {0xc2b1, 0x00b1}, + {0xc2b5, 0x00b5}, + {0xc2b6, 0x00b6}, + {0xc2b7, 0x00b7}, + {0xc2bb, 0x00bb}, + {0xd081, 0x00a8}, + {0xd082, 0x0080}, + {0xd083, 0x0081}, + {0xd084, 0x00aa}, + {0xd085, 0x00bd}, + {0xd086, 0x00b2}, + {0xd087, 0x00af}, + {0xd088, 0x00a3}, + {0xd089, 0x008a}, + {0xd08a, 0x008c}, + {0xd08b, 0x008e}, + {0xd08c, 0x008d}, + {0xd08e, 0x00a1}, + {0xd08f, 0x008f}, + {0xd090, 0x00c0}, + {0xd091, 0x00c1}, + {0xd092, 0x00c2}, + {0xd093, 0x00c3}, + {0xd094, 0x00c4}, + {0xd095, 0x00c5}, + {0xd096, 0x00c6}, + {0xd097, 0x00c7}, + {0xd098, 0x00c8}, + {0xd099, 0x00c9}, + {0xd09a, 0x00ca}, + {0xd09b, 0x00cb}, + {0xd09c, 0x00cc}, + {0xd09d, 0x00cd}, + {0xd09e, 0x00ce}, + {0xd09f, 0x00cf}, + {0xd0a0, 0x00d0}, + {0xd0a1, 0x00d1}, + {0xd0a2, 0x00d2}, + {0xd0a3, 0x00d3}, + {0xd0a4, 0x00d4}, + {0xd0a5, 0x00d5}, + {0xd0a6, 0x00d6}, + {0xd0a7, 0x00d7}, + {0xd0a8, 0x00d8}, + {0xd0a9, 0x00d9}, + {0xd0aa, 0x00da}, + {0xd0ab, 0x00db}, + {0xd0ac, 0x00dc}, + {0xd0ad, 0x00dd}, + {0xd0ae, 0x00de}, + {0xd0af, 0x00df}, + {0xd0b0, 0x00e0}, + {0xd0b1, 0x00e1}, + {0xd0b2, 0x00e2}, + {0xd0b3, 0x00e3}, + {0xd0b4, 0x00e4}, + {0xd0b5, 0x00e5}, + {0xd0b6, 0x00e6}, + {0xd0b7, 0x00e7}, + {0xd0b8, 0x00e8}, + {0xd0b9, 0x00e9}, + {0xd0ba, 0x00ea}, + {0xd0bb, 0x00eb}, + {0xd0bc, 0x00ec}, + {0xd0bd, 0x00ed}, + {0xd0be, 0x00ee}, + {0xd0bf, 0x00ef}, + {0xd180, 0x00f0}, + {0xd181, 0x00f1}, + {0xd182, 0x00f2}, + {0xd183, 0x00f3}, + {0xd184, 0x00f4}, + {0xd185, 0x00f5}, + {0xd186, 0x00f6}, + {0xd187, 0x00f7}, + {0xd188, 0x00f8}, + {0xd189, 0x00f9}, + {0xd18a, 0x00fa}, + {0xd18b, 0x00fb}, + {0xd18c, 0x00fc}, + {0xd18d, 0x00fd}, + {0xd18e, 0x00fe}, + {0xd18f, 0x00ff}, + {0xd191, 0x00b8}, + {0xd192, 0x0090}, + {0xd193, 0x0083}, + {0xd194, 0x00ba}, + {0xd195, 0x00be}, + {0xd196, 0x00b3}, + {0xd197, 0x00bf}, + {0xd198, 0x00bc}, + {0xd199, 0x009a}, + {0xd19a, 0x009c}, + 
{0xd19b, 0x009e}, + {0xd19c, 0x009d}, + {0xd19e, 0x00a2}, + {0xd19f, 0x009f}, + {0xd290, 0x00a5}, + {0xd291, 0x00b4}, + {0xe28093, 0x0096}, + {0xe28094, 0x0097}, + {0xe28098, 0x0091}, + {0xe28099, 0x0092}, + {0xe2809a, 0x0082}, + {0xe2809c, 0x0093}, + {0xe2809d, 0x0094}, + {0xe2809e, 0x0084}, + {0xe280a0, 0x0086}, + {0xe280a1, 0x0087}, + {0xe280a2, 0x0095}, + {0xe280a6, 0x0085}, + {0xe280b0, 0x0089}, + {0xe280b9, 0x008b}, + {0xe280ba, 0x009b}, + {0xe28496, 0x00b9}, + {0xe284a2, 0x0099} +}; diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index 30b1a0d92a..99e7eafbc7 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -6,7 +6,7 @@ * WIN1250 client encoding support contributed by Pavel Behal * SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya * - * $Id: conv.c,v 1.24 2001/03/22 04:00:01 momjian Exp $ + * $Id: conv.c,v 1.25 2001/04/29 07:27:38 ishii Exp $ * * */ @@ -38,6 +38,14 @@ #include "Unicode/sjis_to_utf8.map" #include "Unicode/utf8_to_big5.map" #include "Unicode/big5_to_utf8.map" +/* Cyrillic charset conversion */ +#include "Unicode/ALT_to_utf8.map" +#include "Unicode/KOI8_to_utf8.map" +#include "Unicode/WIN_to_utf8.map" +#include "Unicode/utf8_to_ALT.map" +#include "Unicode/utf8_to_KOI8.map" +#include "Unicode/utf8_to_WIN.map" + #endif /* UNICODE_CONVERSION */ @@ -1330,6 +1338,39 @@ utf_to_latin5(unsigned char *utf, unsigned char *iso, int len) { utf_to_local(utf, iso, ULmapISO8859_5, sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local), len); } +/* + * Cyrillic charsets + */ + +/* + * UTF-8 --->KOI8 + */ +static void +utf_to_KOI8(unsigned char *utf, unsigned char *iso, int len) + +{ + utf_to_local(utf, iso, ULmap_KOI8, sizeof(ULmap_KOI8) / sizeof(pg_utf_to_local), len); +} + +/* + * UTF-8 --->WIN + */ +static void +utf_to_WIN(unsigned char *utf, unsigned char *iso, int len) + +{ + utf_to_local(utf, iso, ULmap_WIN, sizeof(ULmap_WIN) / sizeof(pg_utf_to_local), len); +} + +/* + * UTF-8 --->ALT + */ 
+static void +utf_to_ALT(unsigned char *utf, unsigned char *iso, int len) + +{ + utf_to_local(utf, iso, ULmap_ALT, sizeof(ULmap_ALT) / sizeof(pg_utf_to_local), len); +} /* * local code ---> UTF-8 @@ -1434,6 +1475,32 @@ latin5_to_utf(unsigned char *iso, unsigned char *utf, int len) local_to_utf(iso, utf, LUmapISO8859_5, sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf), LATIN5, len); } +/* + * KOI8 ---> UTF-8 + */ +static void +KOI8_to_utf(unsigned char *iso, unsigned char *utf, int len) +{ + local_to_utf(iso, utf, LUmapKOI8, sizeof(LUmapKOI8) / sizeof(pg_local_to_utf), KOI8, len); +} + +/* + * WIN ---> UTF-8 + */ +static void +WIN_to_utf(unsigned char *iso, unsigned char *utf, int len) +{ + local_to_utf(iso, utf, LUmapWIN, sizeof(LUmapWIN) / sizeof(pg_local_to_utf), WIN, len); +} + +/* + * ALT ---> UTF-8 + */ +static void +ALT_to_utf(unsigned char *iso, unsigned char *utf, int len) +{ + local_to_utf(iso, utf, LUmapALT, sizeof(LUmapALT) / sizeof(pg_local_to_utf), ALT, len); +} /* * UTF-8 ---> EUC_JP */ @@ -1586,11 +1653,11 @@ pg_encoding_conv_tbl pg_conv_tbl[] = { {LATIN5, "LATIN5", 0, iso2mic, mic2iso, latin5_to_utf, utf_to_latin5}, /* ISO 8859 Latin 5 */ {KOI8, "KOI8", 0, koi2mic, mic2koi, - 0, 0}, /* KOI8-R */ + KOI8_to_utf, utf_to_KOI8}, /* KOI8-R */ {WIN, "WIN", 0, win2mic, mic2win, - 0, 0}, /* CP1251 */ + WIN_to_utf , utf_to_WIN}, /* CP1251 */ {ALT, "ALT", 0, alt2mic, mic2alt, - 0, 0}, /* CP866 */ + ALT_to_utf, utf_to_ALT}, /* CP866 */ {SJIS, "SJIS", 1, sjis2mic, mic2sjis, sjis_to_utf, utf_to_sjis}, /* SJIS */ {BIG5, "BIG5", 1, big52mic, mic2big5,