From: Peter Eisentraut Date: Tue, 10 Feb 2009 19:29:39 +0000 (+0000) Subject: Support for KOI8U encoding X-Git-Tag: REL8_4_BETA1~281 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8b9dd6b5fd4e6052c593edd5d2fbe0e54da742ef;p=postgresql Support for KOI8U encoding --- diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index a77d3893c0..d9194b2b5b 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -1,4 +1,4 @@ - + Localization</> @@ -457,12 +457,20 @@ initdb --locale=sv_SE <entry></entry> </row> <row> - <entry><literal>KOI8</literal></entry> - <entry><acronym>KOI</acronym>8-R(U)</entry> - <entry>Cyrillic</entry> + <entry><literal>KOI8R</literal></entry> + <entry><acronym>KOI</acronym>8-R</entry> + <entry>Cyrillic (Russian)</entry> <entry>Yes</entry> <entry>1</entry> - <entry><literal>KOI8R</></entry> + <entry><literal>KOI8</></entry> + </row> + <row> + <entry><literal>KOI8U</literal></entry> + <entry><acronym>KOI</acronym>8-U</entry> + <entry>Cyrillic (Ukrainian)</entry> + <entry>Yes</entry> + <entry>1</entry> + <entry></entry> </row> <row> <entry><literal>LATIN1</literal></entry> diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl index 8ffde56b39..0e245d1071 100644 --- a/src/backend/utils/mb/Unicode/UCS_to_most.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl @@ -2,7 +2,7 @@ # # Copyright (c) 2001-2009, PostgreSQL Global Development Group # -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.6 2009/02/10 16:36:55 petere Exp $ +# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.7 2009/02/10 19:29:39 petere Exp $ # # Generate UTF-8 <--> character code conversion tables from # map files provided by Unicode organization. @@ -43,6 +43,7 @@ require "ucs2utf.pl"; 'ISO8859_15' => '8859-15.TXT', 'ISO8859_16' => '8859-16.TXT', 'KOI8R' => 'KOI8-R.TXT', + 'KOI8U' => 'KOI8-U.TXT', 'GBK' => 'CP936.TXT', 'UHC' => 'CP949.TXT', 'JOHAB' => 'JOHAB.TXT', diff --git a/src/backend/utils/mb/Unicode/koi8u_to_utf8.map b/src/backend/utils/mb/Unicode/koi8u_to_utf8.map new file mode 100644 index 0000000000..659f4868e5 --- /dev/null +++ b/src/backend/utils/mb/Unicode/koi8u_to_utf8.map @@ -0,0 +1,130 @@ +static pg_local_to_utf LUmapKOI8U[ 128 ] = { + {0x0080, 0xe29480}, + {0x0081, 0xe29482}, + {0x0082, 0xe2948c}, + {0x0083, 0xe29490}, + {0x0084, 0xe29494}, + {0x0085, 0xe29498}, + {0x0086, 0xe2949c}, + {0x0087, 0xe294a4}, + {0x0088, 0xe294ac}, + {0x0089, 0xe294b4}, + {0x008a, 0xe294bc}, + {0x008b, 0xe29680}, + {0x008c, 0xe29684}, + {0x008d, 0xe29688}, + {0x008e, 0xe2968c}, + {0x008f, 0xe29690}, + {0x0090, 0xe29691}, + {0x0091, 0xe29692}, + {0x0092, 0xe29693}, + {0x0093, 0xe28ca0}, + {0x0094, 0xe296a0}, + {0x0095, 0xe28899}, + {0x0096, 0xe2889a}, + {0x0097, 0xe28988}, + {0x0098, 0xe289a4}, + {0x0099, 0xe289a5}, + {0x009a, 0xc2a0}, + {0x009b, 0xe28ca1}, + {0x009c, 0xc2b0}, + {0x009d, 0xc2b2}, + {0x009e, 0xc2b7}, + {0x009f, 0xc3b7}, + {0x00a0, 0xe29590}, + {0x00a1, 0xe29591}, + {0x00a2, 0xe29592}, + {0x00a3, 0xd191}, + {0x00a4, 0xd194}, + {0x00a5, 0xe29594}, + {0x00a6, 0xd196}, + {0x00a7, 0xd197}, + {0x00a8, 0xe29597}, + {0x00a9, 0xe29598}, + {0x00aa, 0xe29599}, + {0x00ab, 0xe2959a}, + {0x00ac, 0xe2959b}, + {0x00ad, 0xd291}, + {0x00ae, 0xe2959d}, + {0x00af, 0xe2959e}, + {0x00b0, 0xe2959f}, + {0x00b1, 0xe295a0}, + {0x00b2, 0xe295a1}, + {0x00b3, 0xd081}, + {0x00b4, 0xd084}, + {0x00b5, 0xe295a3}, + {0x00b6, 0xd086}, + {0x00b7, 0xd087}, + {0x00b8, 0xe295a6}, + {0x00b9, 0xe295a7}, + {0x00ba, 0xe295a8}, + {0x00bb, 0xe295a9}, + {0x00bc, 0xe295aa}, + {0x00bd, 0xd290}, + {0x00be, 0xe295ac}, + {0x00bf, 0xc2a9}, + {0x00c0, 0xd18e}, + {0x00c1, 0xd0b0}, + {0x00c2, 0xd0b1}, + {0x00c3, 0xd186}, + {0x00c4, 0xd0b4}, + {0x00c5, 0xd0b5}, + {0x00c6, 0xd184}, + {0x00c7, 0xd0b3}, + {0x00c8, 0xd185}, + {0x00c9, 0xd0b8}, + {0x00ca, 0xd0b9}, + {0x00cb, 0xd0ba}, + {0x00cc, 0xd0bb}, + {0x00cd, 0xd0bc}, + {0x00ce, 0xd0bd}, + {0x00cf, 0xd0be}, + {0x00d0, 0xd0bf}, + {0x00d1, 0xd18f}, + {0x00d2, 0xd180}, + {0x00d3, 0xd181}, + {0x00d4, 0xd182}, + {0x00d5, 0xd183}, + {0x00d6, 0xd0b6}, + {0x00d7, 0xd0b2}, + {0x00d8, 0xd18c}, + {0x00d9, 0xd18b}, + {0x00da, 0xd0b7}, + {0x00db, 0xd188}, + {0x00dc, 0xd18d}, + {0x00dd, 0xd189}, + {0x00de, 0xd187}, + {0x00df, 0xd18a}, + {0x00e0, 0xd0ae}, + {0x00e1, 0xd090}, + {0x00e2, 0xd091}, + {0x00e3, 0xd0a6}, + {0x00e4, 0xd094}, + {0x00e5, 0xd095}, + {0x00e6, 0xd0a4}, + {0x00e7, 0xd093}, + {0x00e8, 0xd0a5}, + {0x00e9, 0xd098}, + {0x00ea, 0xd099}, + {0x00eb, 0xd09a}, + {0x00ec, 0xd09b}, + {0x00ed, 0xd09c}, + {0x00ee, 0xd09d}, + {0x00ef, 0xd09e}, + {0x00f0, 0xd09f}, + {0x00f1, 0xd0af}, + {0x00f2, 0xd0a0}, + {0x00f3, 0xd0a1}, + {0x00f4, 0xd0a2}, + {0x00f5, 0xd0a3}, + {0x00f6, 0xd096}, + {0x00f7, 0xd092}, + {0x00f8, 0xd0ac}, + {0x00f9, 0xd0ab}, + {0x00fa, 0xd097}, + {0x00fb, 0xd0a8}, + {0x00fc, 0xd0ad}, + {0x00fd, 0xd0a9}, + {0x00fe, 0xd0a7}, + {0x00ff, 0xd0aa} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_koi8u.map b/src/backend/utils/mb/Unicode/utf8_to_koi8u.map new file mode 100644 index 0000000000..7f262a4aaa --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_koi8u.map @@ -0,0 +1,130 @@ +static pg_utf_to_local ULmapKOI8U[ 128 ] = { + {0xc2a0, 0x009a}, + {0xc2a9, 0x00bf}, + {0xc2b0, 0x009c}, + {0xc2b2, 0x009d}, + {0xc2b7, 0x009e}, + {0xc3b7, 0x009f}, + {0xd081, 0x00b3}, + {0xd084, 0x00b4}, + {0xd086, 0x00b6}, + {0xd087, 0x00b7}, + {0xd090, 0x00e1}, + {0xd091, 0x00e2}, + {0xd092, 0x00f7}, + {0xd093, 0x00e7}, + {0xd094, 0x00e4}, + {0xd095, 0x00e5}, + {0xd096, 0x00f6}, + {0xd097, 0x00fa}, + {0xd098, 0x00e9}, + {0xd099, 0x00ea}, + {0xd09a, 0x00eb}, + {0xd09b, 0x00ec}, + {0xd09c, 0x00ed}, + {0xd09d, 0x00ee}, + {0xd09e, 0x00ef}, + {0xd09f, 0x00f0}, + {0xd0a0, 0x00f2}, + {0xd0a1, 0x00f3}, + {0xd0a2, 0x00f4}, + {0xd0a3, 0x00f5}, + {0xd0a4, 0x00e6}, + {0xd0a5, 0x00e8}, + {0xd0a6, 0x00e3}, + {0xd0a7, 0x00fe}, + {0xd0a8, 0x00fb}, + {0xd0a9, 0x00fd}, + {0xd0aa, 0x00ff}, + {0xd0ab, 0x00f9}, + {0xd0ac, 0x00f8}, + {0xd0ad, 0x00fc}, + {0xd0ae, 0x00e0}, + {0xd0af, 0x00f1}, + {0xd0b0, 0x00c1}, + {0xd0b1, 0x00c2}, + {0xd0b2, 0x00d7}, + {0xd0b3, 0x00c7}, + {0xd0b4, 0x00c4}, + {0xd0b5, 0x00c5}, + {0xd0b6, 0x00d6}, + {0xd0b7, 0x00da}, + {0xd0b8, 0x00c9}, + {0xd0b9, 0x00ca}, + {0xd0ba, 0x00cb}, + {0xd0bb, 0x00cc}, + {0xd0bc, 0x00cd}, + {0xd0bd, 0x00ce}, + {0xd0be, 0x00cf}, + {0xd0bf, 0x00d0}, + {0xd180, 0x00d2}, + {0xd181, 0x00d3}, + {0xd182, 0x00d4}, + {0xd183, 0x00d5}, + {0xd184, 0x00c6}, + {0xd185, 0x00c8}, + {0xd186, 0x00c3}, + {0xd187, 0x00de}, + {0xd188, 0x00db}, + {0xd189, 0x00dd}, + {0xd18a, 0x00df}, + {0xd18b, 0x00d9}, + {0xd18c, 0x00d8}, + {0xd18d, 0x00dc}, + {0xd18e, 0x00c0}, + {0xd18f, 0x00d1}, + {0xd191, 0x00a3}, + {0xd194, 0x00a4}, + {0xd196, 0x00a6}, + {0xd197, 0x00a7}, + {0xd290, 0x00bd}, + {0xd291, 0x00ad}, + {0xe28899, 0x0095}, + {0xe2889a, 0x0096}, + {0xe28988, 0x0097}, + {0xe289a4, 0x0098}, + {0xe289a5, 0x0099}, + {0xe28ca0, 0x0093}, + {0xe28ca1, 0x009b}, + {0xe29480, 0x0080}, + {0xe29482, 0x0081}, + {0xe2948c, 0x0082}, + {0xe29490, 0x0083}, + {0xe29494, 0x0084}, + {0xe29498, 0x0085}, + {0xe2949c, 0x0086}, + {0xe294a4, 0x0087}, + {0xe294ac, 0x0088}, + {0xe294b4, 0x0089}, + {0xe294bc, 0x008a}, + {0xe29590, 0x00a0}, + {0xe29591, 0x00a1}, + {0xe29592, 0x00a2}, + {0xe29594, 0x00a5}, + {0xe29597, 0x00a8}, + {0xe29598, 0x00a9}, + {0xe29599, 0x00aa}, + {0xe2959a, 0x00ab}, + {0xe2959b, 0x00ac}, + {0xe2959d, 0x00ae}, + {0xe2959e, 0x00af}, + {0xe2959f, 0x00b0}, + {0xe295a0, 0x00b1}, + {0xe295a1, 0x00b2}, + {0xe295a3, 0x00b5}, + {0xe295a6, 0x00b8}, + {0xe295a7, 0x00b9}, + {0xe295a8, 0x00ba}, + {0xe295a9, 0x00bb}, + {0xe295aa, 0x00bc}, + {0xe295ac, 0x00be}, + {0xe29680, 0x008b}, + {0xe29684, 0x008c}, + {0xe29688, 0x008d}, + {0xe2968c, 0x008e}, + {0xe29690, 0x008f}, + {0xe29691, 0x0090}, + {0xe29692, 0x0091}, + {0xe29693, 0x0092}, + {0xe296a0, 0x0094} +}; diff --git a/src/backend/utils/mb/conversion_procs/Makefile b/src/backend/utils/mb/conversion_procs/Makefile index 051e885ed8..2d0b37564a 100644 --- a/src/backend/utils/mb/conversion_procs/Makefile +++ b/src/backend/utils/mb/conversion_procs/Makefile @@ -4,7 +4,7 @@ # Makefile for utils/mb/conversion_procs # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.20 2008/08/23 20:31:37 momjian Exp $ +# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.21 2009/02/10 19:29:39 petere Exp $ # #------------------------------------------------------------------------- @@ -84,6 +84,8 @@ CONVERSIONS = \ utf8_to_big5 UTF8 BIG5 utf8_to_big5 utf8_and_big5 \ utf8_to_koi8_r UTF8 KOI8R utf8_to_koi8r utf8_and_cyrillic \ koi8_r_to_utf8 KOI8R UTF8 koi8r_to_utf8 utf8_and_cyrillic \ + utf8_to_koi8_u UTF8 KOI8U utf8_to_koi8u utf8_and_cyrillic \ + koi8_u_to_utf8 KOI8U UTF8 koi8u_to_utf8 utf8_and_cyrillic \ utf8_to_windows_866 UTF8 WIN866 utf8_to_win utf8_and_win \ windows_866_to_utf8 WIN866 UTF8 win_to_utf8 utf8_and_win \ utf8_to_windows_874 UTF8 WIN874 utf8_to_win utf8_and_win \ diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c index 0f22256a43..0231df6c8c 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.23 2009/01/29 19:23:40 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.24 2009/02/10 19:29:39 petere Exp $ * *------------------------------------------------------------------------- */ @@ -16,15 +16,23 @@ #include "mb/pg_wchar.h" #include "../../Unicode/utf8_to_koi8r.map" #include "../../Unicode/koi8r_to_utf8.map" +#include "../../Unicode/utf8_to_koi8u.map" +#include "../../Unicode/koi8u_to_utf8.map" PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(utf8_to_koi8r); PG_FUNCTION_INFO_V1(koi8r_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_koi8u); +PG_FUNCTION_INFO_V1(koi8u_to_utf8); + extern Datum utf8_to_koi8r(PG_FUNCTION_ARGS); extern Datum koi8r_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_koi8u(PG_FUNCTION_ARGS); +extern Datum koi8u_to_utf8(PG_FUNCTION_ARGS); + /* ---------- * conv_proc( * INTEGER, -- source encoding id @@ -65,3 +73,33 @@ koi8r_to_utf8(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +Datum +utf8_to_koi8u(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U); + + UtfToLocal(src, dest, ULmapKOI8U, NULL, + sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len); + + PG_RETURN_VOID(); +} + +Datum +koi8u_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8); + + LocalToUtf(src, dest, LUmapKOI8U, NULL, + sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len); + + PG_RETURN_VOID(); +} diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c index 0167dffc06..c441f98736 100644 --- a/src/backend/utils/mb/encnames.c +++ b/src/backend/utils/mb/encnames.c @@ -2,7 +2,7 @@ * Encoding names and routines for work with it. All * in this file is shared bedween FE and BE. * - * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.37 2007/11/15 21:14:40 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.38 2009/02/10 19:29:39 petere Exp $ */ #ifdef FRONTEND #include "postgres_fe.h" @@ -122,6 +122,9 @@ pg_encname pg_encname_tbl[] = { "koi8r", PG_KOI8R }, /* KOI8-R; RFC1489 */ + { + "koi8u", PG_KOI8U + }, /* KOI8-U; RFC2319 */ { "latin1", PG_LATIN1 }, /* alias for ISO-8859-1 */ @@ -366,7 +369,7 @@ pg_enc2name pg_enc2name_tbl[] = "WIN874", PG_WIN874 }, { - "KOI8", PG_KOI8R + "KOI8R", PG_KOI8R }, { "WIN1251", PG_WIN1251 @@ -401,6 +404,9 @@ pg_enc2name pg_enc2name_tbl[] = { "WIN1257", PG_WIN1257 }, + { + "KOI8U", PG_KOI8U + }, { "SJIS", PG_SJIS }, diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index d14d37d619..7b7ebf6bbf 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. * Tatsuo Ishii - * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.70 2009/02/10 16:44:44 petere Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.71 2009/02/10 19:29:39 petere Exp $ * */ /* can be used in either frontend or backend */ @@ -1373,6 +1373,7 @@ pg_wchar_tbl pg_wchar_table[] = { {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */ {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */ {0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */ {0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */ diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 65c99a2797..12820d4e20 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.83 2009/01/29 19:23:42 tgl Exp $ + * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.84 2009/02/10 19:29:39 petere Exp $ * * NOTES * This is used both by the backend and by libpq, but should not be @@ -202,6 +202,7 @@ typedef enum pg_enc PG_WIN1254, /* windows-1254 */ PG_WIN1255, /* windows-1255 */ PG_WIN1257, /* windows-1257 */ + PG_KOI8U, /* KOI8-U */ /* PG_ENCODING_BE_LAST points to the above entry */ /* followings are for client encoding only */ @@ -216,7 +217,7 @@ typedef enum pg_enc } pg_enc; -#define PG_ENCODING_BE_LAST PG_WIN1257 +#define PG_ENCODING_BE_LAST PG_KOI8U /* * Please use these tests before access to pg_encconv_tbl[] diff --git a/src/port/chklocale.c b/src/port/chklocale.c index 74f440abbe..864071d05e 100644 --- a/src/port/chklocale.c +++ b/src/port/chklocale.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/port/chklocale.c,v 1.10 2009/01/01 17:24:04 momjian Exp $ + * $PostgreSQL: pgsql/src/port/chklocale.c,v 1.11 2009/02/10 19:29:39 petere Exp $ * *------------------------------------------------------------------------- */ @@ -123,6 +123,9 @@ static const struct encoding_match encoding_match_list[] = { {PG_KOI8R, "KOI8-R"}, {PG_KOI8R, "CP20866"}, + {PG_KOI8U, "KOI8-U"}, + {PG_KOI8U, "CP21866"}, + {PG_WIN866, "CP866"}, {PG_WIN874, "CP874"}, {PG_WIN1250, "CP1250"},