granicus.if.org Git - postgresql/commitdiff
Teach UtfToLocal/LocalToUtf to support algorithmic encoding conversions.
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 15 May 2015 02:27:07 +0000 (22:27 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 15 May 2015 02:27:12 +0000 (22:27 -0400)
Until now, these functions have only supported encoding conversions using
lookup tables, which is fine as long as there are not too many code points
to convert.  However, GB18030 expects all 1.1 million Unicode code points
to be convertible, which would require a ridiculously-sized lookup table.
Fortunately, a large fraction of those conversions can be expressed through
arithmetic, i.e., the conversions are one-to-one in certain defined ranges.
To support that, provide a callback function that is used after consulting
the lookup tables.  (This patch doesn't actually change anything about the
GB18030 conversion behavior, just provides infrastructure for fixing it.)

Since this requires changing the APIs of UtfToLocal/LocalToUtf anyway,
take the opportunity to rearrange their argument lists into what seems
to me a saner order.  And beautify the call sites by using lengthof()
instead of error-prone sizeof() arithmetic.

In passing, also mark all the lookup tables used by these calls "const".
This moves an impressive amount of stuff into the text segment, at least
on my machine, and is safer anyhow.

108 files changed:
src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
src/backend/utils/mb/Unicode/UCS_to_most.pl
src/backend/utils/mb/Unicode/big5_to_utf8.map
src/backend/utils/mb/Unicode/euc_cn_to_utf8.map
src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map
src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map
src/backend/utils/mb/Unicode/euc_jp_to_utf8.map
src/backend/utils/mb/Unicode/euc_kr_to_utf8.map
src/backend/utils/mb/Unicode/euc_tw_to_utf8.map
src/backend/utils/mb/Unicode/gb18030_to_utf8.map
src/backend/utils/mb/Unicode/gbk_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_10_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_13_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_14_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_15_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_16_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_2_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_3_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_4_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_5_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_6_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_8_to_utf8.map
src/backend/utils/mb/Unicode/iso8859_9_to_utf8.map
src/backend/utils/mb/Unicode/johab_to_utf8.map
src/backend/utils/mb/Unicode/koi8r_to_utf8.map
src/backend/utils/mb/Unicode/koi8u_to_utf8.map
src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map
src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map
src/backend/utils/mb/Unicode/sjis_to_utf8.map
src/backend/utils/mb/Unicode/uhc_to_utf8.map
src/backend/utils/mb/Unicode/utf8_to_big5.map
src/backend/utils/mb/Unicode/utf8_to_euc_cn.map
src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map
src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map
src/backend/utils/mb/Unicode/utf8_to_euc_jp.map
src/backend/utils/mb/Unicode/utf8_to_euc_kr.map
src/backend/utils/mb/Unicode/utf8_to_euc_tw.map
src/backend/utils/mb/Unicode/utf8_to_gb18030.map
src/backend/utils/mb/Unicode/utf8_to_gbk.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_10.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_13.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_14.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_15.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_16.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_2.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_3.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_4.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_5.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_6.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_7.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_8.map
src/backend/utils/mb/Unicode/utf8_to_iso8859_9.map
src/backend/utils/mb/Unicode/utf8_to_johab.map
src/backend/utils/mb/Unicode/utf8_to_koi8r.map
src/backend/utils/mb/Unicode/utf8_to_koi8u.map
src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map
src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map
src/backend/utils/mb/Unicode/utf8_to_sjis.map
src/backend/utils/mb/Unicode/utf8_to_uhc.map
src/backend/utils/mb/Unicode/utf8_to_win1250.map
src/backend/utils/mb/Unicode/utf8_to_win1251.map
src/backend/utils/mb/Unicode/utf8_to_win1252.map
src/backend/utils/mb/Unicode/utf8_to_win1253.map
src/backend/utils/mb/Unicode/utf8_to_win1254.map
src/backend/utils/mb/Unicode/utf8_to_win1255.map
src/backend/utils/mb/Unicode/utf8_to_win1256.map
src/backend/utils/mb/Unicode/utf8_to_win1257.map
src/backend/utils/mb/Unicode/utf8_to_win1258.map
src/backend/utils/mb/Unicode/utf8_to_win866.map
src/backend/utils/mb/Unicode/utf8_to_win874.map
src/backend/utils/mb/Unicode/win1250_to_utf8.map
src/backend/utils/mb/Unicode/win1251_to_utf8.map
src/backend/utils/mb/Unicode/win1252_to_utf8.map
src/backend/utils/mb/Unicode/win1253_to_utf8.map
src/backend/utils/mb/Unicode/win1254_to_utf8.map
src/backend/utils/mb/Unicode/win1255_to_utf8.map
src/backend/utils/mb/Unicode/win1256_to_utf8.map
src/backend/utils/mb/Unicode/win1257_to_utf8.map
src/backend/utils/mb/Unicode/win1258_to_utf8.map
src/backend/utils/mb/Unicode/win866_to_utf8.map
src/backend/utils/mb/Unicode/win874_to_utf8.map
src/backend/utils/mb/conv.c
src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c
src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
src/include/mb/pg_wchar.h

index c741d6e703ec199ac18bfedded5340735b6f002b..bd479298ec7a45e82886ba49d97a4d11082bcefc 100755 (executable)
@@ -97,7 +97,7 @@ close(FILE);
 
 $file = lc("utf8_to_big5.map");
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_utf_to_local ULmapBIG5[ $count ] = {\n";
+print FILE "static const pg_utf_to_local ULmapBIG5[ $count ] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -185,7 +185,7 @@ close(FILE);
 
 $file = lc("big5_to_utf8.map");
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_local_to_utf LUmapBIG5[ $count ] = {\n";
+print FILE "static const pg_local_to_utf LUmapBIG5[ $count ] = {\n";
 for $index (sort { $a <=> $b } keys(%array))
 {
        $utf = $array{$index};
index cb9a8cb0039b91b1b7f70bfdc27f8f618f9f75cc..bfc99123bf627bcef8d9faaeb1ce953dcffe32e0 100755 (executable)
@@ -55,7 +55,7 @@ close(FILE);
 
 $file = "utf8_to_euc_cn.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
+print FILE "static const pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -109,7 +109,7 @@ close(FILE);
 
 $file = "euc_cn_to_utf8.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
+print FILE "static const pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
 for $index (sort { $a <=> $b } keys(%array))
 {
        $utf = $array{$index};
index 67962e39d26d935bb5c8a536abc5067a6831a62f..7860736b3bb0216bf7d51f5a6cc7624a8eef5f33 100755 (executable)
@@ -72,7 +72,7 @@ open(FILE, "> $file") || die("cannot open $file");
 print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
 print FILE " */\n";
-print FILE "static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
+print FILE "static const pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -133,7 +133,7 @@ print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
 print FILE " */\n";
 print FILE
-  "static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
+  "static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
 
 for $index (sort { $a cmp $b } keys(%array1))
 {
@@ -256,7 +256,7 @@ open(FILE, "> $file") || die("cannot open $file");
 print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
 print FILE " */\n";
-print FILE "static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
+print FILE "static const pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -283,7 +283,7 @@ print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
 print FILE " */\n";
 print FILE
-  "static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
+  "static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array1))
 {
index c2297e5d67b61f3347628b81c5e9ba1271471207..79bc05b4bb00e9251b9836877d9d03db5efdda10 100755 (executable)
@@ -136,7 +136,7 @@ close(FILE);
 
 $file = "utf8_to_euc_jp.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
+print FILE "static const pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -263,7 +263,7 @@ close(FILE);
 
 $file = "euc_jp_to_utf8.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
+print FILE "static const pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
 for $index (sort { $a <=> $b } keys(%array))
 {
        $utf = $array{$index};
index 42cd20028c0d24df9d5bba06b06ddd40fabbf314..fa553fdafa95d6032993734695b478547f2ce848 100755 (executable)
@@ -55,7 +55,7 @@ close(FILE);
 
 $file = "utf8_to_euc_kr.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
+print FILE "static const pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -109,7 +109,7 @@ close(FILE);
 
 $file = "euc_kr_to_utf8.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
+print FILE "static const pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
 for $index (sort { $a <=> $b } keys(%array))
 {
        $utf = $array{$index};
index 1d76f135e676d66447b7fb66f15f91fa5ec2e32e..02414ba20232cf5807c73163c5be0816d695d6de 100755 (executable)
@@ -71,7 +71,7 @@ close(FILE);
 
 $file = "utf8_to_euc_tw.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
+print FILE "static const pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -138,7 +138,7 @@ close(FILE);
 
 $file = "euc_tw_to_utf8.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
+print FILE "static const pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
 for $index (sort { $a <=> $b } keys(%array))
 {
        $utf = $array{$index};
index 4f0bd078f72ebc77b82ef9697325fabb50e87597..259cb5d9ddbda33f9c3ab55f5ace3d021e310423 100755 (executable)
@@ -52,7 +52,7 @@ close(FILE);
 
 $file = "utf8_to_gb18030.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_utf_to_local ULmapGB18030[ $count ] = {\n";
+print FILE "static const pg_utf_to_local ULmapGB18030[ $count ] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -106,7 +106,7 @@ close(FILE);
 
 $file = "gb18030_to_utf8.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_local_to_utf LUmapGB18030[ $count ] = {\n";
+print FILE "static const pg_local_to_utf LUmapGB18030[ $count ] = {\n";
 for $index (sort { $a <=> $b } keys(%array))
 {
        $utf = $array{$index};
index 1a367d71a3a665e2031d593a637bef35be5e6f4f..edfb61bcd93b06f1280f6aec0f713520260942a3 100755 (executable)
@@ -72,7 +72,7 @@ open(FILE, "> $file") || die("cannot open $file");
 print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
 print FILE " */\n";
-print FILE "static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
+print FILE "static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -99,7 +99,7 @@ print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
 print FILE " */\n";
 print FILE
-  "static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
+  "static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
 
 for $index (sort { $a cmp $b } keys(%array1))
 {
@@ -185,7 +185,7 @@ open(FILE, "> $file") || die("cannot open $file");
 print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_SHIFTJIS_2004.pl\n";
 print FILE " */\n";
-print FILE "static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
+print FILE "static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -212,7 +212,7 @@ print FILE "/*\n";
 print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
 print FILE " */\n";
 print FILE
-  "static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
+  "static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array1))
 {
index 66597989963afa477598e37d8e7e2d84508002ab..74cd7ac5f88cf323f94ab6f4f8d029afea3eae10 100755 (executable)
@@ -72,7 +72,7 @@ close(FILE);
 
 $file = "utf8_to_sjis.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_utf_to_local ULmapSJIS[ $count ] = {\n";
+print FILE "static const pg_utf_to_local ULmapSJIS[ $count ] = {\n";
 
 for $index (sort { $a <=> $b } keys(%array))
 {
@@ -122,7 +122,7 @@ close(FILE);
 
 $file = "sjis_to_utf8.map";
 open(FILE, "> $file") || die("cannot open $file");
-print FILE "static pg_local_to_utf LUmapSJIS[ $count ] = {\n";
+print FILE "static const pg_local_to_utf LUmapSJIS[ $count ] = {\n";
 for $index (sort { $a <=> $b } keys(%array))
 {
        $utf = $array{$index};
index 9c8e39b9e5d020886462e30eee1f2d0b2b30fe4a..94e13fa241c5d2f98430699ea380e3475a813b35 100644 (file)
@@ -88,7 +88,7 @@ foreach $charset (@charsets)
 
        $file = lc("utf8_to_${charset}.map");
        open(FILE, "> $file") || die("cannot open $file");
-       print FILE "static pg_utf_to_local ULmap${charset}[ $count ] = {\n";
+       print FILE "static const pg_utf_to_local ULmap${charset}[ $count ] = {\n";
 
        for $index (sort { $a <=> $b } keys(%array))
        {
@@ -140,7 +140,7 @@ foreach $charset (@charsets)
 
        $file = lc("${charset}_to_utf8.map");
        open(FILE, "> $file") || die("cannot open $file");
-       print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n";
+       print FILE "static const pg_local_to_utf LUmap${charset}[ $count ] = {\n";
        for $index (sort { $a <=> $b } keys(%array))
        {
                $utf = $array{$index};
index cf180b61cd19a0506a37e4fcfacb00d9fed01a16..8d6dbd2a35f6335f4fbc0e0ed774e50f62589708 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapBIG5[ 13717 ] = {
+static const pg_local_to_utf LUmapBIG5[ 13717 ] = {
   {0xa140, 0xe38080},
   {0xa141, 0xefbc8c},
   {0xa142, 0xe38081},
index bd12ebe39bd177c3484b86ee81d3602ff988e110..4052379832e82645cf5d5d486f891d4021509281 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/euc_cn_to_utf8.map */
 
-static pg_local_to_utf LUmapEUC_CN[ 7445 ] = {
+static const pg_local_to_utf LUmapEUC_CN[ 7445 ] = {
   {0xa1a1, 0xe38080},
   {0xa1a2, 0xe38081},
   {0xa1a3, 0xe38082},
index d81b00b46a1e11fcce32ca705540f806b2c7bf8d..509592975214c3412353f574622b7442bdc2e6b6 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_EUC_JIS_2004.pl
  */
-static pg_local_to_utf LUmapEUC_JIS_2004[] = {
+static const pg_local_to_utf LUmapEUC_JIS_2004[] = {
   {0x000000, 0x00000000},      /* U+0000        <control> */
   {0x000001, 0x00000001},      /* U+0001        <control> */
   {0x000002, 0x00000002},      /* U+0002        <control> */
index 318c26f2ba7316f097d16587cb94e977de3939c3..fb4b2477723024274cde41c25114b00df3f11489 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_EUC_JIS_2004.pl
  */
-static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
+static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
   {0x00a4f7, 0x00e3818b, 0x00e3829a},  /* U+304B+309A          [2000] */
   {0x00a4f8, 0x00e3818d, 0x00e3829a},  /* U+304D+309A          [2000] */
   {0x00a4f9, 0x00e3818f, 0x00e3829a},  /* U+304F+309A          [2000] */
index ae796c12c2ea48071e5db8d760efdaa49afdbe1d..db427cbb24cdc94617ed026b3fe50638e42d8f9e 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/euc_jp_to_utf8.map */
 
-static pg_local_to_utf LUmapEUC_JP[] = {
+static const pg_local_to_utf LUmapEUC_JP[] = {
   {0x8ea1, 0xefbda1},
   {0x8ea2, 0xefbda2},
   {0x8ea3, 0xefbda3},
index c3283be6fc8ea4b864113e3a8938b61aa8cc3c1c..e37152137d6b93bc95466e9135fb5df2d7d71784 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
+static const pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
   {0xa1a1, 0xe38080},
   {0xa1a2, 0xe38081},
   {0xa1a3, 0xe38082},
index d3a303f64b5906e2ca136b632bcb1f8d1afd5541..b430b446dd953e86ad9e94bdde94fa93280d1377 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/euc_tw_to_utf8.map */
 
-static pg_local_to_utf LUmapEUC_TW[ 23575 ] = {
+static const pg_local_to_utf LUmapEUC_TW[ 23575 ] = {
   {0xa1a1, 0xe38080},
   {0xa1a2, 0xefbc8c},
   {0xa1a3, 0xe38081},
index 95669451d270d33cda36bb43736ff9e66b8d63b7..1715f6dd53ece1719d5f1693a6f68405e1813196 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/gb18030_to_utf8.map */
 
-static pg_local_to_utf LUmapGB18030[ 63360 ] = {
+static const pg_local_to_utf LUmapGB18030[ 63360 ] = {
   {0x8140, 0xe4b882},
   {0x8141, 0xe4b884},
   {0x8142, 0xe4b885},
index 6804f5dc6955854990e8d173baab4ad405dd1191..fced1f459041c63d68413accdff103e34535ece0 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/gbk_to_utf8.map */
 
-static pg_local_to_utf LUmapGBK[ 21792 ] = {
+static const pg_local_to_utf LUmapGBK[ 21792 ] = {
   {0x0080, 0xe282ac},
   {0x8140, 0xe4b882},
   {0x8141, 0xe4b884},
index 607b8e940163605c290d83affc6cd4f9639509de..8a650ee5e7ae806d8d43da5e4c6dba2aa91cdfc2 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_10_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_10[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_10[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index d50ce084f0937c00400c4a8effff06c1ef08ecef..207570635de4de4786d95b0c6656f263221a52b6 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_13_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_13[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_13[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index eaecef88e8a291e672e75500cd9d6b65a8ad8f91..49d63d4f076436c5d6433441e055aa1baafeae94 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_14_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_14[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_14[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index e11a6bd241935f316134ac94e7e6a96e57fa22a5..349b64cbda480e11e68c9d0a8b37e30bb0c2b6da 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_15_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_15[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_15[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 77382cb7ed40417cba99a399ad8d4a9a078cc0b2..d8e280166ccb2ab8e82650c831e74db42802f08b 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_16_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_16[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_16[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 5cc984470d97db04a77a9c51415744b1c60d1015..30d487a0c037397640c09ab77b30187915d158d4 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_2_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_2[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_2[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 2a7e2855976efca47655312404d134444421940a..94b5bc4f2036b1e2cdc2ed3b41e72dd381f6bf78 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_3_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_3[ 121 ] = {
+static const pg_local_to_utf LUmapISO8859_3[ 121 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 315f1d3be64724ca4dc3c83f14b4b0f0b23405dc..f339c1991f5faea68c4b7ef05862f6623a4f30b1 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_4_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_4[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_4[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 60838e1be73f999876df530731647c0edac331ae..601be303e066914ed44800a99608db7f2a10144b 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_5_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_5[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_5[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index f097a01bf5ae0e6ecd49b1cc9104f365480da53c..289f97e7b5065926d6c00889705d73f39f5ce895 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_6_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_6[ 83 ] = {
+static const pg_local_to_utf LUmapISO8859_6[ 83 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 8cc6826a795f015246e26aeb1e2708d969a796c8..fbbecaa340007c7fd0431f4a1c67fccea37b0313 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_7[ 125 ] = {
+static const pg_local_to_utf LUmapISO8859_7[ 125 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 22e0b4e26614fce9ae0324a530d5e600e34e98cf..4ed316c7891e6bece6d51dbedfa7043086ebb898 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_8_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_8[ 92 ] = {
+static const pg_local_to_utf LUmapISO8859_8[ 92 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index 268f0e4c933a271949528444867540b7ed31c25c..f86cc65129bd5ef54cb7615be4822ad7a6500ed1 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/iso8859_9_to_utf8.map */
 
-static pg_local_to_utf LUmapISO8859_9[ 128 ] = {
+static const pg_local_to_utf LUmapISO8859_9[ 128 ] = {
   {0x0080, 0xc280},
   {0x0081, 0xc281},
   {0x0082, 0xc282},
index a4584c5bc5829e10c61b35689649bc9f3830a785..8110f6e8531c7aeebd298ac368475d272640c76d 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapJOHAB[ 17049 ] = {
+static const pg_local_to_utf LUmapJOHAB[ 17049 ] = {
   {0x8444, 0xe384b3},
   {0x8446, 0xe384b5},
   {0x8447, 0xe384b6},
index 9364e5efe9ae1fdf54a35f927f6b9612d7a3f9d2..738f160bfaf39e4d3b50f7170e3db8b410b7f5dc 100644 (file)
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/koi8r_to_utf8.map */
 
-static pg_local_to_utf LUmapKOI8R[ 128 ] = {
+static const pg_local_to_utf LUmapKOI8R[ 128 ] = {
   {0x0080, 0xe29480},
   {0x0081, 0xe29482},
   {0x0082, 0xe2948c},
index 659f4868e597cb127e747eb1b5e310d6f21cdac0..087ad4a5f5ee61f8f817c9146846be11b46f4ca9 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapKOI8U[ 128 ] = {
+static const pg_local_to_utf LUmapKOI8U[ 128 ] = {
   {0x0080, 0xe29480},
   {0x0081, 0xe29482},
   {0x0082, 0xe2948c},
index 5a1496c88c960e35f5e4e862b00c52ab34891a69..f2b268acd396f633f8bd55b9dbaad80126f33f9c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_SHIFTJIS_2004.pl
  */
-static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
+static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
   {0x0000, 0x00000000},        /* U+0000        <control> */
   {0x0001, 0x00000001},        /* U+0001        <control> */
   {0x0002, 0x00000002},        /* U+0002        <control> */
index 57d54c9f09551bde69becb2f41d2265eb5b28a42..b1c7bced5fd605816fd02bc2185c7d59f83ddabc 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_SHIFT_JIS_2004.pl
  */
-static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
+static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
   {0x82f5, 0x00e3818b, 0x00e3829a},    /* U+304B+309A          [2000] */
   {0x82f6, 0x00e3818d, 0x00e3829a},    /* U+304D+309A          [2000] */
   {0x82f7, 0x00e3818f, 0x00e3829a},    /* U+304F+309A          [2000] */
index 05e846177b7a679548a7bba2f86a3e0358a149f4..6bafaa330fab7fcc3d6ffb3e8f454495658e44b7 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapSJIS[ 7787 ] = {
+static const pg_local_to_utf LUmapSJIS[ 7787 ] = {
   {0x00a1, 0xefbda1},
   {0x00a2, 0xefbda2},
   {0x00a3, 0xefbda3},
index 47bcfec53b273c66bba00a4a4ad7c63288871d24..26a7b18f658672ef404d3894d2e9d64fb316305c 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapUHC[ 17237 ] = {
+static const pg_local_to_utf LUmapUHC[ 17237 ] = {
   {0x8141, 0xeab082},
   {0x8142, 0xeab083},
   {0x8143, 0xeab085},
index 85b3c24f1d542357f10ed4305a9e3c06e9e6571b..68cccf441c025ad98cb0005cf36e32d37161707a 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapBIG5[ 13711 ] = {
+static const pg_utf_to_local ULmapBIG5[ 13711 ] = {
   {0xc2a2, 0xa246},
   {0xc2a3, 0xa247},
   {0xc2a5, 0xa244},
index 949bade3eb0f7d250db576bcc5c7aae7f359f3ca..b28eb9cc0c7ba0cb7f6d2ebc14f5a275c005a09a 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
+static const pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
   {0xc2a4, 0xa1e8},
   {0xc2a7, 0xa1ec},
   {0xc2a8, 0xa1a7},
index b51589cd6e6cd181d2d8374a4b84770237971804..250771f19bb3e9836b035b3fa8077be6491fcf7e 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_EUC_JIS_2004.pl
  */
-static pg_utf_to_local ULmapEUC_JIS_2004[] = {
+static const pg_utf_to_local ULmapEUC_JIS_2004[] = {
   {0x00000000, 0x000000},      /* U+0000        <control> */
   {0x00000001, 0x000001},      /* U+0001        <control> */
   {0x00000002, 0x000002},      /* U+0002        <control> */
index c26cc8d7014856f39031eaf86ff874a6759ca6d8..d098e256df7e39d9f0fc3443be74edbc4d1e9753 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_EUC_JIS_2004.pl
  */
-static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
+static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
   {0x0000c3a6, 0x0000cc80, 0x00abc4},  /* U+00E6+0300          [2000] */
   {0x0000c994, 0x0000cc80, 0x00abc8},  /* U+0254+0300          [2000] */
   {0x0000c994, 0x0000cc81, 0x00abc9},  /* U+0254+0301          [2000] */
index a5b0944440ec60771b0b440435b2b5898144e101..137d4fdef614e574fa53ba34ea3872ac34e2a83d 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
+static const pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
   {0xc2a1, 0x8fa2c2},
   {0xc2a4, 0x8fa2f0},
   {0xc2a6, 0x8fa2c3},
index f5c9d0ab8b4d70b733168eb7d1e0f36c46732f6b..4a78b260ea45759058d2762b3a0a247fd819de97 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
+static const pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
   {0xc2a1, 0xa2ae},
   {0xc2a4, 0xa2b4},
   {0xc2a7, 0xa1d7},
index 3ddf9a017624e47f08b6ac08df8eac57bb03498f..0ade01aa8851438fef090fc992d6b0d8656d527f 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapEUC_TW[ 17711 ] = {
+static const pg_utf_to_local ULmapEUC_TW[ 17711 ] = {
   {0xc2a7, 0xa1f0},
   {0xc2b0, 0xa2f8},
   {0xc2b1, 0xa2b4},
index 476625b22b8affc37984922f22c4431ea264a79c..52d380c5c14ee7a683d89770d414b5fffab87728 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapGB18030[ 63360 ] = {
+static const pg_utf_to_local ULmapGB18030[ 63360 ] = {
   {0xc280, 0x81308130},
   {0xc281, 0x81308131},
   {0xc282, 0x81308132},
index df7c4e2a9e70595878fd6911b35043949bd2cecf..70febd7b2c4a0b46aeddb0213af94dfc764d79c8 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapGBK[ 21792 ] = {
+static const pg_utf_to_local ULmapGBK[ 21792 ] = {
   {0xc2a4, 0xa1e8},
   {0xc2a7, 0xa1ec},
   {0xc2a8, 0xa1a7},
index b4aeafee2ba19d39ddc6050bd44f6e0bce190952..85bbd23cea6a52c90d30263455425753576fe369 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_10[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_10[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index eca37849248e82d0dd7b914c49c2773cb1932bf2..24588af340129390497034ba7366de028ebf33af 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_13[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_13[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index bef24578eaa86c9fb877ad9453166bbc66aa35ff..8f273050861b3c4566024362c6711c4425075e27 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_14[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_14[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index e1e098865c6b1571c30e9a59146ec51bb36f0dd4..f314021b9c7df93cb5e8950f142b3e6e0b0e4ffb 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_15[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_15[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index 63ba6bdafaf3d45445537f7dd81cee6d91b1108f..6a8c754bc0f639f3020967f68fd9e12e3702e46d 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_16[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_16[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index 85ff468e84a71a2ebe6c38f182106a6d97f7552c..8e65a6a4afe738a617fe3f608e8675533c9ece40 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_2[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_2[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index e1eced40db9a201b61e62370da4b9b0b8d153b80..8d0242dd15109d3324a6cbfe512cf66a1d5e04b4 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_3[ 121 ] = {
+static const pg_utf_to_local ULmapISO8859_3[ 121 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index a621c5a7c4449a2c113981b4aa56fe1a24796f04..30fe4f3a501e538e5f13e749060493184ab399ef 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_4[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_4[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index 524c585bb60678675d6f427f5b7c360e84b2bcec..6509d7f6ebfe2c8af83f9cba7599ff5441397bef 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_5[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_5[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index 291f7cf5da16af98c1298887f8491f4da9f7ee5c..8f29f26ffe610f31f8ae136e20a11543fb5dee43 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_6[ 83 ] = {
+static const pg_utf_to_local ULmapISO8859_6[ 83 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index 8e4b7d0eb00d817f159eaf2c0dd3660ada0ef191..b0488ec3e0c0d7e2247194c40baa87a0978e980c 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_7[ 125 ] = {
+static const pg_utf_to_local ULmapISO8859_7[ 125 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index 62095e54f48b31bb513b81fc22c14a57731cda0f..7f31c5dee9903773b6210a87be1ed2956844004b 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_8[ 92 ] = {
+static const pg_utf_to_local ULmapISO8859_8[ 92 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index baff0f0319fdc384697bf0ebbee0499d3f62cafb..d34b8afcf2e08d7ece6a40b7f0c3fd1fcfa81cc5 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapISO8859_9[ 128 ] = {
+static const pg_utf_to_local ULmapISO8859_9[ 128 ] = {
   {0xc280, 0x0080},
   {0xc281, 0x0081},
   {0xc282, 0x0082},
index 5469f1280bdac69a13b4c1b94e375661220477a8..869f8213d214bb23d05ce309c844ce58e7b2825b 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapJOHAB[ 17049 ] = {
+static const pg_utf_to_local ULmapJOHAB[ 17049 ] = {
   {0xc2a1, 0xd9ae},
   {0xc2a4, 0xd9b4},
   {0xc2a7, 0xd967},
index 97ab485e195f957e618f883263fd2c6333a6f943..b4760da43c8a2f6ae28c655bc63d0025c4939e25 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapKOI8R[ 128 ] = {
+static const pg_utf_to_local ULmapKOI8R[ 128 ] = {
   {0xc2a0, 0x009a},
   {0xc2a9, 0x00bf},
   {0xc2b0, 0x009c},
index 7f262a4aaa422cdda5ea4c0457fd3b0308888b0d..b6366e8718dcb2895cb15e9095231c0cc6d04442 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapKOI8U[ 128 ] = {
+static const pg_utf_to_local ULmapKOI8U[ 128 ] = {
   {0xc2a0, 0x009a},
   {0xc2a9, 0x00bf},
   {0xc2b0, 0x009c},
index 2c52d4b57474dfe02ff334140b707c4df5d97d31..2db1902f58779b042aed51b9b16260db54e3246b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_SHIFT_JIS_2004.pl
  */
-static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
+static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
   {0x00000000, 0x000000},      /* U+0000        <control> */
   {0x00000001, 0x000001},      /* U+0001        <control> */
   {0x00000002, 0x000002},      /* U+0002        <control> */
index c6502e3a8e917e66ca40121487acb3b74828cb5f..e55d4a2a6cfd53eca7e96653dc4f38c32b605ebf 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file was generated by UCS_to_SHIFT_JIS_2004.pl
  */
-static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
+static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
   {0x0000c3a6, 0x0000cc80, 0x8663},    /* U+00E6+0300          [2000] */
   {0x0000c994, 0x0000cc80, 0x8667},    /* U+0254+0300          [2000] */
   {0x0000c994, 0x0000cc81, 0x8668},    /* U+0254+0301          [2000] */
index d827ae5c269b22fce0dd05bb8ceb3e60662e6f2e..bcb76c9150e93464e528aa8901ae600756b3bd18 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapSJIS[ 7398 ] = {
+static const pg_utf_to_local ULmapSJIS[ 7398 ] = {
   {0xc19c, 0x815f},
   {0xc2a2, 0x8191},
   {0xc2a3, 0x8192},
index e252eecc093e31959433f2889fdcadcf21cafb9d..15dfb56a09958393bc1d7bbea776ab45a5f179f2 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapUHC[ 17237 ] = {
+static const pg_utf_to_local ULmapUHC[ 17237 ] = {
   {0xc2a1, 0xa2ae},
   {0xc2a4, 0xa2b4},
   {0xc2a7, 0xa1d7},
index ef0381dc4342148735813f9470f9a7bb5cb235bc..4dd4631ab82ffd3e61e57201fc3f311411fb7f35 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1250[ 123 ] = {
+static const pg_utf_to_local ULmapWIN1250[ 123 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a4, 0x00a4},
   {0xc2a6, 0x00a6},
index e69fd6573e2fe0aef49bb99dfdbf4bafe942517b..3dc9f2b5e3ae7c1e73a6819b349986306ccfe6eb 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1251[ 127 ] = {
+static const pg_utf_to_local ULmapWIN1251[ 127 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a4, 0x00a4},
   {0xc2a6, 0x00a6},
index ba9594b74cfbf2d436a347fb60d056837bbec422..bc460a340b2c5bb3b2edd6d012df68cd969d1f1b 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1252[ 123 ] = {
+static const pg_utf_to_local ULmapWIN1252[ 123 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a1, 0x00a1},
   {0xc2a2, 0x00a2},
index a7961e95f12798ed53e8c2b04086f0501ecd909c..4cf03301363e263fe1aaf050db072d38efbeca8c 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1253[ 111 ] = {
+static const pg_utf_to_local ULmapWIN1253[ 111 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a3, 0x00a3},
   {0xc2a4, 0x00a4},
index e1afbe8f40d7abe17fef5c86c461833314c4d709..54f1e0fe4cbf8f0ed699a804309efc6e09b2c2cb 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1254[ 121 ] = {
+static const pg_utf_to_local ULmapWIN1254[ 121 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a1, 0x00a1},
   {0xc2a2, 0x00a2},
index 9071fe0a61b00311f038ee26317080cc07d170d5..328f0a1293d2ba4b8ae3e3fc4bebae536b8256dd 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1255[ 105 ] = {
+static const pg_utf_to_local ULmapWIN1255[ 105 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a1, 0x00a1},
   {0xc2a2, 0x00a2},
index 1ae675f79168b2cdc1ed886ee0b594f8d0439bff..aa7d36a18e2ff018a484a0be7084453dc08214c6 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1256[ 128 ] = {
+static const pg_utf_to_local ULmapWIN1256[ 128 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a2, 0x00a2},
   {0xc2a3, 0x00a3},
index 562678119e9d178c5801c506cb7db01a5c37293b..dca28d28ae9cdcf81f320a193cf277c23a444e09 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1257[ 116 ] = {
+static const pg_utf_to_local ULmapWIN1257[ 116 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a2, 0x00a2},
   {0xc2a3, 0x00a3},
index 7c4629ff91fce3de9290ad27695ec9eab9240bec..371e315bdb6cdf810f5315243d0fcf7aadb49562 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN1258[ 119 ] = {
+static const pg_utf_to_local ULmapWIN1258[ 119 ] = {
   {0xc2a0, 0x00a0},
   {0xc2a1, 0x00a1},
   {0xc2a2, 0x00a2},
index e5767e09307781c1c039dbab65989b7356c03aaf..dbd705b96c7b025a1f3d31d88d8584101b626c26 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN866[ 128 ] = {
+static const pg_utf_to_local ULmapWIN866[ 128 ] = {
   {0xc2a0, 0x00ff},
   {0xc2a4, 0x00fd},
   {0xc2b0, 0x00f8},
index d765744461fffdfabe2c5ff3d7f57c238905c991..9265a39f4e445a78b57b2dae2aa22b18273df429 100644 (file)
@@ -1,4 +1,4 @@
-static pg_utf_to_local ULmapWIN874[ 97 ] = {
+static const pg_utf_to_local ULmapWIN874[ 97 ] = {
   {0xc2a0, 0x00a0},
   {0xe0b881, 0x00a1},
   {0xe0b882, 0x00a2},
index 22f44b7f58ac5ecd6cc71a56b1f93dd3ef090ddd..dd44dceafc8a5f3464c51dc8b22c9f18ccefda0e 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1250[ 123 ] = {
+static const pg_local_to_utf LUmapWIN1250[ 123 ] = {
   {0x0080, 0xe282ac},
   {0x0082, 0xe2809a},
   {0x0084, 0xe2809e},
index cdea6fe4459c48a588d68585542497c5ef0b952d..a4f1aeb49da0fb498f31bdd027a13c87d68aa6b5 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1251[ 127 ] = {
+static const pg_local_to_utf LUmapWIN1251[ 127 ] = {
   {0x0080, 0xd082},
   {0x0081, 0xd083},
   {0x0082, 0xe2809a},
index ad849ee011dd98349e2a7593f718532b054da0f8..7b547becc81c7fdfe47e870dc8b44c2965997f77 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1252[ 123 ] = {
+static const pg_local_to_utf LUmapWIN1252[ 123 ] = {
   {0x0080, 0xe282ac},
   {0x0082, 0xe2809a},
   {0x0083, 0xc692},
index 519a435d7507a0054b411b46f2a2f3d223c5eae9..d22b72f23f708b1e79988943f3e18bd3845d97b6 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1253[ 111 ] = {
+static const pg_local_to_utf LUmapWIN1253[ 111 ] = {
   {0x0080, 0xe282ac},
   {0x0082, 0xe2809a},
   {0x0083, 0xc692},
index 370e4bc910100b77721534c19b9df2d1af5d4f82..3eff3d657a864fbe634c381dbd1657c8a717bce3 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1254[ 121 ] = {
+static const pg_local_to_utf LUmapWIN1254[ 121 ] = {
   {0x0080, 0xe282ac},
   {0x0082, 0xe2809a},
   {0x0083, 0xc692},
index f5d7454c29f8bbd38e2da21751cc1d0a9c7c2acd..5be6b63e08d239dc7f3ed3357230b3031a386975 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1255[ 105 ] = {
+static const pg_local_to_utf LUmapWIN1255[ 105 ] = {
   {0x0080, 0xe282ac},
   {0x0082, 0xe2809a},
   {0x0083, 0xc692},
index edf3ae9de306fece88f3161477ef7051d0495a9e..a61aa45ec26d2f920c7f9deaf3a0d173ff24930d 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1256[ 128 ] = {
+static const pg_local_to_utf LUmapWIN1256[ 128 ] = {
   {0x0080, 0xe282ac},
   {0x0081, 0xd9be},
   {0x0082, 0xe2809a},
index 45d946d57c9c6f585a6b5e357d4814eb45778c3b..134f9726a859025cf95270b93b223f4df55142c6 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1257[ 116 ] = {
+static const pg_local_to_utf LUmapWIN1257[ 116 ] = {
   {0x0080, 0xe282ac},
   {0x0082, 0xe2809a},
   {0x0084, 0xe2809e},
index ed8a9146fc1812c52a48387fbda114c5eac3e8b8..5807d112dc216a3257214770bba3b51c1c8b6856 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN1258[ 119 ] = {
+static const pg_local_to_utf LUmapWIN1258[ 119 ] = {
   {0x0080, 0xe282ac},
   {0x0082, 0xe2809a},
   {0x0083, 0xc692},
index d2a377a537505545cf17aa11d9be17c5c71bbe40..411c043b277174799fe3138b5498f664f01fba10 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN866[ 128 ] = {
+static const pg_local_to_utf LUmapWIN866[ 128 ] = {
   {0x0080, 0xd090},
   {0x0081, 0xd091},
   {0x0082, 0xd092},
index bd4eadbbb6a54654f034ebd96145f706f0e6efbc..1472b0f1e6fc941f118a52b93645cf89e8d7ac44 100644 (file)
@@ -1,4 +1,4 @@
-static pg_local_to_utf LUmapWIN874[ 97 ] = {
+static const pg_local_to_utf LUmapWIN874[ 97 ] = {
   {0x0080, 0xe282ac},
   {0x0085, 0xe280a6},
   {0x0091, 0xe28098},
index d5c5c74a9cee8a4c7b9ecd7efb60afae8eab18e1..f957b6efd320dd9c38c1a847fb54581b02614fdb 100644 (file)
@@ -302,47 +302,62 @@ compare4(const void *p1, const void *p2)
 }
 
 /*
- * convert 32bit wide character to mutibye stream pointed to by iso
+ * store 32bit character representation into multibyte stream
  */
-static unsigned char *
-set_iso_code(unsigned char *iso, uint32 code)
+static inline unsigned char *
+store_coded_char(unsigned char *dest, uint32 code)
 {
        if (code & 0xff000000)
-               *iso++ = code >> 24;
+               *dest++ = code >> 24;
        if (code & 0x00ff0000)
-               *iso++ = (code & 0x00ff0000) >> 16;
+               *dest++ = code >> 16;
        if (code & 0x0000ff00)
-               *iso++ = (code & 0x0000ff00) >> 8;
+               *dest++ = code >> 8;
        if (code & 0x000000ff)
-               *iso++ = code & 0x000000ff;
-       return iso;
+               *dest++ = code;
+       return dest;
 }
 
 /*
  * UTF8 ---> local code
  *
- * utf: input UTF8 string (need not be null-terminated).
+ * utf: input string in UTF8 encoding (need not be null-terminated)
+ * len: length of input string (in bytes)
  * iso: pointer to the output area (must be large enough!)
- * map: the conversion map.
- * cmap: the conversion map for combined characters.
- *               (optional)
- * size1: the size of the conversion map.
- * size2: the size of the conversion map for combined characters
- *               (optional)
- * encoding: the PG identifier for the local encoding.
- * len: length of input string.
+ *               (output string will be null-terminated)
+ * map: conversion map for single characters
+ * mapsize: number of entries in the conversion map
+ * cmap: conversion map for combined characters
+ *               (optional, pass NULL if none)
+ * cmapsize: number of entries in the conversion map for combined characters
+ *               (optional, pass 0 if none)
+ * conv_func: algorithmic encoding conversion function
+ *               (optional, pass NULL if none)
+ * encoding: PG identifier for the local encoding
+ *
+ * For each character, the cmap (if provided) is consulted first; if no match,
+ * the map is consulted next; if still no match, the conv_func (if provided)
+ * is applied.  An error is raised if no match is found.
+ *
+ * See pg_wchar.h for more details about the data structures used here.
  */
 void
-UtfToLocal(const unsigned char *utf, unsigned char *iso,
-                  const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
-                  int size1, int size2, int encoding, int len)
+UtfToLocal(const unsigned char *utf, int len,
+                  unsigned char *iso,
+                  const pg_utf_to_local *map, int mapsize,
+                  const pg_utf_to_local_combined *cmap, int cmapsize,
+                  utf_local_conversion_func conv_func,
+                  int encoding)
 {
        uint32          iutf;
-       uint32          cutf[2];
-       uint32          code;
-       pg_utf_to_local *p;
-       pg_utf_to_local_combined *cp;
        int                     l;
+       const pg_utf_to_local *p;
+       const pg_utf_to_local_combined *cp;
+
+       if (!PG_VALID_ENCODING(encoding))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("invalid encoding number: %d", encoding)));
 
        for (; len > 0; len -= l)
        {
@@ -351,7 +366,6 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
                        break;
 
                l = pg_utf_mblen(utf);
-
                if (len < l)
                        break;
 
@@ -360,11 +374,13 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
 
                if (l == 1)
                {
-                       /* ASCII case is easy */
+                       /* ASCII case is easy, assume it's one-to-one conversion */
                        *iso++ = *utf++;
                        continue;
                }
-               else if (l == 2)
+
+               /* collect coded char of length l */
+               if (l == 2)
                {
                        iutf = *utf++ << 8;
                        iutf |= *utf++;
@@ -388,15 +404,14 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
                        iutf = 0;                       /* keep compiler quiet */
                }
 
-               /*
-                * first, try with combined map if possible
-                */
+               /* First, try with combined map if possible */
                if (cmap && len > l)
                {
                        const unsigned char *utf_save = utf;
                        int                     len_save = len;
                        int                     l_save = l;
 
+                       /* collect next character, same as above */
                        len -= l;
 
                        l = pg_utf_mblen(utf);
@@ -406,83 +421,83 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
                        if (!pg_utf8_islegal(utf, l))
                                break;
 
-                       cutf[0] = iutf;
-
-                       if (l == 1)
+                       /* We assume ASCII character cannot be in combined map */
+                       if (l > 1)
                        {
-                               if (len_save > 1)
+                               uint32          iutf2;
+                               uint32          cutf[2];
+
+                               if (l == 2)
+                               {
+                                       iutf2 = *utf++ << 8;
+                                       iutf2 |= *utf++;
+                               }
+                               else if (l == 3)
+                               {
+                                       iutf2 = *utf++ << 16;
+                                       iutf2 |= *utf++ << 8;
+                                       iutf2 |= *utf++;
+                               }
+                               else if (l == 4)
+                               {
+                                       iutf2 = *utf++ << 24;
+                                       iutf2 |= *utf++ << 16;
+                                       iutf2 |= *utf++ << 8;
+                                       iutf2 |= *utf++;
+                               }
+                               else
                                {
-                                       p = bsearch(&cutf[0], map, size1,
-                                                               sizeof(pg_utf_to_local), compare1);
-                                       if (p == NULL)
-                                               report_untranslatable_char(PG_UTF8, encoding,
-                                                          (const char *) (utf_save - l_save), len_save);
-                                       iso = set_iso_code(iso, p->code);
+                                       elog(ERROR, "unsupported character length %d", l);
+                                       iutf2 = 0;      /* keep compiler quiet */
                                }
 
-                               /* ASCII case is easy */
-                               *iso++ = *utf++;
-                               continue;
-                       }
-                       else if (l == 2)
-                       {
-                               iutf = *utf++ << 8;
-                               iutf |= *utf++;
-                       }
-                       else if (l == 3)
-                       {
-                               iutf = *utf++ << 16;
-                               iutf |= *utf++ << 8;
-                               iutf |= *utf++;
-                       }
-                       else if (l == 4)
-                       {
-                               iutf = *utf++ << 24;
-                               iutf |= *utf++ << 16;
-                               iutf |= *utf++ << 8;
-                               iutf |= *utf++;
-                       }
-                       else
-                       {
-                               elog(ERROR, "unsupported character length %d", l);
-                               iutf = 0;               /* keep compiler quiet */
-                       }
+                               cutf[0] = iutf;
+                               cutf[1] = iutf2;
 
-                       cutf[1] = iutf;
-                       cp = bsearch(cutf, cmap, size2,
-                                                sizeof(pg_utf_to_local_combined), compare3);
-                       if (cp)
-                               code = cp->code;
-                       else
-                       {
-                               /* not found in combined map. try with ordinary map */
-                               p = bsearch(&cutf[0], map, size1,
-                                                       sizeof(pg_utf_to_local), compare1);
-                               if (p == NULL)
-                                       report_untranslatable_char(PG_UTF8, encoding,
-                                                          (const char *) (utf_save - l_save), len_save);
-                               iso = set_iso_code(iso, p->code);
-
-                               p = bsearch(&cutf[1], map, size1,
-                                                       sizeof(pg_utf_to_local), compare1);
-                               if (p == NULL)
-                                       report_untranslatable_char(PG_UTF8, encoding,
-                                                                                          (const char *) (utf - l), len);
-                               code = p->code;
+                               cp = bsearch(cutf, cmap, cmapsize,
+                                                        sizeof(pg_utf_to_local_combined), compare3);
+
+                               if (cp)
+                               {
+                                       iso = store_coded_char(iso, cp->code);
+                                       continue;
+                               }
                        }
+
+                       /* fail, so back up to reprocess second character next time */
+                       utf = utf_save;
+                       len = len_save;
+                       l = l_save;
                }
-               else    /* no cmap or no remaining data */
+
+               /* Now check ordinary map */
+               p = bsearch(&iutf, map, mapsize,
+                                       sizeof(pg_utf_to_local), compare1);
+
+               if (p)
                {
-                       p = bsearch(&iutf, map, size1,
-                                               sizeof(pg_utf_to_local), compare1);
-                       if (p == NULL)
-                               report_untranslatable_char(PG_UTF8, encoding,
-                                                                                  (const char *) (utf - l), len);
-                       code = p->code;
+                       iso = store_coded_char(iso, p->code);
+                       continue;
+               }
+
+               /* if there's a conversion function, try that */
+               if (conv_func)
+               {
+                       uint32          converted = (*conv_func) (iutf);
+
+                       if (converted)
+                       {
+                               iso = store_coded_char(iso, converted);
+                               continue;
+                       }
                }
-               iso = set_iso_code(iso, code);
+
+               /* failed to translate this character */
+               report_untranslatable_char(PG_UTF8, encoding,
+                                                                  (const char *) (utf - l), len);
        }
 
+       /* if we broke out of loop early, must be invalid input */
        if (len > 0)
                report_invalid_encoding(PG_UTF8, (const char *) utf, len);
 
@@ -492,26 +507,38 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
 /*
  * local code ---> UTF8
  *
- * iso: input local string (need not be null-terminated).
+ * iso: input string in local encoding (need not be null-terminated)
+ * len: length of input string (in bytes)
  * utf: pointer to the output area (must be large enough!)
- * map: the conversion map.
- * cmap: the conversion map for combined characters.
- *               (optional)
- * size1: the size of the conversion map.
- * size2: the size of the conversion map for combined characters
- *               (optional)
- * encoding: the PG identifier for the local encoding.
- * len: length of input string.
+ *               (output string will be null-terminated)
+ * map: conversion map for single characters
+ * mapsize: number of entries in the conversion map
+ * cmap: conversion map for combined characters
+ *               (optional, pass NULL if none)
+ * cmapsize: number of entries in the conversion map for combined characters
+ *               (optional, pass 0 if none)
+ * conv_func: algorithmic encoding conversion function
+ *               (optional, pass NULL if none)
+ * encoding: PG identifier for the local encoding
+ *
+ * For each character, the map is consulted first; if no match, the cmap
+ * (if provided) is consulted next; if still no match, the conv_func
+ * (if provided) is applied.  An error is raised if no match is found.
+ *
+ * See pg_wchar.h for more details about the data structures used here.
  */
 void
-LocalToUtf(const unsigned char *iso, unsigned char *utf,
-                  const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
-                  int size1, int size2, int encoding, int len)
+LocalToUtf(const unsigned char *iso, int len,
+                  unsigned char *utf,
+                  const pg_local_to_utf *map, int mapsize,
+                  const pg_local_to_utf_combined *cmap, int cmapsize,
+                  utf_local_conversion_func conv_func,
+                  int encoding)
 {
-       unsigned int iiso;
+       uint32          iiso;
        int                     l;
-       pg_local_to_utf *p;
-       pg_local_to_utf_combined *cp;
+       const pg_local_to_utf *p;
+       const pg_local_to_utf_combined *cp;
 
        if (!PG_VALID_ENCODING(encoding))
                ereport(ERROR,
@@ -526,7 +553,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
 
                if (!IS_HIGHBIT_SET(*iso))
                {
-                       /* ASCII case is easy */
+                       /* ASCII case is easy, assume it's one-to-one conversion */
                        *utf++ = *iso++;
                        l = 1;
                        continue;
@@ -536,6 +563,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
                if (l < 0)
                        break;
 
+               /* collect coded char of length l */
                if (l == 1)
                        iiso = *iso++;
                else if (l == 2)
@@ -562,61 +590,48 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
                        iiso = 0;                       /* keep compiler quiet */
                }
 
-               p = bsearch(&iiso, map, size1,
+               /* First check ordinary map */
+               p = bsearch(&iiso, map, mapsize,
                                        sizeof(pg_local_to_utf), compare2);
 
-               if (p == NULL)
+               if (p)
                {
-                       /*
-                        * not found in the ordinary map. if there's a combined character
-                        * map, try with it
-                        */
-                       if (cmap)
-                       {
-                               cp = bsearch(&iiso, cmap, size2,
-                                                        sizeof(pg_local_to_utf_combined), compare4);
+                       utf = store_coded_char(utf, p->utf);
+                       continue;
+               }
 
-                               if (cp)
-                               {
-                                       if (cp->utf1 & 0xff000000)
-                                               *utf++ = cp->utf1 >> 24;
-                                       if (cp->utf1 & 0x00ff0000)
-                                               *utf++ = (cp->utf1 & 0x00ff0000) >> 16;
-                                       if (cp->utf1 & 0x0000ff00)
-                                               *utf++ = (cp->utf1 & 0x0000ff00) >> 8;
-                                       if (cp->utf1 & 0x000000ff)
-                                               *utf++ = cp->utf1 & 0x000000ff;
-
-                                       if (cp->utf2 & 0xff000000)
-                                               *utf++ = cp->utf2 >> 24;
-                                       if (cp->utf2 & 0x00ff0000)
-                                               *utf++ = (cp->utf2 & 0x00ff0000) >> 16;
-                                       if (cp->utf2 & 0x0000ff00)
-                                               *utf++ = (cp->utf2 & 0x0000ff00) >> 8;
-                                       if (cp->utf2 & 0x000000ff)
-                                               *utf++ = cp->utf2 & 0x000000ff;
+               /* If there's a combined character map, try that */
+               if (cmap)
+               {
+                       cp = bsearch(&iiso, cmap, cmapsize,
+                                                sizeof(pg_local_to_utf_combined), compare4);
 
-                                       continue;
-                               }
+                       if (cp)
+                       {
+                               utf = store_coded_char(utf, cp->utf1);
+                               utf = store_coded_char(utf, cp->utf2);
+                               continue;
                        }
-
-                       report_untranslatable_char(encoding, PG_UTF8,
-                                                                          (const char *) (iso - l), len);
-
                }
-               else
+
+               /* if there's a conversion function, try that */
+               if (conv_func)
                {
-                       if (p->utf & 0xff000000)
-                               *utf++ = p->utf >> 24;
-                       if (p->utf & 0x00ff0000)
-                               *utf++ = (p->utf & 0x00ff0000) >> 16;
-                       if (p->utf & 0x0000ff00)
-                               *utf++ = (p->utf & 0x0000ff00) >> 8;
-                       if (p->utf & 0x000000ff)
-                               *utf++ = p->utf & 0x000000ff;
+                       uint32          converted = (*conv_func) (iiso);
+
+                       if (converted)
+                       {
+                               utf = store_coded_char(utf, converted);
+                               continue;
+                       }
                }
+
+               /* failed to translate this character */
+               report_untranslatable_char(encoding, PG_UTF8,
+                                                                  (const char *) (iso - l), len);
        }
 
+       /* if we broke out of loop early, must be invalid input */
        if (len > 0)
                report_invalid_encoding(encoding, (const char *) iso, len);
 
index 6861572655220a24d7e52d4456468d132387a2b2..1d9b10f8a7a9ab3db27514b3761c143bdcbb9e7e 100644 (file)
@@ -22,7 +22,7 @@ typedef struct
 } codes_t;
 
 /* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
-static codes_t big5Level1ToCnsPlane1[25] = {   /* range */
+static const codes_t big5Level1ToCnsPlane1[25] = {     /* range */
        {0xA140, 0x2121},
        {0xA1F6, 0x2258},
        {0xA1F7, 0x2257},
@@ -51,7 +51,7 @@ static codes_t big5Level1ToCnsPlane1[25] = {  /* range */
 };
 
 /* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
-static codes_t cnsPlane1ToBig5Level1[26] = {   /* range */
+static const codes_t cnsPlane1ToBig5Level1[26] = {     /* range */
        {0x2121, 0xA140},
        {0x2257, 0xA1F7},
        {0x2258, 0xA1F6},
@@ -81,7 +81,7 @@ static codes_t cnsPlane1ToBig5Level1[26] = {  /* range */
 };
 
 /* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
-static codes_t big5Level2ToCnsPlane2[48] = {   /* range */
+static const codes_t big5Level2ToCnsPlane2[48] = {     /* range */
        {0xC940, 0x2121},
        {0xc94a, 0x0000},
        {0xC94B, 0x212B},
@@ -133,7 +133,7 @@ static codes_t big5Level2ToCnsPlane2[48] = {        /* range */
 };
 
 /* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
-static codes_t cnsPlane2ToBig5Level2[49] = {   /* range */
+static const codes_t cnsPlane2ToBig5Level2[49] = {     /* range */
        {0x2121, 0xC940},
        {0x212B, 0xC94B},
        {0x214C, 0xC9BE},
@@ -186,7 +186,7 @@ static codes_t cnsPlane2ToBig5Level2[49] = {        /* range */
 };
 
 /* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4 */
-static unsigned short b1c4[][2] = {
+static const unsigned short b1c4[][2] = {
        {0xC879, 0x2123},
        {0xC87B, 0x2124},
        {0xC87D, 0x212A},
@@ -194,7 +194,7 @@ static unsigned short b1c4[][2] = {
 };
 
 /* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3 */
-static unsigned short b2c3[][2] = {
+static const unsigned short b2c3[][2] = {
        {0xF9D6, 0x4337},
        {0xF9D7, 0x4F50},
        {0xF9D8, 0x444E},
@@ -205,7 +205,7 @@ static unsigned short b2c3[][2] = {
 };
 
 static unsigned short BinarySearchRange
-                       (codes_t *array, int high, unsigned short code)
+                       (const codes_t *array, int high, unsigned short code)
 {
        int                     low,
                                mid,
index 35e71a2ce5316c711f68ea6f45b0333a9551c0b6..a422a26d0083a6f71409076858ecf9aa08856085 100644 (file)
@@ -44,8 +44,11 @@ big5_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapBIG5, NULL,
-                          sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), 0, PG_BIG5, len);
+       LocalToUtf(src, len, dest,
+                          LUmapBIG5, lengthof(LUmapBIG5),
+                          NULL, 0,
+                          NULL,
+                          PG_BIG5);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_big5(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
 
-       UtfToLocal(src, dest, ULmapBIG5, NULL,
-                          sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), 0, PG_BIG5, len);
+       UtfToLocal(src, len, dest,
+                          ULmapBIG5, lengthof(ULmapBIG5),
+                          NULL, 0,
+                          NULL,
+                          PG_BIG5);
 
        PG_RETURN_VOID();
 }
index 5d9e9150e446467a261919c04d68150948d8e724..f2b01e50a349488e0603ab712d3016b01466965c 100644 (file)
@@ -53,8 +53,11 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
 
-       UtfToLocal(src, dest, ULmapKOI8R, NULL,
-                        sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), 0, PG_KOI8R, len);
+       UtfToLocal(src, len, dest,
+                          ULmapKOI8R, lengthof(ULmapKOI8R),
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8R);
 
        PG_RETURN_VOID();
 }
@@ -68,8 +71,11 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapKOI8R, NULL,
-                        sizeof(LUmapKOI8R) / sizeof(pg_local_to_utf), 0, PG_KOI8R, len);
+       LocalToUtf(src, len, dest,
+                          LUmapKOI8R, lengthof(LUmapKOI8R),
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8R);
 
        PG_RETURN_VOID();
 }
@@ -83,8 +89,11 @@ utf8_to_koi8u(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
 
-       UtfToLocal(src, dest, ULmapKOI8U, NULL,
-                        sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len);
+       UtfToLocal(src, len, dest,
+                          ULmapKOI8U, lengthof(ULmapKOI8U),
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8U);
 
        PG_RETURN_VOID();
 }
@@ -98,8 +107,11 @@ koi8u_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapKOI8U, NULL,
-                        sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len);
+       LocalToUtf(src, len, dest,
+                          LUmapKOI8U, lengthof(LUmapKOI8U),
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8U);
 
        PG_RETURN_VOID();
 }
index 15d079cfa68003a5e0924c51457daa6d91f2aa1f..a49eda8838ced89768d59daa2e96cb183f6d0af6 100644 (file)
@@ -46,10 +46,11 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapEUC_JIS_2004, LUmapEUC_JIS_2004_combined,
-                          sizeof(LUmapEUC_JIS_2004) / sizeof(pg_local_to_utf),
-          sizeof(LUmapEUC_JIS_2004_combined) / sizeof(pg_local_to_utf_combined),
-                          PG_EUC_JIS_2004, len);
+       LocalToUtf(src, len, dest,
+                          LUmapEUC_JIS_2004, lengthof(LUmapEUC_JIS_2004),
+                       LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
+                          NULL,
+                          PG_EUC_JIS_2004);
 
        PG_RETURN_VOID();
 }
@@ -63,10 +64,11 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
 
-       UtfToLocal(src, dest, ULmapEUC_JIS_2004, ULmapEUC_JIS_2004_combined,
-                          sizeof(ULmapEUC_JIS_2004) / sizeof(pg_utf_to_local),
-          sizeof(ULmapEUC_JIS_2004_combined) / sizeof(pg_utf_to_local_combined),
-                          PG_EUC_JIS_2004, len);
+       UtfToLocal(src, len, dest,
+                          ULmapEUC_JIS_2004, lengthof(ULmapEUC_JIS_2004),
+                       ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
+                          NULL,
+                          PG_EUC_JIS_2004);
 
        PG_RETURN_VOID();
 }
index 7a321aa38ee6f5f48b3b8be6204a1d887d01a2a0..83329263179ebd6c574e11f47c502a1ce04c1cb1 100644 (file)
@@ -44,8 +44,11 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapEUC_CN, NULL,
-                  sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), 0, PG_EUC_CN, len);
+       LocalToUtf(src, len, dest,
+                          LUmapEUC_CN, lengthof(LUmapEUC_CN),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_CN);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
 
-       UtfToLocal(src, dest, ULmapEUC_CN, NULL,
-                  sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), 0, PG_EUC_CN, len);
+       UtfToLocal(src, len, dest,
+                          ULmapEUC_CN, lengthof(ULmapEUC_CN),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_CN);
 
        PG_RETURN_VOID();
 }
index bc6d6484359a7f15ab55271ffaeabb790a878bcb..6d9aeb4138bc9b9f5a4d7137dba8daee29ac0ac9 100644 (file)
@@ -44,8 +44,11 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapEUC_JP, NULL,
-                  sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), 0, PG_EUC_JP, len);
+       LocalToUtf(src, len, dest,
+                          LUmapEUC_JP, lengthof(LUmapEUC_JP),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_JP);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
 
-       UtfToLocal(src, dest, ULmapEUC_JP, NULL,
-                  sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), 0, PG_EUC_JP, len);
+       UtfToLocal(src, len, dest,
+                          ULmapEUC_JP, lengthof(ULmapEUC_JP),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_JP);
 
        PG_RETURN_VOID();
 }
index 7233f06a7dd507619d24925e3655d89a87a52c64..7edd09b1154c1a0bc5031454b7c3c632cec72e7a 100644 (file)
@@ -44,8 +44,11 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapEUC_KR, NULL,
-                  sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), 0, PG_EUC_KR, len);
+       LocalToUtf(src, len, dest,
+                          LUmapEUC_KR, lengthof(LUmapEUC_KR),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_KR);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
 
-       UtfToLocal(src, dest, ULmapEUC_KR, NULL,
-                  sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), 0, PG_EUC_KR, len);
+       UtfToLocal(src, len, dest,
+                          ULmapEUC_KR, lengthof(ULmapEUC_KR),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_KR);
 
        PG_RETURN_VOID();
 }
index 8bd093a59aa2068729230fd2d27782d4100b31bc..bc129993ba4ebfebfa628bbad1f55bfbcc9f9286 100644 (file)
@@ -44,8 +44,11 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapEUC_TW, NULL,
-                  sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), 0, PG_EUC_TW, len);
+       LocalToUtf(src, len, dest,
+                          LUmapEUC_TW, lengthof(LUmapEUC_TW),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_TW);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
 
-       UtfToLocal(src, dest, ULmapEUC_TW, NULL,
-                  sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), 0, PG_EUC_TW, len);
+       UtfToLocal(src, len, dest,
+                          ULmapEUC_TW, lengthof(ULmapEUC_TW),
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_TW);
 
        PG_RETURN_VOID();
 }
index 4427fea41491a9ffae759c2c207a8373b687db37..b41e4a84b073f19ead1c124e00f6fc38dd848cc5 100644 (file)
@@ -44,8 +44,11 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapGB18030, NULL,
-                sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), 0, PG_GB18030, len);
+       LocalToUtf(src, len, dest,
+                          LUmapGB18030, lengthof(LUmapGB18030),
+                          NULL, 0,
+                          NULL,
+                          PG_GB18030);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
 
-       UtfToLocal(src, dest, ULmapGB18030, NULL,
-                sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), 0, PG_GB18030, len);
+       UtfToLocal(src, len, dest,
+                          ULmapGB18030, lengthof(ULmapGB18030),
+                          NULL, 0,
+                          NULL,
+                          PG_GB18030);
 
        PG_RETURN_VOID();
 }
index 456cc04bb6333db1370eb9ddbf2ea1cde9735805..adc33a68e6b18687e36033703de1a9259a659306 100644 (file)
@@ -44,8 +44,11 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapGBK, NULL,
-                          sizeof(LUmapGBK) / sizeof(pg_local_to_utf), 0, PG_GBK, len);
+       LocalToUtf(src, len, dest,
+                          LUmapGBK, lengthof(LUmapGBK),
+                          NULL, 0,
+                          NULL,
+                          PG_GBK);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK);
 
-       UtfToLocal(src, dest, ULmapGBK, NULL,
-                          sizeof(ULmapGBK) / sizeof(pg_utf_to_local), 0, PG_GBK, len);
+       UtfToLocal(src, len, dest,
+                          ULmapGBK, lengthof(ULmapGBK),
+                          NULL, 0,
+                          NULL,
+                          PG_GBK);
 
        PG_RETURN_VOID();
 }
index 886e296d9aa63691f51436c53e932d625ec4ee6c..39e4ce12e5a525ff81087fe094607f01edf65263 100644 (file)
@@ -63,52 +63,52 @@ extern Datum utf8_to_iso8859(PG_FUNCTION_ARGS);
 typedef struct
 {
        pg_enc          encoding;
-       pg_local_to_utf *map1;          /* to UTF8 map name */
-       pg_utf_to_local *map2;          /* from UTF8 map name */
+       const pg_local_to_utf *map1;    /* to UTF8 map name */
+       const pg_utf_to_local *map2;    /* from UTF8 map name */
        int                     size1;                  /* size of map1 */
        int                     size2;                  /* size of map2 */
 } pg_conv_map;
 
-static pg_conv_map maps[] = {
+static const pg_conv_map maps[] = {
        {PG_LATIN2, LUmapISO8859_2, ULmapISO8859_2,
-               sizeof(LUmapISO8859_2) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_2) / sizeof(pg_utf_to_local)},      /* ISO-8859-2 Latin 2 */
+               lengthof(LUmapISO8859_2),
+       lengthof(ULmapISO8859_2)},      /* ISO-8859-2 Latin 2 */
        {PG_LATIN3, LUmapISO8859_3, ULmapISO8859_3,
-               sizeof(LUmapISO8859_3) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_3) / sizeof(pg_utf_to_local)},      /* ISO-8859-3 Latin 3 */
+               lengthof(LUmapISO8859_3),
+       lengthof(ULmapISO8859_3)},      /* ISO-8859-3 Latin 3 */
        {PG_LATIN4, LUmapISO8859_4, ULmapISO8859_4,
-               sizeof(LUmapISO8859_4) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_4) / sizeof(pg_utf_to_local)},      /* ISO-8859-4 Latin 4 */
+               lengthof(LUmapISO8859_4),
+       lengthof(ULmapISO8859_4)},      /* ISO-8859-4 Latin 4 */
        {PG_LATIN5, LUmapISO8859_9, ULmapISO8859_9,
-               sizeof(LUmapISO8859_9) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_9) / sizeof(pg_utf_to_local)},      /* ISO-8859-9 Latin 5 */
+               lengthof(LUmapISO8859_9),
+       lengthof(ULmapISO8859_9)},      /* ISO-8859-9 Latin 5 */
        {PG_LATIN6, LUmapISO8859_10, ULmapISO8859_10,
-               sizeof(LUmapISO8859_10) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_10) / sizeof(pg_utf_to_local)}, /* ISO-8859-10 Latin 6 */
+               lengthof(LUmapISO8859_10),
+       lengthof(ULmapISO8859_10)}, /* ISO-8859-10 Latin 6 */
        {PG_LATIN7, LUmapISO8859_13, ULmapISO8859_13,
-               sizeof(LUmapISO8859_13) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_13) / sizeof(pg_utf_to_local)}, /* ISO-8859-13 Latin 7 */
+               lengthof(LUmapISO8859_13),
+       lengthof(ULmapISO8859_13)}, /* ISO-8859-13 Latin 7 */
        {PG_LATIN8, LUmapISO8859_14, ULmapISO8859_14,
-               sizeof(LUmapISO8859_14) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_14) / sizeof(pg_utf_to_local)}, /* ISO-8859-14 Latin 8 */
+               lengthof(LUmapISO8859_14),
+       lengthof(ULmapISO8859_14)}, /* ISO-8859-14 Latin 8 */
        {PG_LATIN9, LUmapISO8859_15, ULmapISO8859_15,
-               sizeof(LUmapISO8859_15) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_15) / sizeof(pg_utf_to_local)}, /* ISO-8859-15 Latin 9 */
+               lengthof(LUmapISO8859_15),
+       lengthof(ULmapISO8859_15)}, /* ISO-8859-15 Latin 9 */
        {PG_LATIN10, LUmapISO8859_16, ULmapISO8859_16,
-               sizeof(LUmapISO8859_16) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_16) / sizeof(pg_utf_to_local)}, /* ISO-8859-16 Latin 10 */
+               lengthof(LUmapISO8859_16),
+       lengthof(ULmapISO8859_16)}, /* ISO-8859-16 Latin 10 */
        {PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5,
-               sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local)},      /* ISO-8859-5 */
+               lengthof(LUmapISO8859_5),
+       lengthof(ULmapISO8859_5)},      /* ISO-8859-5 */
        {PG_ISO_8859_6, LUmapISO8859_6, ULmapISO8859_6,
-               sizeof(LUmapISO8859_6) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_6) / sizeof(pg_utf_to_local)},      /* ISO-8859-6 */
+               lengthof(LUmapISO8859_6),
+       lengthof(ULmapISO8859_6)},      /* ISO-8859-6 */
        {PG_ISO_8859_7, LUmapISO8859_7, ULmapISO8859_7,
-               sizeof(LUmapISO8859_7) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_7) / sizeof(pg_utf_to_local)},      /* ISO-8859-7 */
+               lengthof(LUmapISO8859_7),
+       lengthof(ULmapISO8859_7)},      /* ISO-8859-7 */
        {PG_ISO_8859_8, LUmapISO8859_8, ULmapISO8859_8,
-               sizeof(LUmapISO8859_8) / sizeof(pg_local_to_utf),
-       sizeof(ULmapISO8859_8) / sizeof(pg_utf_to_local)},      /* ISO-8859-8 */
+               lengthof(LUmapISO8859_8),
+       lengthof(ULmapISO8859_8)},      /* ISO-8859-8 */
 };
 
 Datum
@@ -122,18 +122,23 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
 
-       for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
+       for (i = 0; i < lengthof(maps); i++)
        {
                if (encoding == maps[i].encoding)
                {
-                       LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
+                       LocalToUtf(src, len, dest,
+                                          maps[i].map1, maps[i].size1,
+                                          NULL, 0,
+                                          NULL,
+                                          encoding);
                        PG_RETURN_VOID();
                }
        }
 
        ereport(ERROR,
                        (errcode(ERRCODE_INTERNAL_ERROR),
-                        errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding)));
+                        errmsg("unexpected encoding ID %d for ISO 8859 character sets",
+                                       encoding)));
 
        PG_RETURN_VOID();
 }
@@ -149,18 +154,23 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
 
-       for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
+       for (i = 0; i < lengthof(maps); i++)
        {
                if (encoding == maps[i].encoding)
                {
-                       UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
+                       UtfToLocal(src, len, dest,
+                                          maps[i].map2, maps[i].size2,
+                                          NULL, 0,
+                                          NULL,
+                                          encoding);
                        PG_RETURN_VOID();
                }
        }
 
        ereport(ERROR,
                        (errcode(ERRCODE_INTERNAL_ERROR),
-                        errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding)));
+                        errmsg("unexpected encoding ID %d for ISO 8859 character sets",
+                                       encoding)));
 
        PG_RETURN_VOID();
 }
index 42c010585433ada1ecebff8ed6c9f898739f259f..15b49f24b1ead2155adffb1e4c70b65bc7d5f263 100644 (file)
@@ -44,8 +44,11 @@ johab_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapJOHAB, NULL,
-                        sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), 0, PG_JOHAB, len);
+       LocalToUtf(src, len, dest,
+                          LUmapJOHAB, lengthof(LUmapJOHAB),
+                          NULL, 0,
+                          NULL,
+                          PG_JOHAB);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_johab(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);
 
-       UtfToLocal(src, dest, ULmapJOHAB, NULL,
-                        sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), 0, PG_JOHAB, len);
+       UtfToLocal(src, len, dest,
+                          ULmapJOHAB, lengthof(ULmapJOHAB),
+                          NULL, 0,
+                          NULL,
+                          PG_JOHAB);
 
        PG_RETURN_VOID();
 }
index 7f7e7b0b0b6903f2f1a3f93f36574942f9ea86a5..7a3281b640b0d47205c893d78c2aee3e925aa30e 100644 (file)
@@ -44,8 +44,11 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapSJIS, NULL,
-                          sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), 0, PG_SJIS, len);
+       LocalToUtf(src, len, dest,
+                          LUmapSJIS, lengthof(LUmapSJIS),
+                          NULL, 0,
+                          NULL,
+                          PG_SJIS);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS);
 
-       UtfToLocal(src, dest, ULmapSJIS, NULL,
-                          sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), 0, PG_SJIS, len);
+       UtfToLocal(src, len, dest,
+                          ULmapSJIS, lengthof(ULmapSJIS),
+                          NULL, 0,
+                          NULL,
+                          PG_SJIS);
 
        PG_RETURN_VOID();
 }
index 2acdcacaaa6cbf7382f5b162a96f9f59c83d5ba3..29f4256fe01b0d2181dd637a5f3fb601280cb84a 100644 (file)
@@ -46,10 +46,11 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapSHIFT_JIS_2004, LUmapSHIFT_JIS_2004_combined,
-                          sizeof(LUmapSHIFT_JIS_2004) / sizeof(pg_local_to_utf),
-        sizeof(LUmapSHIFT_JIS_2004_combined) / sizeof(pg_local_to_utf_combined),
-                          PG_SHIFT_JIS_2004, len);
+       LocalToUtf(src, len, dest,
+                          LUmapSHIFT_JIS_2004, lengthof(LUmapSHIFT_JIS_2004),
+               LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
+                          NULL,
+                          PG_SHIFT_JIS_2004);
 
        PG_RETURN_VOID();
 }
@@ -63,10 +64,11 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004);
 
-       UtfToLocal(src, dest, ULmapSHIFT_JIS_2004, ULmapSHIFT_JIS_2004_combined,
-                          sizeof(ULmapSHIFT_JIS_2004) / sizeof(pg_utf_to_local),
-        sizeof(ULmapSHIFT_JIS_2004_combined) / sizeof(pg_utf_to_local_combined),
-                          PG_SHIFT_JIS_2004, len);
+       UtfToLocal(src, len, dest,
+                          ULmapSHIFT_JIS_2004, lengthof(ULmapSHIFT_JIS_2004),
+               ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
+                          NULL,
+                          PG_SHIFT_JIS_2004);
 
        PG_RETURN_VOID();
 }
index 72b72d87bb776373ffcff00a9ae422f78330be8e..984980935d2f2efa5b6062d2e6af6c13a32aa8dd 100644 (file)
@@ -44,8 +44,11 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8);
 
-       LocalToUtf(src, dest, LUmapUHC, NULL,
-                          sizeof(LUmapUHC) / sizeof(pg_local_to_utf), 0, PG_UHC, len);
+       LocalToUtf(src, len, dest,
+                          LUmapUHC, lengthof(LUmapUHC),
+                          NULL, 0,
+                          NULL,
+                          PG_UHC);
 
        PG_RETURN_VOID();
 }
@@ -59,8 +62,11 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC);
 
-       UtfToLocal(src, dest, ULmapUHC, NULL,
-                          sizeof(ULmapUHC) / sizeof(pg_utf_to_local), 0, PG_UHC, len);
+       UtfToLocal(src, len, dest,
+                          ULmapUHC, lengthof(ULmapUHC),
+                          NULL, 0,
+                          NULL,
+                          PG_UHC);
 
        PG_RETURN_VOID();
 }
index 97d890206fbdea03e79a2a953ec7333c706466a4..c2a4ec35f7f7a00c65d8f949f5c199f9ef5a532b 100644 (file)
@@ -59,46 +59,46 @@ extern Datum utf8_to_win(PG_FUNCTION_ARGS);
 typedef struct
 {
        pg_enc          encoding;
-       pg_local_to_utf *map1;          /* to UTF8 map name */
-       pg_utf_to_local *map2;          /* from UTF8 map name */
+       const pg_local_to_utf *map1;    /* to UTF8 map name */
+       const pg_utf_to_local *map2;    /* from UTF8 map name */
        int                     size1;                  /* size of map1 */
        int                     size2;                  /* size of map2 */
 } pg_conv_map;
 
-static pg_conv_map maps[] = {
+static const pg_conv_map maps[] = {
        {PG_WIN866, LUmapWIN866, ULmapWIN866,
-               sizeof(LUmapWIN866) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN866) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN866),
+       lengthof(ULmapWIN866)},
        {PG_WIN874, LUmapWIN874, ULmapWIN874,
-               sizeof(LUmapWIN874) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN874) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN874),
+       lengthof(ULmapWIN874)},
        {PG_WIN1250, LUmapWIN1250, ULmapWIN1250,
-               sizeof(LUmapWIN1250) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1250) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1250),
+       lengthof(ULmapWIN1250)},
        {PG_WIN1251, LUmapWIN1251, ULmapWIN1251,
-               sizeof(LUmapWIN1251) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1251) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1251),
+       lengthof(ULmapWIN1251)},
        {PG_WIN1252, LUmapWIN1252, ULmapWIN1252,
-               sizeof(LUmapWIN1252) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1252) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1252),
+       lengthof(ULmapWIN1252)},
        {PG_WIN1253, LUmapWIN1253, ULmapWIN1253,
-               sizeof(LUmapWIN1253) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1253) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1253),
+       lengthof(ULmapWIN1253)},
        {PG_WIN1254, LUmapWIN1254, ULmapWIN1254,
-               sizeof(LUmapWIN1254) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1254) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1254),
+       lengthof(ULmapWIN1254)},
        {PG_WIN1255, LUmapWIN1255, ULmapWIN1255,
-               sizeof(LUmapWIN1255) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1255) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1255),
+       lengthof(ULmapWIN1255)},
        {PG_WIN1256, LUmapWIN1256, ULmapWIN1256,
-               sizeof(LUmapWIN1256) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1256) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1256),
+       lengthof(ULmapWIN1256)},
        {PG_WIN1257, LUmapWIN1257, ULmapWIN1257,
-               sizeof(LUmapWIN1257) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1257) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1257),
+       lengthof(ULmapWIN1257)},
        {PG_WIN1258, LUmapWIN1258, ULmapWIN1258,
-               sizeof(LUmapWIN1258) / sizeof(pg_local_to_utf),
-       sizeof(ULmapWIN1258) / sizeof(pg_utf_to_local)},
+               lengthof(LUmapWIN1258),
+       lengthof(ULmapWIN1258)},
 };
 
 Datum
@@ -112,18 +112,23 @@ win_to_utf8(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
 
-       for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
+       for (i = 0; i < lengthof(maps); i++)
        {
                if (encoding == maps[i].encoding)
                {
-                       LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
+                       LocalToUtf(src, len, dest,
+                                          maps[i].map1, maps[i].size1,
+                                          NULL, 0,
+                                          NULL,
+                                          encoding);
                        PG_RETURN_VOID();
                }
        }
 
        ereport(ERROR,
                        (errcode(ERRCODE_INTERNAL_ERROR),
-         errmsg("unexpected encoding ID %d for WIN character sets", encoding)));
+                        errmsg("unexpected encoding ID %d for WIN character sets",
+                                       encoding)));
 
        PG_RETURN_VOID();
 }
@@ -139,18 +144,23 @@ utf8_to_win(PG_FUNCTION_ARGS)
 
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
 
-       for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
+       for (i = 0; i < lengthof(maps); i++)
        {
                if (encoding == maps[i].encoding)
                {
-                       UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
+                       UtfToLocal(src, len, dest,
+                                          maps[i].map2, maps[i].size2,
+                                          NULL, 0,
+                                          NULL,
+                                          encoding);
                        PG_RETURN_VOID();
                }
        }
 
        ereport(ERROR,
                        (errcode(ERRCODE_INTERNAL_ERROR),
-         errmsg("unexpected encoding ID %d for WIN character sets", encoding)));
+                        errmsg("unexpected encoding ID %d for WIN character sets",
+                                       encoding)));
 
        PG_RETURN_VOID();
 }
index f7222fc1779eb124d88569886235123360ae6c65..f8b0edc678e2124e8bb48eac3b240ea280669bf6 100644 (file)
@@ -366,9 +366,16 @@ typedef struct
 extern const pg_wchar_tbl pg_wchar_table[];
 
 /*
+ * Data structures for conversions between UTF-8 and other encodings
+ * (UtfToLocal() and LocalToUtf()).  In these data structures, characters of
+ * either encoding are represented by uint32 words; hence we can only support
+ * characters up to 4 bytes long.  For example, the byte sequence 0xC2 0x89
+ * would be represented by 0x0000C289, and 0xE8 0xA2 0xB4 by 0x00E8A2B4.
+ *
+ * Maps are arrays of these structs, which must be in order by the lookup key
+ * (so that bsearch() can be used).
+ *
  * UTF-8 to local code conversion map
- * Note that we limit the max length of UTF-8 to 4 bytes,
- * which is UCS-4 00010000-001FFFFF range.
  */
 typedef struct
 {
@@ -386,7 +393,7 @@ typedef struct
 } pg_local_to_utf;
 
 /*
- * UTF-8 to local code conversion map(combined characters)
+ * UTF-8 to local code conversion map (for combined characters)
  */
 typedef struct
 {
@@ -396,7 +403,7 @@ typedef struct
 } pg_utf_to_local_combined;
 
 /*
- * local code to UTF-8 conversion map(combined characters)
+ * local code to UTF-8 conversion map (for combined characters)
  */
 typedef struct
 {
@@ -405,6 +412,13 @@ typedef struct
        uint32          utf2;                   /* UTF-8 code 2 */
 } pg_local_to_utf_combined;
 
+/*
+ * callback function for algorithmic encoding conversions (in either direction)
+ *
+ * if function returns zero, it does not know how to convert the code
+ */
+typedef uint32 (*utf_local_conversion_func) (uint32 code);
+
 /*
  * Support macro for encoding conversion functions to validate their
  * arguments.  (This could be made more compact if we included fmgr.h
@@ -494,13 +508,18 @@ extern char *pg_server_to_any(const char *s, int len, int encoding);
 extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
 extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
 
-extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
-                  const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
-                  int size1, int size2, int encoding, int len);
-
-extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
-                  const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
-                  int size1, int size2, int encoding, int len);
+extern void UtfToLocal(const unsigned char *utf, int len,
+                  unsigned char *iso,
+                  const pg_utf_to_local *map, int mapsize,
+                  const pg_utf_to_local_combined *cmap, int cmapsize,
+                  utf_local_conversion_func conv_func,
+                  int encoding);
+extern void LocalToUtf(const unsigned char *iso, int len,
+                  unsigned char *utf,
+                  const pg_local_to_utf *map, int mapsize,
+                  const pg_local_to_utf_combined *cmap, int cmapsize,
+                  utf_local_conversion_func conv_func,
+                  int encoding);
 
 extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
 extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,