From: Moriyoshi Koizumi Date: Tue, 24 Feb 2009 15:09:43 +0000 (+0000) Subject: - Revert the patch then. X-Git-Tag: php-5.2.9~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7db52b84b78644f2182b6938ed2dc001c868d581;p=php - Revert the patch then. --- diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c index 56d6dd4c97..4b0e9b9e0f 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c @@ -171,9 +171,7 @@ int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter) CK((*filter->output_function)(0xfeff, filter->data)); } else { filter->status &= ~0xff; - if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { - CK((*filter->output_function)(n, filter->data)); - } + CK((*filter->output_function)(n, filter->data)); } break; } @@ -203,9 +201,7 @@ int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter) } else { filter->status = 0; n = (c & 0xff) | filter->cache; - if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { - CK((*filter->output_function)(n, filter->data)); - } + CK((*filter->output_function)(n, filter->data)); } return c; } @@ -215,7 +211,7 @@ int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter) */ int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter) { - if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) { + if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) { CK((*filter->output_function)((c >> 24) & 0xff, filter->data)); CK((*filter->output_function)((c >> 16) & 0xff, filter->data)); CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); @@ -251,9 +247,7 @@ int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter) } else { filter->status = 0; n = ((c & 0xff) << 24) | filter->cache; - if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { - CK((*filter->output_function)(n, filter->data)); - } + CK((*filter->output_function)(n, filter->data)); } return c; } @@ -263,7 +257,7 @@ int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter) */ int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter) { - if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) { + if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) { CK((*filter->output_function)(c & 0xff, filter->data)); CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); CK((*filter->output_function)((c >> 16) & 0xff, filter->data)); diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c index 20ff983e11..8b95897eac 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c @@ -106,8 +106,7 @@ int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter) } filter->status = 0; } else if (c < 0xc0) { - int status = filter->status & 0xff; - switch (status) { + switch (filter->status & 0xff) { case 0x10: /* 2byte code 2nd char */ case 0x21: /* 3byte code 3rd char */ case 0x32: /* 4byte code 4th char */ @@ -115,11 +114,7 @@ int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter) case 0x54: /* 6byte code 6th char */ filter->status = 0; s = filter->cache | (c & 0x3f); - if ((status == 0x10 && s >= 0x80) || - (status == 0x21 && s >= 0x800 && (s < 0xd800 || s > 0xdfff)) || - (status == 0x32 && s >= 0x10000) || - (status == 0x43 && s >= 0x200000) || - (status == 0x54 && s >= 0x4000000 && s < MBFL_WCSGROUP_UCS4MAX)) { + if (s >= 0x80) { CK((*filter->output_function)(s, filter->data)); } break; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h index cf4eaff1db..f500766b49 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h @@ -47,7 +47,6 @@ /* wchar plane, special charactor */ #define MBFL_WCSPLANE_MASK 0xffff #define MBFL_WCSPLANE_UCS2MAX 0x00010000 -#define MBFL_WCSPLANE_UTF32MAX 0x00110000 #define MBFL_WCSPLANE_SUPMIN 0x00010000 #define MBFL_WCSPLANE_SUPMAX 0x00200000 #define MBFL_WCSPLANE_JIS0208 0x70e10000 /* JIS HEX : 2121h - 7E7Eh */ diff --git a/ext/mbstring/tests/illformed_utf_sequences.phpt b/ext/mbstring/tests/illformed_utf_sequences.phpt deleted file mode 100644 index a462cd0745..0000000000 --- a/ext/mbstring/tests/illformed_utf_sequences.phpt +++ /dev/null @@ -1,148 +0,0 @@ ---TEST-- -Unicode standard conformance test (ill-formed UTF sequences.) ---SKIPIF-- - ---FILE-- -> 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), "UCS-4BE", "UTF-8"); -} -var_dump(bin2hex($out)); - -echo "UTF-32 code range\n"; -var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32BE"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32BE"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x00\x11\x00", "UCS-4BE", "UTF-32LE"))); -var_dump(bin2hex(mb_convert_encoding("\xff\xff\x10\x00", "UCS-4BE", "UTF-32LE"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x11\x00\x00", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x10\xff\xff", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\x00\x00\x11\x00", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\xff\xff\x10\x00", "UCS-4BE", "UTF-32"))); - -echo "UTF-32 and surrogates area\n"; -$out = ''; -for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32BE"); -} -var_dump(bin2hex($out)); - -$out = ''; -for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding(pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32LE"); -} -var_dump(bin2hex($out)); - -$out = ''; -for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32"); -} -var_dump(bin2hex($out)); - -$out = ''; -for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding("\x00\x00\xfe\xff". pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32"); -} -var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out))); - - -$out = ''; -for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding("\xff\xfe\x00\x00". pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32"); -} -var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out))); -?> ---EXPECT-- -UTF-8 redundancy -string(24) "000000310000003200000033" -string(24) "000000410000004200000043" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(24) "000000a2000000a3000000a5" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(8) "00000080" -string(8) "000007ff" -string(0) "" -string(8) "00000800" -string(8) "0000ffff" -string(0) "" -string(8) "00010000" -string(8) "001fffff" -string(0) "" -string(8) "00200000" -string(8) "03ffffff" -string(0) "" -string(8) "04000000" -string(8) "6fffffff" -string(0) "" -UTF-8 and surrogates area -string(16) "0000d7ff0000e000" -UTF-32 code range -string(0) "" -string(8) "0010ffff" -string(0) "" -string(8) "0010ffff" -string(0) "" -string(8) "0010ffff" -string(8) "0000feff" -string(16) "0000feff0010ffff" -string(8) "0000feff" -string(16) "0000feff0010ffff" -UTF-32 and surrogates area -string(16) "0000d7ff0000e000" -string(16) "0000d7ff0000e000" -string(16) "0000d7ff0000e000" -string(16) "0000d7ff0000e000" -string(16) "0000d7ff0000e000"