From: Rui Hirokawa Date: Sun, 11 Sep 2011 12:12:24 +0000 (+0000) Subject: fixed test case failures. X-Git-Tag: php-5.5.0alpha1~1192 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=01bb437b8568599bfb28aec198b6e16f00411fc1;p=php fixed test case failures. --- diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c index 56d6dd4c97..3cef6b051a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c @@ -173,6 +173,9 @@ int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter) filter->status &= ~0xff; if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { CK((*filter->output_function)(n, filter->data)); + } else { + n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(n, filter->data)); } } break; @@ -205,6 +208,9 @@ int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter) n = (c & 0xff) | filter->cache; if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { CK((*filter->output_function)(n, filter->data)); + } else { + n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(n, filter->data)); } } return c; @@ -253,7 +259,10 @@ int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter) n = ((c & 0xff) << 24) | filter->cache; if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { CK((*filter->output_function)(n, filter->data)); - } + } else { + n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(n, filter->data)); + } } return c; } diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 75eb48f0a7..af68c2b104 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2025,7 +2025,7 @@ PHP_FUNCTION(mb_preferred_mime_name) #define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0) #define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0) -/* {{{ proto bool mb_parse_str(string encoded_string , array result) +/* {{{ proto bool mb_parse_str(string encoded_string [, array result]) Parses GET/POST/COOKIE data and sets global variables */ PHP_FUNCTION(mb_parse_str) { @@ -2036,12 +2036,12 @@ PHP_FUNCTION(mb_parse_str) const mbfl_encoding *detected; track_vars_array = NULL; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz", &encstr, &encstr_len, &track_vars_array) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) { return; } - /* Clear out the array */ if (track_vars_array != NULL) { + /* Clear out the array */ zval_dtor(track_vars_array); array_init(track_vars_array); } @@ -2057,7 +2057,16 @@ PHP_FUNCTION(mb_parse_str) info.num_from_encodings = MBSTRG(http_input_list_size); info.from_language = MBSTRG(language); - detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC); + if (track_vars_array != NULL) { + detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC); + } else { + zval tmp; + if (!EG(active_symbol_table)) { + zend_rebuild_symbol_table(TSRMLS_C); + } + Z_ARRVAL(tmp) = EG(active_symbol_table); + detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC); + } MBSTRG(http_input_identify) = detected; diff --git a/ext/mbstring/tests/bug20087.phpt b/ext/mbstring/tests/bug20087.phpt index bcd190185b..01bcc90388 100644 --- a/ext/mbstring/tests/bug20087.phpt +++ b/ext/mbstring/tests/bug20087.phpt @@ -2,7 +2,7 @@ Bug #20087 (Assertion failure) --SKIPIF-- ---XFAIL-- +--FAIL-- register_globals calls killed the ability for mb_parse_str() to register into the global scope --FILE-- --EXPECT-- -8101 +8085 63 diff --git a/ext/mbstring/tests/bug49536.phpt b/ext/mbstring/tests/bug49536.phpt index 32685d2267..40328f79f0 100644 --- a/ext/mbstring/tests/bug49536.phpt +++ b/ext/mbstring/tests/bug49536.phpt @@ -16,5 +16,5 @@ var_dump(mb_detect_encoding("\xc0\x00", "UTF-8", true)); --EXPECT-- string(4) "SJIS" bool(false) -string(5) "UTF-8" +bool(false) bool(false) diff --git a/ext/mbstring/tests/illformed_utf_sequences.phpt b/ext/mbstring/tests/illformed_utf_sequences.phpt index a462cd0745..b5b9d94db8 100644 --- a/ext/mbstring/tests/illformed_utf_sequences.phpt +++ b/ext/mbstring/tests/illformed_utf_sequences.phpt @@ -4,145 +4,214 @@ Unicode standard conformance test (ill-formed UTF sequences.) --FILE-- > 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), "UCS-4BE", "UTF-8"); + $s = chk_enc(pack('C3', 0xe0 | ($i >> 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), 2); + if ($s === false) { + $cnt++; + } else { + $out .= $s; + } } -var_dump(bin2hex($out)); +var_dump($cnt); +var_dump($out); echo "UTF-32 code range\n"; -var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32BE"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32BE"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x00\x11\x00", "UCS-4BE", "UTF-32LE"))); -var_dump(bin2hex(mb_convert_encoding("\xff\xff\x10\x00", "UCS-4BE", "UTF-32LE"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x11\x00\x00", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x10\xff\xff", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\x00\x00\x11\x00", "UCS-4BE", "UTF-32"))); -var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\xff\xff\x10\x00", "UCS-4BE", "UTF-32"))); +var_dump(chk_enc("\x00\x11\x00\x00", 1, "UTF-32BE")); +var_dump(chk_enc("\x00\x10\xff\xff", 0, "UTF-32BE")); +var_dump(chk_enc("\x00\x00\x11\x00", 1, "UTF-32LE")); +var_dump(chk_enc("\xff\xff\x10\x00", 0, "UTF-32LE")); +var_dump(chk_enc("\x00\x11\x00\x00", 1, "UTF-32")); +var_dump(chk_enc("\x00\x10\xff\xff", 0, "UTF-32")); +var_dump(chk_enc("\x00\x00\xfe\xff\x00\x11\x00\x00", 0, "UTF-32")); +var_dump(chk_enc("\x00\x00\xfe\xff\x00\x10\xff\xff", 0, "UTF-32")); +var_dump(chk_enc("\xff\xfe\x00\x00\x00\x00\x11\x00", 0, "UTF-32")); +var_dump(chk_enc("\xff\xfe\x00\x00\xff\xff\x10\x00", 0, "UTF-32")); echo "UTF-32 and surrogates area\n"; $out = ''; +$cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32BE"); + $s = chk_enc(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), 1, "UTF-32BE"); + if ($s === false) { + $cnt++; + } else { + $out .= $s; + } } -var_dump(bin2hex($out)); +var_dump($cnt); +var_dump($out); $out = ''; +$cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding(pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32LE"); + $s = chk_enc(pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), 1, "UTF-32LE"); + if ($s === false) { + $cnt++; + } else { + $out .= $s; + } } -var_dump(bin2hex($out)); +var_dump($cnt); +var_dump($out); $out = ''; +$cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32"); + $s = chk_enc(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), 1, "UTF-32"); + if ($s === false) { + $cnt++; + } else { + $out .= $s; + } } -var_dump(bin2hex($out)); +var_dump($cnt); +var_dump($out); + +echo "UTF-32 and surrogates area with BOM\n"; $out = ''; +$cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding("\x00\x00\xfe\xff". pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32"); + $s = chk_enc("\x00\x00\xfe\xff". pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), + 1, "UTF-32", true); + if ($s === false) { + $cnt++; + } else { + $out .= $s; + } } -var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out))); - +var_dump($cnt); +var_dump(str_replace("0000feff","",$out)); $out = ''; +$cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { - $out .= mb_convert_encoding("\xff\xfe\x00\x00". pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32"); + $s = chk_enc("\xff\xfe\x00\x00". pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), + 1, "UTF-32", true); + if ($s === false) { + $cnt++; + } else { + $out .= $s; + } } -var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out))); +var_dump($cnt); +var_dump(str_replace("0000feff","",$out)); + ?> --EXPECT-- UTF-8 redundancy string(24) "000000310000003200000033" string(24) "000000410000004200000043" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) string(24) "000000a2000000a3000000a5" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) string(8) "00000080" string(8) "000007ff" -string(0) "" +bool(false) string(8) "00000800" string(8) "0000ffff" -string(0) "" +bool(false) string(8) "00010000" -string(8) "001fffff" -string(0) "" -string(8) "00200000" -string(8) "03ffffff" -string(0) "" -string(8) "04000000" -string(8) "6fffffff" -string(0) "" +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) UTF-8 and surrogates area +int(2048) string(16) "0000d7ff0000e000" UTF-32 code range -string(0) "" +bool(false) string(8) "0010ffff" -string(0) "" +bool(false) string(8) "0010ffff" -string(0) "" +bool(false) string(8) "0010ffff" -string(8) "0000feff" +string(16) "0000feff0000fffd" string(16) "0000feff0010ffff" -string(8) "0000feff" +string(16) "0000feff0000fffd" string(16) "0000feff0010ffff" UTF-32 and surrogates area +int(2048) string(16) "0000d7ff0000e000" +int(2048) string(16) "0000d7ff0000e000" +int(2048) string(16) "0000d7ff0000e000" +UTF-32 and surrogates area with BOM +int(2048) string(16) "0000d7ff0000e000" +int(2048) string(16) "0000d7ff0000e000" diff --git a/ext/mbstring/tests/mb_parse_str.phpt b/ext/mbstring/tests/mb_parse_str.phpt index f11b943c26..59d187bd05 100644 --- a/ext/mbstring/tests/mb_parse_str.phpt +++ b/ext/mbstring/tests/mb_parse_str.phpt @@ -2,7 +2,7 @@ mb_parse_str() --SKIPIF-- ---XFAIL-- +--FAIL-- register_globals calls killed the ability for mb_parse_str() to register into the global scope --INI-- arg_separator.input=& diff --git a/ext/mbstring/tests/mb_parse_str02.phpt b/ext/mbstring/tests/mb_parse_str02.phpt index d9b5eb20d9..f35699928e 100644 --- a/ext/mbstring/tests/mb_parse_str02.phpt +++ b/ext/mbstring/tests/mb_parse_str02.phpt @@ -2,7 +2,7 @@ mb_parse_str() test 2 --SKIPIF-- ---XFAIL-- +--FAIL-- register_globals calls killed the ability for mb_parse_str() to register into the global scope --INI-- arg_separator.input=&#