From: Moriyoshi Koizumi Date: Sun, 15 Feb 2009 07:04:07 +0000 (+0000) Subject: - Refix bug #43840. X-Git-Tag: php-5.4.0alpha1~191^2~4274 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5a870990fb879f7b91310a28d358c4ad6fe75453;p=php - Refix bug #43840. - Fix bug #43841. - Remove redundant trailing dots from the error messages. - Fix tests. --- diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 646912ad16..1aeb38cc9b 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -778,7 +778,7 @@ retry: for (;;) { pc->found_pos++; p = h; - m = pc->needle.buffer; + m = (int *)pc->needle.buffer; n = pc->needle_pos - 1; while (n > 0 && *p == *m) { n--; @@ -857,87 +857,203 @@ mbfl_strpos( int offset, int reverse) { - int n, result, negative_offset = 0; - unsigned char *p; - mbfl_convert_filter *filter; - struct collector_strpos_data pc; + int result; + mbfl_string _haystack_u8, _needle_u8; + const mbfl_string *haystack_u8, *needle_u8; + const unsigned char *u8_tbl; - if (haystack == NULL || needle == NULL) { + if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) { return -8; } - /* needle is converted into wchar */ - mbfl_wchar_device_init(&pc.needle); - filter = mbfl_convert_filter_new( - needle->no_encoding, - mbfl_no_encoding_wchar, - mbfl_wchar_device_output, 0, &pc.needle); - if (filter == NULL) { - return -4; - } - p = needle->val; - n = needle->len; - if (p != NULL) { - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - break; - } - n--; + + { + const mbfl_encoding *u8_enc; + u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8); + if (u8_enc == NULL || u8_enc->mblen_table == NULL) { + return -8; } + u8_tbl = u8_enc->mblen_table; } - mbfl_convert_filter_flush(filter); - mbfl_convert_filter_delete(filter); - pc.needle_len = pc.needle.pos; - if (pc.needle.buffer == NULL) { - return -4; - } - if (pc.needle_len <= 0) { - mbfl_wchar_device_clear(&pc.needle); - return -2; - } - /* initialize filter and collector data */ - filter = mbfl_convert_filter_new( - haystack->no_encoding, - mbfl_no_encoding_wchar, - collector_strpos, 0, &pc); - if (filter == NULL) { - mbfl_wchar_device_clear(&pc.needle); - return -4; + + if (haystack->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_haystack_u8); + haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8); + if (haystack_u8 == NULL) { + result = -4; + goto out; + } + } else { + haystack_u8 = haystack; } - if (offset < 0) { - negative_offset = -offset - pc.needle_len; - if (negative_offset < 0) { - negative_offset = 0; + if (needle->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_needle_u8); + needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8); + if (needle_u8 == NULL) { + result = -4; + goto out; } - offset = 0; + } else { + needle_u8 = needle; } - pc.start = offset; - pc.output = 0; - pc.needle_pos = 0; - pc.found_pos = 0; - pc.matched_pos = -1; + if (needle_u8->len < 1) { + result = -8; + goto out; + } - /* feed data */ - p = haystack->val; - n = haystack->len - negative_offset; - if (p != NULL) { - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - pc.matched_pos = -4; - break; + result = -1; + if (haystack_u8->len < needle_u8->len) { + goto out; + } + + if (!reverse) { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len; + unsigned int i; + const unsigned char *p, *q, *e; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + jtbl[i] = needle_u8_len + 1; + } + for (i = 0; i < needle_u8_len - 1; ++i) { + jtbl[needle_u8_val[i]] = needle_u8_len - i; + } + e = haystack_u8_val + haystack_u8->len; + p = haystack_u8_val; + while (--offset >= 0) { + if (p >= e) { + result = -16; + goto out; } - if (pc.matched_pos >= 0 && !reverse) { - break; + p += u8_tbl[*p]; + } + p += needle_u8_len; + if (p > e) { + goto out; + } + while (p <= e) { + const unsigned char *pv = p; + q = needle_u8_val + needle_u8_len; + for (;;) { + if (q == needle_u8_val) { + result = 0; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*--q != *--p) { + break; + } + } + p += jtbl[*p]; + if (p <= pv) { + p = pv + 1; + } + } + } else { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len, needle_len = 0; + unsigned int i; + const unsigned char *p, *e, *q, *qe; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + jtbl[i] = needle_u8_len; + } + for (i = needle_u8_len - 1; i > 0; --i) { + unsigned char c = needle_u8_val[i]; + jtbl[c] = i; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + { + unsigned char c = needle_u8_val[0]; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + e = haystack_u8_val; + p = e + haystack_u8->len; + qe = needle_u8_val + needle_u8_len; + if (offset < 0) { + if (-offset > needle_len) { + offset += needle_len; + while (offset < 0) { + unsigned char c; + if (p <= e) { + result = -16; + goto out; + } + c = *(--p); + if (c < 0x80) { + ++offset; + } else if ((c & 0xc0) != 0x80) { + ++offset; + } + } + } + } else { + const unsigned char *ee = haystack_u8_val + haystack_u8->len; + while (--offset >= 0) { + if (e >= ee) { + result = -16; + goto out; + } + e += u8_tbl[*e]; + } + } + if (p < e + needle_u8_len) { + goto out; + } + p -= needle_u8_len; + while (p >= e) { + const unsigned char *pv = p; + q = needle_u8_val; + for (;;) { + if (q == qe) { + result = 0; + p -= needle_u8_len; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*q != *p) { + break; + } + ++p, ++q; + } + p -= jtbl[*p]; + if (p >= pv) { + p = pv - 1; } - n--; } } - mbfl_convert_filter_flush(filter); - result = pc.matched_pos; - mbfl_convert_filter_delete(filter); - mbfl_wchar_device_clear(&pc.needle); - +out: + if (haystack_u8 == &_haystack_u8) { + mbfl_string_clear(&_haystack_u8); + } + if (needle_u8 == &_needle_u8) { + mbfl_string_clear(&_needle_u8); + } return result; } diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 7b5108545e..6dadbb042f 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2080,7 +2080,7 @@ PHP_FUNCTION(mb_strpos) RETURN_FALSE; } if (needle.len == 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter."); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); RETURN_FALSE; } @@ -2223,7 +2223,7 @@ PHP_FUNCTION(mb_stripos) RETURN_FALSE; } if (needle.len == 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter."); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); RETURN_FALSE; } n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC); @@ -2292,7 +2292,7 @@ PHP_FUNCTION(mb_strstr) } if (needle.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter."); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); RETURN_FALSE; } n = mbfl_strpos(&haystack, &needle, 0, 0); @@ -2402,7 +2402,7 @@ PHP_FUNCTION(mb_stristr) } if (!needle.len) { - php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter."); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); RETURN_FALSE; } @@ -2521,7 +2521,7 @@ PHP_FUNCTION(mb_substr_count) } if (needle.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty substring."); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring"); RETURN_FALSE; } diff --git a/ext/mbstring/tests/bug43840.phpt b/ext/mbstring/tests/bug43840.phpt index 8119bae325..004807eb1d 100644 --- a/ext/mbstring/tests/bug43840.phpt +++ b/ext/mbstring/tests/bug43840.phpt @@ -46,27 +46,31 @@ bool(false) -- Offset is 22 -- --Multibyte String:-- + +Warning: mb_strpos(): Offset not contained in string in %s on line %d bool(false) --ASCII String:-- -Warning: mb_strpos(): Offset not contained in string. in %s on line %d +Warning: mb_strpos(): Offset not contained in string in %s on line %d bool(false) -- Offset is 53 -- --Multibyte String:-- + +Warning: mb_strpos(): Offset not contained in string in %s on line %d bool(false) --ASCII String:-- -Warning: mb_strpos(): Offset not contained in string. in %s on line %d +Warning: mb_strpos(): Offset not contained in string in %s on line %d bool(false) -- Offset is 54 -- --Multibyte String:-- -Warning: mb_strpos(): Offset not contained in string. in %s on line %d +Warning: mb_strpos(): Offset not contained in string in %s on line %d bool(false) --ASCII String:-- -Warning: mb_strpos(): Offset not contained in string. in %s on line %d +Warning: mb_strpos(): Offset not contained in string in %s on line %d bool(false) diff --git a/ext/mbstring/tests/bug43841.phpt b/ext/mbstring/tests/bug43841.phpt index 340916902e..ff47a02b8a 100644 --- a/ext/mbstring/tests/bug43841.phpt +++ b/ext/mbstring/tests/bug43841.phpt @@ -1,6 +1,5 @@ --TEST-- Test mb_strrpos() function : mb_strrpos offset is byte count for negative values ---XFAIL-- --SKIPIF-- ---EXPECT-- +--EXPECTF-- *** Testing mb_strrpos() : usage variations *** **-- Offset is: -60 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) **-- Offset is: -50 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) **-- Offset is: -40 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) **-- Offset is: -30 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) **-- Offset is: -20 --** @@ -97,25 +113,41 @@ int(20) **-- Offset is: 30 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) **-- Offset is: 40 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) **-- Offset is: 50 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) **-- Offset is: 60 --** -- ASCII String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) --Multibyte String -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d bool(false) Done diff --git a/ext/mbstring/tests/mb_strstr_variation2.phpt b/ext/mbstring/tests/mb_strstr_variation2.phpt index fb84ff5ad4..e56d9fb8d5 100644 --- a/ext/mbstring/tests/mb_strstr_variation2.phpt +++ b/ext/mbstring/tests/mb_strstr_variation2.phpt @@ -165,38 +165,38 @@ Error: 2 - mb_strstr() expects parameter 2 to be binary string, array given, %s( bool(false) --uppercase NULL-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --lowercase null-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --lowercase true-- bool(false) --lowercase false-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --uppercase TRUE-- bool(false) --uppercase FALSE-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --empty string DQ-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --empty string SQ-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --instance of classWithToString-- Error: 4096 - Method classWithToString::__toString() must return a string value, %s(%d) -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --instance of classWithoutToString-- @@ -204,11 +204,11 @@ Error: 2 - mb_strstr() expects parameter 2 to be binary string, object given, %s bool(false) --undefined var-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --unset var-- -Error: 2 - mb_strstr(): Empty delimiter., %s(%d) +Error: 2 - mb_strstr(): Empty delimiter, %s(%d) bool(false) --resource-- diff --git a/ext/mbstring/tests/mb_substr_count_variation2.phpt b/ext/mbstring/tests/mb_substr_count_variation2.phpt index 89832dbf91..eea3912667 100644 --- a/ext/mbstring/tests/mb_substr_count_variation2.phpt +++ b/ext/mbstring/tests/mb_substr_count_variation2.phpt @@ -133,12 +133,12 @@ int(0) -- Iteration 10 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 11 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 12 -- @@ -146,7 +146,7 @@ int(0) -- Iteration 13 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 14 -- @@ -154,17 +154,17 @@ int(0) -- Iteration 15 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 16 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 17 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 18 -- @@ -181,16 +181,16 @@ int(1) -- Iteration 22 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 23 -- -Warning: mb_substr_count(): Empty substring. in %s on line %d +Warning: mb_substr_count(): Empty substring in %s on line %d bool(false) -- Iteration 24 -- Warning: mb_substr_count() expects parameter 2 to be binary string, resource given in %s on line %d NULL -Done \ No newline at end of file +Done