From: Nikita Popov Date: Wed, 19 Jul 2017 21:50:14 +0000 (+0200) Subject: Directly accept encoding in php_unicode_convert_case() X-Git-Tag: php-7.3.0alpha1~1878 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9c73be898d4e5aa2e64b21da14797ec9ad202134;p=php Directly accept encoding in php_unicode_convert_case() As a side-effect mb_strtolower() and mb_strtoupper() now correctly handle a NULL encoding parameter by using the internal encoding. This is what caused the two test changes. --- diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 9c64092c2e..1ce966610c 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -3514,12 +3514,13 @@ PHP_FUNCTION(mb_convert_encoding) Returns a case-folded version of sourcestring */ PHP_FUNCTION(mb_convert_case) { - const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + const char *from_encoding = NULL; char *str; size_t str_len, from_encoding_len; zend_long case_mode = 0; char *newstr; size_t ret_len; + const mbfl_encoding *enc; RETVAL_FALSE; if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len, @@ -3527,7 +3528,12 @@ PHP_FUNCTION(mb_convert_case) return; } - newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding); + enc = php_mb_get_encoding(from_encoding); + if (!enc) { + return; + } + + newstr = php_unicode_convert_case(case_mode, str, str_len, &ret_len, enc); if (newstr) { // TODO: avoid reallocation ??? @@ -3542,17 +3548,24 @@ PHP_FUNCTION(mb_convert_case) */ PHP_FUNCTION(mb_strtoupper) { - const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + const char *from_encoding = NULL; char *str; size_t str_len, from_encoding_len; char *newstr; size_t ret_len; + const mbfl_encoding *enc; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len, &from_encoding, &from_encoding_len) == FAILURE) { return; } - newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding); + + enc = php_mb_get_encoding(from_encoding); + if (!enc) { + RETURN_FALSE; + } + + newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc); if (newstr) { // TODO: avoid reallocation ??? @@ -3569,17 +3582,24 @@ PHP_FUNCTION(mb_strtoupper) */ PHP_FUNCTION(mb_strtolower) { - const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + const char *from_encoding = NULL; char *str; size_t str_len, from_encoding_len; char *newstr; size_t ret_len; + const mbfl_encoding *enc; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len, &from_encoding, &from_encoding_len) == FAILURE) { return; } - newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding); + + enc = php_mb_get_encoding(from_encoding); + if (!enc) { + RETURN_FALSE; + } + + newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc); if (newstr) { // TODO: avoid reallocation ??? @@ -5566,20 +5586,25 @@ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nby */ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding) { - int n; + int n = -1; mbfl_string haystack, needle; - n = -1; + const mbfl_encoding *enc; + + enc = php_mb_get_encoding(from_encoding); + if (!enc) { + return -1; + } mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; + haystack.no_encoding = enc->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; + needle.no_encoding = enc->no_encoding; do { size_t len = 0; - haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding); + haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, enc); haystack.len = len; if (!haystack.val) { @@ -5590,7 +5615,7 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int break; } - needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding); + needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, enc); needle.len = len; if (!needle.val) { @@ -5601,12 +5626,6 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int break; } - haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); - if (haystack.no_encoding == mbfl_no_encoding_invalid) { - php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding); - break; - } - { int haystack_char_len = mbfl_strlen(&haystack); diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index 490a985f90..5d6ccbfdd5 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -274,22 +274,15 @@ MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_ ((unsigned char*)(ptr))[3] = (v ) & 0xff;\ } -MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *ret_len, - const char *src_encoding_name) +MBSTRING_API char *php_unicode_convert_case( + int case_mode, const char *srcstr, size_t srclen, size_t *ret_len, + const mbfl_encoding *src_encoding) { char *unicode, *newstr; size_t unicode_len; unsigned char *unicode_ptr; size_t i; - enum mbfl_no_encoding src_no_encoding; - - const mbfl_encoding *src_encoding = mbfl_name2encoding(src_encoding_name); - if (!src_encoding) { - php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", src_encoding_name); - return NULL; - } - - src_no_encoding = src_encoding->no_encoding; + enum mbfl_no_encoding src_no_encoding = src_encoding->no_encoding; unicode = php_mb_convert_encoding_ex(srcstr, srclen, &mbfl_encoding_ucs4be, src_encoding, &unicode_len); if (unicode == NULL) diff --git a/ext/mbstring/php_unicode.h b/ext/mbstring/php_unicode.h index ac6dd6e57a..3a6c75ce86 100644 --- a/ext/mbstring/php_unicode.h +++ b/ext/mbstring/php_unicode.h @@ -94,8 +94,10 @@ MBSTRING_API int php_unicode_is_prop(unsigned long code, ...); MBSTRING_API int php_unicode_is_prop1(unsigned long code, int prop); -MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *retlen, - const char *src_encoding); + +MBSTRING_API char *php_unicode_convert_case( + int case_mode, const char *srcstr, size_t srclen, size_t *retlen, + const mbfl_encoding *src_encoding); #define PHP_UNICODE_CASE_UPPER 0 #define PHP_UNICODE_CASE_LOWER 1 diff --git a/ext/mbstring/tests/mb_strtolower_variation2.phpt b/ext/mbstring/tests/mb_strtolower_variation2.phpt index 43d0f53728..ef69eb00af 100644 --- a/ext/mbstring/tests/mb_strtolower_variation2.phpt +++ b/ext/mbstring/tests/mb_strtolower_variation2.phpt @@ -157,14 +157,10 @@ Warning: mb_strtolower(): Unknown encoding "0.5" in %s on line %d bool(false) -- Iteration 10 -- - -Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "68656c6c6f2c20776f726c64" -- Iteration 11 -- - -Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "68656c6c6f2c20776f726c64" -- Iteration 12 -- @@ -209,17 +205,13 @@ string(24) "68656c6c6f2c20776f726c64" string(24) "68656c6c6f2c20776f726c64" -- Iteration 22 -- - -Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "68656c6c6f2c20776f726c64" -- Iteration 23 -- - -Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "68656c6c6f2c20776f726c64" -- Iteration 24 -- Warning: mb_strtolower() expects parameter 2 to be string, resource given in %s on line %d NULL -Done \ No newline at end of file +Done diff --git a/ext/mbstring/tests/mb_strtoupper_variation2.phpt b/ext/mbstring/tests/mb_strtoupper_variation2.phpt index 52beb3d741..25b9a53e91 100644 --- a/ext/mbstring/tests/mb_strtoupper_variation2.phpt +++ b/ext/mbstring/tests/mb_strtoupper_variation2.phpt @@ -158,14 +158,10 @@ Warning: mb_strtoupper(): Unknown encoding "0.5" in %s on line %d bool(false) -- Iteration 10 -- - -Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "48454c4c4f2c20574f524c44" -- Iteration 11 -- - -Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "48454c4c4f2c20574f524c44" -- Iteration 12 -- @@ -210,17 +206,13 @@ string(24) "48454c4c4f2c20574f524c44" string(24) "48454c4c4f2c20574f524c44" -- Iteration 22 -- - -Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "48454c4c4f2c20574f524c44" -- Iteration 23 -- - -Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d -bool(false) +string(24) "48454c4c4f2c20574f524c44" -- Iteration 24 -- Warning: mb_strtoupper() expects parameter 2 to be string, resource given in %s on line %d NULL -Done \ No newline at end of file +Done