From f73f190c3f97479f735b97c22a8e4885344edca1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 16 Apr 2019 16:35:35 +0200 Subject: [PATCH] Fix internal_encoding fallback in mbstring By introducing a hook that is called whenever one of internal_encoding / input_encoding / output_encoding changes, so that mbstring can adjust its internal state. This also makes internal_encoding work with zend multibyte. --- NEWS | 3 + Zend/tests/multibyte/bug68665.phpt | 2 +- .../multibyte/multibyte_encoding_001.phpt | 2 +- .../multibyte/multibyte_encoding_002.phpt | 2 +- .../multibyte/multibyte_encoding_003.phpt | Bin 471 -> 462 bytes .../multibyte/multibyte_encoding_004.phpt | 2 +- .../multibyte/multibyte_encoding_005.phpt | 2 +- ext/iconv/iconv.c | 36 ++-- ext/mbstring/mb_gpc.c | 5 - ext/mbstring/mbstring.c | 156 ++++++++---------- ext/mbstring/mbstring.h | 7 +- ext/mbstring/tests/ini_encoding2.phpt | 2 +- ext/mbstring/tests/internal_encoding.phpt | 66 ++++++++ .../tests/mb_internal_encoding_basic2.phpt | 2 +- main/main.c | 41 +++++ main/php.h | 5 + 16 files changed, 211 insertions(+), 122 deletions(-) create mode 100644 ext/mbstring/tests/internal_encoding.phpt diff --git a/NEWS b/NEWS index 63f7704f05..08340c8536 100644 --- a/NEWS +++ b/NEWS @@ -60,6 +60,9 @@ PHP NEWS - LDAP: . Deprecated ldap_control_paged_result_response and ldap_control_paged_result +- Mbstring: + . Fixed bug #77907 (mb-functions do not respect default_encoding). (Nikita) + - Opcache: . Implemented preloading RFC: https://wiki.php.net/rfc/preload. 
(Dmitry) diff --git a/Zend/tests/multibyte/bug68665.phpt b/Zend/tests/multibyte/bug68665.phpt index 74ff01da33..6846776570 100644 --- a/Zend/tests/multibyte/bug68665.phpt +++ b/Zend/tests/multibyte/bug68665.phpt @@ -11,7 +11,7 @@ if (!extension_loaded("mbstring")) { ?> --INI-- zend.multibyte=1 -mbstring.internal_encoding=big5 +internal_encoding=big5 --FILE-- --INI-- zend.multibyte=1 -mbstring.internal_encoding=SJIS +internal_encoding=SJIS --FILE-- --INI-- zend.multibyte=1 -mbstring.internal_encoding=iso-8859-1 +internal_encoding=iso-8859-1 --FILE-- last_used_encoding_name = NULL; mbstring_globals->last_used_encoding = NULL; + mbstring_globals->internal_encoding_set = 0; + mbstring_globals->http_output_set = 0; + mbstring_globals->http_input_set = 0; } /* }}} */ @@ -1603,6 +1582,11 @@ ZEND_TSRMLS_CACHE_UPDATE(); REGISTER_INI_ENTRIES(); + /* We assume that we're the only user of the hook. */ + ZEND_ASSERT(php_internal_encoding_changed == NULL); + php_internal_encoding_changed = mbstring_internal_encoding_changed_hook; + mbstring_internal_encoding_changed_hook(); + /* This is a global handler. Should not be set in a per-request handler. 
*/ sapi_register_treat_data(mbstr_treat_data); @@ -1763,6 +1747,10 @@ PHP_RSHUTDOWN_FUNCTION(mbstring) MBSTRG(last_used_encoding_name) = NULL; } + MBSTRG(internal_encoding_set) = 0; + MBSTRG(http_output_set) = 0; + MBSTRG(http_input_set) = 0; + #if HAVE_MBREGEX PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif @@ -1846,6 +1834,7 @@ PHP_FUNCTION(mb_internal_encoding) RETURN_FALSE; } else { MBSTRG(current_internal_encoding) = encoding; + MBSTRG(internal_encoding_set) = 1; RETURN_TRUE; } } @@ -1969,6 +1958,7 @@ PHP_FUNCTION(mb_http_output) php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { + MBSTRG(http_output_set) = 1; MBSTRG(current_http_output_encoding) = encoding; RETURN_TRUE; } diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index cd882c1c03..5a713e5496 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -130,9 +130,6 @@ MBSTRING_API size_t php_mb_mbchar_bytes(const char *s); MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, zend_string *from_encoding); MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc); -/* internal use only */ -int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length); - ZEND_BEGIN_MODULE_GLOBALS(mbstring) char *internal_encoding_name; const mbfl_encoding *internal_encoding; @@ -169,6 +166,10 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring) #endif zend_string *last_used_encoding_name; const mbfl_encoding *last_used_encoding; + /* Whether an explicit internal_encoding / http_output / http_input encoding was set. 
*/ + zend_bool internal_encoding_set; + zend_bool http_output_set; + zend_bool http_input_set; ZEND_END_MODULE_GLOBALS(mbstring) #define MB_OVERLOAD_MAIL 1 diff --git a/ext/mbstring/tests/ini_encoding2.phpt b/ext/mbstring/tests/ini_encoding2.phpt index f3728486ae..64cc23ec91 100644 --- a/ext/mbstring/tests/ini_encoding2.phpt +++ b/ext/mbstring/tests/ini_encoding2.phpt @@ -50,7 +50,7 @@ string(6) "EUC-JP" string(0) "" string(0) "" string(0) "" -string(5) "UTF-8" +string(6) "EUC-JP" string(0) "" string(0) "" Setting INI diff --git a/ext/mbstring/tests/internal_encoding.phpt b/ext/mbstring/tests/internal_encoding.phpt new file mode 100644 index 0000000000..3d097ac79c --- /dev/null +++ b/ext/mbstring/tests/internal_encoding.phpt @@ -0,0 +1,66 @@ +--TEST-- +Check that "internal_encoding" ini is picked up by mbstring +--INI-- +internal_encoding=iso-8859-1 +--FILE-- + +--EXPECTF-- +string(10) "ISO-8859-1" +int(2) + +Deprecated: ini_set(): Use of mbstring.internal_encoding is deprecated in %s on line %d +string(5) "UTF-8" +int(1) +string(5) "UTF-8" +int(1) + +Deprecated: ini_set(): Use of mbstring.internal_encoding is deprecated in %s on line %d +string(10) "ISO-8859-2" +int(2) +string(5) "UTF-8" +int(1) +string(5) "UTF-8" +int(1) + +Deprecated: ini_set(): Use of mbstring.internal_encoding is deprecated in %s on line %d +string(10) "ISO-8859-3" +int(2) diff --git a/ext/mbstring/tests/mb_internal_encoding_basic2.phpt b/ext/mbstring/tests/mb_internal_encoding_basic2.phpt index 8090b47be5..99b92027e9 100644 --- a/ext/mbstring/tests/mb_internal_encoding_basic2.phpt +++ b/ext/mbstring/tests/mb_internal_encoding_basic2.phpt @@ -47,7 +47,7 @@ string(10) "ISO-8859-1" string(0) "" string(0) "" string(0) "" -string(5) "UTF-8" +string(10) "ISO-8859-1" bool(true) string(5) "UTF-8" Done diff --git a/main/main.c b/main/main.c index 5eb34b7fa4..f021f22af5 100644 --- a/main/main.c +++ b/main/main.c @@ -578,12 +578,44 @@ static PHP_INI_DISP(display_errors_mode) } /* }}} */ +PHPAPI const 
char *php_get_internal_encoding() { + if (PG(internal_encoding) && PG(internal_encoding)[0]) { + return PG(internal_encoding); + } else if (SG(default_charset)) { + return SG(default_charset); + } + return ""; +} + +PHPAPI const char *php_get_input_encoding() { + if (PG(input_encoding) && PG(input_encoding)[0]) { + return PG(input_encoding); + } else if (SG(default_charset)) { + return SG(default_charset); + } + return ""; +} + +PHPAPI const char *php_get_output_encoding() { + if (PG(output_encoding) && PG(output_encoding)[0]) { + return PG(output_encoding); + } else if (SG(default_charset)) { + return SG(default_charset); + } + return ""; +} + +PHPAPI void (*php_internal_encoding_changed)(void) = NULL; + /* {{{ PHP_INI_MH */ static PHP_INI_MH(OnUpdateDefaultCharset) { if (new_value) { OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage); + if (php_internal_encoding_changed) { + php_internal_encoding_changed(); + } #ifdef PHP_WIN32 php_win32_cp_do_update(ZSTR_VAL(new_value)); #endif @@ -598,6 +630,9 @@ static PHP_INI_MH(OnUpdateInternalEncoding) { if (new_value) { OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage); + if (php_internal_encoding_changed) { + php_internal_encoding_changed(); + } #ifdef PHP_WIN32 php_win32_cp_do_update(ZSTR_VAL(new_value)); #endif @@ -612,6 +647,9 @@ static PHP_INI_MH(OnUpdateInputEncoding) { if (new_value) { OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage); + if (php_internal_encoding_changed) { + php_internal_encoding_changed(); + } #ifdef PHP_WIN32 php_win32_cp_do_update(NULL); #endif @@ -626,6 +664,9 @@ static PHP_INI_MH(OnUpdateOutputEncoding) { if (new_value) { OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage); + if (php_internal_encoding_changed) { + php_internal_encoding_changed(); + } #ifdef PHP_WIN32 php_win32_cp_do_update(NULL); #endif diff --git a/main/php.h b/main/php.h index 48b4633879..be9917922a 100644 --- a/main/php.h +++ b/main/php.h @@ -371,6 +371,11 @@ 
PHPAPI int php_mergesort(void *base, size_t nmemb, size_t size, int (*cmp)(const PHPAPI void php_register_pre_request_shutdown(void (*func)(void *), void *userdata); PHPAPI void php_com_initialize(void); PHPAPI char *php_get_current_user(void); + +PHPAPI const char *php_get_internal_encoding(void); +PHPAPI const char *php_get_input_encoding(void); +PHPAPI const char *php_get_output_encoding(void); +PHPAPI extern void (*php_internal_encoding_changed)(void); END_EXTERN_C() /* PHP-named Zend macro wrappers */ -- 2.50.1