From: Nikita Popov Date: Thu, 7 May 2020 13:46:08 +0000 (+0200) Subject: Add quiet parameter to internal HTML entities API X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c50cfc4d3d60e69040c9d2f3836b12e886b587ac;p=php Add quiet parameter to internal HTML entities API In some places, we need to make sure that no warnings are thrown due to unknown encoding. The error reporting code tried to avoid this by determining a "safe charset", but this introduces subtle discrepancies in which charset is picked (normally internal_encoding takes precedence). Avoid this by suppressing the warning in the first place. While here, use the fallback logic to print error messages with substitution characters more consistently, to avoid skipping parts of the error message entirely. --- diff --git a/ext/filter/sanitizing_filters.c b/ext/filter/sanitizing_filters.c index 25b2f39fde..9243b419cf 100644 --- a/ext/filter/sanitizing_filters.c +++ b/ext/filter/sanitizing_filters.c @@ -251,7 +251,9 @@ void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL) } else { quotes = ENT_NOQUOTES; } - buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0); + buf = php_escape_html_entities_ex( + (unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes, + /* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0); zval_ptr_dtor(value); ZVAL_STR(value, buf); } diff --git a/ext/standard/html.c b/ext/standard/html.c index 9306937107..b29dcd535e 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -367,7 +367,7 @@ static inline unsigned int get_next_char( /* {{{ entity_charset determine_charset * returns the charset identifier based on current locale or a hint. * defaults to UTF-8 */ -static enum entity_charset determine_charset(char *charset_hint) +static enum entity_charset determine_charset(char *charset_hint, zend_bool quiet) { size_t i; const zend_encoding *zenc; @@ -401,8 +401,10 @@ det_charset: } } - php_error_docref(NULL, E_WARNING, "Charset `%s' not supported, assuming utf-8", - charset_hint); + if (!quiet) { + php_error_docref(NULL, E_WARNING, "Charset `%s' not supported, assuming utf-8", + charset_hint); + } } return cs_utf_8; } @@ -1006,7 +1008,7 @@ PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int fl } if (all) { - charset = determine_charset(hint_charset); + charset = determine_charset(hint_charset, /* quiet */ 0); } else { charset = cs_8859_1; /* charset shouldn't matter, use ISO-8859-1 for performance */ } @@ -1030,9 +1032,9 @@ PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int fl } /* }}} */ -PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset) +PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset) { - return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1); + return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1, /* quiet */ 0); } /* {{{ find_entity_for_char */ @@ -1042,7 +1044,7 @@ static inline void find_entity_for_char( const entity_stage1_row *table, const unsigned char **entity, size_t *entity_len, - unsigned char *old, + const unsigned char *old, size_t oldlen, size_t *cursor) { @@ -1118,11 +1120,11 @@ static inline void find_entity_for_char_basic( /* {{{ php_escape_html_entities */ -PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode) +PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode, zend_bool quiet) { size_t cursor, maxlen, len; zend_string *replaced; - enum entity_charset charset = determine_charset(hint_charset); + enum entity_charset charset = determine_charset(hint_charset, quiet); int doctype = flags & ENT_HTML_DOC_TYPE_MASK; entity_table_opt entity_table; const enc_to_uni *to_uni_table = NULL; @@ -1132,7 +1134,7 @@ PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldle size_t replacement_len = 0; if (all) { /* replace with all named entities */ - if (CHARSET_PARTIAL_SUPPORT(charset)) { + if (!quiet && CHARSET_PARTIAL_SUPPORT(charset)) { php_error_docref(NULL, E_NOTICE, "Only basic entities " "substitution is supported for multi-byte encodings other than UTF-8; " "functionality is equivalent to htmlspecialchars"); @@ -1349,7 +1351,7 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all) replaced = php_escape_html_entities_ex( (unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), all, (int) flags, - hint_charset ? ZSTR_VAL(hint_charset) : NULL, double_encode); + hint_charset ? ZSTR_VAL(hint_charset) : NULL, double_encode, /* quiet */ 0); RETVAL_STR(replaced); } /* }}} */ @@ -1519,7 +1521,7 @@ PHP_FUNCTION(get_html_translation_table) Z_PARAM_STRING(charset_hint, charset_hint_len) ZEND_PARSE_PARAMETERS_END(); - charset = determine_charset(charset_hint); + charset = determine_charset(charset_hint, /* quiet */ 0); doctype = flags & ENT_HTML_DOC_TYPE_MASK; LIMIT_ALL(all, doctype, charset); diff --git a/ext/standard/html.h b/ext/standard/html.h index 2a8d24ccde..818ee1a0ca 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -44,8 +44,8 @@ void register_html_constants(INIT_FUNC_ARGS); -PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset); -PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode); +PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset); +PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode, zend_bool quiet); PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, char *hint_charset); PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status); diff --git a/ext/standard/tests/strings/bug68996.phpt b/ext/standard/tests/strings/bug68996.phpt index bd5be022fd..260138c51b 100644 --- a/ext/standard/tests/strings/bug68996.phpt +++ b/ext/standard/tests/strings/bug68996.phpt @@ -6,6 +6,6 @@ html_errors=1 ---EXPECTF-- +--EXPECT--
-Warning: : Failed to open stream: No such file or directory in %sbug68996.php on line %d
+Warning: fopen(�c): Failed to open stream: No such file or directory in /home/nikic/php-src/ext/standard/tests/strings/bug68996.php on line 2
diff --git a/ext/standard/url_scanner_ex.re b/ext/standard/url_scanner_ex.re index a83a91b534..d80bd0d2a1 100644 --- a/ext/standard/url_scanner_ex.re +++ b/ext/standard/url_scanner_ex.re @@ -758,9 +758,9 @@ static inline int php_url_scanner_add_var_impl(char *name, size_t name_len, char smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded); encoded = php_raw_url_encode(value, value_len); smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded); - encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0); + encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1); smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded); - encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0); + encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1); smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded); } else { smart_str_appendl(&sname, name, name_len); @@ -860,7 +860,7 @@ static inline int php_url_scanner_reset_var_impl(zend_string *name, int encode, encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name)); smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded); - encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0); + encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), /* double_encode */ 0, /* quiet */ 1); smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded); } else { diff --git a/main/main.c b/main/main.c index dc9182e481..98aaa5bece 100644 --- a/main/main.c +++ b/main/main.c @@ -95,31 +95,6 @@ PHPAPI size_t core_globals_offset; #define SAFE_FILENAME(f) ((f)?(f):"-") -static char *get_safe_charset_hint(void) { - ZEND_TLS char *lastHint = NULL; - ZEND_TLS char *lastCodeset = NULL; - char *hint = SG(default_charset); - size_t len = strlen(hint); - size_t i = 0; - - if (lastHint == SG(default_charset)) { - return lastCodeset; - } - - lastHint = hint; - lastCodeset = NULL; - - for (i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) { - if (len == charset_map[i].codeset_len - && zend_binary_strcasecmp(hint, len, charset_map[i].codeset, len) == 0) { - lastCodeset = (char*)charset_map[i].codeset; - break; - } - } - - return lastCodeset; -} - /* {{{ PHP_INI_MH */ static PHP_INI_MH(OnSetFacility) @@ -937,6 +912,19 @@ PHPAPI size_t php_printf(const char *format, ...) } /* }}} */ +static zend_string *escape_html(const char *buffer, size_t buffer_len) { + zend_string *result = php_escape_html_entities_ex( + (const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT, + /* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1); + if (!result || ZSTR_LEN(result) == 0) { + /* Retry with substituting invalid chars on fail. */ + result = php_escape_html_entities_ex( + (const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS, + /* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1); + } + return result; +} + /* {{{ php_verror */ /* php_verror is called from php_error_docref functions. * Its purpose is to unify error messages and automatically generate clickable @@ -962,12 +950,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ buffer_len = (int)vspprintf(&buffer, 0, format, args); if (PG(html_errors)) { - replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint()); - /* Retry with substituting invalid chars on fail. */ - if (!replace_buffer || ZSTR_LEN(replace_buffer) < 1) { - replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS, get_safe_charset_hint()); - } - + replace_buffer = escape_html(buffer, buffer_len); efree(buffer); if (replace_buffer) { @@ -1032,7 +1015,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ } if (PG(html_errors)) { - replace_origin = php_escape_html_entities((unsigned char*)origin, origin_len, 0, ENT_COMPAT, get_safe_charset_hint()); + replace_origin = escape_html(origin, origin_len); efree(origin); origin = ZSTR_VAL(replace_origin); } @@ -1335,7 +1318,7 @@ static ZEND_COLD void php_error_cb(int orig_type, const char *error_filename, co if (PG(html_errors)) { if (type == E_ERROR || type == E_PARSE) { - zend_string *buf = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint()); + zend_string *buf = escape_html(buffer, buffer_len); php_printf("%s
\n%s: %s in %s on line %" PRIu32 "
\n%s", STR_PRINT(prepend_string), error_type_str, ZSTR_VAL(buf), error_filename, error_lineno, STR_PRINT(append_string)); zend_string_free(buf); } else { diff --git a/sapi/cli/php_cli_server.c b/sapi/cli/php_cli_server.c index 1062c5b34c..dbee30700f 100644 --- a/sapi/cli/php_cli_server.c +++ b/sapi/cli/php_cli_server.c @@ -1978,7 +1978,7 @@ static int php_cli_server_send_error_page(php_cli_server *server, php_cli_server php_cli_server_content_sender_ctor(&client->content_sender); client->content_sender_initialized = 1; - escaped_request_uri = php_escape_html_entities_ex((unsigned char *)client->request.request_uri, client->request.request_uri_len, 0, ENT_QUOTES, NULL, 0); + escaped_request_uri = php_escape_html_entities_ex((unsigned char *)client->request.request_uri, client->request.request_uri_len, 0, ENT_QUOTES, NULL, /* double_encode */ 0, /* quiet */ 0); { static const char prologue_template[] = "%d %s"; diff --git a/sapi/fpm/fpm/fpm_status.c b/sapi/fpm/fpm/fpm_status.c index 36d2240635..2e4fff937a 100644 --- a/sapi/fpm/fpm/fpm_status.c +++ b/sapi/fpm/fpm/fpm_status.c @@ -517,7 +517,7 @@ int fpm_status_handle_request(void) /* {{{ */ if (!encode) { query_string = proc.query_string; } else { - tmp_query_string = php_escape_html_entities_ex((unsigned char *)proc.query_string, strlen(proc.query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, 1); + tmp_query_string = php_escape_html_entities_ex((unsigned char *)proc.query_string, strlen(proc.query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, /* double_encode */ 1, /* quiet */ 0); query_string = ZSTR_VAL(tmp_query_string); } }