granicus.if.org Git - php/commitdiff
Add quiet parameter to internal HTML entities API
author: Nikita Popov <nikita.ppv@gmail.com>
Thu, 7 May 2020 13:46:08 +0000 (15:46 +0200)
committer: Nikita Popov <nikita.ppv@gmail.com>
Thu, 7 May 2020 13:46:08 +0000 (15:46 +0200)
In some places, we need to make sure that no warnings are thrown
due to unknown encoding. The error reporting code tried to avoid
this by determining a "safe charset", but this introduces subtle
discrepancies in which charset is picked (normally
internal_encoding takes precedence). Avoid this by suppressing
the warning in the first place.

While here, use the fallback logic to print error messages with
substitution characters more consistently, to avoid skipping
parts of the error message entirely.

ext/filter/sanitizing_filters.c
ext/standard/html.c
ext/standard/html.h
ext/standard/tests/strings/bug68996.phpt
ext/standard/url_scanner_ex.re
main/main.c
sapi/cli/php_cli_server.c
sapi/fpm/fpm/fpm_status.c

index 25b2f39fde38f873aa8a883bb5237e95d42c05cb..9243b419cf543a6ab70f7f6b10b11407b868bc91 100644 (file)
@@ -251,7 +251,9 @@ void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
        } else {
                quotes = ENT_NOQUOTES;
        }
-       buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
+       buf = php_escape_html_entities_ex(
+               (unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
+               /* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
        zval_ptr_dtor(value);
        ZVAL_STR(value, buf);
 }
index 93069371070abb81f103f794ed9686c56e1ab652..b29dcd535e615606615b581daa5df37253690cf5 100644 (file)
@@ -367,7 +367,7 @@ static inline unsigned int get_next_char(
 /* {{{ entity_charset determine_charset
  * returns the charset identifier based on current locale or a hint.
  * defaults to UTF-8 */
-static enum entity_charset determine_charset(char *charset_hint)
+static enum entity_charset determine_charset(char *charset_hint, zend_bool quiet)
 {
        size_t i;
        const zend_encoding *zenc;
@@ -401,8 +401,10 @@ det_charset:
                        }
                }
 
-               php_error_docref(NULL, E_WARNING, "Charset `%s' not supported, assuming utf-8",
-                               charset_hint);
+               if (!quiet) {
+                       php_error_docref(NULL, E_WARNING, "Charset `%s' not supported, assuming utf-8",
+                                       charset_hint);
+               }
        }
        return cs_utf_8;
 }
@@ -1006,7 +1008,7 @@ PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int fl
        }
 
        if (all) {
-               charset = determine_charset(hint_charset);
+               charset = determine_charset(hint_charset, /* quiet */ 0);
        } else {
                charset = cs_8859_1; /* charset shouldn't matter, use ISO-8859-1 for performance */
        }
@@ -1030,9 +1032,9 @@ PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int fl
 }
 /* }}} */
 
-PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset)
+PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset)
 {
-       return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1);
+       return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1, /* quiet */ 0);
 }
 
 /* {{{ find_entity_for_char */
@@ -1042,7 +1044,7 @@ static inline void find_entity_for_char(
        const entity_stage1_row *table,
        const unsigned char **entity,
        size_t *entity_len,
-       unsigned char *old,
+       const unsigned char *old,
        size_t oldlen,
        size_t *cursor)
 {
@@ -1118,11 +1120,11 @@ static inline void find_entity_for_char_basic(
 
 /* {{{ php_escape_html_entities
  */
-PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode)
+PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode, zend_bool quiet)
 {
        size_t cursor, maxlen, len;
        zend_string *replaced;
-       enum entity_charset charset = determine_charset(hint_charset);
+       enum entity_charset charset = determine_charset(hint_charset, quiet);
        int doctype = flags & ENT_HTML_DOC_TYPE_MASK;
        entity_table_opt entity_table;
        const enc_to_uni *to_uni_table = NULL;
@@ -1132,7 +1134,7 @@ PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldle
        size_t replacement_len = 0;
 
        if (all) { /* replace with all named entities */
-               if (CHARSET_PARTIAL_SUPPORT(charset)) {
+               if (!quiet && CHARSET_PARTIAL_SUPPORT(charset)) {
                        php_error_docref(NULL, E_NOTICE, "Only basic entities "
                                "substitution is supported for multi-byte encodings other than UTF-8; "
                                "functionality is equivalent to htmlspecialchars");
@@ -1349,7 +1351,7 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
 
        replaced = php_escape_html_entities_ex(
                (unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), all, (int) flags,
-               hint_charset ? ZSTR_VAL(hint_charset) : NULL, double_encode);
+               hint_charset ? ZSTR_VAL(hint_charset) : NULL, double_encode, /* quiet */ 0);
        RETVAL_STR(replaced);
 }
 /* }}} */
@@ -1519,7 +1521,7 @@ PHP_FUNCTION(get_html_translation_table)
                Z_PARAM_STRING(charset_hint, charset_hint_len)
        ZEND_PARSE_PARAMETERS_END();
 
-       charset = determine_charset(charset_hint);
+       charset = determine_charset(charset_hint, /* quiet */ 0);
        doctype = flags & ENT_HTML_DOC_TYPE_MASK;
        LIMIT_ALL(all, doctype, charset);
 
index 2a8d24ccdeb6ac863dd10c104e9f361f22a4536b..818ee1a0ca8e4082d3a29cc3d394cd3879781c3f 100644 (file)
@@ -44,8 +44,8 @@
 
 void register_html_constants(INIT_FUNC_ARGS);
 
-PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset);
-PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode);
+PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset);
+PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode, zend_bool quiet);
 PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, char *hint_charset);
 PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status);
 
index bd5be022fd6408a920064f981929cafb14faabcd..260138c51bd19ec140d8dd4fd69684946e16c04a 100644 (file)
@@ -6,6 +6,6 @@ html_errors=1
 <?php
 fopen("\xfc\x63", "r");
 ?>
 --EXPECTF--
 <br />
-<b>Warning</b>:  : Failed to open stream: No such file or directory in <b>%sbug68996.php</b> on line <b>%d</b><br />
+<b>Warning</b>:  fopen(�c): Failed to open stream: No such file or directory in <b>%sbug68996.php</b> on line <b>%d</b><br />
index a83a91b534f3420326425114cc2aa393cde743bb..d80bd0d2a103e1f498da7afc213117e205bc2d87 100644 (file)
@@ -758,9 +758,9 @@ static inline int php_url_scanner_add_var_impl(char *name, size_t name_len, char
                smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
                encoded = php_raw_url_encode(value, value_len);
                smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
-               encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
+               encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
                smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
-               encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
+               encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
                smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
        } else {
                smart_str_appendl(&sname, name, name_len);
@@ -860,7 +860,7 @@ static inline int php_url_scanner_reset_var_impl(zend_string *name, int encode,
                encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name));
                smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
                zend_string_free(encoded);
-               encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
+               encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), /* double_encode */ 0, /* quiet */ 1);
                smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
                zend_string_free(encoded);
        } else {
index dc9182e48114a247da97d65260b89e070430a013..98aaa5beceb241e6dd3c4e7d4cff16ed7cf3b8f9 100644 (file)
@@ -95,31 +95,6 @@ PHPAPI size_t core_globals_offset;
 
 #define SAFE_FILENAME(f) ((f)?(f):"-")
 
-static char *get_safe_charset_hint(void) {
-       ZEND_TLS char *lastHint = NULL;
-       ZEND_TLS char *lastCodeset = NULL;
-       char *hint = SG(default_charset);
-       size_t len = strlen(hint);
-       size_t i = 0;
-
-       if (lastHint == SG(default_charset)) {
-               return lastCodeset;
-       }
-
-       lastHint = hint;
-       lastCodeset = NULL;
-
-       for (i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) {
-               if (len == charset_map[i].codeset_len
-                       && zend_binary_strcasecmp(hint, len, charset_map[i].codeset, len) == 0) {
-                       lastCodeset = (char*)charset_map[i].codeset;
-                       break;
-               }
-       }
-
-       return lastCodeset;
-}
-
 /* {{{ PHP_INI_MH
  */
 static PHP_INI_MH(OnSetFacility)
@@ -937,6 +912,19 @@ PHPAPI size_t php_printf(const char *format, ...)
 }
 /* }}} */
 
+static zend_string *escape_html(const char *buffer, size_t buffer_len) {
+       zend_string *result = php_escape_html_entities_ex(
+               (const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT,
+               /* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1);
+       if (!result || ZSTR_LEN(result) == 0) {
+               /* Retry with substituting invalid chars on fail. */
+               result = php_escape_html_entities_ex(
+                       (const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS,
+                       /* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1);
+       }
+       return result;
+}
+
 /* {{{ php_verror */
 /* php_verror is called from php_error_docref<n> functions.
  * Its purpose is to unify error messages and automatically generate clickable
@@ -962,12 +950,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ
        buffer_len = (int)vspprintf(&buffer, 0, format, args);
 
        if (PG(html_errors)) {
-               replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint());
-               /* Retry with substituting invalid chars on fail. */
-               if (!replace_buffer || ZSTR_LEN(replace_buffer) < 1) {
-                       replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS, get_safe_charset_hint());
-               }
-
+               replace_buffer = escape_html(buffer, buffer_len);
                efree(buffer);
 
                if (replace_buffer) {
@@ -1032,7 +1015,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ
        }
 
        if (PG(html_errors)) {
-               replace_origin = php_escape_html_entities((unsigned char*)origin, origin_len, 0, ENT_COMPAT, get_safe_charset_hint());
+               replace_origin = escape_html(origin, origin_len);
                efree(origin);
                origin = ZSTR_VAL(replace_origin);
        }
@@ -1335,7 +1318,7 @@ static ZEND_COLD void php_error_cb(int orig_type, const char *error_filename, co
 
                                if (PG(html_errors)) {
                                        if (type == E_ERROR || type == E_PARSE) {
-                                               zend_string *buf = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint());
+                                               zend_string *buf = escape_html(buffer, buffer_len);
                                                php_printf("%s<br />\n<b>%s</b>:  %s in <b>%s</b> on line <b>%" PRIu32 "</b><br />\n%s", STR_PRINT(prepend_string), error_type_str, ZSTR_VAL(buf), error_filename, error_lineno, STR_PRINT(append_string));
                                                zend_string_free(buf);
                                        } else {
index 1062c5b34cfcda4cb2785732bdab1ae907209c1d..dbee30700f834fa785530c40598cf4bdaf417525 100644 (file)
@@ -1978,7 +1978,7 @@ static int php_cli_server_send_error_page(php_cli_server *server, php_cli_server
        php_cli_server_content_sender_ctor(&client->content_sender);
        client->content_sender_initialized = 1;
 
-       escaped_request_uri = php_escape_html_entities_ex((unsigned char *)client->request.request_uri, client->request.request_uri_len, 0, ENT_QUOTES, NULL, 0);
+       escaped_request_uri = php_escape_html_entities_ex((unsigned char *)client->request.request_uri, client->request.request_uri_len, 0, ENT_QUOTES, NULL, /* double_encode */ 0, /* quiet */ 0);
 
        {
                static const char prologue_template[] = "<!doctype html><html><head><title>%d %s</title>";
index 36d224063583001a234ebeeb0160e41cac60581c..2e4fff937a82a18c27d313eb0fa7cdb394a2ad8c 100644 (file)
@@ -517,7 +517,7 @@ int fpm_status_handle_request(void) /* {{{ */
                                        if (!encode) {
                                                query_string = proc.query_string;
                                        } else {
-                                               tmp_query_string = php_escape_html_entities_ex((unsigned char *)proc.query_string, strlen(proc.query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, 1);
+                                               tmp_query_string = php_escape_html_entities_ex((unsigned char *)proc.query_string, strlen(proc.query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, /* double_encode */ 1, /* quiet */ 0);
                                                query_string = ZSTR_VAL(tmp_query_string);
                                        }
                                }