From 68dc7549988fcc929717a7b0c69e007000512fc1 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Tue, 6 Jun 2017 16:09:26 +0300 Subject: [PATCH] Avoid string reallocations in html_entity_decode() and htmlspecialchars_decode() --- ext/opcache/Optimizer/zend_func_info.c | 4 +-- ext/standard/html.c | 37 +++++++++++--------------- ext/standard/html.h | 2 +- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/ext/opcache/Optimizer/zend_func_info.c b/ext/opcache/Optimizer/zend_func_info.c index 33138ff6f4..5fdb61e84f 100644 --- a/ext/opcache/Optimizer/zend_func_info.c +++ b/ext/opcache/Optimizer/zend_func_info.c @@ -285,8 +285,8 @@ static const func_info_t func_infos[] = { F1("wordwrap", MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING), F1("htmlspecialchars", MAY_BE_NULL | MAY_BE_STRING), F1("htmlentities", MAY_BE_NULL | MAY_BE_STRING), - F1("html_entity_decode", MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING), - F1("htmlspecialchars_decode", MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING), + FN("html_entity_decode", MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING), + FN("htmlspecialchars_decode", MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING), F1("get_html_translation_table", MAY_BE_NULL | MAY_BE_ARRAY | MAY_BE_ARRAY_KEY_STRING | MAY_BE_ARRAY_OF_STRING), F1("sha1", MAY_BE_NULL | MAY_BE_STRING), F1("sha1_file", MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING), diff --git a/ext/standard/html.c b/ext/standard/html.c index c6643e2893..22f1f09a4c 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -1095,13 +1095,16 @@ static entity_table_opt determine_entity_table(int all, int doctype) * only the basic ones, i.e., those in basic_entities_ex + the numeric entities * that correspond to quotes. */ -PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset) +PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, char *hint_charset) { - size_t retlen; zend_string *ret; enum entity_charset charset; - const entity_ht *inverse_map = NULL; - size_t new_size = TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(oldlen); + const entity_ht *inverse_map; + size_t new_size; + + if (!memchr(ZSTR_VAL(str), '&', ZSTR_LEN(str))) { + return zend_string_copy(str); + } if (all) { charset = determine_charset(hint_charset); @@ -1111,26 +1114,19 @@ PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen /* don't use LIMIT_ALL! */ - if (oldlen > new_size) { + new_size = TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(ZSTR_LEN(str)); + if (ZSTR_LEN(str) > new_size) { /* overflow, refuse to do anything */ - ret = zend_string_init((char*)old, oldlen, 0); - retlen = oldlen; - goto empty_source; + return zend_string_copy(str); } + ret = zend_string_alloc(new_size, 0); - ZSTR_VAL(ret)[0] = '\0'; - ZSTR_LEN(ret) = oldlen; - retlen = oldlen; - if (retlen == 0) { - goto empty_source; - } inverse_map = unescape_inverse_map(all, flags); /* replace numeric entities */ - traverse_for_entities((char*)old, oldlen, ret, all, flags, inverse_map, charset); + traverse_for_entities(ZSTR_VAL(str), ZSTR_LEN(str), ret, all, flags, inverse_map, charset); -empty_source: return ret; } /* }}} */ @@ -1495,18 +1491,17 @@ PHP_FUNCTION(htmlspecialchars) Convert special HTML entities back to characters */ PHP_FUNCTION(htmlspecialchars_decode) { - char *str; - size_t str_len; + zend_string *str; zend_long quote_style = ENT_COMPAT; zend_string *replaced; ZEND_PARSE_PARAMETERS_START(1, 2) - Z_PARAM_STRING(str, str_len) + Z_PARAM_STR(str) Z_PARAM_OPTIONAL Z_PARAM_LONG(quote_style) ZEND_PARSE_PARAMETERS_END(); - replaced = php_unescape_html_entities((unsigned char*)str, str_len, 0 /*!all*/, (int)quote_style, NULL); + replaced = php_unescape_html_entities(str, 0 /*!all*/, (int)quote_style, NULL); if (replaced) { RETURN_STR(replaced); } @@ -1533,7 +1528,7 @@ PHP_FUNCTION(html_entity_decode) if (!hint_charset) { default_charset = get_default_charset(); } - replaced = php_unescape_html_entities((unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), 1 /*all*/, (int)quote_style, (hint_charset ? ZSTR_VAL(hint_charset) : default_charset)); + replaced = php_unescape_html_entities(str, 1 /*all*/, (int)quote_style, (hint_charset ? ZSTR_VAL(hint_charset) : default_charset)); if (replaced) { RETURN_STR(replaced); diff --git a/ext/standard/html.h b/ext/standard/html.h index e090aab692..9ef23bc322 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -56,7 +56,7 @@ PHP_FUNCTION(get_html_translation_table); PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset); PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode); -PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset); +PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, char *hint_charset); PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status); #endif /* HTML_H */ -- 2.50.1