granicus.if.org Git - php/commitdiff
Avoid string reallocations in html_entity_decode() and htmlspecialchars_decode()
author Dmitry Stogov <dmitry@zend.com>
Tue, 6 Jun 2017 13:09:26 +0000 (16:09 +0300)
committer Dmitry Stogov <dmitry@zend.com>
Tue, 6 Jun 2017 13:09:26 +0000 (16:09 +0300)
ext/opcache/Optimizer/zend_func_info.c
ext/standard/html.c
ext/standard/html.h

index 33138ff6f4f881c689fec8acd0f7775fcaf13ec1..5fdb61e84f73419723ae9858a814fae9292d8713 100644 (file)
@@ -285,8 +285,8 @@ static const func_info_t func_infos[] = {
        F1("wordwrap",                     MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING),
        F1("htmlspecialchars",             MAY_BE_NULL | MAY_BE_STRING),
        F1("htmlentities",                 MAY_BE_NULL | MAY_BE_STRING),
-       F1("html_entity_decode",           MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING),
-       F1("htmlspecialchars_decode",      MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING),
+       FN("html_entity_decode",           MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING),
+       FN("htmlspecialchars_decode",      MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING),
        F1("get_html_translation_table",   MAY_BE_NULL | MAY_BE_ARRAY | MAY_BE_ARRAY_KEY_STRING | MAY_BE_ARRAY_OF_STRING),
        F1("sha1",                         MAY_BE_NULL | MAY_BE_STRING),
        F1("sha1_file",                    MAY_BE_NULL | MAY_BE_FALSE | MAY_BE_STRING),
index c6643e2893bf42256ff45604b05cbb98db5b7f7a..22f1f09a4c55cf140671899b503e6445adbbb3a7 100644 (file)
@@ -1095,13 +1095,16 @@ static entity_table_opt determine_entity_table(int all, int doctype)
  * only the basic ones, i.e., those in basic_entities_ex + the numeric entities
  * that correspond to quotes.
  */
-PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset)
+PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, char *hint_charset)
 {
-       size_t retlen;
        zend_string *ret;
        enum entity_charset charset;
-       const entity_ht *inverse_map = NULL;
-       size_t new_size = TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(oldlen);
+       const entity_ht *inverse_map;
+       size_t new_size;
+
+       if (!memchr(ZSTR_VAL(str), '&', ZSTR_LEN(str))) {
+               return zend_string_copy(str);
+       }
 
        if (all) {
                charset = determine_charset(hint_charset);
@@ -1111,26 +1114,19 @@ PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen
 
        /* don't use LIMIT_ALL! */
 
-       if (oldlen > new_size) {
+       new_size = TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(ZSTR_LEN(str));
+       if (ZSTR_LEN(str) > new_size) {
                /* overflow, refuse to do anything */
-               ret = zend_string_init((char*)old, oldlen, 0);
-               retlen = oldlen;
-               goto empty_source;
+               return zend_string_copy(str);
        }
+
        ret = zend_string_alloc(new_size, 0);
-       ZSTR_VAL(ret)[0] = '\0';
-       ZSTR_LEN(ret) = oldlen;
-       retlen = oldlen;
-       if (retlen == 0) {
-               goto empty_source;
-       }
 
        inverse_map = unescape_inverse_map(all, flags);
 
        /* replace numeric entities */
-       traverse_for_entities((char*)old, oldlen, ret, all, flags, inverse_map, charset);
+       traverse_for_entities(ZSTR_VAL(str), ZSTR_LEN(str), ret, all, flags, inverse_map, charset);
 
-empty_source:
        return ret;
 }
 /* }}} */
@@ -1495,18 +1491,17 @@ PHP_FUNCTION(htmlspecialchars)
    Convert special HTML entities back to characters */
 PHP_FUNCTION(htmlspecialchars_decode)
 {
-       char *str;
-       size_t str_len;
+       zend_string *str;
        zend_long quote_style = ENT_COMPAT;
        zend_string *replaced;
 
        ZEND_PARSE_PARAMETERS_START(1, 2)
-               Z_PARAM_STRING(str, str_len)
+               Z_PARAM_STR(str)
                Z_PARAM_OPTIONAL
                Z_PARAM_LONG(quote_style)
        ZEND_PARSE_PARAMETERS_END();
 
-       replaced = php_unescape_html_entities((unsigned char*)str, str_len, 0 /*!all*/, (int)quote_style, NULL);
+       replaced = php_unescape_html_entities(str, 0 /*!all*/, (int)quote_style, NULL);
        if (replaced) {
                RETURN_STR(replaced);
        }
@@ -1533,7 +1528,7 @@ PHP_FUNCTION(html_entity_decode)
        if (!hint_charset) {
                default_charset = get_default_charset();
        }
-       replaced = php_unescape_html_entities((unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), 1 /*all*/, (int)quote_style, (hint_charset ? ZSTR_VAL(hint_charset) : default_charset));
+       replaced = php_unescape_html_entities(str, 1 /*all*/, (int)quote_style, (hint_charset ? ZSTR_VAL(hint_charset) : default_charset));
 
        if (replaced) {
                RETURN_STR(replaced);
index e090aab692c18a08e04e4a50008d7e5b5da56582..9ef23bc322ec12968d40f3a2a81ed620cc724825 100644 (file)
@@ -56,7 +56,7 @@ PHP_FUNCTION(get_html_translation_table);
 
 PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset);
 PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode);
-PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset);
+PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, char *hint_charset);
 PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status);
 
 #endif /* HTML_H */