From: Ilia Alshanetsky Date: Tue, 22 May 2007 12:37:00 +0000 (+0000) Subject: [DOC] Added a 4th parameter flag to htmlspecialchars() and htmlentities() X-Git-Tag: php-5.2.3RC1~23 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c98cbb6020fff5f35fcefd7a92a63157467ef863;p=php [DOC] Added a 4th parameter flag to htmlspecialchars() and htmlentities() that makes the function not encode existing html entities. The feature is disabled by default and can be activated by passing FALSE as the 4th param --- diff --git a/NEWS b/NEWS index 7fb9a4c214..1d5c84ab2c 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ PHP NEWS - Optimized out a couple of per-request syscalls (Rasmus) - Optimized digest generation in md5() and sha1() functions. (Ilia) - Upgraded SQLite 3 to version 3.3.16 (Ilia) +- Added a 4th parameter flag to htmlspecialchars() and htmlentities() that + makes the function not encode existing html entities. (Ilia) - Added PDO::FETCH_KEY_PAIR mode that will fetch a 2 column result set into an associated array. (Ilia) - Added function mysql_set_charset(). 
Allows connection encoding to be diff --git a/ext/standard/html.c b/ext/standard/html.c index e2badee474..8ac7b417c5 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -1078,12 +1078,15 @@ empty_source: } /* }}} */ - +PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC) +{ + return php_escape_html_entities_ex(old, oldlen, newlen, all, quote_style, hint_charset, 1 TSRMLS_CC); +} /* {{{ php_escape_html_entities */ -PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC) +PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC) { int i, j, maxlen, len; char *replaced; @@ -1145,8 +1148,34 @@ PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newle int is_basic = 0; if (this_char == '&') { - memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1); - len += sizeof("&amp;") - 1; + if (double_encode) { +encode_amp: + memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1); + len += sizeof("&amp;") - 1; + } else { + char *e = memchr(old + i, ';', len - i); + char *s = old + i + 1; + + if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */ + goto encode_amp; + } else { + if (*s == '#') { /* numeric entities */ + s++; + while (s < e) { + if (!isdigit(*s++)) { + goto encode_amp; + } + } + } else { /* text entities */ + while (s < e) { + if (!isalnum(*s++)) { + goto encode_amp; + } + } + } + replaced[len++] = '&'; + } + } is_basic = 1; } else { for (j = 0; basic_entities[j].charcode != 0; j++) { @@ -1193,12 +1222,13 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all) int len; long quote_style = ENT_COMPAT; char *replaced; + zend_bool double_encode = 1; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len, &quote_style, 
&hint_charset, &hint_charset_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lsb", &str, &str_len, &quote_style, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) { return; } - replaced = php_escape_html_entities(str, str_len, &len, all, quote_style, hint_charset TSRMLS_CC); + replaced = php_escape_html_entities_ex(str, str_len, &len, all, quote_style, hint_charset, double_encode TSRMLS_CC); RETVAL_STRINGL(replaced, len, 0); } /* }}} */ diff --git a/ext/standard/html.h b/ext/standard/html.h index 3e896e18b5..fec44bb0e4 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -38,6 +38,7 @@ PHP_FUNCTION(html_entity_decode); PHP_FUNCTION(get_html_translation_table); PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC); +PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC); PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC); #endif /* HTML_H */ diff --git a/ext/standard/tests/strings/htmlentities18.phpt b/ext/standard/tests/strings/htmlentities18.phpt new file mode 100644 index 0000000000..f171adaca2 --- /dev/null +++ b/ext/standard/tests/strings/htmlentities18.phpt @@ -0,0 +1,31 @@ +--TEST-- +htmlentities() / htmlspecialchars() "don't double encode" flag support +--FILE-- + +--EXPECT-- +string(3) "abc" +string(3) "abc" +string(13) "abc&amp;sfdsa" +string(13) "abc&amp;sfdsa" +string(33) "test&#043;s &amp; some more &#68;" +string(33) "test&#043;s &amp; some more &#68;" +string(20) "&; &amp;amp &#a; &9;" +string(20) "&; &amp;amp &#a; &9;" +string(32) "&amp;kffjadfdhsjfhjasdhffasdfas;" +string(32) "&amp;kffjadfdhsjfhjasdhffasdfas;" +string(16) "&amp;#8787978789" +string(16) "&amp;#8787978789"