]> granicus.if.org Git - php/commitdiff
MFB htmlentities() / htmlspecialchars() "don't double encode" flag support
authorDmitry Stogov <dmitry@php.net>
Tue, 10 Jul 2007 15:12:15 +0000 (15:12 +0000)
committerDmitry Stogov <dmitry@php.net>
Tue, 10 Jul 2007 15:12:15 +0000 (15:12 +0000)
ext/standard/html.c

index 30770f2dc2cb0e850bab2927ce8c87fdf72f25eb..2df5658d350da99326a4c44139fa42468f46cb04 100644 (file)
@@ -1092,9 +1092,8 @@ empty_source:
 
 /* {{{ php_escape_html_entities
  */
-PHPAPI char *php_escape_html_entities(char *orig, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
+PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC)
 {
-       unsigned char *old = (unsigned char *)orig;
        int i, j, maxlen, len;
        char *replaced;
        enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
@@ -1155,8 +1154,34 @@ PHPAPI char *php_escape_html_entities(char *orig, int oldlen, int *newlen, int a
                        int is_basic = 0;
 
                        if (this_char == '&') {
-                               memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1);
-                               len += sizeof("&amp;") - 1;
+                               if (double_encode) {
+encode_amp:
+                                       memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1);
+                                       len += sizeof("&amp;") - 1;
+                               } else {
+                                       char *e = memchr(old + i, ';', oldlen - i);
+                                       char *s = (char*)old + i;
+
+                                       if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */
+                                               goto encode_amp;
+                                       } else {
+                                               if (*s == '#') { /* numeric entities */
+                                                       s++;
+                                                       while (s < e) {
+                                                               if (!isdigit(*s++)) {
+                                                                       goto encode_amp;
+                                                               }
+                                                       }
+                                               } else { /* text entities */
+                                                       while (s < e) {
+                                                               if (!isalnum(*s++)) {
+                                                                       goto encode_amp;
+                                                               }
+                                                       }
+                                               }
+                                               replaced[len++] = '&';
+                                       }
+                               }
                                is_basic = 1;
                        } else {
                                for (j = 0; basic_entities[j].charcode != 0; j++) {
@@ -1194,6 +1219,11 @@ PHPAPI char *php_escape_html_entities(char *orig, int oldlen, int *newlen, int a
 }
 /* }}} */
 
+PHPAPI char *php_escape_html_entities(char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
+{
+       return php_escape_html_entities_ex((unsigned char*)old, oldlen, newlen, all, quote_style, hint_charset, 1 TSRMLS_CC);
+}
+
 /* {{{ php_html_entities
  */
 static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
@@ -1207,8 +1237,9 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
        long quote_style = ENT_COMPAT;
        zend_uchar type;
        char *replaced;
+       zend_bool double_encode = 1;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls", &str, &str_len, &type, &quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls!b", &str, &str_len, &type, &quote_style, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) {
                return;
        }
 
@@ -1219,7 +1250,7 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
                hint_charset = "utf-8";
        }
 
-       replaced = php_escape_html_entities(str.s, str_len, &len, all, quote_style, hint_charset TSRMLS_CC);
+       replaced = php_escape_html_entities_ex((unsigned char*)str.s, str_len, &len, all, quote_style, hint_charset, double_encode TSRMLS_CC);
 
        if (type == IS_UNICODE) {
                RETVAL_U_STRINGL(UG(utf8_conv), replaced, len, ZSTR_AUTOFREE);
@@ -1245,7 +1276,7 @@ void register_html_constants(INIT_FUNC_ARGS)
 }
 /* }}} */
 
-/* {{{ proto string htmlspecialchars(string string [, int quote_style][, string charset]) U
+/* {{{ proto string htmlspecialchars(string string [, int quote_style[, string charset[, bool double_encode]]])
    Convert special characters to HTML entities */
 PHP_FUNCTION(htmlspecialchars)
 {
@@ -1380,7 +1411,7 @@ PHP_FUNCTION(html_entity_decode)
 /* }}} */
 
 
-/* {{{ proto string htmlentities(string string [, int quote_style][, string charset]) U
+/* {{{ proto string htmlentities(string string [, int quote_style[, string charset[, bool double_encode]]])
    Convert all applicable characters to HTML entities */
 PHP_FUNCTION(htmlentities)
 {