granicus.if.org Git - php/commitdiff
[DOC] Added a 4th parameter flag to htmlspecialchars() and htmlentities()
author Ilia Alshanetsky <iliaa@php.net>
Tue, 22 May 2007 12:37:00 +0000 (12:37 +0000)
committer Ilia Alshanetsky <iliaa@php.net>
Tue, 22 May 2007 12:37:00 +0000 (12:37 +0000)
that makes the function not encode existing html entities. The feature is
disabled by default and can be activated by passing FALSE as the 4th param

NEWS
ext/standard/html.c
ext/standard/html.h
ext/standard/tests/strings/htmlentities18.phpt [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index 7fb9a4c21459ba9ce69c58d4916ae8e4a633c890..1d5c84ab2c932d194986c0ef93ee95d6a9fab5ef 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,8 @@ PHP                                                                        NEWS
 - Optimized out a couple of per-request syscalls (Rasmus)
 - Optimized digest generation in md5() and sha1() functions. (Ilia)
 - Upgraded SQLite 3 to version 3.3.16 (Ilia)
+- Added a 4th parameter flag to htmlspecialchars() and htmlentities() that 
+  makes the function not encode existing html entities. (Ilia)
 - Added PDO::FETCH_KEY_PAIR mode that will fetch a 2 column result set into 
   an associated array. (Ilia)
 - Added function mysql_set_charset(). Allows connection encoding to be 
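diff --git a/ext/standard/html.c b/ext/standard/html.c
index e2badee4748fa5097b266ff6a3acf52e56c5b2cb..8ac7b417c5fa96827b0317fce8cabf543676b241 100644 (file)
--- a/ext/standard/html.c
+++ b/ext/standard/html.c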
@@ -1078,12 +1078,15 @@ empty_source:
 }
 /* }}} */
 
-
+PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
+{ /* Thin wrapper: passes every argument through unchanged to php_escape_html_entities_ex() */
+       return php_escape_html_entities_ex(old, oldlen, newlen, all, quote_style, hint_charset, 1 TSRMLS_CC); /* double_encode = 1, so every '&' is written out as "&amp;" */
+}
 
 
 /* {{{ php_escape_html_entities
  */
-PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
+PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC)
 {
        int i, j, maxlen, len;
        char *replaced;
@@ -1145,8 +1148,34 @@ PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newle
                        int is_basic = 0;
 
                        if (this_char == '&') {
-                               memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1);
-                               len += sizeof("&amp;") - 1;
+                               if (double_encode) {
+encode_amp:
+                                       memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1);
+                                       len += sizeof("&amp;") - 1;
+                               } else {
+                                       char *e = memchr(old + i, ';', len - i);
+                                       char *s = old + i + 1;
+
+                                       if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */
+                                               goto encode_amp;
+                                       } else {
+                                               if (*s == '#') { /* numeric entities */
+                                                       s++;
+                                                       while (s < e) {
+                                                               if (!isdigit(*s++)) {
+                                                                       goto encode_amp;
+                                                               }
+                                                       }
+                                               } else { /* text entities */
+                                                       while (s < e) {
+                                                               if (!isalnum(*s++)) {
+                                                                       goto encode_amp;
+                                                               }
+                                                       }
+                                               }
+                                               replaced[len++] = '&';
+                                       }
+                               }
                                is_basic = 1;
                        } else {
                                for (j = 0; basic_entities[j].charcode != 0; j++) {
@@ -1193,12 +1222,13 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
        int len;
        long quote_style = ENT_COMPAT;
        char *replaced;
+       zend_bool double_encode = 1;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len, &quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lsb", &str, &str_len, &quote_style, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) {
                return;
        }
 
-       replaced = php_escape_html_entities(str, str_len, &len, all, quote_style, hint_charset TSRMLS_CC);
+       replaced = php_escape_html_entities_ex(str, str_len, &len, all, quote_style, hint_charset, double_encode TSRMLS_CC);
        RETVAL_STRINGL(replaced, len, 0);
 }
 /* }}} */
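diff --git a/ext/standard/html.h b/ext/standard/html.h
index 3e896e18b5c8bc92d9ace5fe05b5e642a792c300..fec44bb0e4500dd193d4953bfc2c87bf10bee737 100644 (file)
--- a/ext/standard/html.h
+++ b/ext/standard/html.h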
@@ -38,6 +38,7 @@ PHP_FUNCTION(html_entity_decode);
 PHP_FUNCTION(get_html_translation_table);
 
 PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC);
+PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC);
 PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC);
 
 #endif /* HTML_H */
diff --git a/ext/standard/tests/strings/htmlentities18.phpt b/ext/standard/tests/strings/htmlentities18.phpt
new file mode 100644 (file)
index 0000000..f171ada
--- /dev/null
@@ -0,0 +1,31 @@
+--TEST--
+htmlentities() / htmlspecialchars() "don't double encode" flag support
+--FILE--
+<?php
+$tests = array( // inputs fed to both htmlentities() and htmlspecialchars() below
+       "abc", // no ampersand at all
+       "abc&amp;sfdsa", // a single well-formed named entity
+       "test&#043;s &amp; some more &#68;", // mix of decimal numeric and named entities
+       "&; &amp &#a; &9;", // entity-like sequences: empty name, no terminating ';', non-digit after '#', digit-only name
+       "&kffjadfdhsjfhjasdhffasdfas;", // name far longer than the 10-character cut-off used by the C code
+       "&#8787978789", // numeric reference with no terminating ';'
+);
+
+foreach ($tests as $test) { // each input is dumped twice, once per function
+       var_dump(htmlentities($test, ENT_QUOTES, NULL, FALSE)); // 4th argument FALSE is the "don't double encode" flag under test
+       var_dump(htmlspecialchars($test, ENT_QUOTES, NULL, FALSE)); // same arguments, htmlspecialchars() variant
+}
+?>
+--EXPECT--
+string(3) "abc"
+string(3) "abc"
+string(13) "abc&amp;sfdsa"
+string(13) "abc&amp;sfdsa"
+string(33) "test&#043;s &amp; some more &#68;"
+string(33) "test&#043;s &amp; some more &#68;"
+string(20) "&; &amp;amp &#a; &9;"
+string(20) "&; &amp;amp &#a; &9;"
+string(32) "&amp;kffjadfdhsjfhjasdhffasdfas;"
+string(32) "&amp;kffjadfdhsjfhjasdhffasdfas;"
+string(16) "&amp;#8787978789"
+string(16) "&amp;#8787978789"