]> granicus.if.org Git - php/commitdiff
[DOC] Added ENT_IGNORE as a compatibility flag for htmlentities() and
authorArnaud Le Blanc <lbarnaud@php.net>
Wed, 26 Nov 2008 02:57:32 +0000 (02:57 +0000)
committerArnaud Le Blanc <lbarnaud@php.net>
Wed, 26 Nov 2008 02:57:32 +0000 (02:57 +0000)
htmlspecialchars() to skip invalid multibyte sequences instead of returning an
empty string (as iconv's //IGNORE). These functions will still never
return an invalid or incomplete multibyte sequence.
Example: htmlspecialchars("...", ENT_QUOTES | ENT_IGNORE, "utf-8");

ext/standard/html.c
ext/standard/html.h
ext/standard/tests/strings/htmlentities-utf-2.phpt [new file with mode: 0755]
ext/standard/tests/strings/htmlentities-utf.phpt

index 8ea7d9af2ca423f5b44e0dfaac9b8b55d27a2b17..7245c0db4f5326e9fe02b7eef4ae67cf06619d3e 100644 (file)
@@ -491,6 +491,7 @@ struct basic_entities_dec {
 
 #define CHECK_LEN(pos, chars_need)                     \
        if((str_len - (pos)) < chars_need) {    \
+               *newpos = pos;                                          \
                *status = FAILURE;                                      \
                return 0;                                                       \
        }
@@ -535,6 +536,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                                more = 0;
                                                if(stat) {
                                                        /* we didn't finish the UTF sequence correctly */
+                                                       --pos;
                                                        *status = FAILURE;
                                                }
                                                break;
@@ -1138,6 +1140,9 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *ne
 
                if(status == FAILURE) {
                        /* invalid MB sequence */
+                       if (quote_style & ENT_HTML_IGNORE_ERRORS) {
+                               continue;
+                       }
                        efree(replaced);
                        if(!PG(display_errors)) {
                                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument");
@@ -1319,6 +1324,7 @@ void register_html_constants(INIT_FUNC_ARGS)
        REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS);
        REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS);
        REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS);
+       REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE, CONST_PERSISTENT|CONST_CS);
 }
 /* }}} */
 
index 9accb7fae05afbb62e2cb7eaa5eccba652d6e7df..597d59c771072b4bc285cd329c02bc824d6b52f3 100644 (file)
 #define ENT_HTML_QUOTE_NONE            0
 #define ENT_HTML_QUOTE_SINGLE  1
 #define ENT_HTML_QUOTE_DOUBLE  2
+#define ENT_HTML_IGNORE_ERRORS 4
 
 #define ENT_COMPAT    ENT_HTML_QUOTE_DOUBLE
 #define ENT_QUOTES    (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE)
 #define ENT_NOQUOTES  ENT_HTML_QUOTE_NONE
+#define ENT_IGNORE    ENT_HTML_IGNORE_ERRORS
 
 void register_html_constants(INIT_FUNC_ARGS);
 
diff --git a/ext/standard/tests/strings/htmlentities-utf-2.phpt b/ext/standard/tests/strings/htmlentities-utf-2.phpt
new file mode 100755 (executable)
index 0000000..a80100c
--- /dev/null
@@ -0,0 +1,70 @@
+--TEST--
+HTML entities with invalid chars and ENT_IGNORE
+--INI--
+output_handler=
+--FILE--
+<?php 
+@setlocale (LC_CTYPE, "C");
+$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE", 
+       b"Voil\xE0", b"Clich\xE9s",
+       b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
+       b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
+       );
+foreach($strings as $string) {
+       $sc_encoded = htmlspecialchars ($string, ENT_QUOTES | ENT_IGNORE, "utf-8");
+       var_dump(bin2hex($sc_encoded));
+       $ent_encoded = htmlentities ($string, ENT_QUOTES | ENT_IGNORE, "utf-8");
+       var_dump(bin2hex($ent_encoded));
+}
+?>
+--EXPECTF--
+%unicode|string%(8) "266c743b"
+%unicode|string%(8) "266c743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(2) "41"
+%unicode|string%(2) "41"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(2) "79"
+%unicode|string%(2) "79"
+%unicode|string%(8) "2667743b"
+%unicode|string%(8) "2667743b"
+%unicode|string%(8) "566f696c"
+%unicode|string%(8) "566f696c"
+%unicode|string%(12) "436c69636873"
+%unicode|string%(12) "436c69636873"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(2) "41"
+%unicode|string%(2) "41"
+%unicode|string%(4) "c3a9"
+%unicode|string%(16) "266561637574653b"
+%unicode|string%(2) "79"
+%unicode|string%(2) "79"
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(4) "4142"
+%unicode|string%(4) "4142"
+%unicode|string%(4) "4242"
+%unicode|string%(4) "4242"
+%unicode|string%(4) "4342"
+%unicode|string%(4) "4342"
+%unicode|string%(2) "44"
+%unicode|string%(2) "44"
+%unicode|string%(2) "45"
+%unicode|string%(2) "45"
+%unicode|string%(2) "46"
+%unicode|string%(2) "46"
index 9aad7f225f36e7c864c8db5277d8ee5d4cebc428..b85803a16324b67c1096f23dca175c57876151f3 100755 (executable)
@@ -4,8 +4,12 @@ HTML entities with invalid chars
 output_handler=
 --FILE--
 <?php 
-setlocale (LC_CTYPE, "C");
-$strings = array("<", "\xD0", "\xD0\x90", "\xD0\x90\xD0", "\xD0\x90\xD0\xB0", "\xE0", "A\xE0", "\xE0\x80", "\xE0\x80\xBE");
+@setlocale (LC_CTYPE, "C");
+$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE",
+       b"Voil\xE0", b"Clich\xE9s",
+       b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
+       b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
+       );
 foreach($strings as $string) {
        $sc_encoded = htmlspecialchars ($string, ENT_QUOTES, "utf-8");
        var_dump(bin2hex($sc_encoded));
@@ -13,22 +17,54 @@ foreach($strings as $string) {
        var_dump(bin2hex($ent_encoded));
 }
 ?>
---EXPECT--
-unicode(8) "266c743b"
-unicode(8) "266c743b"
-unicode(0) ""
-unicode(0) ""
-unicode(4) "d090"
-unicode(4) "d090"
-unicode(0) ""
-unicode(0) ""
-unicode(8) "d090d0b0"
-unicode(8) "d090d0b0"
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(8) "2667743b"
-unicode(8) "2667743b"
+--EXPECTF--
+%unicode|string%(8) "266c743b"
+%unicode|string%(8) "266c743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "2667743b"
+%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "c3a9"
+%unicode|string%(16) "266561637574653b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""