From: Arnaud Le Blanc Date: Wed, 26 Nov 2008 03:00:06 +0000 (+0000) Subject: MFH: Added ENT_IGNORE as a compatibility flag for htmlentities() and X-Git-Tag: php-5.3.0alpha2~72 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=18794addbdd5f0463e9ac27396115716243ef930;p=php MFH: Added ENT_IGNORE as a compatibility flag for htmlentities() and htmlspecialchars() to skip multibyte sequences instead of returning an empty string (as iconv's //IGNORE). These functions will still never return an invalid or incomplete multibyte sequence. Fixes #43896 --- diff --git a/ext/standard/html.c b/ext/standard/html.c index 7a2d9cd8c7..14b4fab941 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -485,6 +485,7 @@ struct basic_entities_dec { #define CHECK_LEN(pos, chars_need) \ if((str_len - (pos)) < chars_need) { \ + *newpos = pos; \ *status = FAILURE; \ return 0; \ } @@ -529,6 +530,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, more = 0; if(stat) { /* we didn't finish the UTF sequence correctly */ + --pos; *status = FAILURE; } break; @@ -1135,6 +1137,9 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *ne if(status == FAILURE) { /* invalid MB sequence */ + if (quote_style & ENT_HTML_IGNORE_ERRORS) { + continue; + } efree(replaced); if(!PG(display_errors)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument"); @@ -1293,6 +1298,7 @@ void register_html_constants(INIT_FUNC_ARGS) REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS); + REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE, CONST_PERSISTENT|CONST_CS); } /* }}} */ diff --git a/ext/standard/html.h b/ext/standard/html.h index 76a27418aa..003496ca2e 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -24,10 +24,12 @@ #define 
ENT_HTML_QUOTE_NONE 0 #define ENT_HTML_QUOTE_SINGLE 1 #define ENT_HTML_QUOTE_DOUBLE 2 +#define ENT_HTML_IGNORE_ERRORS 4 #define ENT_COMPAT ENT_HTML_QUOTE_DOUBLE #define ENT_QUOTES (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE) #define ENT_NOQUOTES ENT_HTML_QUOTE_NONE +#define ENT_IGNORE ENT_HTML_IGNORE_ERRORS void register_html_constants(INIT_FUNC_ARGS); diff --git a/ext/standard/tests/strings/htmlentities-utf-2.phpt b/ext/standard/tests/strings/htmlentities-utf-2.phpt new file mode 100755 index 0000000000..a80100cb10 --- /dev/null +++ b/ext/standard/tests/strings/htmlentities-utf-2.phpt @@ -0,0 +1,70 @@ +--TEST-- +HTML entities with invalid chars and ENT_IGNORE +--INI-- +output_handler= +--FILE-- + +--EXPECTF-- +%unicode|string%(8) "266c743b" +%unicode|string%(8) "266c743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(8) "d090d0b0" +%unicode|string%(8) "d090d0b0" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "41" +%unicode|string%(2) "41" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "79" +%unicode|string%(2) "79" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "566f696c" +%unicode|string%(8) "566f696c" +%unicode|string%(12) "436c69636873" +%unicode|string%(12) "436c69636873" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "41" +%unicode|string%(2) "41" +%unicode|string%(4) "c3a9" +%unicode|string%(16) "266561637574653b" +%unicode|string%(2) "79" +%unicode|string%(2) "79" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(4) "4142" +%unicode|string%(4) "4142" +%unicode|string%(4) "4242" +%unicode|string%(4) "4242" +%unicode|string%(4) "4342" 
+%unicode|string%(4) "4342" +%unicode|string%(2) "44" +%unicode|string%(2) "44" +%unicode|string%(2) "45" +%unicode|string%(2) "45" +%unicode|string%(2) "46" +%unicode|string%(2) "46" diff --git a/ext/standard/tests/strings/htmlentities-utf.phpt b/ext/standard/tests/strings/htmlentities-utf.phpt index 6b83afc778..b85803a163 100755 --- a/ext/standard/tests/strings/htmlentities-utf.phpt +++ b/ext/standard/tests/strings/htmlentities-utf.phpt @@ -4,8 +4,12 @@ HTML entities with invalid chars output_handler= --FILE-- ---EXPECT-- -string(8) "266c743b" -string(8) "266c743b" -string(0) "" -string(0) "" -string(4) "d090" -string(4) "d090" -string(0) "" -string(0) "" -string(8) "d090d0b0" -string(8) "d090d0b0" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(8) "2667743b" -string(8) "2667743b" \ No newline at end of file +--EXPECTF-- +%unicode|string%(8) "266c743b" +%unicode|string%(8) "266c743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "d090d0b0" +%unicode|string%(8) "d090d0b0" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "2667743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "c3a9" +%unicode|string%(16) "266561637574653b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(0) "" +%unicode|string%(0) "" 
+%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) ""