From: Arnaud Le Blanc Date: Wed, 26 Nov 2008 02:57:32 +0000 (+0000) Subject: [DOC] Added ENT_IGNORE as a compatibility flag for htmlentities() and X-Git-Tag: BEFORE_HEAD_NS_CHANGES_MERGE~81 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eef1ad9ddff382f3035c1e82fad42a1c684a03bd;p=php [DOC] Added ENT_IGNORE as a compatibility flag for htmlentities() and htmlspecialchars() to skip invalid multibyte sequences instead of returning an empty string (as iconv's //IGNORE). These functions will still never return an invalid or incomplete multibyte sequence. Example: htmlspecialchars("...", ENT_QUOTES | ENT_IGNORE, "utf-8"); --- diff --git a/ext/standard/html.c b/ext/standard/html.c index 8ea7d9af2c..7245c0db4f 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -491,6 +491,7 @@ struct basic_entities_dec { #define CHECK_LEN(pos, chars_need) \ if((str_len - (pos)) < chars_need) { \ + *newpos = pos; \ *status = FAILURE; \ return 0; \ } @@ -535,6 +536,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, more = 0; if(stat) { /* we didn't finish the UTF sequence correctly */ + --pos; *status = FAILURE; } break; @@ -1138,6 +1140,9 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *ne if(status == FAILURE) { /* invalid MB sequence */ + if (quote_style & ENT_HTML_IGNORE_ERRORS) { + continue; + } efree(replaced); if(!PG(display_errors)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument"); @@ -1319,6 +1324,7 @@ void register_html_constants(INIT_FUNC_ARGS) REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS); + REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE, CONST_PERSISTENT|CONST_CS); } /* }}} */ diff --git a/ext/standard/html.h b/ext/standard/html.h index 9accb7fae0..597d59c771 100644 --- 
a/ext/standard/html.h +++ b/ext/standard/html.h @@ -24,10 +24,12 @@ #define ENT_HTML_QUOTE_NONE 0 #define ENT_HTML_QUOTE_SINGLE 1 #define ENT_HTML_QUOTE_DOUBLE 2 +#define ENT_HTML_IGNORE_ERRORS 4 #define ENT_COMPAT ENT_HTML_QUOTE_DOUBLE #define ENT_QUOTES (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE) #define ENT_NOQUOTES ENT_HTML_QUOTE_NONE +#define ENT_IGNORE ENT_HTML_IGNORE_ERRORS void register_html_constants(INIT_FUNC_ARGS); diff --git a/ext/standard/tests/strings/htmlentities-utf-2.phpt b/ext/standard/tests/strings/htmlentities-utf-2.phpt new file mode 100755 index 0000000000..a80100cb10 --- /dev/null +++ b/ext/standard/tests/strings/htmlentities-utf-2.phpt @@ -0,0 +1,70 @@ +--TEST-- +HTML entities with invalid chars and ENT_IGNORE +--INI-- +output_handler= +--FILE-- + +--EXPECTF-- +%unicode|string%(8) "266c743b" +%unicode|string%(8) "266c743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(8) "d090d0b0" +%unicode|string%(8) "d090d0b0" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "41" +%unicode|string%(2) "41" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "79" +%unicode|string%(2) "79" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "566f696c" +%unicode|string%(8) "566f696c" +%unicode|string%(12) "436c69636873" +%unicode|string%(12) "436c69636873" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "41" +%unicode|string%(2) "41" +%unicode|string%(4) "c3a9" +%unicode|string%(16) "266561637574653b" +%unicode|string%(2) "79" +%unicode|string%(2) "79" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(4) "4142" +%unicode|string%(4) "4142" +%unicode|string%(4) 
"4242" +%unicode|string%(4) "4242" +%unicode|string%(4) "4342" +%unicode|string%(4) "4342" +%unicode|string%(2) "44" +%unicode|string%(2) "44" +%unicode|string%(2) "45" +%unicode|string%(2) "45" +%unicode|string%(2) "46" +%unicode|string%(2) "46" diff --git a/ext/standard/tests/strings/htmlentities-utf.phpt b/ext/standard/tests/strings/htmlentities-utf.phpt index 9aad7f225f..b85803a163 100755 --- a/ext/standard/tests/strings/htmlentities-utf.phpt +++ b/ext/standard/tests/strings/htmlentities-utf.phpt @@ -4,8 +4,12 @@ HTML entities with invalid chars output_handler= --FILE-- ---EXPECT-- -unicode(8) "266c743b" -unicode(8) "266c743b" -unicode(0) "" -unicode(0) "" -unicode(4) "d090" -unicode(4) "d090" -unicode(0) "" -unicode(0) "" -unicode(8) "d090d0b0" -unicode(8) "d090d0b0" -unicode(0) "" -unicode(0) "" -unicode(0) "" -unicode(0) "" -unicode(0) "" -unicode(0) "" -unicode(8) "2667743b" -unicode(8) "2667743b" +--EXPECTF-- +%unicode|string%(8) "266c743b" +%unicode|string%(8) "266c743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "d090d0b0" +%unicode|string%(8) "d090d0b0" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "2667743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "c3a9" +%unicode|string%(16) "266561637574653b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" 
+%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) ""