From bfcb754eae94c5dbb3940253c773e1fba67ba04f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Gustavo=20Andr=C3=A9=20dos=20Santos=20Lopes?= Date: Thu, 14 Oct 2010 19:14:06 +0000 Subject: [PATCH] - Fixed get_next_char(), used by htmlentities/htmlspecialchars, accepting certain ill-formed UTF-8 sequences. --- ext/standard/html.c | 6 +- .../tests/strings/htmlentities-utf-2.phpt | 4 +- .../tests/strings/htmlentities-utf-3.phpt | 83 +++++++++++++++++++ .../tests/strings/htmlentities-utf.phpt | 4 +- 4 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 ext/standard/tests/strings/htmlentities-utf-3.phpt diff --git a/ext/standard/html.c b/ext/standard/html.c index d32246d513..5d683e237b 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -129,7 +129,7 @@ inline static unsigned int get_next_char(enum entity_charset charset, MB_WRITE(c); this_char = c; pos++; - } else if (c < 0xc0) { + } else if (c < 0xc2) { MB_FAILURE(pos); } else if (c < 0xe0) { CHECK_LEN(pos, 2); @@ -161,7 +161,7 @@ inline static unsigned int get_next_char(enum entity_charset charset, MB_WRITE((unsigned char)str[pos + 1]); MB_WRITE((unsigned char)str[pos + 2]); pos += 3; - } else if (c < 0xf8) { + } else if (c < 0xf5) { CHECK_LEN(pos, 4); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { MB_FAILURE(pos); @@ -173,7 +173,7 @@ inline static unsigned int get_next_char(enum entity_charset charset, MB_FAILURE(pos); } this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); - if (this_char < 0x10000) { + if (this_char < 0x10000 || this_char > 0x10FFFF) { MB_FAILURE(pos); } MB_WRITE((unsigned char)c); diff --git a/ext/standard/tests/strings/htmlentities-utf-2.phpt b/ext/standard/tests/strings/htmlentities-utf-2.phpt index c5f4ac4ea6..d515dc0ff1 100755 --- a/ext/standard/tests/strings/htmlentities-utf-2.phpt +++ b/ext/standard/tests/strings/htmlentities-utf-2.phpt @@ -50,8 +50,8 @@ foreach($strings as $string) { %unicode|string%(16) "266561637574653b" %unicode|string%(2) "79" %unicode|string%(2) "79" -%unicode|string%(8) "f7bfbfbf" -%unicode|string%(8) "f7bfbfbf" +%unicode|string%(0) "" +%unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(0) "" diff --git a/ext/standard/tests/strings/htmlentities-utf-3.phpt b/ext/standard/tests/strings/htmlentities-utf-3.phpt new file mode 100644 index 0000000000..c28917ba16 --- /dev/null +++ b/ext/standard/tests/strings/htmlentities-utf-3.phpt @@ -0,0 +1,83 @@ +--TEST-- +Test get_next_char(), used by htmlentities()/htmlspecialchars(): validity of UTF-8 sequences +--FILE-- += $l[0][0] && $b <= $l[0][1]) { + if (count($l) != strlen($seq)) { + return false; + } + for ($n = 1; $n < strlen($seq); $n++) { + if (ord($seq[$n]) < $l[$n][0] || ord($seq[$n]) > $l[$n][1]) { + return false; + } + } + return true; + } + } + return false; +} + +function concordance($s) { + $vhe = strlen(htmlspecialchars($s, ENT_QUOTES, "UTF-8")) > 0; + $v = is_valid($s); + return ($vhe === $v); +} + +for ($b1 = 0xC0; $b1 < 0xE0; $b1++) { + for ($b2 = 0x80; $b2 < 0xBF; $b2++) { + $s = chr($b1).chr($b2); + if (!concordance($s)) + echo "Discordance for ".bin2hex($s),"\n"; + } +} + + +for ($b1 = 0xE0; $b1 < 0xEF; $b1++) { + for ($b2 = 0x80; $b2 < 0xBF; $b2++) { + $s = chr($b1).chr($b2)."\x80"; + if (!concordance($s)) + echo "Discordance for ".bin2hex($s),"\n"; + $s = chr($b1).chr($b2)."\xBF"; + if (!concordance($s)) + echo "Discordance for ".bin2hex($s),"\n"; + } +} + +for ($b1 = 0xF0; $b1 < 0xFF; $b1++) { + for ($b2 = 0x80; $b2 < 0xBF; $b2++) { + $s = chr($b1).chr($b2)."\x80\x80"; + if (!concordance($s)) + echo "Discordance for ".bin2hex($s),"\n"; + $s = chr($b1).chr($b2)."\xBF\x80"; + if (!concordance($s)) + echo "Discordance for ".bin2hex($s),"\n"; + $s = chr($b1).chr($b2)."\x80\xBF"; + if (!concordance($s)) + echo "Discordance for ".bin2hex($s),"\n"; + $s = chr($b1).chr($b2)."\xBF\xBF"; + if (!concordance($s)) + echo "Discordance for ".bin2hex($s),"\n"; + } +} +echo "Done.\n"; +--EXPECT-- +Done. diff --git a/ext/standard/tests/strings/htmlentities-utf.phpt b/ext/standard/tests/strings/htmlentities-utf.phpt index 1daafc61d8..6a66e4df45 100755 --- a/ext/standard/tests/strings/htmlentities-utf.phpt +++ b/ext/standard/tests/strings/htmlentities-utf.phpt @@ -50,8 +50,8 @@ foreach($strings as $string) { %unicode|string%(16) "266561637574653b" %unicode|string%(0) "" %unicode|string%(0) "" -%unicode|string%(8) "f7bfbfbf" -%unicode|string%(8) "f7bfbfbf" +%unicode|string%(0) "" +%unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(0) "" -- 2.40.0