From: Rasmus Lerdorf Date: Tue, 29 Jan 2008 23:21:11 +0000 (+0000) Subject: Fixed bug #43957 (utf8_decode() bogus conversion of partial multibyte UTF8 char) X-Git-Tag: RELEASE_1_3_1~229 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9ae934d5c4f96e3d151455e6dfb6450912d0d2fb;p=php Fixed bug #43957 (utf8_decode() bogus conversion of partial multibyte UTF8 char) --- diff --git a/ext/xml/tests/bug43957.phpt b/ext/xml/tests/bug43957.phpt new file mode 100644 index 0000000000..34ddcd951d --- /dev/null +++ b/ext/xml/tests/bug43957.phpt @@ -0,0 +1,13 @@ +--TEST-- +Bug #43957 - utf8_decode() bogus conversion on multibyte indicator near end of string +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +abc? diff --git a/ext/xml/xml.c b/ext/xml/xml.c index 863a95013d..f605778ce9 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -579,15 +579,27 @@ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_ while (pos > 0) { c = (unsigned char)(*s); if (c >= 0xf0) { /* four bytes encoded, 21 bits */ - c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63); + if(pos-4 >= 0) { + c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63); + } else { + c = '?'; + } s += 4; pos -= 4; } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */ - c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63); + if(pos-3 >= 0) { + c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63); + } else { + c = '?'; + } s += 3; pos -= 3; } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */ - c = ((s[0]&63)<<6) | (s[1]&63); + if(pos-2 >= 0) { + c = ((s[0]&63)<<6) | (s[1]&63); + } else { + c = '?'; + } s += 2; pos -= 2; } else {