From 7b624a37bc89019c33bbddb656f287ca82f8ddbc Mon Sep 17 00:00:00 2001
From: Rasmus Lerdorf
Date: Tue, 29 Jan 2008 23:21:20 +0000
Subject: [PATCH] Fixed bug #43957 (utf8_decode() bogus conversion of partial multibyte UTF8 char)

---
Note: every multibyte branch must confirm that the complete sequence lies
within the remaining input before reading its continuation bytes. The bound
is the sequence length: pos-4 for the four-byte form, pos-3 for the
three-byte form and pos-2 for the two-byte form. A truncated sequence is
decoded as '?'.

 ext/xml/tests/bug43957.phpt | 13 +++++++++++++
 ext/xml/xml.c               | 18 +++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)
 create mode 100644 ext/xml/tests/bug43957.phpt

diff --git a/ext/xml/tests/bug43957.phpt b/ext/xml/tests/bug43957.phpt
new file mode 100644
index 0000000000..34ddcd951d
--- /dev/null
+++ b/ext/xml/tests/bug43957.phpt
@@ -0,0 +1,13 @@
+--TEST--
+Bug #43957 - utf8_decode() bogus conversion on multibyte indicator near end of string
+--SKIPIF--
+
+--FILE--
+
+--EXPECTF--
+abc?
diff --git a/ext/xml/xml.c b/ext/xml/xml.c
index b9de7a9bd9..14bf2ff9e8 100644
--- a/ext/xml/xml.c
+++ b/ext/xml/xml.c
@@ -579,15 +579,27 @@ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_
 	while (pos > 0) {
 		c = (unsigned char)(*s);
 		if (c >= 0xf0) { /* four bytes encoded, 21 bits */
-			c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
+			if(pos-4 >= 0) {
+				c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
+			} else {
+				c = '?';
+			}
 			s += 4;
 			pos -= 4;
 		} else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
-			c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
+			if(pos-3 >= 0) {
+				c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
+			} else {
+				c = '?';
+			}
 			s += 3;
 			pos -= 3;
 		} else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
-			c = ((s[0]&63)<<6) | (s[1]&63);
+			if(pos-2 >= 0) {
+				c = ((s[0]&63)<<6) | (s[1]&63);
+			} else {
+				c = '?';
+			}
 			s += 2;
 			pos -= 2;
 		} else {
-- 
2.40.0