granicus.if.org Git - php/commitdiff
Fixed bug #43957 (utf8_decode() bogus conversion of partial multibyte UTF8 char)
author: Rasmus Lerdorf <rasmus@php.net>
Tue, 29 Jan 2008 23:21:20 +0000 (23:21 +0000)
committer: Rasmus Lerdorf <rasmus@php.net>
Tue, 29 Jan 2008 23:21:20 +0000 (23:21 +0000)
ext/xml/tests/bug43957.phpt [new file with mode: 0644]
ext/xml/xml.c

diff --git a/ext/xml/tests/bug43957.phpt b/ext/xml/tests/bug43957.phpt
new file mode 100644 (file)
index 0000000..34ddcd9
--- /dev/null
+++ b/ext/xml/tests/bug43957.phpt
@@ -0,0 +1,13 @@
+--TEST--
+Bug #43957 - utf8_decode() bogus conversion on multibyte indicator near end of string
+--SKIPIF--
+<?php
+require_once("skipif.inc");
+if (!extension_loaded('xml')) die ("skip xml extension not available");
+?>
+--FILE--
+<?php
+  echo utf8_decode('abc'.chr(0xe0));
+?>
+--EXPECTF--
+abc?
diff --git a/ext/xml/xml.c b/ext/xml/xml.c
index b9de7a9bd9d17ffd79918c1d0189df709369e7ee..14bf2ff9e856f94027c5af4f1dd225c8ebba9a52 100644 (file)
--- a/ext/xml/xml.c
+++ b/ext/xml/xml.c
@@ -579,15 +579,27 @@ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_
        while (pos > 0) {
                c = (unsigned char)(*s);
                if (c >= 0xf0) { /* four bytes encoded, 21 bits */
-                       c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
+                       if(pos-4 >= 0) {
+                               c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
+                       } else {
+                               c = '?';        
+                       }
                        s += 4;
                        pos -= 4;
                } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
-                       c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
+                       if(pos-3 >= 0) {
+                               c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
+                       } else {
+                               c = '?';
+                       }
                        s += 3;
                        pos -= 3;
                } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
-                       c = ((s[0]&63)<<6) | (s[1]&63);
+                       if(pos-3 >= 0) {
+                               c = ((s[0]&63)<<6) | (s[1]&63);
+                       } else {
+                               c = '?';
+                       }
                        s += 2;
                        pos -= 2;
                } else {