granicus.if.org Git - php/commitdiff
* Merge two patches from 5.3 branch (intentionally uncommitted because 5.2.11
authorMoriyoshi Koizumi <moriyoshi@php.net>
Wed, 23 Sep 2009 14:26:51 +0000 (14:26 +0000)
committerMoriyoshi Koizumi <moriyoshi@php.net>
Wed, 23 Sep 2009 14:26:51 +0000 (14:26 +0000)
  was about to come at that time).

NEWS
ext/mbstring/libmbfl/filters/mbfilter_utf16.c
ext/mbstring/libmbfl/filters/mbfilter_utf8.c
ext/mbstring/libmbfl/mbfl/mbfilter.c
ext/mbstring/tests/bug49528.phpt [new file with mode: 0644]
ext/mbstring/tests/bug49536.phpt [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index 174a7c576f85cd0c8d0846b5bdb2ed9f7f71a2a8..7f1a7b3515482f1904db234d4ed7d4d68693e73d 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -3,8 +3,12 @@ PHP                                                                        NEWS
 ?? ??? ????, PHP 5.2.12
 - Fixed bug #49630 (imap_listscan function missing). (Felipe)
 - Fixed bug #49578 (make install-pear fails). (Hannes)
+- Fixed bug #49536 (mb_detect_encoding() returns incorrect results when
+  mbstring.strict_mode is turned on). (Moriyoshi)
 - Fixed bug #49531 (CURLOPT_INFILESIZE sometimes causes warning "CURLPROTO_FILE
   cannot be set"). (Felipe)
+- Fixed bug #49528 (UTF-16 strings prefixed by BOMs wrongly converted).
+  (Moriyoshi)
 
 17 Sep 2009, PHP 5.2.11
 - Fixed certificate validation inside php_openssl_apply_verification_policy.
index b6c2aeda5b3e16911883d2cb227cd2075e3fb5aa..5df6551d4dd027eb713644390c9ca74ec5caa234 100644 (file)
@@ -127,7 +127,7 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
        int n, endian;
 
        endian = filter->status & 0xff00;
-       switch (filter->status & 0xff) {
+       switch (filter->status & 0x0f) {
        case 0:
                if (endian) {
                        n = c & 0xff;
@@ -144,15 +144,8 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
                        n = c & 0xff;
                }
                n |= filter->cache & 0xffff;
-               filter->status &= ~0xff;
-               if (n == 0xfffe) {
-                       if (endian) {
-                               filter->status = 0;             /* big-endian */
-                       } else {
-                               filter->status = 0x100;         /* little-endian */
-                       }
-                       CK((*filter->output_function)(0xfeff, filter->data));
-               } else if (n >= 0xd800 && n < 0xdc00) {
+               filter->status &= ~0x0f;
+               if (n >= 0xd800 && n < 0xdc00) {
                        filter->cache = ((n & 0x3ff) << 16) + 0x400000;
                } else if (n >= 0xdc00 && n < 0xe000) {
                        n &= 0x3ff;
@@ -166,7 +159,21 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
                                CK((*filter->output_function)(n, filter->data));
                        }
                } else {
+                       int is_first = filter->status & 0x10;
                        filter->cache = 0;
+                       filter->status |= 0x10;
+                       if (!is_first) {
+                               if (n == 0xfffe) {
+                                       if (endian) {
+                                               filter->status &= ~0x100;               /* big-endian */
+                                       } else {
+                                               filter->status |= 0x100;                /* little-endian */
+                                       }
+                                       break;
+                               } else if (n == 0xfeff) {
+                                       break;
+                               }
+                       }
                        CK((*filter->output_function)(n, filter->data));
                }
                break;
index 8b95897eac7519ae0def4f7de894aa88308feea5..55027dc2685bcf24c197d3cddde9274c5dad6579 100644 (file)
@@ -215,7 +215,7 @@ static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter)
        if (c < 0x80) {
                if (c < 0) { 
                        filter->flag = 1;       /* bad */
-               } else if (c != 0 && filter->status) {
+               } else if (filter->status) {
                        filter->flag = 1;       /* bad */
                }
                filter->status = 0;
index 1aeb38cc9b7ddcec3d3961a762ec1819e1d26dc0..4997c5a8866dcfa02ce4502ad3a12a789aa8f48e 100644 (file)
@@ -622,7 +622,7 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el
        if (!encoding) {
                for (i = 0; i < num; i++) {
                        filter = &flist[i];
-                       if (!filter->flag) {
+                       if (!filter->flag && (!strict || !filter->status)) {
                                encoding = filter->encoding;
                                break;
                        }
diff --git a/ext/mbstring/tests/bug49528.phpt b/ext/mbstring/tests/bug49528.phpt
new file mode 100644 (file)
index 0000000..b06c35f
--- /dev/null
@@ -0,0 +1,20 @@
+--TEST--
+Bug #49528 (UTF-16 strings prefixed by BOM wrongly converted)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+?>
+--EXPECT--
+string(8) "02010403"
+string(8) "01020304"
+string(12) "feff02010403"
+string(12) "fffe02010403"
+string(12) "fffe01020304"
+string(12) "feff01020304"
diff --git a/ext/mbstring/tests/bug49536.phpt b/ext/mbstring/tests/bug49536.phpt
new file mode 100644 (file)
index 0000000..32685d2
--- /dev/null
@@ -0,0 +1,20 @@
+--TEST--
+Bug #49536 (mb_detect_encoding() returns incorrect results when strict_mode is turned on)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+// non-strict mode
+var_dump(mb_detect_encoding("A\x81", "SJIS", false));
+// strict mode
+var_dump(mb_detect_encoding("A\x81", "SJIS", true));
+// non-strict mode
+var_dump(mb_detect_encoding("\xc0\x00", "UTF-8", false));
+// strict mode
+var_dump(mb_detect_encoding("\xc0\x00", "UTF-8", true));
+?>
+--EXPECT--
+string(4) "SJIS"
+bool(false)
+string(5) "UTF-8"
+bool(false)