granicus.if.org Git - php/commitdiff
- Fix bug #49528 (UTF-16 strings prefixed by BOM wrongly converted).
author Moriyoshi Koizumi <moriyoshi@php.net>
Fri, 11 Sep 2009 08:22:19 +0000 (08:22 +0000)
committer Moriyoshi Koizumi <moriyoshi@php.net>
Fri, 11 Sep 2009 08:22:19 +0000 (08:22 +0000)
ext/mbstring/libmbfl/filters/mbfilter_utf16.c
ext/mbstring/tests/bug49528.phpt [new file with mode: 0644]

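diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf16.c b/ext/mbstring/libmbfl/filters/mbfilter_utf16.c
index b6c2aeda5b3e16911883d2cb227cd2075e3fb5aa..5df6551d4dd027eb713644390c9ca74ec5caa234 100644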
@@ -127,7 +127,7 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
        int n, endian;
 
        endian = filter->status & 0xff00;
-       switch (filter->status & 0xff) {
+       switch (filter->status & 0x0f) {
        case 0:
                if (endian) {
                        n = c & 0xff;
@@ -144,15 +144,8 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
                        n = c & 0xff;
                }
                n |= filter->cache & 0xffff;
-               filter->status &= ~0xff;
-               if (n == 0xfffe) {
-                       if (endian) {
-                               filter->status = 0;             /* big-endian */
-                       } else {
-                               filter->status = 0x100;         /* little-endian */
-                       }
-                       CK((*filter->output_function)(0xfeff, filter->data));
-               } else if (n >= 0xd800 && n < 0xdc00) {
+               filter->status &= ~0x0f;
+               if (n >= 0xd800 && n < 0xdc00) {
                        filter->cache = ((n & 0x3ff) << 16) + 0x400000;
                } else if (n >= 0xdc00 && n < 0xe000) {
                        n &= 0x3ff;
@@ -166,7 +159,21 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
                                CK((*filter->output_function)(n, filter->data));
                        }
                } else {
+                       int is_first = filter->status & 0x10;
                        filter->cache = 0;
+                       filter->status |= 0x10;
+                       if (!is_first) {
+                               if (n == 0xfffe) {
+                                       if (endian) {
+                                               filter->status &= ~0x100;               /* big-endian */
+                                       } else {
+                                               filter->status |= 0x100;                /* little-endian */
+                                       }
+                                       break;
+                               } else if (n == 0xfeff) {
+                                       break;
+                               }
+                       }
                        CK((*filter->output_function)(n, filter->data));
                }
                break;
diff --git a/ext/mbstring/tests/bug49528.phpt b/ext/mbstring/tests/bug49528.phpt
new file mode 100644 (file)
index 0000000..b06c35f
--- /dev/null
@@ -0,0 +1,20 @@
+--TEST--
+Bug #49528 (UTF-16 strings prefixed by BOM wrongly converted)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16")));
+?>
+--EXPECT--
+string(8) "02010403"
+string(8) "01020304"
+string(12) "feff02010403"
+string(12) "fffe02010403"
+string(12) "fffe01020304"
+string(12) "feff01020304"