int n, endian;
endian = filter->status & 0xff00;
- switch (filter->status & 0xff) {
+ switch (filter->status & 0x0f) {
case 0:
if (endian) {
n = c & 0xff;
n = c & 0xff;
}
n |= filter->cache & 0xffff;
- filter->status &= ~0xff;
- if (n == 0xfffe) {
- if (endian) {
- filter->status = 0; /* big-endian */
- } else {
- filter->status = 0x100; /* little-endian */
- }
- CK((*filter->output_function)(0xfeff, filter->data));
- } else if (n >= 0xd800 && n < 0xdc00) {
+ filter->status &= ~0x0f;
+ if (n >= 0xd800 && n < 0xdc00) {
filter->cache = ((n & 0x3ff) << 16) + 0x400000;
} else if (n >= 0xdc00 && n < 0xe000) {
n &= 0x3ff;
CK((*filter->output_function)(n, filter->data));
}
} else {
+ int is_first = filter->status & 0x10;
filter->cache = 0;
+ filter->status |= 0x10;
+ if (!is_first) {
+ if (n == 0xfffe) {
+ if (endian) {
+ filter->status &= ~0x100; /* big-endian */
+ } else {
+ filter->status |= 0x100; /* little-endian */
+ }
+ break;
+ } else if (n == 0xfeff) {
+ break;
+ }
+ }
CK((*filter->output_function)(n, filter->data));
}
break;
--- /dev/null
+--TEST--
+Bug #49528 (UTF-16 strings prefixed by BOM wrongly converted)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php // Each case converts a UTF-16 byte string to UCS-2BE and dumps the result as hex for comparison with --EXPECT--.
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); // leading FF FE is dropped; remaining units come out byte-swapped (02010403)
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); // leading FE FF is dropped; remaining units keep their byte order (01020304)
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); // only the first FF FE is dropped; the second is kept as data and comes out as feff
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); // after a leading FF FE, a following FE FF is kept as data and comes out byte-swapped as fffe
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); // after a leading FE FF, a following FF FE is kept as data (fffe); byte order stays unchanged
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); // only the first FE FF is dropped; the second is kept as data and comes out as feff
+?>
+--EXPECT--
+string(8) "02010403"
+string(8) "01020304"
+string(12) "feff02010403"
+string(12) "fffe02010403"
+string(12) "fffe01020304"
+string(12) "feff01020304"