Forgot to merge this back in January
--- /dev/null
+--TEST--
+Bug #46944 (json_encode() doesn't handle 3 byte utf8 correctly)
+--SKIPIF--
+<?php if (!extension_loaded('json')) print 'skip'; ?>
+--FILE--
+<?php
+
+for ($i = 1; $i <= 16; $i++) {
+ $first = 0xf0|($i >> 2);
+ $second = 0x8f|($i & 3) << 4;
+ $string = sprintf("aa%c%c\xbf\xbdzz", $first, $second);
+ echo json_encode($string) . "\n";
+}
+
+
+echo "Done\n";
+?>
+--EXPECT--
+"aa\ud83f\udffdzz"
+"aa\ud87f\udffdzz"
+"aa\ud8bf\udffdzz"
+"aa\ud8ff\udffdzz"
+"aa\ud93f\udffdzz"
+"aa\ud97f\udffdzz"
+"aa\ud9bf\udffdzz"
+"aa\ud9ff\udffdzz"
+"aa\uda3f\udffdzz"
+"aa\uda7f\udffdzz"
+"aa\udabf\udffdzz"
+"aa\udaff\udffdzz"
+"aa\udb3f\udffdzz"
+"aa\udb7f\udffdzz"
+"aa\udbbf\udffdzz"
+"aa\udbff\udffdzz"
+Done
/*
Three continuation (65536 to 1114111)
*/
- if ((c & 0xF1) == 0xF0) {
+ if ((c & 0xF8) == 0xF0) {
int c1 = cont(utf8);
int c2 = cont(utf8);
int c3 = cont(utf8);
w[the_index] = (unsigned short)c;
the_index += 1;
} else {
- c &= 0xFFFF;
+ c -= 0x10000;
w[the_index] = (unsigned short)(0xD800 | (c >> 10));
the_index += 1;
w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));