c = str[pos];
if (c < 0x80) {
MB_WRITE(c);
- this_char = c;
+ this_char = c;
pos++;
} else if (c < 0xc0) {
MB_FAILURE(pos);
} else if (c < 0xe0) {
CHECK_LEN(pos, 2);
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
if (this_char < 0x80) {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
MB_WRITE((unsigned char)c);
MB_WRITE((unsigned char)str[pos + 1]);
} else if (c < 0xf0) {
CHECK_LEN(pos, 3);
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
if (this_char < 0x800) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
MB_WRITE((unsigned char)c);
MB_WRITE((unsigned char)str[pos + 1]);
} else if (c < 0xf8) {
CHECK_LEN(pos, 4);
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
if (this_char < 0x10000) {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
MB_WRITE((unsigned char)c);
MB_WRITE((unsigned char)str[pos + 1]);
MB_WRITE((unsigned char)str[pos + 3]);
pos += 4;
} else {
- MB_FAILURE(pos);
+ MB_FAILURE(pos);
}
}
break;
var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
+echo "--\n";
+// UTF-8: with ENT_IGNORE (invalid sequences are dropped, valid ones are kept)
+var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+
echo "--\n";
// UTF-8: overlong (non-shortest-form, invalid) UTF-8 sequence
var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
string(0) ""
string(0) ""
--
+string(4) "c280"
+string(6) "e0a080"
+string(8) "f0908080"
+--
string(0) ""
string(0) ""
string(0) ""