int
mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
{
- int mode_backup, ret, n, m, r;
+ int mode_backup, substchar_backup, ret, n, m, r;
ret = 0;
+
mode_backup = filter->illegal_mode;
- filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
+ substchar_backup = filter->illegal_substchar;
+
+ /* The configured substitution character may not be representable in the target
+ * character encoding. If that happens, first try to use "?" instead; if that also
+ * fails, silently drop the character. */
+ if (filter->illegal_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR
+ && filter->illegal_substchar != 0x3f) {
+ filter->illegal_substchar = 0x3f;
+ } else {
+ filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
+ }
+
switch (mode_backup) {
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR:
- ret = (*filter->filter_function)(filter->illegal_substchar, filter);
+ ret = (*filter->filter_function)(substchar_backup, filter);
break;
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
if (c >= 0) {
}
ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
} else {
- ret = (*filter->filter_function)(filter->illegal_substchar, filter);
+ ret = (*filter->filter_function)(substchar_backup, filter);
}
}
break;
default:
break;
}
+
filter->illegal_mode = mode_backup;
+ filter->illegal_substchar = substchar_backup;
filter->num_illegalchar++;
return ret;
if (convd == NULL) {
return -1;
}
+
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
return NULL;
}
- mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
-
- if (string.no_encoding == MBSTRG(current_internal_encoding)->no_encoding) {
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
- } else if (php_mb_is_no_encoding_unicode(string.no_encoding) && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
-
- if (php_mb_is_no_encoding_utf8(string.no_encoding)) {
-
- if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff &&
- 0xe000 > MBSTRG(current_filter_illegal_substchar)
- ) {
- mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
- } else {
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
- }
-
- } else {
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
- }
- } else {
- mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
- }
+ mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
/* do it */
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
if (php_mb_is_no_encoding_utf8(no_enc)) {
if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) {
- if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) {
- cp = MBSTRG(current_filter_illegal_substchar);
- } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
- if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) {
- cp = MBSTRG(current_filter_illegal_substchar);
- } else {
- cp = 0x3f;
- }
- } else {
- cp = 0x3f;
- }
+ cp = MBSTRG(current_filter_illegal_substchar);
}
if (cp < 0x80) {
var_dump("?" === mb_convert_encoding("\x80", "Shift_JIS", "EUC-JP"));
mb_internal_encoding("UCS-4BE");
var_dump("\x00\x00\xff\xfd" === mb_convert_encoding("\x80", "UCS-4BE", "UTF-8"));
+
+mb_internal_encoding("UTF-8");
+mb_substitute_character(0xfffd);
+var_dump("\u{fffd}" === mb_convert_encoding("\x80", "UTF-8", "EUC-JP-2004"));
+
?>
--EXPECT--
bool(true)
bool(true)
+bool(true)
mb_internal_encoding("EUC-JP");
mb_substitute_character(0xa4a2);
var_dump(
- "?" === mb_chr(0xd800, "UTF-8")
+ "\u{a4a2}" === mb_chr(0xd800, "UTF-8")
);
// Invalid
string(14) "3f3f3f3f3f3f3f"
string(14) "42424242424242"
string(0) ""
-string(0) ""
+string(14) "3f3f3f3f3f3f3f"
===DONE===