granicus.if.org Git - php/commitdiff
SJIS-2004 encoding conversion: handle invalid (or truncated) 2nd byte for Kanji correctly
author Alex Dowad <alexinbeijing@gmail.com>
Tue, 8 Sep 2020 20:57:28 +0000 (22:57 +0200)
committer Alex Dowad <alexinbeijing@gmail.com>
Wed, 11 Nov 2020 09:18:58 +0000 (11:18 +0200)
If the 2nd byte of a 2-byte character is invalid, then mb_substitute_character()
should be respected. Instead, mbstring was 'swallowing' the first byte and then
emitting the 2nd byte as if it were an ASCII character.

Likewise, if the 2nd byte is missing, instead of just keeping quiet, report an
illegal character as specified by mb_substitute_character().

ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c
ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c
ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h

index d832358e1d85d77007810920ab1994b7afd7b766..a14d37ec93bf97be73a22be3f776bc311f349fe8 100644 (file)
@@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = {
        mbfl_filt_conv_common_ctor,
        NULL,
        mbfl_filt_conv_wchar_jis2004,
-       mbfl_filt_conv_jis2004_flush,
+       mbfl_filt_conv_wchar_jis2004_flush,
        NULL,
 };
index ce9267a2ecd67d8a7523cdbb0c6ddd6ad8ba311d..a8f2a1da536074e252504c6992c9761f331abced 100644 (file)
@@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = {
        mbfl_filt_conv_common_ctor,
        NULL,
        mbfl_filt_conv_wchar_jis2004,
-       mbfl_filt_conv_jis2004_flush,
+       mbfl_filt_conv_wchar_jis2004_flush,
        NULL,
 };
index 8be40f02aaa4f814706c6106ef66c5c74508a427..a81cd29b705378d4028c21cb5c5697016b3924a3 100644 (file)
@@ -57,7 +57,7 @@ const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = {
        mbfl_filt_conv_common_ctor,
        NULL,
        mbfl_filt_conv_jis2004_wchar,
-       mbfl_filt_conv_common_flush,
+       mbfl_filt_conv_jis2004_wchar_flush,
        NULL,
 };
 
@@ -67,7 +67,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = {
        mbfl_filt_conv_common_ctor,
        NULL,
        mbfl_filt_conv_wchar_jis2004,
-       mbfl_filt_conv_jis2004_flush,
+       mbfl_filt_conv_wchar_jis2004_flush,
        NULL,
 };
 
@@ -202,6 +202,9 @@ retry:
                } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
                        if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
                                SJIS_DECODE(c1, c, s1, s2);
+                       } else {
+                               CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data));
+                               break;
                        }
                } else {
                        s1 = c1;
@@ -471,6 +474,14 @@ retry:
        return c;
 }
 
+int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
+{
+       if (filter->status & 0xF) {
+               CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data));
+       }
+       return 0;
+}
+
 int
 mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
        int k;
@@ -665,7 +676,7 @@ retry:
 }
 
 int
-mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
+mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter)
 {
        int k, c1, c2, s1, s2;
 
index 5eb72ca8913161ba9952d66c9d1dd0c73f38b457..869fd145c1ce0bf516013c8631247b90f04003f9 100644 (file)
@@ -39,7 +39,8 @@ extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004;
 int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter);
 int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter);
 
-int mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter);
+int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter);
 
 #endif /* MBFL_MBFILTER_SJIS_2004_H */