From: Alex Dowad
Date: Wed, 7 Oct 2020 20:12:27 +0000 (+0200)
Subject: Don't pass invalid JIS X 0208 characters through
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8ae0473324ab7f26ee9401252b3133aa4e0fe169;p=php

Don't pass invalid JIS X 0208 characters through

Many Japanese encodings, such as JIS7/8, Shift JIS, ISO-2022-JP, EUC-JP, and so on, encode characters from the JIS X 0208 character set. JIS X 0208 is based on the concept of a 94x94 table, with numbered rows and columns. However, more than a thousand of the cells in that table are empty; JIS X 0208 does not actually use all 94x94=8,836 possible kuten codes.

mbstring had a dubious feature whereby, if a Japanese string contained one of these 'unmapped' kuten codes, and it was being converted to another Japanese encoding which was also based on JIS X 0208, the non-existent character would be silently passed through, and the unmapped kuten code would be re-encoded using the normal encoding method of the target text encoding.

Again, this _only_ happened if converting the text with the funky kuten code to a Japanese encoding. If one tried converting it to Unicode, mbstring would treat that as an error.

If somebody, somewhere, made their own private extension to JIS X 0208, and used the regular Japanese encodings like Shift JIS and EUC-JP to encode this private character set, then this feature might conceivably be useful. But how likely is that? If someone is using Shift JIS, EUC-JP, ISO-2022-JP, etc. to encode a funky version of JIS X 0208 with extra characters added, then that should be treated as a separate text encoding.

The code which flags such characters with MBFL_WCSPLANE_JIS0208 is retained solely for error reporting in `mbfl_filt_conv_illegal_output`.
--- diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index a4d1724e8f..b630d3fcb0 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -409,9 +409,7 @@ mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter) /* do some transliteration */ if (s <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0208) { - s = c & MBFL_WCSPLANE_MASK; - } else if (c1 == MBFL_WCSPLANE_JIS0212) { + if (c1 == MBFL_WCSPLANE_JIS0212) { s = c & MBFL_WCSPLANE_MASK; s |= 0x8080; } else if (c == 0xa5) { /* YEN SIGN */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c index cb95469408..25944a10dd 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c @@ -216,14 +216,6 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */ s1 = -1; } - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; - if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */ - s1 <= ((88 + 0x20) << 8)) ||/* IBM extension */ - (s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */ - s1 <= ((94 + 0x20) << 8))) { - s1 = -1; - } } else if (c == 0xa5) { /* YEN SIGN */ s1 = 0x005c; /* YEN SIGN */ } else if (c == 0x203e) { /* OVER LINE */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index c0fe18e038..64ccd57361 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -255,8 +255,6 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff 
--git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c index b93fc9101a..df51be0183 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c @@ -279,11 +279,6 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */ s1 = -1; } - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; - if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 94ku */ - s1 = -1; - } } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; if (s1 >= ((83 + 0x20) << 8)) { /* 83ku - 94ku */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c index 950365045c..af54a1c605 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -306,8 +306,6 @@ mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c index e6300675e8..1b4857dfe2 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c @@ -334,8 +334,6 @@ mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 
ba43872d83..8ba46a5b9a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -288,9 +288,7 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) } if (s <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0208) { - s = c & MBFL_WCSPLANE_MASK; - } else if (c1 == MBFL_WCSPLANE_JIS0212) { + if (c1 == MBFL_WCSPLANE_JIS0212) { s = c & MBFL_WCSPLANE_MASK; s |= 0x8080; } else if (c == 0xa5) { /* YEN SIGN */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c index 45b87a8f98..6361195606 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c @@ -420,8 +420,6 @@ mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c index 255a457c58..10ac7d9ce4 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c @@ -794,8 +794,6 @@ mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c index d37f01568e..ea19e6b105 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c @@ -245,8 +245,6 @@ mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter 
*filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080;