From ecf718470b78c2fddc9d07cf1215a27773264515 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 14 Nov 2020 23:07:17 +0200 Subject: [PATCH] Convert U+FF5E (FULLWIDTH TILDE) to 0x8160 (WAVE DASH) in SJIS variants By entering this character in the JIS X 0208 conversion table, we can remove a bunch of explicit `if` clauses in different conversion filters. It also means that U+FF5E can be converted into SJIS-mac now; I don't know why this one SJIS variant rejected U+FF5E before, since 0x8160 means the same thing in SJIS-mac as the others. --- ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c | 6 ------ ext/mbstring/libmbfl/filters/mbfilter_cp51932.c | 2 -- ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 2 -- ext/mbstring/libmbfl/filters/mbfilter_jis.c | 4 ---- ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c | 2 -- ext/mbstring/libmbfl/filters/unicode_table_jis.h | 2 +- ext/mbstring/tests/sjismac_encoding.phpt | 3 +++ 7 files changed, 4 insertions(+), 17 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index 3f4fb7132c..55cb154b62 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -413,8 +413,6 @@ mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter) s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ @@ -629,8 +627,6 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ @@ -773,8 +769,6 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c index 0be771c3b8..1d64429c21 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c @@ -218,8 +218,6 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) s1 = 0x216F; /* FULLWIDTH YEN SIGN */ } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s1 = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s1 = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s1 = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index 120c5e626d..64c0247119 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -255,8 +255,6 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) s1 = 0x216F; /* FULLWIDTH YEN SIGN */ } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s1 = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s1 = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s1 = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 5e0c05c8fc..15c1bb30a1 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -292,8 +292,6 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ @@ -389,8 +387,6 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c index bad0423503..53f5242dda 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c @@ -751,8 +751,6 @@ int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter) s1 = 0x216F; /* FULLWIDTH YEN SIGN */ } else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ s1 = 0x2140; - } else if (c == 0xFF5E) { /* FULLWIDTH TILDE */ - s1 = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s1 = 0x2142; } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis.h b/ext/mbstring/libmbfl/filters/unicode_table_jis.h index 450428c578..640c5587d8 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_jis.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_jis.h @@ -5824,7 +5824,7 @@ const unsigned short ucs_r_jis_table[] = { 0x212E,0x2361,0x2362,0x2363,0x2364,0x2365,0x2366,0x2367, 0x2368,0x2369,0x236A,0x236B,0x236C,0x236D,0x236E,0x236F, 0x2370,0x2371,0x2372,0x2373,0x2374,0x2375,0x2376,0x2377, - 0x2378,0x2379,0x237A,0x2150,0x2143,0x2151,0x0000,0x0000, + 0x2378,0x2379,0x237A,0x2150,0x2143,0x2151,0x2141,0x0000, 0x0000,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7, 0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF, 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7, diff --git a/ext/mbstring/tests/sjismac_encoding.phpt b/ext/mbstring/tests/sjismac_encoding.phpt index e8b09d266f..3c36484f4a 100644 --- a/ext/mbstring/tests/sjismac_encoding.phpt +++ b/ext/mbstring/tests/sjismac_encoding.phpt @@ -65,6 +65,9 @@ $fromUnicode["\x20\x15"] = "\x81\x5C"; /* Convert U+203E (OVERLINE) to 0x8150 (FULLWIDTH MACRON) */ $fromUnicode["\x20\x3E"] = "\x81\x50"; +/* Convert U+FF5E (FULLWIDTH TILDE) to 0x8160 (WAVE DASH) */ +$fromUnicode["\xFF\x5E"] = "\x81\x60"; + testAllValidChars($validChars, 'SJIS-mac', 'UTF-32BE'); echo "MacJapanese verification and conversion works on all valid characters\n"; -- 2.40.0