From: Alex Dowad Date: Sat, 14 Nov 2020 19:51:23 +0000 (+0200) Subject: 0x7E is not a tilde in Shift-JIS{,-2004} X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0d0029d729259f7977217fb94930f6fe973b1192;p=php 0x7E is not a tilde in Shift-JIS{,-2004} --- diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c index 39e7879c1a..36f374a952 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c @@ -207,6 +207,10 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter) /* Unicode 0x5C is a backslash; but Shift-JIS uses 0x5C for the * Yen sign. JIS X 0208 kuten 0x2140 is a backslash. */ s1 = 0x2140; + } else if (c == 0x7E) { + /* Unicode 0x7E is a tilde, but Shift-JIS uses 0x7E for overline (or + * macron). JIS X 0208 kuten 0x2141 is 'WAVE DASH' */ + s1 = 0x2141; } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h b/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h index c8bc433d34..09f7c43726 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h @@ -1612,7 +1612,7 @@ static const unsigned short ucs_a1_jisx0213_table[] = { // 0x0000 - 0x045f 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067, 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077, -0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F, +0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x2141,0x007F, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, diff --git a/ext/mbstring/tests/sjis2004_encoding.phpt b/ext/mbstring/tests/sjis2004_encoding.phpt index 3278e85dfe..3b4299e753 100644 --- a/ext/mbstring/tests/sjis2004_encoding.phpt +++ b/ext/mbstring/tests/sjis2004_encoding.phpt @@ -36,7 +36,9 @@ while ($line = fgets($fp, 256)) { } } } -$fromUnicode["\x00\x7E"] = "\x7E"; /* Not reversible; SJIS 0x7E -> U+203E */ + +/* U+007E is TILDE, Shift-JIS 0x8160 is WAVE DASH */ +$fromUnicode["\x00\x7E"] = "\x81\x60"; /* U+005C is backslash, Shift-JIS 0x815F is REVERSE SOLIDUS * (ie. a fancy way to say "backslash") */ diff --git a/ext/mbstring/tests/sjis_encoding.phpt b/ext/mbstring/tests/sjis_encoding.phpt index a75ce85a4f..8ac3b0563e 100644 --- a/ext/mbstring/tests/sjis_encoding.phpt +++ b/ext/mbstring/tests/sjis_encoding.phpt @@ -19,12 +19,11 @@ for ($i = 0; $i < 0x20; $i++) { $fromUnicode["\x00" . chr($i)] = chr($i); } +/* U+007E is TILDE; convert to Shift-JIS 0x8160 (WAVE DASH) */ +$fromUnicode["\x00\x7E"] = "\x81\x60"; /* DEL character */ $validChars["\x7F"] = "\x00\x7F"; $fromUnicode["\x00\x7F"] = "\x7F"; -/* Although Shift-JIS uses 0x7E for an overline, we will map Unicode 0x7E - * (tilde) to Shift-JIS 0x7E (as iconv does) */ -$fromUnicode["\x00\x7E"] = "\x7E"; /* Use fullwidth reverse solidus, not (halfwidth) backslash (0x5C) */ $validChars["\x81\x5F"] = "\xFF\x3C"; $fromUnicode["\xFF\x3C"] = "\x81\x5F";