Except for vanilla Shift-JIS, where 0x7E is a halfwidth overline/macron.
Shift-JIS-2004, on the other hand, has an additional character (byte sequence
0x854A) which was defined as a halfwidth macron in JIS X 0213:2000, so we use that.
{
int s = 0;
- if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
+ if (c == 0xAF) { /* U+00AF is MACRON */
+ s = 0xA2B4; /* Use JIS X 0212 overline */
+ } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
/* Unicode 0x7E is a tilde, but Shift-JIS uses 0x7E for overline (or
* macron). JIS X 0208 kuten 0x2141 is 'WAVE DASH' */
s1 = 0x2141;
- } else if (c == 0x203E) { /* U+203E is OVERLINE */
+ } else if (c == 0xAF || c == 0x203E) { /* U+00AF is MACRON, U+203E is OVERLINE */
s1 = 0x7E; /* Halfwidth overline/macron */
} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0xA2C2,0x2171,0x2172,0xA2F0,0x0000,0xA2C3,0x2178,
- 0x212F,0xA2ED,0xA2EC,0x0000,0x224C,0x0000,0xA2EE,0xA2B4,
+ 0x212F,0xA2ED,0xA2EC,0x0000,0x224C,0x0000,0xA2EE,0x2131,
0x216B,0x215E,0x0000,0x0000,0x212D,0x0000,0x2279,0x0000,
0xA2B1,0x0000,0xA2EB,0x0000,0x0000,0x0000,0x0000,0xA2C4,
0xAAA2,0xAAA1,0xAAA4,0xAAAA,0xAAA3,0xAAA9,0xA9A1,0xAAAE,
$fromUnicode["\x00\xA5"] = "\xA1\xEF";
/* U+203E is OVERLINE; convert to FULLWIDTH MACRON */
$fromUnicode["\x20\x3E"] = "\xA1\xB1";
+/* U+00AF is MACRON; convert to FULLWIDTH MACRON */
+$fromUnicode["\x00\xAF"] = "\xA1\xB1";
testAllValidChars($validChars, 'CP51932', 'UTF-16BE', false);
testAllValidChars($fromUnicode, 'UTF-16BE', 'CP51932', false);
/* U+203E is OVERLINE; convert to JIS X 0208 FULLWIDTH MACRON */
$fromUnicode["\x20\x3E"] = "\x81\x50";
+/* U+00AF is MACRON; it can also go to FULLWIDTH MACRON */
+$fromUnicode["\x00\xAF"] = "\x81\x50";
+
findInvalidChars($validChars, $invalidChars, $truncated, array_fill_keys(range(0x81, 0x9F), 2) + array_fill_keys(range(0xE0, 0xFC), 2));
findInvalidChars($fromUnicode, $invalidCodepoints, $unused, array_fill_keys(range(0, 0xFF), 2));
/* DEL character */
$validChars["\x7F"] = "\x00\x7F";
$fromUnicode["\x00\x7F"] = "\x7F";
+/* U+00AF is MACRON; Shift-JIS 0x7E is overline */
+$fromUnicode["\x00\xAF"] = "\x7E";
/* Use fullwidth reverse solidus, not (halfwidth) backslash (0x5C) */
$validChars["\x81\x5F"] = "\xFF\x3C";
$fromUnicode["\xFF\x3C"] = "\x81\x5F";
/* Convert U+203E (OVERLINE) to 0x8150 (FULLWIDTH MACRON) */
$fromUnicode["\x20\x3E"] = "\x81\x50";
+/* And also U+00AF (MACRON) */
+$fromUnicode["\x00\xAF"] = "\x81\x50";
/* Convert U+FF5E (FULLWIDTH TILDE) to 0x8160 (WAVE DASH) */
$fromUnicode["\xFF\x5E"] = "\x81\x60";