From: Alex Dowad Date: Mon, 9 Nov 2020 20:59:07 +0000 (+0200) Subject: Test EUC-JP and Shift-JIS more thoroughly X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d1d50c2b7acc7e48331c46bf9ed2c3817ccd110a;p=php Test EUC-JP and Shift-JIS more thoroughly Previously, the unit tests for these text encodings covered all mappings from legacy -> Unicode, and all _reversible_ mappings from Unicode -> legacy. However, we should also test the few Unicode -> legacy mappings which are not reversible. --- diff --git a/ext/mbstring/tests/armscii8_encoding.phpt b/ext/mbstring/tests/armscii8_encoding.phpt index 960836fe6b..b5bea077d0 100644 --- a/ext/mbstring/tests/armscii8_encoding.phpt +++ b/ext/mbstring/tests/armscii8_encoding.phpt @@ -23,7 +23,7 @@ testAllInvalidChars($invalid, $toUnicode, 'ARMSCII-8', 'UTF-16BE', "\x00%"); testTruncatedChars($truncated, 'ARMSCII-8', 'UTF-16BE', "\x00%"); echo "Tested ARMSCII-8 -> UTF-16BE\n"; -findInvalidChars($fromUnicode, $invalid, $unused, map(range(0,0xFF), 2)); +findInvalidChars($fromUnicode, $invalid, $unused, array_fill_keys(range(0,0xFF), 2)); convertAllInvalidChars($invalid, $fromUnicode, 'UTF-16BE', 'ARMSCII-8', '%'); echo "Tested UTF-16BE -> ARMSCII-8\n"; ?> diff --git a/ext/mbstring/tests/encoding_tests.inc b/ext/mbstring/tests/encoding_tests.inc index 9a374b6137..e95bc8fd09 100644 --- a/ext/mbstring/tests/encoding_tests.inc +++ b/ext/mbstring/tests/encoding_tests.inc @@ -207,13 +207,6 @@ function findInvalidChars($valid, &$invalid, &$truncated, $startBytes = array()) } } -// Helper for building `$startBytes` map for above function -function map($keys, $value, $array = array()) { - foreach ($keys as $key) - $array[$key] = $value; - return $array; -} - function testEncodingFromUTF16ConversionTable($path, $encoding, $replacement = '%', $startBytes = array()) { srand(1000); // Make results consistent mb_substitute_character(0x25); // '%' @@ -225,7 +218,7 @@ function testEncodingFromUTF16ConversionTable($path, $encoding, $replacement = ' testTruncatedChars($truncated, $encoding, 'UTF-16BE', "\x00%"); echo "Tested $encoding -> UTF-16BE\n"; - findInvalidChars($fromUnicode, $invalid, $unused, map(range(0,0xFF), 2)); + findInvalidChars($fromUnicode, $invalid, $unused, array_fill_keys(range(0,0xFF), 2)); convertAllInvalidChars($invalid, $fromUnicode, 'UTF-16BE', $encoding, $replacement); echo "Tested UTF-16BE -> $encoding\n"; } diff --git a/ext/mbstring/tests/eucjp_encoding.phpt b/ext/mbstring/tests/eucjp_encoding.phpt index a05097cd84..dc321d3bae 100644 --- a/ext/mbstring/tests/eucjp_encoding.phpt +++ b/ext/mbstring/tests/eucjp_encoding.phpt @@ -43,7 +43,7 @@ $fromUnicode["\x00\x00\x00\x7E"] = "\x7E"; /* Likewise with 0x005C */ $fromUnicode["\x00\x00\x00\x5C"] = "\x5C"; -findInvalidChars($validChars, $invalidChars, $truncated, map(range(0xA1, 0xFE), 2, array(0x8E => 2, 0x8F => 3))); +findInvalidChars($validChars, $invalidChars, $truncated, array_fill_keys(range(0xA1, 0xFE), 2) + array(0x8E => 2, 0x8F => 3)); /* In the JIS X 0212 character set, kuten code 0x2237 (EUC-JP 0x8FA2B7) * is an ordinary tilde character @@ -61,6 +61,9 @@ echo "Encoding verification and conversion work for all invalid characters\n"; testValidString("\x8F\xA2\xB7", "\x00\x00\x00~", 'EUC-JP', 'UTF-32BE', false); echo "Irreversible mapping of 0x8FA2B7 follows JIS X 0212 correctly\n"; +testAllValidChars($fromUnicode, 'UTF-32BE', 'EUC-JP', false); +echo "Unicode -> EUC-JP conversion works on all valid characters\n"; + $invalidChars = array(); for ($cp = 0; $cp <= 0xFFFF; $cp++) { $char = pack('N', $cp); @@ -74,4 +77,5 @@ echo "Unicode -> EUC-JP conversion works on all invalid characters\n"; Encoding verification and conversion work for all valid characters Encoding verification and conversion work for all invalid characters Irreversible mapping of 0x8FA2B7 follows JIS X 0212 correctly +Unicode -> EUC-JP conversion works on all valid characters Unicode -> EUC-JP conversion works on all invalid characters diff --git a/ext/mbstring/tests/sjis_encoding.phpt b/ext/mbstring/tests/sjis_encoding.phpt index 5445a90fbd..a75ce85a4f 100644 --- a/ext/mbstring/tests/sjis_encoding.phpt +++ b/ext/mbstring/tests/sjis_encoding.phpt @@ -46,16 +46,20 @@ testAllValidChars($validChars, 'Shift-JIS', 'UTF-16BE'); echo "SJIS verification and conversion works on all valid characters\n"; findInvalidChars($validChars, $invalidChars, $truncated, - map(range(0x81, 0x9F), 2, map(range(0xE0, 0xEF), 2))); + array_fill_keys(range(0x81, 0x9F), 2) + array_fill_keys(range(0xE0, 0xEF), 2)); testAllInvalidChars($invalidChars, $validChars, 'Shift-JIS', 'UTF-16BE', "\x00%"); testTruncatedChars($truncated, 'Shift-JIS', 'UTF-16BE', "\x00%"); echo "SJIS verification and conversion works on all invalid characters\n"; -findInvalidChars($fromUnicode, $invalidChars, $unused, map(range(0, 0xFF), 2)); +testAllValidChars($fromUnicode, 'UTF-16BE', 'Shift-JIS', false); +echo "Unicode -> SJIS conversion works on all valid characters\n"; + +findInvalidChars($fromUnicode, $invalidChars, $unused, array_fill_keys(range(0, 0xFF), 2)); convertAllInvalidChars($invalidChars, $fromUnicode, 'UTF-16BE', 'Shift-JIS', '%'); echo "Unicode -> SJIS conversion works on all invalid characters\n"; ?> --EXPECT-- SJIS verification and conversion works on all valid characters SJIS verification and conversion works on all invalid characters +Unicode -> SJIS conversion works on all valid characters Unicode -> SJIS conversion works on all invalid characters