Issue #16218, #16414, #16444: Backport FS_NONASCII, TESTFN_UNDECODABLE,

author Victor Stinner <victor.stinner@gmail.com>

Thu, 3 Jan 2013 00:50:30 +0000 (01:50 +0100)

committer Victor Stinner <victor.stinner@gmail.com>

Thu, 3 Jan 2013 00:50:30 +0000 (01:50 +0100)
author Victor Stinner <victor.stinner@gmail.com>
Thu, 3 Jan 2013 00:50:30 +0000 (01:50 +0100)
committer Victor Stinner <victor.stinner@gmail.com>
Thu, 3 Jan 2013 00:50:30 +0000 (01:50 +0100)
diff --git a/Lib/test/support.py b/Lib/test/support.py

index e1ec9e2d1f7084beca003a177013fee6496cf27c..2130fd639c9f49add256e7f3e722b1e6d70b53d9 100644 (file)
--- a/Lib/test/support.py
+++ b/Lib/test/support.py
@@ -523,6 +523,49 @@ else:
  # module name.
  TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid())
  
+# FS_NONASCII: non-ASCII character encodable by os.fsencode(),
+# or None if there is no such character.
+FS_NONASCII = None
+for character in (
+    # First try printable and common characters to have a readable filename.
+    # For each character, the listed encodings are just examples of encodings
+    # able to encode the character (the list is not exhaustive).
+
+    # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
+    '\u00E6',
+    # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
+    '\u0130',
+    # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
+    '\u0141',
+    # U+03C6 (Greek Small Letter Phi): cp1253
+    '\u03C6',
+    # U+041A (Cyrillic Capital Letter Ka): cp1251
+    '\u041A',
+    # U+05D0 (Hebrew Letter Alef): cp424
+    '\u05D0',
+    # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
+    '\u060C',
+    # U+062A (Arabic Letter Teh): cp720
+    '\u062A',
+    # U+0E01 (Thai Character Ko Kai): cp874
+    '\u0E01',
+
+    # Then try more "special" characters. "special" because they may be
+    # interpreted or displayed differently depending on the exact locale
+    # encoding and the font.
+
+    # U+00A0 (No-Break Space)
+    '\u00A0',
+    # U+20AC (Euro Sign)
+    '\u20AC',
+):
+    try:
+        os.fsdecode(os.fsencode(character))
+    except UnicodeError:
+        pass
+    else:
+        FS_NONASCII = character
+        break
  
  # TESTFN_UNICODE is a non-ascii filename
  TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f"
@@ -567,6 +610,41 @@ elif sys.platform != 'darwin':
          # the byte 0xff. Skip some unicode filename tests.
          pass
  
+# TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be
+# decoded from the filesystem encoding (in strict mode). It can be None if we
+# cannot generate such filename (ex: the latin1 encoding can decode any byte
+# sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks
+# to the surrogateescape error handler (PEP 383), but not from the filesystem
+# encoding in strict mode.
+TESTFN_UNDECODABLE = None
+for name in (
+    # b'\xff' is not decodable by os.fsdecode() with code page 932. Windows
+    # accepts it to create a file or a directory, but fails to enter such a
+    # directory (when the bytes name is used). So test b'\xe7' first: it is
+    # not decodable from cp932.
+    b'\xe7w\xf0',
+    # undecodable from ASCII, UTF-8
+    b'\xff',
+    # undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856
+    # and cp857
+    b'\xae\xd5',
+    # undecodable from UTF-8 (UNIX and Mac OS X)
+    b'\xed\xb2\x80', b'\xed\xb4\x80',
+    # undecodable from shift_jis, cp869, cp874, cp932, cp1250, cp1251, cp1252,
+    # cp1253, cp1254, cp1255, cp1257, cp1258
+    b'\x81\x98',
+):
+    try:
+        name.decode(TESTFN_ENCODING)
+    except UnicodeDecodeError:
+        TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name
+        break
+
+if FS_NONASCII:
+    TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII
+else:
+    TESTFN_NONASCII = None
+
  # Save the initial cwd
  SAVEDCWD = os.getcwd()
  
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py

index c10bd761ac3baa62cd17978c63ff01dd93e06f52..67375cd12ad7134ccf76e8474cf304f635d01a38 100644 (file)
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -100,11 +100,11 @@ class CmdLineTest(unittest.TestCase):
          # All good if execution is successful
          assert_python_ok('-c', 'pass')
  
-    @unittest.skipIf(sys.getfilesystemencoding() == 'ascii',
-                     'need a filesystem encoding different than ASCII')
+    @unittest.skipUnless(test.support.FS_NONASCII, 'need support.FS_NONASCII')
      def test_non_ascii(self):
          # Test handling of non-ascii data
-        command = "assert(ord('\xe9') == 0xe9)"
+        command = ("assert(ord(%r) == %s)"
+                   % (test.support.FS_NONASCII, ord(test.support.FS_NONASCII)))
          assert_python_ok('-c', command)
  
      # On Windows, pass bytes to subprocess doesn't test how Python decodes the
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py

index 6b59d9634914834ee4fc64b6f3f4b53eafad72a8..70f7d1ebcc610d537e2719a097e3304f9fae6557 100644 (file)
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -294,6 +294,30 @@ class CmdLineTest(unittest.TestCase):
                      print(out)
                  self.assertEqual(rc, 1)
  
+    def test_non_ascii(self):
+        # Mac OS X denies the creation of a file with an invalid UTF-8 name.
+        # Windows allows creating a file with an arbitrary bytes name, but
+        # Python cannot pass an undecodable bytes argument to a subprocess.
+        if (support.TESTFN_UNDECODABLE
+        and sys.platform not in ('win32', 'darwin')):
+            name = os.fsdecode(support.TESTFN_UNDECODABLE)
+        elif support.TESTFN_NONASCII:
+            name = support.TESTFN_NONASCII
+        else:
+            self.skipTest("need support.TESTFN_NONASCII")
+
+        # Issue #16218
+        source = 'print(ascii(__file__))\n'
+        script_name = _make_test_script(os.curdir, name, source)
+        self.addCleanup(support.unlink, script_name)
+        rc, stdout, stderr = assert_python_ok(script_name)
+        self.assertEqual(
+            ascii(script_name),
+            stdout.rstrip().decode('ascii'),
+            'stdout=%r stderr=%r' % (stdout, stderr))
+        self.assertEqual(0, rc)
+
+
  def test_main():
      support.run_unittest(CmdLineTest)
      support.reap_children()
diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py

index 50638a1cf0f14cf84e469145766fd7d0fe92aa64..60209235cf3d342e8bf2b48807539efd31f181a5 100644 (file)
--- a/Lib/test/test_genericpath.py
+++ b/Lib/test/test_genericpath.py
@@ -292,11 +292,20 @@ class CommonTest(GenericTest):
                  for path in ('', 'fuu', 'f\xf9\xf9', '/fuu', 'U:\\'):
                      self.assertIsInstance(abspath(path), str)
  
-    @unittest.skipIf(sys.platform == 'darwin',
-        "Mac OS X denies the creation of a directory with an invalid utf8 name")
      def test_nonascii_abspath(self):
-        # Test non-ASCII, non-UTF8 bytes in the path.
-        with support.temp_cwd(b'\xe7w\xf0'):
+        if (support.TESTFN_UNDECODABLE
+        # Mac OS X denies the creation of a directory with an invalid
+        # UTF-8 name. Windows allows creating a directory with an
+        # arbitrary bytes name, but fails to enter this directory
+        # (when the bytes name is used).
+        and sys.platform not in ('win32', 'darwin')):
+            name = support.TESTFN_UNDECODABLE
+        elif support.TESTFN_NONASCII:
+            name = support.TESTFN_NONASCII
+        else:
+            self.skipTest("need support.TESTFN_NONASCII")
+
+        with support.temp_cwd(name):
              self.test_abspath()
  
  
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py

index bd799b2040d8540ee83408c98d351544aac808f1..720e78b317257dcb27e82010d83c3cc75a58a027 100644 (file)
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -1013,6 +1013,8 @@ if sys.platform != 'win32':
          def setUp(self):
              if support.TESTFN_UNENCODABLE:
                  self.dir = support.TESTFN_UNENCODABLE
+            elif support.TESTFN_NONASCII:
+                self.dir = support.TESTFN_NONASCII
              else:
                  self.dir = support.TESTFN
              self.bdir = os.fsencode(self.dir)
@@ -1027,6 +1029,8 @@ if sys.platform != 'win32':
              add_filename(support.TESTFN_UNICODE)
              if support.TESTFN_UNENCODABLE:
                  add_filename(support.TESTFN_UNENCODABLE)
+            if support.TESTFN_NONASCII:
+                add_filename(support.TESTFN_NONASCII)
              if not bytesfn:
                  self.skipTest("couldn't create any non-ascii filename")
author	Victor Stinner <victor.stinner@gmail.com>
	Thu, 3 Jan 2013 00:50:30 +0000 (01:50 +0100)
committer	Victor Stinner <victor.stinner@gmail.com>
	Thu, 3 Jan 2013 00:50:30 +0000 (01:50 +0100)
Lib/test/support.py		patch \| blob \| history
Lib/test/test_cmd_line.py		patch \| blob \| history
Lib/test/test_cmd_line_script.py		patch \| blob \| history
Lib/test/test_genericpath.py		patch \| blob \| history
Lib/test/test_os.py		patch \| blob \| history