Issue #13664: GzipFile now supports non-ascii Unicode filenames.

author Serhiy Storchaka <storchaka@gmail.com>

Sun, 12 Oct 2014 19:23:28 +0000 (22:23 +0300)

committer Serhiy Storchaka <storchaka@gmail.com>

Sun, 12 Oct 2014 19:23:28 +0000 (22:23 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 12 Oct 2014 19:23:28 +0000 (22:23 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 12 Oct 2014 19:23:28 +0000 (22:23 +0300)
diff --git a/Lib/gzip.py b/Lib/gzip.py

index a613bae876dd3444162a81f21875c75133d21a42..49566fd242a259ff7b1b0bbfc6627f94a3fe34af 100644 (file)
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -164,9 +164,16 @@ class GzipFile(io.BufferedIOBase):
      def _write_gzip_header(self):
          self.fileobj.write('\037\213')             # magic header
          self.fileobj.write('\010')                 # compression method
-        fname = os.path.basename(self.name)
-        if fname.endswith(".gz"):
-            fname = fname[:-3]
+        try:
+            # RFC 1952 requires the FNAME field to be Latin-1. Do not
+            # include filenames that cannot be represented that way.
+            fname = os.path.basename(self.name)
+            if not isinstance(fname, str):
+                fname = fname.encode('latin-1')
+            if fname.endswith('.gz'):
+                fname = fname[:-3]
+        except UnicodeEncodeError:
+            fname = ''
          flags = 0
          if fname:
              flags = FNAME
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py

index aa56ed3ab509516d703f563df37e38adc30e0a1b..971306182e13dbde10c6b248bf2bf0a001ee3019 100644 (file)
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -30,6 +30,17 @@ class TestGzip(unittest.TestCase):
      def tearDown(self):
          test_support.unlink(self.filename)
  
+    @test_support.requires_unicode
+    def test_unicode_filename(self):
+        unicode_filename = test_support.TESTFN_UNICODE
+        with gzip.GzipFile(unicode_filename, "wb") as f:
+            f.write(data1 * 50)
+        with gzip.GzipFile(unicode_filename, "rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
+        # Sanity check that we are actually operating on the right file.
+        with open(unicode_filename, 'rb') as fobj, \
+             gzip.GzipFile(fileobj=fobj, mode="rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
  
      def test_write(self):
          with gzip.GzipFile(self.filename, 'wb') as f:
diff --git a/Misc/NEWS b/Misc/NEWS

index 6ff5fe52aaad815b8581451df8894d48235696b4..bf9caabbc5e42f122eed56e2ec9c2c7bc862a2e9 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -37,6 +37,8 @@ Core and Builtins
  Library
  -------
  
+- Issue #13664: GzipFile now supports non-ascii Unicode filenames.
+
  - Issue #13096: Fixed segfault in CTypes POINTER handling of large
    values.
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 12 Oct 2014 19:23:28 +0000 (22:23 +0300)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 12 Oct 2014 19:23:28 +0000 (22:23 +0300)
Lib/gzip.py		patch | blob | history
Lib/test/test_gzip.py		patch | blob | history
Misc/NEWS		patch | blob | history