granicus.if.org Git - python/commitdiff
Correctly detect bzip2 compressed streams with blocksizes other than 900k.
author: Lars Gustäbel <lars@gustaebel.de>
Tue, 6 Dec 2011 12:07:09 +0000 (13:07 +0100)
committer: Lars Gustäbel <lars@gustaebel.de>
Tue, 6 Dec 2011 12:07:09 +0000 (13:07 +0100)
Lib/tarfile.py
Lib/test/test_tarfile.py
Misc/NEWS

index 6828eab5a5d06e1eb8a5ad6c2c914723d36a417f..ed5bcf20ee8d4feddfb758bf904b18d44dd89812 100644 (file)
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -627,7 +627,7 @@ class _StreamProxy(object):
     def getcomptype(self):
         if self.buf.startswith("\037\213\010"):
             return "gz"
-        if self.buf.startswith("BZh91"):
+        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
             return "bz2"
         return "tar"
 
index 6962f8e2d78bd755ed8b5a0ea6847e3b9fcd11ab..49d2d07d90a8fc81e4dbddfd6248e8e6dc83bd85 100644 (file)
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -440,6 +440,23 @@ class DetectReadTest(unittest.TestCase):
     def test_detect_fileobj(self):
         self._test_modes(self._testfunc_fileobj)
 
+    def test_detect_stream_bz2(self):
+        # Originally, tarfile's stream detection looked for the string
+        # "BZh91" at the start of the file. This is incorrect because
+        # the '9' represents the blocksize (900kB). If the file was
+        # compressed using another blocksize autodetection fails.
+        if not bz2:
+            return
+
+        with open(tarname, "rb") as fobj:
+            data = fobj.read()
+
+        # Compress with blocksize 100kB, the file starts with "BZh11".
+        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as fobj:
+            fobj.write(data)
+
+        self._testfunc_file(tmpname, "r|*")
+
 
 class MemberReadTest(ReadTest):
 
index 4c45a854ff3f8fb99a7da066f20e95119bddecc5..bf631537802efdebe6eed8080558d485573bfb22 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -79,6 +79,9 @@ Core and Builtins
 Library
 -------
 
+- tarfile.py: Correctly detect bzip2 compressed streams with blocksizes
+  other than 900k.
+
 - Issue #13439: Fix many errors in turtle docstrings.
 
 - Issue #12856: Ensure child processes do not inherit the parent's random