From: Lars Gustäbel
Date: Tue, 6 Dec 2011 12:07:09 +0000 (+0100)
Subject: Correctly detect bzip2 compressed streams with blocksizes other than 900k.
X-Git-Tag: v2.7.3rc1~263
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9a38863d8ce5444be29d2e20341b28147b94f33a;p=python

Correctly detect bzip2 compressed streams with blocksizes other than 900k.
---

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 6828eab5a5..ed5bcf20ee 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -627,7 +627,7 @@ class _StreamProxy(object):
     def getcomptype(self):
         if self.buf.startswith("\037\213\010"):
             return "gz"
-        if self.buf.startswith("BZh91"):
+        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
             return "bz2"
         return "tar"
 
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 6962f8e2d7..49d2d07d90 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -440,6 +440,23 @@ class DetectReadTest(unittest.TestCase):
     def test_detect_fileobj(self):
         self._test_modes(self._testfunc_fileobj)
 
+    def test_detect_stream_bz2(self):
+        # Originally, tarfile's stream detection looked for the string
+        # "BZh91" at the start of the file. This is incorrect because
+        # the '9' represents the blocksize (900kB). If the file was
+        # compressed using another blocksize autodetection fails.
+        if not bz2:
+            return
+
+        with open(tarname, "rb") as fobj:
+            data = fobj.read()
+
+        # Compress with blocksize 100kB, the file starts with "BZh11".
+        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as fobj:
+            fobj.write(data)
+
+        self._testfunc_file(tmpname, "r|*")
+
 
 class MemberReadTest(ReadTest):
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 4c45a854ff..bf63153780 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -79,6 +79,9 @@ Core and Builtins
 Library
 -------
 
+- tarfile.py: Correctly detect bzip2 compressed streams with blocksizes
+  other than 900k.
+
 - Issue #13439: Fix many errors in turtle docstrings.
 
 - Issue #12856: Ensure child processes do not inherit the parent's random