From 93b061bc3e1c9285ec1ce6405b85d3a1e072833f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 12 May 2013 11:29:27 +0200 Subject: [PATCH] Issue #1159051: Back out a fix for handling corrupted gzip files that broke backwards compatibility. --- Lib/gzip.py | 73 ++++++++++++++++++++++--------------------- Lib/test/test_bz2.py | 18 ----------- Lib/test/test_gzip.py | 13 -------- Misc/NEWS | 5 +-- 4 files changed, 41 insertions(+), 68 deletions(-) mode change 100644 => 100755 Lib/test/test_gzip.py diff --git a/Lib/gzip.py b/Lib/gzip.py index 0adfd3fdf8..6aacc9a4f9 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -33,6 +33,9 @@ def write32u(output, value): # or unsigned. output.write(struct.pack(" self.extrasize: - if not self._read(readsize): - if size > self.extrasize: - size = self.extrasize - break - readsize = min(self.max_read_chunk, readsize * 2) + try: + while size > self.extrasize: + self._read(readsize) + readsize = min(self.max_read_chunk, readsize * 2) + except EOFError: + if size > self.extrasize: + size = self.extrasize offset = self.offset - self.extrastart chunk = self.extrabuf[offset: offset + size] @@ -365,9 +366,12 @@ class GzipFile(io.BufferedIOBase): if self.extrasize == 0: if self.fileobj is None: return b'' - # Ensure that we don't return b"" if we haven't reached EOF. - # 1024 is the same buffering heuristic used in read() - while self.extrasize == 0 and self._read(max(n, 1024)): + try: + # Ensure that we don't return b"" if we haven't reached EOF. + while self.extrasize == 0: + # 1024 is the same buffering heuristic used in read() + self._read(max(n, 1024)) + except EOFError: pass offset = self.offset - self.extrastart remaining = self.extrasize @@ -380,14 +384,13 @@ class GzipFile(io.BufferedIOBase): def _read(self, size=1024): if self.fileobj is None: - return False + raise EOFError("Reached EOF") if self._new_member: # If the _new_member flag is set, we have to # jump to the next member, if there is one. self._init_read() - if not self._read_gzip_header(): - return False + self._read_gzip_header() self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) self._new_member = False @@ -404,7 +407,7 @@ class GzipFile(io.BufferedIOBase): self.fileobj.prepend(self.decompress.unused_data, True) self._read_eof() self._add_read_data( uncompress ) - return False + raise EOFError('Reached EOF') uncompress = self.decompress.decompress(buf) self._add_read_data( uncompress ) @@ -420,7 +423,6 @@ class GzipFile(io.BufferedIOBase): # a new member on the next call self._read_eof() self._new_member = True - return True def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffff @@ -435,7 +437,8 @@ class GzipFile(io.BufferedIOBase): # We check the that the computed CRC and size of the # uncompressed data matches the stored values. Note that the size # stored is the true file size mod 2**32. - crc32, isize = struct.unpack("