i += 1L << 32
return i
+def LOWU32(i):
+ """Return the low-order 32 bits of an int, as a non-negative int."""
+ return i & 0xFFFFFFFFL
+
def write32(output, value):
output.write(struct.pack("<l", value))
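
The two helpers above capture the core of the change: sizes and CRCs that may not fit in a signed 32-bit field are masked down to their low 32 bits before being packed. Below is a minimal sketch of that behaviour; the "<L" format attributed to write32u is an assumption inferred from its call sites later in this patch, since its definition is not part of this excerpt.

    import struct

    def LOWU32(i):
        """Return the low-order 32 bits of an int, as a non-negative int."""
        return i & 0xFFFFFFFF      # version-neutral spelling of 0xFFFFFFFFL

    # Only the low 32 bits of an oversized value survive the mask.
    assert LOWU32((1 << 32) + 5) == 5

    # write32 packs a signed little-endian 32-bit value ("<l"); write32u is
    # assumed to pack an unsigned one ("<L"), which is what lets sizes in the
    # 2GB-4GB range be written without overflow.
    assert struct.pack("<l", -1) == struct.pack("<L", 0xFFFFFFFF)
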
# We've read to the end of the file, so we have to rewind in order
# to reread the 8 bytes containing the CRC and the file size.
# We check that the computed CRC and size of the
- # uncompressed data matches the stored values.
+ # uncompressed data match the stored values. Note that the size
+ # stored is the true file size mod 2**32.
self.fileobj.seek(-8, 1)
crc32 = read32(self.fileobj)
isize = U32(read32(self.fileobj)) # may exceed 2GB
if U32(crc32) != U32(self.crc):
raise ValueError, "CRC check failed"
- elif isize != self.size:
+ elif isize != LOWU32(self.size):
raise ValueError, "Incorrect length of data produced"
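
For the CRC half of that check, self.crc is assumed to be accumulated incrementally with zlib.crc32 as data passes through the file object; comparing both sides through U32() then removes any platform-dependent sign. A hedged sketch of that accumulation, independent of gzip.py itself:

    import zlib

    def running_crc(chunks):
        # Feed each chunk to zlib.crc32 together with the CRC so far,
        # starting from the CRC of the empty string (0).
        crc = zlib.crc32(b"")
        for chunk in chunks:
            crc = zlib.crc32(chunk, crc)
        return crc & 0xFFFFFFFF    # normalize sign, as U32()/LOWU32 do

    # The incremental CRC equals a single-shot CRC over the whole stream.
    assert running_crc([b"spam", b"eggs"]) == (zlib.crc32(b"spameggs") & 0xFFFFFFFF)
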
def close(self):
if self.mode == WRITE:
self.fileobj.write(self.compress.flush())
write32(self.fileobj, self.crc)
- # self.size may exceed 2GB
- write32u(self.fileobj, self.size)
+ # self.size may exceed 2GB, or even 4GB
+ write32u(self.fileobj, LOWU32(self.size))
self.fileobj = None
elif self.mode == READ:
self.fileobj = None
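
close() is the mirror image of _read_eof(): the CRC and the masked size are appended as the 8-byte gzip trailer. The standalone sketch below (not gzip.py's actual I/O path) shows why only the low 32 bits of a size beyond 4GB survive the round trip, which is exactly the caveat recorded in the NEWS entry that follows.

    import struct

    def LOWU32(i):
        return i & 0xFFFFFFFF

    true_size = 5 * 1024 * 1024 * 1024             # hypothetical 5GB of data
    stored = struct.pack("<L", LOWU32(true_size))  # what write32u is assumed to emit

    # Reading the 4-byte ISIZE field back yields the truncated value ...
    isize = struct.unpack("<L", stored)[0]
    assert isize == true_size % (2 ** 32)
    # ... so external tools inspecting the trailer may report a wrong size
    # for members larger than 4GB.
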
Library
-------
-- gzip.py now handles files exceeding 2GB. Note that 4GB is still a
- fundamental limitation of the underlying gzip file format (it only
- has 32 bits to record the file size).
+- gzip.py now handles files exceeding 2GB. Files over 4GB also work
+ now (provided the OS supports it, and Python is configured with large
+ file support), but in that case the underlying gzip file format can
+ record only the least-significant 32 bits of the file size, so that
+ some tools working with gzipped files may report an incorrect file
+ size.
- xml.sax.saxutils.unescape has been added, to replace entity references
with their entity value.
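
A brief usage sketch of the new helper; the extra entities mapping in the second call is an illustrative argument, not a default of the function, which by itself only handles the three predefined XML entities.

    from xml.sax.saxutils import unescape

    # The predefined entities are always translated back.
    assert unescape("&lt;spam &amp; eggs&gt;") == "<spam & eggs>"

    # Additional replacements can be passed in explicitly.
    assert unescape("&quot;hi&quot;", {"&quot;": '"'}) == '"hi"'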