]> granicus.if.org Git - python/commitdiff
Issue #26293: Fixed writing ZIP files that starts not from the start of the
authorSerhiy Storchaka <storchaka@gmail.com>
Fri, 7 Oct 2016 20:12:53 +0000 (23:12 +0300)
committerSerhiy Storchaka <storchaka@gmail.com>
Fri, 7 Oct 2016 20:12:53 +0000 (23:12 +0300)
file.  Offsets in ZIP file now are relative to the start of the archive in
conforming to the specification.

Lib/test/test_zipfile.py
Lib/zipfile.py
Misc/NEWS

index 4000b30be8dd640c8e05741ac0f83d3929f2a885..986808131e536e3fde5746be90caa604c3031b7d 100644 (file)
@@ -344,6 +344,49 @@ class TestsWithSourceFile(unittest.TestCase):
             f.seek(len(data))
             with zipfile.ZipFile(f, "r") as zipfp:
                 self.assertEqual(zipfp.namelist(), [TESTFN])
+                self.assertEqual(zipfp.read(TESTFN), self.data)
+        with open(TESTFN2, 'rb') as f:
+            self.assertEqual(f.read(len(data)), data)
+            zipfiledata = f.read()
+        with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+
+    def test_read_concatenated_zip_file(self):
+        with io.BytesIO() as bio:
+            with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+                zipfp.write(TESTFN, TESTFN)
+            zipfiledata = bio.getvalue()
+        data = b'I am not a ZipFile!'*10
+        with open(TESTFN2, 'wb') as f:
+            f.write(data)
+            f.write(zipfiledata)
+
+        with zipfile.ZipFile(TESTFN2) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+
+    def test_append_to_concatenated_zip_file(self):
+        with io.BytesIO() as bio:
+            with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+                zipfp.write(TESTFN, TESTFN)
+            zipfiledata = bio.getvalue()
+        data = b'I am not a ZipFile!'*1000000
+        with open(TESTFN2, 'wb') as f:
+            f.write(data)
+            f.write(zipfiledata)
+
+        with zipfile.ZipFile(TESTFN2, 'a') as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            zipfp.writestr('strfile', self.data)
+
+        with open(TESTFN2, 'rb') as f:
+            self.assertEqual(f.read(len(data)), data)
+            zipfiledata = f.read()
+        with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile'])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+            self.assertEqual(zipfp.read('strfile'), self.data)
 
     def test_ignores_newline_at_end(self):
         with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
index 3ab66cea69d89fb32f8570449581a0c5b4a3bbd2..1d1065083bd51a92e77b72bac7c38cb95a2d01de 100644 (file)
@@ -772,6 +772,7 @@ class ZipFile(object):
                 # set the modified flag so central directory gets written
                 # even if no files are added to the archive
                 self._didModify = True
+                self._start_disk = self.fp.tell()
             elif key == 'a':
                 try:
                     # See if file is a zip file
@@ -785,6 +786,7 @@ class ZipFile(object):
                     # set the modified flag so central directory gets written
                     # even if no files are added to the archive
                     self._didModify = True
+                    self._start_disk = self.fp.tell()
             else:
                 raise RuntimeError('Mode must be "r", "w" or "a"')
         except:
@@ -815,17 +817,18 @@ class ZipFile(object):
         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
         self._comment = endrec[_ECD_COMMENT]    # archive comment
 
-        # "concat" is zero, unless zip was concatenated to another file
-        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
+        # self._start_disk:  Position of the start of ZIP archive
+        # It is zero, unless ZIP was concatenated to another file
+        self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
             # If Zip64 extension structures are present, account for them
-            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
+            self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 
         if self.debug > 2:
-            inferred = concat + offset_cd
-            print "given, inferred, offset", offset_cd, inferred, concat
+            inferred = self._start_disk + offset_cd
+            print "given, inferred, offset", offset_cd, inferred, self._start_disk
         # self.start_dir:  Position of start of central directory
-        self.start_dir = offset_cd + concat
+        self.start_dir = offset_cd + self._start_disk
         fp.seek(self.start_dir, 0)
         data = fp.read(size_cd)
         fp = cStringIO.StringIO(data)
@@ -855,7 +858,7 @@ class ZipFile(object):
                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 
             x._decodeExtra()
-            x.header_offset = x.header_offset + concat
+            x.header_offset = x.header_offset + self._start_disk
             x.filename = x._decodeFilename()
             self.filelist.append(x)
             self.NameToInfo[x.filename] = x
@@ -1198,7 +1201,7 @@ class ZipFile(object):
                 raise RuntimeError('Compressed size larger than uncompressed size')
         # Seek backwards and write file header (which will now include
         # correct CRC and file sizes)
-        position = self.fp.tell()       # Preserve current position in file
+        position = self.fp.tell() # Preserve current position in file
         self.fp.seek(zinfo.header_offset, 0)
         self.fp.write(zinfo.FileHeader(zip64))
         self.fp.seek(position, 0)
@@ -1284,11 +1287,10 @@ class ZipFile(object):
                         file_size = zinfo.file_size
                         compress_size = zinfo.compress_size
 
-                    if zinfo.header_offset > ZIP64_LIMIT:
-                        extra.append(zinfo.header_offset)
+                    header_offset = zinfo.header_offset - self._start_disk
+                    if header_offset > ZIP64_LIMIT:
+                        extra.append(header_offset)
                         header_offset = 0xffffffffL
-                    else:
-                        header_offset = zinfo.header_offset
 
                     extra_data = zinfo.extra
                     if extra:
@@ -1332,7 +1334,7 @@ class ZipFile(object):
                 # Write end-of-zip-archive record
                 centDirCount = len(self.filelist)
                 centDirSize = pos2 - pos1
-                centDirOffset = pos1
+                centDirOffset = pos1 - self._start_disk
                 requires_zip64 = None
                 if centDirCount > ZIP_FILECOUNT_LIMIT:
                     requires_zip64 = "Files count"
index 6be73784fc90b4c2f2bc0b0ff7b6ff5341f8e02d..1a737b84f83220ad1215a5dfa9e0168dfdce759b 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #26293: Fixed writing ZIP files that starts not from the start of the
+  file.  Offsets in ZIP file now are relative to the start of the archive in
+  conforming to the specification.
+
 - Fix possible integer overflows and crashes in the mmap module with unusual
   usage patterns.