This allows for nested zip files, tar files within zip files, zip files within tar files, etc.
Contributed by: John Jolly
With *mode* ``'r'`` the file-like object
(``ZipExtFile``) is read-only and provides the following methods:
:meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`,
- :meth:`~io.IOBase.readlines`, :meth:`__iter__`,
- :meth:`~iterator.__next__`. These objects can operate independently of
- the ZipFile.
+ :meth:`~io.IOBase.readlines`, :meth:`~io.IOBase.seek`,
+ :meth:`~io.IOBase.tell`, :meth:`__iter__`, :meth:`~iterator.__next__`.
+ These objects can operate independently of the ZipFile.
With ``mode='w'``, a writable file handle is returned, which supports the
:meth:`~io.BufferedIOBase.write` method. While a writable file handle is open,
self.assertEqual(zipf.read('baz'), msg3)
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])
+ def test_seek_tell(self):
+ # Test seek functionality
+ txt = b"Where's Bruce?"
+ bloc = txt.find(b"Bruce")
+ # Check seek on a file
+ with zipfile.ZipFile(TESTFN, "w") as zipf:
+ zipf.writestr("foo.txt", txt)
+ with zipfile.ZipFile(TESTFN, "r") as zipf:
+ with zipf.open("foo.txt", "r") as fp:
+ fp.seek(bloc, os.SEEK_SET)
+ self.assertEqual(fp.tell(), bloc)
+ fp.seek(-bloc, os.SEEK_CUR)
+ self.assertEqual(fp.tell(), 0)
+ fp.seek(bloc, os.SEEK_CUR)
+ self.assertEqual(fp.tell(), bloc)
+ self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+ fp.seek(0, os.SEEK_END)
+ self.assertEqual(fp.tell(), len(txt))
+ # Check seek on memory file
+ data = io.BytesIO()
+ with zipfile.ZipFile(data, mode="w") as zipf:
+ zipf.writestr("foo.txt", txt)
+ with zipfile.ZipFile(data, mode="r") as zipf:
+ with zipf.open("foo.txt", "r") as fp:
+ fp.seek(bloc, os.SEEK_SET)
+ self.assertEqual(fp.tell(), bloc)
+ fp.seek(-bloc, os.SEEK_CUR)
+ self.assertEqual(fp.tell(), 0)
+ fp.seek(bloc, os.SEEK_CUR)
+ self.assertEqual(fp.tell(), bloc)
+ self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+ fp.seek(0, os.SEEK_END)
+ self.assertEqual(fp.tell(), len(txt))
+
def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
self._close = close
self._lock = lock
self._writing = writing
+ self.seekable = file.seekable
+ self.tell = file.tell
+
+ def seek(self, offset, whence=0):
+ with self._lock:
+ if self.writing():
+ raise ValueError("Can't reposition in the ZIP file while "
+ "there is an open writing handle on it. "
+ "Close the writing handle before trying to read.")
+ self._file.seek(self._pos)
+ self._pos = self._file.tell()
+ return self._pos
def read(self, n=-1):
with self._lock:
# Read from compressed files in 4k blocks.
MIN_READ_SIZE = 4096
+ # Chunk size to read during seek
+ MAX_SEEK_READ = 1 << 24
+
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
close_fileobj=False):
self._fileobj = fileobj
else:
self._expected_crc = None
+ self._seekable = False
+ try:
+ if fileobj.seekable():
+ self._orig_compress_start = fileobj.tell()
+ self._orig_compress_size = zipinfo.compress_size
+ self._orig_file_size = zipinfo.file_size
+ self._orig_start_crc = self._running_crc
+ self._seekable = True
+ except AttributeError:
+ pass
+
def __repr__(self):
result = ['<%s.%s' % (self.__class__.__module__,
self.__class__.__qualname__)]
finally:
super().close()
+ def seekable(self):
+ return self._seekable
+
+ def seek(self, offset, whence=0):
+ if not self._seekable:
+ raise io.UnsupportedOperation("underlying stream is not seekable")
+ curr_pos = self.tell()
+ if whence == 0: # Seek from start of file
+ new_pos = offset
+ elif whence == 1: # Seek from current position
+ new_pos = curr_pos + offset
+ elif whence == 2: # Seek from EOF
+ new_pos = self._orig_file_size + offset
+ else:
+ raise ValueError("whence must be os.SEEK_SET (0), "
+ "os.SEEK_CUR (1), or os.SEEK_END (2)")
+
+ if new_pos > self._orig_file_size:
+ new_pos = self._orig_file_size
+
+ if new_pos < 0:
+ new_pos = 0
+
+ read_offset = new_pos - curr_pos
+ buff_offset = read_offset + self._offset
+
+ if buff_offset >= 0 and buff_offset < len(self._readbuffer):
+ # Just move the _offset index if the new position is in the _readbuffer
+ self._offset = buff_offset
+ read_offset = 0
+ elif read_offset < 0:
+ # Position is before the current position. Reset the ZipExtFile
+
+ self._fileobj.seek(self._orig_compress_start)
+ self._running_crc = self._orig_start_crc
+ self._compress_left = self._orig_compress_size
+ self._left = self._orig_file_size
+ self._readbuffer = b''
+ self._offset = 0
+ self._decompressor = zipfile._get_decompressor(self._compress_type)
+ self._eof = False
+ read_offset = new_pos
+
+ while read_offset > 0:
+ read_len = min(self.MAX_SEEK_READ, read_offset)
+ self.read(read_len)
+ read_offset -= read_len
+
+ return self.tell()
+
+ def tell(self):
+ if not self._seekable:
+ raise io.UnsupportedOperation("underlying stream is not seekable")
+ filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
+ return filepos
+
class _ZipWriteFile(io.BufferedIOBase):
def __init__(self, zf, zinfo, zip64):
--- /dev/null
+Added seek and tell to the ZipExtFile class. This only works if the file
+object used to open the zipfile is seekable.