granicus.if.org Git - python/commitdiff
[ 1488881 ] tarfile.py: support for file-objects and bz2 (cp. #1488634)
author: Georg Brandl <georg@python.org>
Mon, 15 May 2006 19:30:35 +0000 (19:30 +0000)
committer: Georg Brandl <georg@python.org>
Mon, 15 May 2006 19:30:35 +0000 (19:30 +0000)
Lib/tarfile.py
Lib/test/test_tarfile.py
Misc/NEWS

index 8987ca709e7e54649ba2db7cf7ebf208f2ce79ef..6c297838b88f3c141d8b08339498381ba7532462 100644 (file)
@@ -556,6 +556,69 @@ class _StreamProxy(object):
         self.fileobj.close()
 # class StreamProxy
 
+class _BZ2Proxy(object):
+    """Small proxy class that enables external file object
+       support for "r:bz2" and "w:bz2" modes. This is actually
+       a workaround for a limitation in bz2 module's BZ2File
+       class which (unlike gzip.GzipFile) has no support for
+       a file object argument.
+
+       With mode "r" the proxy decompresses data read from
+       fileobj; with any other mode it compresses data that is
+       written to fileobj.
+    """
+
+    # Number of compressed bytes requested from fileobj per read.
+    blocksize = 16 * 1024
+
+    def __init__(self, fileobj, mode):
+        """Wrap fileobj. mode "r" selects decompression, any other
+           value selects compression.
+        """
+        self.fileobj = fileobj
+        self.mode = mode
+        self.init()
+
+    def init(self):
+        """Reset the position to 0 and create a fresh bz2
+           (de)compressor. In read mode fileobj is rewound to
+           offset 0 and the buffer is emptied as well.
+        """
+        import bz2
+        self.pos = 0
+        if self.mode == "r":
+            self.bz2obj = bz2.BZ2Decompressor()
+            self.fileobj.seek(0)
+            self.buf = ""
+        else:
+            self.bz2obj = bz2.BZ2Compressor()
+
+    def read(self, size):
+        """Return up to size bytes of decompressed data. Fewer
+           bytes are returned if fileobj runs out of data or the
+           end of the bz2 stream is reached.
+        """
+        b = [self.buf]
+        x = len(self.buf)
+        while x < size:
+            raw = self.fileobj.read(self.blocksize)
+            if not raw:
+                # fileobj is exhausted. Without this check an empty
+                # or truncated stream would loop forever, because
+                # decompress("") returns "" and does not raise
+                # EOFError before the end-of-stream marker was seen.
+                break
+            try:
+                data = self.bz2obj.decompress(raw)
+            except EOFError:
+                # The end-of-stream marker has already been reached.
+                break
+            b.append(data)
+            x += len(data)
+        self.buf = "".join(b)
+
+        # Hand out the requested part and keep the rest buffered.
+        buf = self.buf[:size]
+        self.buf = self.buf[size:]
+        self.pos += len(buf)
+        return buf
+
+    def seek(self, pos):
+        """Move to the absolute offset pos in the decompressed data.
+           Seeking forward reads and discards data; seeking backwards
+           first restarts from the beginning via init().
+        """
+        if pos < self.pos:
+            self.init()
+        self.read(pos - self.pos)
+
+    def tell(self):
+        """Return the current offset in the uncompressed data."""
+        return self.pos
+
+    def write(self, data):
+        """Compress data and write the result to fileobj."""
+        self.pos += len(data)
+        raw = self.bz2obj.compress(data)
+        self.fileobj.write(raw)
+
+    def close(self):
+        """In mode "w", flush the compressor, write the remaining
+           data and close fileobj. In other modes do nothing.
+        """
+        if self.mode == "w":
+            raw = self.bz2obj.flush()
+            self.fileobj.write(raw)
+            self.fileobj.close()
+# class _BZ2Proxy
+
 #------------------------
 # Extraction file object
 #------------------------
@@ -1057,10 +1120,12 @@ class TarFile(object):
         tarname = pre + ext
 
         if fileobj is not None:
-            raise ValueError, "no support for external file objects"
+            fileobj = _BZ2Proxy(fileobj, mode)
+        else:
+            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
 
         try:
-            t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
+            t = cls.taropen(tarname, mode, fileobj)
         except IOError:
             raise ReadError, "not a bzip2 file"
         t._extfileobj = False
index 03fb55f935338cdce51de6269a138665dfd93eb7..cd58c9a459416410739639717ecb05798ba3e97c 100644 (file)
@@ -212,6 +212,17 @@ class ReadStreamTest(ReadTest):
 
         stream.close()
 
+class ReadDetectTest(ReadTest):
+    """Run the tests inherited from ReadTest on an archive that
+       is opened by file name with tarfile.open().
+    """
+
+    def setUp(self):
+        path = tarname(self.comp)
+        self.tar = tarfile.open(path, self.mode)
+
+class ReadDetectFileobjTest(ReadTest):
+    """Run the tests inherited from ReadTest on an archive that
+       is passed to tarfile.open() as an already opened external
+       file object.
+    """
+
+    def setUp(self):
+        name = tarname(self.comp)
+        # Open in binary mode: the archive may be compressed, and
+        # text mode corrupts binary data on platforms that translate
+        # line endings (e.g. Windows).
+        self.tar = tarfile.open(name, mode=self.mode,
+                                fileobj=file(name, "rb"))
+
 class ReadAsteriskTest(ReadTest):
 
     def setUp(self):
@@ -503,6 +514,10 @@ class WriteTestGzip(WriteTest):
     comp = "gz"
 class WriteStreamTestGzip(WriteStreamTest):
     comp = "gz"
+# ReadDetectTest with comp set to "gz".
+class ReadDetectTestGzip(ReadDetectTest):
+    comp = "gz"
+# ReadDetectFileobjTest with comp set to "gz".
+class ReadDetectFileobjTestGzip(ReadDetectFileobjTest):
+    comp = "gz"
 class ReadAsteriskTestGzip(ReadAsteriskTest):
     comp = "gz"
 class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest):
@@ -526,6 +541,10 @@ if bz2:
         comp = "bz2"
     class WriteStreamTestBzip2(WriteStreamTestGzip):
         comp = "bz2"
+    # ReadDetectTest with comp set to "bz2".
+    class ReadDetectTestBzip2(ReadDetectTest):
+        comp = "bz2"
+    # ReadDetectFileobjTest with comp set to "bz2".
+    class ReadDetectFileobjTestBzip2(ReadDetectFileobjTest):
+        comp = "bz2"
     class ReadAsteriskTestBzip2(ReadAsteriskTest):
         comp = "bz2"
     class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest):
@@ -550,6 +569,8 @@ def test_main():
         FileModeTest,
         ReadTest,
         ReadStreamTest,
+        ReadDetectTest,
+        ReadDetectFileobjTest,
         ReadAsteriskTest,
         ReadStreamAsteriskTest,
         WriteTest,
@@ -567,6 +588,7 @@ def test_main():
         tests.extend([
             ReadTestGzip, ReadStreamTestGzip,
             WriteTestGzip, WriteStreamTestGzip,
+            ReadDetectTestGzip, ReadDetectFileobjTestGzip,
             ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip
         ])
 
@@ -574,6 +596,7 @@ def test_main():
         tests.extend([
             ReadTestBzip2, ReadStreamTestBzip2,
             WriteTestBzip2, WriteStreamTestBzip2,
+            ReadDetectTestBzip2, ReadDetectFileobjTestBzip2,
             ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2
         ])
     try:
index d2aefb1e0c7ad87635cd66b13d5af5ebfe6e2ebb..6551b99eb76c6ce16878951ef4f1a243aab555d6 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,9 @@ Extension Modules
 Library
 -------
 
+- Patch #1488881: add support for external file objects in
+  bz2-compressed tarfiles.
+
 - Patch #721464: pdb.Pdb instances can now be given explicit stdin and
   stdout arguments, making it possible to redirect input and output
   for remote debugging.