granicus.if.org Git - python/commitdiff
Fix 64-bit safety issue in BZ2Compressor and BZ2Decompressor.
author: Nadeem Vawda <nadeem.vawda@gmail.com>
Tue, 12 Apr 2011 21:02:42 +0000 (23:02 +0200)
committer: Nadeem Vawda <nadeem.vawda@gmail.com>
Tue, 12 Apr 2011 21:02:42 +0000 (23:02 +0200)
Lib/test/test_bz2.py
Modules/_bz2module.c

index cee38e0fd1758c55b4dc438e316201c7f6f073d4..3567b3625748828e6cebadba6f0037896bfc7a43 100644 (file)
@@ -1,10 +1,11 @@
 #!/usr/bin/env python3
 from test import support
-from test.support import TESTFN
+from test.support import TESTFN, precisionbigmemtest, _4G
 
 import unittest
 from io import BytesIO
 import os
+import random
 import subprocess
 import sys
 
@@ -415,6 +416,23 @@ class BZ2CompressorTest(BaseTest):
         data += bz2c.flush()
         self.assertEqual(self.decompress(data), self.TEXT)
 
+    @precisionbigmemtest(size=_4G + 100, memuse=2)
+    def testCompress4G(self, size):
+        # "Test BZ2Compressor.compress()/flush() with >4GiB input"
+        bz2c = BZ2Compressor()
+        data = b"x" * size
+        try:
+            compressed = bz2c.compress(data)
+            compressed += bz2c.flush()
+        finally:
+            data = None  # Release memory
+        data = bz2.decompress(compressed)
+        try:
+            self.assertEqual(len(data), size)
+            self.assertEqual(len(data.strip(b"x")), 0)
+        finally:
+            data = None
+
 class BZ2DecompressorTest(BaseTest):
     def test_Constructor(self):
         self.assertRaises(TypeError, BZ2Decompressor, 42)
@@ -453,6 +471,22 @@ class BZ2DecompressorTest(BaseTest):
         text = bz2d.decompress(self.DATA)
         self.assertRaises(EOFError, bz2d.decompress, b"anything")
 
+    @precisionbigmemtest(size=_4G + 100, memuse=3)
+    def testDecompress4G(self, size):
+        # "Test BZ2Decompressor.decompress() with >4GiB input"
+        blocksize = 10 * 1024 * 1024
+        block = random.getrandbits(blocksize * 8).to_bytes(blocksize, 'little')
+        try:
+            data = block * (size // blocksize + 1)
+            compressed = bz2.compress(data)
+            bz2d = BZ2Decompressor()
+            decompressed = bz2d.decompress(compressed)
+            self.assertTrue(decompressed == data)
+        finally:
+            data = None
+            compressed = None
+            decompressed = None
+
 
 class FuncTest(BaseTest):
     "Test module functions"
index 522b3e56585397e5fe8f14e51990960ac0b596a3..d329c146261bf7bfb43261b3d282dfbff1e706cc 100644 (file)
@@ -36,6 +36,8 @@
 #define RELEASE_LOCK(obj)
 #endif
 
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
 
 typedef struct {
     PyObject_HEAD
@@ -145,8 +147,10 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)
     if (result == NULL)
         return NULL;
     c->bzs.next_in = data;
-    /* FIXME This is not 64-bit clean - avail_in is an int. */
-    c->bzs.avail_in = len;
+    /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
+       Do compression in chunks of no more than UINT_MAX bytes each. */
+    c->bzs.avail_in = MIN(len, UINT_MAX);
+    len -= c->bzs.avail_in;
     c->bzs.next_out = PyBytes_AS_STRING(result);
     c->bzs.avail_out = PyBytes_GET_SIZE(result);
     for (;;) {
@@ -161,6 +165,11 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)
         if (catch_bz2_error(bzerror))
             goto error;
 
+        if (c->bzs.avail_in == 0 && len > 0) {
+            c->bzs.avail_in = MIN(len, UINT_MAX);
+            len -= c->bzs.avail_in;
+        }
+
         /* In regular compression mode, stop when input data is exhausted.
            In flushing mode, stop when all buffered data has been flushed. */
         if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
@@ -354,8 +363,10 @@ decompress(BZ2Decompressor *d, char *data, size_t len)
     if (result == NULL)
         return result;
     d->bzs.next_in = data;
-    /* FIXME This is not 64-bit clean - avail_in is an int. */
-    d->bzs.avail_in = len;
+    /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
+       Do decompression in chunks of no more than UINT_MAX bytes each. */
+    d->bzs.avail_in = MIN(len, UINT_MAX);
+    len -= d->bzs.avail_in;
     d->bzs.next_out = PyBytes_AS_STRING(result);
     d->bzs.avail_out = PyBytes_GET_SIZE(result);
     for (;;) {
@@ -371,17 +382,21 @@ decompress(BZ2Decompressor *d, char *data, size_t len)
             goto error;
         if (bzerror == BZ_STREAM_END) {
             d->eof = 1;
-            if (d->bzs.avail_in > 0) { /* Save leftover input to unused_data */
+            len += d->bzs.avail_in;
+            if (len > 0) { /* Save leftover input to unused_data */
                 Py_CLEAR(d->unused_data);
-                d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in,
-                                                           d->bzs.avail_in);
+                d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
                 if (d->unused_data == NULL)
                     goto error;
             }
             break;
         }
-        if (d->bzs.avail_in == 0)
-            break;
+        if (d->bzs.avail_in == 0) {
+            if (len == 0)
+                break;
+            d->bzs.avail_in = MIN(len, UINT_MAX);
+            len -= d->bzs.avail_in;
+        }
         if (d->bzs.avail_out == 0) {
             if (grow_buffer(&result) < 0)
                 goto error;