Issue #16350, part 2: Set unused_data (and unconsumed_tail) correctly in decompressob...

author Nadeem Vawda <nadeem.vawda@gmail.com>

Sun, 11 Nov 2012 01:14:36 +0000 (02:14 +0100)

committer Nadeem Vawda <nadeem.vawda@gmail.com>

Sun, 11 Nov 2012 01:14:36 +0000 (02:14 +0100)
author Nadeem Vawda <nadeem.vawda@gmail.com>
Sun, 11 Nov 2012 01:14:36 +0000 (02:14 +0100)
committer Nadeem Vawda <nadeem.vawda@gmail.com>
Sun, 11 Nov 2012 01:14:36 +0000 (02:14 +0100)
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py

index c17b4d0b36e1e04b08b62542474b8429bf215896..6d4b2c350eb1ae79472118ea46123a45de0f0f9d 100644 (file)
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -437,15 +437,27 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
      def test_decompress_unused_data(self):
          # Repeated calls to decompress() after EOF should accumulate data in
          # dco.unused_data, instead of just storing the arg to the last call.
-        x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE
-        for step in 1, 2, 100:
-            dco = zlib.decompressobj()
-            data = b''.join(dco.decompress(x[i : i + step])
-                            for i in range(0, len(x), step))
-            data += dco.flush()
-
-            self.assertEqual(data, HAMLET_SCENE)
-            self.assertEqual(dco.unused_data, HAMLET_SCENE)
+        source = b'abcdefghijklmnopqrstuvwxyz'
+        remainder = b'0123456789'
+        y = zlib.compress(source)
+        x = y + remainder
+        for maxlen in 0, 1000:
+            for step in 1, 2, len(y), len(x):
+                dco = zlib.decompressobj()
+                data = b''
+                for i in range(0, len(x), step):
+                    if i < len(y):
+                        self.assertEqual(dco.unused_data, b'')
+                    if maxlen == 0:
+                        data += dco.decompress(x[i : i + step])
+                        self.assertEqual(dco.unconsumed_tail, b'')
+                    else:
+                        data += dco.decompress(
+                                dco.unconsumed_tail + x[i : i + step], maxlen)
+                data += dco.flush()
+                self.assertEqual(data, source)
+                self.assertEqual(dco.unconsumed_tail, b'')
+                self.assertEqual(dco.unused_data, remainder)
  
      if hasattr(zlib.compressobj(), "copy"):
          def test_compresscopy(self):
diff --git a/Misc/NEWS b/Misc/NEWS

index b035e24a2f83a15d58a40678c80871b15aafba32..01517e10f9105094c763a55cd276903381b1420e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -162,9 +162,11 @@ Library
  - Issue #16357: fix calling accept() on a SSLSocket created through
    SSLContext.wrap_socket().  Original patch by Jeff McNeil.
  
-- Issue #16350: zlib.Decompress.decompress() now accumulates data from
+- Issue #16350: zlib.decompressobj().decompress() now accumulates data from
    successive calls after EOF in unused_data, instead of only saving the argument
-  to the last call. Patch by Serhiy Storchaka.
+  to the last call. decompressobj().flush() now correctly sets unused_data and
+  unconsumed_tail. A bug in the handling of MemoryError when setting the
+  unconsumed_tail attribute has also been fixed. Patch by Serhiy Storchaka.
  
  - Issue #12759: sre_parse now raises a proper error when the name of the group
    is missing.  Initial patch by Serhiy Storchaka.
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c

index 7734ba6dfaa7fbbe92866b5f522bb9a8a55639e4..6d4aa3a251bdb00c433b395125ec91626b4f4da4 100644 (file)
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -499,6 +499,49 @@ PyZlib_objcompress(compobject *self, PyObject *args)
      return RetVal;
  }
  
+/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in
+   self->unused_data or self->unconsumed_tail, as appropriate. */
+static int
+save_unconsumed_input(compobject *self, int err)
+{
+    if (err == Z_STREAM_END) {
+        /* The end of the compressed data has been reached. Store the leftover
+           input data in self->unused_data. */
+        if (self->zst.avail_in > 0) {
+            Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data);
+            Py_ssize_t new_size;
+            PyObject *new_data;
+            if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+                PyErr_NoMemory();
+                return -1;
+            }
+            new_size = old_size + self->zst.avail_in;
+            new_data = PyBytes_FromStringAndSize(NULL, new_size);
+            if (new_data == NULL)
+                return -1;
+            Py_MEMCPY(PyBytes_AS_STRING(new_data),
+                      PyBytes_AS_STRING(self->unused_data), old_size);
+            Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size,
+                      self->zst.next_in, self->zst.avail_in);
+            Py_DECREF(self->unused_data);
+            self->unused_data = new_data;
+            self->zst.avail_in = 0;
+        }
+    }
+    if (self->zst.avail_in > 0 || PyBytes_GET_SIZE(self->unconsumed_tail)) {
+        /* This code handles two distinct cases:
+           1. Output limit was reached. Save leftover input in unconsumed_tail.
+           2. All input data was consumed. Clear unconsumed_tail. */
+        PyObject *new_data = PyBytes_FromStringAndSize(
+                (char *)self->zst.next_in, self->zst.avail_in);
+        if (new_data == NULL)
+            return -1;
+        Py_DECREF(self->unconsumed_tail);
+        self->unconsumed_tail = new_data;
+    }
+    return 0;
+}
+
  PyDoc_STRVAR(decomp_decompress__doc__,
  "decompress(data, max_length) -- Return a string containing the decompressed\n"
  "version of the data.\n"
@@ -585,60 +628,20 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
          Py_END_ALLOW_THREADS
      }
  
-    if(max_length) {
-        /* Not all of the compressed data could be accommodated in a buffer of
-           the specified size. Return the unconsumed tail in an attribute. */
-        Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyBytes_FromStringAndSize((char *)self->zst.next_in,
-                                                           self->zst.avail_in);
-    }
-    else if (PyBytes_GET_SIZE(self->unconsumed_tail) > 0) {
-        /* All of the compressed data was consumed. Clear unconsumed_tail. */
-        Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyBytes_FromStringAndSize("", 0);
-    }
-    if (self->unconsumed_tail == NULL) {
+    if (save_unconsumed_input(self, err) < 0) {
          Py_DECREF(RetVal);
          RetVal = NULL;
          goto error;
      }
  
-    /* The end of the compressed data has been reached, so set the
-       unused_data attribute to a string containing the remainder of the
-       data in the string.  Note that this is also a logical place to call
-       inflateEnd, but the old behaviour of only calling it on flush() is
-       preserved.
-    */
-    if (err == Z_STREAM_END) {
-        if (self->zst.avail_in > 0) {
-            /* Append the leftover data to the existing value of unused_data. */
-            Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data);
-            Py_ssize_t new_size = old_size + self->zst.avail_in;
-            PyObject *new_data;
-            if (new_size <= old_size) {  /* Check for overflow. */
-                PyErr_NoMemory();
-                Py_DECREF(RetVal);
-                RetVal = NULL;
-                goto error;
-            }
-            new_data = PyBytes_FromStringAndSize(NULL, new_size);
-            if (new_data == NULL) {
-                Py_DECREF(RetVal);
-                RetVal = NULL;
-                goto error;
-            }
-            Py_MEMCPY(PyBytes_AS_STRING(new_data),
-                      PyBytes_AS_STRING(self->unused_data), old_size);
-            Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size,
-                      self->zst.next_in, self->zst.avail_in);
-            Py_DECREF(self->unused_data);
-            self->unused_data = new_data;
-        }
+    /* This is the logical place to call inflateEnd, but the old behaviour of
+       only calling it on flush() is preserved. */
+
+    if (err != Z_STREAM_END && err != Z_OK && err != Z_BUF_ERROR) {
          /* We will only get Z_BUF_ERROR if the output buffer was full
             but there wasn't more output when we tried again, so it is
             not an error condition.
          */
-    } else if (err != Z_OK && err != Z_BUF_ERROR) {
          zlib_error(self->zst, err, "while decompressing");
          Py_DECREF(RetVal);
          RetVal = NULL;
@@ -904,6 +907,12 @@ PyZlib_unflush(compobject *self, PyObject *args)
          Py_END_ALLOW_THREADS
      }
  
+    if (save_unconsumed_input(self, err) < 0) {
+        Py_DECREF(retval);
+        retval = NULL;
+        goto error;
+    }
+
      /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
         various data structures. Note we should only get Z_STREAM_END when
         flushmode is Z_FINISH */
@@ -917,6 +926,7 @@ PyZlib_unflush(compobject *self, PyObject *args)
              goto error;
          }
      }
+
      if (_PyBytes_Resize(&retval, self->zst.total_out - start_total_out) < 0) {
          Py_DECREF(retval);
          retval = NULL;
author	Nadeem Vawda <nadeem.vawda@gmail.com>
	Sun, 11 Nov 2012 01:14:36 +0000 (02:14 +0100)
committer	Nadeem Vawda <nadeem.vawda@gmail.com>
	Sun, 11 Nov 2012 01:14:36 +0000 (02:14 +0100)
Lib/test/test_zlib.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Modules/zlibmodule.c		patch \| blob \| history