From: Nadeem Vawda <nadeem.vawda@gmail.com>
Date: Sun, 11 Nov 2012 01:21:22 +0000 (+0100)
Subject: Issue #16350, part 2: Set unused_data (and unconsumed_tail) correctly in decompressobj().flush().
X-Git-Tag: v3.3.1rc1~655
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dd1253abdd8564b095f24107547be0b8ce91e653;p=python

Issue #16350, part 2: Set unused_data (and unconsumed_tail) correctly in decompressobj().flush().

Additionally, fix a bug where a MemoryError in allocating a bytes object could
leave the decompressor object in an invalid state (with its unconsumed_tail
member being NULL).

Patch by Serhiy Storchaka.
---

dd1253abdd8564b095f24107547be0b8ce91e653
diff --cc Lib/test/test_zlib.py
index b6a60f4d68,6d4b2c350e..f5180e0bdc
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@@ -490,16 -437,27 +490,28 @@@ class CompressObjectTestCase(BaseCompre
      def test_decompress_unused_data(self):
          # Repeated calls to decompress() after EOF should accumulate data in
          # dco.unused_data, instead of just storing the arg to the last call.
-         x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE
-         for step in 1, 2, 100:
-             dco = zlib.decompressobj()
-             data = b''.join(dco.decompress(x[i : i + step])
-                             for i in range(0, len(x), step))
-             data += dco.flush()
- 
-             self.assertTrue(dco.eof)
-             self.assertEqual(data, HAMLET_SCENE)
-             self.assertEqual(dco.unused_data, HAMLET_SCENE)
+         source = b'abcdefghijklmnopqrstuvwxyz'
+         remainder = b'0123456789'
+         y = zlib.compress(source)
+         x = y + remainder
+         for maxlen in 0, 1000:
+             for step in 1, 2, len(y), len(x):
+                 dco = zlib.decompressobj()
+                 data = b''
+                 for i in range(0, len(x), step):
+                     if i < len(y):
+                         self.assertEqual(dco.unused_data, b'')
+                     if maxlen == 0:
+                         data += dco.decompress(x[i : i + step])
+                         self.assertEqual(dco.unconsumed_tail, b'')
+                     else:
+                         data += dco.decompress(
+                                 dco.unconsumed_tail + x[i : i + step], maxlen)
+                 data += dco.flush()
++                self.assertTrue(dco.eof)
+                 self.assertEqual(data, source)
+                 self.assertEqual(dco.unconsumed_tail, b'')
+                 self.assertEqual(dco.unused_data, remainder)
  
      if hasattr(zlib.compressobj(), "copy"):
          def test_compresscopy(self):
diff --cc Misc/NEWS
index 9c0ea03fc4,01517e10f9..de6ec4f262
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@@ -83,17 -162,11 +83,19 @@@ Librar
  - Issue #16357: fix calling accept() on a SSLSocket created through
    SSLContext.wrap_socket().  Original patch by Jeff McNeil.
  
 +- Issue #16409: The reporthook callback made by the legacy
 +  urllib.request.urlretrieve API now properly supplies a constant non-zero
 +  block_size as it did in Python 3.2 and 2.7.  This matches the behavior of
 +  urllib.request.URLopener.retrieve.
 +
 +- Issue #16431: Use the type information when constructing a Decimal subtype
 +  from a Decimal argument.
 +
- - Issue #16350: zlib.Decompress.decompress() now accumulates data from
+ - Issue #16350: zlib.decompressobj().decompress() now accumulates data from
    successive calls after EOF in unused_data, instead of only saving the argument
-   to the last call. Patch by Serhiy Storchaka.
+   to the last call. decompressobj().flush() now correctly sets unused_data and
+   unconsumed_tail. A bug in the handling of MemoryError when setting the
+   unconsumed_tail attribute has also been fixed. Patch by Serhiy Storchaka.
  
  - Issue #12759: sre_parse now raises a proper error when the name of the group
    is missing.  Initial patch by Serhiy Storchaka.
diff --cc Modules/zlibmodule.c
index 888ef5388d,6d4aa3a251..9fabb00648
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@@ -686,44 -634,15 +717,16 @@@ PyZlib_objdecompress(compobject *self, 
          goto error;
      }
  
-     /* The end of the compressed data has been reached, so set the
-        unused_data attribute to a string containing the remainder of the
-        data in the string.  Note that this is also a logical place to call
-        inflateEnd, but the old behaviour of only calling it on flush() is
-        preserved.
-     */
 -    /* This is the logical place to call inflateEnd, but the old behaviour of
 -       only calling it on flush() is preserved. */
 -
 -    if (err != Z_STREAM_END && err != Z_OK && err != Z_BUF_ERROR) {
 +    if (err == Z_STREAM_END) {
-         if (self->zst.avail_in > 0) {
-             /* Append the leftover data to the existing value of unused_data. */
-             Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data);
-             Py_ssize_t new_size = old_size + self->zst.avail_in;
-             PyObject *new_data;
-             if (new_size <= old_size) {  /* Check for overflow. */
-                 PyErr_NoMemory();
-                 Py_DECREF(RetVal);
-                 RetVal = NULL;
-                 goto error;
-             }
-             new_data = PyBytes_FromStringAndSize(NULL, new_size);
-             if (new_data == NULL) {
-                 Py_DECREF(RetVal);
-                 RetVal = NULL;
-                 goto error;
-             }
-             Py_MEMCPY(PyBytes_AS_STRING(new_data),
-                       PyBytes_AS_STRING(self->unused_data), old_size);
-             Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size,
-                       self->zst.next_in, self->zst.avail_in);
-             Py_DECREF(self->unused_data);
-             self->unused_data = new_data;
-         }
++        /* This is the logical place to call inflateEnd, but the old behaviour
++           of only calling it on flush() is preserved. */
 +        self->eof = 1;
++    } else if (err != Z_OK && err != Z_BUF_ERROR) {
          /* We will only get Z_BUF_ERROR if the output buffer was full
             but there wasn't more output when we tried again, so it is
             not an error condition.
          */
-     } else if (err != Z_OK && err != Z_BUF_ERROR) {
 -        zlib_error(self->zst, err, "while decompressing");
 +        zlib_error(self->zst, err, "while decompressing data");
          Py_DECREF(RetVal);
          RetVal = NULL;
          goto error;
@@@ -996,13 -907,20 +999,19 @@@ PyZlib_unflush(compobject *self, PyObje
          Py_END_ALLOW_THREADS
      }
  
+     if (save_unconsumed_input(self, err) < 0) {
+         Py_DECREF(retval);
+         retval = NULL;
+         goto error;
+     }
+ 
 -    /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
 -       various data structures. Note we should only get Z_STREAM_END when
 -       flushmode is Z_FINISH */
 +    /* If at end of stream, clean up any memory allocated by zlib. */
      if (err == Z_STREAM_END) {
 -        err = inflateEnd(&(self->zst));
 +        self->eof = 1;
          self->is_initialised = 0;
 +        err = inflateEnd(&(self->zst));
          if (err != Z_OK) {
 -            zlib_error(self->zst, err, "from inflateEnd()");
 +            zlib_error(self->zst, err, "while finishing decompression");
              Py_DECREF(retval);
              retval = NULL;
              goto error;