[ #403753 ] zlib decompress; uncontrollable memory usage

author Jeremy Hylton <jeremy@alum.mit.edu>

Tue, 16 Oct 2001 20:39:49 +0000 (20:39 +0000)

committer Jeremy Hylton <jeremy@alum.mit.edu>

Tue, 16 Oct 2001 20:39:49 +0000 (20:39 +0000)
author Jeremy Hylton <jeremy@alum.mit.edu>
Tue, 16 Oct 2001 20:39:49 +0000 (20:39 +0000)
committer Jeremy Hylton <jeremy@alum.mit.edu>
Tue, 16 Oct 2001 20:39:49 +0000 (20:39 +0000)
diff --git a/Doc/lib/libzlib.tex b/Doc/lib/libzlib.tex

index e384b1ff5bda9146ed03c4d7de638ace8c6d9534..b9726d78d02e21c4da61e6c9a36e8f9dd218aaf6 100644 (file)
--- a/Doc/lib/libzlib.tex
+++ b/Doc/lib/libzlib.tex
@@ -120,7 +120,7 @@ prevents compressing any more data.  After calling
  action is to delete the object.  
  \end{methoddesc}
  
-Decompression objects support the following methods, and a single attribute:
+Decompression objects support the following methods, and two attributes:
  
  \begin{memberdesc}{unused_data}
  A string which contains any unused data from the last string fed to
@@ -135,13 +135,27 @@ reading data and feeding it into a decompression object's
  no longer the empty string.  
  \end{memberdesc}
  
-\begin{methoddesc}[Decompress]{decompress}{string}
+\begin{memberdesc}{unconsumed_tail}
+A string that contains any data that was not consumed by the last
+\method{decompress()} call because it exceeded the limit for the
+uncompressed data buffer.
+\end{memberdesc}
+
+\begin{methoddesc}[Decompress]{decompress}{string\optional{, max_length}}
  Decompress \var{string}, returning a string containing the
  uncompressed data corresponding to at least part of the data in
  \var{string}.  This data should be concatenated to the output produced
  by any preceding calls to the
  \method{decompress()} method.  Some of the input data may be preserved
  in internal buffers for later processing.
+
+If the optional parameter \var{max_length} is supplied then the return value
+will be no longer than \var{max_length}. This may mean that not all of the
+compressed input can be processed, and any unconsumed data will be stored
+in the attribute \member{unconsumed_tail}. This string must be passed
+to a subsequent call to \method{decompress()} if decompression is to
+continue.  If \var{max_length} is not supplied then the whole input is
+decompressed, and \member{unconsumed_tail} is an empty string.
  \end{methoddesc}
  
  \begin{methoddesc}[Decompress]{flush}{}
diff --git a/Lib/test/output/test_zlib b/Lib/test/output/test_zlib

index 61c33cf21488017528274a0a5832af3474f5fd31..1c2e2e9df6d890c5b115e8957deab67478b4d69f 100644 (file)
--- a/Lib/test/output/test_zlib
+++ b/Lib/test/output/test_zlib
@@ -8,4 +8,7 @@ normal compression/decompression succeeded
  compress/decompression obj succeeded
  decompress with init options succeeded
  decompressobj with init options succeeded
+should be '': ''
+max_length decompressobj succeeded
+unconsumed_tail should be '': ''
  Testing on 17K of random data
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py

index 439db2273906ca26818f4632af57869515e03aea..915f582abe2fc3968161b7fafc626e3ca0aabbd3 100644 (file)
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -76,6 +76,36 @@ if decomp2 != buf:
  else:
      print "decompressobj with init options succeeded"
  
+print "should be '':", `deco.unconsumed_tail`
+
+# Check a decompression object with max_length specified
+deco = zlib.decompressobj(-12)
+cb = combuf
+bufs = []
+while cb:
+    max_length = 1 + len(cb)/10
+    chunk = deco.decompress(cb, max_length)
+    if len(chunk) > max_length:
+        print 'chunk too big (%d>%d)' % (len(chunk),max_length)
+    bufs.append(chunk)
+    cb = deco.unconsumed_tail
+bufs.append(deco.flush())
+decomp2 = ''.join(buf)
+if decomp2 != buf:
+    print "max_length decompressobj failed"
+else:
+    print "max_length decompressobj succeeded"
+    
+# Misc tests of max_length
+deco = zlib.decompressobj(-12)
+try:
+    deco.decompress("", -1)
+except ValueError:
+    pass
+else:
+    print "failed to raise value error on bad max_length"
+print "unconsumed_tail should be '':", `deco.unconsumed_tail`
+
  # Test flush() with the various options, using all the different levels
  # in order to provide more variations.
  sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH']
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c

index a2e6aed61cee248c26882983f5d06a804620746d..2d9e7772a5008d24617471891c78f630a13de88b 100644 (file)
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -78,6 +78,7 @@ typedef struct
    PyObject_HEAD
    z_stream zst;
    PyObject *unused_data;
+  PyObject *unconsumed_tail;
    int is_initialised;
  } compobject;
  
@@ -100,6 +101,15 @@ newcompobject(PyTypeObject *type)
                  return NULL;
         self->is_initialised = 0;
         self->unused_data = PyString_FromString("");
+       if (self->unused_data == NULL) {
+           Py_DECREF(self);
+           return NULL;
+       }
+       self->unconsumed_tail = PyString_FromString("");
+       if (self->unconsumed_tail == NULL) {
+           Py_DECREF(self);
+           return NULL;
+       }
          return self;
  }
  
@@ -485,6 +495,7 @@ Comp_dealloc(compobject *self)
      if (self->is_initialised)
        deflateEnd(&self->zst);
      Py_XDECREF(self->unused_data);
+    Py_XDECREF(self->unconsumed_tail);
      PyObject_Del(self);
  
      LEAVE_ZLIB
@@ -498,6 +509,7 @@ Decomp_dealloc(compobject *self)
      if (self->is_initialised)
        inflateEnd(&self->zst);
      Py_XDECREF(self->unused_data);
+    Py_XDECREF(self->unconsumed_tail);
      PyObject_Del(self);
  
      LEAVE_ZLIB
@@ -595,27 +607,41 @@ PyZlib_objcompress(compobject *self, PyObject *args)
  }
  
  static char decomp_decompress__doc__[] =
-"decompress(data) -- Return a string containing the decompressed version of the data.\n\n"
+"decompress(data, max_length) -- Return a string containing\n"
+"the decompressed version of the data.\n\n"
  "After calling this function, some of the input data may still\n"
  "be stored in internal buffers for later processing.\n"
-"Call the flush() method to clear these buffers."
+"Call the flush() method to clear these buffers.\n"
+"If the max_length parameter is specified then the return value will be\n"
+"no longer than max_length.  Unconsumed input data will be stored in\n"
+"the unconsumed_tail attribute."
  ;
  
  static PyObject *
  PyZlib_objdecompress(compobject *self, PyObject *args)
  {
-  int err, inplen, length = DEFAULTALLOC;
+  int err, inplen, old_length, length = DEFAULTALLOC;
+  int max_length = 0;
    PyObject *RetVal;
    Byte *input;
    unsigned long start_total_out;
    int return_error;
    PyObject * inputString;
  
-  if (!PyArg_ParseTuple(args, "S:decompress", &inputString))
+  if (!PyArg_ParseTuple(args, "S|i:decompress", &inputString, &max_length))
+    return NULL;
+  if (max_length < 0) {
+    PyErr_SetString(PyExc_ValueError,
+                   "max_length must be greater than zero");
      return NULL;
+  }
+
    if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1)
      return NULL;
  
+  /* limit amount of data allocated to max_length */
+  if (max_length && length > max_length) 
+    length = max_length;
    if (!(RetVal = PyString_FromStringAndSize(NULL, length))) {
      PyErr_SetString(PyExc_MemoryError,
                     "Can't allocate memory to compress data");
@@ -637,23 +663,46 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
    err = inflate(&(self->zst), Z_SYNC_FLUSH);
    Py_END_ALLOW_THREADS
  
-  /* while Z_OK and the output buffer is full, there might be more output,
-    so extend the output buffer and try again */
+  /* While Z_OK and the output buffer is full, there might be more output.
+     So extend the output buffer and try again.
+  */
    while (err == Z_OK && self->zst.avail_out == 0) { 
-    if (_PyString_Resize(&RetVal, length << 1) == -1) {
+    /* If max_length set, don't continue decompressing if we've already
+        reached the limit.
+    */
+    if (max_length && length >= max_length)
+      break;
+
+    /* otherwise, ... */
+    old_length = length;
+    length = length << 1;
+    if (max_length && length > max_length) 
+      length = max_length;
+
+    if (_PyString_Resize(&RetVal, length) == -1) {
        PyErr_SetString(PyExc_MemoryError,
                        "Can't allocate memory to compress data");
        return_error = 1;
        break;
      }
-    self->zst.next_out = (unsigned char *)PyString_AsString(RetVal) + length;
-    self->zst.avail_out = length;
-    length = length << 1;
+    self->zst.next_out = (unsigned char *)PyString_AsString(RetVal)+old_length;
+    self->zst.avail_out = length - old_length;
+
      Py_BEGIN_ALLOW_THREADS
      err = inflate(&(self->zst), Z_SYNC_FLUSH);
      Py_END_ALLOW_THREADS
    }
  
+  /* If max_length was given, some compressed input may not have fit in the
+    output buffer.  Save the unconsumed tail in an attribute. */
+  if(max_length) {
+    Py_DECREF(self->unconsumed_tail);
+    self->unconsumed_tail = PyString_FromStringAndSize(self->zst.next_in, 
+                                                      self->zst.avail_in);
+    if(!self->unconsumed_tail)
+      return_error = 1;
+  }
+
    /* The end of the compressed data has been reached, so set the unused_data 
      attribute to a string containing the remainder of the data in the string. 
      Note that this is also a logical place to call inflateEnd, but the old
@@ -885,6 +934,11 @@ Decomp_getattr(compobject *self, char *name)
             Py_INCREF(self->unused_data);
              retval = self->unused_data;
           }
+       else if (strcmp(name, "unconsumed_tail") == 0) 
+         {  
+           Py_INCREF(self->unconsumed_tail);
+           retval = self->unconsumed_tail;
+         }
         else 
           retval = Py_FindMethod(Decomp_methods, (PyObject *)self, name);
author	Jeremy Hylton <jeremy@alum.mit.edu>
	Tue, 16 Oct 2001 20:39:49 +0000 (20:39 +0000)
committer	Jeremy Hylton <jeremy@alum.mit.edu>
	Tue, 16 Oct 2001 20:39:49 +0000 (20:39 +0000)
Doc/lib/libzlib.tex		patch \| blob \| history
Lib/test/output/test_zlib		patch \| blob \| history
Lib/test/test_zlib.py		patch \| blob \| history
Modules/zlibmodule.c		patch \| blob \| history