granicus.if.org Git - python/commitdiff
Issue #16828: Fix error incorrectly raised by bz2.compress('').
author Nadeem Vawda <nadeem.vawda@gmail.com>
Wed, 2 Jan 2013 22:05:56 +0000 (23:05 +0100)
committer Nadeem Vawda <nadeem.vawda@gmail.com>
Wed, 2 Jan 2013 22:05:56 +0000 (23:05 +0100)
Initial patch by Martin Packman.

1  2 
Lib/test/test_bz2.py
Misc/ACKS
Misc/NEWS
Modules/_bz2module.c

index f4e81db6e25a0d051edf2d33be885c206855dce2,977d10b6f63e4ccf621c977aad3b991e51489223..912fac1c3344f4359ddb81d9af333148a2c9ed5e
@@@ -22,38 -21,13 +22,39 @@@ has_cmdline_bunzip2 = sys.platform not 
  
  class BaseTest(unittest.TestCase):
      "Base for other testcases."
 -    TEXT = b'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n'
 +    TEXT_LINES = [
 +        b'root:x:0:0:root:/root:/bin/bash\n',
 +        b'bin:x:1:1:bin:/bin:\n',
 +        b'daemon:x:2:2:daemon:/sbin:\n',
 +        b'adm:x:3:4:adm:/var/adm:\n',
 +        b'lp:x:4:7:lp:/var/spool/lpd:\n',
 +        b'sync:x:5:0:sync:/sbin:/bin/sync\n',
 +        b'shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\n',
 +        b'halt:x:7:0:halt:/sbin:/sbin/halt\n',
 +        b'mail:x:8:12:mail:/var/spool/mail:\n',
 +        b'news:x:9:13:news:/var/spool/news:\n',
 +        b'uucp:x:10:14:uucp:/var/spool/uucp:\n',
 +        b'operator:x:11:0:operator:/root:\n',
 +        b'games:x:12:100:games:/usr/games:\n',
 +        b'gopher:x:13:30:gopher:/usr/lib/gopher-data:\n',
 +        b'ftp:x:14:50:FTP User:/var/ftp:/bin/bash\n',
 +        b'nobody:x:65534:65534:Nobody:/home:\n',
 +        b'postfix:x:100:101:postfix:/var/spool/postfix:\n',
 +        b'niemeyer:x:500:500::/home/niemeyer:/bin/bash\n',
 +        b'postgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\n',
 +        b'mysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\n',
 +        b'www:x:103:104::/var/www:/bin/false\n',
 +        ]
 +    TEXT = b''.join(TEXT_LINES)
      DATA = b'BZh91AY&SY.\xc8N\x18\x00\x01>_\x80\x00\x10@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe00\x01\x99\xaa\x00\xc0\x03F\x86\x8c#&\x83F\x9a\x03\x06\xa6\xd0\xa6\x93M\x0fQ\xa7\xa8\x06\x804hh\x12$\x11\xa4i4\xf14S\xd2<Q\xb5\x0fH\xd3\xd4\xdd\xd5\x87\xbb\xf8\x94\r\x8f\xafI\x12\xe1\xc9\xf8/E\x00pu\x89\x12]\xc9\xbbDL\nQ\x0e\t1\x12\xdf\xa0\xc0\x97\xac2O9\x89\x13\x94\x0e\x1c7\x0ed\x95I\x0c\xaaJ\xa4\x18L\x10\x05#\x9c\xaf\xba\xbc/\x97\x8a#C\xc8\xe1\x8cW\xf9\xe2\xd0\xd6M\xa7\x8bXa<e\x84t\xcbL\xb3\xa7\xd9\xcd\xd1\xcb\x84.\xaf\xb3\xab\xab\xad`n}\xa0lh\tE,\x8eZ\x15\x17VH>\x88\xe5\xcd9gd6\x0b\n\xe9\x9b\xd5\x8a\x99\xf7\x08.K\x8ev\xfb\xf7xw\xbb\xdf\xa1\x92\xf1\xdd|/";\xa2\xba\x9f\xd5\xb1#A\xb6\xf6\xb3o\xc9\xc5y\\\xebO\xe7\x85\x9a\xbc\xb6f8\x952\xd5\xd7"%\x89>V,\xf7\xa6z\xe2\x9f\xa3\xdf\x11\x11"\xd6E)I\xa9\x13^\xca\xf3r\xd0\x03U\x922\xf26\xec\xb6\xed\x8b\xc3U\x13\x9d\xc5\x170\xa4\xfa^\x92\xacDF\x8a\x97\xd6\x19\xfe\xdd\xb8\xbd\x1a\x9a\x19\xa3\x80ankR\x8b\xe5\xd83]\xa9\xc6\x08\x82f\xf6\xb9"6l$\xb8j@\xc0\x8a\xb0l1..\xbak\x83ls\x15\xbc\xf4\xc1\x13\xbe\xf8E\xb8\x9d\r\xa8\x9dk\x84\xd3n\xfa\xacQ\x07\xb1%y\xaav\xb4\x08\xe0z\x1b\x16\xf5\x04\xe9\xcc\xb9\x08z\x1en7.G\xfc]\xc9\x14\xe1B@\xbb!8`'
 -    DATA_CRLF = b'BZh91AY&SY\xaez\xbbN\x00\x01H\xdf\x80\x00\x12@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe0@\x01\xbc\xc6`\x86*\x8d=M\xa9\x9a\x86\xd0L@\x0fI\xa6!\xa1\x13\xc8\x88jdi\x8d@\x03@\x1a\x1a\x0c\x0c\x83 \x00\xc4h2\x19\x01\x82D\x84e\t\xe8\x99\x89\x19\x1ah\x00\r\x1a\x11\xaf\x9b\x0fG\xf5(\x1b\x1f?\t\x12\xcf\xb5\xfc\x95E\x00ps\x89\x12^\xa4\xdd\xa2&\x05(\x87\x04\x98\x89u\xe40%\xb6\x19\'\x8c\xc4\x89\xca\x07\x0e\x1b!\x91UIFU%C\x994!DI\xd2\xfa\xf0\xf1N8W\xde\x13A\xf5\x9cr%?\x9f3;I45A\xd1\x8bT\xb1<l\xba\xcb_\xc00xY\x17r\x17\x88\x08\x08@\xa0\ry@\x10\x04$)`\xf2\xce\x89z\xb0s\xec\x9b.iW\x9d\x81\xb5-+t\x9f\x1a\'\x97dB\xf5x\xb5\xbe.[.\xd7\x0e\x81\xe7\x08\x1cN`\x88\x10\xca\x87\xc3!"\x80\x92R\xa1/\xd1\xc0\xe6mf\xac\xbd\x99\xcca\xb3\x8780>\xa4\xc7\x8d\x1a\\"\xad\xa1\xabyBg\x15\xb9l\x88\x88\x91k"\x94\xa4\xd4\x89\xae*\xa6\x0b\x10\x0c\xd6\xd4m\xe86\xec\xb5j\x8a\x86j\';\xca.\x01I\xf2\xaaJ\xe8\x88\x8cU+t3\xfb\x0c\n\xa33\x13r2\r\x16\xe0\xb3(\xbf\x1d\x83r\xe7M\xf0D\x1365\xd8\x88\xd3\xa4\x92\xcb2\x06\x04\\\xc1\xb0\xea//\xbek&\xd8\xe6+t\xe5\xa1\x13\xada\x16\xder5"w]\xa2i\xb7[\x97R \xe2IT\xcd;Z\x04dk4\xad\x8a\t\xd3\x81z\x10\xf1:^`\xab\x1f\xc5\xdc\x91N\x14$+\x9e\xae\xd3\x80'
+     EMPTY_DATA = b'BZh9\x17rE8P\x90\x00\x00\x00\x00'
  
 -    with open(findfile("testbz2_bigmem.bz2"), "rb") as f:
 -        DATA_BIGMEM = f.read()
 +    def setUp(self):
 +        self.filename = TESTFN
 +
 +    def tearDown(self):
 +        if os.path.isfile(self.filename):
 +            os.unlink(self.filename)
  
      if has_cmdline_bunzip2:
          def decompress(self, data):
@@@ -584,7 -304,14 +585,13 @@@ class BZ2CompressorTest(BaseTest)
          data += bz2c.flush()
          self.assertEqual(self.decompress(data), self.TEXT)
  
+     def testCompressEmptyString(self):
+         bz2c = BZ2Compressor()
+         data = bz2c.compress(b'')
+         data += bz2c.flush()
+         self.assertEqual(data, self.EMPTY_DATA)
      def testCompressChunks10(self):
 -        # "Test BZ2Compressor.compress()/flush() with chunks of 10 bytes"
          bz2c = BZ2Compressor()
          n = 0
          data = b''
@@@ -671,7 -389,13 +678,11 @@@ class CompressDecompressTest(BaseTest)
          data = bz2.compress(self.TEXT)
          self.assertEqual(self.decompress(data), self.TEXT)
  
+     def testCompressEmptyString(self):
+         text = bz2.compress(b'')
+         self.assertEqual(text, self.EMPTY_DATA)
      def testDecompress(self):
 -        # "Test decompress() function"
          text = bz2.decompress(self.DATA)
          self.assertEqual(text, self.TEXT)
  
          text = bz2.decompress(b"")
          self.assertEqual(text, b"")
  
+     def testDecompressToEmptyString(self):
+         text = bz2.decompress(self.EMPTY_DATA)
+         self.assertEqual(text, b'')
      def testDecompressIncomplete(self):
 -        # "Test decompress() function with incomplete data"
          self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
  
 -    @bigmemtest(size=_4G, memuse=1.25)
 -    def testCompressBigmem(self, size):
 -        text = b"a" * size
 -        data = bz2.compress(text)
 -        del text
 -        text = self.decompress(data)
 -        self.assertEqual(len(text), size)
 -        self.assertEqual(text.strip(b"a"), b"")
 -
 -    @bigmemtest(size=_4G, memuse=1.25, dry_run=False)
 -    def testDecompressBigmem(self, unused_size):
 -        # Issue #14398: decompression fails when output data is >=2GB.
 -        text = bz2.decompress(self.DATA_BIGMEM)
 -        self.assertEqual(len(text), _4G)
 -        self.assertEqual(text.strip(b"\0"), b"")
 +    def testDecompressMultiStream(self):
 +        text = bz2.decompress(self.DATA * 5)
 +        self.assertEqual(text, self.TEXT * 5)
 +
 +
 +class OpenTest(BaseTest):
 +    def test_binary_modes(self):
 +        with bz2.open(self.filename, "wb") as f:
 +            f.write(self.TEXT)
 +        with open(self.filename, "rb") as f:
 +            file_data = bz2.decompress(f.read())
 +            self.assertEqual(file_data, self.TEXT)
 +        with bz2.open(self.filename, "rb") as f:
 +            self.assertEqual(f.read(), self.TEXT)
 +        with bz2.open(self.filename, "ab") as f:
 +            f.write(self.TEXT)
 +        with open(self.filename, "rb") as f:
 +            file_data = bz2.decompress(f.read())
 +            self.assertEqual(file_data, self.TEXT * 2)
 +
 +    def test_implicit_binary_modes(self):
 +        # Test implicit binary modes (no "b" or "t" in mode string).
 +        with bz2.open(self.filename, "w") as f:
 +            f.write(self.TEXT)
 +        with open(self.filename, "rb") as f:
 +            file_data = bz2.decompress(f.read())
 +            self.assertEqual(file_data, self.TEXT)
 +        with bz2.open(self.filename, "r") as f:
 +            self.assertEqual(f.read(), self.TEXT)
 +        with bz2.open(self.filename, "a") as f:
 +            f.write(self.TEXT)
 +        with open(self.filename, "rb") as f:
 +            file_data = bz2.decompress(f.read())
 +            self.assertEqual(file_data, self.TEXT * 2)
 +
 +    def test_text_modes(self):
 +        text = self.TEXT.decode("ascii")
 +        text_native_eol = text.replace("\n", os.linesep)
 +        with bz2.open(self.filename, "wt") as f:
 +            f.write(text)
 +        with open(self.filename, "rb") as f:
 +            file_data = bz2.decompress(f.read()).decode("ascii")
 +            self.assertEqual(file_data, text_native_eol)
 +        with bz2.open(self.filename, "rt") as f:
 +            self.assertEqual(f.read(), text)
 +        with bz2.open(self.filename, "at") as f:
 +            f.write(text)
 +        with open(self.filename, "rb") as f:
 +            file_data = bz2.decompress(f.read()).decode("ascii")
 +            self.assertEqual(file_data, text_native_eol * 2)
 +
 +    def test_fileobj(self):
 +        with bz2.open(BytesIO(self.DATA), "r") as f:
 +            self.assertEqual(f.read(), self.TEXT)
 +        with bz2.open(BytesIO(self.DATA), "rb") as f:
 +            self.assertEqual(f.read(), self.TEXT)
 +        text = self.TEXT.decode("ascii")
 +        with bz2.open(BytesIO(self.DATA), "rt") as f:
 +            self.assertEqual(f.read(), text)
 +
 +    def test_bad_params(self):
 +        # Test invalid parameter combinations.
 +        with self.assertRaises(ValueError):
 +            bz2.open(self.filename, "wbt")
 +        with self.assertRaises(ValueError):
 +            bz2.open(self.filename, "rb", encoding="utf-8")
 +        with self.assertRaises(ValueError):
 +            bz2.open(self.filename, "rb", errors="ignore")
 +        with self.assertRaises(ValueError):
 +            bz2.open(self.filename, "rb", newline="\n")
 +
 +    def test_encoding(self):
 +        # Test non-default encoding.
 +        text = self.TEXT.decode("ascii")
 +        text_native_eol = text.replace("\n", os.linesep)
 +        with bz2.open(self.filename, "wt", encoding="utf-16-le") as f:
 +            f.write(text)
 +        with open(self.filename, "rb") as f:
 +            file_data = bz2.decompress(f.read()).decode("utf-16-le")
 +            self.assertEqual(file_data, text_native_eol)
 +        with bz2.open(self.filename, "rt", encoding="utf-16-le") as f:
 +            self.assertEqual(f.read(), text)
 +
 +    def test_encoding_error_handler(self):
 +        # Test with non-default encoding error handler.
 +        with bz2.open(self.filename, "wb") as f:
 +            f.write(b"foo\xffbar")
 +        with bz2.open(self.filename, "rt", encoding="ascii", errors="ignore") \
 +                as f:
 +            self.assertEqual(f.read(), "foobar")
 +
 +    def test_newline(self):
 +        # Test with explicit newline (universal newline mode disabled).
 +        text = self.TEXT.decode("ascii")
 +        with bz2.open(self.filename, "wt", newline="\n") as f:
 +            f.write(text)
 +        with bz2.open(self.filename, "rt", newline="\r") as f:
 +            self.assertEqual(f.readlines(), [text])
 +
  
  def test_main():
      support.run_unittest(
diff --cc Misc/ACKS
Simple merge
diff --cc Misc/NEWS
index fdc313914e394cf8170833561f327b2ff9d6c727,e990e83f0dc72c5fce319fd771c3289776dbacae..eff20ee93e12eea27192135173a60406168c56dd
+++ b/Misc/NEWS
@@@ -123,6 -188,9 +123,9 @@@ Core and Builtin
  Library
  -------
  
 -- Issue #16828: Fix error incorrectly raised by bz2.compress(''). Patch by
 -  Martin Packman.
++- Issue #16828: Fix error incorrectly raised by bz2.compress(''). Initial patch
++  by Martin Packman.
  - Issue #16541: tk_setPalette() now works with keyword arguments.
  
  - Issue #16820: In configparser, `parser.popitem()` no longer raises ValueError.
index 5cac8e6d5183eb44885a7aedfd0f2e90c7b5b8f9,0000000000000000000000000000000000000000..4eee5a2feeb9db7aa98bc48339d2a461df6e4aa6
mode 100644,000000..100644
--- /dev/null
@@@ -1,605 -1,0 +1,607 @@@
-     /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
-        Do compression in chunks of no more than UINT_MAX bytes each. */
-     c->bzs.avail_in = MIN(len, UINT_MAX);
-     len -= c->bzs.avail_in;
 +/* _bz2 - Low-level Python interface to libbzip2. */
 +
 +#define PY_SSIZE_T_CLEAN
 +
 +#include "Python.h"
 +#include "structmember.h"
 +
 +#ifdef WITH_THREAD
 +#include "pythread.h"
 +#endif
 +
 +#include <bzlib.h>
 +#include <stdio.h>
 +
 +
 +#ifndef BZ_CONFIG_ERROR
 +#define BZ2_bzCompress bzCompress
 +#define BZ2_bzCompressInit bzCompressInit
 +#define BZ2_bzCompressEnd bzCompressEnd
 +#define BZ2_bzDecompress bzDecompress
 +#define BZ2_bzDecompressInit bzDecompressInit
 +#define BZ2_bzDecompressEnd bzDecompressEnd
 +#endif  /* ! BZ_CONFIG_ERROR */
 +
 +
 +#ifdef WITH_THREAD
 +#define ACQUIRE_LOCK(obj) do { \
 +    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
 +        Py_BEGIN_ALLOW_THREADS \
 +        PyThread_acquire_lock((obj)->lock, 1); \
 +        Py_END_ALLOW_THREADS \
 +    } } while (0)
 +#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
 +#else
 +#define ACQUIRE_LOCK(obj)
 +#define RELEASE_LOCK(obj)
 +#endif
 +
 +#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
 +
 +
 +typedef struct {
 +    PyObject_HEAD
 +    bz_stream bzs;
 +    int flushed;
 +#ifdef WITH_THREAD
 +    PyThread_type_lock lock;
 +#endif
 +} BZ2Compressor;
 +
 +typedef struct {
 +    PyObject_HEAD
 +    bz_stream bzs;
 +    char eof;           /* T_BOOL expects a char */
 +    PyObject *unused_data;
 +#ifdef WITH_THREAD
 +    PyThread_type_lock lock;
 +#endif
 +} BZ2Decompressor;
 +
 +
 +/* Helper functions. */
 +
 +static int
 +catch_bz2_error(int bzerror)
 +{
 +    switch(bzerror) {
 +        case BZ_OK:
 +        case BZ_RUN_OK:
 +        case BZ_FLUSH_OK:
 +        case BZ_FINISH_OK:
 +        case BZ_STREAM_END:
 +            return 0;
 +
 +#ifdef BZ_CONFIG_ERROR
 +        case BZ_CONFIG_ERROR:
 +            PyErr_SetString(PyExc_SystemError,
 +                            "libbzip2 was not compiled correctly");
 +            return 1;
 +#endif
 +        case BZ_PARAM_ERROR:
 +            PyErr_SetString(PyExc_ValueError,
 +                            "Internal error - "
 +                            "invalid parameters passed to libbzip2");
 +            return 1;
 +        case BZ_MEM_ERROR:
 +            PyErr_NoMemory();
 +            return 1;
 +        case BZ_DATA_ERROR:
 +        case BZ_DATA_ERROR_MAGIC:
 +            PyErr_SetString(PyExc_IOError, "Invalid data stream");
 +            return 1;
 +        case BZ_IO_ERROR:
 +            PyErr_SetString(PyExc_IOError, "Unknown I/O error");
 +            return 1;
 +        case BZ_UNEXPECTED_EOF:
 +            PyErr_SetString(PyExc_EOFError,
 +                            "Compressed file ended before the logical "
 +                            "end-of-stream was detected");
 +            return 1;
 +        case BZ_SEQUENCE_ERROR:
 +            PyErr_SetString(PyExc_RuntimeError,
 +                            "Internal error - "
 +                            "Invalid sequence of commands sent to libbzip2");
 +            return 1;
 +        default:
 +            PyErr_Format(PyExc_IOError,
 +                         "Unrecognized error from libbzip2: %d", bzerror);
 +            return 1;
 +    }
 +}
 +
 +#if BUFSIZ < 8192
 +#define SMALLCHUNK 8192
 +#else
 +#define SMALLCHUNK BUFSIZ
 +#endif
 +
 +static int
 +grow_buffer(PyObject **buf)
 +{
 +    /* Expand the buffer by an amount proportional to the current size,
 +       giving us amortized linear-time behavior. Use a less-than-double
 +       growth factor to avoid excessive allocation. */
 +    size_t size = PyBytes_GET_SIZE(*buf);
 +    size_t new_size = size + (size >> 3) + 6;
 +    if (new_size > size) {
 +        return _PyBytes_Resize(buf, new_size);
 +    } else {  /* overflow */
 +        PyErr_SetString(PyExc_OverflowError,
 +                        "Unable to allocate buffer - output too large");
 +        return -1;
 +    }
 +}
 +
 +
 +/* BZ2Compressor class. */
 +
 +static PyObject *
 +compress(BZ2Compressor *c, char *data, size_t len, int action)
 +{
 +    size_t data_size = 0;
 +    PyObject *result;
 +
 +    result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
 +    if (result == NULL)
 +        return NULL;
++
 +    c->bzs.next_in = data;
-         Py_BEGIN_ALLOW_THREADS
-         this_out = c->bzs.next_out;
-         bzerror = BZ2_bzCompress(&c->bzs, action);
-         data_size += c->bzs.next_out - this_out;
-         Py_END_ALLOW_THREADS
-         if (catch_bz2_error(bzerror))
-             goto error;
++    c->bzs.avail_in = 0;
 +    c->bzs.next_out = PyBytes_AS_STRING(result);
 +    c->bzs.avail_out = PyBytes_GET_SIZE(result);
 +    for (;;) {
 +        char *this_out;
 +        int bzerror;
 +
-         /* In regular compression mode, stop when input data is exhausted.
-            In flushing mode, stop when all buffered data has been flushed. */
-         if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
-             (action == BZ_FINISH && bzerror == BZ_STREAM_END))
++        /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
++           Do compression in chunks of no more than UINT_MAX bytes each. */
 +        if (c->bzs.avail_in == 0 && len > 0) {
 +            c->bzs.avail_in = MIN(len, UINT_MAX);
 +            len -= c->bzs.avail_in;
 +        }
 +
++        /* In regular compression mode, stop when input data is exhausted. */
++        if (action == BZ_RUN && c->bzs.avail_in == 0)
 +            break;
 +
 +        if (c->bzs.avail_out == 0) {
 +            size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
 +            if (buffer_left == 0) {
 +                if (grow_buffer(&result) < 0)
 +                    goto error;
 +                c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
 +                buffer_left = PyBytes_GET_SIZE(result) - data_size;
 +            }
 +            c->bzs.avail_out = MIN(buffer_left, UINT_MAX);
 +        }
++
++        Py_BEGIN_ALLOW_THREADS
++        this_out = c->bzs.next_out;
++        bzerror = BZ2_bzCompress(&c->bzs, action);
++        data_size += c->bzs.next_out - this_out;
++        Py_END_ALLOW_THREADS
++        if (catch_bz2_error(bzerror))
++            goto error;
++
++        /* In flushing mode, stop when all buffered data has been flushed. */
++        if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
++            break;
 +    }
 +    if (data_size != PyBytes_GET_SIZE(result))
 +        if (_PyBytes_Resize(&result, data_size) < 0)
 +            goto error;
 +    return result;
 +
 +error:
 +    Py_XDECREF(result);
 +    return NULL;
 +}
 +
 +PyDoc_STRVAR(BZ2Compressor_compress__doc__,
 +"compress(data) -> bytes\n"
 +"\n"
 +"Provide data to the compressor object. Returns a chunk of\n"
 +"compressed data if possible, or b'' otherwise.\n"
 +"\n"
 +"When you have finished providing data to the compressor, call the\n"
 +"flush() method to finish the compression process.\n");
 +
 +static PyObject *
 +BZ2Compressor_compress(BZ2Compressor *self, PyObject *args)
 +{
 +    Py_buffer buffer;
 +    PyObject *result = NULL;
 +
 +    if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
 +        return NULL;
 +
 +    ACQUIRE_LOCK(self);
 +    if (self->flushed)
 +        PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
 +    else
 +        result = compress(self, buffer.buf, buffer.len, BZ_RUN);
 +    RELEASE_LOCK(self);
 +    PyBuffer_Release(&buffer);
 +    return result;
 +}
 +
 +PyDoc_STRVAR(BZ2Compressor_flush__doc__,
 +"flush() -> bytes\n"
 +"\n"
 +"Finish the compression process. Returns the compressed data left\n"
 +"in internal buffers.\n"
 +"\n"
 +"The compressor object may not be used after this method is called.\n");
 +
 +static PyObject *
 +BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs)
 +{
 +    PyObject *result = NULL;
 +
 +    ACQUIRE_LOCK(self);
 +    if (self->flushed)
 +        PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
 +    else {
 +        self->flushed = 1;
 +        result = compress(self, NULL, 0, BZ_FINISH);
 +    }
 +    RELEASE_LOCK(self);
 +    return result;
 +}
 +
 +static int
 +BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs)
 +{
 +    int compresslevel = 9;
 +    int bzerror;
 +
 +    if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel))
 +        return -1;
 +    if (!(1 <= compresslevel && compresslevel <= 9)) {
 +        PyErr_SetString(PyExc_ValueError,
 +                        "compresslevel must be between 1 and 9");
 +        return -1;
 +    }
 +
 +#ifdef WITH_THREAD
 +    self->lock = PyThread_allocate_lock();
 +    if (self->lock == NULL) {
 +        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
 +        return -1;
 +    }
 +#endif
 +
 +    bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
 +    if (catch_bz2_error(bzerror))
 +        goto error;
 +
 +    return 0;
 +
 +error:
 +#ifdef WITH_THREAD
 +    PyThread_free_lock(self->lock);
 +    self->lock = NULL;
 +#endif
 +    return -1;
 +}
 +
 +static void
 +BZ2Compressor_dealloc(BZ2Compressor *self)
 +{
 +    BZ2_bzCompressEnd(&self->bzs);
 +#ifdef WITH_THREAD
 +    if (self->lock != NULL)
 +        PyThread_free_lock(self->lock);
 +#endif
 +    Py_TYPE(self)->tp_free((PyObject *)self);
 +}
 +
 +static PyMethodDef BZ2Compressor_methods[] = {
 +    {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS,
 +     BZ2Compressor_compress__doc__},
 +    {"flush",    (PyCFunction)BZ2Compressor_flush,    METH_NOARGS,
 +     BZ2Compressor_flush__doc__},
 +    {NULL}
 +};
 +
 +PyDoc_STRVAR(BZ2Compressor__doc__,
 +"BZ2Compressor(compresslevel=9)\n"
 +"\n"
 +"Create a compressor object for compressing data incrementally.\n"
 +"\n"
 +"compresslevel, if given, must be a number between 1 and 9.\n"
 +"\n"
 +"For one-shot compression, use the compress() function instead.\n");
 +
 +static PyTypeObject BZ2Compressor_Type = {
 +    PyVarObject_HEAD_INIT(NULL, 0)
 +    "_bz2.BZ2Compressor",               /* tp_name */
 +    sizeof(BZ2Compressor),              /* tp_basicsize */
 +    0,                                  /* tp_itemsize */
 +    (destructor)BZ2Compressor_dealloc,  /* tp_dealloc */
 +    0,                                  /* tp_print */
 +    0,                                  /* tp_getattr */
 +    0,                                  /* tp_setattr */
 +    0,                                  /* tp_reserved */
 +    0,                                  /* tp_repr */
 +    0,                                  /* tp_as_number */
 +    0,                                  /* tp_as_sequence */
 +    0,                                  /* tp_as_mapping */
 +    0,                                  /* tp_hash  */
 +    0,                                  /* tp_call */
 +    0,                                  /* tp_str */
 +    0,                                  /* tp_getattro */
 +    0,                                  /* tp_setattro */
 +    0,                                  /* tp_as_buffer */
 +    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
 +    BZ2Compressor__doc__,               /* tp_doc */
 +    0,                                  /* tp_traverse */
 +    0,                                  /* tp_clear */
 +    0,                                  /* tp_richcompare */
 +    0,                                  /* tp_weaklistoffset */
 +    0,                                  /* tp_iter */
 +    0,                                  /* tp_iternext */
 +    BZ2Compressor_methods,              /* tp_methods */
 +    0,                                  /* tp_members */
 +    0,                                  /* tp_getset */
 +    0,                                  /* tp_base */
 +    0,                                  /* tp_dict */
 +    0,                                  /* tp_descr_get */
 +    0,                                  /* tp_descr_set */
 +    0,                                  /* tp_dictoffset */
 +    (initproc)BZ2Compressor_init,       /* tp_init */
 +    0,                                  /* tp_alloc */
 +    PyType_GenericNew,                  /* tp_new */
 +};
 +
 +
 +/* BZ2Decompressor class. */
 +
 +static PyObject *
 +decompress(BZ2Decompressor *d, char *data, size_t len)
 +{
 +    size_t data_size = 0;
 +    PyObject *result;
 +
 +    result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
 +    if (result == NULL)
 +        return result;
 +    d->bzs.next_in = data;
 +    /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
 +       Do decompression in chunks of no more than UINT_MAX bytes each. */
 +    d->bzs.avail_in = MIN(len, UINT_MAX);
 +    len -= d->bzs.avail_in;
 +    d->bzs.next_out = PyBytes_AS_STRING(result);
 +    d->bzs.avail_out = PyBytes_GET_SIZE(result);
 +    for (;;) {
 +        char *this_out;
 +        int bzerror;
 +
 +        Py_BEGIN_ALLOW_THREADS
 +        this_out = d->bzs.next_out;
 +        bzerror = BZ2_bzDecompress(&d->bzs);
 +        data_size += d->bzs.next_out - this_out;
 +        Py_END_ALLOW_THREADS
 +        if (catch_bz2_error(bzerror))
 +            goto error;
 +        if (bzerror == BZ_STREAM_END) {
 +            d->eof = 1;
 +            len += d->bzs.avail_in;
 +            if (len > 0) { /* Save leftover input to unused_data */
 +                Py_CLEAR(d->unused_data);
 +                d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
 +                if (d->unused_data == NULL)
 +                    goto error;
 +            }
 +            break;
 +        }
 +        if (d->bzs.avail_in == 0) {
 +            if (len == 0)
 +                break;
 +            d->bzs.avail_in = MIN(len, UINT_MAX);
 +            len -= d->bzs.avail_in;
 +        }
 +        if (d->bzs.avail_out == 0) {
 +            size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
 +            if (buffer_left == 0) {
 +                if (grow_buffer(&result) < 0)
 +                    goto error;
 +                d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
 +                buffer_left = PyBytes_GET_SIZE(result) - data_size;
 +            }
 +            d->bzs.avail_out = MIN(buffer_left, UINT_MAX);
 +        }
 +    }
 +    if (data_size != PyBytes_GET_SIZE(result))
 +        if (_PyBytes_Resize(&result, data_size) < 0)
 +            goto error;
 +    return result;
 +
 +error:
 +    Py_XDECREF(result);
 +    return NULL;
 +}
 +
 +PyDoc_STRVAR(BZ2Decompressor_decompress__doc__,
 +"decompress(data) -> bytes\n"
 +"\n"
 +"Provide data to the decompressor object. Returns a chunk of\n"
 +"decompressed data if possible, or b'' otherwise.\n"
 +"\n"
 +"Attempting to decompress data after the end of stream is reached\n"
 +"raises an EOFError. Any data found after the end of the stream\n"
 +"is ignored and saved in the unused_data attribute.\n");
 +
 +static PyObject *
 +BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args)
 +{
 +    Py_buffer buffer;
 +    PyObject *result = NULL;
 +
 +    if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
 +        return NULL;
 +
 +    ACQUIRE_LOCK(self);
 +    if (self->eof)
 +        PyErr_SetString(PyExc_EOFError, "End of stream already reached");
 +    else
 +        result = decompress(self, buffer.buf, buffer.len);
 +    RELEASE_LOCK(self);
 +    PyBuffer_Release(&buffer);
 +    return result;
 +}
 +
 +static int
 +BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs)
 +{
 +    int bzerror;
 +
 +    if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
 +        return -1;
 +
 +#ifdef WITH_THREAD
 +    self->lock = PyThread_allocate_lock();
 +    if (self->lock == NULL) {
 +        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
 +        return -1;
 +    }
 +#endif
 +
 +    self->unused_data = PyBytes_FromStringAndSize("", 0);
 +    if (self->unused_data == NULL)
 +        goto error;
 +
 +    bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
 +    if (catch_bz2_error(bzerror))
 +        goto error;
 +
 +    return 0;
 +
 +error:
 +    Py_CLEAR(self->unused_data);
 +#ifdef WITH_THREAD
 +    PyThread_free_lock(self->lock);
 +    self->lock = NULL;
 +#endif
 +    return -1;
 +}
 +
 +static void
 +BZ2Decompressor_dealloc(BZ2Decompressor *self)
 +{
 +    BZ2_bzDecompressEnd(&self->bzs);
 +    Py_CLEAR(self->unused_data);
 +#ifdef WITH_THREAD
 +    if (self->lock != NULL)
 +        PyThread_free_lock(self->lock);
 +#endif
 +    Py_TYPE(self)->tp_free((PyObject *)self);
 +}
 +
 +static PyMethodDef BZ2Decompressor_methods[] = {
 +    {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS,
 +     BZ2Decompressor_decompress__doc__},
 +    {NULL}
 +};
 +
 +PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
 +"True if the end-of-stream marker has been reached.");
 +
 +PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
 +"Data found after the end of the compressed stream.");
 +
 +static PyMemberDef BZ2Decompressor_members[] = {
 +    {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
 +     READONLY, BZ2Decompressor_eof__doc__},
 +    {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
 +     READONLY, BZ2Decompressor_unused_data__doc__},
 +    {NULL}
 +};
 +
 +PyDoc_STRVAR(BZ2Decompressor__doc__,
 +"BZ2Decompressor()\n"
 +"\n"
 +"Create a decompressor object for decompressing data incrementally.\n"
 +"\n"
 +"For one-shot decompression, use the decompress() function instead.\n");
 +
 +static PyTypeObject BZ2Decompressor_Type = {
 +    PyVarObject_HEAD_INIT(NULL, 0)
 +    "_bz2.BZ2Decompressor",             /* tp_name */
 +    sizeof(BZ2Decompressor),            /* tp_basicsize */
 +    0,                                  /* tp_itemsize */
 +    (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
 +    0,                                  /* tp_print */
 +    0,                                  /* tp_getattr */
 +    0,                                  /* tp_setattr */
 +    0,                                  /* tp_reserved */
 +    0,                                  /* tp_repr */
 +    0,                                  /* tp_as_number */
 +    0,                                  /* tp_as_sequence */
 +    0,                                  /* tp_as_mapping */
 +    0,                                  /* tp_hash  */
 +    0,                                  /* tp_call */
 +    0,                                  /* tp_str */
 +    0,                                  /* tp_getattro */
 +    0,                                  /* tp_setattro */
 +    0,                                  /* tp_as_buffer */
 +    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
 +    BZ2Decompressor__doc__,             /* tp_doc */
 +    0,                                  /* tp_traverse */
 +    0,                                  /* tp_clear */
 +    0,                                  /* tp_richcompare */
 +    0,                                  /* tp_weaklistoffset */
 +    0,                                  /* tp_iter */
 +    0,                                  /* tp_iternext */
 +    BZ2Decompressor_methods,            /* tp_methods */
 +    BZ2Decompressor_members,            /* tp_members */
 +    0,                                  /* tp_getset */
 +    0,                                  /* tp_base */
 +    0,                                  /* tp_dict */
 +    0,                                  /* tp_descr_get */
 +    0,                                  /* tp_descr_set */
 +    0,                                  /* tp_dictoffset */
 +    (initproc)BZ2Decompressor_init,     /* tp_init */
 +    0,                                  /* tp_alloc */
 +    PyType_GenericNew,                  /* tp_new */
 +};
 +
 +
 +/* Module initialization. */
 +
 +static struct PyModuleDef _bz2module = {
 +    PyModuleDef_HEAD_INIT,
 +    "_bz2",
 +    NULL,
 +    -1,
 +    NULL,
 +    NULL,
 +    NULL,
 +    NULL,
 +    NULL
 +};
 +
 +PyMODINIT_FUNC
 +PyInit__bz2(void)
 +{
 +    PyObject *m;
 +
 +    if (PyType_Ready(&BZ2Compressor_Type) < 0)
 +        return NULL;
 +    if (PyType_Ready(&BZ2Decompressor_Type) < 0)
 +        return NULL;
 +
 +    m = PyModule_Create(&_bz2module);
 +    if (m == NULL)
 +        return NULL;
 +
 +    Py_INCREF(&BZ2Compressor_Type);
 +    PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
 +
 +    Py_INCREF(&BZ2Decompressor_Type);
 +    PyModule_AddObject(m, "BZ2Decompressor",
 +                       (PyObject *)&BZ2Decompressor_Type);
 +
 +    return m;
 +}