From: Nadeem Vawda Date: Wed, 2 Jan 2013 22:05:56 +0000 (+0100) Subject: Issue #16828: Fix error incorrectly raised by bz2.compress(''). X-Git-Tag: v3.3.1rc1~430 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=57cb81d16180bd896b47129585c838096d035f16;p=python Issue #16828: Fix error incorrectly raised by bz2.compress(''). Initial patch by Martin Packman. --- 57cb81d16180bd896b47129585c838096d035f16 diff --cc Lib/test/test_bz2.py index f4e81db6e2,977d10b6f6..912fac1c33 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@@ -22,38 -21,13 +22,39 @@@ has_cmdline_bunzip2 = sys.platform not class BaseTest(unittest.TestCase): "Base for other testcases." - TEXT = b'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n' + TEXT_LINES = [ + b'root:x:0:0:root:/root:/bin/bash\n', + b'bin:x:1:1:bin:/bin:\n', + b'daemon:x:2:2:daemon:/sbin:\n', + b'adm:x:3:4:adm:/var/adm:\n', + b'lp:x:4:7:lp:/var/spool/lpd:\n', + b'sync:x:5:0:sync:/sbin:/bin/sync\n', + b'shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\n', + b'halt:x:7:0:halt:/sbin:/sbin/halt\n', + b'mail:x:8:12:mail:/var/spool/mail:\n', + b'news:x:9:13:news:/var/spool/news:\n', + b'uucp:x:10:14:uucp:/var/spool/uucp:\n', + b'operator:x:11:0:operator:/root:\n', + b'games:x:12:100:games:/usr/games:\n', + b'gopher:x:13:30:gopher:/usr/lib/gopher-data:\n', + b'ftp:x:14:50:FTP User:/var/ftp:/bin/bash\n', + b'nobody:x:65534:65534:Nobody:/home:\n', + b'postfix:x:100:101:postfix:/var/spool/postfix:\n', + b'niemeyer:x:500:500::/home/niemeyer:/bin/bash\n', + b'postgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\n', + b'mysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\n', + b'www:x:103:104::/var/www:/bin/false\n', + ] + TEXT = b''.join(TEXT_LINES) DATA = b'BZh91AY&SY.\xc8N\x18\x00\x01>_\x80\x00\x10@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe00\x01\x99\xaa\x00\xc0\x03F\x86\x8c#&\x83F\x9a\x03\x06\xa6\xd0\xa6\x93M\x0fQ\xa7\xa8\x06\x804hh\x12$\x11\xa4i4\xf14S\xd2\x88\xe5\xcd9gd6\x0b\n\xe9\x9b\xd5\x8a\x99\xf7\x08.K\x8ev\xfb\xf7xw\xbb\xdf\xa1\x92\xf1\xdd|/";\xa2\xba\x9f\xd5\xb1#A\xb6\xf6\xb3o\xc9\xc5y\\\xebO\xe7\x85\x9a\xbc\xb6f8\x952\xd5\xd7"%\x89>V,\xf7\xa6z\xe2\x9f\xa3\xdf\x11\x11"\xd6E)I\xa9\x13^\xca\xf3r\xd0\x03U\x922\xf26\xec\xb6\xed\x8b\xc3U\x13\x9d\xc5\x170\xa4\xfa^\x92\xacDF\x8a\x97\xd6\x19\xfe\xdd\xb8\xbd\x1a\x9a\x19\xa3\x80ankR\x8b\xe5\xd83]\xa9\xc6\x08\x82f\xf6\xb9"6l$\xb8j@\xc0\x8a\xb0l1..\xbak\x83ls\x15\xbc\xf4\xc1\x13\xbe\xf8E\xb8\x9d\r\xa8\x9dk\x84\xd3n\xfa\xacQ\x07\xb1%y\xaav\xb4\x08\xe0z\x1b\x16\xf5\x04\xe9\xcc\xb9\x08z\x1en7.G\xfc]\xc9\x14\xe1B@\xbb!8`' - DATA_CRLF = b'BZh91AY&SY\xaez\xbbN\x00\x01H\xdf\x80\x00\x12@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe0@\x01\xbc\xc6`\x86*\x8d=M\xa9\x9a\x86\xd0L@\x0fI\xa6!\xa1\x13\xc8\x88jdi\x8d@\x03@\x1a\x1a\x0c\x0c\x83 
\x00\xc4h2\x19\x01\x82D\x84e\t\xe8\x99\x89\x19\x1ah\x00\r\x1a\x11\xaf\x9b\x0fG\xf5(\x1b\x1f?\t\x12\xcf\xb5\xfc\x95E\x00ps\x89\x12^\xa4\xdd\xa2&\x05(\x87\x04\x98\x89u\xe40%\xb6\x19\'\x8c\xc4\x89\xca\x07\x0e\x1b!\x91UIFU%C\x994!DI\xd2\xfa\xf0\xf1N8W\xde\x13A\xf5\x9cr%?\x9f3;I45A\xd1\x8bT\xb1\xa4\xc7\x8d\x1a\\"\xad\xa1\xabyBg\x15\xb9l\x88\x88\x91k"\x94\xa4\xd4\x89\xae*\xa6\x0b\x10\x0c\xd6\xd4m\xe86\xec\xb5j\x8a\x86j\';\xca.\x01I\xf2\xaaJ\xe8\x88\x8cU+t3\xfb\x0c\n\xa33\x13r2\r\x16\xe0\xb3(\xbf\x1d\x83r\xe7M\xf0D\x1365\xd8\x88\xd3\xa4\x92\xcb2\x06\x04\\\xc1\xb0\xea//\xbek&\xd8\xe6+t\xe5\xa1\x13\xada\x16\xder5"w]\xa2i\xb7[\x97R \xe2IT\xcd;Z\x04dk4\xad\x8a\t\xd3\x81z\x10\xf1:^`\xab\x1f\xc5\xdc\x91N\x14$+\x9e\xae\xd3\x80' + EMPTY_DATA = b'BZh9\x17rE8P\x90\x00\x00\x00\x00' - with open(findfile("testbz2_bigmem.bz2"), "rb") as f: - DATA_BIGMEM = f.read() + def setUp(self): + self.filename = TESTFN + + def tearDown(self): + if os.path.isfile(self.filename): + os.unlink(self.filename) if has_cmdline_bunzip2: def decompress(self, data): @@@ -584,7 -304,14 +585,13 @@@ class BZ2CompressorTest(BaseTest) data += bz2c.flush() self.assertEqual(self.decompress(data), self.TEXT) + def testCompressEmptyString(self): + bz2c = BZ2Compressor() + data = bz2c.compress(b'') + data += bz2c.flush() + self.assertEqual(data, self.EMPTY_DATA) + def testCompressChunks10(self): - # "Test BZ2Compressor.compress()/flush() with chunks of 10 bytes" bz2c = BZ2Compressor() n = 0 data = b'' @@@ -671,7 -389,13 +678,11 @@@ class CompressDecompressTest(BaseTest) data = bz2.compress(self.TEXT) self.assertEqual(self.decompress(data), self.TEXT) + def testCompressEmptyString(self): + text = bz2.compress(b'') + self.assertEqual(text, self.EMPTY_DATA) + def testDecompress(self): - # "Test decompress() function" text = bz2.decompress(self.DATA) self.assertEqual(text, self.TEXT) @@@ -679,108 -403,30 +690,112 @@@ text = bz2.decompress(b"") self.assertEqual(text, b"") + def testDecompressToEmptyString(self): + text = bz2.decompress(self.EMPTY_DATA) + self.assertEqual(text, b'') + def testDecompressIncomplete(self): - # "Test decompress() function with incomplete data" self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10]) - @bigmemtest(size=_4G, memuse=1.25) - def testCompressBigmem(self, size): - text = b"a" * size - data = bz2.compress(text) - del text - text = self.decompress(data) - self.assertEqual(len(text), size) - self.assertEqual(text.strip(b"a"), b"") - - @bigmemtest(size=_4G, memuse=1.25, dry_run=False) - def testDecompressBigmem(self, unused_size): - # Issue #14398: decompression fails when output data is >=2GB. - text = bz2.decompress(self.DATA_BIGMEM) - self.assertEqual(len(text), _4G) - self.assertEqual(text.strip(b"\0"), b"") + def testDecompressMultiStream(self): + text = bz2.decompress(self.DATA * 5) + self.assertEqual(text, self.TEXT * 5) + + +class OpenTest(BaseTest): + def test_binary_modes(self): + with bz2.open(self.filename, "wb") as f: + f.write(self.TEXT) + with open(self.filename, "rb") as f: + file_data = bz2.decompress(f.read()) + self.assertEqual(file_data, self.TEXT) + with bz2.open(self.filename, "rb") as f: + self.assertEqual(f.read(), self.TEXT) + with bz2.open(self.filename, "ab") as f: + f.write(self.TEXT) + with open(self.filename, "rb") as f: + file_data = bz2.decompress(f.read()) + self.assertEqual(file_data, self.TEXT * 2) + + def test_implicit_binary_modes(self): + # Test implicit binary modes (no "b" or "t" in mode string). 
+ with bz2.open(self.filename, "w") as f: + f.write(self.TEXT) + with open(self.filename, "rb") as f: + file_data = bz2.decompress(f.read()) + self.assertEqual(file_data, self.TEXT) + with bz2.open(self.filename, "r") as f: + self.assertEqual(f.read(), self.TEXT) + with bz2.open(self.filename, "a") as f: + f.write(self.TEXT) + with open(self.filename, "rb") as f: + file_data = bz2.decompress(f.read()) + self.assertEqual(file_data, self.TEXT * 2) + + def test_text_modes(self): + text = self.TEXT.decode("ascii") + text_native_eol = text.replace("\n", os.linesep) + with bz2.open(self.filename, "wt") as f: + f.write(text) + with open(self.filename, "rb") as f: + file_data = bz2.decompress(f.read()).decode("ascii") + self.assertEqual(file_data, text_native_eol) + with bz2.open(self.filename, "rt") as f: + self.assertEqual(f.read(), text) + with bz2.open(self.filename, "at") as f: + f.write(text) + with open(self.filename, "rb") as f: + file_data = bz2.decompress(f.read()).decode("ascii") + self.assertEqual(file_data, text_native_eol * 2) + + def test_fileobj(self): + with bz2.open(BytesIO(self.DATA), "r") as f: + self.assertEqual(f.read(), self.TEXT) + with bz2.open(BytesIO(self.DATA), "rb") as f: + self.assertEqual(f.read(), self.TEXT) + text = self.TEXT.decode("ascii") + with bz2.open(BytesIO(self.DATA), "rt") as f: + self.assertEqual(f.read(), text) + + def test_bad_params(self): + # Test invalid parameter combinations. + with self.assertRaises(ValueError): + bz2.open(self.filename, "wbt") + with self.assertRaises(ValueError): + bz2.open(self.filename, "rb", encoding="utf-8") + with self.assertRaises(ValueError): + bz2.open(self.filename, "rb", errors="ignore") + with self.assertRaises(ValueError): + bz2.open(self.filename, "rb", newline="\n") + + def test_encoding(self): + # Test non-default encoding. + text = self.TEXT.decode("ascii") + text_native_eol = text.replace("\n", os.linesep) + with bz2.open(self.filename, "wt", encoding="utf-16-le") as f: + f.write(text) + with open(self.filename, "rb") as f: + file_data = bz2.decompress(f.read()).decode("utf-16-le") + self.assertEqual(file_data, text_native_eol) + with bz2.open(self.filename, "rt", encoding="utf-16-le") as f: + self.assertEqual(f.read(), text) + + def test_encoding_error_handler(self): + # Test with non-default encoding error handler. + with bz2.open(self.filename, "wb") as f: + f.write(b"foo\xffbar") + with bz2.open(self.filename, "rt", encoding="ascii", errors="ignore") \ + as f: + self.assertEqual(f.read(), "foobar") + + def test_newline(self): + # Test with explicit newline (universal newline mode disabled). + text = self.TEXT.decode("ascii") + with bz2.open(self.filename, "wt", newline="\n") as f: + f.write(text) + with bz2.open(self.filename, "rt", newline="\r") as f: + self.assertEqual(f.readlines(), [text]) + def test_main(): support.run_unittest( diff --cc Misc/NEWS index fdc313914e,e990e83f0d..eff20ee93e --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -123,6 -188,9 +123,9 @@@ Core and Builtin Library ------- -- Issue #16828: Fix error incorrectly raised by bz2.compress(''). Patch by - Martin Packman. ++- Issue #16828: Fix error incorrectly raised by bz2.compress(''). Initial patch ++ by Martin Packman. + - Issue #16541: tk_setPalette() now works with keyword arguments. - Issue #16820: In configparser, `parser.popitem()` no longer raises ValueError. 
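
The behaviour recorded in the NEWS entry above can be exercised from a Python prompt once the module change below is applied. This is an illustrative sketch, not part of the patch; the expected byte string is the EMPTY_DATA constant introduced in the tests above (BZh9 header plus the empty-stream trailer produced at the default compresslevel of 9):

    import bz2

    # One-shot compression of empty input now succeeds instead of raising.
    empty_stream = bz2.compress(b'')
    assert empty_stream == b'BZh9\x17rE8P\x90\x00\x00\x00\x00'  # EMPTY_DATA
    assert bz2.decompress(empty_stream) == b''

    # The incremental compressor handles empty input the same way.
    c = bz2.BZ2Compressor()
    assert c.compress(b'') + c.flush() == empty_stream
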
diff --cc Modules/_bz2module.c index 5cac8e6d51,0000000000..4eee5a2fee mode 100644,000000..100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@@ -1,605 -1,0 +1,607 @@@ +/* _bz2 - Low-level Python interface to libbzip2. */ + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "structmember.h" + +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include +#include + + +#ifndef BZ_CONFIG_ERROR +#define BZ2_bzCompress bzCompress +#define BZ2_bzCompressInit bzCompressInit +#define BZ2_bzCompressEnd bzCompressEnd +#define BZ2_bzDecompress bzDecompress +#define BZ2_bzDecompressInit bzDecompressInit +#define BZ2_bzDecompressEnd bzDecompressEnd +#endif /* ! BZ_CONFIG_ERROR */ + + +#ifdef WITH_THREAD +#define ACQUIRE_LOCK(obj) do { \ + if (!PyThread_acquire_lock((obj)->lock, 0)) { \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock((obj)->lock, 1); \ + Py_END_ALLOW_THREADS \ + } } while (0) +#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock) +#else +#define ACQUIRE_LOCK(obj) +#define RELEASE_LOCK(obj) +#endif + +#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) + + +typedef struct { + PyObject_HEAD + bz_stream bzs; + int flushed; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} BZ2Compressor; + +typedef struct { + PyObject_HEAD + bz_stream bzs; + char eof; /* T_BOOL expects a char */ + PyObject *unused_data; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} BZ2Decompressor; + + +/* Helper functions. */ + +static int +catch_bz2_error(int bzerror) +{ + switch(bzerror) { + case BZ_OK: + case BZ_RUN_OK: + case BZ_FLUSH_OK: + case BZ_FINISH_OK: + case BZ_STREAM_END: + return 0; + +#ifdef BZ_CONFIG_ERROR + case BZ_CONFIG_ERROR: + PyErr_SetString(PyExc_SystemError, + "libbzip2 was not compiled correctly"); + return 1; +#endif + case BZ_PARAM_ERROR: + PyErr_SetString(PyExc_ValueError, + "Internal error - " + "invalid parameters passed to libbzip2"); + return 1; + case BZ_MEM_ERROR: + PyErr_NoMemory(); + return 1; + case BZ_DATA_ERROR: + case BZ_DATA_ERROR_MAGIC: + PyErr_SetString(PyExc_IOError, "Invalid data stream"); + return 1; + case BZ_IO_ERROR: + PyErr_SetString(PyExc_IOError, "Unknown I/O error"); + return 1; + case BZ_UNEXPECTED_EOF: + PyErr_SetString(PyExc_EOFError, + "Compressed file ended before the logical " + "end-of-stream was detected"); + return 1; + case BZ_SEQUENCE_ERROR: + PyErr_SetString(PyExc_RuntimeError, + "Internal error - " + "Invalid sequence of commands sent to libbzip2"); + return 1; + default: + PyErr_Format(PyExc_IOError, + "Unrecognized error from libbzip2: %d", bzerror); + return 1; + } +} + +#if BUFSIZ < 8192 +#define SMALLCHUNK 8192 +#else +#define SMALLCHUNK BUFSIZ +#endif + +static int +grow_buffer(PyObject **buf) +{ + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. Use a less-than-double + growth factor to avoid excessive allocation. */ + size_t size = PyBytes_GET_SIZE(*buf); + size_t new_size = size + (size >> 3) + 6; + if (new_size > size) { + return _PyBytes_Resize(buf, new_size); + } else { /* overflow */ + PyErr_SetString(PyExc_OverflowError, + "Unable to allocate buffer - output too large"); + return -1; + } +} + + +/* BZ2Compressor class. 
*/ + +static PyObject * +compress(BZ2Compressor *c, char *data, size_t len, int action) +{ + size_t data_size = 0; + PyObject *result; + + result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK); + if (result == NULL) + return NULL; ++ + c->bzs.next_in = data; - /* On a 64-bit system, len might not fit in avail_in (an unsigned int). - Do compression in chunks of no more than UINT_MAX bytes each. */ - c->bzs.avail_in = MIN(len, UINT_MAX); - len -= c->bzs.avail_in; ++ c->bzs.avail_in = 0; + c->bzs.next_out = PyBytes_AS_STRING(result); + c->bzs.avail_out = PyBytes_GET_SIZE(result); + for (;;) { + char *this_out; + int bzerror; + - Py_BEGIN_ALLOW_THREADS - this_out = c->bzs.next_out; - bzerror = BZ2_bzCompress(&c->bzs, action); - data_size += c->bzs.next_out - this_out; - Py_END_ALLOW_THREADS - if (catch_bz2_error(bzerror)) - goto error; - ++ /* On a 64-bit system, len might not fit in avail_in (an unsigned int). ++ Do compression in chunks of no more than UINT_MAX bytes each. */ + if (c->bzs.avail_in == 0 && len > 0) { + c->bzs.avail_in = MIN(len, UINT_MAX); + len -= c->bzs.avail_in; + } + - /* In regular compression mode, stop when input data is exhausted. - In flushing mode, stop when all buffered data has been flushed. */ - if ((action == BZ_RUN && c->bzs.avail_in == 0) || - (action == BZ_FINISH && bzerror == BZ_STREAM_END)) ++ /* In regular compression mode, stop when input data is exhausted. */ ++ if (action == BZ_RUN && c->bzs.avail_in == 0) + break; + + if (c->bzs.avail_out == 0) { + size_t buffer_left = PyBytes_GET_SIZE(result) - data_size; + if (buffer_left == 0) { + if (grow_buffer(&result) < 0) + goto error; + c->bzs.next_out = PyBytes_AS_STRING(result) + data_size; + buffer_left = PyBytes_GET_SIZE(result) - data_size; + } + c->bzs.avail_out = MIN(buffer_left, UINT_MAX); + } ++ ++ Py_BEGIN_ALLOW_THREADS ++ this_out = c->bzs.next_out; ++ bzerror = BZ2_bzCompress(&c->bzs, action); ++ data_size += c->bzs.next_out - this_out; ++ Py_END_ALLOW_THREADS ++ if (catch_bz2_error(bzerror)) ++ goto error; ++ ++ /* In flushing mode, stop when all buffered data has been flushed. */ ++ if (action == BZ_FINISH && bzerror == BZ_STREAM_END) ++ break; + } + if (data_size != PyBytes_GET_SIZE(result)) + if (_PyBytes_Resize(&result, data_size) < 0) + goto error; + return result; + +error: + Py_XDECREF(result); + return NULL; +} + +PyDoc_STRVAR(BZ2Compressor_compress__doc__, +"compress(data) -> bytes\n" +"\n" +"Provide data to the compressor object. Returns a chunk of\n" +"compressed data if possible, or b'' otherwise.\n" +"\n" +"When you have finished providing data to the compressor, call the\n" +"flush() method to finish the compression process.\n"); + +static PyObject * +BZ2Compressor_compress(BZ2Compressor *self, PyObject *args) +{ + Py_buffer buffer; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "y*:compress", &buffer)) + return NULL; + + ACQUIRE_LOCK(self); + if (self->flushed) + PyErr_SetString(PyExc_ValueError, "Compressor has been flushed"); + else + result = compress(self, buffer.buf, buffer.len, BZ_RUN); + RELEASE_LOCK(self); + PyBuffer_Release(&buffer); + return result; +} + +PyDoc_STRVAR(BZ2Compressor_flush__doc__, +"flush() -> bytes\n" +"\n" +"Finish the compression process. 
Returns the compressed data left\n" +"in internal buffers.\n" +"\n" +"The compressor object may not be used after this method is called.\n"); + +static PyObject * +BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs) +{ + PyObject *result = NULL; + + ACQUIRE_LOCK(self); + if (self->flushed) + PyErr_SetString(PyExc_ValueError, "Repeated call to flush()"); + else { + self->flushed = 1; + result = compress(self, NULL, 0, BZ_FINISH); + } + RELEASE_LOCK(self); + return result; +} + +static int +BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs) +{ + int compresslevel = 9; + int bzerror; + + if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel)) + return -1; + if (!(1 <= compresslevel && compresslevel <= 9)) { + PyErr_SetString(PyExc_ValueError, + "compresslevel must be between 1 and 9"); + return -1; + } + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); + return -1; + } +#endif + + bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); + if (catch_bz2_error(bzerror)) + goto error; + + return 0; + +error: +#ifdef WITH_THREAD + PyThread_free_lock(self->lock); + self->lock = NULL; +#endif + return -1; +} + +static void +BZ2Compressor_dealloc(BZ2Compressor *self) +{ + BZ2_bzCompressEnd(&self->bzs); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyMethodDef BZ2Compressor_methods[] = { + {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS, + BZ2Compressor_compress__doc__}, + {"flush", (PyCFunction)BZ2Compressor_flush, METH_NOARGS, + BZ2Compressor_flush__doc__}, + {NULL} +}; + +PyDoc_STRVAR(BZ2Compressor__doc__, +"BZ2Compressor(compresslevel=9)\n" +"\n" +"Create a compressor object for compressing data incrementally.\n" +"\n" +"compresslevel, if given, must be a number between 1 and 9.\n" +"\n" +"For one-shot compression, use the compress() function instead.\n"); + +static PyTypeObject BZ2Compressor_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_bz2.BZ2Compressor", /* tp_name */ + sizeof(BZ2Compressor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)BZ2Compressor_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + BZ2Compressor__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + BZ2Compressor_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)BZ2Compressor_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* BZ2Decompressor class. */ + +static PyObject * +decompress(BZ2Decompressor *d, char *data, size_t len) +{ + size_t data_size = 0; + PyObject *result; + + result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK); + if (result == NULL) + return result; + d->bzs.next_in = data; + /* On a 64-bit system, len might not fit in avail_in (an unsigned int). + Do decompression in chunks of no more than UINT_MAX bytes each. 
*/ + d->bzs.avail_in = MIN(len, UINT_MAX); + len -= d->bzs.avail_in; + d->bzs.next_out = PyBytes_AS_STRING(result); + d->bzs.avail_out = PyBytes_GET_SIZE(result); + for (;;) { + char *this_out; + int bzerror; + + Py_BEGIN_ALLOW_THREADS + this_out = d->bzs.next_out; + bzerror = BZ2_bzDecompress(&d->bzs); + data_size += d->bzs.next_out - this_out; + Py_END_ALLOW_THREADS + if (catch_bz2_error(bzerror)) + goto error; + if (bzerror == BZ_STREAM_END) { + d->eof = 1; + len += d->bzs.avail_in; + if (len > 0) { /* Save leftover input to unused_data */ + Py_CLEAR(d->unused_data); + d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len); + if (d->unused_data == NULL) + goto error; + } + break; + } + if (d->bzs.avail_in == 0) { + if (len == 0) + break; + d->bzs.avail_in = MIN(len, UINT_MAX); + len -= d->bzs.avail_in; + } + if (d->bzs.avail_out == 0) { + size_t buffer_left = PyBytes_GET_SIZE(result) - data_size; + if (buffer_left == 0) { + if (grow_buffer(&result) < 0) + goto error; + d->bzs.next_out = PyBytes_AS_STRING(result) + data_size; + buffer_left = PyBytes_GET_SIZE(result) - data_size; + } + d->bzs.avail_out = MIN(buffer_left, UINT_MAX); + } + } + if (data_size != PyBytes_GET_SIZE(result)) + if (_PyBytes_Resize(&result, data_size) < 0) + goto error; + return result; + +error: + Py_XDECREF(result); + return NULL; +} + +PyDoc_STRVAR(BZ2Decompressor_decompress__doc__, +"decompress(data) -> bytes\n" +"\n" +"Provide data to the decompressor object. Returns a chunk of\n" +"decompressed data if possible, or b'' otherwise.\n" +"\n" +"Attempting to decompress data after the end of stream is reached\n" +"raises an EOFError. Any data found after the end of the stream\n" +"is ignored and saved in the unused_data attribute.\n"); + +static PyObject * +BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args) +{ + Py_buffer buffer; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "y*:decompress", &buffer)) + return NULL; + + ACQUIRE_LOCK(self); + if (self->eof) + PyErr_SetString(PyExc_EOFError, "End of stream already reached"); + else + result = decompress(self, buffer.buf, buffer.len); + RELEASE_LOCK(self); + PyBuffer_Release(&buffer); + return result; +} + +static int +BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs) +{ + int bzerror; + + if (!PyArg_ParseTuple(args, ":BZ2Decompressor")) + return -1; + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); + return -1; + } +#endif + + self->unused_data = PyBytes_FromStringAndSize("", 0); + if (self->unused_data == NULL) + goto error; + + bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); + if (catch_bz2_error(bzerror)) + goto error; + + return 0; + +error: + Py_CLEAR(self->unused_data); +#ifdef WITH_THREAD + PyThread_free_lock(self->lock); + self->lock = NULL; +#endif + return -1; +} + +static void +BZ2Decompressor_dealloc(BZ2Decompressor *self) +{ + BZ2_bzDecompressEnd(&self->bzs); + Py_CLEAR(self->unused_data); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyMethodDef BZ2Decompressor_methods[] = { + {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS, + BZ2Decompressor_decompress__doc__}, + {NULL} +}; + +PyDoc_STRVAR(BZ2Decompressor_eof__doc__, +"True if the end-of-stream marker has been reached."); + +PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__, +"Data found after the end of the 
compressed stream."); + +static PyMemberDef BZ2Decompressor_members[] = { + {"eof", T_BOOL, offsetof(BZ2Decompressor, eof), + READONLY, BZ2Decompressor_eof__doc__}, + {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data), + READONLY, BZ2Decompressor_unused_data__doc__}, + {NULL} +}; + +PyDoc_STRVAR(BZ2Decompressor__doc__, +"BZ2Decompressor()\n" +"\n" +"Create a decompressor object for decompressing data incrementally.\n" +"\n" +"For one-shot decompression, use the decompress() function instead.\n"); + +static PyTypeObject BZ2Decompressor_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_bz2.BZ2Decompressor", /* tp_name */ + sizeof(BZ2Decompressor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + BZ2Decompressor__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + BZ2Decompressor_methods, /* tp_methods */ + BZ2Decompressor_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)BZ2Decompressor_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* Module initialization. */ + +static struct PyModuleDef _bz2module = { + PyModuleDef_HEAD_INIT, + "_bz2", + NULL, + -1, + NULL, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__bz2(void) +{ + PyObject *m; + + if (PyType_Ready(&BZ2Compressor_Type) < 0) + return NULL; + if (PyType_Ready(&BZ2Decompressor_Type) < 0) + return NULL; + + m = PyModule_Create(&_bz2module); + if (m == NULL) + return NULL; + + Py_INCREF(&BZ2Compressor_Type); + PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type); + + Py_INCREF(&BZ2Decompressor_Type); + PyModule_AddObject(m, "BZ2Decompressor", + (PyObject *)&BZ2Decompressor_Type); + + return m; +}