class BaseTest(unittest.TestCase):
"Base for other testcases."
- TEXT = b'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n'
+ TEXT_LINES = [
+ b'root:x:0:0:root:/root:/bin/bash\n',
+ b'bin:x:1:1:bin:/bin:\n',
+ b'daemon:x:2:2:daemon:/sbin:\n',
+ b'adm:x:3:4:adm:/var/adm:\n',
+ b'lp:x:4:7:lp:/var/spool/lpd:\n',
+ b'sync:x:5:0:sync:/sbin:/bin/sync\n',
+ b'shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\n',
+ b'halt:x:7:0:halt:/sbin:/sbin/halt\n',
+ b'mail:x:8:12:mail:/var/spool/mail:\n',
+ b'news:x:9:13:news:/var/spool/news:\n',
+ b'uucp:x:10:14:uucp:/var/spool/uucp:\n',
+ b'operator:x:11:0:operator:/root:\n',
+ b'games:x:12:100:games:/usr/games:\n',
+ b'gopher:x:13:30:gopher:/usr/lib/gopher-data:\n',
+ b'ftp:x:14:50:FTP User:/var/ftp:/bin/bash\n',
+ b'nobody:x:65534:65534:Nobody:/home:\n',
+ b'postfix:x:100:101:postfix:/var/spool/postfix:\n',
+ b'niemeyer:x:500:500::/home/niemeyer:/bin/bash\n',
+ b'postgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\n',
+ b'mysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\n',
+ b'www:x:103:104::/var/www:/bin/false\n',
+ ]
+ TEXT = b''.join(TEXT_LINES)
DATA = b'BZh91AY&SY.\xc8N\x18\x00\x01>_\x80\x00\x10@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe00\x01\x99\xaa\x00\xc0\x03F\x86\x8c#&\x83F\x9a\x03\x06\xa6\xd0\xa6\x93M\x0fQ\xa7\xa8\x06\x804hh\x12$\x11\xa4i4\xf14S\xd2<Q\xb5\x0fH\xd3\xd4\xdd\xd5\x87\xbb\xf8\x94\r\x8f\xafI\x12\xe1\xc9\xf8/E\x00pu\x89\x12]\xc9\xbbDL\nQ\x0e\t1\x12\xdf\xa0\xc0\x97\xac2O9\x89\x13\x94\x0e\x1c7\x0ed\x95I\x0c\xaaJ\xa4\x18L\x10\x05#\x9c\xaf\xba\xbc/\x97\x8a#C\xc8\xe1\x8cW\xf9\xe2\xd0\xd6M\xa7\x8bXa<e\x84t\xcbL\xb3\xa7\xd9\xcd\xd1\xcb\x84.\xaf\xb3\xab\xab\xad`n}\xa0lh\tE,\x8eZ\x15\x17VH>\x88\xe5\xcd9gd6\x0b\n\xe9\x9b\xd5\x8a\x99\xf7\x08.K\x8ev\xfb\xf7xw\xbb\xdf\xa1\x92\xf1\xdd|/";\xa2\xba\x9f\xd5\xb1#A\xb6\xf6\xb3o\xc9\xc5y\\\xebO\xe7\x85\x9a\xbc\xb6f8\x952\xd5\xd7"%\x89>V,\xf7\xa6z\xe2\x9f\xa3\xdf\x11\x11"\xd6E)I\xa9\x13^\xca\xf3r\xd0\x03U\x922\xf26\xec\xb6\xed\x8b\xc3U\x13\x9d\xc5\x170\xa4\xfa^\x92\xacDF\x8a\x97\xd6\x19\xfe\xdd\xb8\xbd\x1a\x9a\x19\xa3\x80ankR\x8b\xe5\xd83]\xa9\xc6\x08\x82f\xf6\xb9"6l$\xb8j@\xc0\x8a\xb0l1..\xbak\x83ls\x15\xbc\xf4\xc1\x13\xbe\xf8E\xb8\x9d\r\xa8\x9dk\x84\xd3n\xfa\xacQ\x07\xb1%y\xaav\xb4\x08\xe0z\x1b\x16\xf5\x04\xe9\xcc\xb9\x08z\x1en7.G\xfc]\xc9\x14\xe1B@\xbb!8`'
- DATA_CRLF = b'BZh91AY&SY\xaez\xbbN\x00\x01H\xdf\x80\x00\x12@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe0@\x01\xbc\xc6`\x86*\x8d=M\xa9\x9a\x86\xd0L@\x0fI\xa6!\xa1\x13\xc8\x88jdi\x8d@\x03@\x1a\x1a\x0c\x0c\x83 \x00\xc4h2\x19\x01\x82D\x84e\t\xe8\x99\x89\x19\x1ah\x00\r\x1a\x11\xaf\x9b\x0fG\xf5(\x1b\x1f?\t\x12\xcf\xb5\xfc\x95E\x00ps\x89\x12^\xa4\xdd\xa2&\x05(\x87\x04\x98\x89u\xe40%\xb6\x19\'\x8c\xc4\x89\xca\x07\x0e\x1b!\x91UIFU%C\x994!DI\xd2\xfa\xf0\xf1N8W\xde\x13A\xf5\x9cr%?\x9f3;I45A\xd1\x8bT\xb1<l\xba\xcb_\xc00xY\x17r\x17\x88\x08\x08@\xa0\ry@\x10\x04$)`\xf2\xce\x89z\xb0s\xec\x9b.iW\x9d\x81\xb5-+t\x9f\x1a\'\x97dB\xf5x\xb5\xbe.[.\xd7\x0e\x81\xe7\x08\x1cN`\x88\x10\xca\x87\xc3!"\x80\x92R\xa1/\xd1\xc0\xe6mf\xac\xbd\x99\xcca\xb3\x8780>\xa4\xc7\x8d\x1a\\"\xad\xa1\xabyBg\x15\xb9l\x88\x88\x91k"\x94\xa4\xd4\x89\xae*\xa6\x0b\x10\x0c\xd6\xd4m\xe86\xec\xb5j\x8a\x86j\';\xca.\x01I\xf2\xaaJ\xe8\x88\x8cU+t3\xfb\x0c\n\xa33\x13r2\r\x16\xe0\xb3(\xbf\x1d\x83r\xe7M\xf0D\x1365\xd8\x88\xd3\xa4\x92\xcb2\x06\x04\\\xc1\xb0\xea//\xbek&\xd8\xe6+t\xe5\xa1\x13\xada\x16\xder5"w]\xa2i\xb7[\x97R \xe2IT\xcd;Z\x04dk4\xad\x8a\t\xd3\x81z\x10\xf1:^`\xab\x1f\xc5\xdc\x91N\x14$+\x9e\xae\xd3\x80'
+ EMPTY_DATA = b'BZh9\x17rE8P\x90\x00\x00\x00\x00'
- with open(findfile("testbz2_bigmem.bz2"), "rb") as f:
- DATA_BIGMEM = f.read()
+ def setUp(self):
+ # Path of the scratch file used by the tests; tearDown() deletes it.
+ self.filename = TESTFN
+
+ def tearDown(self):
+ if os.path.isfile(self.filename):
+ os.unlink(self.filename)
if has_cmdline_bunzip2:
def decompress(self, data):
data += bz2c.flush()
self.assertEqual(self.decompress(data), self.TEXT)
+ def testCompressEmptyString(self):
+ bz2c = BZ2Compressor()
+ data = bz2c.compress(b'')
+ data += bz2c.flush()
+ self.assertEqual(data, self.EMPTY_DATA)
+
def testCompressChunks10(self):
- # "Test BZ2Compressor.compress()/flush() with chunks of 10 bytes"
bz2c = BZ2Compressor()
n = 0
data = b''
data = bz2.compress(self.TEXT)
self.assertEqual(self.decompress(data), self.TEXT)
+ def testCompressEmptyString(self):
+ text = bz2.compress(b'')
+ self.assertEqual(text, self.EMPTY_DATA)
+
def testDecompress(self):
- # "Test decompress() function"
text = bz2.decompress(self.DATA)
self.assertEqual(text, self.TEXT)
text = bz2.decompress(b"")
self.assertEqual(text, b"")
+ def testDecompressToEmptyString(self):
+ text = bz2.decompress(self.EMPTY_DATA)
+ self.assertEqual(text, b'')
+
def testDecompressIncomplete(self):
- # "Test decompress() function with incomplete data"
self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
- @bigmemtest(size=_4G, memuse=1.25)
- def testCompressBigmem(self, size):
- text = b"a" * size
- data = bz2.compress(text)
- del text
- text = self.decompress(data)
- self.assertEqual(len(text), size)
- self.assertEqual(text.strip(b"a"), b"")
-
- @bigmemtest(size=_4G, memuse=1.25, dry_run=False)
- def testDecompressBigmem(self, unused_size):
- # Issue #14398: decompression fails when output data is >=2GB.
- text = bz2.decompress(self.DATA_BIGMEM)
- self.assertEqual(len(text), _4G)
- self.assertEqual(text.strip(b"\0"), b"")
+ def testDecompressMultiStream(self):
+ text = bz2.decompress(self.DATA * 5)
+ self.assertEqual(text, self.TEXT * 5)
+
+
+class OpenTest(BaseTest):
+ def test_binary_modes(self):
+ with bz2.open(self.filename, "wb") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT)
+ with bz2.open(self.filename, "rb") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ with bz2.open(self.filename, "ab") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT * 2)
+
+ def test_implicit_binary_modes(self):
+ # Test implicit binary modes (no "b" or "t" in mode string).
+ with bz2.open(self.filename, "w") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT)
+ with bz2.open(self.filename, "r") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ with bz2.open(self.filename, "a") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT * 2)
+
+ def test_text_modes(self):
+ text = self.TEXT.decode("ascii")
+ text_native_eol = text.replace("\n", os.linesep)
+ with bz2.open(self.filename, "wt") as f:
+ f.write(text)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read()).decode("ascii")
+ self.assertEqual(file_data, text_native_eol)
+ with bz2.open(self.filename, "rt") as f:
+ self.assertEqual(f.read(), text)
+ with bz2.open(self.filename, "at") as f:
+ f.write(text)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read()).decode("ascii")
+ self.assertEqual(file_data, text_native_eol * 2)
+
+ def test_fileobj(self):
+ with bz2.open(BytesIO(self.DATA), "r") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ with bz2.open(BytesIO(self.DATA), "rb") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ text = self.TEXT.decode("ascii")
+ with bz2.open(BytesIO(self.DATA), "rt") as f:
+ self.assertEqual(f.read(), text)
+
+ def test_bad_params(self):
+ # Test invalid parameter combinations.
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "wbt")
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "rb", encoding="utf-8")
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "rb", errors="ignore")
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "rb", newline="\n")
+
+ def test_encoding(self):
+ # Test non-default encoding.
+ text = self.TEXT.decode("ascii")
+ text_native_eol = text.replace("\n", os.linesep)
+ with bz2.open(self.filename, "wt", encoding="utf-16-le") as f:
+ f.write(text)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read()).decode("utf-16-le")
+ self.assertEqual(file_data, text_native_eol)
+ with bz2.open(self.filename, "rt", encoding="utf-16-le") as f:
+ self.assertEqual(f.read(), text)
+
+ def test_encoding_error_handler(self):
+ # Test with non-default encoding error handler.
+ with bz2.open(self.filename, "wb") as f:
+ f.write(b"foo\xffbar")
+ with bz2.open(self.filename, "rt", encoding="ascii", errors="ignore") \
+ as f:
+ self.assertEqual(f.read(), "foobar")
+
+ def test_newline(self):
+ # Test with explicit newline (universal newline mode disabled).
+ text = self.TEXT.decode("ascii")
+ with bz2.open(self.filename, "wt", newline="\n") as f:
+ f.write(text)
+ with bz2.open(self.filename, "rt", newline="\r") as f:
+ self.assertEqual(f.readlines(), [text])
+
def test_main():
support.run_unittest(
--- /dev/null
- /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
- Do compression in chunks of no more than UINT_MAX bytes each. */
- c->bzs.avail_in = MIN(len, UINT_MAX);
- len -= c->bzs.avail_in;
+/* _bz2 - Low-level Python interface to libbzip2. */
+
+#define PY_SSIZE_T_CLEAN
+
+#include "Python.h"
+#include "structmember.h"
+
+#ifdef WITH_THREAD
+#include "pythread.h"
+#endif
+
+#include <bzlib.h>
+#include <stdio.h>
+
+
+#ifndef BZ_CONFIG_ERROR
+#define BZ2_bzCompress bzCompress
+#define BZ2_bzCompressInit bzCompressInit
+#define BZ2_bzCompressEnd bzCompressEnd
+#define BZ2_bzDecompress bzDecompress
+#define BZ2_bzDecompressInit bzDecompressInit
+#define BZ2_bzDecompressEnd bzDecompressEnd
+#endif /* ! BZ_CONFIG_ERROR */
+
+
+#ifdef WITH_THREAD  /* try the lock without blocking first; if that fails, block on it inside Py_BEGIN/END_ALLOW_THREADS */
+#define ACQUIRE_LOCK(obj) do { \
+ if (!PyThread_acquire_lock((obj)->lock, 0)) { \
+ Py_BEGIN_ALLOW_THREADS \
+ PyThread_acquire_lock((obj)->lock, 1); \
+ Py_END_ALLOW_THREADS \
+ } } while (0)
+#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
+#else  /* no thread support: both macros expand to nothing */
+#define ACQUIRE_LOCK(obj)
+#define RELEASE_LOCK(obj)
+#endif  /* WITH_THREAD */
+
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
+
+typedef struct {  /* instance layout of _bz2.BZ2Compressor */
+ PyObject_HEAD
+ bz_stream bzs;  /* libbzip2 stream state, set up by BZ2_bzCompressInit() */
+ int flushed;  /* set to 1 by flush(); compress()/flush() then raise ValueError */
+#ifdef WITH_THREAD
+ PyThread_type_lock lock;  /* taken with ACQUIRE_LOCK around compress()/flush() */
+#endif
+} BZ2Compressor;
+
+typedef struct {  /* instance layout of _bz2.BZ2Decompressor */
+ PyObject_HEAD
+ bz_stream bzs;  /* libbzip2 stream state, set up by BZ2_bzDecompressInit() */
+ char eof; /* T_BOOL expects a char */
+ PyObject *unused_data;  /* bytes found after the end of the stream; starts as b"" */
+#ifdef WITH_THREAD
+ PyThread_type_lock lock;  /* taken with ACQUIRE_LOCK around decompress() */
+#endif
+} BZ2Decompressor;
+
+
+/* Helper functions. */
+
+static int
+catch_bz2_error(int bzerror)
+{
+    /* Translate a libbzip2 status code into a Python exception.
+       Returns 0 (no exception set) for the success codes; for every
+       other code an exception is set and 1 is returned. */
+    PyObject *exc_type;
+    const char *message;
+
+    switch (bzerror) {
+    case BZ_OK:
+    case BZ_RUN_OK:
+    case BZ_FLUSH_OK:
+    case BZ_FINISH_OK:
+    case BZ_STREAM_END:
+        return 0;
+
+    case BZ_MEM_ERROR:
+        /* PyErr_NoMemory() takes no message, so handle it on its own. */
+        PyErr_NoMemory();
+        return 1;
+
+#ifdef BZ_CONFIG_ERROR
+    case BZ_CONFIG_ERROR:
+        exc_type = PyExc_SystemError;
+        message = "libbzip2 was not compiled correctly";
+        break;
+#endif
+    case BZ_PARAM_ERROR:
+        exc_type = PyExc_ValueError;
+        message = "Internal error - "
+                  "invalid parameters passed to libbzip2";
+        break;
+    case BZ_DATA_ERROR:
+    case BZ_DATA_ERROR_MAGIC:
+        exc_type = PyExc_IOError;
+        message = "Invalid data stream";
+        break;
+    case BZ_IO_ERROR:
+        exc_type = PyExc_IOError;
+        message = "Unknown I/O error";
+        break;
+    case BZ_UNEXPECTED_EOF:
+        exc_type = PyExc_EOFError;
+        message = "Compressed file ended before the logical "
+                  "end-of-stream was detected";
+        break;
+    case BZ_SEQUENCE_ERROR:
+        exc_type = PyExc_RuntimeError;
+        message = "Internal error - "
+                  "Invalid sequence of commands sent to libbzip2";
+        break;
+
+    default:
+        /* Unknown code: include the number so it can be looked up. */
+        PyErr_Format(PyExc_IOError,
+                     "Unrecognized error from libbzip2: %d", bzerror);
+        return 1;
+    }
+
+    PyErr_SetString(exc_type, message);
+    return 1;
+}
+
+#if BUFSIZ < 8192
+#define SMALLCHUNK 8192
+#else
+#define SMALLCHUNK BUFSIZ
+#endif
+
+static int
+grow_buffer(PyObject **buf)
+{
+    /* Enlarge the bytes object *buf by about one eighth of its current
+       size (plus a small constant).  Growing proportionally keeps the
+       total copying cost amortized-linear, and a factor below two avoids
+       over-allocating.  Returns the result of _PyBytes_Resize(), or -1
+       with OverflowError set if the new size wraps around. */
+    size_t old_size = PyBytes_GET_SIZE(*buf);
+    size_t grown = old_size + (old_size >> 3) + 6;
+
+    if (grown <= old_size) {  /* size_t arithmetic overflowed */
+        PyErr_SetString(PyExc_OverflowError,
+                        "Unable to allocate buffer - output too large");
+        return -1;
+    }
+    return _PyBytes_Resize(buf, grown);
+}
+
+
+/* BZ2Compressor class.
+
+   compress() below is the shared worker behind BZ2Compressor.compress()
+   (action == BZ_RUN) and BZ2Compressor.flush() (action == BZ_FINISH).
+   It feeds `len` bytes starting at `data` to libbzip2 and returns a new
+   bytes object holding the compressed output produced by this call, or
+   NULL with an exception set.  Both callers invoke it with the object's
+   lock held. */
+
+static PyObject *
+compress(BZ2Compressor *c, char *data, size_t len, int action)
+{
+ size_t data_size = 0;  /* bytes of output written into result so far */
+ PyObject *result;
+
+ result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
+ if (result == NULL)
+ return NULL;
++
+ c->bzs.next_in = data;
- Py_BEGIN_ALLOW_THREADS
- this_out = c->bzs.next_out;
- bzerror = BZ2_bzCompress(&c->bzs, action);
- data_size += c->bzs.next_out - this_out;
- Py_END_ALLOW_THREADS
- if (catch_bz2_error(bzerror))
- goto error;
-
++ c->bzs.avail_in = 0;
+ c->bzs.next_out = PyBytes_AS_STRING(result);
+ c->bzs.avail_out = PyBytes_GET_SIZE(result);
+ for (;;) {
+ char *this_out;
+ int bzerror;
+
- /* In regular compression mode, stop when input data is exhausted.
- In flushing mode, stop when all buffered data has been flushed. */
- if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
- (action == BZ_FINISH && bzerror == BZ_STREAM_END))
++ /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
++ Do compression in chunks of no more than UINT_MAX bytes each. */
+ if (c->bzs.avail_in == 0 && len > 0) {
+ c->bzs.avail_in = MIN(len, UINT_MAX);
+ len -= c->bzs.avail_in;
+ }
+
++ /* In regular compression mode, stop when input data is exhausted. */
++ if (action == BZ_RUN && c->bzs.avail_in == 0)
+ break;
+
+ if (c->bzs.avail_out == 0) {  /* output window used up: expose more of result, growing it if full */
+ size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
+ if (buffer_left == 0) {
+ if (grow_buffer(&result) < 0)
+ goto error;
+ c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;  /* result may have moved */
+ buffer_left = PyBytes_GET_SIZE(result) - data_size;
+ }
+ c->bzs.avail_out = MIN(buffer_left, UINT_MAX);
+ }
++
++ Py_BEGIN_ALLOW_THREADS
++ this_out = c->bzs.next_out;
++ bzerror = BZ2_bzCompress(&c->bzs, action);
++ data_size += c->bzs.next_out - this_out;
++ Py_END_ALLOW_THREADS
++ if (catch_bz2_error(bzerror))
++ goto error;
++
++ /* In flushing mode, stop when all buffered data has been flushed. */
++ if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
++ break;
+ }
+ if (data_size != PyBytes_GET_SIZE(result))  /* trim the unused tail of the buffer */
+ if (_PyBytes_Resize(&result, data_size) < 0)
+ goto error;
+ return result;
+
+error:
+ Py_XDECREF(result);
+ return NULL;
+}
+
+PyDoc_STRVAR(BZ2Compressor_compress__doc__,
+"compress(data) -> bytes\n"
+"\n"
+"Provide data to the compressor object. Returns a chunk of\n"
+"compressed data if possible, or b'' otherwise.\n"
+"\n"
+"When you have finished providing data to the compressor, call the\n"
+"flush() method to finish the compression process.\n");
+
+static PyObject *
+BZ2Compressor_compress(BZ2Compressor *self, PyObject *args)
+{
+    /* Python-level compress(data): parse one bytes-like argument and run
+       it through compress() in BZ_RUN mode while holding the lock.
+       Raises ValueError if flush() has already been called. */
+    Py_buffer input;
+    PyObject *output;
+
+    if (!PyArg_ParseTuple(args, "y*:compress", &input))
+        return NULL;
+
+    ACQUIRE_LOCK(self);
+    if (!self->flushed) {
+        output = compress(self, input.buf, input.len, BZ_RUN);
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
+        output = NULL;
+    }
+    RELEASE_LOCK(self);
+
+    PyBuffer_Release(&input);
+    return output;
+}
+
+PyDoc_STRVAR(BZ2Compressor_flush__doc__,
+"flush() -> bytes\n"
+"\n"
+"Finish the compression process. Returns the compressed data left\n"
+"in internal buffers.\n"
+"\n"
+"The compressor object may not be used after this method is called.\n");
+
+static PyObject *
+BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs)
+{
+    /* Python-level flush(): mark the compressor as finished and return
+       whatever compress() emits in BZ_FINISH mode.  A second call raises
+       ValueError. */
+    PyObject *output;
+
+    ACQUIRE_LOCK(self);
+    if (!self->flushed) {
+        /* Set the flag before compressing, as the original code does. */
+        self->flushed = 1;
+        output = compress(self, NULL, 0, BZ_FINISH);
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
+        output = NULL;
+    }
+    RELEASE_LOCK(self);
+    return output;
+}
+
+static int
+BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs)
+{
+    /* tp_init for BZ2Compressor(compresslevel=9).
+       Validates the level (1..9), allocates the per-object lock when
+       threads are enabled, and initializes the libbzip2 stream.
+       Returns 0 on success, -1 with an exception set on failure. */
+    static char *kwlist[] = {"compresslevel", NULL};
+    int compresslevel = 9;
+    int bzerror;
+
+    /* Parse kwargs as well as args.  Previously kwargs was ignored, so
+       BZ2Compressor(compresslevel=1) silently compressed at level 9 and
+       misspelled keywords went unnoticed. */
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
+                                     kwlist, &compresslevel))
+        return -1;
+    if (!(1 <= compresslevel && compresslevel <= 9)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "compresslevel must be between 1 and 9");
+        return -1;
+    }
+
+#ifdef WITH_THREAD
+    self->lock = PyThread_allocate_lock();
+    if (self->lock == NULL) {
+        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
+        return -1;
+    }
+#endif
+
+    bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
+    if (catch_bz2_error(bzerror))
+        goto error;
+
+    return 0;
+
+error:
+#ifdef WITH_THREAD
+    /* Reset to NULL so the deallocator does not free the lock twice. */
+    PyThread_free_lock(self->lock);
+    self->lock = NULL;
+#endif
+    return -1;
+}
+
+static void
+BZ2Compressor_dealloc(BZ2Compressor *self)
+{
+ BZ2_bzCompressEnd(&self->bzs);  /* tear down the libbzip2 stream state */
+#ifdef WITH_THREAD
+ if (self->lock != NULL)  /* init's error path frees the lock and resets it to NULL */
+ PyThread_free_lock(self->lock);
+#endif
+ Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+static PyMethodDef BZ2Compressor_methods[] = {
+ {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS,
+ BZ2Compressor_compress__doc__},
+ {"flush", (PyCFunction)BZ2Compressor_flush, METH_NOARGS,
+ BZ2Compressor_flush__doc__},
+ {NULL}
+};
+
+PyDoc_STRVAR(BZ2Compressor__doc__,
+"BZ2Compressor(compresslevel=9)\n"
+"\n"
+"Create a compressor object for compressing data incrementally.\n"
+"\n"
+"compresslevel, if given, must be a number between 1 and 9.\n"
+"\n"
+"For one-shot compression, use the compress() function instead.\n");
+
+static PyTypeObject BZ2Compressor_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_bz2.BZ2Compressor", /* tp_name */
+ sizeof(BZ2Compressor), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ BZ2Compressor__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ BZ2Compressor_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)BZ2Compressor_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+
+/* BZ2Decompressor class.
+
+   decompress() below is the worker behind BZ2Decompressor.decompress().
+   It feeds `len` bytes starting at `data` to libbzip2 and returns a new
+   bytes object holding the decompressed output produced by this call,
+   or NULL with an exception set.  When libbzip2 reports BZ_STREAM_END it
+   sets d->eof and stores any input left after the stream in
+   d->unused_data.  The caller invokes it with the object's lock held. */
+
+static PyObject *
+decompress(BZ2Decompressor *d, char *data, size_t len)
+{
+ size_t data_size = 0;  /* bytes of output written into result so far */
+ PyObject *result;
+
+ result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
+ if (result == NULL)
+ return result;
+ d->bzs.next_in = data;
+ /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
+ Do decompression in chunks of no more than UINT_MAX bytes each. */
+ d->bzs.avail_in = MIN(len, UINT_MAX);
+ len -= d->bzs.avail_in;
+ d->bzs.next_out = PyBytes_AS_STRING(result);
+ d->bzs.avail_out = PyBytes_GET_SIZE(result);
+ for (;;) {
+ char *this_out;
+ int bzerror;
+
+ Py_BEGIN_ALLOW_THREADS
+ this_out = d->bzs.next_out;
+ bzerror = BZ2_bzDecompress(&d->bzs);
+ data_size += d->bzs.next_out - this_out;
+ Py_END_ALLOW_THREADS
+ if (catch_bz2_error(bzerror))
+ goto error;
+ if (bzerror == BZ_STREAM_END) {
+ d->eof = 1;
+ len += d->bzs.avail_in;  /* input not yet handed over plus input libbzip2 left unread */
+ if (len > 0) { /* Save leftover input to unused_data */
+ Py_CLEAR(d->unused_data);
+ d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
+ if (d->unused_data == NULL)
+ goto error;
+ }
+ break;
+ }
+ if (d->bzs.avail_in == 0) {
+ if (len == 0)  /* all input consumed: return what has been produced */
+ break;
+ d->bzs.avail_in = MIN(len, UINT_MAX);
+ len -= d->bzs.avail_in;
+ }
+ if (d->bzs.avail_out == 0) {  /* output window used up: expose more of result, growing it if full */
+ size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
+ if (buffer_left == 0) {
+ if (grow_buffer(&result) < 0)
+ goto error;
+ d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;  /* result may have moved */
+ buffer_left = PyBytes_GET_SIZE(result) - data_size;
+ }
+ d->bzs.avail_out = MIN(buffer_left, UINT_MAX);
+ }
+ }
+ if (data_size != PyBytes_GET_SIZE(result))  /* trim the unused tail of the buffer */
+ if (_PyBytes_Resize(&result, data_size) < 0)
+ goto error;
+ return result;
+
+error:
+ Py_XDECREF(result);
+ return NULL;
+}
+
+PyDoc_STRVAR(BZ2Decompressor_decompress__doc__,
+"decompress(data) -> bytes\n"
+"\n"
+"Provide data to the decompressor object. Returns a chunk of\n"
+"decompressed data if possible, or b'' otherwise.\n"
+"\n"
+"Attempting to decompress data after the end of stream is reached\n"
+"raises an EOFError. Any data found after the end of the stream\n"
+"is ignored and saved in the unused_data attribute.\n");
+
+static PyObject *
+BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args)
+{
+    /* Python-level decompress(data): parse one bytes-like argument and
+       run it through decompress() while holding the lock.  Raises
+       EOFError once the end of the stream has been reached. */
+    Py_buffer input;
+    PyObject *output;
+
+    if (!PyArg_ParseTuple(args, "y*:decompress", &input))
+        return NULL;
+
+    ACQUIRE_LOCK(self);
+    if (!self->eof) {
+        output = decompress(self, input.buf, input.len);
+    }
+    else {
+        PyErr_SetString(PyExc_EOFError, "End of stream already reached");
+        output = NULL;
+    }
+    RELEASE_LOCK(self);
+
+    PyBuffer_Release(&input);
+    return output;
+}
+
+static int
+BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs)
+{
+    /* tp_init for BZ2Decompressor(), which takes no arguments.
+       Allocates the per-object lock when threads are enabled, sets
+       unused_data to b"" and initializes the libbzip2 stream.
+       Returns 0 on success, -1 with an exception set on failure. */
+    static char *kwlist[] = {NULL};
+    int bzerror;
+
+    /* Parse kwargs against an empty keyword list so that stray keyword
+       arguments raise TypeError.  Previously kwargs was ignored and
+       BZ2Decompressor(anything=1) was silently accepted. */
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, ":BZ2Decompressor",
+                                     kwlist))
+        return -1;
+
+#ifdef WITH_THREAD
+    self->lock = PyThread_allocate_lock();
+    if (self->lock == NULL) {
+        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
+        return -1;
+    }
+#endif
+
+    self->unused_data = PyBytes_FromStringAndSize("", 0);
+    if (self->unused_data == NULL)
+        goto error;
+
+    bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
+    if (catch_bz2_error(bzerror))
+        goto error;
+
+    return 0;
+
+error:
+    Py_CLEAR(self->unused_data);
+#ifdef WITH_THREAD
+    /* Reset to NULL so the deallocator does not free the lock twice. */
+    PyThread_free_lock(self->lock);
+    self->lock = NULL;
+#endif
+    return -1;
+}
+
+static void
+BZ2Decompressor_dealloc(BZ2Decompressor *self)
+{
+ BZ2_bzDecompressEnd(&self->bzs);  /* tear down the libbzip2 stream state */
+ Py_CLEAR(self->unused_data);  /* safe when already NULL (init's error path clears it) */
+#ifdef WITH_THREAD
+ if (self->lock != NULL)  /* init's error path frees the lock and resets it to NULL */
+ PyThread_free_lock(self->lock);
+#endif
+ Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+static PyMethodDef BZ2Decompressor_methods[] = {
+ {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS,
+ BZ2Decompressor_decompress__doc__},
+ {NULL}
+};
+
+PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
+"True if the end-of-stream marker has been reached.");
+
+PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
+"Data found after the end of the compressed stream.");
+
+static PyMemberDef BZ2Decompressor_members[] = {
+ {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
+ READONLY, BZ2Decompressor_eof__doc__},
+ {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
+ READONLY, BZ2Decompressor_unused_data__doc__},
+ {NULL}
+};
+
+PyDoc_STRVAR(BZ2Decompressor__doc__,
+"BZ2Decompressor()\n"
+"\n"
+"Create a decompressor object for decompressing data incrementally.\n"
+"\n"
+"For one-shot decompression, use the decompress() function instead.\n");
+
+static PyTypeObject BZ2Decompressor_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_bz2.BZ2Decompressor", /* tp_name */
+ sizeof(BZ2Decompressor), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ BZ2Decompressor__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ BZ2Decompressor_methods, /* tp_methods */
+ BZ2Decompressor_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)BZ2Decompressor_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+
+/* Module initialization. */
+
+static struct PyModuleDef _bz2module = {
+ PyModuleDef_HEAD_INIT,
+ "_bz2",
+ NULL,
+ -1,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
+PyMODINIT_FUNC
+PyInit__bz2(void)
+{
+    /* Module init: finalize both types, create the _bz2 module and
+       publish the types on it.  Returns the new module, or NULL with an
+       exception set if any step fails. */
+    PyObject *m;
+
+    if (PyType_Ready(&BZ2Compressor_Type) < 0)
+        return NULL;
+    if (PyType_Ready(&BZ2Decompressor_Type) < 0)
+        return NULL;
+
+    m = PyModule_Create(&_bz2module);
+    if (m == NULL)
+        return NULL;
+
+    /* PyModule_AddObject() steals the reference only on success, so on
+       failure drop the reference taken here and the module as well. */
+    Py_INCREF(&BZ2Compressor_Type);
+    if (PyModule_AddObject(m, "BZ2Compressor",
+                           (PyObject *)&BZ2Compressor_Type) < 0) {
+        Py_DECREF(&BZ2Compressor_Type);
+        Py_DECREF(m);
+        return NULL;
+    }
+
+    Py_INCREF(&BZ2Decompressor_Type);
+    if (PyModule_AddObject(m, "BZ2Decompressor",
+                           (PyObject *)&BZ2Decompressor_Type) < 0) {
+        Py_DECREF(&BZ2Decompressor_Type);
+        Py_DECREF(m);
+        return NULL;
+    }
+
+    return m;
+}