From 972ee13e037432497fa003d4a786b2342a38db94 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 6 Sep 2010 18:48:21 +0000 Subject: [PATCH] Issue #5506: BytesIO objects now have a getbuffer() method exporting a view of their contents without duplicating them. The view is both readable and writable. --- Doc/library/io.rst | 18 +++++ Lib/_pyio.py | 5 ++ Lib/test/test_memoryio.py | 26 ++++++- Misc/NEWS | 4 ++ Modules/_io/_iomodule.c | 2 + Modules/_io/_iomodule.h | 2 + Modules/_io/bytesio.c | 141 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 197 insertions(+), 1 deletion(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 2476acc6c7..e61aa90432 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -518,6 +518,24 @@ In many situations, buffered I/O streams will provide higher performance :class:`BytesIO` provides or overrides these methods in addition to those from :class:`BufferedIOBase` and :class:`IOBase`: + .. method:: getbuffer() + + Return a readable and writable view over the contents of the buffer + without copying them. Also, mutating the view will transparently + update the contents of the buffer:: + + >>> b = io.BytesIO(b"abcdef") + >>> view = b.getbuffer() + >>> view[2:4] = b"56" + >>> b.getvalue() + b'ab56ef' + + .. note:: + As long as the view exists, the :class:`BytesIO` object cannot be + resized. + + .. versionadded:: 3.2 + .. method:: getvalue() Return ``bytes`` containing the entire contents of the buffer. diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 12ae4b6a2b..6b2564008c 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -785,6 +785,11 @@ class BytesIO(BufferedIOBase): raise ValueError("getvalue on closed file") return bytes(self._buffer) + def getbuffer(self): + """Return a readable and writable view of the buffer. 
+ """ + return memoryview(self._buffer) + def read(self, n=None): if self.closed: raise ValueError("read from closed file") diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 0decda5ebc..dcf6d51575 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -384,7 +384,31 @@ class MemoryTestMixin: del __main__.PickleTestMemIO -class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase): +class BytesIOMixin: + + def test_getbuffer(self): + memio = self.ioclass(b"1234567890") + buf = memio.getbuffer() + self.assertEqual(bytes(buf), b"1234567890") + memio.seek(5) + buf = memio.getbuffer() + self.assertEqual(bytes(buf), b"1234567890") + # Trying to change the size of the BytesIO while a buffer is exported + # raises a BufferError. + self.assertRaises(BufferError, memio.write, b'x' * 100) + self.assertRaises(BufferError, memio.truncate) + # Mutating the buffer updates the BytesIO + buf[3:6] = b"abc" + self.assertEqual(bytes(buf), b"123abc7890") + self.assertEqual(memio.getvalue(), b"123abc7890") + # After the buffer gets released, we can resize the BytesIO again + del buf + support.gc_collect() + memio.truncate() + + +class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, + BytesIOMixin, unittest.TestCase): UnsupportedOperation = pyio.UnsupportedOperation diff --git a/Misc/NEWS b/Misc/NEWS index 1ac1dec3f4..a64f69494d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,10 @@ Core and Builtins Library ------- +- Issue #5506: BytesIO objects now have a getbuffer() method exporting a + view of their contents without duplicating them. The view is both readable + and writable. + - Issue #7566: Implement os.path.sameopenfile for Windows. 
- Issue #9293: I/O streams now raise ``io.UnsupportedOperation`` when an diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 733a7b95d7..c0c8154654 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -710,6 +710,8 @@ PyInit__io(void) /* BytesIO */ PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type; ADD_TYPE(&PyBytesIO_Type, "BytesIO"); + if (PyType_Ready(&_PyBytesIOBuffer_Type) < 0) + goto fail; /* StringIO */ PyStringIO_Type.tp_base = &PyTextIOBase_Type; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 2b8e8a1d4b..925e4f2cc7 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -169,3 +169,5 @@ extern PyObject *_PyIO_str_write; extern PyObject *_PyIO_empty_str; extern PyObject *_PyIO_empty_bytes; extern PyObject *_PyIO_zero; + +extern PyTypeObject _PyBytesIOBuffer_Type; diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 3ef9e2e26f..c5654040b4 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -10,8 +10,15 @@ typedef struct { size_t buf_size; PyObject *dict; PyObject *weakreflist; + Py_ssize_t exports; } bytesio; +typedef struct { + PyObject_HEAD + bytesio *source; +} bytesiobuf; + + #define CHECK_CLOSED(self) \ if ((self)->buf == NULL) { \ PyErr_SetString(PyExc_ValueError, \ @@ -19,6 +26,14 @@ typedef struct { return NULL; \ } +#define CHECK_EXPORTS(self) \ + if ((self)->exports > 0) { \ + PyErr_SetString(PyExc_BufferError, \ + "Existing exports of data: object cannot be re-sized"); \ + return NULL; \ + } + + /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the next newline character. 
*/ @@ -173,6 +188,30 @@ bytesio_flush(bytesio *self) Py_RETURN_NONE; } +PyDoc_STRVAR(getbuffer_doc, +"getbuffer() -> memoryview.\n" +"\n" +"Get a read-write view over the contents of the BytesIO object."); + +static PyObject * +bytesio_getbuffer(bytesio *self) +{ + PyTypeObject *type = &_PyBytesIOBuffer_Type; + bytesiobuf *buf; + PyObject *view; + + CHECK_CLOSED(self); + + buf = (bytesiobuf *) type->tp_alloc(type, 0); + if (buf == NULL) + return NULL; + Py_INCREF(self); + buf->source = self; + view = PyMemoryView_FromObject((PyObject *) buf); + Py_DECREF(buf); + return view; +} + PyDoc_STRVAR(getval_doc, "getvalue() -> bytes.\n" "\n" @@ -422,6 +461,7 @@ bytesio_truncate(bytesio *self, PyObject *args) PyObject *arg = Py_None; CHECK_CLOSED(self); + CHECK_EXPORTS(self); if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) return NULL; @@ -543,6 +583,7 @@ bytesio_write(bytesio *self, PyObject *obj) PyObject *result = NULL; CHECK_CLOSED(self); + CHECK_EXPORTS(self); if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0) return NULL; @@ -664,6 +705,7 @@ bytesio_setstate(bytesio *self, PyObject *state) Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); return NULL; } + CHECK_EXPORTS(self); /* Reset the object to its default state. This is only needed to handle the case of repeated calls to __setstate__.
*/ self->string_size = 0; @@ -724,6 +766,11 @@ static void bytesio_dealloc(bytesio *self) { _PyObject_GC_UNTRACK(self); + if (self->exports > 0) { + PyErr_SetString(PyExc_SystemError, + "deallocated BytesIO object has exported buffers"); + PyErr_Print(); + } if (self->buf != NULL) { PyMem_Free(self->buf); self->buf = NULL; @@ -818,6 +865,7 @@ static struct PyMethodDef bytesio_methods[] = { {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, + {"getbuffer", (PyCFunction)bytesio_getbuffer, METH_NOARGS, getbuffer_doc}, {"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc}, {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc}, {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc}, @@ -873,3 +921,96 @@ PyTypeObject PyBytesIO_Type = { 0, /*tp_alloc*/ bytesio_new, /*tp_new*/ }; + + +/* + * Implementation of the small intermediate object used by getbuffer(). + * getbuffer() returns a memoryview over this object, which should make it + * invisible from Python code. 
+ */ + +static int +bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags) +{ + int ret; + void *ptr; + bytesio *b = (bytesio *) obj->source; + if (view == NULL) { + b->exports++; + return 0; + } + ptr = (void *) obj; + ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, + 0, flags); + if (ret >= 0) { + b->exports++; + } + return ret; +} + +static void +bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view) +{ + bytesio *b = (bytesio *) obj->source; + b->exports--; +} + +static int +bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg) +{ + Py_VISIT(self->source); + return 0; +} + +static void +bytesiobuf_dealloc(bytesiobuf *self) +{ + Py_CLEAR(self->source); + Py_TYPE(self)->tp_free(self); +} + +static PyBufferProcs bytesiobuf_as_buffer = { + (getbufferproc) bytesiobuf_getbuffer, + (releasebufferproc) bytesiobuf_releasebuffer, +}; + +PyTypeObject _PyBytesIOBuffer_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._BytesIOBuffer", /*tp_name*/ + sizeof(bytesiobuf), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)bytesiobuf_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + &bytesiobuf_as_buffer, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + (traverseproc)bytesiobuf_traverse, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + 0, /*tp_new*/ +}; -- 2.40.0