granicus.if.org Git - python/commitdiff
Issue #5506: BytesIO objects now have a getbuffer() method exporting a
author Antoine Pitrou <solipsis@pitrou.net>
Mon, 6 Sep 2010 18:48:21 +0000 (18:48 +0000)
committer Antoine Pitrou <solipsis@pitrou.net>
Mon, 6 Sep 2010 18:48:21 +0000 (18:48 +0000)
view of their contents without duplicating them.  The view is both readable
and writable.

Doc/library/io.rst
Lib/_pyio.py
Lib/test/test_memoryio.py
Misc/NEWS
Modules/_io/_iomodule.c
Modules/_io/_iomodule.h
Modules/_io/bytesio.c

index 2476acc6c7e8f9994fbdee9905708693c94d9a4a..e61aa9043274515e8064d8f618cde70d7b64838b 100644 (file)
@@ -518,6 +518,24 @@ In many situations, buffered I/O streams will provide higher performance
    :class:`BytesIO` provides or overrides these methods in addition to those
    from :class:`BufferedIOBase` and :class:`IOBase`:
 
+   .. method:: getbuffer()
+
+      Return a readable and writable view over the contents of the buffer
+      without copying them.  Also, mutating the view will transparently
+      update the contents of the buffer::
+
+         >>> b = io.BytesIO(b"abcdef")
+         >>> view = b.getbuffer()
+         >>> view[2:4] = b"56"
+         >>> b.getvalue()
+         b'ab56ef'
+
+      .. note::
+         As long as the view exists, the :class:`BytesIO` object cannot be
+         resized.
+
+      .. versionadded:: 3.2
+
    .. method:: getvalue()
 
       Return ``bytes`` containing the entire contents of the buffer.
index 12ae4b6a2be5620ea1f929bb1dfe1f898d2d32ae..6b2564008cf3cc0a644a14d3dfba5b0891d897c9 100644 (file)
@@ -785,6 +785,11 @@ class BytesIO(BufferedIOBase):
             raise ValueError("getvalue on closed file")
         return bytes(self._buffer)
 
+    def getbuffer(self):
+        """Return a readable and writable view of the buffer.
+        """
+        return memoryview(self._buffer)
+
     def read(self, n=None):
         if self.closed:
             raise ValueError("read from closed file")
index 0decda5ebc9402cf8f61a135127a4d71e36e4bd0..dcf6d51575cfad1daba667be6f63c852cc53b027 100644 (file)
@@ -384,7 +384,31 @@ class MemoryTestMixin:
         del __main__.PickleTestMemIO
 
 
-class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase):
+# Tests for features specific to BytesIO.  Mixed into the concrete test
+# classes (e.g. PyBytesIOTest), which are expected to provide `ioclass`.
+class BytesIOMixin:
+
+    def test_getbuffer(self):
+        memio = self.ioclass(b"1234567890")
+        buf = memio.getbuffer()
+        self.assertEqual(bytes(buf), b"1234567890")
+        # The view covers the whole contents, whatever the stream position.
+        memio.seek(5)
+        buf = memio.getbuffer()
+        self.assertEqual(bytes(buf), b"1234567890")
+        # Trying to change the size of the BytesIO while a buffer is exported
+        # raises a BufferError.
+        self.assertRaises(BufferError, memio.write, b'x' * 100)
+        self.assertRaises(BufferError, memio.truncate)
+        # Mutating the buffer updates the BytesIO
+        buf[3:6] = b"abc"
+        self.assertEqual(bytes(buf), b"123abc7890")
+        self.assertEqual(memio.getvalue(), b"123abc7890")
+        # After the buffer gets released, we can resize the BytesIO again
+        del buf
+        # NOTE(review): presumably forces the view to be released on
+        # implementations where `del` alone does not free it -- confirm.
+        support.gc_collect()
+        memio.truncate()
+
+
+class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin,
+                    BytesIOMixin, unittest.TestCase):
 
     UnsupportedOperation = pyio.UnsupportedOperation
 
index 1ac1dec3f49fef05477c269132ac08b1ed5fcb82..a64f69494d64b502dee14400770f7550beeef3fb 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #5506: BytesIO objects now have a getbuffer() method exporting a
+  view of their contents without duplicating them.  The view is both readable
+  and writable.
+
 - Issue #7566: Implement os.path.sameopenfile for Windows.
 
 - Issue #9293: I/O streams now raise ``io.UnsupportedOperation`` when an
index 733a7b95d7252e1ea31dac54465eab57baf2d273..c0c81546542b184bb819fbb23d7703efed3f43a3 100644 (file)
@@ -710,6 +710,8 @@ PyInit__io(void)
     /* BytesIO */
     PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type;
     ADD_TYPE(&PyBytesIO_Type, "BytesIO");
+    if (PyType_Ready(&_PyBytesIOBuffer_Type) < 0)
+        goto fail;
 
     /* StringIO */
     PyStringIO_Type.tp_base = &PyTextIOBase_Type;
index 2b8e8a1d4b0d639cb55fe40b517baed3794531be..925e4f2cc72a11ef5502472d49b74b274e5be22a 100644 (file)
@@ -169,3 +169,5 @@ extern PyObject *_PyIO_str_write;
 extern PyObject *_PyIO_empty_str;
 extern PyObject *_PyIO_empty_bytes;
 extern PyObject *_PyIO_zero;
+
+extern PyTypeObject _PyBytesIOBuffer_Type;
index 3ef9e2e26ff4f12c3d632149d351fca419b8e01c..c5654040b45023c5e68b7c7eeed794f8af0a52fe 100644 (file)
@@ -10,8 +10,15 @@ typedef struct {
     size_t buf_size;
     PyObject *dict;
     PyObject *weakreflist;
+    Py_ssize_t exports;
 } bytesio;
 
+/* Intermediate object created by bytesio_getbuffer(); it is the buffer
+   exporter that the returned memoryview is built on. */
+typedef struct {
+    PyObject_HEAD
+    /* Owned reference to the BytesIO whose buffer is exported; taken in
+       bytesio_getbuffer() and dropped in bytesiobuf_dealloc(). */
+    bytesio *source;
+} bytesiobuf;
+
+
 #define CHECK_CLOSED(self)                                  \
     if ((self)->buf == NULL) {                              \
         PyErr_SetString(PyExc_ValueError,                   \
@@ -19,6 +26,14 @@ typedef struct {
         return NULL;                                        \
     }
 
+/* Make the calling function fail (set BufferError and return NULL) while
+   at least one buffer export of `self` is outstanding.  Used by
+   bytesio_truncate(), bytesio_write() and bytesio_setstate(). */
+#define CHECK_EXPORTS(self) \
+    if ((self)->exports > 0) { \
+        PyErr_SetString(PyExc_BufferError, \
+                        "Existing exports of data: object cannot be re-sized"); \
+        return NULL; \
+    }
+
+
 /* Internal routine to get a line from the buffer of a BytesIO
    object. Returns the length between the current position to the
    next newline character. */
@@ -173,6 +188,30 @@ bytesio_flush(bytesio *self)
     Py_RETURN_NONE;
 }
 
+/* Docstring for BytesIO.getbuffer(), which returns a memoryview. */
+PyDoc_STRVAR(getbuffer_doc,
+"getbuffer() -> memoryview.\n"
+"\n"
+"Get a read-write view over the contents of the BytesIO object.");
+
+/* BytesIO.getbuffer(): return a memoryview over the object's contents.
+
+   A fresh bytesiobuf helper holding a reference to `self` is allocated and
+   passed to PyMemoryView_FromObject().  Returns the result of that call,
+   or NULL with an exception set if the BytesIO is closed or the helper
+   cannot be allocated. */
+static PyObject *
+bytesio_getbuffer(bytesio *self)
+{
+    PyTypeObject *type = &_PyBytesIOBuffer_Type;
+    bytesiobuf *buf;
+    PyObject *view;
+
+    CHECK_CLOSED(self);
+
+    buf = (bytesiobuf *) type->tp_alloc(type, 0);
+    if (buf == NULL)
+        return NULL;
+    /* The helper owns a reference to the BytesIO; bytesiobuf_dealloc()
+       releases it. */
+    Py_INCREF(self);
+    buf->source = self;
+    view = PyMemoryView_FromObject((PyObject *) buf);
+    /* Drop our own reference to the helper.  NOTE(review): the memoryview
+       is expected to keep the helper alive while the export exists. */
+    Py_DECREF(buf);
+    return view;
+}
+
 PyDoc_STRVAR(getval_doc,
 "getvalue() -> bytes.\n"
 "\n"
@@ -422,6 +461,7 @@ bytesio_truncate(bytesio *self, PyObject *args)
     PyObject *arg = Py_None;
 
     CHECK_CLOSED(self);
+    CHECK_EXPORTS(self);
 
     if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
         return NULL;
@@ -543,6 +583,7 @@ bytesio_write(bytesio *self, PyObject *obj)
     PyObject *result = NULL;
 
     CHECK_CLOSED(self);
+    CHECK_EXPORTS(self);
 
     if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
         return NULL;
@@ -664,6 +705,7 @@ bytesio_setstate(bytesio *self, PyObject *state)
                      Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
         return NULL;
     }
+    CHECK_EXPORTS(self);
     /* Reset the object to its default state. This is only needed to handle
        the case of repeated calls to __setstate__. */
     self->string_size = 0;
@@ -724,6 +766,11 @@ static void
 bytesio_dealloc(bytesio *self)
 {
     _PyObject_GC_UNTRACK(self);
+    if (self->exports > 0) {
+        PyErr_SetString(PyExc_SystemError,
+                        "deallocated BytesIO object has exported buffers");
+        PyErr_Print();
+    }
     if (self->buf != NULL) {
         PyMem_Free(self->buf);
         self->buf = NULL;
@@ -818,6 +865,7 @@ static struct PyMethodDef bytesio_methods[] = {
     {"readline",   (PyCFunction)bytesio_readline,   METH_VARARGS, readline_doc},
     {"readlines",  (PyCFunction)bytesio_readlines,  METH_VARARGS, readlines_doc},
     {"read",       (PyCFunction)bytesio_read,       METH_VARARGS, read_doc},
+    {"getbuffer",  (PyCFunction)bytesio_getbuffer,  METH_NOARGS,  getbuffer_doc},
     {"getvalue",   (PyCFunction)bytesio_getvalue,   METH_NOARGS,  getval_doc},
     {"seek",       (PyCFunction)bytesio_seek,       METH_VARARGS, seek_doc},
     {"truncate",   (PyCFunction)bytesio_truncate,   METH_VARARGS, truncate_doc},
@@ -873,3 +921,96 @@ PyTypeObject PyBytesIO_Type = {
     0,                                         /*tp_alloc*/
     bytesio_new,                               /*tp_new*/
 };
+
+
+/*
+ * Implementation of the small intermediate object used by getbuffer().
+ * getbuffer() returns a memoryview over this object, which should make it
+ * invisible from Python code.
+ */
+
+/* bf_getbuffer slot: expose the source BytesIO's data through `view`.
+
+   The exported region is the BytesIO's internal buffer, `string_size`
+   bytes long, and is requested as writable (readonly argument is 0).
+   Every successful call increments the source's `exports` counter, which
+   CHECK_EXPORTS consults to refuse resizing while views exist.
+   Returns the result of PyBuffer_FillInfo(), or 0 when `view` is NULL. */
+static int
+bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
+{
+    bytesio *b = obj->source;
+    int ret;
+
+    if (view == NULL) {
+        /* No Py_buffer to fill in: only record the export. */
+        b->exports++;
+        return 0;
+    }
+    ret = PyBuffer_FillInfo(view, (PyObject *) obj, b->buf, b->string_size,
+                            0, flags);
+    if (ret >= 0) {
+        /* Count the export only when the view was actually filled in. */
+        b->exports++;
+    }
+    return ret;
+}
+
+/* bf_releasebuffer slot: undo the `exports` increment made by
+   bytesiobuf_getbuffer().  `view` itself is not inspected. */
+static void
+bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
+{
+    bytesio *owner = obj->source;
+
+    owner->exports -= 1;
+}
+
+/* tp_traverse slot: report the referenced BytesIO to the cyclic garbage
+   collector.  Py_VISIT relies on the parameters being named `visit` and
+   `arg`. */
+static int
+bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
+{
+    Py_VISIT(self->source);
+    return 0;
+}
+
+/* tp_dealloc slot: release the reference to the source BytesIO and free
+   the helper object. */
+static void
+bytesiobuf_dealloc(bytesiobuf *self)
+{
+    /* The type is GC-enabled (Py_TPFLAGS_HAVE_GC): stop tracking the object
+       before any of its fields are invalidated, since dropping the source
+       reference may run arbitrary code. */
+    PyObject_GC_UnTrack(self);
+    Py_CLEAR(self->source);
+    Py_TYPE(self)->tp_free(self);
+}
+
+/* Buffer protocol slots of the helper type, given positionally:
+   bf_getbuffer, then bf_releasebuffer. */
+static PyBufferProcs bytesiobuf_as_buffer = {
+    (getbufferproc) bytesiobuf_getbuffer,
+    (releasebufferproc) bytesiobuf_releasebuffer,
+};
+
+/* Type object of the helper returned (wrapped in a memoryview) by
+   bytesio_getbuffer().  It is GC-enabled and only fills in the dealloc,
+   buffer-protocol and traverse slots.  PyInit__io() readies it with
+   PyType_Ready().  NOTE(review): tp_alloc is 0 here although
+   bytesio_getbuffer() calls it -- presumably filled in by PyType_Ready();
+   confirm. */
+PyTypeObject _PyBytesIOBuffer_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "_io._BytesIOBuffer",                      /*tp_name*/
+    sizeof(bytesiobuf),                        /*tp_basicsize*/
+    0,                                         /*tp_itemsize*/
+    (destructor)bytesiobuf_dealloc,            /*tp_dealloc*/
+    0,                                         /*tp_print*/
+    0,                                         /*tp_getattr*/
+    0,                                         /*tp_setattr*/
+    0,                                         /*tp_reserved*/
+    0,                                         /*tp_repr*/
+    0,                                         /*tp_as_number*/
+    0,                                         /*tp_as_sequence*/
+    0,                                         /*tp_as_mapping*/
+    0,                                         /*tp_hash*/
+    0,                                         /*tp_call*/
+    0,                                         /*tp_str*/
+    0,                                         /*tp_getattro*/
+    0,                                         /*tp_setattro*/
+    &bytesiobuf_as_buffer,                     /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
+    0,                                         /*tp_doc*/
+    (traverseproc)bytesiobuf_traverse,         /*tp_traverse*/
+    0,                                         /*tp_clear*/
+    0,                                         /*tp_richcompare*/
+    0,                                         /*tp_weaklistoffset*/
+    0,                                         /*tp_iter*/
+    0,                                         /*tp_iternext*/
+    0,                                         /*tp_methods*/
+    0,                                         /*tp_members*/
+    0,                                         /*tp_getset*/
+    0,                                         /*tp_base*/
+    0,                                         /*tp_dict*/
+    0,                                         /*tp_descr_get*/
+    0,                                         /*tp_descr_set*/
+    0,                                         /*tp_dictoffset*/
+    0,                                         /*tp_init*/
+    0,                                         /*tp_alloc*/
+    0,                                         /*tp_new*/
+};