]> granicus.if.org Git - python/commitdiff
bpo-36785: PEP 574 implementation (GH-7076)
authorAntoine Pitrou <antoine@python.org>
Sun, 26 May 2019 15:10:09 +0000 (17:10 +0200)
committerGitHub <noreply@github.com>
Sun, 26 May 2019 15:10:09 +0000 (17:10 +0200)
19 files changed:
Doc/library/pickle.rst
Include/Python.h
Include/picklebufobject.h [new file with mode: 0644]
Lib/pickle.py
Lib/pickletools.py
Lib/test/pickletester.py
Lib/test/test_inspect.py
Lib/test/test_pickle.py
Lib/test/test_picklebuffer.py [new file with mode: 0644]
Lib/test/test_pickletools.py
Lib/test/test_pyclbr.py
Makefile.pre.in
Misc/NEWS.d/next/Library/2019-05-03-20-47-55.bpo-36785.PQLnPq.rst [new file with mode: 0644]
Modules/_pickle.c
Modules/clinic/_pickle.c.h
Objects/object.c
Objects/picklebufobject.c [new file with mode: 0644]
PCbuild/pythoncore.vcxproj
PCbuild/pythoncore.vcxproj.filters

index f4c41ac68d2f7997004f2a59a6ff05cccc0ee187..6aa30492c7060cc0e74f79924c0cadc822479673 100644 (file)
@@ -195,34 +195,29 @@ The :mod:`pickle` module provides the following constants:
 The :mod:`pickle` module provides the following functions to make the pickling
 process more convenient:
 
-.. function:: dump(obj, file, protocol=None, \*, fix_imports=True)
+.. function:: dump(obj, file, protocol=None, \*, fix_imports=True, buffer_callback=None)
 
    Write a pickled representation of *obj* to the open :term:`file object` *file*.
    This is equivalent to ``Pickler(file, protocol).dump(obj)``.
 
-   The optional *protocol* argument, an integer, tells the pickler to use
-   the given protocol; supported protocols are 0 to :data:`HIGHEST_PROTOCOL`.
-   If not specified, the default is :data:`DEFAULT_PROTOCOL`.  If a negative
-   number is specified, :data:`HIGHEST_PROTOCOL` is selected.
+   Arguments *file*, *protocol*, *fix_imports* and *buffer_callback* have
+   the same meaning as in the :class:`Pickler` constructor.
 
-   The *file* argument must have a write() method that accepts a single bytes
-   argument.  It can thus be an on-disk file opened for binary writing, an
-   :class:`io.BytesIO` instance, or any other custom object that meets this
-   interface.
-
-   If *fix_imports* is true and *protocol* is less than 3, pickle will try to
-   map the new Python 3 names to the old module names used in Python 2, so
-   that the pickle data stream is readable with Python 2.
+   .. versionchanged:: 3.8
+      The *buffer_callback* argument was added.
 
-.. function:: dumps(obj, protocol=None, \*, fix_imports=True)
+.. function:: dumps(obj, protocol=None, \*, fix_imports=True, buffer_callback=None)
 
    Return the pickled representation of the object as a :class:`bytes` object,
    instead of writing it to a file.
 
-   Arguments *protocol* and *fix_imports* have the same meaning as in
-   :func:`dump`.
+   Arguments *protocol*, *fix_imports* and *buffer_callback* have the same
+   meaning as in the :class:`Pickler` constructor.
+
+   .. versionchanged:: 3.8
+      The *buffer_callback* argument was added.
 
-.. function:: load(file, \*, fix_imports=True, encoding="ASCII", errors="strict")
+.. function:: load(file, \*, fix_imports=True, encoding="ASCII", errors="strict", buffers=None)
 
    Read a pickled object representation from the open :term:`file object`
    *file* and return the reconstituted object hierarchy specified therein.
@@ -232,24 +227,13 @@ process more convenient:
    protocol argument is needed.  Bytes past the pickled object's
    representation are ignored.
 
-   The argument *file* must have two methods, a read() method that takes an
-   integer argument, and a readline() method that requires no arguments.  Both
-   methods should return bytes.  Thus *file* can be an on-disk file opened for
-   binary reading, an :class:`io.BytesIO` object, or any other custom object
-   that meets this interface.
-
-   Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
-   which are used to control compatibility support for pickle stream generated
-   by Python 2.  If *fix_imports* is true, pickle will try to map the old
-   Python 2 names to the new names used in Python 3.  The *encoding* and
-   *errors* tell pickle how to decode 8-bit string instances pickled by Python
-   2; these default to 'ASCII' and 'strict', respectively.  The *encoding* can
-   be 'bytes' to read these 8-bit string instances as bytes objects.
-   Using ``encoding='latin1'`` is required for unpickling NumPy arrays and
-   instances of :class:`~datetime.datetime`, :class:`~datetime.date` and
-   :class:`~datetime.time` pickled by Python 2.
+   Arguments *file*, *fix_imports*, *encoding*, *errors* and *buffers*
+   have the same meaning as in the :class:`Unpickler` constructor.
 
-.. function:: loads(bytes_object, \*, fix_imports=True, encoding="ASCII", errors="strict")
+   .. versionchanged:: 3.8
+      The *buffers* argument was added.
+
+.. function:: loads(bytes_object, \*, fix_imports=True, encoding="ASCII", errors="strict", buffers=None)
 
    Read a pickled object hierarchy from a :class:`bytes` object and return the
    reconstituted object hierarchy specified therein.
@@ -258,16 +242,11 @@ process more convenient:
    protocol argument is needed.  Bytes past the pickled object's
    representation are ignored.
 
-   Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
-   which are used to control compatibility support for pickle stream generated
-   by Python 2.  If *fix_imports* is true, pickle will try to map the old
-   Python 2 names to the new names used in Python 3.  The *encoding* and
-   *errors* tell pickle how to decode 8-bit string instances pickled by Python
-   2; these default to 'ASCII' and 'strict', respectively.  The *encoding* can
-   be 'bytes' to read these 8-bit string instances as bytes objects.
-   Using ``encoding='latin1'`` is required for unpickling NumPy arrays and
-   instances of :class:`~datetime.datetime`, :class:`~datetime.date` and
-   :class:`~datetime.time` pickled by Python 2.
+   Arguments *fix_imports*, *encoding*, *errors* and *buffers*
+   have the same meaning as in the :class:`Unpickler` constructor.
+
+   .. versionchanged:: 3.8
+      The *buffers* argument was added.
 
 
 The :mod:`pickle` module defines three exceptions:
@@ -295,10 +274,10 @@ The :mod:`pickle` module defines three exceptions:
    IndexError.
 
 
-The :mod:`pickle` module exports two classes, :class:`Pickler` and
-:class:`Unpickler`:
+The :mod:`pickle` module exports three classes, :class:`Pickler`,
+:class:`Unpickler` and :class:`PickleBuffer`:
 
-.. class:: Pickler(file, protocol=None, \*, fix_imports=True)
+.. class:: Pickler(file, protocol=None, \*, fix_imports=True, buffer_callback=None)
 
    This takes a binary file for writing a pickle data stream.
 
@@ -316,6 +295,20 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
    map the new Python 3 names to the old module names used in Python 2, so
    that the pickle data stream is readable with Python 2.
 
+   If *buffer_callback* is None (the default), buffer views are
+   serialized into *file* as part of the pickle stream.
+
+   If *buffer_callback* is not None, then it can be called any number
+   of times with a buffer view.  If the callback returns a false value
+   (such as None), the given buffer is :ref:`out-of-band <pickle-oob>`;
+   otherwise the buffer is serialized in-band, i.e. inside the pickle stream.
+
+   It is an error if *buffer_callback* is not None and *protocol* is
+   None or smaller than 5.
+
+   .. versionchanged:: 3.8
+      The *buffer_callback* argument was added.
+
    .. method:: dump(obj)
 
       Write a pickled representation of *obj* to the open file object given in
@@ -379,26 +372,43 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
       Use :func:`pickletools.optimize` if you need more compact pickles.
 
 
-.. class:: Unpickler(file, \*, fix_imports=True, encoding="ASCII", errors="strict")
+.. class:: Unpickler(file, \*, fix_imports=True, encoding="ASCII", errors="strict", buffers=None)
 
    This takes a binary file for reading a pickle data stream.
 
    The protocol version of the pickle is detected automatically, so no
    protocol argument is needed.
 
-   The argument *file* must have two methods, a read() method that takes an
-   integer argument, and a readline() method that requires no arguments.  Both
-   methods should return bytes.  Thus *file* can be an on-disk file object
+   The argument *file* must have three methods, a read() method that takes an
+   integer argument, a readinto() method that takes a buffer argument
+   and a readline() method that requires no arguments, as in the
+   :class:`io.BufferedIOBase` interface.  Thus *file* can be an on-disk file
    opened for binary reading, an :class:`io.BytesIO` object, or any other
    custom object that meets this interface.
 
-   Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
-   which are used to control compatibility support for pickle stream generated
-   by Python 2.  If *fix_imports* is true, pickle will try to map the old
-   Python 2 names to the new names used in Python 3.  The *encoding* and
-   *errors* tell pickle how to decode 8-bit string instances pickled by Python
-   2; these default to 'ASCII' and 'strict', respectively.  The *encoding* can
+   The optional arguments *fix_imports*, *encoding* and *errors* are used
+   to control compatibility support for pickle stream generated by Python 2.
+   If *fix_imports* is true, pickle will try to map the old Python 2 names
+   to the new names used in Python 3.  The *encoding* and *errors* tell
+   pickle how to decode 8-bit string instances pickled by Python 2;
+   these default to 'ASCII' and 'strict', respectively.  The *encoding* can
    be 'bytes' to read these 8-bit string instances as bytes objects.
+   Using ``encoding='latin1'`` is required for unpickling NumPy arrays and
+   instances of :class:`~datetime.datetime`, :class:`~datetime.date` and
+   :class:`~datetime.time` pickled by Python 2.
+
+   If *buffers* is None (the default), then all data necessary for
+   deserialization must be contained in the pickle stream.  This means
+   that the *buffer_callback* argument was None when a :class:`Pickler`
+   was instantiated (or when :func:`dump` or :func:`dumps` was called).
+
+   If *buffers* is not None, it should be an iterable of buffer-enabled
+   objects that is consumed each time the pickle stream references
+   an :ref:`out-of-band <pickle-oob>` buffer view.  Such buffers have been
+   given in order to the *buffer_callback* of a Pickler object.
+
+   .. versionchanged:: 3.8
+      The *buffers* argument was added.
 
    .. method:: load()
 
@@ -429,6 +439,34 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
 
       .. audit-event:: pickle.find_class "module name"
 
+.. class:: PickleBuffer(buffer)
+
+   A wrapper for a buffer representing picklable data.  *buffer* must be a
+   :ref:`buffer-providing <bufferobjects>` object, such as a
+   :term:`bytes-like object` or an N-dimensional array.
+
+   :class:`PickleBuffer` is itself a buffer provider, therefore it is
+   possible to pass it to other APIs expecting a buffer-providing object,
+   such as :class:`memoryview`.
+
+   :class:`PickleBuffer` objects can only be serialized using pickle
+   protocol 5 or higher.  They are eligible for
+   :ref:`out-of-band serialization <pickle-oob>`.
+
+   .. versionadded:: 3.8
+
+   .. method:: raw()
+
+      Return a :class:`memoryview` of the memory area underlying this buffer.
+      The returned object is a one-dimensional, C-contiguous memoryview
+      with format ``B`` (unsigned bytes).  :exc:`BufferError` is raised if
+      the buffer is neither C- nor Fortran-contiguous.
+
+   .. method:: release()
+
+      Release the underlying buffer exposed by the PickleBuffer object.
+
+
 .. _pickle-picklable:
 
 What can be pickled and unpickled?
@@ -864,6 +902,125 @@ a given class::
    assert unpickled_class.my_attribute == 1
 
 
+.. _pickle-oob:
+
+Out-of-band Buffers
+-------------------
+
+.. versionadded:: 3.8
+
+In some contexts, the :mod:`pickle` module is used to transfer massive amounts
+of data.  Therefore, it can be important to minimize the number of memory
+copies, to preserve performance and limit resource consumption.  However, normal
+operation of the :mod:`pickle` module, as it transforms a graph-like structure
+of objects into a sequential stream of bytes, intrinsically involves copying
+data to and from the pickle stream.
+
+This constraint can be eschewed if both the *provider* (the implementation
+of the object types to be transferred) and the *consumer* (the implementation
+of the communications system) support the out-of-band transfer facilities
+provided by pickle protocol 5 and higher.
+
+Provider API
+^^^^^^^^^^^^
+
+The large data objects to be pickled must implement a :meth:`__reduce_ex__`
+method specialized for protocol 5 and higher, which returns a
+:class:`PickleBuffer` instance (instead of e.g. a :class:`bytes` object)
+for any large data.
+
+A :class:`PickleBuffer` object *signals* that the underlying buffer is
+eligible for out-of-band data transfer.  Those objects remain compatible
+with normal usage of the :mod:`pickle` module.  However, consumers can also
+opt in to tell :mod:`pickle` that they will handle those buffers by
+themselves.
+
+Consumer API
+^^^^^^^^^^^^
+
+A communications system can enable custom handling of the :class:`PickleBuffer`
+objects generated when serializing an object graph.
+
+On the sending side, it needs to pass a *buffer_callback* argument to
+:class:`Pickler` (or to the :func:`dump` or :func:`dumps` function), which
+will be called with each :class:`PickleBuffer` generated while pickling
+the object graph.  Buffers accumulated by the *buffer_callback* will not
+see their data copied into the pickle stream; only a cheap marker will be
+inserted.
+
+On the receiving side, it needs to pass a *buffers* argument to
+:class:`Unpickler` (or to the :func:`load` or :func:`loads` function),
+which is an iterable of the buffers which were passed to *buffer_callback*.
+That iterable should produce buffers in the same order as they were passed
+to *buffer_callback*.  Those buffers will provide the data expected by the
+reconstructors of the objects whose pickling produced the original
+:class:`PickleBuffer` objects.
+
+Between the sending side and the receiving side, the communications system
+is free to implement its own transfer mechanism for out-of-band buffers.
+Potential optimizations include the use of shared memory or datatype-dependent
+compression.
+
+Example
+^^^^^^^
+
+Here is a trivial example where we implement a :class:`bytearray` subclass
+able to participate in out-of-band buffer pickling::
+
+   class ZeroCopyByteArray(bytearray):
+
+       def __reduce_ex__(self, protocol):
+           if protocol >= 5:
+               return type(self)._reconstruct, (PickleBuffer(self),), None
+           else:
+               # PickleBuffer is forbidden with pickle protocols <= 4.
+               return type(self)._reconstruct, (bytearray(self),)
+
+       @classmethod
+       def _reconstruct(cls, obj):
+           with memoryview(obj) as m:
+               # Get a handle over the original buffer object
+               obj = m.obj
+               if type(obj) is cls:
+                   # Original buffer object is a ZeroCopyByteArray, return it
+                   # as-is.
+                   return obj
+               else:
+                   return cls(obj)
+
+The reconstructor (the ``_reconstruct`` class method) returns the buffer's
+providing object if it has the right type.  This is an easy way to simulate
+zero-copy behaviour on this toy example.
+
+On the consumer side, we can pickle those objects the usual way, which
+when unserialized will give us a copy of the original object::
+
+   b = ZeroCopyByteArray(b"abc")
+   data = pickle.dumps(b, protocol=5)
+   new_b = pickle.loads(data)
+   print(b == new_b)  # True
+   print(b is new_b)  # False: a copy was made
+
+But if we pass a *buffer_callback* and then give back the accumulated
+buffers when unserializing, we are able to get back the original object::
+
+   b = ZeroCopyByteArray(b"abc")
+   buffers = []
+   data = pickle.dumps(b, protocol=5, buffer_callback=buffers.append)
+   new_b = pickle.loads(data, buffers=buffers)
+   print(b == new_b)  # True
+   print(b is new_b)  # True: no copy was made
+
+This example is limited by the fact that :class:`bytearray` allocates its
+own memory: you cannot create a :class:`bytearray` instance that is backed
+by another object's memory.  However, third-party datatypes such as NumPy
+arrays do not have this limitation, and allow use of zero-copy pickling
+(or making as few copies as possible) when transferring between distinct
+processes or systems.
+
+.. seealso:: :pep:`574` -- Pickle protocol 5 with out-of-band data
+
+
 .. _pickle-restrict:
 
 Restricting Globals
index 55b06aeea9846f8bb6c9cd25e3efba684aa4b55f..17ad5b3071d40526043f7c9fc753945c1942b1c4 100644 (file)
 #include "weakrefobject.h"
 #include "structseq.h"
 #include "namespaceobject.h"
+#include "picklebufobject.h"
 
 #include "codecs.h"
 #include "pyerrors.h"
diff --git a/Include/picklebufobject.h b/Include/picklebufobject.h
new file mode 100644 (file)
index 0000000..f07e900
--- /dev/null
@@ -0,0 +1,31 @@
+/* PickleBuffer object. This is built-in for ease of use from third-party
+ * C extensions.
+ */
+
+#ifndef Py_PICKLEBUFOBJECT_H
+#define Py_PICKLEBUFOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_LIMITED_API
+
+PyAPI_DATA(PyTypeObject) PyPickleBuffer_Type;
+
+#define PyPickleBuffer_Check(op) (Py_TYPE(op) == &PyPickleBuffer_Type)
+
+/* Create a PickleBuffer redirecting to the given buffer-enabled object */
+PyAPI_FUNC(PyObject *) PyPickleBuffer_FromObject(PyObject *);
+/* Get the PickleBuffer's underlying view to the original object
+ * (NULL if released)
+ */
+PyAPI_FUNC(const Py_buffer *) PyPickleBuffer_GetBuffer(PyObject *);
+/* Release the PickleBuffer.  Returns 0 on success, -1 on error. */
+PyAPI_FUNC(int) PyPickleBuffer_Release(PyObject *);
+
+#endif /* !Py_LIMITED_API */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_PICKLEBUFOBJECT_H */
index be8e3811947b74aaf0f854dc38cb8aa581b747e0..cb768b28586a1de6acd146de288e9a9785c13a02 100644 (file)
@@ -36,8 +36,10 @@ import io
 import codecs
 import _compat_pickle
 
+from _pickle import PickleBuffer
+
 __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
-           "Unpickler", "dump", "dumps", "load", "loads"]
+           "Unpickler", "dump", "dumps", "load", "loads", "PickleBuffer"]
 
 # Shortcut for use in isinstance testing
 bytes_types = (bytes, bytearray)
@@ -51,10 +53,11 @@ compatible_formats = ["1.0",            # Original protocol 0
                       "2.0",            # Protocol 2
                       "3.0",            # Protocol 3
                       "4.0",            # Protocol 4
+                      "5.0",            # Protocol 5
                       ]                 # Old format versions we can read
 
 # This is the highest protocol number we know how to read.
-HIGHEST_PROTOCOL = 4
+HIGHEST_PROTOCOL = 5
 
 # The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
 # Only bump this if the oldest still supported version of Python already
@@ -167,6 +170,7 @@ BINBYTES       = b'B'   # push bytes; counted binary string argument
 SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes
 
 # Protocol 4
+
 SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
 BINUNICODE8      = b'\x8d'  # push very long string
 BINBYTES8        = b'\x8e'  # push very long bytes string
@@ -178,6 +182,12 @@ STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
 MEMOIZE          = b'\x94'  # store top of the stack in memo
 FRAME            = b'\x95'  # indicate the beginning of a new frame
 
+# Protocol 5
+
+BYTEARRAY8       = b'\x96'  # push bytearray
+NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
+READONLY_BUFFER  = b'\x98'  # make top of stack readonly
+
 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
 
 
@@ -251,6 +261,23 @@ class _Unframer:
         self.file_readline = file_readline
         self.current_frame = None
 
+    def readinto(self, buf):
+        if self.current_frame:
+            n = self.current_frame.readinto(buf)
+            if n == 0 and len(buf) != 0:
+                self.current_frame = None
+                n = len(buf)
+                buf[:] = self.file_read(n)
+                return n
+            if n < len(buf):
+                raise UnpicklingError(
+                    "pickle exhausted before end of frame")
+            return n
+        else:
+            n = len(buf)
+            buf[:] = self.file_read(n)
+            return n
+
     def read(self, n):
         if self.current_frame:
             data = self.current_frame.read(n)
@@ -371,7 +398,8 @@ def decode_long(data):
 
 class _Pickler:
 
-    def __init__(self, file, protocol=None, *, fix_imports=True):
+    def __init__(self, file, protocol=None, *, fix_imports=True,
+                 buffer_callback=None):
         """This takes a binary file for writing a pickle data stream.
 
         The optional *protocol* argument tells the pickler to use the
@@ -393,6 +421,17 @@ class _Pickler:
         will try to map the new Python 3 names to the old module names
         used in Python 2, so that the pickle data stream is readable
         with Python 2.
+
+        If *buffer_callback* is None (the default), buffer views are
+        serialized into *file* as part of the pickle stream.
+
+        If *buffer_callback* is not None, then it can be called any number
+        of times with a buffer view.  If the callback returns a false value
+        (such as None), the given buffer is out-of-band; otherwise the
+        buffer is serialized in-band, i.e. inside the pickle stream.
+
+        It is an error if *buffer_callback* is not None and *protocol*
+        is None or smaller than 5.
         """
         if protocol is None:
             protocol = DEFAULT_PROTOCOL
@@ -400,6 +439,9 @@ class _Pickler:
             protocol = HIGHEST_PROTOCOL
         elif not 0 <= protocol <= HIGHEST_PROTOCOL:
             raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
+        if buffer_callback is not None and protocol < 5:
+            raise ValueError("buffer_callback needs protocol >= 5")
+        self._buffer_callback = buffer_callback
         try:
             self._file_write = file.write
         except AttributeError:
@@ -756,6 +798,46 @@ class _Pickler:
         self.memoize(obj)
     dispatch[bytes] = save_bytes
 
+    def save_bytearray(self, obj):
+        if self.proto < 5:
+            if not obj:  # bytearray is empty
+                self.save_reduce(bytearray, (), obj=obj)
+            else:
+                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
+            return
+        n = len(obj)
+        if n >= self.framer._FRAME_SIZE_TARGET:
+            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
+        else:
+            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
+    dispatch[bytearray] = save_bytearray
+
+    def save_picklebuffer(self, obj):
+        if self.proto < 5:
+            raise PicklingError("PickleBuffer can only be pickled with "
+                                "protocol >= 5")
+        with obj.raw() as m:
+            if not m.contiguous:
+                raise PicklingError("PickleBuffer can not be pickled when "
+                                    "pointing to a non-contiguous buffer")
+            in_band = True
+            if self._buffer_callback is not None:
+                in_band = bool(self._buffer_callback(obj))
+            if in_band:
+                # Write data in-band
+                # XXX The C implementation avoids a copy here
+                if m.readonly:
+                    self.save_bytes(m.tobytes())
+                else:
+                    self.save_bytearray(m.tobytes())
+            else:
+                # Write data out-of-band
+                self.write(NEXT_BUFFER)
+                if m.readonly:
+                    self.write(READONLY_BUFFER)
+
+    dispatch[PickleBuffer] = save_picklebuffer
+
     def save_str(self, obj):
         if self.bin:
             encoded = obj.encode('utf-8', 'surrogatepass')
@@ -1042,7 +1124,7 @@ class _Pickler:
 class _Unpickler:
 
     def __init__(self, file, *, fix_imports=True,
-                 encoding="ASCII", errors="strict"):
+                 encoding="ASCII", errors="strict", buffers=None):
         """This takes a binary file for reading a pickle data stream.
 
         The protocol version of the pickle is detected automatically, so
@@ -1061,7 +1143,17 @@ class _Unpickler:
         reading, a BytesIO object, or any other custom object that
         meets this interface.
 
-        Optional keyword arguments are *fix_imports*, *encoding* and
+        If *buffers* is not None, it should be an iterable of buffer-enabled
+        objects that is consumed each time the pickle stream references
+        an out-of-band buffer view.  Such buffers have been given in order
+        to the *buffer_callback* of a Pickler object.
+
+        If *buffers* is None (the default), then the buffers are taken
+        from the pickle stream, assuming they are serialized there.
+        It is an error for *buffers* to be None if the pickle stream
+        was produced with a non-None *buffer_callback*.
+
+        Other optional arguments are *fix_imports*, *encoding* and
         *errors*, which are used to control compatibility support for
         pickle stream generated by Python 2.  If *fix_imports* is True,
         pickle will try to map the old Python 2 names to the new names
@@ -1070,6 +1162,7 @@ class _Unpickler:
         default to 'ASCII' and 'strict', respectively. *encoding* can be
         'bytes' to read theses 8-bit string instances as bytes objects.
         """
+        self._buffers = iter(buffers) if buffers is not None else None
         self._file_readline = file.readline
         self._file_read = file.read
         self.memo = {}
@@ -1090,6 +1183,7 @@ class _Unpickler:
                                   "%s.__init__()" % (self.__class__.__name__,))
         self._unframer = _Unframer(self._file_read, self._file_readline)
         self.read = self._unframer.read
+        self.readinto = self._unframer.readinto
         self.readline = self._unframer.readline
         self.metastack = []
         self.stack = []
@@ -1276,6 +1370,34 @@ class _Unpickler:
         self.append(self.read(len))
     dispatch[BINBYTES8[0]] = load_binbytes8
 
+    def load_bytearray8(self):
+        len, = unpack('<Q', self.read(8))
+        if len > maxsize:
+            raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
+                                  "of %d bytes" % maxsize)
+        b = bytearray(len)
+        self.readinto(b)
+        self.append(b)
+    dispatch[BYTEARRAY8[0]] = load_bytearray8
+
+    def load_next_buffer(self):
+        if self._buffers is None:
+            raise UnpicklingError("pickle stream refers to out-of-band data "
+                                  "but no *buffers* argument was given")
+        try:
+            buf = next(self._buffers)
+        except StopIteration:
+            raise UnpicklingError("not enough out-of-band buffers")
+        self.append(buf)
+    dispatch[NEXT_BUFFER[0]] = load_next_buffer
+
+    def load_readonly_buffer(self):
+        buf = self.stack[-1]
+        with memoryview(buf) as m:
+            if not m.readonly:
+                self.stack[-1] = m.toreadonly()
+    dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
+
     def load_short_binstring(self):
         len = self.read(1)[0]
         data = self.read(len)
@@ -1600,25 +1722,29 @@ class _Unpickler:
 
 # Shorthands
 
-def _dump(obj, file, protocol=None, *, fix_imports=True):
-    _Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
+def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
+    _Pickler(file, protocol, fix_imports=fix_imports,
+             buffer_callback=buffer_callback).dump(obj)
 
-def _dumps(obj, protocol=None, *, fix_imports=True):
+def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
     f = io.BytesIO()
-    _Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
+    _Pickler(f, protocol, fix_imports=fix_imports,
+             buffer_callback=buffer_callback).dump(obj)
     res = f.getvalue()
     assert isinstance(res, bytes_types)
     return res
 
-def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
-    return _Unpickler(file, fix_imports=fix_imports,
+def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
+          buffers=None):
+    return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                      encoding=encoding, errors=errors).load()
 
-def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
+def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
+           buffers=None):
     if isinstance(s, str):
         raise TypeError("Can't load pickle from unicode string")
     file = io.BytesIO(s)
-    return _Unpickler(file, fix_imports=fix_imports,
+    return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                       encoding=encoding, errors=errors).load()
 
 # Use the faster _pickle if possible
index ed8bee36e8c586abc87770cbcf4237e3d91fb9fa..95706e746c9870c92996a622ef56a25c41aa88cd 100644 (file)
@@ -565,6 +565,41 @@ bytes8 = ArgumentDescriptor(
               the number of bytes, and the second argument is that many bytes.
               """)
 
+
+def read_bytearray8(f):
+    r"""
+    >>> import io, struct, sys
+    >>> read_bytearray8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc"))
+    bytearray(b'')
+    >>> read_bytearray8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef"))
+    bytearray(b'abc')
+    >>> bigsize8 = struct.pack("<Q", sys.maxsize//3)
+    >>> read_bytearray8(io.BytesIO(bigsize8 + b"abcdef"))  #doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    ...
+    ValueError: expected ... bytes in a bytearray8, but only 6 remain
+    """
+
+    n = read_uint8(f)
+    assert n >= 0
+    if n > sys.maxsize:
+        raise ValueError("bytearray8 byte count > sys.maxsize: %d" % n)
+    data = f.read(n)
+    if len(data) == n:
+        return bytearray(data)
+    raise ValueError("expected %d bytes in a bytearray8, but only %d remain" %
+                     (n, len(data)))
+
+bytearray8 = ArgumentDescriptor(
+              name="bytearray8",
+              n=TAKEN_FROM_ARGUMENT8U,
+              reader=read_bytearray8,
+              doc="""A counted bytearray.
+
+              The first argument is an 8-byte little-endian unsigned int giving
+              the number of bytes, and the second argument is that many bytes.
+              """)
+
 def read_unicodestringnl(f):
     r"""
     >>> import io
@@ -970,6 +1005,11 @@ pybytes = StackObject(
     obtype=bytes,
     doc="A Python bytes object.")
 
+pybytearray = StackObject(
+    name='bytearray',
+    obtype=bytearray,
+    doc="A Python bytearray object.")
+
 pyunicode = StackObject(
     name='str',
     obtype=str,
@@ -1005,6 +1045,11 @@ pyfrozenset = StackObject(
     obtype=set,
     doc="A Python frozenset object.")
 
+pybuffer = StackObject(
+    name='buffer',
+    obtype=object,
+    doc="A Python buffer-like object.")
+
 anyobject = StackObject(
     name='any',
     obtype=object,
@@ -1265,7 +1310,7 @@ opcodes = [
       object instead.
       """),
 
-    # Bytes (protocol 3 only; older protocols don't support bytes at all)
+    # Bytes (protocol 3 and higher)
 
     I(name='BINBYTES',
       code='B',
@@ -1306,6 +1351,39 @@ opcodes = [
       which are taken literally as the string content.
       """),
 
+    # Bytearray (protocol 5 and higher)
+
+    I(name='BYTEARRAY8',
+      code='\x96',
+      arg=bytearray8,
+      stack_before=[],
+      stack_after=[pybytearray],
+      proto=5,
+      doc="""Push a Python bytearray object.
+
+      There are two arguments:  the first is an 8-byte unsigned int giving
+      the number of bytes in the bytearray, and the second is that many bytes,
+      which are taken literally as the bytearray content.
+      """),
+
+    # Out-of-band buffer (protocol 5 and higher)
+
+    I(name='NEXT_BUFFER',
+      code='\x97',
+      arg=None,
+      stack_before=[],
+      stack_after=[pybuffer],
+      proto=5,
+      doc="Push an out-of-band buffer object."),
+
+    I(name='READONLY_BUFFER',
+      code='\x98',
+      arg=None,
+      stack_before=[pybuffer],
+      stack_after=[pybuffer],
+      proto=5,
+      doc="Make an out-of-band buffer object read-only."),
+
     # Ways to spell None.
 
     I(name='NONE',
index 4f8c2942df93dd72b1508930908c2d050fdaae08..f6fda9ee6d8363cc2f043eea036d4ff6afa22c87 100644 (file)
@@ -16,6 +16,16 @@ import weakref
 from textwrap import dedent
 from http.cookies import SimpleCookie
 
+try:
+    import _testbuffer
+except ImportError:
+    _testbuffer = None
+
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
 from test import support
 from test.support import (
     TestFailed, TESTFN, run_with_locale, no_tracing,
@@ -162,6 +172,139 @@ def create_dynamic_class(name, bases):
     result.reduce_args = (name, bases)
     return result
 
+
+class ZeroCopyBytes(bytes):
+    readonly = True
+    c_contiguous = True
+    f_contiguous = True
+    zero_copy_reconstruct = True
+
+    def __reduce_ex__(self, protocol):
+        if protocol >= 5:
+            return type(self)._reconstruct, (pickle.PickleBuffer(self),), None
+        else:
+            return type(self)._reconstruct, (bytes(self),)
+
+    def __repr__(self):
+        return "{}({!r})".format(self.__class__.__name__, bytes(self))
+
+    __str__ = __repr__
+
+    @classmethod
+    def _reconstruct(cls, obj):
+        with memoryview(obj) as m:
+            obj = m.obj
+            if type(obj) is cls:
+                # Zero-copy
+                return obj
+            else:
+                return cls(obj)
+
+
+class ZeroCopyBytearray(bytearray):
+    readonly = False
+    c_contiguous = True
+    f_contiguous = True
+    zero_copy_reconstruct = True
+
+    def __reduce_ex__(self, protocol):
+        if protocol >= 5:
+            return type(self)._reconstruct, (pickle.PickleBuffer(self),), None
+        else:
+            return type(self)._reconstruct, (bytes(self),)
+
+    def __repr__(self):
+        return "{}({!r})".format(self.__class__.__name__, bytes(self))
+
+    __str__ = __repr__
+
+    @classmethod
+    def _reconstruct(cls, obj):
+        with memoryview(obj) as m:
+            obj = m.obj
+            if type(obj) is cls:
+                # Zero-copy
+                return obj
+            else:
+                return cls(obj)
+
+
+if _testbuffer is not None:
+
+    class PicklableNDArray:
+        # A not-really-zero-copy picklable ndarray, as the ndarray()
+        # constructor doesn't allow for it
+
+        zero_copy_reconstruct = False
+
+        def __init__(self, *args, **kwargs):
+            self.array = _testbuffer.ndarray(*args, **kwargs)
+
+        def __getitem__(self, idx):
+            cls = type(self)
+            new = cls.__new__(cls)
+            new.array = self.array[idx]
+            return new
+
+        @property
+        def readonly(self):
+            return self.array.readonly
+
+        @property
+        def c_contiguous(self):
+            return self.array.c_contiguous
+
+        @property
+        def f_contiguous(self):
+            return self.array.f_contiguous
+
+        def __eq__(self, other):
+            if not isinstance(other, PicklableNDArray):
+                return NotImplemented
+            return (other.array.format == self.array.format and
+                    other.array.shape == self.array.shape and
+                    other.array.strides == self.array.strides and
+                    other.array.readonly == self.array.readonly and
+                    other.array.tobytes() == self.array.tobytes())
+
+        def __ne__(self, other):
+            if not isinstance(other, PicklableNDArray):
+                return NotImplemented
+            return not (self == other)
+
+        def __repr__(self):
+            return (f"{type(self)}(shape={self.array.shape},"
+                    f"strides={self.array.strides}, "
+                    f"bytes={self.array.tobytes()})")
+
+        def __reduce_ex__(self, protocol):
+            if not self.array.contiguous:
+                raise NotImplementedError("Reconstructing a non-contiguous "
+                                          "ndarray does not seem possible")
+            ndarray_kwargs = {"shape": self.array.shape,
+                              "strides": self.array.strides,
+                              "format": self.array.format,
+                              "flags": (0 if self.readonly
+                                        else _testbuffer.ND_WRITABLE)}
+            pb = pickle.PickleBuffer(self.array)
+            if protocol >= 5:
+                return (type(self)._reconstruct,
+                        (pb, ndarray_kwargs))
+            else:
+                # Need to serialize the bytes in physical order
+                with pb.raw() as m:
+                    return (type(self)._reconstruct,
+                            (m.tobytes(), ndarray_kwargs))
+
+        @classmethod
+        def _reconstruct(cls, obj, kwargs):
+            with memoryview(obj) as m:
+                # For some reason, ndarray() wants a list of integers...
+                # XXX This only works if format == 'B'
+                items = list(m.tobytes())
+            return cls(items, **kwargs)
+
+
 # DATA0 .. DATA4 are the pickles we expect under the various protocols, for
 # the object returned by create_data().
 
@@ -888,12 +1031,22 @@ class AbstractUnpickleTests(unittest.TestCase):
         dumped = b'\x80\x04\x8d\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
         self.assertEqual(self.loads(dumped), '\u20ac\x00')
 
+    def test_bytearray8(self):
+        dumped = b'\x80\x05\x96\x03\x00\x00\x00\x00\x00\x00\x00xxx.'
+        self.assertEqual(self.loads(dumped), bytearray(b'xxx'))
+
     @requires_32b
     def test_large_32b_binbytes8(self):
         dumped = b'\x80\x04\x8e\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
         self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
                                     dumped)
 
+    @requires_32b
+    def test_large_32b_bytearray8(self):
+        dumped = b'\x80\x05\x96\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
+        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
+                                    dumped)
+
     @requires_32b
     def test_large_32b_binunicode8(self):
         dumped = b'\x80\x04\x8d\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
@@ -1171,6 +1324,10 @@ class AbstractUnpickleTests(unittest.TestCase):
             b'\x8e\x03\x00\x00\x00\x00\x00\x00',
             b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00',
             b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00ab',
+            b'\x96',                    # BYTEARRAY8
+            b'\x96\x03\x00\x00\x00\x00\x00\x00',
+            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00',
+            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00ab',
             b'\x95',                    # FRAME
             b'\x95\x02\x00\x00\x00\x00\x00\x00',
             b'\x95\x02\x00\x00\x00\x00\x00\x00\x00',
@@ -1482,6 +1639,25 @@ class AbstractPickleTests(unittest.TestCase):
                 p = self.dumps(s, proto)
                 self.assert_is_copy(s, self.loads(p))
 
+    def test_bytearray(self):
+        for proto in protocols:
+            for s in b'', b'xyz', b'xyz'*100:
+                b = bytearray(s)
+                p = self.dumps(b, proto)
+                bb = self.loads(p)
+                self.assertIsNot(bb, b)
+                self.assert_is_copy(b, bb)
+                if proto <= 3:
+                    # bytearray is serialized using a global reference
+                    self.assertIn(b'bytearray', p)
+                    self.assertTrue(opcode_in_pickle(pickle.GLOBAL, p))
+                elif proto == 4:
+                    self.assertIn(b'bytearray', p)
+                    self.assertTrue(opcode_in_pickle(pickle.STACK_GLOBAL, p))
+                elif proto == 5:
+                    self.assertNotIn(b'bytearray', p)
+                    self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
+
     def test_ints(self):
         for proto in protocols:
             n = sys.maxsize
@@ -2114,7 +2290,8 @@ class AbstractPickleTests(unittest.TestCase):
         the following consistency check.
         """
         frame_end = frameless_start = None
-        frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}
+        frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8',
+                             'BINUNICODE8', 'BYTEARRAY8'}
         for op, arg, pos in pickletools.genops(pickled):
             if frame_end is not None:
                 self.assertLessEqual(pos, frame_end)
@@ -2225,19 +2402,20 @@ class AbstractPickleTests(unittest.TestCase):
         num_frames = 20
         # Large byte objects (dict values) intermittent with small objects
         # (dict keys)
-        obj = {i: bytes([i]) * frame_size for i in range(num_frames)}
+        for bytes_type in (bytes, bytearray):
+            obj = {i: bytes_type([i]) * frame_size for i in range(num_frames)}
 
-        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
-            pickled = self.dumps(obj, proto)
+            for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
+                pickled = self.dumps(obj, proto)
 
-            frameless_pickle = remove_frames(pickled)
-            self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0)
-            self.assertEqual(obj, self.loads(frameless_pickle))
+                frameless_pickle = remove_frames(pickled)
+                self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0)
+                self.assertEqual(obj, self.loads(frameless_pickle))
 
-            some_frames_pickle = remove_frames(pickled, lambda i: i % 2)
-            self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle),
-                            count_opcode(pickle.FRAME, pickled))
-            self.assertEqual(obj, self.loads(some_frames_pickle))
+                some_frames_pickle = remove_frames(pickled, lambda i: i % 2)
+                self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle),
+                                count_opcode(pickle.FRAME, pickled))
+                self.assertEqual(obj, self.loads(some_frames_pickle))
 
     def test_framed_write_sizes_with_delayed_writer(self):
         class ChunkAccumulator:
@@ -2452,6 +2630,186 @@ class AbstractPickleTests(unittest.TestCase):
             with self.assertRaises((AttributeError, pickle.PicklingError)):
                 pickletools.dis(self.dumps(f, proto))
 
+    #
+    # PEP 574 tests below
+    #
+
+    def buffer_like_objects(self):
+        # Yield buffer-like objects with the bytestring "abcdefgh" in them
+        bytestring = b"abcdefgh"
+        yield ZeroCopyBytes(bytestring)
+        yield ZeroCopyBytearray(bytestring)
+        if _testbuffer is not None:
+            items = list(bytestring)
+            value = int.from_bytes(bytestring, byteorder='little')
+            for flags in (0, _testbuffer.ND_WRITABLE):
+                # 1-D, contiguous
+                yield PicklableNDArray(items, format='B', shape=(8,),
+                                       flags=flags)
+                # 2-D, C-contiguous
+                yield PicklableNDArray(items, format='B', shape=(4, 2),
+                                       strides=(2, 1), flags=flags)
+                # 2-D, Fortran-contiguous
+                yield PicklableNDArray(items, format='B',
+                                       shape=(4, 2), strides=(1, 4),
+                                       flags=flags)
+
+    def test_in_band_buffers(self):
+        # Test in-band buffers (PEP 574)
+        for obj in self.buffer_like_objects():
+            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
+                data = self.dumps(obj, proto)
+                if obj.c_contiguous and proto >= 5:
+                    # The raw memory bytes are serialized in physical order
+                    self.assertIn(b"abcdefgh", data)
+                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 0)
+                if proto >= 5:
+                    self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data),
+                                     1 if obj.readonly else 0)
+                    self.assertEqual(count_opcode(pickle.BYTEARRAY8, data),
+                                     0 if obj.readonly else 1)
+                    # Returning a true value from buffer_callback should have
+                    # the same effect
+                    def buffer_callback(obj):
+                        return True
+                    data2 = self.dumps(obj, proto,
+                                       buffer_callback=buffer_callback)
+                    self.assertEqual(data2, data)
+
+                new = self.loads(data)
+                # It's a copy
+                self.assertIsNot(new, obj)
+                self.assertIs(type(new), type(obj))
+                self.assertEqual(new, obj)
+
+    # XXX Unfortunately cannot test non-contiguous array
+    # (see comment in PicklableNDArray.__reduce_ex__)
+
+    def test_oob_buffers(self):
+        # Test out-of-band buffers (PEP 574)
+        for obj in self.buffer_like_objects():
+            for proto in range(0, 5):
+                # Need protocol >= 5 for buffer_callback
+                with self.assertRaises(ValueError):
+                    self.dumps(obj, proto,
+                               buffer_callback=[].append)
+            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+                buffers = []
+                buffer_callback = lambda pb: buffers.append(pb.raw())
+                data = self.dumps(obj, proto,
+                                  buffer_callback=buffer_callback)
+                self.assertNotIn(b"abcdefgh", data)
+                self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data), 0)
+                self.assertEqual(count_opcode(pickle.BYTEARRAY8, data), 0)
+                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 1)
+                self.assertEqual(count_opcode(pickle.READONLY_BUFFER, data),
+                                 1 if obj.readonly else 0)
+
+                if obj.c_contiguous:
+                    self.assertEqual(bytes(buffers[0]), b"abcdefgh")
+                # Need buffers argument to unpickle properly
+                with self.assertRaises(pickle.UnpicklingError):
+                    self.loads(data)
+
+                new = self.loads(data, buffers=buffers)
+                if obj.zero_copy_reconstruct:
+                    # Zero-copy achieved
+                    self.assertIs(new, obj)
+                else:
+                    self.assertIs(type(new), type(obj))
+                    self.assertEqual(new, obj)
+                # Non-sequence buffers accepted too
+                new = self.loads(data, buffers=iter(buffers))
+                if obj.zero_copy_reconstruct:
+                    # Zero-copy achieved
+                    self.assertIs(new, obj)
+                else:
+                    self.assertIs(type(new), type(obj))
+                    self.assertEqual(new, obj)
+
+    def test_oob_buffers_writable_to_readonly(self):
+        # Test reconstructing readonly object from writable buffer
+        obj = ZeroCopyBytes(b"foobar")
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            buffers = []
+            buffer_callback = buffers.append
+            data = self.dumps(obj, proto, buffer_callback=buffer_callback)
+
+            buffers = map(bytearray, buffers)
+            new = self.loads(data, buffers=buffers)
+            self.assertIs(type(new), type(obj))
+            self.assertEqual(new, obj)
+
+    def test_picklebuffer_error(self):
+        # PickleBuffer forbidden with protocol < 5
+        pb = pickle.PickleBuffer(b"foobar")
+        for proto in range(0, 5):
+            with self.assertRaises(pickle.PickleError):
+                self.dumps(pb, proto)
+
+    def test_buffer_callback_error(self):
+        def buffer_callback(buffers):
+            1/0
+        pb = pickle.PickleBuffer(b"foobar")
+        with self.assertRaises(ZeroDivisionError):
+            self.dumps(pb, 5, buffer_callback=buffer_callback)
+
+    def test_buffers_error(self):
+        pb = pickle.PickleBuffer(b"foobar")
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            data = self.dumps(pb, proto, buffer_callback=[].append)
+            # Non-iterable buffers
+            with self.assertRaises(TypeError):
+                self.loads(data, buffers=object())
+            # Buffer iterable is exhausted too early
+            with self.assertRaises(pickle.UnpicklingError):
+                self.loads(data, buffers=[])
+
+    @unittest.skipIf(np is None, "Test needs Numpy")
+    def test_buffers_numpy(self):
+        def check_no_copy(x, y):
+            np.testing.assert_equal(x, y)
+            self.assertEqual(x.ctypes.data, y.ctypes.data)
+
+        def check_copy(x, y):
+            np.testing.assert_equal(x, y)
+            self.assertNotEqual(x.ctypes.data, y.ctypes.data)
+
+        def check_array(arr):
+            # In-band
+            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
+                data = self.dumps(arr, proto)
+                new = self.loads(data)
+                check_copy(arr, new)
+            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+                buffer_callback = lambda _: True
+                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
+                new = self.loads(data)
+                check_copy(arr, new)
+            # Out-of-band
+            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+                buffers = []
+                buffer_callback = buffers.append
+                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
+                new = self.loads(data, buffers=buffers)
+                if arr.flags.c_contiguous or arr.flags.f_contiguous:
+                    check_no_copy(arr, new)
+                else:
+                    check_copy(arr, new)
+
+        # 1-D
+        arr = np.arange(6)
+        check_array(arr)
+        # 1-D, non-contiguous
+        check_array(arr[::2])
+        # 2-D, C-contiguous
+        arr = np.arange(12).reshape((3, 4))
+        check_array(arr)
+        # 2-D, F-contiguous
+        check_array(arr.T)
+        # 2-D, non-contiguous
+        check_array(arr[::2])
+
 
 class BigmemPickleTests(unittest.TestCase):
 
@@ -2736,7 +3094,7 @@ class AbstractPickleModuleTests(unittest.TestCase):
 
     def test_highest_protocol(self):
         # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
-        self.assertEqual(pickle.HIGHEST_PROTOCOL, 4)
+        self.assertEqual(pickle.HIGHEST_PROTOCOL, 5)
 
     def test_callapi(self):
         f = io.BytesIO()
@@ -2760,6 +3118,47 @@ class AbstractPickleModuleTests(unittest.TestCase):
         self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
         self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
 
+    def check_dumps_loads_oob_buffers(self, dumps, loads):
+        # No need to do the full gamut of tests here, just enough to
+        # check that dumps() and loads() redirect their arguments
+        # to the underlying Pickler and Unpickler, respectively.
+        obj = ZeroCopyBytes(b"foo")
+
+        for proto in range(0, 5):
+            # Need protocol >= 5 for buffer_callback
+            with self.assertRaises(ValueError):
+                dumps(obj, protocol=proto,
+                      buffer_callback=[].append)
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            buffers = []
+            buffer_callback = buffers.append
+            data = dumps(obj, protocol=proto,
+                         buffer_callback=buffer_callback)
+            self.assertNotIn(b"foo", data)
+            self.assertEqual(bytes(buffers[0]), b"foo")
+            # Need buffers argument to unpickle properly
+            with self.assertRaises(pickle.UnpicklingError):
+                loads(data)
+            new = loads(data, buffers=buffers)
+            self.assertIs(new, obj)
+
+    def test_dumps_loads_oob_buffers(self):
+        # Test out-of-band buffers (PEP 574) with top-level dumps() and loads()
+        self.check_dumps_loads_oob_buffers(self.dumps, self.loads)
+
+    def test_dump_load_oob_buffers(self):
+        # Test out-of-band buffers (PEP 574) with top-level dump() and load()
+        def dumps(obj, **kwargs):
+            f = io.BytesIO()
+            self.dump(obj, f, **kwargs)
+            return f.getvalue()
+
+        def loads(data, **kwargs):
+            f = io.BytesIO(data)
+            return self.load(f, **kwargs)
+
+        self.check_dumps_loads_oob_buffers(dumps, loads)
+
 
 class AbstractPersistentPicklerTests(unittest.TestCase):
 
index be52b389e62dc1b85ae0a61d87d2fba86b801b05..b3aae3a18ecfae6e7662cee0e7a83968392c2919 100644 (file)
@@ -2894,16 +2894,15 @@ class TestSignatureObject(unittest.TestCase):
     @unittest.skipIf(MISSING_C_DOCSTRINGS,
                      "Signature information for builtins requires docstrings")
     def test_signature_on_builtin_class(self):
-        self.assertEqual(str(inspect.signature(_pickle.Pickler)),
-                         '(file, protocol=None, fix_imports=True)')
+        expected = ('(file, protocol=None, fix_imports=True, '
+                    'buffer_callback=None)')
+        self.assertEqual(str(inspect.signature(_pickle.Pickler)), expected)
 
         class P(_pickle.Pickler): pass
         class EmptyTrait: pass
         class P2(EmptyTrait, P): pass
-        self.assertEqual(str(inspect.signature(P)),
-                         '(file, protocol=None, fix_imports=True)')
-        self.assertEqual(str(inspect.signature(P2)),
-                         '(file, protocol=None, fix_imports=True)')
+        self.assertEqual(str(inspect.signature(P)), expected)
+        self.assertEqual(str(inspect.signature(P2)), expected)
 
         class P3(P2):
             def __init__(self, spam):
index 435c248802d3d7e537334b480e34770fa579b7b9..5f7a879b935d922fac54296fb7eb18c31513518a 100644 (file)
@@ -57,9 +57,9 @@ class PyPicklerTests(AbstractPickleTests):
     pickler = pickle._Pickler
     unpickler = pickle._Unpickler
 
-    def dumps(self, arg, proto=None):
+    def dumps(self, arg, proto=None, **kwargs):
         f = io.BytesIO()
-        p = self.pickler(f, proto)
+        p = self.pickler(f, proto, **kwargs)
         p.dump(arg)
         f.seek(0)
         return bytes(f.read())
@@ -78,8 +78,8 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
 
-    def dumps(self, arg, protocol=None):
-        return pickle.dumps(arg, protocol)
+    def dumps(self, arg, protocol=None, **kwargs):
+        return pickle.dumps(arg, protocol, **kwargs)
 
     def loads(self, buf, **kwds):
         return pickle.loads(buf, **kwds)
@@ -271,7 +271,7 @@ if has_c_implementation:
         check_sizeof = support.check_sizeof
 
         def test_pickler(self):
-            basesize = support.calcobjsize('6P2n3i2n3i2P')
+            basesize = support.calcobjsize('7P2n3i2n3i2P')
             p = _pickle.Pickler(io.BytesIO())
             self.assertEqual(object.__sizeof__(p), basesize)
             MT_size = struct.calcsize('3nP0n')
@@ -288,7 +288,7 @@ if has_c_implementation:
                 0)  # Write buffer is cleared after every dump().
 
         def test_unpickler(self):
-            basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n6P2n2i')
+            basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i')
             unpickler = _pickle.Unpickler
             P = struct.calcsize('P')  # Size of memo table entry.
             n = struct.calcsize('n')  # Size of mark table entry.
diff --git a/Lib/test/test_picklebuffer.py b/Lib/test/test_picklebuffer.py
new file mode 100644 (file)
index 0000000..7e72157
--- /dev/null
@@ -0,0 +1,154 @@
+"""Unit tests for the PickleBuffer object.
+
+Pickling tests themselves are in pickletester.py.
+"""
+
+import gc
+from pickle import PickleBuffer
+import sys
+import weakref
+import unittest
+
+from test import support
+
+
+class B(bytes):
+    pass
+
+
+class PickleBufferTest(unittest.TestCase):
+
+    def check_memoryview(self, pb, equiv):
+        with memoryview(pb) as m:
+            with memoryview(equiv) as expected:
+                self.assertEqual(m.nbytes, expected.nbytes)
+                self.assertEqual(m.readonly, expected.readonly)
+                self.assertEqual(m.itemsize, expected.itemsize)
+                self.assertEqual(m.shape, expected.shape)
+                self.assertEqual(m.strides, expected.strides)
+                self.assertEqual(m.c_contiguous, expected.c_contiguous)
+                self.assertEqual(m.f_contiguous, expected.f_contiguous)
+                self.assertEqual(m.format, expected.format)
+                self.assertEqual(m.tobytes(), expected.tobytes())
+
+    def test_constructor_failure(self):
+        with self.assertRaises(TypeError):
+            PickleBuffer()
+        with self.assertRaises(TypeError):
+            PickleBuffer("foo")
+        # Taking a buffer from a released memoryview fails
+        m = memoryview(b"foo")
+        m.release()
+        with self.assertRaises(ValueError):
+            PickleBuffer(m)
+
+    def test_basics(self):
+        pb = PickleBuffer(b"foo")
+        self.assertEqual(b"foo", bytes(pb))
+        with memoryview(pb) as m:
+            self.assertTrue(m.readonly)
+
+        pb = PickleBuffer(bytearray(b"foo"))
+        self.assertEqual(b"foo", bytes(pb))
+        with memoryview(pb) as m:
+            self.assertFalse(m.readonly)
+            m[0] = 48
+        self.assertEqual(b"0oo", bytes(pb))
+
+    def test_release(self):
+        pb = PickleBuffer(b"foo")
+        pb.release()
+        with self.assertRaises(ValueError) as raises:
+            memoryview(pb)
+        self.assertIn("operation forbidden on released PickleBuffer object",
+                      str(raises.exception))
+        # Idempotency
+        pb.release()
+
+    def test_cycle(self):
+        b = B(b"foo")
+        pb = PickleBuffer(b)
+        b.cycle = pb
+        wpb = weakref.ref(pb)
+        del b, pb
+        gc.collect()
+        self.assertIsNone(wpb())
+
+    def test_ndarray_2d(self):
+        # C-contiguous
+        ndarray = support.import_module("_testbuffer").ndarray
+        arr = ndarray(list(range(12)), shape=(4, 3), format='<i')
+        self.assertTrue(arr.c_contiguous)
+        self.assertFalse(arr.f_contiguous)
+        pb = PickleBuffer(arr)
+        self.check_memoryview(pb, arr)
+        # Non-contiguous
+        arr = arr[::2]
+        self.assertFalse(arr.c_contiguous)
+        self.assertFalse(arr.f_contiguous)
+        pb = PickleBuffer(arr)
+        self.check_memoryview(pb, arr)
+        # F-contiguous
+        arr = ndarray(list(range(12)), shape=(3, 4), strides=(4, 12), format='<i')
+        self.assertTrue(arr.f_contiguous)
+        self.assertFalse(arr.c_contiguous)
+        pb = PickleBuffer(arr)
+        self.check_memoryview(pb, arr)
+
+    # Tests for PickleBuffer.raw()
+
+    def check_raw(self, obj, equiv):
+        pb = PickleBuffer(obj)
+        with pb.raw() as m:
+            self.assertIsInstance(m, memoryview)
+            self.check_memoryview(m, equiv)
+
+    def test_raw(self):
+        for obj in (b"foo", bytearray(b"foo")):
+            with self.subTest(obj=obj):
+                self.check_raw(obj, obj)
+
+    def test_raw_ndarray(self):
+        # 1-D, contiguous
+        ndarray = support.import_module("_testbuffer").ndarray
+        arr = ndarray(list(range(3)), shape=(3,), format='<h')
+        equiv = b"\x00\x00\x01\x00\x02\x00"
+        self.check_raw(arr, equiv)
+        # 2-D, C-contiguous
+        arr = ndarray(list(range(6)), shape=(2, 3), format='<h')
+        equiv = b"\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00"
+        self.check_raw(arr, equiv)
+        # 2-D, F-contiguous
+        arr = ndarray(list(range(6)), shape=(2, 3), strides=(2, 4),
+                      format='<h')
+        # Note this is different from arr.tobytes()
+        equiv = b"\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00"
+        self.check_raw(arr, equiv)
+        # 0-D
+        arr = ndarray(456, shape=(), format='<i')
+        equiv = b'\xc8\x01\x00\x00'
+        self.check_raw(arr, equiv)
+
+    def check_raw_non_contiguous(self, obj):
+        pb = PickleBuffer(obj)
+        with self.assertRaisesRegex(BufferError, "non-contiguous"):
+            pb.raw()
+
+    def test_raw_non_contiguous(self):
+        # 1-D
+        ndarray = support.import_module("_testbuffer").ndarray
+        arr = ndarray(list(range(6)), shape=(6,), format='<i')[::2]
+        self.check_raw_non_contiguous(arr)
+        # 2-D
+        arr = ndarray(list(range(12)), shape=(4, 3), format='<i')[::2]
+        self.check_raw_non_contiguous(arr)
+
+    def test_raw_released(self):
+        pb = PickleBuffer(b"foo")
+        pb.release()
+        with self.assertRaises(ValueError) as raises:
+            pb.raw()
+
+
+if __name__ == "__main__":
+    unittest.main()
index e40a958f7ebaefb59827498f2bd3a81a39109031..8cc6ca58cd04b43f793793054d99a2b87db4aafd 100644 (file)
@@ -6,8 +6,8 @@ import unittest
 
 class OptimizedPickleTests(AbstractPickleTests):
 
-    def dumps(self, arg, proto=None):
-        return pickletools.optimize(pickle.dumps(arg, proto))
+    def dumps(self, arg, proto=None, **kwargs):
+        return pickletools.optimize(pickle.dumps(arg, proto, **kwargs))
 
     def loads(self, buf, **kwds):
         return pickle.loads(buf, **kwds)
@@ -71,23 +71,24 @@ class MiscTestCase(unittest.TestCase):
                      'read_uint8', 'read_stringnl', 'read_stringnl_noescape',
                      'read_stringnl_noescape_pair', 'read_string1',
                      'read_string4', 'read_bytes1', 'read_bytes4',
-                     'read_bytes8', 'read_unicodestringnl',
+                     'read_bytes8', 'read_bytearray8', 'read_unicodestringnl',
                      'read_unicodestring1', 'read_unicodestring4',
                      'read_unicodestring8', 'read_decimalnl_short',
                      'read_decimalnl_long', 'read_floatnl', 'read_float8',
                      'read_long1', 'read_long4',
                      'uint1', 'uint2', 'int4', 'uint4', 'uint8', 'stringnl',
                      'stringnl_noescape', 'stringnl_noescape_pair', 'string1',
-                     'string4', 'bytes1', 'bytes4', 'bytes8',
+                     'string4', 'bytes1', 'bytes4', 'bytes8', 'bytearray8',
                      'unicodestringnl', 'unicodestring1', 'unicodestring4',
                      'unicodestring8', 'decimalnl_short', 'decimalnl_long',
                      'floatnl', 'float8', 'long1', 'long4',
                      'StackObject',
                      'pyint', 'pylong', 'pyinteger_or_bool', 'pybool', 'pyfloat',
-                     'pybytes_or_str', 'pystring', 'pybytes', 'pyunicode',
-                     'pynone', 'pytuple', 'pylist', 'pydict', 'pyset',
-                     'pyfrozenset', 'anyobject', 'markobject', 'stackslice',
-                     'OpcodeInfo', 'opcodes', 'code2op',
+                     'pybytes_or_str', 'pystring', 'pybytes', 'pybytearray',
+                     'pyunicode', 'pynone', 'pytuple', 'pylist', 'pydict',
+                     'pyset', 'pyfrozenset', 'pybuffer', 'anyobject',
+                     'markobject', 'stackslice', 'OpcodeInfo', 'opcodes',
+                     'code2op',
                      }
         support.check__all__(self, pickletools, blacklist=blacklist)
 
index 839c58f0fde5be1424bd47c6e0ed836ec91f9fe5..0b3934f6226e3673508f2613cf9a855a7da5aa3e 100644 (file)
@@ -224,7 +224,7 @@ class PyclbrTest(TestCase):
         # These were once about the 10 longest modules
         cm('random', ignore=('Random',))  # from _random import Random as CoreGenerator
         cm('cgi', ignore=('log',))      # set with = in module
-        cm('pickle', ignore=('partial',))
+        cm('pickle', ignore=('partial', 'PickleBuffer'))
         # TODO(briancurtin): openfp is deprecated as of 3.7.
         # Update this once it has been removed.
         cm('aifc', ignore=('openfp', '_aifc_params'))  # set with = in module
index ee94ad54a272e543aed338e4b5d0b3d03e6db6b1..8071a94dd6ff160574b9c2565248d22b2580ca99 100644 (file)
@@ -382,6 +382,7 @@ OBJECT_OBJS=        \
                Objects/bytearrayobject.o \
                Objects/bytesobject.o \
                Objects/call.o \
+               Objects/capsule.o \
                Objects/cellobject.o \
                Objects/classobject.o \
                Objects/codeobject.o \
@@ -406,7 +407,7 @@ OBJECT_OBJS=        \
                Objects/namespaceobject.o \
                Objects/object.o \
                Objects/obmalloc.o \
-               Objects/capsule.o \
+               Objects/picklebufobject.o \
                Objects/rangeobject.o \
                Objects/setobject.o \
                Objects/sliceobject.o \
@@ -1009,6 +1010,7 @@ PYTHON_HEADERS= \
                $(srcdir)/Include/osdefs.h \
                $(srcdir)/Include/osmodule.h \
                $(srcdir)/Include/patchlevel.h \
+               $(srcdir)/Include/picklebufobject.h \
                $(srcdir)/Include/pyarena.h \
                $(srcdir)/Include/pycapsule.h \
                $(srcdir)/Include/pyctype.h \
diff --git a/Misc/NEWS.d/next/Library/2019-05-03-20-47-55.bpo-36785.PQLnPq.rst b/Misc/NEWS.d/next/Library/2019-05-03-20-47-55.bpo-36785.PQLnPq.rst
new file mode 100644 (file)
index 0000000..0a86054
--- /dev/null
@@ -0,0 +1 @@
+Implement PEP 574 (pickle protocol 5 with out-of-band buffers).
index 24a5d22770127412cfb06e249226e59cb365010b..a3f02ae8813d9109beb61d67e3a1373663547b25 100644 (file)
@@ -27,7 +27,7 @@ class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoPro
    Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
    already includes it. */
 enum {
-    HIGHEST_PROTOCOL = 4,
+    HIGHEST_PROTOCOL = 5,
     DEFAULT_PROTOCOL = 4
 };
 
@@ -104,7 +104,12 @@ enum opcode {
     NEWOBJ_EX        = '\x92',
     STACK_GLOBAL     = '\x93',
     MEMOIZE          = '\x94',
-    FRAME            = '\x95'
+    FRAME            = '\x95',
+
+    /* Protocol 5 */
+    BYTEARRAY8       = '\x96',
+    NEXT_BUFFER      = '\x97',
+    READONLY_BUFFER  = '\x98'
 };
 
 enum {
@@ -643,6 +648,7 @@ typedef struct PicklerObject {
     int fix_imports;            /* Indicate whether Pickler should fix
                                    the name of globals for Python 2.x. */
     PyObject *fast_memo;
+    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
 } PicklerObject;
 
 typedef struct UnpicklerObject {
@@ -667,8 +673,10 @@ typedef struct UnpicklerObject {
     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
 
     PyObject *read;             /* read() method of the input stream. */
+    PyObject *readinto;         /* readinto() method of the input stream. */
     PyObject *readline;         /* readline() method of the input stream. */
     PyObject *peek;             /* peek() method of the input stream, or NULL */
+    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
 
     char *encoding;             /* Name of the encoding to be used for
                                    decoding strings pickled using Python
@@ -1102,6 +1110,7 @@ _Pickler_New(void)
 
     self->pers_func = NULL;
     self->dispatch_table = NULL;
+    self->buffer_callback = NULL;
     self->write = NULL;
     self->proto = 0;
     self->bin = 0;
@@ -1174,6 +1183,23 @@ _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
     return 0;
 }
 
+/* Install the callback used for out-of-band buffers.
+   None is normalized to NULL (no callback).  Returns 0 on success, or -1
+   with ValueError set if a callback is given while self->proto < 5.
+   self->proto is read here, so the protocol must be configured first; the
+   previous value of self->buffer_callback is overwritten, not released. */
+static int
+_Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
+{
+    if (buffer_callback == Py_None) {
+        buffer_callback = NULL;
+    }
+    if (buffer_callback != NULL && self->proto < 5) {
+        PyErr_SetString(PyExc_ValueError,
+                        "buffer_callback needs protocol >= 5");
+        return -1;
+    }
+
+    /* Take our own reference; Py_XINCREF tolerates NULL. */
+    Py_XINCREF(buffer_callback);
+    self->buffer_callback = buffer_callback;
+    return 0;
+}
+
 /* Returns the size of the input on success, -1 on failure. This takes its
    own reference to `input`. */
 static Py_ssize_t
@@ -1198,6 +1224,7 @@ bad_readline(void)
     return -1;
 }
 
+/* Skip any consumed data that was only prefetched using peek() */
 static int
 _Unpickler_SkipConsumed(UnpicklerObject *self)
 {
@@ -1305,6 +1332,7 @@ _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
     if (!self->read)
         return bad_readline();
 
+    /* Extend the buffer to satisfy desired size */
     num_read = _Unpickler_ReadFromFile(self, n);
     if (num_read < 0)
         return -1;
@@ -1315,6 +1343,66 @@ _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
     return n;
 }
 
+/* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
+ *
+ * This should only be used for non-small data reads where potentially
+ * avoiding a copy is beneficial.  This method does not try to prefetch
+ * more data into the input buffer.
+ *
+ * _Unpickler_Read() is recommended in most cases.
+ *
+ * Returns a non-negative value on success and -1 on failure.  The success
+ * value is the number of bytes that still had to be read from the file
+ * after draining the input buffer (0 if the buffer alone satisfied the
+ * request), not necessarily the original `n`; callers should only test
+ * for `< 0`.
+ */
+static Py_ssize_t
+_Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
+{
+    assert(n != READ_WHOLE_LINE);
+
+    /* Read from available buffer data, if any */
+    Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
+    if (in_buffer > 0) {
+        Py_ssize_t to_read = Py_MIN(in_buffer, n);
+        memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
+        self->next_read_idx += to_read;
+        /* From here on, `buf` and `n` describe only the unread remainder */
+        buf += to_read;
+        n -= to_read;
+        if (n == 0) {
+            /* Entire read was satisfied from buffer */
+            return n;
+        }
+    }
+
+    /* Read from file */
+    if (!self->readinto) {
+        return bad_readline();
+    }
+    if (_Unpickler_SkipConsumed(self) < 0) {
+        return -1;
+    }
+
+    /* Call readinto() into user buffer */
+    PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
+    if (buf_obj == NULL) {
+        return -1;
+    }
+    /* NOTE(review): buf_obj is never released in this function; presumably
+     * _Pickle_FastCall() consumes the reference -- confirm. */
+    PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
+    if (read_size_obj == NULL) {
+        return -1;
+    }
+    Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
+    Py_DECREF(read_size_obj);
+
+    if (read_size < 0) {
+        /* Either the conversion failed (exception already set) or
+         * readinto() really returned a negative number. */
+        if (!PyErr_Occurred()) {
+            PyErr_SetString(PyExc_ValueError,
+                            "readinto() returned negative size");
+        }
+        return -1;
+    }
+    if (read_size < n) {
+        /* Short read: a single readinto() call did not fill the request */
+        return bad_readline();
+    }
+    return n;
+}
+
 /* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
 
    This should be used for all data reads, rather than accessing the unpickler's
@@ -1482,8 +1570,10 @@ _Unpickler_New(void)
     self->next_read_idx = 0;
     self->prefetched_idx = 0;
     self->read = NULL;
+    self->readinto = NULL;
     self->readline = NULL;
     self->peek = NULL;
+    self->buffers = NULL;
     self->encoding = NULL;
     self->errors = NULL;
     self->marks = NULL;
@@ -1507,25 +1597,29 @@ _Unpickler_New(void)
 }
 
 /* Returns -1 (with an exception set) on failure, 0 on success. This may
-   be called once on a freshly created Pickler. */
+   be called once on a freshly created Unpickler. */
 static int
 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
 {
     _Py_IDENTIFIER(peek);
     _Py_IDENTIFIER(read);
+    _Py_IDENTIFIER(readinto);
     _Py_IDENTIFIER(readline);
 
     if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
         return -1;
     }
     (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
+    (void)_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto);
     (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
-    if (self->readline == NULL || self->read == NULL) {
+    if (!self->readline || !self->readinto || !self->read) {
         if (!PyErr_Occurred()) {
             PyErr_SetString(PyExc_TypeError,
-                            "file must have 'read' and 'readline' attributes");
+                            "file must have 'read', 'readinto' and "
+                            "'readline' attributes");
         }
         Py_CLEAR(self->read);
+        Py_CLEAR(self->readinto);
         Py_CLEAR(self->readline);
         Py_CLEAR(self->peek);
         return -1;
@@ -1534,7 +1628,7 @@ _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
 }
 
 /* Returns -1 (with an exception set) on failure, 0 on success. This may
-   be called once on a freshly created Pickler. */
+   be called once on a freshly created Unpickler. */
 static int
 _Unpickler_SetInputEncoding(UnpicklerObject *self,
                             const char *encoding,
@@ -1554,6 +1648,23 @@ _Unpickler_SetInputEncoding(UnpicklerObject *self,
     return 0;
 }
 
+/* Set the source of out-of-band buffers.
+   `buffers` may be NULL or None (no out-of-band buffers); otherwise it must
+   be iterable, and an iterator over it is stored in self->buffers.
+   Returns -1 (with an exception set) on failure, 0 on success. This may
+   be called once on a freshly created Unpickler. */
+static int
+_Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
+{
+    /* Treat an explicit None like an omitted argument, just as
+       _Pickler_SetBufferCallback() does for its callback; otherwise
+       PyObject_GetIter(None) would fail with a TypeError. */
+    if (buffers == NULL || buffers == Py_None) {
+        self->buffers = NULL;
+    }
+    else {
+        self->buffers = PyObject_GetIter(buffers);
+        if (self->buffers == NULL) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
 /* Generate a GET opcode for an object stored in the memo. */
 static int
 memo_get(PicklerObject *self, PyObject *key)
@@ -2209,6 +2320,54 @@ _Pickler_write_bytes(PicklerObject *self,
     return 0;
 }
 
+/* Write `size` bytes starting at `data` as a bytes payload, then memoize
+   `obj`.  The opcode depends on the size: SHORT_BINBYTES (<= 0xff),
+   BINBYTES (<= 0xffffffff), or BINBYTES8 (larger; protocol >= 4 only,
+   otherwise OverflowError is set).
+   Returns 0 on success, -1 on failure. */
+static int
+_save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
+                 Py_ssize_t size)
+{
+    assert(self->proto >= 3);
+
+    char header[9];     /* opcode byte + up to 8 bytes of length */
+    Py_ssize_t len;     /* number of header bytes actually used */
+
+    if (size < 0)
+        return -1;
+
+    if (size <= 0xff) {
+        header[0] = SHORT_BINBYTES;
+        header[1] = (unsigned char)size;
+        len = 2;
+    }
+    else if ((size_t)size <= 0xffffffffUL) {
+        /* 4-byte length, least significant byte first */
+        header[0] = BINBYTES;
+        header[1] = (unsigned char)(size & 0xff);
+        header[2] = (unsigned char)((size >> 8) & 0xff);
+        header[3] = (unsigned char)((size >> 16) & 0xff);
+        header[4] = (unsigned char)((size >> 24) & 0xff);
+        len = 5;
+    }
+    else if (self->proto >= 4) {
+        header[0] = BINBYTES8;
+        _write_size64(header + 1, size);
+        len = 9;
+    }
+    else {
+        PyErr_SetString(PyExc_OverflowError,
+                        "serializing a bytes object larger than 4 GiB "
+                        "requires pickle protocol 4 or higher");
+        return -1;
+    }
+
+    if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
+        return -1;
+    }
+
+    if (memo_put(self, obj) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
 static int
 save_bytes(PicklerObject *self, PyObject *obj)
 {
@@ -2255,49 +2414,132 @@ save_bytes(PicklerObject *self, PyObject *obj)
         return status;
     }
     else {
-        Py_ssize_t size;
-        char header[9];
-        Py_ssize_t len;
+        return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
+                                PyBytes_GET_SIZE(obj));
+    }
+}
 
-        size = PyBytes_GET_SIZE(obj);
-        if (size < 0)
+/* Write `size` bytes starting at `data` as a BYTEARRAY8 payload (opcode
+   followed by an 8-byte length written by _write_size64()), then memoize
+   `obj`.  Requires protocol >= 5 (asserted).
+   Returns 0 on success, -1 on failure. */
+static int
+_save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
+                     Py_ssize_t size)
+{
+    assert(self->proto >= 5);
+
+    char header[9];     /* opcode byte + 8 bytes of length */
+    Py_ssize_t len;
+
+    if (size < 0)
+        return -1;
+
+    header[0] = BYTEARRAY8;
+    _write_size64(header + 1, size);
+    len = 9;
+
+    if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
+        return -1;
+    }
+
+    if (memo_put(self, obj) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Pickle a bytearray.
+   With protocol < 5 the object is saved through save_reduce() as a call
+   to the bytearray type: with no arguments when empty, otherwise with a
+   bytes copy of the contents.  With protocol >= 5 the contents are written
+   directly by _save_bytearray_data().
+   Returns 0 on success, -1 on failure. */
+static int
+save_bytearray(PicklerObject *self, PyObject *obj)
+{
+    if (self->proto < 5) {
+        /* Older pickle protocols do not have an opcode for pickling
+         * bytearrays. */
+        PyObject *reduce_value = NULL;
+        int status;
+
+        if (PyByteArray_GET_SIZE(obj) == 0) {
+            reduce_value = Py_BuildValue("(O())",
+                                         (PyObject *) &PyByteArray_Type);
+        }
+        else {
+            PyObject *bytes_obj = PyBytes_FromObject(obj);
+            if (bytes_obj != NULL) {
+                reduce_value = Py_BuildValue("(O(O))",
+                                             (PyObject *) &PyByteArray_Type,
+                                             bytes_obj);
+                Py_DECREF(bytes_obj);
+            }
+        }
+        /* NULL here covers a failure of either PyBytes_FromObject() or
+         * Py_BuildValue(). */
+        if (reduce_value == NULL)
             return -1;
 
-        if (size <= 0xff) {
-            header[0] = SHORT_BINBYTES;
-            header[1] = (unsigned char)size;
-            len = 2;
+        /* save_reduce() will memoize the object automatically. */
+        status = save_reduce(self, reduce_value, obj);
+        Py_DECREF(reduce_value);
+        return status;
+    }
+    else {
+        return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
+                                    PyByteArray_GET_SIZE(obj));
+    }
+}
+
+/* Pickle a PickleBuffer (protocol 5 and above only).
+   The underlying buffer must be contiguous and have no suboffsets, else
+   PicklingError is set.  If a buffer_callback is set it is called with
+   `obj`: a false result sends the buffer out-of-band (NEXT_BUFFER, plus
+   READONLY_BUFFER for read-only views); a true result, or no callback,
+   serializes the data in-band as bytes (read-only) or bytearray (writable).
+   Returns 0 on success, -1 on failure. */
+static int
+save_picklebuffer(PicklerObject *self, PyObject *obj)
+{
+    if (self->proto < 5) {
+        PickleState *st = _Pickle_GetGlobalState();
+        PyErr_SetString(st->PicklingError,
+                        "PickleBuffer can only be pickled with protocol >= 5");
+        return -1;
+    }
+    const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
+    if (view == NULL) {
+        return -1;
+    }
+    if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
+        PickleState *st = _Pickle_GetGlobalState();
+        PyErr_SetString(st->PicklingError,
+                        "PickleBuffer can not be pickled when "
+                        "pointing to a non-contiguous buffer");
+        return -1;
+    }
+    /* Default to in-band; only a callback returning false opts out. */
+    int in_band = 1;
+    if (self->buffer_callback != NULL) {
+        PyObject *ret = PyObject_CallFunctionObjArgs(self->buffer_callback,
+                                                     obj, NULL);
+        if (ret == NULL) {
+            return -1;
         }
-        else if ((size_t)size <= 0xffffffffUL) {
-            header[0] = BINBYTES;
-            header[1] = (unsigned char)(size & 0xff);
-            header[2] = (unsigned char)((size >> 8) & 0xff);
-            header[3] = (unsigned char)((size >> 16) & 0xff);
-            header[4] = (unsigned char)((size >> 24) & 0xff);
-            len = 5;
+        in_band = PyObject_IsTrue(ret);
+        Py_DECREF(ret);
+        if (in_band == -1) {
+            return -1;
         }
-        else if (self->proto >= 4) {
-            header[0] = BINBYTES8;
-            _write_size64(header + 1, size);
-            len = 9;
+    }
+    if (in_band) {
+        /* Write data in-band */
+        if (view->readonly) {
+            return _save_bytes_data(self, obj, (const char*) view->buf,
+                                    view->len);
         }
         else {
-            PyErr_SetString(PyExc_OverflowError,
-                            "cannot serialize a bytes object larger than 4 GiB");
-            return -1;          /* string too large */
+            return _save_bytearray_data(self, obj, (const char*) view->buf,
+                                        view->len);
         }
-
-        if (_Pickler_write_bytes(self, header, len,
-                                 PyBytes_AS_STRING(obj), size, obj) < 0)
-        {
+    }
+    else {
+        /* Write data out-of-band */
+        const char next_buffer_op = NEXT_BUFFER;
+        if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
             return -1;
         }
-
-        if (memo_put(self, obj) < 0)
-            return -1;
-
-        return 0;
+        if (view->readonly) {
+            const char readonly_buffer_op = READONLY_BUFFER;
+            if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
+                return -1;
+            }
+        }
     }
+    return 0;
+}
 
 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
@@ -2417,7 +2659,8 @@ write_unicode_binary(PicklerObject *self, PyObject *obj)
     }
     else {
         PyErr_SetString(PyExc_OverflowError,
-                        "cannot serialize a string larger than 4GiB");
+                        "serializing a string larger than 4 GiB "
+                        "requires pickle protocol 4 or higher");
         Py_XDECREF(encoded);
         return -1;
     }
@@ -4062,6 +4305,14 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
         status = save_tuple(self, obj);
         goto done;
     }
+    else if (type == &PyByteArray_Type) {
+        status = save_bytearray(self, obj);
+        goto done;
+    }
+    else if (type == &PyPickleBuffer_Type) {
+        status = save_picklebuffer(self, obj);
+        goto done;
+    }
 
     /* Now, check reducer_override.  If it returns NotImplemented,
      * fallback to save_type or save_global, and then perhaps to the
@@ -4342,6 +4593,7 @@ Pickler_dealloc(PicklerObject *self)
     Py_XDECREF(self->dispatch_table);
     Py_XDECREF(self->fast_memo);
     Py_XDECREF(self->reducer_override);
+    Py_XDECREF(self->buffer_callback);
 
     PyMemoTable_Del(self->memo);
 
@@ -4356,6 +4608,7 @@ Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
     Py_VISIT(self->dispatch_table);
     Py_VISIT(self->fast_memo);
     Py_VISIT(self->reducer_override);
+    Py_VISIT(self->buffer_callback);
     return 0;
 }
 
@@ -4368,6 +4621,7 @@ Pickler_clear(PicklerObject *self)
     Py_CLEAR(self->dispatch_table);
     Py_CLEAR(self->fast_memo);
     Py_CLEAR(self->reducer_override);
+    Py_CLEAR(self->buffer_callback);
 
     if (self->memo != NULL) {
         PyMemoTable *memo = self->memo;
@@ -4385,6 +4639,7 @@ _pickle.Pickler.__init__
   file: object
   protocol: object = NULL
   fix_imports: bool = True
+  buffer_callback: object = NULL
 
 This takes a binary file for writing a pickle data stream.
 
@@ -4404,12 +4659,25 @@ this interface.
 If *fix_imports* is True and protocol is less than 3, pickle will try
 to map the new Python 3 names to the old module names used in Python
 2, so that the pickle data stream is readable with Python 2.
+
+If *buffer_callback* is None (the default), buffer views are
+serialized into *file* as part of the pickle stream.
+
+If *buffer_callback* is not None, then it can be called any number
+of times with a buffer view.  If the callback returns a false value
+(such as None), the given buffer is out-of-band; otherwise the
+buffer is serialized in-band, i.e. inside the pickle stream.
+
+It is an error if *buffer_callback* is not None and *protocol*
+is None or smaller than 5.
+
 [clinic start generated code]*/
 
 static int
 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
-                              PyObject *protocol, int fix_imports)
-/*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/
+                              PyObject *protocol, int fix_imports,
+                              PyObject *buffer_callback)
+/*[clinic end generated code: output=0abedc50590d259b input=9a43a1c50ab91652]*/
 {
     _Py_IDENTIFIER(persistent_id);
     _Py_IDENTIFIER(dispatch_table);
@@ -4424,6 +4692,9 @@ _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
     if (_Pickler_SetOutputStream(self, file) < 0)
         return -1;
 
+    if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
+        return -1;
+
     /* memo and output_buffer may have already been created in _Pickler_New */
     if (self->memo == NULL) {
         self->memo = PyMemoTable_New();
@@ -5212,17 +5483,100 @@ load_counted_binbytes(UnpicklerObject *self, int nbytes)
         return -1;
     }
 
-    if (_Unpickler_Read(self, &s, size) < 0)
-        return -1;
-
-    bytes = PyBytes_FromStringAndSize(s, size);
+    bytes = PyBytes_FromStringAndSize(NULL, size);
     if (bytes == NULL)
         return -1;
+    if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
+        Py_DECREF(bytes);
+        return -1;
+    }
 
     PDATA_PUSH(self->stack, bytes, -1);
     return 0;
 }
 
+/* Handle the BYTEARRAY8 opcode: read an 8-byte length, allocate a
+   bytearray of that size, fill it with _Unpickler_ReadInto() and push it
+   on the stack.  Returns 0 on success, -1 on failure. */
+static int
+load_counted_bytearray(UnpicklerObject *self)
+{
+    PyObject *bytearray;
+    Py_ssize_t size;
+    char *s;
+
+    if (_Unpickler_Read(self, &s, 8) < 0) {
+        return -1;
+    }
+
+    size = calc_binsize(s, 8);
+    if (size < 0) {
+        PyErr_Format(PyExc_OverflowError,
+                     "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
+                     PY_SSIZE_T_MAX);
+        return -1;
+    }
+
+    /* NOTE(review): `size` comes straight from the pickle stream and the
+     * whole bytearray is allocated before any payload byte is read. */
+    bytearray = PyByteArray_FromStringAndSize(NULL, size);
+    if (bytearray == NULL) {
+        return -1;
+    }
+    if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
+        Py_DECREF(bytearray);
+        return -1;
+    }
+
+    PDATA_PUSH(self->stack, bytearray, -1);
+    return 0;
+}
+
+/* Handle the NEXT_BUFFER opcode: fetch the next item from the
+   self->buffers iterator and push it on the stack.  Sets UnpicklingError
+   if no buffers iterator was configured or if it is exhausted; an
+   exception raised by the iterator itself is left as is.
+   Returns 0 on success, -1 on failure. */
+static int
+load_next_buffer(UnpicklerObject *self)
+{
+    if (self->buffers == NULL) {
+        PickleState *st = _Pickle_GetGlobalState();
+        PyErr_SetString(st->UnpicklingError,
+                        "pickle stream refers to out-of-band data "
+                        "but no *buffers* argument was given");
+        return -1;
+    }
+    PyObject *buf = PyIter_Next(self->buffers);
+    if (buf == NULL) {
+        /* NULL without an exception means the iterator is exhausted */
+        if (!PyErr_Occurred()) {
+            PickleState *st = _Pickle_GetGlobalState();
+            PyErr_SetString(st->UnpicklingError,
+                            "not enough out-of-band buffers");
+        }
+        return -1;
+    }
+
+    /* presumably PDATA_PUSH returns its third argument on failure -- confirm */
+    PDATA_PUSH(self->stack, buf, -1);
+    return 0;
+}
+
+/* Handle the READONLY_BUFFER opcode: if the object on top of the stack
+   exposes a writable buffer, replace it in place with a memoryview of it
+   flagged read-only; if it is already read-only, leave it untouched.
+   Reports a stack underflow if there is no object above the fence.
+   Returns 0 on success, -1 on failure. */
+static int
+load_readonly_buffer(UnpicklerObject *self)
+{
+    Py_ssize_t len = Py_SIZE(self->stack);
+    if (len <= self->stack->fence) {
+        return Pdata_stack_underflow(self->stack);
+    }
+
+    PyObject *obj = self->stack->data[len - 1];
+    PyObject *view = PyMemoryView_FromObject(obj);
+    if (view == NULL) {
+        return -1;
+    }
+    if (!PyMemoryView_GET_BUFFER(view)->readonly) {
+        /* Original object is writable */
+        PyMemoryView_GET_BUFFER(view)->readonly = 1;
+        /* The stack slot now holds the view; drop the reference the slot
+         * held on the original object. */
+        self->stack->data[len - 1] = view;
+        Py_DECREF(obj);
+    }
+    else {
+        /* Original object is read-only, no need to replace it */
+        Py_DECREF(view);
+    }
+    return 0;
+}
+
 static int
 load_unicode(UnpicklerObject *self)
 {
@@ -6511,6 +6865,9 @@ load(UnpicklerObject *self)
         OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
         OP_ARG(BINBYTES, load_counted_binbytes, 4)
         OP_ARG(BINBYTES8, load_counted_binbytes, 8)
+        OP(BYTEARRAY8, load_counted_bytearray)
+        OP(NEXT_BUFFER, load_next_buffer)
+        OP(READONLY_BUFFER, load_readonly_buffer)
         OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
         OP_ARG(BINSTRING, load_counted_binstring, 4)
         OP(STRING, load_string)
@@ -6771,10 +7128,12 @@ Unpickler_dealloc(UnpicklerObject *self)
 {
     PyObject_GC_UnTrack((PyObject *)self);
     Py_XDECREF(self->readline);
+    Py_XDECREF(self->readinto);
     Py_XDECREF(self->read);
     Py_XDECREF(self->peek);
     Py_XDECREF(self->stack);
     Py_XDECREF(self->pers_func);
+    Py_XDECREF(self->buffers);
     if (self->buffer.buf != NULL) {
         PyBuffer_Release(&self->buffer);
         self->buffer.buf = NULL;
@@ -6793,10 +7152,12 @@ static int
 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
 {
     Py_VISIT(self->readline);
+    Py_VISIT(self->readinto);
     Py_VISIT(self->read);
     Py_VISIT(self->peek);
     Py_VISIT(self->stack);
     Py_VISIT(self->pers_func);
+    Py_VISIT(self->buffers);
     return 0;
 }
 
@@ -6804,10 +7165,12 @@ static int
 Unpickler_clear(UnpicklerObject *self)
 {
     Py_CLEAR(self->readline);
+    Py_CLEAR(self->readinto);
     Py_CLEAR(self->read);
     Py_CLEAR(self->peek);
     Py_CLEAR(self->stack);
     Py_CLEAR(self->pers_func);
+    Py_CLEAR(self->buffers);
     if (self->buffer.buf != NULL) {
         PyBuffer_Release(&self->buffer);
         self->buffer.buf = NULL;
@@ -6835,6 +7198,7 @@ _pickle.Unpickler.__init__
   fix_imports: bool = True
   encoding: str = 'ASCII'
   errors: str = 'strict'
+  buffers: object = NULL
 
 This takes a binary file for reading a pickle data stream.
 
@@ -6861,8 +7225,8 @@ string instances as bytes objects.
 static int
 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
                                 int fix_imports, const char *encoding,
-                                const char *errors)
-/*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/
+                                const char *errors, PyObject *buffers)
+/*[clinic end generated code: output=09f0192649ea3f85 input=da4b62d9edb68700]*/
 {
     _Py_IDENTIFIER(persistent_load);
 
@@ -6876,6 +7240,9 @@ _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
         return -1;
 
+    if (_Unpickler_SetBuffers(self, buffers) < 0)
+        return -1;
+
     self->fix_imports = fix_imports;
 
     if (init_method_ref((PyObject *)self, &PyId_persistent_load,
@@ -7254,6 +7621,7 @@ _pickle.dump
   protocol: object = NULL
   *
   fix_imports: bool = True
+  buffer_callback: object = NULL
 
 Write a pickled representation of obj to the open file object file.
 
@@ -7277,12 +7645,18 @@ this interface.
 If *fix_imports* is True and protocol is less than 3, pickle will try
 to map the new Python 3 names to the old module names used in Python
 2, so that the pickle data stream is readable with Python 2.
+
+If *buffer_callback* is None (the default), buffer views are serialized
+into *file* as part of the pickle stream.  It is an error if
+*buffer_callback* is not None and *protocol* is None or smaller than 5.
+
 [clinic start generated code]*/
 
 static PyObject *
 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
-                  PyObject *protocol, int fix_imports)
-/*[clinic end generated code: output=a4774d5fde7d34de input=93f1408489a87472]*/
+                  PyObject *protocol, int fix_imports,
+                  PyObject *buffer_callback)
+/*[clinic end generated code: output=706186dba996490c input=2f035f02cc0f9547]*/
 {
     PicklerObject *pickler = _Pickler_New();
 
@@ -7295,6 +7669,9 @@ _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
     if (_Pickler_SetOutputStream(pickler, file) < 0)
         goto error;
 
+    if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
+        goto error;
+
     if (dump(pickler, obj) < 0)
         goto error;
 
@@ -7317,6 +7694,7 @@ _pickle.dumps
   protocol: object = NULL
   *
   fix_imports: bool = True
+  buffer_callback: object = NULL
 
 Return the pickled representation of the object as a bytes object.
 
@@ -7332,12 +7710,17 @@ version of Python needed to read the pickle produced.
 If *fix_imports* is True and *protocol* is less than 3, pickle will
 try to map the new Python 3 names to the old module names used in
 Python 2, so that the pickle data stream is readable with Python 2.
+
+If *buffer_callback* is None (the default), buffer views are serialized
+into *file* as part of the pickle stream.  It is an error if
+*buffer_callback* is not None and *protocol* is None or smaller than 5.
+
 [clinic start generated code]*/
 
 static PyObject *
 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
-                   int fix_imports)
-/*[clinic end generated code: output=d75d5cda456fd261 input=b6efb45a7d19b5ab]*/
+                   int fix_imports, PyObject *buffer_callback)
+/*[clinic end generated code: output=fbab0093a5580fdf input=001f167df711b9f1]*/
 {
     PyObject *result;
     PicklerObject *pickler = _Pickler_New();
@@ -7348,6 +7731,9 @@ _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
         goto error;
 
+    if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
+        goto error;
+
     if (dump(pickler, obj) < 0)
         goto error;
 
@@ -7369,6 +7755,7 @@ _pickle.load
   fix_imports: bool = True
   encoding: str = 'ASCII'
   errors: str = 'strict'
+  buffers: object = NULL
 
 Read and return an object from the pickle data stored in a file.
 
@@ -7397,8 +7784,9 @@ string instances as bytes objects.
 
 static PyObject *
 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
-                  const char *encoding, const char *errors)
-/*[clinic end generated code: output=69e298160285199e input=01b44dd3fc07afa7]*/
+                  const char *encoding, const char *errors,
+                  PyObject *buffers)
+/*[clinic end generated code: output=250452d141c23e76 input=29fae982fe778156]*/
 {
     PyObject *result;
     UnpicklerObject *unpickler = _Unpickler_New();
@@ -7412,6 +7800,9 @@ _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
         goto error;
 
+    if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
+        goto error;
+
     unpickler->fix_imports = fix_imports;
 
     result = load(unpickler);
@@ -7432,6 +7823,7 @@ _pickle.loads
   fix_imports: bool = True
   encoding: str = 'ASCII'
   errors: str = 'strict'
+  buffers: object = NULL
 
 Read and return an object from the given pickle data.
 
@@ -7451,8 +7843,9 @@ string instances as bytes objects.
 
 static PyObject *
 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
-                   const char *encoding, const char *errors)
-/*[clinic end generated code: output=1e7cb2343f2c440f input=70605948a719feb9]*/
+                   const char *encoding, const char *errors,
+                   PyObject *buffers)
+/*[clinic end generated code: output=82ac1e6b588e6d02 input=c6004393f8276867]*/
 {
     PyObject *result;
     UnpicklerObject *unpickler = _Unpickler_New();
@@ -7466,6 +7859,9 @@ _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
         goto error;
 
+    if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
+        goto error;
+
     unpickler->fix_imports = fix_imports;
 
     result = load(unpickler);
@@ -7558,12 +7954,17 @@ PyInit__pickle(void)
     if (m == NULL)
         return NULL;
 
+    /* Add types */
     Py_INCREF(&Pickler_Type);
     if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
         return NULL;
     Py_INCREF(&Unpickler_Type);
     if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
         return NULL;
+    Py_INCREF(&PyPickleBuffer_Type);
+    if (PyModule_AddObject(m, "PickleBuffer",
+                           (PyObject *)&PyPickleBuffer_Type) < 0)
+        return NULL;
 
     st = _Pickle_GetState(m);
 
index 1da2f936be4399ee7b465437a1a940fab0d40ba4..8ac723fd43a681d155516a6a596538cc4e765052 100644 (file)
@@ -63,7 +63,7 @@ exit:
 }
 
 PyDoc_STRVAR(_pickle_Pickler___init____doc__,
-"Pickler(file, protocol=None, fix_imports=True)\n"
+"Pickler(file, protocol=None, fix_imports=True, buffer_callback=None)\n"
 "--\n"
 "\n"
 "This takes a binary file for writing a pickle data stream.\n"
@@ -83,27 +83,40 @@ PyDoc_STRVAR(_pickle_Pickler___init____doc__,
 "\n"
 "If *fix_imports* is True and protocol is less than 3, pickle will try\n"
 "to map the new Python 3 names to the old module names used in Python\n"
-"2, so that the pickle data stream is readable with Python 2.");
+"2, so that the pickle data stream is readable with Python 2.\n"
+"\n"
+"If *buffer_callback* is None (the default), buffer views are\n"
+"serialized into *file* as part of the pickle stream.\n"
+"\n"
+"If *buffer_callback* is not None, then it can be called any number\n"
+"of times with a buffer view.  If the callback returns a false value\n"
+"(such as None), the given buffer is out-of-band; otherwise the\n"
+"buffer is serialized in-band, i.e. inside the pickle stream.\n"
+"\n"
+"It is an error if *buffer_callback* is not None and *protocol*\n"
+"is None or smaller than 5.");
 
 static int
 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
-                              PyObject *protocol, int fix_imports);
+                              PyObject *protocol, int fix_imports,
+                              PyObject *buffer_callback);
 
 static int
 _pickle_Pickler___init__(PyObject *self, PyObject *args, PyObject *kwargs)
 {
     int return_value = -1;
-    static const char * const _keywords[] = {"file", "protocol", "fix_imports", NULL};
+    static const char * const _keywords[] = {"file", "protocol", "fix_imports", "buffer_callback", NULL};
     static _PyArg_Parser _parser = {NULL, _keywords, "Pickler", 0};
-    PyObject *argsbuf[3];
+    PyObject *argsbuf[4];
     PyObject * const *fastargs;
     Py_ssize_t nargs = PyTuple_GET_SIZE(args);
     Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
     PyObject *file;
     PyObject *protocol = NULL;
     int fix_imports = 1;
+    PyObject *buffer_callback = NULL;
 
-    fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 3, 0, argsbuf);
+    fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 4, 0, argsbuf);
     if (!fastargs) {
         goto exit;
     }
@@ -117,12 +130,18 @@ _pickle_Pickler___init__(PyObject *self, PyObject *args, PyObject *kwargs)
             goto skip_optional_pos;
         }
     }
-    fix_imports = PyObject_IsTrue(fastargs[2]);
-    if (fix_imports < 0) {
-        goto exit;
+    if (fastargs[2]) {
+        fix_imports = PyObject_IsTrue(fastargs[2]);
+        if (fix_imports < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
     }
+    buffer_callback = fastargs[3];
 skip_optional_pos:
-    return_value = _pickle_Pickler___init___impl((PicklerObject *)self, file, protocol, fix_imports);
+    return_value = _pickle_Pickler___init___impl((PicklerObject *)self, file, protocol, fix_imports, buffer_callback);
 
 exit:
     return return_value;
@@ -272,7 +291,8 @@ exit:
 }
 
 PyDoc_STRVAR(_pickle_Unpickler___init____doc__,
-"Unpickler(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n"
+"Unpickler(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\',\n"
+"          buffers=None)\n"
 "--\n"
 "\n"
 "This takes a binary file for reading a pickle data stream.\n"
@@ -299,15 +319,15 @@ PyDoc_STRVAR(_pickle_Unpickler___init____doc__,
 static int
 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
                                 int fix_imports, const char *encoding,
-                                const char *errors);
+                                const char *errors, PyObject *buffers);
 
 static int
 _pickle_Unpickler___init__(PyObject *self, PyObject *args, PyObject *kwargs)
 {
     int return_value = -1;
-    static const char * const _keywords[] = {"file", "fix_imports", "encoding", "errors", NULL};
+    static const char * const _keywords[] = {"file", "fix_imports", "encoding", "errors", "buffers", NULL};
     static _PyArg_Parser _parser = {NULL, _keywords, "Unpickler", 0};
-    PyObject *argsbuf[4];
+    PyObject *argsbuf[5];
     PyObject * const *fastargs;
     Py_ssize_t nargs = PyTuple_GET_SIZE(args);
     Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
@@ -315,6 +335,7 @@ _pickle_Unpickler___init__(PyObject *self, PyObject *args, PyObject *kwargs)
     int fix_imports = 1;
     const char *encoding = "ASCII";
     const char *errors = "strict";
+    PyObject *buffers = NULL;
 
     fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 0, argsbuf);
     if (!fastargs) {
@@ -351,21 +372,27 @@ _pickle_Unpickler___init__(PyObject *self, PyObject *args, PyObject *kwargs)
             goto skip_optional_kwonly;
         }
     }
-    if (!PyUnicode_Check(fastargs[3])) {
-        _PyArg_BadArgument("Unpickler", 4, "str", fastargs[3]);
-        goto exit;
-    }
-    Py_ssize_t errors_length;
-    errors = PyUnicode_AsUTF8AndSize(fastargs[3], &errors_length);
-    if (errors == NULL) {
-        goto exit;
-    }
-    if (strlen(errors) != (size_t)errors_length) {
-        PyErr_SetString(PyExc_ValueError, "embedded null character");
-        goto exit;
+    if (fastargs[3]) {
+        if (!PyUnicode_Check(fastargs[3])) {
+            _PyArg_BadArgument("Unpickler", 4, "str", fastargs[3]);
+            goto exit;
+        }
+        Py_ssize_t errors_length;
+        errors = PyUnicode_AsUTF8AndSize(fastargs[3], &errors_length);
+        if (errors == NULL) {
+            goto exit;
+        }
+        if (strlen(errors) != (size_t)errors_length) {
+            PyErr_SetString(PyExc_ValueError, "embedded null character");
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
     }
+    buffers = fastargs[4];
 skip_optional_kwonly:
-    return_value = _pickle_Unpickler___init___impl((UnpicklerObject *)self, file, fix_imports, encoding, errors);
+    return_value = _pickle_Unpickler___init___impl((UnpicklerObject *)self, file, fix_imports, encoding, errors, buffers);
 
 exit:
     return return_value;
@@ -426,7 +453,8 @@ _pickle_UnpicklerMemoProxy___reduce__(UnpicklerMemoProxyObject *self, PyObject *
 }
 
 PyDoc_STRVAR(_pickle_dump__doc__,
-"dump($module, /, obj, file, protocol=None, *, fix_imports=True)\n"
+"dump($module, /, obj, file, protocol=None, *, fix_imports=True,\n"
+"     buffer_callback=None)\n"
 "--\n"
 "\n"
 "Write a pickled representation of obj to the open file object file.\n"
@@ -450,27 +478,33 @@ PyDoc_STRVAR(_pickle_dump__doc__,
 "\n"
 "If *fix_imports* is True and protocol is less than 3, pickle will try\n"
 "to map the new Python 3 names to the old module names used in Python\n"
-"2, so that the pickle data stream is readable with Python 2.");
+"2, so that the pickle data stream is readable with Python 2.\n"
+"\n"
+"If *buffer_callback* is None (the default), buffer views are serialized\n"
+"into *file* as part of the pickle stream.  It is an error if\n"
+"*buffer_callback* is not None and *protocol* is None or smaller than 5.");
 
 #define _PICKLE_DUMP_METHODDEF    \
     {"dump", (PyCFunction)(void(*)(void))_pickle_dump, METH_FASTCALL|METH_KEYWORDS, _pickle_dump__doc__},
 
 static PyObject *
 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
-                  PyObject *protocol, int fix_imports);
+                  PyObject *protocol, int fix_imports,
+                  PyObject *buffer_callback);
 
 static PyObject *
 _pickle_dump(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
-    static const char * const _keywords[] = {"obj", "file", "protocol", "fix_imports", NULL};
+    static const char * const _keywords[] = {"obj", "file", "protocol", "fix_imports", "buffer_callback", NULL};
     static _PyArg_Parser _parser = {NULL, _keywords, "dump", 0};
-    PyObject *argsbuf[4];
+    PyObject *argsbuf[5];
     Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2;
     PyObject *obj;
     PyObject *file;
     PyObject *protocol = NULL;
     int fix_imports = 1;
+    PyObject *buffer_callback = NULL;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 3, 0, argsbuf);
     if (!args) {
@@ -491,19 +525,26 @@ skip_optional_pos:
     if (!noptargs) {
         goto skip_optional_kwonly;
     }
-    fix_imports = PyObject_IsTrue(args[3]);
-    if (fix_imports < 0) {
-        goto exit;
+    if (args[3]) {
+        fix_imports = PyObject_IsTrue(args[3]);
+        if (fix_imports < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
     }
+    buffer_callback = args[4];
 skip_optional_kwonly:
-    return_value = _pickle_dump_impl(module, obj, file, protocol, fix_imports);
+    return_value = _pickle_dump_impl(module, obj, file, protocol, fix_imports, buffer_callback);
 
 exit:
     return return_value;
 }
 
 PyDoc_STRVAR(_pickle_dumps__doc__,
-"dumps($module, /, obj, protocol=None, *, fix_imports=True)\n"
+"dumps($module, /, obj, protocol=None, *, fix_imports=True,\n"
+"      buffer_callback=None)\n"
 "--\n"
 "\n"
 "Return the pickled representation of the object as a bytes object.\n"
@@ -519,26 +560,31 @@ PyDoc_STRVAR(_pickle_dumps__doc__,
 "\n"
 "If *fix_imports* is True and *protocol* is less than 3, pickle will\n"
 "try to map the new Python 3 names to the old module names used in\n"
-"Python 2, so that the pickle data stream is readable with Python 2.");
+"Python 2, so that the pickle data stream is readable with Python 2.\n"
+"\n"
+"If *buffer_callback* is None (the default), buffer views are serialized\n"
+"into *file* as part of the pickle stream.  It is an error if\n"
+"*buffer_callback* is not None and *protocol* is None or smaller than 5.");
 
 #define _PICKLE_DUMPS_METHODDEF    \
     {"dumps", (PyCFunction)(void(*)(void))_pickle_dumps, METH_FASTCALL|METH_KEYWORDS, _pickle_dumps__doc__},
 
 static PyObject *
 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
-                   int fix_imports);
+                   int fix_imports, PyObject *buffer_callback);
 
 static PyObject *
 _pickle_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
-    static const char * const _keywords[] = {"obj", "protocol", "fix_imports", NULL};
+    static const char * const _keywords[] = {"obj", "protocol", "fix_imports", "buffer_callback", NULL};
     static _PyArg_Parser _parser = {NULL, _keywords, "dumps", 0};
-    PyObject *argsbuf[3];
+    PyObject *argsbuf[4];
     Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
     PyObject *obj;
     PyObject *protocol = NULL;
     int fix_imports = 1;
+    PyObject *buffer_callback = NULL;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf);
     if (!args) {
@@ -558,12 +604,18 @@ skip_optional_pos:
     if (!noptargs) {
         goto skip_optional_kwonly;
     }
-    fix_imports = PyObject_IsTrue(args[2]);
-    if (fix_imports < 0) {
-        goto exit;
+    if (args[2]) {
+        fix_imports = PyObject_IsTrue(args[2]);
+        if (fix_imports < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
     }
+    buffer_callback = args[3];
 skip_optional_kwonly:
-    return_value = _pickle_dumps_impl(module, obj, protocol, fix_imports);
+    return_value = _pickle_dumps_impl(module, obj, protocol, fix_imports, buffer_callback);
 
 exit:
     return return_value;
@@ -571,7 +623,7 @@ exit:
 
 PyDoc_STRVAR(_pickle_load__doc__,
 "load($module, /, file, *, fix_imports=True, encoding=\'ASCII\',\n"
-"     errors=\'strict\')\n"
+"     errors=\'strict\', buffers=None)\n"
 "--\n"
 "\n"
 "Read and return an object from the pickle data stored in a file.\n"
@@ -603,20 +655,22 @@ PyDoc_STRVAR(_pickle_load__doc__,
 
 static PyObject *
 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
-                  const char *encoding, const char *errors);
+                  const char *encoding, const char *errors,
+                  PyObject *buffers);
 
 static PyObject *
 _pickle_load(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
-    static const char * const _keywords[] = {"file", "fix_imports", "encoding", "errors", NULL};
+    static const char * const _keywords[] = {"file", "fix_imports", "encoding", "errors", "buffers", NULL};
     static _PyArg_Parser _parser = {NULL, _keywords, "load", 0};
-    PyObject *argsbuf[4];
+    PyObject *argsbuf[5];
     Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
     PyObject *file;
     int fix_imports = 1;
     const char *encoding = "ASCII";
     const char *errors = "strict";
+    PyObject *buffers = NULL;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
     if (!args) {
@@ -653,21 +707,27 @@ _pickle_load(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject
             goto skip_optional_kwonly;
         }
     }
-    if (!PyUnicode_Check(args[3])) {
-        _PyArg_BadArgument("load", 4, "str", args[3]);
-        goto exit;
-    }
-    Py_ssize_t errors_length;
-    errors = PyUnicode_AsUTF8AndSize(args[3], &errors_length);
-    if (errors == NULL) {
-        goto exit;
-    }
-    if (strlen(errors) != (size_t)errors_length) {
-        PyErr_SetString(PyExc_ValueError, "embedded null character");
-        goto exit;
+    if (args[3]) {
+        if (!PyUnicode_Check(args[3])) {
+            _PyArg_BadArgument("load", 4, "str", args[3]);
+            goto exit;
+        }
+        Py_ssize_t errors_length;
+        errors = PyUnicode_AsUTF8AndSize(args[3], &errors_length);
+        if (errors == NULL) {
+            goto exit;
+        }
+        if (strlen(errors) != (size_t)errors_length) {
+            PyErr_SetString(PyExc_ValueError, "embedded null character");
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
     }
+    buffers = args[4];
 skip_optional_kwonly:
-    return_value = _pickle_load_impl(module, file, fix_imports, encoding, errors);
+    return_value = _pickle_load_impl(module, file, fix_imports, encoding, errors, buffers);
 
 exit:
     return return_value;
@@ -675,7 +735,7 @@ exit:
 
 PyDoc_STRVAR(_pickle_loads__doc__,
 "loads($module, /, data, *, fix_imports=True, encoding=\'ASCII\',\n"
-"      errors=\'strict\')\n"
+"      errors=\'strict\', buffers=None)\n"
 "--\n"
 "\n"
 "Read and return an object from the given pickle data.\n"
@@ -698,20 +758,22 @@ PyDoc_STRVAR(_pickle_loads__doc__,
 
 static PyObject *
 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
-                   const char *encoding, const char *errors);
+                   const char *encoding, const char *errors,
+                   PyObject *buffers);
 
 static PyObject *
 _pickle_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
-    static const char * const _keywords[] = {"data", "fix_imports", "encoding", "errors", NULL};
+    static const char * const _keywords[] = {"data", "fix_imports", "encoding", "errors", "buffers", NULL};
     static _PyArg_Parser _parser = {NULL, _keywords, "loads", 0};
-    PyObject *argsbuf[4];
+    PyObject *argsbuf[5];
     Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
     PyObject *data;
     int fix_imports = 1;
     const char *encoding = "ASCII";
     const char *errors = "strict";
+    PyObject *buffers = NULL;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
     if (!args) {
@@ -748,23 +810,29 @@ _pickle_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
             goto skip_optional_kwonly;
         }
     }
-    if (!PyUnicode_Check(args[3])) {
-        _PyArg_BadArgument("loads", 4, "str", args[3]);
-        goto exit;
-    }
-    Py_ssize_t errors_length;
-    errors = PyUnicode_AsUTF8AndSize(args[3], &errors_length);
-    if (errors == NULL) {
-        goto exit;
-    }
-    if (strlen(errors) != (size_t)errors_length) {
-        PyErr_SetString(PyExc_ValueError, "embedded null character");
-        goto exit;
+    if (args[3]) {
+        if (!PyUnicode_Check(args[3])) {
+            _PyArg_BadArgument("loads", 4, "str", args[3]);
+            goto exit;
+        }
+        Py_ssize_t errors_length;
+        errors = PyUnicode_AsUTF8AndSize(args[3], &errors_length);
+        if (errors == NULL) {
+            goto exit;
+        }
+        if (strlen(errors) != (size_t)errors_length) {
+            PyErr_SetString(PyExc_ValueError, "embedded null character");
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
     }
+    buffers = args[4];
 skip_optional_kwonly:
-    return_value = _pickle_loads_impl(module, data, fix_imports, encoding, errors);
+    return_value = _pickle_loads_impl(module, data, fix_imports, encoding, errors, buffers);
 
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=8f972562c8f71e2b input=a9049054013a1b77]*/
+/*[clinic end generated code: output=8dc0e862f96c4afe input=a9049054013a1b77]*/
index f842ab3889675e840a0aac01dec529e9c25f8593..6d79165683e15746e5bf12b87f571f133f7d141b 100644 (file)
@@ -1839,6 +1839,7 @@ _PyTypes_Init(void)
     INIT_TYPE(&PyMethodDescr_Type, "method descr");
     INIT_TYPE(&PyCallIter_Type, "call iter");
     INIT_TYPE(&PySeqIter_Type, "sequence iterator");
+    INIT_TYPE(&PyPickleBuffer_Type, "pickle.PickleBuffer");
     INIT_TYPE(&PyCoro_Type, "coroutine");
     INIT_TYPE(&_PyCoroWrapper_Type, "coroutine wrapper");
     INIT_TYPE(&_PyInterpreterID_Type, "interpreter ID");
diff --git a/Objects/picklebufobject.c b/Objects/picklebufobject.c
new file mode 100644 (file)
index 0000000..a135e55
--- /dev/null
@@ -0,0 +1,219 @@
+/* PickleBuffer object implementation */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#include <stddef.h>
+
+/* Instance layout of a PickleBuffer: a held Py_buffer plus weakref support. */
+typedef struct {
+    PyObject_HEAD
+    /* The view exported by the original object.  view.obj is set to NULL
+     * before the buffer is acquired and is compared against NULL by the
+     * accessors in this file to detect a released PickleBuffer. */
+    Py_buffer view;
+    /* Weak reference list head; its offset is registered through
+     * tp_weaklistoffset in PyPickleBuffer_Type. */
+    PyObject *weakreflist;
+} PyPickleBufferObject;
+
+/* C API */
+
+/* Create a PickleBuffer holding a PyBUF_FULL_RO view of `base`.
+ * Returns the new object, or NULL if the allocation or the buffer request
+ * fails. */
+PyObject *
+PyPickleBuffer_FromObject(PyObject *base)
+{
+    PyPickleBufferObject *picklebuf = (PyPickleBufferObject *)
+        PyPickleBuffer_Type.tp_alloc(&PyPickleBuffer_Type, 0);
+
+    if (picklebuf == NULL) {
+        return NULL;
+    }
+    /* Put the fields in a known state first: if the buffer request below
+     * fails, the Py_DECREF runs the destructor, which reads both of them. */
+    picklebuf->weakreflist = NULL;
+    picklebuf->view.obj = NULL;
+    if (PyObject_GetBuffer(base, &picklebuf->view, PyBUF_FULL_RO) >= 0) {
+        return (PyObject *) picklebuf;
+    }
+    Py_DECREF(picklebuf);
+    return NULL;
+}
+
+/* Return a pointer to the Py_buffer stored inside a PickleBuffer.
+ * Returns NULL with TypeError set if `obj` is not a PickleBuffer, or with
+ * ValueError set if the PickleBuffer no longer holds a view. */
+const Py_buffer *
+PyPickleBuffer_GetBuffer(PyObject *obj)
+{
+    PyPickleBufferObject *picklebuf;
+
+    if (!PyPickleBuffer_Check(obj)) {
+        PyErr_Format(PyExc_TypeError,
+                     "expected PickleBuffer, %.200s found",
+                     Py_TYPE(obj)->tp_name);
+        return NULL;
+    }
+    picklebuf = (PyPickleBufferObject *) obj;
+    if (picklebuf->view.obj != NULL) {
+        return &picklebuf->view;
+    }
+    PyErr_SetString(PyExc_ValueError,
+                    "operation forbidden on released PickleBuffer object");
+    return NULL;
+}
+
+/* Release the view held by a PickleBuffer.
+ * Returns 0 on success, or -1 with TypeError set if `obj` is not a
+ * PickleBuffer. */
+int
+PyPickleBuffer_Release(PyObject *obj)
+{
+    if (PyPickleBuffer_Check(obj)) {
+        PyBuffer_Release(&((PyPickleBufferObject *) obj)->view);
+        return 0;
+    }
+    PyErr_Format(PyExc_TypeError,
+                 "expected PickleBuffer, %.200s found",
+                 Py_TYPE(obj)->tp_name);
+    return -1;
+}
+
+/* tp_new: build a PickleBuffer from exactly one argument and acquire a
+ * PyBUF_FULL_RO view of it.  Returns NULL if argument parsing, allocation
+ * or the buffer request fails. */
+static PyObject *
+picklebuf_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    /* The single parameter has an empty keyword name. */
+    char *kwlist[] = {"", NULL};
+    PyObject *base;
+    PyPickleBufferObject *picklebuf;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:PickleBuffer",
+                                     kwlist, &base)) {
+        return NULL;
+    }
+
+    picklebuf = (PyPickleBufferObject *) type->tp_alloc(type, 0);
+    if (picklebuf == NULL) {
+        return NULL;
+    }
+    /* Known state for the destructor in case the buffer request fails. */
+    picklebuf->weakreflist = NULL;
+    picklebuf->view.obj = NULL;
+    if (PyObject_GetBuffer(base, &picklebuf->view, PyBUF_FULL_RO) >= 0) {
+        return (PyObject *) picklebuf;
+    }
+    Py_DECREF(picklebuf);
+    return NULL;
+}
+
+/* tp_traverse: report the object referenced by the held view (view.obj)
+ * to the garbage collector. */
+static int
+picklebuf_traverse(PyPickleBufferObject *self, visitproc visit, void *arg)
+{
+    Py_VISIT(self->view.obj);
+    return 0;
+}
+
+/* tp_clear: release the held view.  Always returns 0. */
+static int
+picklebuf_clear(PyPickleBufferObject *self)
+{
+    PyBuffer_Release(&self->view);
+    return 0;
+}
+
+/* tp_dealloc: untrack the object from the GC, clear any weak references,
+ * release the held view and finally hand the memory back via tp_free. */
+static void
+picklebuf_dealloc(PyPickleBufferObject *self)
+{
+    PyObject_GC_UnTrack(self);
+    if (self->weakreflist != NULL)
+        PyObject_ClearWeakRefs((PyObject *) self);
+    PyBuffer_Release(&self->view);
+    Py_TYPE(self)->tp_free((PyObject *) self);
+}
+
+/* Buffer API */
+
+/* bf_getbuffer: forward the request to the object that exported the held
+ * view.  Fails with ValueError once the PickleBuffer has been released. */
+static int
+picklebuf_getbuf(PyPickleBufferObject *self, Py_buffer *view, int flags)
+{
+    PyObject *exporter = self->view.obj;
+
+    if (exporter != NULL) {
+        return PyObject_GetBuffer(exporter, view, flags);
+    }
+    PyErr_SetString(PyExc_ValueError,
+                    "operation forbidden on released PickleBuffer object");
+    return -1;
+}
+
+/* bf_releasebuffer: intentionally empty; both parameters are unused. */
+static void
+picklebuf_releasebuf(PyPickleBufferObject *self, Py_buffer *view)
+{
+    /* Since our bf_getbuffer redirects to the original object, this
+     * implementation is never called.  It only exists to signal that
+     * buffers exported by PickleBuffer have non-trivial releasing
+     * behaviour (see check in Python/getargs.c).
+     */
+}
+
+/* Buffer protocol slots, installed as tp_as_buffer of PyPickleBuffer_Type. */
+static PyBufferProcs picklebuf_as_buffer = {
+    .bf_getbuffer = (getbufferproc) picklebuf_getbuf,
+    .bf_releasebuffer = (releasebufferproc) picklebuf_releasebuf,
+};
+
+/* Methods */
+
+/* PickleBuffer.raw(): return a memoryview of this object rewritten as a
+ * one-dimensional view of unsigned bytes ("B", itemsize 1).
+ * Raises ValueError once the PickleBuffer has been released, and
+ * BufferError when the held view has suboffsets or is not contiguous. */
+static PyObject *
+picklebuf_raw(PyPickleBufferObject *self, PyObject *Py_UNUSED(ignored))
+{
+    PyObject *result;
+    PyMemoryViewObject *raw;
+    int flat;
+
+    if (self->view.obj == NULL) {
+        PyErr_SetString(PyExc_ValueError,
+                        "operation forbidden on released PickleBuffer object");
+        return NULL;
+    }
+    /* A flat byte view requires no suboffsets and 'A' contiguity. */
+    flat = (self->view.suboffsets == NULL
+            && PyBuffer_IsContiguous(&self->view, 'A'));
+    if (!flat) {
+        PyErr_SetString(PyExc_BufferError,
+                        "cannot extract raw buffer from non-contiguous buffer");
+        return NULL;
+    }
+    result = PyMemoryView_FromObject((PyObject *) self);
+    if (result == NULL) {
+        return NULL;
+    }
+    raw = (PyMemoryViewObject *) result;
+    assert(raw->view.suboffsets == NULL);
+    /* Rewrite the fresh memoryview in place so that it describes raw bytes:
+     * shape = (len,) and strides = (1,), both pointing at fields of the
+     * memoryview's own Py_buffer. */
+    raw->view.itemsize = 1;
+    raw->view.ndim = 1;
+    raw->view.format = "B";
+    raw->view.strides = &raw->view.itemsize;
+    raw->view.shape = &raw->view.len;
+    /* Fix memoryview state flags */
+    /* XXX Expose memoryobject.c's init_flags() instead? */
+    raw->flags = _Py_MEMORYVIEW_C | _Py_MEMORYVIEW_FORTRAN;
+    return result;
+}
+
+/* Docstring for PickleBuffer.raw(), attached through picklebuf_methods. */
+PyDoc_STRVAR(picklebuf_raw_doc,
+"raw($self, /)\n--\n\
+\n\
+Return a memoryview of the raw memory underlying this buffer.\n\
+Will raise BufferError if the buffer isn't contiguous.");
+
+/* PickleBuffer.release(): release the held view and return None. */
+static PyObject *
+picklebuf_release(PyPickleBufferObject *self, PyObject *Py_UNUSED(ignored))
+{
+    PyBuffer_Release(&self->view);
+    Py_RETURN_NONE;
+}
+
+/* Docstring for PickleBuffer.release(), attached through picklebuf_methods. */
+PyDoc_STRVAR(picklebuf_release_doc,
+"release($self, /)\n--\n\
+\n\
+Release the underlying buffer exposed by the PickleBuffer object.");
+
+/* Method table: raw() and release(), both taking no arguments; the
+ * all-NULL entry terminates the table. */
+static PyMethodDef picklebuf_methods[] = {
+    {"raw",     (PyCFunction) picklebuf_raw,     METH_NOARGS, picklebuf_raw_doc},
+    {"release", (PyCFunction) picklebuf_release, METH_NOARGS, picklebuf_release_doc},
+    {NULL,      NULL}
+};
+
+/* Type object named "pickle.PickleBuffer".  It has the GC flag set (with
+ * traverse/clear slots), registers a weak reference list offset and
+ * installs the buffer protocol slots; Py_TPFLAGS_BASETYPE is not set. */
+PyTypeObject PyPickleBuffer_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    .tp_name = "pickle.PickleBuffer",
+    .tp_doc = "Wrapper for potentially out-of-band buffers",
+    .tp_basicsize = sizeof(PyPickleBufferObject),
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+    .tp_new = picklebuf_new,
+    .tp_dealloc = (destructor) picklebuf_dealloc,
+    .tp_traverse = (traverseproc) picklebuf_traverse,
+    .tp_clear = (inquiry) picklebuf_clear,
+    .tp_weaklistoffset = offsetof(PyPickleBufferObject, weakreflist),
+    .tp_as_buffer = &picklebuf_as_buffer,
+    .tp_methods = picklebuf_methods,
+};
index 10f51dd431b7c1081baeeea190367c7d83128457..db691cd39c8bb46795b3a0022c1df50642e020f3 100644 (file)
     <ClInclude Include="..\Include\osmodule.h" />
     <ClInclude Include="..\Include\parsetok.h" />
     <ClInclude Include="..\Include\patchlevel.h" />
+    <ClInclude Include="..\Include\picklebufobject.h" />
     <ClInclude Include="..\Include\pyhash.h" />
     <ClInclude Include="..\Include\py_curses.h" />
     <ClInclude Include="..\Include\pyarena.h" />
     <ClCompile Include="..\Objects\object.c" />
     <ClCompile Include="..\Objects\obmalloc.c" />
     <ClCompile Include="..\Objects\odictobject.c" />
+    <ClCompile Include="..\Objects\picklebufobject.c" />
     <ClCompile Include="..\Objects\rangeobject.c" />
     <ClCompile Include="..\Objects\setobject.c" />
     <ClCompile Include="..\Objects\sliceobject.c" />
index 396d146513d799a6cd87000a607fe6ec0bc78cb8..dba47e9aa2d1fb8765237b67002c1374ba4090ff 100644 (file)
     <ClInclude Include="..\Include\patchlevel.h">
       <Filter>Include</Filter>
     </ClInclude>
+    <ClInclude Include="..\Include\picklebufobject.h">
+      <Filter>Include</Filter>
+    </ClInclude>
     <ClInclude Include="..\Include\py_curses.h">
       <Filter>Include</Filter>
     </ClInclude>
     <ClCompile Include="..\Objects\obmalloc.c">
       <Filter>Objects</Filter>
     </ClCompile>
+    <ClCompile Include="..\Objects\picklebufobject.c">
+      <Filter>Objects</Filter>
+    </ClCompile>
     <ClCompile Include="..\Objects\rangeobject.c">
       <Filter>Objects</Filter>
     </ClCompile>