granicus.if.org Git - python/commitdiff
Close #17839: support bytes-like objects in base64 module
author Nick Coghlan <ncoghlan@gmail.com>
Wed, 2 Oct 2013 14:43:22 +0000 (00:43 +1000)
committer Nick Coghlan <ncoghlan@gmail.com>
Wed, 2 Oct 2013 14:43:22 +0000 (00:43 +1000)
This mostly affected the encodebytes and decodebytes functions
(which are used by base64_codec).

Also added a test to ensure all bytes-bytes codecs can handle
memoryview input and tests for handling of multidimensional
and non-bytes format input in the modern base64 API.

Doc/library/base64.rst
Doc/library/codecs.rst
Lib/base64.py
Lib/test/test_base64.py
Lib/test/test_codecs.py
Misc/NEWS

index 3b23e795d4269f6a06c88c42d832c5247f1f891a..de8744137c0e6e75a456a851fc7d5958c8ec6e3a 100644 (file)
@@ -27,6 +27,10 @@ byte strings, but only using the Base64 standard alphabet.
    ASCII-only Unicode strings are now accepted by the decoding functions of
    the modern interface.
 
+.. versionchanged:: 3.4
+   Any :term:`bytes-like object`\ s are now accepted by all
+   encoding and decoding functions in this module.
+
 The modern interface provides:
 
 .. function:: b64encode(s, altchars=None)
index fcef948895c1207b79b6ab6f513c70b7a3b97680..48c3b24248218ed0005ab6bc8f91f8875d9d37e9 100644 (file)
@@ -1208,36 +1208,41 @@ mappings.
 
 .. tabularcolumns:: |l|L|L|
 
-+----------------------+---------------------------+------------------------------+
-| Codec                | Purpose                   | Encoder/decoder              |
-+======================+===========================+==============================+
-| base64_codec [#b64]_ | Convert operand to MIME   | :meth:`base64.b64encode`,    |
-|                      | base64 (the result always | :meth:`base64.b64decode`     |
-|                      | includes a trailing       |                              |
-|                      | ``'\n'``)                 |                              |
-+----------------------+---------------------------+------------------------------+
-| bz2_codec            | Compress the operand      | :meth:`bz2.compress`,        |
-|                      | using bz2                 | :meth:`bz2.decompress`       |
-+----------------------+---------------------------+------------------------------+
-| hex_codec            | Convert operand to        | :meth:`base64.b16encode`,    |
-|                      | hexadecimal               | :meth:`base64.b16decode`     |
-|                      | representation, with two  |                              |
-|                      | digits per byte           |                              |
-+----------------------+---------------------------+------------------------------+
-| quopri_codec         | Convert operand to MIME   | :meth:`quopri.encodestring`, |
-|                      | quoted printable          | :meth:`quopri.decodestring`  |
-+----------------------+---------------------------+------------------------------+
-| uu_codec             | Convert the operand using | :meth:`uu.encode`,           |
-|                      | uuencode                  | :meth:`uu.decode`            |
-+----------------------+---------------------------+------------------------------+
-| zlib_codec           | Compress the operand      | :meth:`zlib.compress`,       |
-|                      | using gzip                | :meth:`zlib.decompress`      |
-+----------------------+---------------------------+------------------------------+
-
-.. [#b64] Rather than accepting any :term:`bytes-like object`,
-   ``'base64_codec'`` accepts only :class:`bytes` and :class:`bytearray` for
-   encoding and only :class:`bytes`, :class:`bytearray`, and ASCII-only
-   instances of :class:`str` for decoding
++----------------------+------------------------------+------------------------------+
+| Codec                | Purpose                      | Encoder / decoder            |
++======================+==============================+==============================+
+| base64_codec [#b64]_ | Convert operand to MIME      | :meth:`base64.b64encode` /   |
+|                      | base64 (the result always    | :meth:`base64.b64decode`     |
+|                      | includes a trailing          |                              |
+|                      | ``'\n'``)                    |                              |
+|                      |                              |                              |
+|                      | .. versionchanged:: 3.4      |                              |
+|                      |    accepts any               |                              |
+|                      |    :term:`bytes-like object` |                              |
+|                      |    as input for encoding and |                              |
+|                      |    decoding                  |                              |
++----------------------+------------------------------+------------------------------+
+| bz2_codec            | Compress the operand         | :meth:`bz2.compress` /       |
+|                      | using bz2                    | :meth:`bz2.decompress`       |
++----------------------+------------------------------+------------------------------+
+| hex_codec            | Convert operand to           | :meth:`base64.b16encode` /   |
+|                      | hexadecimal                  | :meth:`base64.b16decode`     |
+|                      | representation, with two     |                              |
+|                      | digits per byte              |                              |
++----------------------+------------------------------+------------------------------+
+| quopri_codec         | Convert operand to MIME      | :meth:`quopri.encodestring` /|
+|                      | quoted printable             | :meth:`quopri.decodestring`  |
++----------------------+------------------------------+------------------------------+
+| uu_codec             | Convert the operand using    | :meth:`uu.encode` /          |
+|                      | uuencode                     | :meth:`uu.decode`            |
++----------------------+------------------------------+------------------------------+
+| zlib_codec           | Compress the operand         | :meth:`zlib.compress` /      |
+|                      | using gzip                   | :meth:`zlib.decompress`      |
++----------------------+------------------------------+------------------------------+
+
+.. [#b64] In addition to :term:`bytes-like objects <bytes-like object>`,
+   ``'base64_codec'`` also accepts ASCII-only instances of :class:`str` for
+   decoding
 
 
 The following codecs provide :class:`str` to :class:`str` mappings.
index 9c15752fe3123520ba236be27f3ad06953304707..0a93f2ecb6272abf71041ef6d54ebfd11f640269 100755 (executable)
@@ -35,11 +35,13 @@ def _bytes_from_decode_data(s):
             return s.encode('ascii')
         except UnicodeEncodeError:
             raise ValueError('string argument should contain only ASCII characters')
-    elif isinstance(s, bytes_types):
+    if isinstance(s, bytes_types):
         return s
-    else:
-        raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
-
+    try:
+        return memoryview(s).tobytes()
+    except TypeError:
+        raise TypeError("argument should be a bytes-like object or ASCII "
+                        "string, not %r" % s.__class__.__name__) from None
 
 
 # Base64 encoding/decoding uses binascii
@@ -54,14 +56,9 @@ def b64encode(s, altchars=None):
 
     The encoded byte string is returned.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
     # Strip off the trailing newline
     encoded = binascii.b2a_base64(s)[:-1]
     if altchars is not None:
-        if not isinstance(altchars, bytes_types):
-            raise TypeError("expected bytes, not %s"
-                            % altchars.__class__.__name__)
         assert len(altchars) == 2, repr(altchars)
         return encoded.translate(bytes.maketrans(b'+/', altchars))
     return encoded
@@ -149,7 +146,7 @@ def b32encode(s):
     s is the byte string to encode.  The encoded byte string is returned.
     """
     if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+        s = memoryview(s).tobytes()
     leftover = len(s) % 5
     # Pad the last quantum with zero bits if necessary
     if leftover:
@@ -250,8 +247,6 @@ def b16encode(s):
 
     s is the byte string to encode.  The encoded byte string is returned.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
     return binascii.hexlify(s).upper()
 
 
@@ -306,12 +301,26 @@ def decode(input, output):
         s = binascii.a2b_base64(line)
         output.write(s)
 
+def _input_type_check(s):
+    try:
+        m = memoryview(s)
+    except TypeError as err:
+        msg = "expected bytes-like object, not %s" % s.__class__.__name__
+        raise TypeError(msg) from err
+    if m.format not in ('c', 'b', 'B'):
+        msg = ("expected single byte elements, not %r from %s" %
+                                          (m.format, s.__class__.__name__))
+        raise TypeError(msg)
+    if m.ndim != 1:
+        msg = ("expected 1-D data, not %d-D data from %s" %
+                                          (m.ndim, s.__class__.__name__))
+        raise TypeError(msg)
+
 
 def encodebytes(s):
     """Encode a bytestring into a bytestring containing multiple lines
     of base-64 data."""
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    _input_type_check(s)
     pieces = []
     for i in range(0, len(s), MAXBINSIZE):
         chunk = s[i : i + MAXBINSIZE]
@@ -328,8 +337,7 @@ def encodestring(s):
 
 def decodebytes(s):
     """Decode a bytestring of base-64 data into a bytestring."""
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    _input_type_check(s)
     return binascii.a2b_base64(s)
 
 def decodestring(s):
index 13695de67e652f576d889f205ef664189b73d45b..54f392d4d696a5ab08cb50d1e26d728856f0abde 100644 (file)
@@ -5,10 +5,21 @@ import binascii
 import os
 import sys
 import subprocess
-
+import struct
+from array import array
 
 
 class LegacyBase64TestCase(unittest.TestCase):
+
+    # Legacy API is not as permissive as the modern API
+    def check_type_errors(self, f):
+        self.assertRaises(TypeError, f, "")
+        self.assertRaises(TypeError, f, [])
+        multidimensional = memoryview(b"1234").cast('B', (2, 2))
+        self.assertRaises(TypeError, f, multidimensional)
+        int_data = memoryview(b"1234").cast('I')
+        self.assertRaises(TypeError, f, int_data)
+
     def test_encodebytes(self):
         eq = self.assertEqual
         eq(base64.encodebytes(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=\n")
@@ -24,7 +35,9 @@ class LegacyBase64TestCase(unittest.TestCase):
            b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n")
         # Non-bytes
         eq(base64.encodebytes(bytearray(b'abc')), b'YWJj\n')
-        self.assertRaises(TypeError, base64.encodebytes, "")
+        eq(base64.encodebytes(memoryview(b'abc')), b'YWJj\n')
+        eq(base64.encodebytes(array('B', b'abc')), b'YWJj\n')
+        self.check_type_errors(base64.encodebytes)
 
     def test_decodebytes(self):
         eq = self.assertEqual
@@ -41,7 +54,9 @@ class LegacyBase64TestCase(unittest.TestCase):
         eq(base64.decodebytes(b''), b'')
         # Non-bytes
         eq(base64.decodebytes(bytearray(b'YWJj\n')), b'abc')
-        self.assertRaises(TypeError, base64.decodebytes, "")
+        eq(base64.decodebytes(memoryview(b'YWJj\n')), b'abc')
+        eq(base64.decodebytes(array('B', b'YWJj\n')), b'abc')
+        self.check_type_errors(base64.decodebytes)
 
     def test_encode(self):
         eq = self.assertEqual
@@ -73,6 +88,38 @@ class LegacyBase64TestCase(unittest.TestCase):
 
 
 class BaseXYTestCase(unittest.TestCase):
+
+    # Modern API completely ignores exported dimension and format data and
+    # treats any buffer as a stream of bytes
+    def check_encode_type_errors(self, f):
+        self.assertRaises(TypeError, f, "")
+        self.assertRaises(TypeError, f, [])
+
+    def check_decode_type_errors(self, f):
+        self.assertRaises(TypeError, f, [])
+
+    def check_other_types(self, f, bytes_data, expected):
+        eq = self.assertEqual
+        eq(f(bytearray(bytes_data)), expected)
+        eq(f(memoryview(bytes_data)), expected)
+        eq(f(array('B', bytes_data)), expected)
+        self.check_nonbyte_element_format(base64.b64encode, bytes_data)
+        self.check_multidimensional(base64.b64encode, bytes_data)
+
+    def check_multidimensional(self, f, data):
+        padding = b"\x00" if len(data) % 2 else b""
+        bytes_data = data + padding # Make sure cast works
+        shape = (len(bytes_data) // 2, 2)
+        multidimensional = memoryview(bytes_data).cast('B', shape)
+        self.assertEqual(f(multidimensional), f(bytes_data))
+
+    def check_nonbyte_element_format(self, f, data):
+        padding = b"\x00" * ((4 - len(data)) % 4)
+        bytes_data = data + padding # Make sure cast works
+        int_data = memoryview(bytes_data).cast('I')
+        self.assertEqual(f(int_data), f(bytes_data))
+
+
     def test_b64encode(self):
         eq = self.assertEqual
         # Test default alphabet
@@ -90,13 +137,16 @@ class BaseXYTestCase(unittest.TestCase):
            b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
         # Test with arbitrary alternative characters
         eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=b'*$'), b'01a*b$cd')
-        # Non-bytes
-        eq(base64.b64encode(bytearray(b'abcd')), b'YWJjZA==')
         eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=bytearray(b'*$')),
            b'01a*b$cd')
-        # Check if passing a str object raises an error
-        self.assertRaises(TypeError, base64.b64encode, "")
-        self.assertRaises(TypeError, base64.b64encode, b"", altchars="")
+        eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=memoryview(b'*$')),
+           b'01a*b$cd')
+        eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=array('B', b'*$')),
+           b'01a*b$cd')
+        # Non-bytes
+        self.check_other_types(base64.b64encode, b'abcd', b'YWJjZA==')
+        self.check_encode_type_errors(base64.b64encode)
+        self.assertRaises(TypeError, base64.b64encode, b"", altchars="*$")
         # Test standard alphabet
         eq(base64.standard_b64encode(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=")
         eq(base64.standard_b64encode(b"a"), b"YQ==")
@@ -110,15 +160,15 @@ class BaseXYTestCase(unittest.TestCase):
            b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
            b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
         # Non-bytes
-        eq(base64.standard_b64encode(bytearray(b'abcd')), b'YWJjZA==')
-        # Check if passing a str object raises an error
-        self.assertRaises(TypeError, base64.standard_b64encode, "")
+        self.check_other_types(base64.standard_b64encode,
+                               b'abcd', b'YWJjZA==')
+        self.check_encode_type_errors(base64.standard_b64encode)
         # Test with 'URL safe' alternative characters
         eq(base64.urlsafe_b64encode(b'\xd3V\xbeo\xf7\x1d'), b'01a-b_cd')
         # Non-bytes
-        eq(base64.urlsafe_b64encode(bytearray(b'\xd3V\xbeo\xf7\x1d')), b'01a-b_cd')
-        # Check if passing a str object raises an error
-        self.assertRaises(TypeError, base64.urlsafe_b64encode, "")
+        self.check_other_types(base64.urlsafe_b64encode,
+                               b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd')
+        self.check_encode_type_errors(base64.urlsafe_b64encode)
 
     def test_b64decode(self):
         eq = self.assertEqual
@@ -141,7 +191,8 @@ class BaseXYTestCase(unittest.TestCase):
             eq(base64.b64decode(data), res)
             eq(base64.b64decode(data.decode('ascii')), res)
         # Non-bytes
-        eq(base64.b64decode(bytearray(b"YWJj")), b"abc")
+        self.check_other_types(base64.b64decode, b"YWJj", b"abc")
+        self.check_decode_type_errors(base64.b64decode)
 
         # Test with arbitrary alternative characters
         tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d',
@@ -160,7 +211,8 @@ class BaseXYTestCase(unittest.TestCase):
             eq(base64.standard_b64decode(data), res)
             eq(base64.standard_b64decode(data.decode('ascii')), res)
         # Non-bytes
-        eq(base64.standard_b64decode(bytearray(b"YWJj")), b"abc")
+        self.check_other_types(base64.standard_b64decode, b"YWJj", b"abc")
+        self.check_decode_type_errors(base64.standard_b64decode)
 
         # Test with 'URL safe' alternative characters
         tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d',
@@ -170,7 +222,9 @@ class BaseXYTestCase(unittest.TestCase):
             eq(base64.urlsafe_b64decode(data), res)
             eq(base64.urlsafe_b64decode(data.decode('ascii')), res)
         # Non-bytes
-        eq(base64.urlsafe_b64decode(bytearray(b'01a-b_cd')), b'\xd3V\xbeo\xf7\x1d')
+        self.check_other_types(base64.urlsafe_b64decode, b'01a-b_cd',
+                               b'\xd3V\xbeo\xf7\x1d')
+        self.check_decode_type_errors(base64.urlsafe_b64decode)
 
     def test_b64decode_padding_error(self):
         self.assertRaises(binascii.Error, base64.b64decode, b'abc')
@@ -205,8 +259,8 @@ class BaseXYTestCase(unittest.TestCase):
         eq(base64.b32encode(b'abcd'), b'MFRGGZA=')
         eq(base64.b32encode(b'abcde'), b'MFRGGZDF')
         # Non-bytes
-        eq(base64.b32encode(bytearray(b'abcd')), b'MFRGGZA=')
-        self.assertRaises(TypeError, base64.b32encode, "")
+        self.check_other_types(base64.b32encode, b'abcd', b'MFRGGZA=')
+        self.check_encode_type_errors(base64.b32encode)
 
     def test_b32decode(self):
         eq = self.assertEqual
@@ -222,7 +276,8 @@ class BaseXYTestCase(unittest.TestCase):
             eq(base64.b32decode(data), res)
             eq(base64.b32decode(data.decode('ascii')), res)
         # Non-bytes
-        eq(base64.b32decode(bytearray(b'MFRGG===')), b'abc')
+        self.check_other_types(base64.b32decode, b'MFRGG===', b"abc")
+        self.check_decode_type_errors(base64.b32decode)
 
     def test_b32decode_casefold(self):
         eq = self.assertEqual
@@ -277,8 +332,9 @@ class BaseXYTestCase(unittest.TestCase):
         eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
         eq(base64.b16encode(b'\x00'), b'00')
         # Non-bytes
-        eq(base64.b16encode(bytearray(b'\x01\x02\xab\xcd\xef')), b'0102ABCDEF')
-        self.assertRaises(TypeError, base64.b16encode, "")
+        self.check_other_types(base64.b16encode, b'\x01\x02\xab\xcd\xef',
+                               b'0102ABCDEF')
+        self.check_encode_type_errors(base64.b16encode)
 
     def test_b16decode(self):
         eq = self.assertEqual
@@ -293,7 +349,15 @@ class BaseXYTestCase(unittest.TestCase):
         eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef')
         eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef')
         # Non-bytes
-        eq(base64.b16decode(bytearray(b"0102ABCDEF")), b'\x01\x02\xab\xcd\xef')
+        self.check_other_types(base64.b16decode, b"0102ABCDEF",
+                               b'\x01\x02\xab\xcd\xef')
+        self.check_decode_type_errors(base64.b16decode)
+        eq(base64.b16decode(bytearray(b"0102abcdef"), True),
+           b'\x01\x02\xab\xcd\xef')
+        eq(base64.b16decode(memoryview(b"0102abcdef"), True),
+           b'\x01\x02\xab\xcd\xef')
+        eq(base64.b16decode(array('B', b"0102abcdef"), True),
+           b'\x01\x02\xab\xcd\xef')
 
     def test_decode_nonascii_str(self):
         decode_funcs = (base64.b64decode,
index 2f3cf4d9f5d802269ad4e567006f1ccd44c9d5f2..99d928dcf1f19d00d0005462f419fa030768c252 100644 (file)
@@ -2285,6 +2285,24 @@ class TransformCodecTest(unittest.TestCase):
             sout = reader.readline()
             self.assertEqual(sout, b"\x80")
 
+    def test_buffer_api_usage(self):
+        # We check all the transform codecs accept memoryview input
+        # for encoding and decoding
+        # and also that they roundtrip correctly
+        original = b"12345\x80"
+        for encoding in bytes_transform_encodings:
+            data = original
+            view = memoryview(data)
+            data = codecs.encode(data, encoding)
+            view_encoded = codecs.encode(view, encoding)
+            self.assertEqual(view_encoded, data)
+            view = memoryview(data)
+            data = codecs.decode(data, encoding)
+            self.assertEqual(data, original)
+            view_decoded = codecs.decode(view, encoding)
+            self.assertEqual(view_decoded, data)
+
+
 
 @unittest.skipUnless(sys.platform == 'win32',
                      'code pages are specific to Windows')
index f7a118c12fabf938b4126378243da8c52d40d9ea..57901cff5ea894f9c690652d4917207bf35595d6 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -20,6 +20,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #17839: base64.decodebytes and base64.encodebytes now accept any
+  object that exports a 1 dimensional array of bytes (this means the same
+  is now also true for base64_codec)
+
 - Issue #19132: The pprint module now supports compact mode.
 
 - Issue #19137: The pprint module now correctly formats instances of set and