#1466065: add validate option to base64.b64decode

author R. David Murray <rdmurray@bitdance.com>

Thu, 11 Nov 2010 20:09:20 +0000 (20:09 +0000)

committer R. David Murray <rdmurray@bitdance.com>

Thu, 11 Nov 2010 20:09:20 +0000 (20:09 +0000)
author R. David Murray <rdmurray@bitdance.com>
Thu, 11 Nov 2010 20:09:20 +0000 (20:09 +0000)
committer R. David Murray <rdmurray@bitdance.com>
Thu, 11 Nov 2010 20:09:20 +0000 (20:09 +0000)
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst

index c10a74ac8abf1f7db3f65c4ae8454e90fb7dec9e..2401ae7a1320500783282c039f7ea26c59f57dd2 100644 (file)
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -37,7 +37,7 @@ The modern interface provides:
     The encoded byte string is returned.
  
  
-.. function:: b64decode(s, altchars=None)
+.. function:: b64decode(s, altchars=None, validate=False)
  
     Decode a Base64 encoded byte string.
  
@@ -45,9 +45,13 @@ The modern interface provides:
     at least length 2 (additional characters are ignored) which specifies the
     alternative alphabet used instead of the ``+`` and ``/`` characters.
  
-   The decoded byte string is returned.  A :exc:`TypeError` is raised if *s* were
-   incorrectly padded or if there are non-alphabet characters present in the
-   string.
+   The decoded byte string is returned.  A :exc:`binascii.Error` is raised if
+   *s* is incorrectly padded.
+
+   If *validate* is ``False`` (the default), non-base64-alphabet characters are
+   discarded prior to the padding check.  If *validate* is ``True``,
+   non-base64-alphabet characters in the input result in a
+   :exc:`binascii.Error`.
  
  
  .. function:: standard_b64encode(s)
diff --git a/Lib/base64.py b/Lib/base64.py

index af7cf644658f0877935fe479eb5c57c3d4534814..895d813f7ee586494b6b4be18a2db6706e771027 100755 (executable)
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -65,16 +65,19 @@ def b64encode(s, altchars=None):
      return encoded
  
  
-def b64decode(s, altchars=None):
+def b64decode(s, altchars=None, validate=False):
      """Decode a Base64 encoded byte string.
  
      s is the byte string to decode.  Optional altchars must be a
      string of length 2 which specifies the alternative alphabet used
      instead of the '+' and '/' characters.
  
-    The decoded byte string is returned.  binascii.Error is raised if
-    s were incorrectly padded or if there are non-alphabet characters
-    present in the string.
+    The decoded byte string is returned.  A binascii.Error is raised if s is
+    incorrectly padded.
+
+    If validate is False (the default), non-base64-alphabet characters are
+    discarded prior to the padding check.  If validate is True,
+    non-base64-alphabet characters in the input result in a binascii.Error.
      """
      if not isinstance(s, bytes_types):
          raise TypeError("expected bytes, not %s" % s.__class__.__name__)
@@ -84,6 +87,8 @@ def b64decode(s, altchars=None):
                              % altchars.__class__.__name__)
          assert len(altchars) == 2, repr(altchars)
          s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
+    if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
+        raise binascii.Error('Non-base64 digit found')
      return binascii.a2b_base64(s)
  
  
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py

index 49edf395f169da12dfecc5fd2c7678cbf3e453c2..228a0fb23dd2babb1452aac4791cebfdb40e13c2 100644 (file)
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -138,9 +138,25 @@ class BaseXYTestCase(unittest.TestCase):
          eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
          self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
  
-    def test_b64decode_error(self):
+    def test_b64decode_padding_error(self):
          self.assertRaises(binascii.Error, base64.b64decode, b'abc')
  
+    def test_b64decode_invalid_chars(self):
+        # issue 1466065: Test some invalid characters.
+        tests = ((b'%3d==', b'\xdd'),
+                 (b'$3d==', b'\xdd'),
+                 (b'[==', b''),
+                 (b'YW]3=', b'am'),
+                 (b'3{d==', b'\xdd'),
+                 (b'3d}==', b'\xdd'),
+                 (b'@@', b''),
+                 (b'!', b''),
+                 (b'YWJj\nYWI=', b'abcab'))
+        for bstr, res in tests:
+            self.assertEquals(base64.b64decode(bstr), res)
+            with self.assertRaises(binascii.Error):
+                base64.b64decode(bstr, validate=True)
+
      def test_b32encode(self):
          eq = self.assertEqual
          eq(base64.b32encode(b''), b'')
diff --git a/Misc/NEWS b/Misc/NEWS

index a887fafd9fd98220fc0ab053bb41f07495595e0a..2ff724747272e84e6b0551c455f2f82aa15b2592 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -63,6 +63,9 @@ Core and Builtins
  Library
  -------
  
+- Issue #1466065: Add 'validate' option to base64.b64decode to raise
+  an error if there are non-base64 alphabet characters in the input.
+
  - Issue #10386: Add __all__ to token module; this simplifies importing
    in tokenize module and prevents leaking of private names through
    import *.
author	R. David Murray <rdmurray@bitdance.com>
	Thu, 11 Nov 2010 20:09:20 +0000 (20:09 +0000)
committer	R. David Murray <rdmurray@bitdance.com>
	Thu, 11 Nov 2010 20:09:20 +0000 (20:09 +0000)
Doc/library/base64.rst		patch \| blob \| history
Lib/base64.py		patch \| blob \| history
Lib/test/test_base64.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history