Issue #22088: Clarify base-64 alphabets and which characters are discarded

author Martin Panter <vadmium+py@gmail.com>

Tue, 23 Feb 2016 22:30:50 +0000 (22:30 +0000)

committer Martin Panter <vadmium+py@gmail.com>

Tue, 23 Feb 2016 22:30:50 +0000 (22:30 +0000)
author Martin Panter <vadmium+py@gmail.com>
Tue, 23 Feb 2016 22:30:50 +0000 (22:30 +0000)
committer Martin Panter <vadmium+py@gmail.com>
Tue, 23 Feb 2016 22:30:50 +0000 (22:30 +0000)
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst

index e346efbbc1f222da9661203f79a556b12cd44d37..7d1a6e0c7f90ba7299e93b219cc4d2974c5711b0 100644 (file)
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -16,8 +16,8 @@ email, used as parts of URLs, or included as part of an HTTP POST request.  The
  encoding algorithm is not the same as the :program:`uuencode` program.
  
  There are two interfaces provided by this module.  The modern interface supports
-encoding and decoding string objects using all three :rfc:`3548` defined
-alphabets (normal, URL-safe, and filesystem-safe).  The legacy
+encoding and decoding string objects using both base-64 alphabets defined
+in :rfc:`3548` (normal, and URL- and filesystem-safe).  The legacy
  interface provides for encoding and decoding to and from file-like objects as
  well as strings, but only using the Base64 standard alphabet.
  
@@ -26,7 +26,7 @@ The modern interface, which was introduced in Python 2.4, provides:
  
  .. function:: b64encode(s[, altchars])
  
-   Encode a string use Base64.
+   Encode a string using Base64.
  
     *s* is the string to encode.  Optional *altchars* must be a string of at least
     length 2 (additional characters are ignored) which specifies an alternative
@@ -46,7 +46,8 @@ The modern interface, which was introduced in Python 2.4, provides:
     alphabet used instead of the ``+`` and ``/`` characters.
  
     The decoded string is returned.  A :exc:`TypeError` is raised if *s* is
-   incorrectly padded.  Non-base64-alphabet characters are
+   incorrectly padded.  Characters that are neither
+   in the normal base-64 alphabet nor the alternative alphabet are
     discarded prior to the padding check.
  
  
@@ -62,14 +63,16 @@ The modern interface, which was introduced in Python 2.4, provides:
  
  .. function:: urlsafe_b64encode(s)
  
-   Encode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of
+   Encode string *s* using the URL- and filesystem-safe
+   alphabet, which substitutes ``-`` instead of
     ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet.  The result
     can still contain ``=``.
  
  
  .. function:: urlsafe_b64decode(s)
  
-   Decode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of
+   Decode string *s* using the URL- and filesystem-safe
+   alphabet, which substitutes ``-`` instead of
     ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet.
  
  
diff --git a/Lib/base64.py b/Lib/base64.py

index 844907feef56ae0e2fca5e106a8f7ac362b64feb..38bc61ee984c13b1164795d0efda7cae4c77c0c3 100755 (executable)
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -65,8 +65,9 @@ def b64decode(s, altchars=None):
      alternative alphabet used instead of the '+' and '/' characters.
  
      The decoded string is returned.  A TypeError is raised if s is
-    incorrectly padded.  Non-base64-alphabet characters are discarded prior
-    to the padding check.
+    incorrectly padded.  Characters that are neither in the normal base-64
+    alphabet nor the alternative alphabet are discarded prior to the padding
+    check.
      """
      if altchars is not None:
          s = s.translate(string.maketrans(altchars[:2], '+/'))
@@ -87,9 +88,10 @@ def standard_b64encode(s):
  def standard_b64decode(s):
      """Decode a string encoded with the standard Base64 alphabet.
  
-    s is the string to decode.  The decoded string is returned.  A TypeError
-    is raised if the string is incorrectly padded or if there are non-alphabet
-    characters present in the string.
+    Argument s is the string to decode.  The decoded string is returned.  A
+    TypeError is raised if the string is incorrectly padded.  Characters that
+    are not in the standard alphabet are discarded prior to the padding
+    check.
      """
      return b64decode(s)
  
@@ -97,19 +99,20 @@ _urlsafe_encode_translation = string.maketrans(b'+/', b'-_')
  _urlsafe_decode_translation = string.maketrans(b'-_', b'+/')
  
  def urlsafe_b64encode(s):
-    """Encode a string using a url-safe Base64 alphabet.
+    """Encode a string using the URL- and filesystem-safe Base64 alphabet.
  
-    s is the string to encode.  The encoded string is returned.  The alphabet
-    uses '-' instead of '+' and '_' instead of '/'.
+    Argument s is the string to encode.  The encoded string is returned.  The
+    alphabet uses '-' instead of '+' and '_' instead of '/'.
      """
      return b64encode(s).translate(_urlsafe_encode_translation)
  
  def urlsafe_b64decode(s):
-    """Decode a string encoded with the standard Base64 alphabet.
+    """Decode a string using the URL- and filesystem-safe Base64 alphabet.
  
-    s is the string to decode.  The decoded string is returned.  A TypeError
-    is raised if the string is incorrectly padded or if there are non-alphabet
-    characters present in the string.
+    Argument s is the string to decode.  The decoded string is returned.  A
+    TypeError is raised if the string is incorrectly padded.  Characters that
+    are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash
+    '/', are discarded prior to the padding check.
  
      The alphabet uses '-' instead of '+' and '_' instead of '/'.
      """
@@ -267,7 +270,7 @@ def b16decode(s, casefold=False):
      a lowercase alphabet is acceptable as input.  For security purposes, the
      default is False.
  
-    The decoded string is returned.  A TypeError is raised if s were
+    The decoded string is returned.  A TypeError is raised if s is
      incorrectly padded or if there are non-alphabet characters present in the
      string.
      """
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py

index 5dd283bb995af9047f683d98656a47782d9f31c4..6e67dc0ac189cb4b807263e62bd16df01d990e5d 100644 (file)
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -153,6 +153,13 @@ class BaseXYTestCase(unittest.TestCase):
                   (b'YWJj\nYWI=', b'abcab'))
          for bstr, res in tests:
              self.assertEqual(base64.b64decode(bstr), res)
+            self.assertEqual(base64.standard_b64decode(bstr), res)
+            self.assertEqual(base64.urlsafe_b64decode(bstr), res)
+
+        # Normal alphabet characters not discarded when alternative given
+        res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
+        self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
+        self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
  
      def test_b32encode(self):
          eq = self.assertEqual
@@ -220,6 +227,10 @@ class BaseXYTestCase(unittest.TestCase):
          eq(base64.b16decode('0102abcdef', True), '\x01\x02\xab\xcd\xef')
          # Non-bytes
          eq(base64.b16decode(bytearray("0102ABCDEF")), '\x01\x02\xab\xcd\xef')
+        # Non-alphabet characters
+        self.assertRaises(TypeError, base64.b16decode, '0102AG')
+        # Incorrect "padding"
+        self.assertRaises(TypeError, base64.b16decode, '010')
  
  
  \f
author	Martin Panter <vadmium+py@gmail.com>
	Tue, 23 Feb 2016 22:30:50 +0000 (22:30 +0000)
committer	Martin Panter <vadmium+py@gmail.com>
	Tue, 23 Feb 2016 22:30:50 +0000 (22:30 +0000)
Doc/library/base64.rst		patch | blob | history
Lib/base64.py		patch | blob | history
Lib/test/test_base64.py		patch | blob | history