bpo-27397: Make email module properly handle invalid-length base64 strings (#7583)

author Tal Einat <taleinat+github@gmail.com>

Tue, 12 Jun 2018 12:46:22 +0000 (15:46 +0300)

committer GitHub <noreply@github.com>

Tue, 12 Jun 2018 12:46:22 +0000 (15:46 +0300)
author Tal Einat <taleinat+github@gmail.com>
Tue, 12 Jun 2018 12:46:22 +0000 (15:46 +0300)
committer GitHub <noreply@github.com>
Tue, 12 Jun 2018 12:46:22 +0000 (15:46 +0300)
diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst

index 5838767b18f742cf68b9b6a1ef10e72c0327b323..511ad16358319796a3940064e504e3668e61bc4d 100644 (file)
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -108,3 +108,7 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
  * :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64
    encoded bytes, characters outside the base64 alphabet were encountered.
    The characters are ignored, but the resulting decoded bytes may be invalid.
+
+* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
+  bytes, the number of non-padding base64 characters was invalid (1 more than
+  a multiple of 4).  The encoded block was kept as-is.
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py

index c40ffa917b56c2c3a5ca129450add75ba52959c9..295ae7eb21237c6d9dd3be3bdcdc24f378b7d64a 100644 (file)
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -98,30 +98,42 @@ def len_q(bstring):
  #
  
  def decode_b(encoded):
-    defects = []
+    # First try encoding with validate=True, fixing the padding if needed.
+    # This will succeed only if encoded includes no invalid characters.
      pad_err = len(encoded) % 4
-    if pad_err:
-        defects.append(errors.InvalidBase64PaddingDefect())
-        padded_encoded = encoded + b'==='[:4-pad_err]
-    else:
-        padded_encoded = encoded
+    missing_padding = b'==='[:4-pad_err] if pad_err else b''
      try:
-        return base64.b64decode(padded_encoded, validate=True), defects
+        return (
+            base64.b64decode(encoded + missing_padding, validate=True),
+            [errors.InvalidBase64PaddingDefect()] if pad_err else [],
+        )
      except binascii.Error:
-        # Since we had correct padding, this must an invalid char error.
-        defects = [errors.InvalidBase64CharactersDefect()]
+        # Since we had correct padding, this is likely an invalid char error.
+        #
          # The non-alphabet characters are ignored as far as padding
-        # goes, but we don't know how many there are.  So we'll just
-        # try various padding lengths until something works.
-        for i in 0, 1, 2, 3:
+        # goes, but we don't know how many there are.  So try without adding
+        # padding to see if it works.
+        try:
+            return (
+                base64.b64decode(encoded, validate=False),
+                [errors.InvalidBase64CharactersDefect()],
+            )
+        except binascii.Error:
+            # Add as much padding as could possibly be necessary (extra padding
+            # is ignored).
              try:
-                return base64.b64decode(encoded+b'='*i, validate=False), defects
+                return (
+                    base64.b64decode(encoded + b'==', validate=False),
+                    [errors.InvalidBase64CharactersDefect(),
+                     errors.InvalidBase64PaddingDefect()],
+                )
              except binascii.Error:
-                if i==0:
-                    defects.append(errors.InvalidBase64PaddingDefect())
-        else:
-            # This should never happen.
-            raise AssertionError("unexpected binascii.Error")
+                # This only happens when the encoded string's length is 1 more
+                # than a multiple of 4, which is invalid.
+                #
+                # bpo-27397: Just return the encoded string since there's no
+                # way to decode.
+                return encoded, [errors.InvalidBase64LengthDefect()]
  
  def encode_b(bstring):
      return base64.b64encode(bstring).decode('ascii')
diff --git a/Lib/email/errors.py b/Lib/email/errors.py

index 791239fa6a54cc0a826d1c53df8c42cba5a9619c..d28a6800104babc44734bf4b75f4f94785178bae 100644 (file)
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect):
  class InvalidBase64CharactersDefect(MessageDefect):
      """base64 encoded sequence had characters not in base64 alphabet"""
  
+class InvalidBase64LengthDefect(MessageDefect):
+    """base64 encoded sequence had invalid length (1 mod 4)"""
+
  # These errors are specific to header parsing.
  
  class HeaderDefect(MessageDefect):
diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py

index 900e1d0e64d4341c4ca286c7127a34d0e34dc0af..5a59aebba89be4c7f7fb09364bec7ac82a1807c1 100644 (file)
--- a/Lib/test/test_email/test__encoded_words.py
+++ b/Lib/test/test_email/test__encoded_words.py
@@ -33,7 +33,10 @@ class TestDecodeB(TestEmailBase):
          self._test(b'Zm9v', b'foo')
  
      def test_missing_padding(self):
+        # 1 missing padding character
          self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect])
+        # 2 missing padding characters
+        self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect])
  
      def test_invalid_character(self):
          self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect])
@@ -42,6 +45,9 @@ class TestDecodeB(TestEmailBase):
          self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect,
                                         errors.InvalidBase64PaddingDefect])
  
+    def test_invalid_length(self):
+        self._test(b'abcde', b'abcde', [errors.InvalidBase64LengthDefect])
+
  
  class TestDecode(TestEmailBase):
  
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py

index 5cdc4bcecad44769c57889b98032ba15568a28ce..5036de2ca0c3586c36239899135c6f6152df0123 100644 (file)
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -347,6 +347,15 @@ class TestParser(TestParserMixin, TestEmailBase):
               errors.InvalidBase64PaddingDefect],
              '')
  
+    def test_get_unstructured_invalid_base64_length(self):
+        # bpo-27397: Return the encoded string since there's no way to decode.
+        self._test_get_x(self._get_unst,
+            '=?utf-8?b?abcde?=',
+            'abcde',
+            'abcde',
+            [errors.InvalidBase64LengthDefect],
+            '')
+
      def test_get_unstructured_no_whitespace_between_ews(self):
          self._test_get_x(self._get_unst,
              '=?utf-8?q?foo?==?utf-8?q?bar?=',
diff --git a/Lib/test/test_email/test_defect_handling.py b/Lib/test/test_email/test_defect_handling.py

index f36b907573995d774032a6cadd83eff5a08f5488..781f657418220c282ca94dff719db91844071c2a 100644 (file)
--- a/Lib/test/test_email/test_defect_handling.py
+++ b/Lib/test/test_email/test_defect_handling.py
@@ -254,6 +254,23 @@ class TestDefectsBase:
          self.assertDefectsEqual(self.get_defects(msg),
                                  [errors.InvalidBase64CharactersDefect])
  
+    def test_invalid_length_of_base64_payload(self):
+        source = textwrap.dedent("""\
+            Subject: test
+            MIME-Version: 1.0
+            Content-Type: text/plain; charset="utf-8"
+            Content-Transfer-Encoding: base64
+
+            abcde
+            """)
+        msg = self._str_msg(source)
+        with self._raise_point(errors.InvalidBase64LengthDefect):
+            payload = msg.get_payload(decode=True)
+        if self.raise_expected: return
+        self.assertEqual(payload, b'abcde')
+        self.assertDefectsEqual(self.get_defects(msg),
+                                [errors.InvalidBase64LengthDefect])
+
      def test_missing_ending_boundary(self):
          source = textwrap.dedent("""\
              To: 1@harrydomain4.com
diff --git a/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst b/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst

new file mode 100644 (file)

index 0000000..1098172
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst
@@ -0,0 +1 @@
+Make email module properly handle invalid-length base64 strings.
author	Tal Einat <taleinat+github@gmail.com>
	Tue, 12 Jun 2018 12:46:22 +0000 (15:46 +0300)
committer	GitHub <noreply@github.com>
	Tue, 12 Jun 2018 12:46:22 +0000 (15:46 +0300)
Doc/library/email.errors.rst		patch \| blob \| history
Lib/email/_encoded_words.py		patch \| blob \| history
Lib/email/errors.py		patch \| blob \| history
Lib/test/test_email/test__encoded_words.py		patch \| blob \| history
Lib/test/test_email/test__header_value_parser.py		patch \| blob \| history
Lib/test/test_email/test_defect_handling.py		patch \| blob \| history
Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst	[new file with mode: 0644]	patch \| blob