#1368247: make set_charset/MIMEText automatically encode unicode _payload.

author R. David Murray <rdmurray@bitdance.com>

Wed, 2 Jun 2010 22:03:15 +0000 (22:03 +0000)

committer R. David Murray <rdmurray@bitdance.com>

Wed, 2 Jun 2010 22:03:15 +0000 (22:03 +0000)
author R. David Murray <rdmurray@bitdance.com>
Wed, 2 Jun 2010 22:03:15 +0000 (22:03 +0000)
committer R. David Murray <rdmurray@bitdance.com>
Wed, 2 Jun 2010 22:03:15 +0000 (22:03 +0000)
diff --git a/Doc/library/email.message.rst b/Doc/library/email.message.rst

index 5ebc96086090728e06d5c7b60c43041b4603b033..fa1df8888fe49b4d6451f79ff5e44b17ce058907 100644 (file)
--- a/Doc/library/email.message.rst
+++ b/Doc/library/email.message.rst
@@ -136,9 +136,10 @@ Here are the methods of the :class:`Message` class:
        :mailheader:`Content-Type` header. Anything else will generate a
        :exc:`TypeError`.
  
-      The message will be assumed to be of type :mimetype:`text/\*` encoded with
-      *charset.input_charset*.  It will be converted to *charset.output_charset*
-      and encoded properly, if needed, when generating the plain text
+      The message will be assumed to be of type :mimetype:`text/\*`, with the
+      payload either in unicode or encoded with *charset.input_charset*.
+      It will be encoded or converted to *charset.output_charset*
+      and transfer encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (:mailheader:`MIME-Version`,
        :mailheader:`Content-Type`, :mailheader:`Content-Transfer-Encoding`) will
        be added as needed.
diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst

index 10f3e37f80e609d4de994bd178cd120b437114fc..a092feb5eda1be013425d6e614101292ae3b18a9 100644 (file)
--- a/Doc/library/email.mime.rst
+++ b/Doc/library/email.mime.rst
@@ -191,9 +191,11 @@ Here are the classes:
     minor type and defaults to :mimetype:`plain`.  *_charset* is the character
     set of the text and is passed as a parameter to the
     :class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults
-   to ``us-ascii``.  No guessing or encoding is performed on the text data.
+   to ``us-ascii``.  If *_text* is unicode, it is encoded using the
+   *output_charset* of *_charset*, otherwise it is used as-is.
  
     .. versionchanged:: 2.4
-      The previously deprecated *_encoding* argument has been removed.  Encoding
-      happens implicitly based on the *_charset* argument.
+      The previously deprecated *_encoding* argument has been removed.  Content
+      Transfer Encoding now happens implicitly based on the *_charset*
+      argument.
  
diff --git a/Lib/email/message.py b/Lib/email/message.py

index 993a1ac0f9877bc3b86289b52f78087b39862f21..08423cd229897cb25998458e2da4b85d3f226f73 100644 (file)
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -256,6 +256,8 @@ class Message:
                              charset=charset.get_output_charset())
          else:
              self.set_param('charset', charset.get_output_charset())
+        if isinstance(self._payload, unicode):
+            self._payload = self._payload.encode(charset.output_charset)
          if str(charset) != charset.get_output_charset():
              self._payload = charset.body_encode(self._payload)
          if 'Content-Transfer-Encoding' not in self:
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py

index bf41be7570319ad47ce25662c9988e386ee52c31..7d0107936cc5f36dc1c087fd02b0be9d52a595f0 100644 (file)
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1045,6 +1045,31 @@ class TestMIMEText(unittest.TestCase):
          eq(msg.get_charset().input_charset, 'us-ascii')
          eq(msg['content-type'], 'text/plain; charset="us-ascii"')
  
+    def test_7bit_unicode_input(self):
+        eq = self.assertEqual
+        msg = MIMEText(u'hello there', _charset='us-ascii')
+        eq(msg.get_charset().input_charset, 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+
+    def test_7bit_unicode_input_no_charset(self):
+        eq = self.assertEqual
+        msg = MIMEText(u'hello there')
+        eq(msg.get_charset(), 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+        self.assertTrue('hello there' in msg.as_string())
+
+    def test_8bit_unicode_input(self):
+        teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+        eq = self.assertEqual
+        msg = MIMEText(teststr, _charset='utf-8')
+        eq(msg.get_charset().output_charset, 'utf-8')
+        eq(msg['content-type'], 'text/plain; charset="utf-8"')
+        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
+
+    def test_8bit_unicode_input_no_charset(self):
+        teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
+
  
  \f
  # Test complicated multipart/* messages
diff --git a/Misc/NEWS b/Misc/NEWS

index 4e0b0d2948608a77659aff5841dfcc867ff2c624..cd9da3773bfaa21fd690ca14ffa471edcdb39966 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -46,6 +46,9 @@ C-API
  Library
  -------
  
+- Issue #1368247: set_charset (and therefore MIMEText) now automatically
+  encodes a unicode _payload to the output_charset.
+
  - Issue #7150: Raise OverflowError if the result of adding or subtracting
    timedelta from date or datetime falls outside of the MINYEAR:MAXYEAR range.
author	R. David Murray <rdmurray@bitdance.com>
	Wed, 2 Jun 2010 22:03:15 +0000 (22:03 +0000)
committer	R. David Murray <rdmurray@bitdance.com>
	Wed, 2 Jun 2010 22:03:15 +0000 (22:03 +0000)
Doc/library/email.message.rst		patch \| blob \| history
Doc/library/email.mime.rst		patch \| blob \| history
Lib/email/message.py		patch \| blob \| history
Lib/email/test/test_email.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history