__init__(): Fix an invariant, that the charset item in a chunk tuple must be a Charset instance

author Barry Warsaw <barry@python.org>

Mon, 14 Oct 2002 15:13:17 +0000 (15:13 +0000)

committer Barry Warsaw <barry@python.org>

Mon, 14 Oct 2002 15:13:17 +0000 (15:13 +0000)
author Barry Warsaw <barry@python.org>
Mon, 14 Oct 2002 15:13:17 +0000 (15:13 +0000)
committer Barry Warsaw <barry@python.org>
Mon, 14 Oct 2002 15:13:17 +0000 (15:13 +0000)
diff --git a/Lib/email/Header.py b/Lib/email/Header.py

index 378b3dd13a7404409b8b922c725070a3fa761d96..9bbc32fd84c51b5e7064d4370101b53c623817ab 100644 (file)
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -153,6 +153,8 @@ class Header:
          """
          if charset is None:
              charset = USASCII
+        if not isinstance(charset, Charset):
+            charset = Charset(charset)
          self._charset = charset
          self._continuation_ws = continuation_ws
          cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
@@ -233,14 +235,21 @@ class Header:
          self._chunks.append((s, charset))
  
      def _split(self, s, charset, firstline=False):
-        # Split up a header safely for use with encode_chunks.  BAW: this
-        # appears to be a private convenience method.
+        # Split up a header safely for use with encode_chunks.
          splittable = charset.to_splittable(s)
          encoded = charset.from_splittable(splittable)
          elen = charset.encoded_header_len(encoded)
  
          if elen <= self._maxlinelen:
              return [(encoded, charset)]
+        # If we have undetermined raw 8bit characters sitting in a byte
+        # string, we really don't know what the right thing to do is.  We
+        # can't really split it because it might be multibyte data which we
+        # could break if we split it between pairs.  The least harm seems to
+        # be to not split the header at all, but that means they could go out
+        # longer than maxlinelen.
+        elif charset == '8bit':
+            return [(s, charset)]
          # BAW: I'm not sure what the right test here is.  What we're trying to
          # do is be faithful to RFC 2822's recommendation that ($2.2.3):
          #
author	Barry Warsaw <barry@python.org>
	Mon, 14 Oct 2002 15:13:17 +0000 (15:13 +0000)
committer	Barry Warsaw <barry@python.org>
	Mon, 14 Oct 2002 15:13:17 +0000 (15:13 +0000)