Fix issue #1753: TextIOWrapper.write() wrote a UTF BOM for every string instead of only once.

author Alexandre Vassalotti <alexandre@peadrop.com>

Mon, 7 Jan 2008 18:30:48 +0000 (18:30 +0000)

committer Alexandre Vassalotti <alexandre@peadrop.com>

Mon, 7 Jan 2008 18:30:48 +0000 (18:30 +0000)
author Alexandre Vassalotti <alexandre@peadrop.com>
Mon, 7 Jan 2008 18:30:48 +0000 (18:30 +0000)
committer Alexandre Vassalotti <alexandre@peadrop.com>
Mon, 7 Jan 2008 18:30:48 +0000 (18:30 +0000)
diff --git a/Lib/io.py b/Lib/io.py

index 2a5348d172076caa4474a3dbe456ceea3ce436cc..e427fe6969ab535239deef17a53652f8779a7866 100644 (file)
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -1182,6 +1182,7 @@ class TextIOWrapper(TextIOBase):
          self._readnl = newline
          self._writetranslate = newline != ''
          self._writenl = newline or os.linesep
+        self._encoder = None
          self._decoder = None
          self._pending = ""
          self._snapshot = None
@@ -1240,8 +1241,9 @@ class TextIOWrapper(TextIOBase):
          haslf = (self._writetranslate or self._line_buffering) and "\n" in s
          if haslf and self._writetranslate and self._writenl != "\n":
              s = s.replace("\n", self._writenl)
+        encoder = self._encoder or self._get_encoder()
          # XXX What if we were just reading?
-        b = s.encode(self._encoding, self._errors)
+        b = encoder.encode(s)
          self.buffer.write(b)
          if self._line_buffering and (haslf or "\r" in s):
              self.flush()
@@ -1250,11 +1252,13 @@ class TextIOWrapper(TextIOBase):
              self._decoder.reset()
          return length
  
+    def _get_encoder(self):
+        make_encoder = codecs.getincrementalencoder(self._encoding)
+        self._encoder = make_encoder(self._errors)
+        return self._encoder
+
      def _get_decoder(self):
          make_decoder = codecs.getincrementaldecoder(self._encoding)
-        if make_decoder is None:
-            raise IOError("Can't find an incremental decoder for encoding %s" %
-                          self._encoding)
          decoder = make_decoder(self._errors)
          if self._readuniversal:
              decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py

index 33b32e0a1bbac42704c2c3b52d9c1105c02246b6..4963416ffaaae6d5a4f8b9a156bcdb272a56d9e6 100644 (file)
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -765,6 +765,24 @@ class TextIOWrapperTest(unittest.TestCase):
          f.readline()
          f.tell()
  
+    def testEncodedWrites(self):
+        data = "1234567890"
+        tests = ("utf-16",
+                 "utf-16-le",
+                 "utf-16-be",
+                 "utf-32",
+                 "utf-32-le",
+                 "utf-32-be")
+        for encoding in tests:
+            buf = io.BytesIO()
+            f = io.TextIOWrapper(buf, encoding=encoding)
+            # Check if the BOM is written only once (see issue1753).
+            f.write(data)
+            f.write(data)
+            f.seek(0)
+            self.assertEquals(f.read(), data * 2)
+            self.assertEquals(buf.getvalue(), (data * 2).encode(encoding))
+
      def timingTest(self):
          timer = time.time
          enc = "utf8"
author	Alexandre Vassalotti <alexandre@peadrop.com>
	Mon, 7 Jan 2008 18:30:48 +0000 (18:30 +0000)
committer	Alexandre Vassalotti <alexandre@peadrop.com>
	Mon, 7 Jan 2008 18:30:48 +0000 (18:30 +0000)
Lib/io.py		patch | blob | history
Lib/test/test_io.py		patch | blob | history