granicus.if.org Git - python/commitdiff
Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.
author: Antoine Pitrou <solipsis@pitrou.net>
Mon, 13 Apr 2015 18:01:21 +0000 (20:01 +0200)
committer: Antoine Pitrou <solipsis@pitrou.net>
Mon, 13 Apr 2015 18:01:21 +0000 (20:01 +0200)
Lib/_pyio.py
Lib/test/test_io.py
Misc/NEWS
Modules/_io/textio.c

index 3ed02e410dc2f77e1ad0e7e26e36a5c3bee6c430..c0b5fd12af53491afc4a9f5b34b5be0976f70a83 100644 (file)
@@ -1865,6 +1865,19 @@ class TextIOWrapper(TextIOBase):
         return buffer
 
     def seek(self, cookie, whence=0):
+        def _reset_encoder(position):
+            """Reset the encoder (merely useful for proper BOM handling)"""
+            try:
+                encoder = self._encoder or self._get_encoder()
+            except LookupError:
+                # Sometimes the encoder doesn't exist
+                pass
+            else:
+                if position != 0:
+                    encoder.setstate(0)
+                else:
+                    encoder.reset()
+
         if self.closed:
             raise ValueError("tell on closed file")
         if not self._seekable:
@@ -1885,6 +1898,7 @@ class TextIOWrapper(TextIOBase):
             self._snapshot = None
             if self._decoder:
                 self._decoder.reset()
+            _reset_encoder(position)
             return position
         if whence != 0:
             raise ValueError("unsupported whence (%r)" % (whence,))
@@ -1922,17 +1936,7 @@ class TextIOWrapper(TextIOBase):
                 raise OSError("can't restore logical file position")
             self._decoded_chars_used = chars_to_skip
 
-        # Finally, reset the encoder (merely useful for proper BOM handling)
-        try:
-            encoder = self._encoder or self._get_encoder()
-        except LookupError:
-            # Sometimes the encoder doesn't exist
-            pass
-        else:
-            if cookie != 0:
-                encoder.setstate(0)
-            else:
-                encoder.reset()
+        _reset_encoder(cookie)
         return cookie
 
     def read(self, size=None):
index dfa3d771f4202e64469acd19ae70e0c7eb27029b..ea109acaa0c5c3d95b97c617912733f74ebb23c4 100644 (file)
@@ -2669,6 +2669,19 @@ class TextIOWrapperTest(unittest.TestCase):
             with self.open(filename, 'rb') as f:
                 self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
 
+    def test_seek_append_bom(self):
+        # Same test, but first seek to the start and then to the end
+        filename = support.TESTFN
+        for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+            with self.open(filename, 'w', encoding=charset) as f:
+                f.write('aaa')
+            with self.open(filename, 'a', encoding=charset) as f:
+                f.seek(0)
+                f.seek(0, self.SEEK_END)
+                f.write('xxx')
+            with self.open(filename, 'rb') as f:
+                self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
+
     def test_errors_property(self):
         with self.open(support.TESTFN, "w") as f:
             self.assertEqual(f.errors, "strict")
index fe91ae2c1a163d1d8c64de652efbdc49611a504e..6ed85efc4844a4421bc8e612ccbe1ea13c4395ad 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -29,6 +29,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #22982: Improve BOM handling when seeking to multiple positions of
+  a writable text file.
+
 - Issue #23865: close() methods in multiple modules now are idempotent and more
   robust at shutdown. If multiple resources need to be released, they are all
   released even if errors occur.
index d1c0d012328080d08279fde4d0c6306563465716..b419275a8093a8d3e1e1f11d4877ee56fca05821 100644 (file)
@@ -2042,11 +2042,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
 }
 
 static int
-_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
+_textiowrapper_encoder_reset(textio *self, int start_of_stream)
 {
     PyObject *res;
-    /* Same as _textiowrapper_decoder_setstate() above. */
-    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
+    if (start_of_stream) {
         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
         self->encoding_start_of_stream = 1;
     }
@@ -2061,6 +2060,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
     return 0;
 }
 
+static int
+_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
+{
+    /* Same as _textiowrapper_decoder_setstate() above. */
+    return _textiowrapper_encoder_reset(
+        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
+}
+
 static PyObject *
 textiowrapper_seek(textio *self, PyObject *args)
 {
@@ -2128,7 +2135,17 @@ textiowrapper_seek(textio *self, PyObject *args)
         }
 
         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
-        Py_XDECREF(cookieObj);
+        Py_CLEAR(cookieObj);
+        if (res == NULL)
+            goto fail;
+        if (self->encoder) {
+            /* If seek() == 0, we are at the start of stream, otherwise not */
+            cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
+            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
+                Py_DECREF(res);
+                goto fail;
+            }
+        }
         return res;
     }
     else if (whence != 0) {