granicus.if.org Git - python/commitdiff
bpo-17852: Maintain a list of BufferedWriter objects. Flush them on exit. (#3372)
author Neil Schemenauer <nas-github@arctrix.com>
Fri, 22 Sep 2017 17:17:30 +0000 (10:17 -0700)
committer GitHub <noreply@github.com>
Fri, 22 Sep 2017 17:17:30 +0000 (10:17 -0700)
* Maintain a list of BufferedWriter objects.  Flush them on exit.

In Python 3, the buffer and the underlying file object are separate
and so the order in which objects are finalized matters.  This is
unlike Python 2 where the file and buffer were a single object and
finalization was done for both at the same time.  In Python 3, if
the file is finalized and closed before the buffer then the data in
the buffer is lost.

This change adds a doubly linked list of open file buffers.  An atexit
hook ensures they are flushed before proceeding with interpreter
shutdown.  This addition does not remove the need to properly close
files as there are other reasons why buffered data could get lost during
finalization.

Initial patch by Armin Rigo.

* Use weakref.WeakSet instead of WeakKeyDictionary.

* Simplify buffered doubly linked list types.

* In _flush_all_writers(), suppress errors from flush().

* Remove NEWS entry, use blurb.

* Take more care when flushing file buffers from atexit.

The previous implementation was not careful enough to avoid
causing issues in multi-threaded cases.  Check for buf->ok
and buf->finalizing before actually doing the flush.  Also,
increase the refcnt to ensure the object does not disappear.

Lib/_pyio.py
Misc/NEWS.d/next/Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst [new file with mode: 0644]
Modules/_io/_iomodule.c
Modules/_io/_iomodule.h
Modules/_io/bufferedio.c

index 1e105f27734c6b382710d559c10820fd0bdf40b0..6833883dadbdd9e61cdcd922cbea4ec7134818b0 100644 (file)
@@ -1182,6 +1182,7 @@ class BufferedWriter(_BufferedIOMixin):
         self.buffer_size = buffer_size
         self._write_buf = bytearray()
         self._write_lock = Lock()
+        _register_writer(self)
 
     def writable(self):
         return self.raw.writable()
@@ -2571,3 +2572,26 @@ class StringIO(TextIOWrapper):
     def detach(self):
         # This doesn't make sense on StringIO.
         self._unsupported("detach")
+
+
+# ____________________________________________________________
+
+import atexit, weakref
+
+_all_writers = weakref.WeakSet()
+
+def _register_writer(w):
+    # keep weak-ref to buffered writer
+    _all_writers.add(w)
+
+def _flush_all_writers():
+    # Ensure all buffered writers are flushed before proceeding with
+    # normal shutdown.  Otherwise, if the underlying file objects get
+    # finalized before the buffered writer wrapping it then any buffered
+    # data will be lost.
+    for w in _all_writers:
+        try:
+            w.flush()
+        except:
+            pass
+atexit.register(_flush_all_writers)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst
new file mode 100644 (file)
index 0000000..185664c
--- /dev/null
@@ -0,0 +1,2 @@
+Maintain a list of open buffered files, flush them before exiting the
+interpreter.  Based on a patch from Armin Rigo.
index f0621f4d4ab4bba7c3c98a939b73feba1bea4d1c..5db44f970d22ba6d5300ad267ee7baca741576b6 100644 (file)
@@ -766,6 +766,8 @@ PyInit__io(void)
         !(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0)))
         goto fail;
 
+    _Py_PyAtExit(_PyIO_atexit_flush);
+
     state->initialized = 1;
 
     return m;
index db8403774ead23dabf0efe781082a1a49019bf74..1dce5dada4e23a014451bb4e58be047ebaa87721 100644 (file)
@@ -183,3 +183,5 @@ extern PyObject *_PyIO_empty_str;
 extern PyObject *_PyIO_empty_bytes;
 
 extern PyTypeObject _PyBytesIOBuffer_Type;
+
+extern void _PyIO_atexit_flush(void);
index b2b9ade2c7c4333ae17fd0cdd29a2b3c95aa698c..edc4ba5a537632aa1352f03e7e5fbe602c36bbf4 100644 (file)
@@ -197,7 +197,7 @@ bufferediobase_write(PyObject *self, PyObject *args)
 }
 
 
-typedef struct {
+typedef struct _buffered {
     PyObject_HEAD
 
     PyObject *raw;
@@ -239,8 +239,18 @@ typedef struct {
 
     PyObject *dict;
     PyObject *weakreflist;
+
+    /* a doubly-linked chained list of "buffered" objects that need to
+       be flushed when the process exits */
+    struct _buffered *next, *prev;
 } buffered;
 
+/* the actual list of buffered objects */
+static buffered buffer_list_end = {
+    .next = &buffer_list_end,
+    .prev = &buffer_list_end
+};
+
 /*
     Implementation notes:
 
@@ -378,10 +388,21 @@ _enter_buffered_busy(buffered *self)
         (self->buffer_size * (size / self->buffer_size)))
 
 
+static void
+remove_from_linked_list(buffered *self)
+{
+    self->next->prev = self->prev;
+    self->prev->next = self->next;
+    self->prev = NULL;
+    self->next = NULL;
+}
+
 static void
 buffered_dealloc(buffered *self)
 {
     self->finalizing = 1;
+    if (self->next != NULL)
+        remove_from_linked_list(self);
     if (_PyIOBase_finalize((PyObject *) self) < 0)
         return;
     _PyObject_GC_UNTRACK(self);
@@ -1805,10 +1826,38 @@ _io_BufferedWriter___init___impl(buffered *self, PyObject *raw,
     self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type &&
                                 Py_TYPE(raw) == &PyFileIO_Type);
 
+    if (self->next == NULL) {
+        self->prev = &buffer_list_end;
+        self->next = buffer_list_end.next;
+        buffer_list_end.next->prev = self;
+        buffer_list_end.next = self;
+    }
+
     self->ok = 1;
     return 0;
 }
 
+/*
+* Ensure all buffered writers are flushed before proceeding with
+* normal shutdown.  Otherwise, if the underlying file objects get
+* finalized before the buffered writer wrapping it then any buffered
+* data will be lost.
+*/
+void _PyIO_atexit_flush(void)
+{
+    while (buffer_list_end.next != &buffer_list_end) {
+        buffered *buf = buffer_list_end.next;
+        remove_from_linked_list(buf);
+        if (buf->ok && !buf->finalizing) {
+            /* good state and not finalizing */
+            Py_INCREF(buf);
+            buffered_flush(buf, NULL);
+            Py_DECREF(buf);
+            PyErr_Clear();
+        }
+    }
+}
+
 static Py_ssize_t
 _bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len)
 {