bpo-32236: open() emits RuntimeWarning if buffering=1 for binary mode (GH-4842)

author Alexey Izbyshev <izbyshev@ispras.ru>

Sat, 20 Oct 2018 00:22:31 +0000 (03:22 +0300)

committer Victor Stinner <vstinner@redhat.com>

Sat, 20 Oct 2018 00:22:31 +0000 (02:22 +0200)
author Alexey Izbyshev <izbyshev@ispras.ru>
Sat, 20 Oct 2018 00:22:31 +0000 (03:22 +0300)
committer Victor Stinner <vstinner@redhat.com>
Sat, 20 Oct 2018 00:22:31 +0000 (02:22 +0200)
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst

index b9c04c241a8b48fb5ac5781d921b0873a170cdfd..b323ab527d26ed5cbcbb2b19e845626e22fb0fb6 100644 (file)
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -174,7 +174,7 @@ recommended approach for working with encoded text files, this module
  provides additional utility functions and classes that allow the use of a
  wider range of codecs when working with binary files:
  
-.. function:: open(filename, mode='r', encoding=None, errors='strict', buffering=1)
+.. function:: open(filename, mode='r', encoding=None, errors='strict', buffering=-1)
  
     Open an encoded file using the given *mode* and return an instance of
     :class:`StreamReaderWriter`, providing transparent encoding/decoding.
@@ -194,8 +194,8 @@ wider range of codecs when working with binary files:
     *errors* may be given to define the error handling. It defaults to ``'strict'``
     which causes a :exc:`ValueError` to be raised in case an encoding error occurs.
  
-   *buffering* has the same meaning as for the built-in :func:`open` function.  It
-   defaults to line buffered.
+   *buffering* has the same meaning as for the built-in :func:`open` function.
+   It defaults to -1 which means that the default buffer size will be used.
  
  
  .. function:: EncodedFile(file, data_encoding, file_encoding=None, errors='strict')
diff --git a/Lib/_pyio.py b/Lib/_pyio.py

index 01ef5b7b0bb5623c83c6b75abcf5f56a5fe5459d..b8975ff533d7eb8dcfb6dd3d1bd5931724cee97f 100644 (file)
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -198,6 +198,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
          raise ValueError("binary mode doesn't take an errors argument")
      if binary and newline is not None:
          raise ValueError("binary mode doesn't take a newline argument")
+    if binary and buffering == 1:
+        import warnings
+        warnings.warn("line buffering (buffering=1) isn't supported in binary "
+                      "mode, the default buffer size will be used",
+                      RuntimeWarning, 2)
      raw = FileIO(file,
                   (creating and "x" or "") +
                   (reading and "r" or "") +
diff --git a/Lib/codecs.py b/Lib/codecs.py

index a70ed20f2bc794a3b31cd0e0ab76c107093bde40..6b028adb1d28e6b30aa140cc621cd60a2101c27a 100644 (file)
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -862,7 +862,7 @@ class StreamRecoder:
  
  ### Shortcuts
  
-def open(filename, mode='r', encoding=None, errors='strict', buffering=1):
+def open(filename, mode='r', encoding=None, errors='strict', buffering=-1):
  
      """ Open an encoded file using the given mode and return
          a wrapped version providing transparent encoding/decoding.
@@ -883,7 +883,8 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=1):
          encoding error occurs.
  
          buffering has the same meaning as for the builtin open() API.
-        It defaults to line buffered.
+        It defaults to -1 which means that the default buffer size will
+        be used.
  
          The returned wrapped file object provides an extra attribute
          .encoding which allows querying the used encoding. This
diff --git a/Lib/subprocess.py b/Lib/subprocess.py

index c827113a933fd767563de0aec8d9ae32f98a6be8..5db6f0cc24ba331e35a8f124d7072a601ede156f 100644 (file)
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -743,12 +743,21 @@ class Popen(object):
  
          self._closed_child_pipe_fds = False
  
+        if self.text_mode:
+            if bufsize == 1:
+                line_buffering = True
+                # Use the default buffer size for the underlying binary streams
+                # since they don't support line buffering.
+                bufsize = -1
+            else:
+                line_buffering = False
+
          try:
              if p2cwrite != -1:
                  self.stdin = io.open(p2cwrite, 'wb', bufsize)
                  if self.text_mode:
                      self.stdin = io.TextIOWrapper(self.stdin, write_through=True,
-                            line_buffering=(bufsize == 1),
+                            line_buffering=line_buffering,
                              encoding=encoding, errors=errors)
              if c2pread != -1:
                  self.stdout = io.open(c2pread, 'rb', bufsize)
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py

index ed0d46de6426d61b8089c7e4020fc5209429dec4..01e8935cc8d7614faf788ac8f265a4e03ed2977c 100644 (file)
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -107,7 +107,8 @@ __all__ = [
      # threads
      "threading_setup", "threading_cleanup", "reap_threads", "start_threads",
      # miscellaneous
-    "check_warnings", "check_no_resource_warning", "EnvironmentVarGuard",
+    "check_warnings", "check_no_resource_warning", "check_no_warnings",
+    "EnvironmentVarGuard",
      "run_with_locale", "swap_item",
      "swap_attr", "Matcher", "set_memlimit", "SuppressCrashReport", "sortdict",
      "run_with_tz", "PGO", "missing_compiler_executable", "fd_count",
@@ -1252,6 +1253,30 @@ def check_warnings(*filters, **kwargs):
      return _filterwarnings(filters, quiet)
  
  
+@contextlib.contextmanager
+def check_no_warnings(testcase, message='', category=Warning, force_gc=False):
+    """Context manager to check that no warnings are emitted.
+
+    This context manager enables a given warning within its scope
+    and checks that no warnings are emitted even with that warning
+    enabled.
+
+    If force_gc is True, a garbage collection is attempted before checking
+    for warnings. This may help to catch warnings emitted when objects
+    are deleted, such as ResourceWarning.
+
+    Other keyword arguments are passed to warnings.filterwarnings().
+    """
+    with warnings.catch_warnings(record=True) as warns:
+        warnings.filterwarnings('always',
+                                message=message,
+                                category=category)
+        yield
+        if force_gc:
+            gc_collect()
+    testcase.assertEqual(warns, [])
+
+
  @contextlib.contextmanager
  def check_no_resource_warning(testcase):
      """Context manager to check that no ResourceWarning is emitted.
@@ -1266,11 +1291,8 @@ def check_no_resource_warning(testcase):
      You must remove the object which may emit ResourceWarning before
      the end of the context manager.
      """
-    with warnings.catch_warnings(record=True) as warns:
-        warnings.filterwarnings('always', category=ResourceWarning)
+    with check_no_warnings(testcase, category=ResourceWarning, force_gc=True):
          yield
-        gc_collect()
-    testcase.assertEqual(warns, [])
  
  
  class CleanImport(object):
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py

index 5ec9bbbb1230e65634ea8e1339324f6ffe1d1291..bc812eac3742ea41e78ae3b4aa82a8581d22f6cd 100644 (file)
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -169,10 +169,10 @@ class CmdLineTest(unittest.TestCase):
      @contextlib.contextmanager
      def interactive_python(self, separate_stderr=False):
          if separate_stderr:
-            p = spawn_python('-i', bufsize=1, stderr=subprocess.PIPE)
+            p = spawn_python('-i', stderr=subprocess.PIPE)
              stderr = p.stderr
          else:
-            p = spawn_python('-i', bufsize=1, stderr=subprocess.STDOUT)
+            p = spawn_python('-i', stderr=subprocess.STDOUT)
              stderr = p.stdout
          try:
              # Drain stderr until prompt
diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py

index f58d1dae6045f49ff17ae8a852ed6ab8371f92b0..cd642e7aaf8bb8634a14f751c924585369b355bc 100644 (file)
--- a/Lib/test/test_file.py
+++ b/Lib/test/test_file.py
@@ -169,22 +169,33 @@ class OtherFileTests:
              f.close()
              self.fail("no error for invalid mode: %s" % bad_mode)
  
+    def _checkBufferSize(self, s):
+        try:
+            f = self.open(TESTFN, 'wb', s)
+            f.write(str(s).encode("ascii"))
+            f.close()
+            f.close()
+            f = self.open(TESTFN, 'rb', s)
+            d = int(f.read().decode("ascii"))
+            f.close()
+            f.close()
+        except OSError as msg:
+            self.fail('error setting buffer size %d: %s' % (s, str(msg)))
+        self.assertEqual(d, s)
+
      def testSetBufferSize(self):
          # make sure that explicitly setting the buffer size doesn't cause
          # misbehaviour especially with repeated close() calls
-        for s in (-1, 0, 1, 512):
-            try:
-                f = self.open(TESTFN, 'wb', s)
-                f.write(str(s).encode("ascii"))
-                f.close()
-                f.close()
-                f = self.open(TESTFN, 'rb', s)
-                d = int(f.read().decode("ascii"))
-                f.close()
-                f.close()
-            except OSError as msg:
-                self.fail('error setting buffer size %d: %s' % (s, str(msg)))
-            self.assertEqual(d, s)
+        for s in (-1, 0, 512):
+            with support.check_no_warnings(self,
+                                           message='line buffering',
+                                           category=RuntimeWarning):
+                self._checkBufferSize(s)
+
+        # test that attempts to use line buffering in binary mode cause
+        # a warning
+        with self.assertWarnsRegex(RuntimeWarning, 'line buffering'):
+            self._checkBufferSize(1)
  
      def testTruncateOnWindows(self):
          # SF bug <http://www.python.org/sf/801631>
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py

index c68b2fea858b718c928b983f59813f86184b0c04..abd55387bdbf87ca7f3f33c59497851b5d313252 100644 (file)
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -593,7 +593,7 @@ class IOTest(unittest.TestCase):
              self.large_file_ops(f)
  
      def test_with_open(self):
-        for bufsize in (0, 1, 100):
+        for bufsize in (0, 100):
              f = None
              with self.open(support.TESTFN, "wb", bufsize) as f:
                  f.write(b"xxx")
diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py

index c56e1b632150531b6366ea8dbe5b49507fbc977c..b0b6b06e92759e3c0b3a0a5816e667524fba0351 100644 (file)
--- a/Lib/test/test_subprocess.py
+++ b/Lib/test/test_subprocess.py
@@ -1136,7 +1136,8 @@ class ProcessTestCase(BaseTestCase):
          # line is not flushed in binary mode with bufsize=1.
          # we should get empty response
          line = b'line' + os.linesep.encode() # assume ascii-based locale
-        self._test_bufsize_equal_one(line, b'', universal_newlines=False)
+        with self.assertWarnsRegex(RuntimeWarning, 'line buffering'):
+            self._test_bufsize_equal_one(line, b'', universal_newlines=False)
  
      def test_leaking_fds_on_error(self):
          # see bug #5179: Popen leaks file descriptors to PIPEs if
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst b/Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst

new file mode 100644 (file)

index 0000000..6fc1387
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst
@@ -0,0 +1,2 @@
+Warn that line buffering is not supported if :func:`open` is called with
+binary mode and ``buffering=1``.
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c

index eedca011d2ed864f221e4a386a671f5d36f1c9fe..965c4846bdb8b46f9a3dd5da5d48eed5eceb792d 100644 (file)
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -363,6 +363,15 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
          goto error;
      }
  
+    if (binary && buffering == 1) {
+        if (PyErr_WarnEx(PyExc_RuntimeWarning,
+                         "line buffering (buffering=1) isn't supported in "
+                         "binary mode, the default buffer size will be used",
+                         1) < 0) {
+           goto error;
+        }
+    }
+
      /* Create the Raw file stream */
      {
          PyObject *RawIO_class = (PyObject *)&PyFileIO_Type;
author	Alexey Izbyshev <izbyshev@ispras.ru>
	Sat, 20 Oct 2018 00:22:31 +0000 (03:22 +0300)
committer	Victor Stinner <vstinner@redhat.com>
	Sat, 20 Oct 2018 00:22:31 +0000 (02:22 +0200)
Doc/library/codecs.rst		patch \| blob \| history
Lib/_pyio.py		patch \| blob \| history
Lib/codecs.py		patch \| blob \| history
Lib/subprocess.py		patch \| blob \| history
Lib/test/support/__init__.py		patch \| blob \| history
Lib/test/test_cmd_line_script.py		patch \| blob \| history
Lib/test/test_file.py		patch \| blob \| history
Lib/test/test_io.py		patch \| blob \| history
Lib/test/test_subprocess.py		patch \| blob \| history
Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst	[new file with mode: 0644]	patch \| blob
Modules/_io/_iomodule.c		patch \| blob \| history