granicus.if.org Git - python/commitdiff
Issue #19977: When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale),
author: Victor Stinner <victor.stinner@gmail.com>
Tue, 18 Mar 2014 00:18:21 +0000 (01:18 +0100)
committer: Victor Stinner <victor.stinner@gmail.com>
Tue, 18 Mar 2014 00:18:21 +0000 (01:18 +0100)
:py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the
``surrogateescape`` error handler, instead of the ``strict`` error handler.

Doc/whatsnew/3.5.rst
Lib/test/test_sys.py
Misc/NEWS
Python/pythonrun.c

index 176160b3ed111d2caa818c25c94cff4231fdb569..2c044ae6c835e51a41960d01b2044e3091f3080f 100644 (file)
@@ -79,7 +79,10 @@ New built-in features:
 
 Implementation improvements:
 
-* None yet.
+* When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale),
+  :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the
+  ``surrogateescape`` error handler, instead of the ``strict`` error handler
+  (:issue:`19977`).
 
 Significantly Improved Library Modules:
 
index 5a9699ff2045f8725575966ae019bdffd30d634f..f3d0b42be243935cec84df1616e1f108a2ad94ce 100644 (file)
@@ -615,6 +615,50 @@ class SysModuleTest(unittest.TestCase):
             expected = None
         self.check_fsencoding(fs_encoding, expected)
 
+    @unittest.skipIf(sys.platform == 'win32',
+                     'test specific to UNIX')
+    def test_c_locale_surrogateescape(self):
+        # Force the POSIX locale
+        env = os.environ.copy()
+        env["LC_ALL"] = "C"
+        code = '\n'.join((
+            'import codecs, sys',
+            'def dump(name):',
+            '    std = getattr(sys, name)',
+            '    encoding = codecs.lookup(std.encoding).name',
+            '    print("%s: %s:%s" % (name, encoding, std.errors))',
+            'dump("stdin")',
+            'dump("stdout")',
+            'dump("stderr")',
+        ))
+        p = subprocess.Popen([sys.executable, "-I", "-c", code],
+                              stdout=subprocess.PIPE, env=env)
+        out = p.communicate()[0]
+        self.assertEqual(out,
+                         b'stdin: ascii:surrogateescape\n'
+                         b'stdout: ascii:surrogateescape\n'
+                         b'stderr: ascii:backslashreplace\n')
+
+        # replace the default error handler
+        env['PYTHONIOENCODING'] = ':strict'
+        p = subprocess.Popen([sys.executable, "-c", code],
+                              stdout=subprocess.PIPE, env=env)
+        out = p.communicate()[0]
+        self.assertEqual(out,
+                         b'stdin: ascii:strict\n'
+                         b'stdout: ascii:strict\n'
+                         b'stderr: ascii:backslashreplace\n')
+
+        # force the encoding
+        env['PYTHONIOENCODING'] = 'iso8859-1'
+        p = subprocess.Popen([sys.executable, "-c", code],
+                              stdout=subprocess.PIPE, env=env)
+        out = p.communicate()[0]
+        self.assertEqual(out,
+                         b'stdin: iso8859-1:surrogateescape\n'
+                         b'stdout: iso8859-1:surrogateescape\n'
+                         b'stderr: iso8859-1:backslashreplace\n')
+
     def test_implementation(self):
         # This test applies to all implementations equally.
 
index 5946bc99f38e7277990ef41594aa3ca8f00e622d..2072204267dfb2908b6712ef04b988f527a5da08 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #19977: When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale),
+  :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the
+  ``surrogateescape`` error handler, instead of the ``strict`` error handler.
+
 - Issue #20574: Implement incremental decoder for cp65001 code (Windows code
   page 65001, Microsoft UTF-8).
 
index e9947e9ff68be0952c3f6010b7ea3c15033fe9b9..bb9f425fb1fd2bcb5fb17a709d4a550da215cf97 100644 (file)
@@ -1156,6 +1156,15 @@ initstdio(void)
     encoding = _Py_StandardStreamEncoding;
     errors = _Py_StandardStreamErrors;
     if (!encoding || !errors) {
+        if (!errors) {
+            /* When the LC_CTYPE locale is the POSIX locale ("C locale"),
+               stdin and stdout use the surrogateescape error handler by
+               default, instead of the strict error handler. */
+            char *loc = setlocale(LC_CTYPE, NULL);
+            if (loc != NULL && strcmp(loc, "C") == 0)
+                errors = "surrogateescape";
+        }
+
         pythonioencoding = Py_GETENV("PYTHONIOENCODING");
         if (pythonioencoding) {
             char *err;
@@ -1168,7 +1177,7 @@ initstdio(void)
             if (err) {
                 *err = '\0';
                 err++;
-                if (*err && !errors) {
+                if (*err && !_Py_StandardStreamErrors) {
                     errors = err;
                 }
             }