]> granicus.if.org Git - python/commitdiff
bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)
authorVictor Stinner <vstinner@redhat.com>
Wed, 29 Aug 2018 07:58:12 +0000 (09:58 +0200)
committerGitHub <noreply@github.com>
Wed, 29 Aug 2018 07:58:12 +0000 (09:58 +0200)
Standard streams like sys.stdout now use the "surrogateescape" error
handler, instead of "strict", on the POSIX locale (when the C locale is not
coerced and the UTF-8 Mode is disabled).

Add tests on sys.stdout.errors with LC_ALL=POSIX.

Lib/test/test_sys.py
Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst [new file with mode: 0644]
Python/pylifecycle.c

index 005c82d13dc7ff7947c081e828f03904a40505de..f3dd3bb67b38e2902d404bd36bc22c7deb6431e7 100644 (file)
@@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase):
             expected = None
         self.check_fsencoding(fs_encoding, expected)
 
-    def c_locale_get_error_handler(self, isolated=False, encoding=None):
+    def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
         # Force the POSIX locale
         env = os.environ.copy()
-        env["LC_ALL"] = "C"
+        env["LC_ALL"] = locale
         env["PYTHONCOERCECLOCALE"] = "0"
         code = '\n'.join((
             'import sys',
@@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase):
         stdout, stderr = p.communicate()
         return stdout
 
-    def test_c_locale_surrogateescape(self):
-        out = self.c_locale_get_error_handler(isolated=True)
+    def check_locale_surrogateescape(self, locale):
+        out = self.c_locale_get_error_handler(locale, isolated=True)
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
 
         # replace the default error handler
-        out = self.c_locale_get_error_handler(encoding=':ignore')
+        out = self.c_locale_get_error_handler(locale, encoding=':ignore')
         self.assertEqual(out,
                          'stdin: ignore\n'
                          'stdout: ignore\n'
                          'stderr: backslashreplace\n')
 
         # force the encoding
-        out = self.c_locale_get_error_handler(encoding='iso8859-1')
+        out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
         self.assertEqual(out,
                          'stdin: strict\n'
                          'stdout: strict\n'
                          'stderr: backslashreplace\n')
-        out = self.c_locale_get_error_handler(encoding='iso8859-1:')
+        out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
         self.assertEqual(out,
                          'stdin: strict\n'
                          'stdout: strict\n'
                          'stderr: backslashreplace\n')
 
         # have no any effect
-        out = self.c_locale_get_error_handler(encoding=':')
+        out = self.c_locale_get_error_handler(locale, encoding=':')
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
-        out = self.c_locale_get_error_handler(encoding='')
+        out = self.c_locale_get_error_handler(locale, encoding='')
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
 
+    def test_c_locale_surrogateescape(self):
+        self.check_locale_surrogateescape('C')
+
+    def test_posix_locale_surrogateescape(self):
+        self.check_locale_surrogateescape('POSIX')
+
     def test_implementation(self):
         # This test applies to all implementations equally.
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
new file mode 100644 (file)
index 0000000..893e4f5
--- /dev/null
@@ -0,0 +1,3 @@
+Standard streams like sys.stdout now use the "surrogateescape" error
+handler, instead of "strict", on the POSIX locale (when the C locale is not
+coerced and the UTF-8 Mode is disabled).
index 8c77859209ab2ca2ea618fc57ff0ee5544f83e87..33af06ec18b939d9c9b098944f12fa50089bce23 100644 (file)
@@ -345,13 +345,13 @@ get_stdio_errors(void)
 {
     const char *ctype_loc = setlocale(LC_CTYPE, NULL);
     if (ctype_loc != NULL) {
-        /* "surrogateescape" is the default in the legacy C locale */
-        if (strcmp(ctype_loc, "C") == 0) {
+        /* surrogateescape is the default in the legacy C and POSIX locales */
+        if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
             return "surrogateescape";
         }
 
 #ifdef PY_COERCE_C_LOCALE
-        /* "surrogateescape" is the default in locale coercion target locales */
+        /* surrogateescape is the default in locale coercion target locales */
         const _LocaleCoercionTarget *target = NULL;
         for (target = _TARGET_LOCALES; target->locale_name; target++) {
             if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -1791,16 +1791,29 @@ init_sys_streams(PyInterpreterState *interp)
             if (err) {
                 *err = '\0';
                 err++;
-                if (*err && !errors) {
-                    errors = err;
+                if (!err[0]) {
+                    err = NULL;
                 }
             }
-            if (!encoding && *pythonioencoding) {
-                encoding = pythonioencoding;
-                if (!errors) {
-                    errors = "strict";
+
+            /* Does PYTHONIOENCODING contain an encoding? */
+            if (pythonioencoding[0]) {
+                if (!encoding) {
+                    encoding = pythonioencoding;
+                }
+
+                /* If the encoding is set but not the error handler,
+                   use "strict" error handler by default.
+                   PYTHONIOENCODING=latin1 behaves as
+                   PYTHONIOENCODING=latin1:strict. */
+                if (!err) {
+                    err = "strict";
                 }
             }
+
+            if (!errors && err != NULL) {
+                errors = err;
+            }
         }
 
         if (interp->core_config.utf8_mode) {