From 315877dc361d554bec34b4b62c270479ad36a1be Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 29 Aug 2018 09:58:12 +0200 Subject: [PATCH] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) Standard streams like sys.stdout now use the "surrogateescape" error handler, instead of "strict", on the POSIX locale (when the C locale is not coerced and the UTF-8 Mode is disabled). Add tests on sys.stdout.errors with LC_ALL=POSIX. --- Lib/test/test_sys.py | 24 ++++++++------ .../2018-08-29-09-27-47.bpo-34485.5aJCmw.rst | 3 ++ Python/pylifecycle.c | 31 +++++++++++++------ 3 files changed, 40 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 005c82d13d..f3dd3bb67b 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase): expected = None self.check_fsencoding(fs_encoding, expected) - def c_locale_get_error_handler(self, isolated=False, encoding=None): + def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): # Force the POSIX locale env = os.environ.copy() - env["LC_ALL"] = "C" + env["LC_ALL"] = locale env["PYTHONCOERCECLOCALE"] = "0" code = '\n'.join(( 'import sys', @@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase): stdout, stderr = p.communicate() return stdout - def test_c_locale_surrogateescape(self): - out = self.c_locale_get_error_handler(isolated=True) + def check_locale_surrogateescape(self, locale): + out = self.c_locale_get_error_handler(locale, isolated=True) self.assertEqual(out, 'stdin: surrogateescape\n' 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') # replace the default error handler - out = self.c_locale_get_error_handler(encoding=':ignore') + out = self.c_locale_get_error_handler(locale, encoding=':ignore') self.assertEqual(out, 'stdin: ignore\n' 'stdout: ignore\n' 'stderr: backslashreplace\n') # force the encoding - out = self.c_locale_get_error_handler(encoding='iso8859-1') + out = self.c_locale_get_error_handler(locale, encoding='iso8859-1') self.assertEqual(out, 'stdin: strict\n' 'stdout: strict\n' 'stderr: backslashreplace\n') - out = self.c_locale_get_error_handler(encoding='iso8859-1:') + out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:') self.assertEqual(out, 'stdin: strict\n' 'stdout: strict\n' 'stderr: backslashreplace\n') # have no any effect - out = self.c_locale_get_error_handler(encoding=':') + out = self.c_locale_get_error_handler(locale, encoding=':') self.assertEqual(out, 'stdin: surrogateescape\n' 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') - out = self.c_locale_get_error_handler(encoding='') + out = self.c_locale_get_error_handler(locale, encoding='') self.assertEqual(out, 'stdin: surrogateescape\n' 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') + def test_c_locale_surrogateescape(self): + self.check_locale_surrogateescape('C') + + def test_posix_locale_surrogateescape(self): + self.check_locale_surrogateescape('POSIX') + def test_implementation(self): # This test applies to all implementations equally. diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst new file mode 100644 index 0000000000..893e4f573f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst @@ -0,0 +1,3 @@ +Standard streams like sys.stdout now use the "surrogateescape" error +handler, instead of "strict", on the POSIX locale (when the C locale is not +coerced and the UTF-8 Mode is disabled). diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 8c77859209..33af06ec18 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -345,13 +345,13 @@ get_stdio_errors(void) { const char *ctype_loc = setlocale(LC_CTYPE, NULL); if (ctype_loc != NULL) { - /* "surrogateescape" is the default in the legacy C locale */ - if (strcmp(ctype_loc, "C") == 0) { + /* surrogateescape is the default in the legacy C and POSIX locales */ + if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) { return "surrogateescape"; } #ifdef PY_COERCE_C_LOCALE - /* "surrogateescape" is the default in locale coercion target locales */ + /* surrogateescape is the default in locale coercion target locales */ const _LocaleCoercionTarget *target = NULL; for (target = _TARGET_LOCALES; target->locale_name; target++) { if (strcmp(ctype_loc, target->locale_name) == 0) { @@ -1791,16 +1791,29 @@ init_sys_streams(PyInterpreterState *interp) if (err) { *err = '\0'; err++; - if (*err && !errors) { - errors = err; + if (!err[0]) { + err = NULL; } } - if (!encoding && *pythonioencoding) { - encoding = pythonioencoding; - if (!errors) { - errors = "strict"; + + /* Does PYTHONIOENCODING contain an encoding? */ + if (pythonioencoding[0]) { + if (!encoding) { + encoding = pythonioencoding; + } + + /* If the encoding is set but not the error handler, + use "strict" error handler by default. + PYTHONIOENCODING=latin1 behaves as + PYTHONIOENCODING=latin1:strict. */ + if (!err) { + err = "strict"; } } + + if (!errors && err != NULL) { + errors = err; + } } if (interp->core_config.utf8_mode) { -- 2.40.0