granicus.if.org Git - python/commitdiff
bpo-34485: Enhance init_sys_streams() (GH-8978)
author: Victor Stinner <vstinner@redhat.com>
Tue, 28 Aug 2018 21:26:33 +0000 (23:26 +0200)
committer: GitHub <noreply@github.com>
Tue, 28 Aug 2018 21:26:33 +0000 (23:26 +0200)
Python now gets the locale encoding with C code to initialize the encoding
of standard streams like sys.stdout. Moreover, the encoding is now
initialized to the Python codec name to get a normalized encoding name and
to ensure that the codec is loaded. The change avoids importing
_bootlocale and _locale modules at startup by default.

When the PYTHONIOENCODING environment variable only contains an encoding,
the error handler is now set explicitly to "strict".

Also rename get_default_standard_stream_error_handler() to
get_stdio_errors().

Reduce the buffer to format the "cpXXX" string (Windows locale encoding).

Lib/test/test_embed.py
Lib/test/test_sys.py
Lib/test/test_utf8_mode.py
Misc/NEWS.d/next/Core and Builtins/2018-08-28-17-48-40.bpo-34485.aFwck2.rst [new file with mode: 0644]
Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst [new file with mode: 0644]
Modules/_localemodule.c
Programs/_testembed.c
Python/pylifecycle.c

index 25593bdf4208665d91805e96670bbca96b777140..3922447c645ea4f3a68a5720fbb58723c7485791 100644 (file)
@@ -171,17 +171,17 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase):
         "stdout: {out_encoding}:ignore",
         "stderr: {out_encoding}:backslashreplace",
         "--- Set encoding only ---",
-        "Expected encoding: latin-1",
+        "Expected encoding: iso8859-1",
         "Expected errors: default",
-        "stdin: latin-1:{errors}",
-        "stdout: latin-1:{errors}",
-        "stderr: latin-1:backslashreplace",
+        "stdin: iso8859-1:{errors}",
+        "stdout: iso8859-1:{errors}",
+        "stderr: iso8859-1:backslashreplace",
         "--- Set encoding and errors ---",
-        "Expected encoding: latin-1",
+        "Expected encoding: iso8859-1",
         "Expected errors: replace",
-        "stdin: latin-1:replace",
-        "stdout: latin-1:replace",
-        "stderr: latin-1:backslashreplace"])
+        "stdin: iso8859-1:replace",
+        "stdout: iso8859-1:replace",
+        "stderr: iso8859-1:backslashreplace"])
         expected_output = expected_output.format(
                                 in_encoding=expected_stream_encoding,
                                 out_encoding=expected_stream_encoding,
index 336ae447a8de5e5aad9c89886cd40a168f3d5200..005c82d13dc7ff7947c081e828f03904a40505de 100644 (file)
@@ -668,7 +668,7 @@ class SysModuleTest(unittest.TestCase):
             'dump("stdout")',
             'dump("stderr")',
         ))
-        args = [sys.executable, "-c", code]
+        args = [sys.executable, "-X", "utf8=0", "-c", code]
         if isolated:
             args.append("-I")
         if encoding is not None:
@@ -712,8 +712,8 @@ class SysModuleTest(unittest.TestCase):
         # have no any effect
         out = self.c_locale_get_error_handler(encoding=':')
         self.assertEqual(out,
-                         'stdin: strict\n'
-                         'stdout: strict\n'
+                         'stdin: surrogateescape\n'
+                         'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
         out = self.c_locale_get_error_handler(encoding='')
         self.assertEqual(out,
index df988c1fc9e20cc881684aa582e1ea8316963984..7280ce77ef8279b3c5f9b6dab5679399c5329948 100644 (file)
@@ -139,16 +139,16 @@ class UTF8ModeTests(unittest.TestCase):
         out = self.get_output('-X', 'utf8', '-c', code,
                               PYTHONIOENCODING="latin1")
         self.assertEqual(out.splitlines(),
-                         ['stdin: latin1/strict',
-                          'stdout: latin1/strict',
-                          'stderr: latin1/backslashreplace'])
+                         ['stdin: iso8859-1/strict',
+                          'stdout: iso8859-1/strict',
+                          'stderr: iso8859-1/backslashreplace'])
 
         out = self.get_output('-X', 'utf8', '-c', code,
                               PYTHONIOENCODING=":namereplace")
         self.assertEqual(out.splitlines(),
-                         ['stdin: UTF-8/namereplace',
-                          'stdout: UTF-8/namereplace',
-                          'stderr: UTF-8/backslashreplace'])
+                         ['stdin: utf-8/namereplace',
+                          'stdout: utf-8/namereplace',
+                          'stderr: utf-8/backslashreplace'])
 
     def test_io(self):
         code = textwrap.dedent('''
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-17-48-40.bpo-34485.aFwck2.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-17-48-40.bpo-34485.aFwck2.rst
new file mode 100644 (file)
index 0000000..f6cd951
--- /dev/null
@@ -0,0 +1,5 @@
+Python now gets the locale encoding with C code to initialize the encoding
+of standard streams like sys.stdout. Moreover, the encoding is now
+initialized to the Python codec name to get a normalized encoding name and
+to ensure that the codec is loaded. The change avoids importing _bootlocale
+and _locale modules at startup by default.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
new file mode 100644 (file)
index 0000000..5ca373a
--- /dev/null
@@ -0,0 +1,3 @@
+Fix the error handler of standard streams like sys.stdout:
+PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
+"strict".
index 524886d466009b44ce4e045ef0ac2da5228ed2e5..3fdbc5ea8122b3df99afbbc8e2f1e6fd7ee6be44 100644 (file)
@@ -319,7 +319,7 @@ exit:
 static PyObject*
 PyLocale_getdefaultlocale(PyObject* self, PyObject *Py_UNUSED(ignored))
 {
-    char encoding[100];
+    char encoding[20];
     char locale[100];
 
     PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
index f1d30f6c54ddda8b7e226cc0a891679aa4d330ae..d0c00cfc6cd4fed70da795f25b631f88daba646c 100644 (file)
@@ -113,9 +113,9 @@ static int test_forced_io_encoding(void)
     printf("--- Set errors only ---\n");
     check_stdio_details(NULL, "ignore");
     printf("--- Set encoding only ---\n");
-    check_stdio_details("latin-1", NULL);
+    check_stdio_details("iso8859-1", NULL);
     printf("--- Set encoding and errors ---\n");
-    check_stdio_details("latin-1", "replace");
+    check_stdio_details("iso8859-1", "replace");
 
     /* Check calling after initialization fails */
     Py_Initialize();
index cc64cf956d28bdce6fb8469b745345c3be63f2ba..29711dfc982b2ba72a0ddeaf0228ace9df53ad69 100644 (file)
@@ -244,22 +244,26 @@ error:
     return NULL;
 }
 
-static char*
-get_locale_encoding(void)
+static _PyInitError
+get_locale_encoding(char **locale_encoding)
 {
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    char* codeset = nl_langinfo(CODESET);
-    if (!codeset || codeset[0] == '\0') {
-        PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
-        return NULL;
-    }
-    return get_codec_name(codeset);
+#ifdef MS_WINDOWS
+    char encoding[20];
+    PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
 #elif defined(__ANDROID__)
-    return get_codec_name("UTF-8");
+    const char *encoding = "UTF-8";
 #else
-    PyErr_SetNone(PyExc_NotImplementedError);
-    return NULL;
+    const char *encoding = nl_langinfo(CODESET);
+    if (!encoding || encoding[0] == '\0') {
+        return _Py_INIT_USER_ERR("failed to get the locale encoding: "
+                                 "nl_langinfo(CODESET) failed");
+    }
 #endif
+    *locale_encoding = _PyMem_RawStrdup(encoding);
+    if (*locale_encoding == NULL) {
+        return _Py_INIT_NO_MEMORY();
+    }
+    return _Py_INIT_OK();
 }
 
 static _PyInitError
@@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
 };
 
 static const char *
-get_default_standard_stream_error_handler(void)
+get_stdio_errors(void)
 {
     const char *ctype_loc = setlocale(LC_CTYPE, NULL);
     if (ctype_loc != NULL) {
@@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void)
 #endif
    }
 
-   /* Otherwise return NULL to request the typical default error handler */
-   return NULL;
+   return "strict";
 }
 
 #ifdef PY_COERCE_C_LOCALE
@@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp)
             Py_HasFileSystemDefaultEncoding = 1;
         }
         else {
-            Py_FileSystemDefaultEncoding = get_locale_encoding();
+            char *locale_encoding;
+            _PyInitError err = get_locale_encoding(&locale_encoding);
+            if (_Py_INIT_FAILED(err)) {
+                return err;
+            }
+
+            Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
+            PyMem_RawFree(locale_encoding);
             if (Py_FileSystemDefaultEncoding == NULL) {
-                return _Py_INIT_ERR("Unable to get the locale encoding");
+                return _Py_INIT_ERR("failed to get the Python codec "
+                                    "of the locale encoding");
             }
 
             Py_HasFileSystemDefaultEncoding = 0;
@@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp)
     PyObject * encoding_attr;
     char *pythonioencoding = NULL;
     const char *encoding, *errors;
+    char *locale_encoding = NULL;
+    char *codec_name = NULL;
     _PyInitError res = _Py_INIT_OK();
 
     /* Hack to avoid a nasty recursion issue when Python is invoked
@@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp)
                     errors = err;
                 }
             }
-            if (*pythonioencoding && !encoding) {
+            if (!encoding && *pythonioencoding) {
                 encoding = pythonioencoding;
+                if (!errors) {
+                    errors = "strict";
+                }
             }
         }
-        else if (interp->core_config.utf8_mode) {
-            encoding = "utf-8";
-            errors = "surrogateescape";
+
+        if (interp->core_config.utf8_mode) {
+            if (!encoding) {
+                encoding = "utf-8";
+            }
+            if (!errors) {
+                errors = "surrogateescape";
+            }
         }
 
-        if (!errors && !pythonioencoding) {
+        if (!errors) {
             /* Choose the default error handler based on the current locale */
-            errors = get_default_standard_stream_error_handler();
+            errors = get_stdio_errors();
         }
     }
 
+    if (encoding == NULL) {
+        _PyInitError err = get_locale_encoding(&locale_encoding);
+        if (_Py_INIT_FAILED(err)) {
+            return err;
+        }
+        encoding = locale_encoding;
+    }
+
+    codec_name = get_codec_name(encoding);
+    if (codec_name == NULL) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "failed to get the Python codec name "
+                        "of stdio encoding");
+        goto error;
+    }
+    encoding = codec_name;
+
     /* Set sys.stdin */
     fd = fileno(stdin);
     /* Under some conditions stdin, stdout and stderr may not be connected
@@ -1928,6 +1966,8 @@ done:
 
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 
+    PyMem_RawFree(locale_encoding);
+    PyMem_RawFree(codec_name);
     PyMem_Free(pythonioencoding);
     Py_XDECREF(bimod);
     Py_XDECREF(iomod);