granicus.if.org Git - python/commitdiff
bpo-36775: _PyCoreConfig only uses wchar_t* (GH-13062)
author Victor Stinner <vstinner@redhat.com>
Thu, 2 May 2019 18:56:30 +0000 (14:56 -0400)
committer GitHub <noreply@github.com>
Thu, 2 May 2019 18:56:30 +0000 (14:56 -0400)
_PyCoreConfig: Change filesystem_encoding, filesystem_errors,
stdio_encoding and stdio_errors fields type from char* to wchar_t*.

Changes:

* PyInterpreterState: replace fscodec_initialized (int) with fs_codec
  structure.
* Add get_error_handler_wide() and unicode_encode_utf8() helper
  functions.
* Add error_handler parameter to unicode_encode_locale()
  and unicode_decode_locale().
* Remove _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideString() to _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideStringFromString()
  to _PyCoreConfig_DecodeLocale().

Include/cpython/coreconfig.h
Include/internal/pycore_coreconfig.h
Include/internal/pycore_pylifecycle.h
Include/internal/pycore_pystate.h
Objects/stringlib/codecs.h
Objects/unicodeobject.c
Programs/_testembed.c
Python/coreconfig.c
Python/preconfig.c
Python/pylifecycle.c
Python/sysmodule.c

index 1aab5e4f0e624903422afe7fd03b57b9932b3a8e..5672080b784fdee00c9f650f07bff872ee350eac 100644 (file)
@@ -207,8 +207,8 @@ typedef struct {
 
        See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
        */
-    char *filesystem_encoding;
-    char *filesystem_errors;
+    wchar_t *filesystem_encoding;
+    wchar_t *filesystem_errors;
 
     wchar_t *pycache_prefix;  /* PYTHONPYCACHEPREFIX, -X pycache_prefix=PATH */
     wchar_t *program_name;    /* Program name, see also Py_GetProgramName() */
@@ -334,13 +334,13 @@ typedef struct {
        Value set from PYTHONIOENCODING environment variable and
        Py_SetStandardStreamEncoding() function.
        See also 'stdio_errors' attribute. */
-    char *stdio_encoding;
+    wchar_t *stdio_encoding;
 
     /* Error handler of sys.stdin and sys.stdout.
        Value set from PYTHONIOENCODING environment variable and
        Py_SetStandardStreamEncoding() function.
        See also 'stdio_encoding' attribute. */
-    char *stdio_errors;
+    wchar_t *stdio_errors;
 
 #ifdef MS_WINDOWS
     /* If greater than zero, use io.FileIO instead of WindowsConsoleIO for sys
index 8af310d2b0ceafdd3d2008ef78ad1062a0e1a528..d48904e482a45d9a1c99b570e3414f5c71363388 100644 (file)
@@ -106,12 +106,9 @@ PyAPI_FUNC(_PyInitError) _PyCoreConfig_Copy(
     _PyCoreConfig *config,
     const _PyCoreConfig *config2);
 PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetString(
-    char **config_str,
-    const char *str);
-PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideString(
     wchar_t **config_str,
     const wchar_t *str);
-PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideStringFromString(
+PyAPI_FUNC(_PyInitError) _PyCoreConfig_DecodeLocale(
     wchar_t **config_str,
     const char *str);
 PyAPI_FUNC(_PyInitError) _PyCoreConfig_InitPathConfig(_PyCoreConfig *config);
index a2383d476ee9f732ebad2fda7ebceb09b5ba1efe..321cc5d27889bd0d16e8fc89a4f76af9ce62d719 100644 (file)
@@ -21,6 +21,9 @@ extern int _Py_SetFileSystemEncoding(
     const char *errors);
 extern void _Py_ClearFileSystemEncoding(void);
 extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
+#ifdef MS_WINDOWS
+extern int _PyUnicode_EnableLegacyWindowsFSEncoding(void);
+#endif
 
 PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);
 
index 2c24f679dc020369e47acd82c97f604d217a4bed..67bcd147e2829f155c3534cf502bfacb37d13185 100644 (file)
@@ -56,7 +56,14 @@ struct _is {
     PyObject *codec_search_cache;
     PyObject *codec_error_registry;
     int codecs_initialized;
-    int fscodec_initialized;
+
+    /* fs_codec.encoding is initialized to NULL.
+       Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
+    struct {
+        char *encoding;   /* Filesystem encoding (encoded to UTF-8) */
+        char *errors;     /* Filesystem errors (encoded to UTF-8) */
+        _Py_error_handler error_handler;
+    } fs_codec;
 
     _PyCoreConfig core_config;
 #ifdef HAVE_DLOPEN
index 0abb4c8abb9282c07682906f23c22e8fff80a7a2..8645bc26cff8cac5075e56645de0b479f9a5c780 100644 (file)
@@ -260,6 +260,7 @@ Py_LOCAL_INLINE(PyObject *)
 STRINGLIB(utf8_encoder)(PyObject *unicode,
                         STRINGLIB_CHAR *data,
                         Py_ssize_t size,
+                        _Py_error_handler error_handler,
                         const char *errors)
 {
     Py_ssize_t i;                /* index into data of next input character */
@@ -268,7 +269,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
     PyObject *error_handler_obj = NULL;
     PyObject *exc = NULL;
     PyObject *rep = NULL;
-    _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
 #endif
 #if STRINGLIB_SIZEOF_CHAR == 1
     const Py_ssize_t max_char_size = 2;
index 5b6b241cb62b68b491ca92fa6470c097730f6769..4d86519e8637694f9824bb7afe87f682a9d8c81d 100644 (file)
@@ -40,6 +40,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "pycore_coreconfig.h"
 #include "pycore_fileutils.h"
 #include "pycore_object.h"
 #include "pycore_pylifecycle.h"
@@ -264,6 +265,13 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
 /* Forward declaration */
 static inline int
 _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+                    const char *errors);
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+                    _Py_error_handler error_handler, const char *errors,
+                    Py_ssize_t *consumed);
 
 /* List of static strings. */
 static _Py_Identifier *static_strings = NULL;
@@ -388,6 +396,35 @@ _Py_GetErrorHandler(const char *errors)
     return _Py_ERROR_OTHER;
 }
 
+
+static _Py_error_handler
+get_error_handler_wide(const wchar_t *errors)
+{
+    if (errors == NULL || wcscmp(errors, L"strict") == 0) {
+        return _Py_ERROR_STRICT;
+    }
+    if (wcscmp(errors, L"surrogateescape") == 0) {
+        return _Py_ERROR_SURROGATEESCAPE;
+    }
+    if (wcscmp(errors, L"replace") == 0) {
+        return _Py_ERROR_REPLACE;
+    }
+    if (wcscmp(errors, L"ignore") == 0) {
+        return _Py_ERROR_IGNORE;
+    }
+    if (wcscmp(errors, L"backslashreplace") == 0) {
+        return _Py_ERROR_BACKSLASHREPLACE;
+    }
+    if (wcscmp(errors, L"surrogatepass") == 0) {
+        return _Py_ERROR_SURROGATEPASS;
+    }
+    if (wcscmp(errors, L"xmlcharrefreplace") == 0) {
+        return _Py_ERROR_XMLCHARREFREPLACE;
+    }
+    return _Py_ERROR_OTHER;
+}
+
+
 /* The max unicode value is always 0x10FFFF while using the PEP-393 API.
    This function is kept for backward compatibility with the old API. */
 Py_UNICODE
@@ -3445,11 +3482,9 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
 
 
 static PyObject *
-unicode_encode_locale(PyObject *unicode, const char *errors,
+unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
                       int current_locale)
 {
-    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
-
     Py_ssize_t wlen;
     wchar_t *wstr = PyUnicode_AsWideCharString(unicode, &wlen);
     if (wstr == NULL) {
@@ -3499,30 +3534,44 @@ unicode_encode_locale(PyObject *unicode, const char *errors,
 PyObject *
 PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
 {
-    return unicode_encode_locale(unicode, errors, 1);
+    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+    return unicode_encode_locale(unicode, error_handler, 1);
 }
 
 PyObject *
 PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-    const _PyCoreConfig *config = &interp->core_config;
 #ifdef _Py_FORCE_UTF8_FS_ENCODING
-    return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
+    if (interp->fs_codec.encoding) {
+        return unicode_encode_utf8(unicode,
+                                   interp->fs_codec.error_handler,
+                                   interp->fs_codec.errors);
+    }
+    else {
+        const _PyCoreConfig *config = &interp->core_config;
+        _Py_error_handler errors;
+        errors = get_error_handler_wide(config->filesystem_errors);
+        assert(errors != _Py_ERROR_UNKNOWN);
+        return unicode_encode_utf8(unicode, errors, NULL);
+    }
 #else
     /* Bootstrap check: if the filesystem codec is implemented in Python, we
        cannot use it to encode and decode filenames before it is loaded. Load
        the Python codec requires to encode at least its own filename. Use the C
        implementation of the locale codec until the codec registry is
        initialized and the Python codec is loaded. See initfsencoding(). */
-    if (interp->fscodec_initialized) {
+    if (interp->fs_codec.encoding) {
         return PyUnicode_AsEncodedString(unicode,
-                                         config->filesystem_encoding,
-                                         config->filesystem_errors);
+                                         interp->fs_codec.encoding,
+                                         interp->fs_codec.errors);
     }
     else {
-        return unicode_encode_locale(unicode,
-                                     config->filesystem_errors, 0);
+        const _PyCoreConfig *config = &interp->core_config;
+        _Py_error_handler errors;
+        errors = get_error_handler_wide(config->filesystem_errors);
+        assert(errors != _Py_ERROR_UNKNOWN);
+        return unicode_encode_locale(unicode, errors, 0);
     }
 #endif
 }
@@ -3663,11 +3712,9 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
 }
 
 static PyObject*
-unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
-                      int current_locale)
+unicode_decode_locale(const char *str, Py_ssize_t len,
+                      _Py_error_handler errors, int current_locale)
 {
-    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
-
     if (str[len] != '\0' || (size_t)len != strlen(str))  {
         PyErr_SetString(PyExc_ValueError, "embedded null byte");
         return NULL;
@@ -3677,7 +3724,7 @@ unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
     size_t wlen;
     const char *reason;
     int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
-                                 current_locale, error_handler);
+                                 current_locale, errors);
     if (res != 0) {
         if (res == -2) {
             PyObject *exc;
@@ -3709,14 +3756,16 @@ PyObject*
 PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
                               const char *errors)
 {
-    return unicode_decode_locale(str, len, errors, 1);
+    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+    return unicode_decode_locale(str, len, error_handler, 1);
 }
 
 PyObject*
 PyUnicode_DecodeLocale(const char *str, const char *errors)
 {
     Py_ssize_t size = (Py_ssize_t)strlen(str);
-    return unicode_decode_locale(str, size, errors, 1);
+    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+    return unicode_decode_locale(str, size, error_handler, 1);
 }
 
 
@@ -3730,23 +3779,36 @@ PyObject*
 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-    const _PyCoreConfig *config = &interp->core_config;
 #ifdef _Py_FORCE_UTF8_FS_ENCODING
-    return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
+    if (interp->fs_codec.encoding) {
+        return unicode_decode_utf8(s, size,
+                                   interp->fs_codec.error_handler,
+                                   interp->fs_codec.errors,
+                                   NULL);
+    }
+    else {
+        const _PyCoreConfig *config = &interp->core_config;
+        _Py_error_handler errors;
+        errors = get_error_handler_wide(config->filesystem_errors);
+        assert(errors != _Py_ERROR_UNKNOWN);
+        return unicode_decode_utf8(s, size, errors, NULL, NULL);
+    }
 #else
     /* Bootstrap check: if the filesystem codec is implemented in Python, we
        cannot use it to encode and decode filenames before it is loaded. Load
        the Python codec requires to encode at least its own filename. Use the C
        implementation of the locale codec until the codec registry is
        initialized and the Python codec is loaded. See initfsencoding(). */
-    if (interp->fscodec_initialized) {
+    if (interp->fs_codec.encoding) {
         return PyUnicode_Decode(s, size,
-                                config->filesystem_encoding,
-                                config->filesystem_errors);
+                                interp->fs_codec.encoding,
+                                interp->fs_codec.errors);
     }
     else {
-        return unicode_decode_locale(s, size,
-                                     config->filesystem_errors, 0);
+        const _PyCoreConfig *config = &interp->core_config;
+        _Py_error_handler errors;
+        errors = get_error_handler_wide(config->filesystem_errors);
+        return unicode_decode_locale(s, size, errors, 0);
     }
 #endif
 }
@@ -4810,11 +4872,10 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
     return p - start;
 }
 
-PyObject *
-PyUnicode_DecodeUTF8Stateful(const char *s,
-                             Py_ssize_t size,
-                             const char *errors,
-                             Py_ssize_t *consumed)
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+                    _Py_error_handler error_handler, const char *errors,
+                    Py_ssize_t *consumed)
 {
     _PyUnicodeWriter writer;
     const char *starts = s;
@@ -4825,7 +4886,6 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
     const char *errmsg = "";
     PyObject *error_handler_obj = NULL;
     PyObject *exc = NULL;
-    _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
 
     if (size == 0) {
         if (consumed)
@@ -4948,6 +5008,16 @@ onError:
 }
 
 
+PyObject *
+PyUnicode_DecodeUTF8Stateful(const char *s,
+                             Py_ssize_t size,
+                             const char *errors,
+                             Py_ssize_t *consumed)
+{
+    return unicode_decode_utf8(s, size, _Py_ERROR_UNKNOWN, errors, consumed);
+}
+
+
 /* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
    non-zero, use strict error handler otherwise.
 
@@ -5231,8 +5301,9 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
    maximum possible needed (4 result bytes per Unicode character), and return
    the excess memory at the end.
 */
-PyObject *
-_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+                    const char *errors)
 {
     enum PyUnicode_Kind kind;
     void *data;
@@ -5260,14 +5331,21 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
     case PyUnicode_1BYTE_KIND:
         /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
         assert(!PyUnicode_IS_ASCII(unicode));
-        return ucs1lib_utf8_encoder(unicode, data, size, errors);
+        return ucs1lib_utf8_encoder(unicode, data, size, error_handler, errors);
     case PyUnicode_2BYTE_KIND:
-        return ucs2lib_utf8_encoder(unicode, data, size, errors);
+        return ucs2lib_utf8_encoder(unicode, data, size, error_handler, errors);
     case PyUnicode_4BYTE_KIND:
-        return ucs4lib_utf8_encoder(unicode, data, size, errors);
+        return ucs4lib_utf8_encoder(unicode, data, size, error_handler, errors);
     }
 }
 
+PyObject *
+_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
+{
+    return unicode_encode_utf8(unicode, _Py_ERROR_UNKNOWN, errors);
+}
+
+
 PyObject *
 PyUnicode_EncodeUTF8(const Py_UNICODE *s,
                      Py_ssize_t size,
@@ -15575,12 +15653,35 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
 }
 
 
-static char*
-get_codec_name(const char *encoding)
+static int
+encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
 {
-    PyObject *codec, *name_obj = NULL;
+    int res;
+    res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
+    if (res == -2) {
+        PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name);
+        return -1;
+    }
+    if (res < 0) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    return 0;
+}
+
+
+static int
+config_get_codec_name(wchar_t **config_encoding)
+{
+    char *encoding;
+    if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) {
+        return -1;
+    }
+
+    PyObject *name_obj = NULL;
+    PyObject *codec = _PyCodec_Lookup(encoding);
+    PyMem_RawFree(encoding);
 
-    codec = _PyCodec_Lookup(encoding);
     if (!codec)
         goto error;
 
@@ -15590,71 +15691,107 @@ get_codec_name(const char *encoding)
         goto error;
     }
 
-    const char *name_utf8 = PyUnicode_AsUTF8(name_obj);
-    if (name_utf8 == NULL) {
+    wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL);
+    Py_DECREF(name_obj);
+    if (wname == NULL) {
         goto error;
     }
 
-    char *name = _PyMem_RawStrdup(name_utf8);
-    Py_DECREF(name_obj);
-    if (name == NULL) {
+    wchar_t *raw_wname = _PyMem_RawWcsdup(wname);
+    if (raw_wname == NULL) {
+        PyMem_Free(wname);
         PyErr_NoMemory();
-        return NULL;
+        goto error;
     }
-    return name;
+
+    PyMem_RawFree(*config_encoding);
+    *config_encoding = raw_wname;
+
+    PyMem_Free(wname);
+    return 0;
 
 error:
     Py_XDECREF(codec);
     Py_XDECREF(name_obj);
-    return NULL;
+    return -1;
 }
 
 
 static _PyInitError
 init_stdio_encoding(PyInterpreterState *interp)
 {
+    /* Update the stdio encoding to the normalized Python codec name. */
     _PyCoreConfig *config = &interp->core_config;
-
-    char *codec_name = get_codec_name(config->stdio_encoding);
-    if (codec_name == NULL) {
+    if (config_get_codec_name(&config->stdio_encoding) < 0) {
         return _Py_INIT_ERR("failed to get the Python codec name "
                             "of the stdio encoding");
     }
-    PyMem_RawFree(config->stdio_encoding);
-    config->stdio_encoding = codec_name;
     return _Py_INIT_OK();
 }
 
 
-static _PyInitError
-init_fs_encoding(PyInterpreterState *interp)
+static int
+init_fs_codec(PyInterpreterState *interp)
 {
     _PyCoreConfig *config = &interp->core_config;
 
-    char *encoding = get_codec_name(config->filesystem_encoding);
-    if (encoding == NULL) {
-        /* Such error can only occurs in critical situations: no more
-           memory, import a module of the standard library failed, etc. */
-        return _Py_INIT_ERR("failed to get the Python codec "
-                            "of the filesystem encoding");
+    _Py_error_handler error_handler;
+    error_handler = get_error_handler_wide(config->filesystem_errors);
+    if (error_handler == _Py_ERROR_UNKNOWN) {
+        PyErr_SetString(PyExc_RuntimeError, "unknow filesystem error handler");
+        return -1;
     }
 
-    /* Update the filesystem encoding to the normalized Python codec name.
-       For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
-       (Python codec name). */
-    PyMem_RawFree(config->filesystem_encoding);
-    config->filesystem_encoding = encoding;
+    char *encoding, *errors;
+    if (encode_wstr_utf8(config->filesystem_encoding,
+                         &encoding,
+                         "filesystem_encoding") < 0) {
+        return -1;
+    }
+
+    if (encode_wstr_utf8(config->filesystem_errors,
+                         &errors,
+                         "filesystem_errors") < 0) {
+        PyMem_RawFree(encoding);
+        return -1;
+    }
+
+    PyMem_RawFree(interp->fs_codec.encoding);
+    interp->fs_codec.encoding = encoding;
+    PyMem_RawFree(interp->fs_codec.errors);
+    interp->fs_codec.errors = errors;
+    interp->fs_codec.error_handler = error_handler;
+
+    /* At this point, PyUnicode_EncodeFSDefault() and
+       PyUnicode_DecodeFSDefault() can now use the Python codec rather than
+       the C implementation of the filesystem encoding. */
 
     /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
        global configuration variables. */
-    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
-                                  config->filesystem_errors) < 0) {
-        return _Py_INIT_NO_MEMORY();
+    if (_Py_SetFileSystemEncoding(interp->fs_codec.encoding,
+                                  interp->fs_codec.errors) < 0) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    return 0;
+}
+
+
+static _PyInitError
+init_fs_encoding(PyInterpreterState *interp)
+{
+    /* Update the filesystem encoding to the normalized Python codec name.
+       For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
+       (Python codec name). */
+    _PyCoreConfig *config = &interp->core_config;
+    if (config_get_codec_name(&config->filesystem_encoding) < 0) {
+        return _Py_INIT_ERR("failed to get the Python codec "
+                            "of the filesystem encoding");
     }
 
-    /* PyUnicode can now use the Python codec rather than C implementation
-       for the filesystem encoding */
-    interp->fscodec_initialized = 1;
+    if (init_fs_codec(interp) < 0) {
+        return _Py_INIT_ERR("cannot initialize filesystem codec");
+    }
     return _Py_INIT_OK();
 }
 
@@ -15671,6 +15808,33 @@ _PyUnicode_InitEncodings(PyInterpreterState *interp)
 }
 
 
+#ifdef MS_WINDOWS
+int
+_PyUnicode_EnableLegacyWindowsFSEncoding(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+    _PyCoreConfig *config = &interp->core_config;
+
+    /* Set the filesystem encoding to mbcs/replace (PEP 529) */
+    wchar_t *encoding = _PyMem_RawWcsdup(L"mbcs");
+    wchar_t *errors = _PyMem_RawWcsdup(L"replace");
+    if (encoding == NULL || errors == NULL) {
+        PyMem_RawFree(encoding);
+        PyMem_RawFree(errors);
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    PyMem_RawFree(config->filesystem_encoding);
+    config->filesystem_encoding = encoding;
+    PyMem_RawFree(config->filesystem_errors);
+    config->filesystem_errors = errors;
+
+    return init_fs_codec(interp);
+}
+#endif
+
+
 void
 _PyUnicode_Fini(void)
 {
@@ -15694,6 +15858,12 @@ _PyUnicode_Fini(void)
     }
     _PyUnicode_ClearStaticStrings();
     (void)PyUnicode_ClearFreeList();
+
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+    PyMem_RawFree(interp->fs_codec.encoding);
+    interp->fs_codec.encoding = NULL;
+    PyMem_RawFree(interp->fs_codec.errors);
+    interp->fs_codec.errors = NULL;
 }
 
 
index 6e764e3b6cce57884c46a3342fc020b30f01a43a..2cadf82cb17faada1fcf7fad3c2df49ddd928414 100644 (file)
@@ -488,8 +488,8 @@ static int test_init_from_config(void)
        Force it to 0 through the config. */
     config.legacy_windows_stdio = 0;
 #endif
-    config.stdio_encoding = "iso8859-1";
-    config.stdio_errors = "replace";
+    config.stdio_encoding = L"iso8859-1";
+    config.stdio_errors = L"replace";
 
     putenv("PYTHONNOUSERSITE=");
     Py_NoUserSiteDirectory = 0;
index c40c1f859ec2df5be231c36bc237ece2e42a49ed..15643be3765aa0f38dba19d37b8524e4f9cd19b1 100644 (file)
@@ -523,27 +523,7 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
 
 /* Copy str into *config_str (duplicate the string) */
 _PyInitError
-_PyCoreConfig_SetString(char **config_str, const char *str)
-{
-    char *str2;
-    if (str != NULL) {
-        str2 = _PyMem_RawStrdup(str);
-        if (str2 == NULL) {
-            return _Py_INIT_NO_MEMORY();
-        }
-    }
-    else {
-        str2 = NULL;
-    }
-    PyMem_RawFree(*config_str);
-    *config_str = str2;
-    return _Py_INIT_OK();
-}
-
-
-/* Copy str into *config_str (duplicate the string) */
-_PyInitError
-_PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
+_PyCoreConfig_SetString(wchar_t **config_str, const wchar_t *str)
 {
     wchar_t *str2;
     if (str != NULL) {
@@ -563,8 +543,8 @@ _PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
 
 /* Decode str using Py_DecodeLocale() and set the result into *config_str */
 static _PyInitError
-_PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
-                                         const char *decode_err_msg)
+_PyCoreConfig_DecodeLocaleErr(wchar_t **config_str, const char *str,
+                              const char *decode_err_msg)
 {
     wchar_t *str2;
     if (str != NULL) {
@@ -588,19 +568,17 @@ _PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
 }
 
 
+#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
+    _PyCoreConfig_DecodeLocaleErr(config_str, str, "cannot decode " NAME)
+
+
 _PyInitError
-_PyCoreConfig_SetWideStringFromString(wchar_t **config_str, const char *str)
+_PyCoreConfig_DecodeLocale(wchar_t **config_str, const char *str)
 {
-    return _PyCoreConfig_SetWideStringFromStringErr(
-                config_str, str, "cannot decode string");
+    return CONFIG_DECODE_LOCALE(config_str, str, "string");
 }
 
 
-#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
-    _PyCoreConfig_SetWideStringFromStringErr(config_str, str, \
-                                             "cannot decode " NAME)
-
-
 _PyInitError
 _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
 {
@@ -608,16 +586,9 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
     _PyCoreConfig_Clear(config);
 
 #define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
-#define COPY_STR_ATTR(ATTR) \
-    do { \
-        err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
-        if (_Py_INIT_FAILED(err)) { \
-            return err; \
-        } \
-    } while (0)
 #define COPY_WSTR_ATTR(ATTR) \
     do { \
-        err = _PyCoreConfig_SetWideString(&config->ATTR, config2->ATTR); \
+        err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
         if (_Py_INIT_FAILED(err)) { \
             return err; \
         } \
@@ -676,10 +647,10 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
     COPY_ATTR(quiet);
     COPY_ATTR(user_site_directory);
     COPY_ATTR(buffered_stdio);
-    COPY_STR_ATTR(filesystem_encoding);
-    COPY_STR_ATTR(filesystem_errors);
-    COPY_STR_ATTR(stdio_encoding);
-    COPY_STR_ATTR(stdio_errors);
+    COPY_WSTR_ATTR(filesystem_encoding);
+    COPY_WSTR_ATTR(filesystem_errors);
+    COPY_WSTR_ATTR(stdio_encoding);
+    COPY_WSTR_ATTR(stdio_errors);
 #ifdef MS_WINDOWS
     COPY_ATTR(legacy_windows_stdio);
 #endif
@@ -692,7 +663,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
     COPY_ATTR(_init_main);
 
 #undef COPY_ATTR
-#undef COPY_STR_ATTR
 #undef COPY_WSTR_ATTR
 #undef COPY_WSTRLIST
     return _Py_INIT_OK();
@@ -721,16 +691,10 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
                 goto fail; \
             } \
         } while (0)
-#define FROM_STRING(STR) \
-    ((STR != NULL) ? \
-        PyUnicode_FromString(STR) \
-        : (Py_INCREF(Py_None), Py_None))
 #define SET_ITEM_INT(ATTR) \
     SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
 #define SET_ITEM_UINT(ATTR) \
     SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR))
-#define SET_ITEM_STR(ATTR) \
-    SET_ITEM(#ATTR, FROM_STRING(config->ATTR))
 #define FROM_WSTRING(STR) \
     ((STR != NULL) ? \
         PyUnicode_FromWideChar(STR, -1) \
@@ -753,8 +717,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
     SET_ITEM_INT(show_alloc_count);
     SET_ITEM_INT(dump_refs);
     SET_ITEM_INT(malloc_stats);
-    SET_ITEM_STR(filesystem_encoding);
-    SET_ITEM_STR(filesystem_errors);
+    SET_ITEM_WSTR(filesystem_encoding);
+    SET_ITEM_WSTR(filesystem_errors);
     SET_ITEM_WSTR(pycache_prefix);
     SET_ITEM_WSTR(program_name);
     SET_ITEM_WSTRLIST(argv);
@@ -783,8 +747,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
     SET_ITEM_INT(quiet);
     SET_ITEM_INT(user_site_directory);
     SET_ITEM_INT(buffered_stdio);
-    SET_ITEM_STR(stdio_encoding);
-    SET_ITEM_STR(stdio_errors);
+    SET_ITEM_WSTR(stdio_encoding);
+    SET_ITEM_WSTR(stdio_errors);
 #ifdef MS_WINDOWS
     SET_ITEM_INT(legacy_windows_stdio);
 #endif
@@ -803,12 +767,10 @@ fail:
     Py_DECREF(dict);
     return NULL;
 
-#undef FROM_STRING
 #undef FROM_WSTRING
 #undef SET_ITEM
 #undef SET_ITEM_INT
 #undef SET_ITEM_UINT
-#undef SET_ITEM_STR
 #undef SET_ITEM_WSTR
 #undef SET_ITEM_WSTRLIST
 }
@@ -845,7 +807,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
         return _Py_INIT_OK();
     }
 
-    return _PyCoreConfig_SetWideString(dest, var);
+    return _PyCoreConfig_SetString(dest, var);
 #else
     const char *var = getenv(name);
     if (!var || var[0] == '\0') {
@@ -853,7 +815,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
         return _Py_INIT_OK();
     }
 
-    return _PyCoreConfig_SetWideStringFromStringErr(dest, var, decode_err_msg);
+    return _PyCoreConfig_DecodeLocaleErr(dest, var, decode_err_msg);
 #endif
 }
 
@@ -996,8 +958,7 @@ config_init_program_name(_PyCoreConfig *config)
 
     /* Use argv[0] by default, if available */
     if (config->program != NULL) {
-        err = _PyCoreConfig_SetWideString(&config->program_name,
-                                          config->program);
+        err = _PyCoreConfig_SetString(&config->program_name, config->program);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1010,7 +971,7 @@ config_init_program_name(_PyCoreConfig *config)
 #else
     const wchar_t *default_program_name = L"python3";
 #endif
-    err = _PyCoreConfig_SetWideString(&config->program_name, default_program_name);
+    err = _PyCoreConfig_SetString(&config->program_name, default_program_name);
     if (_Py_INIT_FAILED(err)) {
         return err;
     }
@@ -1025,8 +986,8 @@ config_init_executable(_PyCoreConfig *config)
     /* If Py_SetProgramFullPath() was called, use its value */
     const wchar_t *program_full_path = _Py_path_config.program_full_path;
     if (program_full_path != NULL) {
-        _PyInitError err = _PyCoreConfig_SetWideString(&config->executable,
-                                                       program_full_path);
+        _PyInitError err = _PyCoreConfig_SetString(&config->executable,
+                                                   program_full_path);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1051,7 +1012,7 @@ config_init_home(_PyCoreConfig *config)
     /* If Py_SetPythonHome() was called, use its value */
     wchar_t *home = _Py_path_config.home;
     if (home) {
-        _PyInitError err = _PyCoreConfig_SetWideString(&config->home, home);
+        _PyInitError err = _PyCoreConfig_SetString(&config->home, home);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1280,7 +1241,7 @@ config_read_complex_options(_PyCoreConfig *config)
 }
 
 
-static const char *
+static const wchar_t *
 config_get_stdio_errors(const _PyCoreConfig *config)
 {
 #ifndef MS_WINDOWS
@@ -1288,43 +1249,44 @@ config_get_stdio_errors(const _PyCoreConfig *config)
     if (loc != NULL) {
         /* surrogateescape is the default in the legacy C and POSIX locales */
         if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
-            return "surrogateescape";
+            return L"surrogateescape";
         }
 
 #ifdef PY_COERCE_C_LOCALE
         /* surrogateescape is the default in locale coercion target locales */
         if (_Py_IsLocaleCoercionTarget(loc)) {
-            return "surrogateescape";
+            return L"surrogateescape";
         }
 #endif
     }
 
-    return "strict";
+    return L"strict";
 #else
     /* On Windows, always use surrogateescape by default */
-    return "surrogateescape";
+    return L"surrogateescape";
 #endif
 }
 
 
 static _PyInitError
-config_get_locale_encoding(char **locale_encoding)
+config_get_locale_encoding(wchar_t **locale_encoding)
 {
 #ifdef MS_WINDOWS
     char encoding[20];
     PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
+    return _PyCoreConfig_DecodeLocale(locale_encoding, encoding);
 #elif defined(_Py_FORCE_UTF8_LOCALE)
-    const char *encoding = "UTF-8";
+    return _PyCoreConfig_SetString(locale_encoding, L"utf-8");
 #else
     const char *encoding = nl_langinfo(CODESET);
     if (!encoding || encoding[0] == '\0') {
         return _Py_INIT_ERR("failed to get the locale encoding: "
                             "nl_langinfo(CODESET) failed");
     }
+    /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
+    return CONFIG_DECODE_LOCALE(locale_encoding, encoding,
+                                "nl_langinfo(CODESET)");
 #endif
-
-    assert(*locale_encoding == NULL);
-    return _PyCoreConfig_SetString(locale_encoding, encoding);
 }
 
 
@@ -1337,16 +1299,18 @@ config_init_stdio_encoding(_PyCoreConfig *config,
     /* If Py_SetStandardStreamEncoding() have been called, use these
         parameters. */
     if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
-        err = _PyCoreConfig_SetString(&config->stdio_encoding,
-                                      _Py_StandardStreamEncoding);
+        err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
+                                   _Py_StandardStreamEncoding,
+                                   "_Py_StandardStreamEncoding");
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
     }
 
     if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
-        err = _PyCoreConfig_SetString(&config->stdio_errors,
-                                      _Py_StandardStreamErrors);
+        err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
+                                   _Py_StandardStreamErrors,
+                                   "_Py_StandardStreamErrors");
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1359,11 +1323,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
     /* PYTHONIOENCODING environment variable */
     const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
     if (opt) {
-        /* _PyCoreConfig_SetString() requires dest to be initialized to NULL */
-        char *pythonioencoding = NULL;
-        err = _PyCoreConfig_SetString(&pythonioencoding, opt);
-        if (_Py_INIT_FAILED(err)) {
-            return err;
+        char *pythonioencoding = _PyMem_RawStrdup(opt);
+        if (pythonioencoding == NULL) {
+            return _Py_INIT_NO_MEMORY();
         }
 
         char *errors = strchr(pythonioencoding, ':');
@@ -1378,8 +1340,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
         /* Does PYTHONIOENCODING contain an encoding? */
         if (pythonioencoding[0]) {
             if (config->stdio_encoding == NULL) {
-                err = _PyCoreConfig_SetString(&config->stdio_encoding,
-                                              pythonioencoding);
+                err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
+                                           pythonioencoding,
+                                           "PYTHONIOENCODING environment variable");
                 if (_Py_INIT_FAILED(err)) {
                     PyMem_RawFree(pythonioencoding);
                     return err;
@@ -1396,7 +1359,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
         }
 
         if (config->stdio_errors == NULL && errors != NULL) {
-            err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
+            err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
+                                       errors,
+                                       "PYTHONIOENCODING environment variable");
             if (_Py_INIT_FAILED(err)) {
                 PyMem_RawFree(pythonioencoding);
                 return err;
@@ -1409,15 +1374,14 @@ config_init_stdio_encoding(_PyCoreConfig *config,
     /* UTF-8 Mode uses UTF-8/surrogateescape */
     if (preconfig->utf8_mode) {
         if (config->stdio_encoding == NULL) {
-            err = _PyCoreConfig_SetString(&config->stdio_encoding,
-                                          "utf-8");
+            err = _PyCoreConfig_SetString(&config->stdio_encoding, L"utf-8");
             if (_Py_INIT_FAILED(err)) {
                 return err;
             }
         }
         if (config->stdio_errors == NULL) {
             err = _PyCoreConfig_SetString(&config->stdio_errors,
-                                          "surrogateescape");
+                                          L"surrogateescape");
             if (_Py_INIT_FAILED(err)) {
                 return err;
             }
@@ -1432,7 +1396,7 @@ config_init_stdio_encoding(_PyCoreConfig *config,
         }
     }
     if (config->stdio_errors == NULL) {
-        const char *errors = config_get_stdio_errors(config);
+        const wchar_t *errors = config_get_stdio_errors(config);
         assert(errors != NULL);
 
         err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
@@ -1452,33 +1416,32 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
 
     if (config->filesystem_encoding == NULL) {
 #ifdef _Py_FORCE_UTF8_FS_ENCODING
-        err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                      "utf-8");
+        err = _PyCoreConfig_SetString(&config->filesystem_encoding, L"utf-8");
 #else
 
 #ifdef MS_WINDOWS
         if (preconfig->legacy_windows_fs_encoding) {
             /* Legacy Windows filesystem encoding: mbcs/replace */
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "mbcs");
+                                          L"mbcs");
         }
         else
 #endif
         if (preconfig->utf8_mode) {
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "utf-8");
+                                          L"utf-8");
         }
 #ifndef MS_WINDOWS
         else if (_Py_GetForceASCII()) {
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "ascii");
+                                          L"ascii");
         }
 #endif
         else {
 #ifdef MS_WINDOWS
             /* Windows defaults to utf-8/surrogatepass (PEP 529). */
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "utf-8");
+                                          L"utf-8");
 #else
             err = config_get_locale_encoding(&config->filesystem_encoding);
 #endif
@@ -1491,16 +1454,16 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
     }
 
     if (config->filesystem_errors == NULL) {
-        const char *errors;
+        const wchar_t *errors;
 #ifdef MS_WINDOWS
         if (preconfig->legacy_windows_fs_encoding) {
-            errors = "replace";
+            errors = L"replace";
         }
         else {
-            errors = "surrogatepass";
+            errors = L"surrogatepass";
         }
 #else
-        errors = "surrogateescape";
+        errors = L"surrogateescape";
 #endif
         err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
         if (_Py_INIT_FAILED(err)) {
@@ -1745,8 +1708,8 @@ config_parse_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline,
                 || wcscmp(_PyOS_optarg, L"never") == 0
                 || wcscmp(_PyOS_optarg, L"default") == 0)
             {
-                err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode,
-                                                  _PyOS_optarg);
+                err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode,
+                                              _PyOS_optarg);
                 if (_Py_INIT_FAILED(err)) {
                     return err;
                 }
@@ -2119,7 +2082,7 @@ config_read_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline)
     }
 
     if (config->check_hash_pycs_mode == NULL) {
-        err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, L"default");
+        err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode, L"default");
         if (_Py_INIT_FAILED(err)) {
             goto done;
         }
index 108cbc6660617631e38d074faaf1993beef7de7a..48b9e8383aae146c697c0da1fae629eacaa1f255 100644 (file)
 /* --- File system encoding/errors -------------------------------- */
 
 /* The filesystem encoding is chosen by config_init_fs_encoding(),
-   see also initfsencoding(). */
+   see also initfsencoding().
+
+   Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+   are encoded to UTF-8. */
 const char *Py_FileSystemDefaultEncoding = NULL;
 int Py_HasFileSystemDefaultEncoding = 0;
 const char *Py_FileSystemDefaultEncodeErrors = NULL;
index 01ef027b9d860f3a81d8e9f4fbfe22351fcdeb50..2a633cf1cf92733f4577146368c4757e0f31969b 100644 (file)
@@ -1668,7 +1668,7 @@ is_valid_fd(int fd)
 static PyObject*
 create_stdio(const _PyCoreConfig *config, PyObject* io,
     int fd, int write_mode, const char* name,
-    const char* encoding, const char* errors)
+    const wchar_t* encoding, const wchar_t* errors)
 {
     PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res;
     const char* mode;
@@ -1718,7 +1718,7 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
 #ifdef MS_WINDOWS
     /* Windows console IO is always UTF-8 encoded */
     if (PyWindowsConsoleIO_Check(raw))
-        encoding = "utf-8";
+        encoding = L"utf-8";
 #endif
 
     text = PyUnicode_FromString(name);
@@ -1754,10 +1754,25 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
     newline = "\n";
 #endif
 
-    stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssOO",
-                                    buf, encoding, errors,
+    PyObject *encoding_str = PyUnicode_FromWideChar(encoding, -1);
+    if (encoding_str == NULL) {
+        Py_CLEAR(buf);
+        goto error;
+    }
+
+    PyObject *errors_str = PyUnicode_FromWideChar(errors, -1);
+    if (errors_str == NULL) {
+        Py_CLEAR(buf);
+        Py_CLEAR(encoding_str);
+        goto error;
+    }
+
+    stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OOOsOO",
+                                    buf, encoding_str, errors_str,
                                     newline, line_buffering, write_through);
     Py_CLEAR(buf);
+    Py_CLEAR(encoding_str);
+    Py_CLEAR(errors_str);
     if (stream == NULL)
         goto error;
 
@@ -1874,7 +1889,7 @@ init_sys_streams(PyInterpreterState *interp)
     fd = fileno(stderr);
     std = create_stdio(config, iomod, fd, 1, "<stderr>",
                        config->stdio_encoding,
-                       "backslashreplace");
+                       L"backslashreplace");
     if (std == NULL)
         goto error;
 
index 0f7af2c69da538b1fca5f34957f7ffa08d895986..fbdeb9b5565cf87787b12014ffce880e26dd5577 100644 (file)
@@ -424,7 +424,7 @@ sys_getfilesystemencoding_impl(PyObject *module)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
     const _PyCoreConfig *config = &interp->core_config;
-    return PyUnicode_FromString(config->filesystem_encoding);
+    return PyUnicode_FromWideChar(config->filesystem_encoding, -1);
 }
 
 /*[clinic input]
@@ -439,7 +439,7 @@ sys_getfilesystemencodeerrors_impl(PyObject *module)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
     const _PyCoreConfig *config = &interp->core_config;
-    return PyUnicode_FromString(config->filesystem_errors);
+    return PyUnicode_FromWideChar(config->filesystem_errors, -1);
 }
 
 /*[clinic input]
@@ -1211,30 +1211,9 @@ static PyObject *
 sys__enablelegacywindowsfsencoding_impl(PyObject *module)
 /*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-    _PyCoreConfig *config = &interp->core_config;
-
-    /* Set the filesystem encoding to mbcs/replace (PEP 529) */
-    char *encoding = _PyMem_RawStrdup("mbcs");
-    char *errors = _PyMem_RawStrdup("replace");
-    if (encoding == NULL || errors == NULL) {
-        PyMem_Free(encoding);
-        PyMem_Free(errors);
-        PyErr_NoMemory();
-        return NULL;
-    }
-
-    PyMem_RawFree(config->filesystem_encoding);
-    config->filesystem_encoding = encoding;
-    PyMem_RawFree(config->filesystem_errors);
-    config->filesystem_errors = errors;
-
-    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
-                                  config->filesystem_errors) < 0) {
-        PyErr_NoMemory();
+    if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) {
         return NULL;
     }
-
     Py_RETURN_NONE;
 }