See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
*/
- char *filesystem_encoding;
- char *filesystem_errors;
+ wchar_t *filesystem_encoding;
+ wchar_t *filesystem_errors;
wchar_t *pycache_prefix; /* PYTHONPYCACHEPREFIX, -X pycache_prefix=PATH */
wchar_t *program_name; /* Program name, see also Py_GetProgramName() */
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_errors' attribute. */
- char *stdio_encoding;
+ wchar_t *stdio_encoding;
/* Error handler of sys.stdin and sys.stdout.
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_encoding' attribute. */
- char *stdio_errors;
+ wchar_t *stdio_errors;
#ifdef MS_WINDOWS
/* If greater than zero, use io.FileIO instead of WindowsConsoleIO for sys
_PyCoreConfig *config,
const _PyCoreConfig *config2);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetString(
- char **config_str,
- const char *str);
-PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideString(
wchar_t **config_str,
const wchar_t *str);
-PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideStringFromString(
+PyAPI_FUNC(_PyInitError) _PyCoreConfig_DecodeLocale(
wchar_t **config_str,
const char *str);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_InitPathConfig(_PyCoreConfig *config);
const char *errors);
extern void _Py_ClearFileSystemEncoding(void);
extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
+#ifdef MS_WINDOWS
+extern int _PyUnicode_EnableLegacyWindowsFSEncoding(void);
+#endif
PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);
PyObject *codec_search_cache;
PyObject *codec_error_registry;
int codecs_initialized;
- int fscodec_initialized;
+
+ /* fs_codec.encoding is initialized to NULL.
+ Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
+ struct {
+ char *encoding; /* Filesystem encoding (encoded to UTF-8) */
+ char *errors; /* Filesystem errors (encoded to UTF-8) */
+ _Py_error_handler error_handler;
+ } fs_codec;
_PyCoreConfig core_config;
#ifdef HAVE_DLOPEN
STRINGLIB(utf8_encoder)(PyObject *unicode,
STRINGLIB_CHAR *data,
Py_ssize_t size,
+ _Py_error_handler error_handler,
const char *errors)
{
Py_ssize_t i; /* index into data of next input character */
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;
- _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
#endif
#if STRINGLIB_SIZEOF_CHAR == 1
const Py_ssize_t max_char_size = 2;
#define PY_SSIZE_T_CLEAN
#include "Python.h"
+#include "pycore_coreconfig.h"
#include "pycore_fileutils.h"
#include "pycore_object.h"
#include "pycore_pylifecycle.h"
/* Forward declaration */
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+ const char *errors);
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+ _Py_error_handler error_handler, const char *errors,
+ Py_ssize_t *consumed);
/* List of static strings. */
static _Py_Identifier *static_strings = NULL;
return _Py_ERROR_OTHER;
}
+
+/* Translate a wide-character error handler name into the matching
+   _Py_error_handler value.
+
+   A NULL name is treated as "strict".  A name that matches none of the
+   table entries below yields _Py_ERROR_OTHER. */
+static _Py_error_handler
+get_error_handler_wide(const wchar_t *errors)
+{
+    static const struct {
+        const wchar_t *name;
+        _Py_error_handler handler;
+    } known_handlers[] = {
+        {L"strict", _Py_ERROR_STRICT},
+        {L"surrogateescape", _Py_ERROR_SURROGATEESCAPE},
+        {L"replace", _Py_ERROR_REPLACE},
+        {L"ignore", _Py_ERROR_IGNORE},
+        {L"backslashreplace", _Py_ERROR_BACKSLASHREPLACE},
+        {L"surrogatepass", _Py_ERROR_SURROGATEPASS},
+        {L"xmlcharrefreplace", _Py_ERROR_XMLCHARREFREPLACE},
+    };
+    const size_t count = sizeof(known_handlers) / sizeof(known_handlers[0]);
+
+    if (errors == NULL) {
+        return _Py_ERROR_STRICT;
+    }
+    for (size_t i = 0; i < count; i++) {
+        if (wcscmp(errors, known_handlers[i].name) == 0) {
+            return known_handlers[i].handler;
+        }
+    }
+    return _Py_ERROR_OTHER;
+}
+
+
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
This function is kept for backward compatibility with the old API. */
Py_UNICODE
static PyObject *
-unicode_encode_locale(PyObject *unicode, const char *errors,
+unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
int current_locale)
{
- _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
-
Py_ssize_t wlen;
wchar_t *wstr = PyUnicode_AsWideCharString(unicode, &wlen);
if (wstr == NULL) {
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{
- return unicode_encode_locale(unicode, errors, 1);
+ _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+ return unicode_encode_locale(unicode, error_handler, 1);
}
PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
- const _PyCoreConfig *config = &interp->core_config;
#ifdef _Py_FORCE_UTF8_FS_ENCODING
- return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
+ if (interp->fs_codec.encoding) {
+ return unicode_encode_utf8(unicode,
+ interp->fs_codec.error_handler,
+ interp->fs_codec.errors);
+ }
+ else {
+ const _PyCoreConfig *config = &interp->core_config;
+ _Py_error_handler errors;
+ errors = get_error_handler_wide(config->filesystem_errors);
+ assert(errors != _Py_ERROR_UNKNOWN);
+ return unicode_encode_utf8(unicode, errors, NULL);
+ }
#else
/* Bootstrap check: if the filesystem codec is implemented in Python, we
cannot use it to encode and decode filenames before it is loaded. Loading
the Python codec requires encoding at least its own filename. Use the C
implementation of the locale codec until the codec registry is
initialized and the Python codec is loaded. See initfsencoding(). */
- if (interp->fscodec_initialized) {
+ if (interp->fs_codec.encoding) {
return PyUnicode_AsEncodedString(unicode,
- config->filesystem_encoding,
- config->filesystem_errors);
+ interp->fs_codec.encoding,
+ interp->fs_codec.errors);
}
else {
- return unicode_encode_locale(unicode,
- config->filesystem_errors, 0);
+ const _PyCoreConfig *config = &interp->core_config;
+ _Py_error_handler errors;
+ errors = get_error_handler_wide(config->filesystem_errors);
+ assert(errors != _Py_ERROR_UNKNOWN);
+ return unicode_encode_locale(unicode, errors, 0);
}
#endif
}
}
static PyObject*
-unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
- int current_locale)
+unicode_decode_locale(const char *str, Py_ssize_t len,
+ _Py_error_handler errors, int current_locale)
{
- _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
-
if (str[len] != '\0' || (size_t)len != strlen(str)) {
PyErr_SetString(PyExc_ValueError, "embedded null byte");
return NULL;
size_t wlen;
const char *reason;
int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
- current_locale, error_handler);
+ current_locale, errors);
if (res != 0) {
if (res == -2) {
PyObject *exc;
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
{
- return unicode_decode_locale(str, len, errors, 1);
+ _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+ return unicode_decode_locale(str, len, error_handler, 1);
}
PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors)
{
Py_ssize_t size = (Py_ssize_t)strlen(str);
- return unicode_decode_locale(str, size, errors, 1);
+ _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+ return unicode_decode_locale(str, size, error_handler, 1);
}
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
- const _PyCoreConfig *config = &interp->core_config;
#ifdef _Py_FORCE_UTF8_FS_ENCODING
- return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
+ if (interp->fs_codec.encoding) {
+ return unicode_decode_utf8(s, size,
+ interp->fs_codec.error_handler,
+ interp->fs_codec.errors,
+ NULL);
+ }
+ else {
+ const _PyCoreConfig *config = &interp->core_config;
+ _Py_error_handler errors;
+ errors = get_error_handler_wide(config->filesystem_errors);
+ assert(errors != _Py_ERROR_UNKNOWN);
+ return unicode_decode_utf8(s, size, errors, NULL, NULL);
+ }
#else
/* Bootstrap check: if the filesystem codec is implemented in Python, we
cannot use it to encode and decode filenames before it is loaded. Loading
the Python codec requires encoding at least its own filename. Use the C
implementation of the locale codec until the codec registry is
initialized and the Python codec is loaded. See initfsencoding(). */
- if (interp->fscodec_initialized) {
+ if (interp->fs_codec.encoding) {
return PyUnicode_Decode(s, size,
- config->filesystem_encoding,
- config->filesystem_errors);
+ interp->fs_codec.encoding,
+ interp->fs_codec.errors);
}
else {
- return unicode_decode_locale(s, size,
- config->filesystem_errors, 0);
+ const _PyCoreConfig *config = &interp->core_config;
+ _Py_error_handler errors;
+ errors = get_error_handler_wide(config->filesystem_errors);
+ return unicode_decode_locale(s, size, errors, 0);
}
#endif
}
return p - start;
}
-PyObject *
-PyUnicode_DecodeUTF8Stateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+ _Py_error_handler error_handler, const char *errors,
+ Py_ssize_t *consumed)
{
_PyUnicodeWriter writer;
const char *starts = s;
const char *errmsg = "";
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
- _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
if (size == 0) {
if (consumed)
}
+/* Public UTF-8 decoder entry point: forwards to unicode_decode_utf8() with
+   _Py_ERROR_UNKNOWN as the error handler, passing 'errors' and 'consumed'
+   through unchanged. */
+PyObject *
+PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size,
+                             const char *errors, Py_ssize_t *consumed)
+{
+    const _Py_error_handler handler = _Py_ERROR_UNKNOWN;
+    PyObject *decoded = unicode_decode_utf8(s, size, handler, errors, consumed);
+    return decoded;
+}
+
+
/* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
non-zero, use strict error handler otherwise.
maximum possible needed (4 result bytes per Unicode character), and return
the excess memory at the end.
*/
-PyObject *
-_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+ const char *errors)
{
enum PyUnicode_Kind kind;
void *data;
case PyUnicode_1BYTE_KIND:
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
assert(!PyUnicode_IS_ASCII(unicode));
- return ucs1lib_utf8_encoder(unicode, data, size, errors);
+ return ucs1lib_utf8_encoder(unicode, data, size, error_handler, errors);
case PyUnicode_2BYTE_KIND:
- return ucs2lib_utf8_encoder(unicode, data, size, errors);
+ return ucs2lib_utf8_encoder(unicode, data, size, error_handler, errors);
case PyUnicode_4BYTE_KIND:
- return ucs4lib_utf8_encoder(unicode, data, size, errors);
+ return ucs4lib_utf8_encoder(unicode, data, size, error_handler, errors);
}
}
+/* Encode 'unicode' to UTF-8: forwards to unicode_encode_utf8() with
+   _Py_ERROR_UNKNOWN as the error handler and 'errors' passed through
+   unchanged. */
+PyObject *
+_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
+{
+    const _Py_error_handler handler = _Py_ERROR_UNKNOWN;
+    PyObject *encoded = unicode_encode_utf8(unicode, handler, errors);
+    return encoded;
+}
+
+
PyObject *
PyUnicode_EncodeUTF8(const Py_UNICODE *s,
Py_ssize_t size,
}
-static char*
-get_codec_name(const char *encoding)
+/* Encode the wide string 'wstr' to UTF-8 with the strict error handler and
+ store the result in *str (callers in this file release it with
+ PyMem_RawFree()).
+
+ 'name' is only used to build the error message.
+
+ Return 0 on success. Return -1 with an exception set on failure:
+ RuntimeError when _Py_EncodeUTF8Ex() returns -2, MemoryError for any
+ other negative result. */
+static int
+encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
{
- PyObject *codec, *name_obj = NULL;
+ int res;
+ res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
+ if (res == -2) {
+ /* This helper encodes (wchar_t* -> UTF-8), so report an encoding
+ failure, and raise an error class rather than a warning class. */
+ PyErr_Format(PyExc_RuntimeError, "cannot encode %s", name);
+ return -1;
+ }
+ if (res < 0) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ return 0;
+}
+
+
+/* Replace *config_encoding with the codec name obtained through
+ _PyCodec_Lookup().
+
+ The current value is encoded to UTF-8 for the lookup. The resulting name
+ is duplicated with _PyMem_RawWcsdup(); the previous *config_encoding is
+ released with PyMem_RawFree() and replaced by the copy.
+
+ Return 0 on success, -1 on failure.
+
+ NOTE(review): the label passed to encode_wstr_utf8() is always
+ "stdio_encoding", even when the caller passes the filesystem encoding. */
+static int
+config_get_codec_name(wchar_t **config_encoding)
+{
+ char *encoding;
+ if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) {
+ return -1;
+ }
+
+ PyObject *name_obj = NULL;
+ PyObject *codec = _PyCodec_Lookup(encoding);
+ PyMem_RawFree(encoding);
- codec = _PyCodec_Lookup(encoding);
if (!codec)
goto error;
goto error;
}
- const char *name_utf8 = PyUnicode_AsUTF8(name_obj);
- if (name_utf8 == NULL) {
+ wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL);
+ /* Release and reset name_obj: the error label below runs
+ Py_XDECREF(name_obj), which must not drop the reference twice. */
+ Py_CLEAR(name_obj);
+ if (wname == NULL) {
goto error;
}
- char *name = _PyMem_RawStrdup(name_utf8);
- Py_DECREF(name_obj);
- if (name == NULL) {
+ wchar_t *raw_wname = _PyMem_RawWcsdup(wname);
+ if (raw_wname == NULL) {
+ PyMem_Free(wname);
PyErr_NoMemory();
- return NULL;
+ goto error;
}
- return name;
+
+ PyMem_RawFree(*config_encoding);
+ *config_encoding = raw_wname;
+
+ PyMem_Free(wname);
+ return 0;
error:
Py_XDECREF(codec);
Py_XDECREF(name_obj);
- return NULL;
+ return -1;
}
static _PyInitError
init_stdio_encoding(PyInterpreterState *interp)
{
+ /* Update the stdio encoding to the normalized Python codec name. */
_PyCoreConfig *config = &interp->core_config;
-
- char *codec_name = get_codec_name(config->stdio_encoding);
- if (codec_name == NULL) {
+ if (config_get_codec_name(&config->stdio_encoding) < 0) {
return _Py_INIT_ERR("failed to get the Python codec name "
"of the stdio encoding");
}
- PyMem_RawFree(config->stdio_encoding);
- config->stdio_encoding = codec_name;
return _Py_INIT_OK();
}
-static _PyInitError
-init_fs_encoding(PyInterpreterState *interp)
+/* Fill interp->fs_codec from the filesystem_encoding and filesystem_errors
+ of interp->core_config.
+
+ Both wide strings are encoded to UTF-8 and stored, together with the
+ matching _Py_error_handler value, in interp->fs_codec; any previous
+ strings are released with PyMem_RawFree(). The UTF-8 strings are then
+ passed to _Py_SetFileSystemEncoding().
+
+ Return 0 on success, -1 with an exception set on failure. */
+static int
+init_fs_codec(PyInterpreterState *interp)
{
_PyCoreConfig *config = &interp->core_config;
- char *encoding = get_codec_name(config->filesystem_encoding);
- if (encoding == NULL) {
- /* Such error can only occurs in critical situations: no more
- memory, import a module of the standard library failed, etc. */
- return _Py_INIT_ERR("failed to get the Python codec "
- "of the filesystem encoding");
+ _Py_error_handler error_handler;
+ error_handler = get_error_handler_wide(config->filesystem_errors);
+ /* NOTE(review): get_error_handler_wide() as written in this file returns
+ _Py_ERROR_OTHER for unrecognized names, so this branch looks
+ unreachable -- confirm whether _Py_ERROR_OTHER should be rejected. */
+ if (error_handler == _Py_ERROR_UNKNOWN) {
+ PyErr_SetString(PyExc_RuntimeError, "unknown filesystem error handler");
+ return -1;
}
- /* Update the filesystem encoding to the normalized Python codec name.
- For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
- (Python codec name). */
- PyMem_RawFree(config->filesystem_encoding);
- config->filesystem_encoding = encoding;
+ char *encoding, *errors;
+ if (encode_wstr_utf8(config->filesystem_encoding,
+ &encoding,
+ "filesystem_encoding") < 0) {
+ return -1;
+ }
+
+ if (encode_wstr_utf8(config->filesystem_errors,
+ &errors,
+ "filesystem_errors") < 0) {
+ PyMem_RawFree(encoding);
+ return -1;
+ }
+
+ PyMem_RawFree(interp->fs_codec.encoding);
+ interp->fs_codec.encoding = encoding;
+ PyMem_RawFree(interp->fs_codec.errors);
+ interp->fs_codec.errors = errors;
+ interp->fs_codec.error_handler = error_handler;
+
+ /* At this point, PyUnicode_EncodeFSDefault() and
+ PyUnicode_DecodeFSDefault() can now use the Python codec rather than
+ the C implementation of the filesystem encoding. */
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
global configuration variables. */
- if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
- config->filesystem_errors) < 0) {
- return _Py_INIT_NO_MEMORY();
+ if (_Py_SetFileSystemEncoding(interp->fs_codec.encoding,
+ interp->fs_codec.errors) < 0) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ return 0;
+}
+
+
+static _PyInitError
+init_fs_encoding(PyInterpreterState *interp)
+{
+ /* Update the filesystem encoding to the normalized Python codec name.
+ For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
+ (Python codec name). */
+ _PyCoreConfig *config = &interp->core_config;
+ if (config_get_codec_name(&config->filesystem_encoding) < 0) {
+ return _Py_INIT_ERR("failed to get the Python codec "
+ "of the filesystem encoding");
}
- /* PyUnicode can now use the Python codec rather than C implementation
- for the filesystem encoding */
- interp->fscodec_initialized = 1;
+ if (init_fs_codec(interp) < 0) {
+ return _Py_INIT_ERR("cannot initialize filesystem codec");
+ }
return _Py_INIT_OK();
}
}
+#ifdef MS_WINDOWS
+/* Switch the filesystem encoding of the current interpreter's core config
+   to mbcs/replace (PEP 529), then refresh the filesystem codec state
+   through init_fs_codec().
+
+   Return init_fs_codec()'s result once the config has been updated.
+   Return -1 with MemoryError set if either string cannot be duplicated;
+   the config is left untouched in that case. */
+int
+_PyUnicode_EnableLegacyWindowsFSEncoding(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+    _PyCoreConfig *cfg = &interp->core_config;
+
+    wchar_t *new_encoding = _PyMem_RawWcsdup(L"mbcs");
+    wchar_t *new_errors = _PyMem_RawWcsdup(L"replace");
+
+    if (new_encoding != NULL && new_errors != NULL) {
+        /* Both copies exist: swap them in, releasing the old strings. */
+        PyMem_RawFree(cfg->filesystem_encoding);
+        cfg->filesystem_encoding = new_encoding;
+        PyMem_RawFree(cfg->filesystem_errors);
+        cfg->filesystem_errors = new_errors;
+
+        return init_fs_codec(interp);
+    }
+
+    /* At least one duplication failed: drop whichever one succeeded. */
+    PyMem_RawFree(new_encoding);
+    PyMem_RawFree(new_errors);
+    PyErr_NoMemory();
+    return -1;
+}
+#endif
+
+
void
_PyUnicode_Fini(void)
{
}
_PyUnicode_ClearStaticStrings();
(void)PyUnicode_ClearFreeList();
+
+ PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+ PyMem_RawFree(interp->fs_codec.encoding);
+ interp->fs_codec.encoding = NULL;
+ PyMem_RawFree(interp->fs_codec.errors);
+ interp->fs_codec.errors = NULL;
}
Force it to 0 through the config. */
config.legacy_windows_stdio = 0;
#endif
- config.stdio_encoding = "iso8859-1";
- config.stdio_errors = "replace";
+ config.stdio_encoding = L"iso8859-1";
+ config.stdio_errors = L"replace";
putenv("PYTHONNOUSERSITE=");
Py_NoUserSiteDirectory = 0;
/* Copy str into *config_str (duplicate the string) */
_PyInitError
-_PyCoreConfig_SetString(char **config_str, const char *str)
-{
- char *str2;
- if (str != NULL) {
- str2 = _PyMem_RawStrdup(str);
- if (str2 == NULL) {
- return _Py_INIT_NO_MEMORY();
- }
- }
- else {
- str2 = NULL;
- }
- PyMem_RawFree(*config_str);
- *config_str = str2;
- return _Py_INIT_OK();
-}
-
-
-/* Copy str into *config_str (duplicate the string) */
-_PyInitError
-_PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
+_PyCoreConfig_SetString(wchar_t **config_str, const wchar_t *str)
{
wchar_t *str2;
if (str != NULL) {
/* Decode str using Py_DecodeLocale() and set the result into *config_str */
static _PyInitError
-_PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
- const char *decode_err_msg)
+_PyCoreConfig_DecodeLocaleErr(wchar_t **config_str, const char *str,
+ const char *decode_err_msg)
{
wchar_t *str2;
if (str != NULL) {
}
+#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
+ _PyCoreConfig_DecodeLocaleErr(config_str, str, "cannot decode " NAME)
+
+
_PyInitError
-_PyCoreConfig_SetWideStringFromString(wchar_t **config_str, const char *str)
+_PyCoreConfig_DecodeLocale(wchar_t **config_str, const char *str)
{
- return _PyCoreConfig_SetWideStringFromStringErr(
- config_str, str, "cannot decode string");
+ return CONFIG_DECODE_LOCALE(config_str, str, "string");
}
-#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
- _PyCoreConfig_SetWideStringFromStringErr(config_str, str, \
- "cannot decode " NAME)
-
-
_PyInitError
_PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
{
_PyCoreConfig_Clear(config);
#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
-#define COPY_STR_ATTR(ATTR) \
- do { \
- err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
- if (_Py_INIT_FAILED(err)) { \
- return err; \
- } \
- } while (0)
#define COPY_WSTR_ATTR(ATTR) \
do { \
- err = _PyCoreConfig_SetWideString(&config->ATTR, config2->ATTR); \
+ err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
if (_Py_INIT_FAILED(err)) { \
return err; \
} \
COPY_ATTR(quiet);
COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio);
- COPY_STR_ATTR(filesystem_encoding);
- COPY_STR_ATTR(filesystem_errors);
- COPY_STR_ATTR(stdio_encoding);
- COPY_STR_ATTR(stdio_errors);
+ COPY_WSTR_ATTR(filesystem_encoding);
+ COPY_WSTR_ATTR(filesystem_errors);
+ COPY_WSTR_ATTR(stdio_encoding);
+ COPY_WSTR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
COPY_ATTR(legacy_windows_stdio);
#endif
COPY_ATTR(_init_main);
#undef COPY_ATTR
-#undef COPY_STR_ATTR
#undef COPY_WSTR_ATTR
#undef COPY_WSTRLIST
return _Py_INIT_OK();
goto fail; \
} \
} while (0)
-#define FROM_STRING(STR) \
- ((STR != NULL) ? \
- PyUnicode_FromString(STR) \
- : (Py_INCREF(Py_None), Py_None))
#define SET_ITEM_INT(ATTR) \
SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
#define SET_ITEM_UINT(ATTR) \
SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR))
-#define SET_ITEM_STR(ATTR) \
- SET_ITEM(#ATTR, FROM_STRING(config->ATTR))
#define FROM_WSTRING(STR) \
((STR != NULL) ? \
PyUnicode_FromWideChar(STR, -1) \
SET_ITEM_INT(show_alloc_count);
SET_ITEM_INT(dump_refs);
SET_ITEM_INT(malloc_stats);
- SET_ITEM_STR(filesystem_encoding);
- SET_ITEM_STR(filesystem_errors);
+ SET_ITEM_WSTR(filesystem_encoding);
+ SET_ITEM_WSTR(filesystem_errors);
SET_ITEM_WSTR(pycache_prefix);
SET_ITEM_WSTR(program_name);
SET_ITEM_WSTRLIST(argv);
SET_ITEM_INT(quiet);
SET_ITEM_INT(user_site_directory);
SET_ITEM_INT(buffered_stdio);
- SET_ITEM_STR(stdio_encoding);
- SET_ITEM_STR(stdio_errors);
+ SET_ITEM_WSTR(stdio_encoding);
+ SET_ITEM_WSTR(stdio_errors);
#ifdef MS_WINDOWS
SET_ITEM_INT(legacy_windows_stdio);
#endif
Py_DECREF(dict);
return NULL;
-#undef FROM_STRING
#undef FROM_WSTRING
#undef SET_ITEM
#undef SET_ITEM_INT
#undef SET_ITEM_UINT
-#undef SET_ITEM_STR
#undef SET_ITEM_WSTR
#undef SET_ITEM_WSTRLIST
}
return _Py_INIT_OK();
}
- return _PyCoreConfig_SetWideString(dest, var);
+ return _PyCoreConfig_SetString(dest, var);
#else
const char *var = getenv(name);
if (!var || var[0] == '\0') {
return _Py_INIT_OK();
}
- return _PyCoreConfig_SetWideStringFromStringErr(dest, var, decode_err_msg);
+ return _PyCoreConfig_DecodeLocaleErr(dest, var, decode_err_msg);
#endif
}
/* Use argv[0] by default, if available */
if (config->program != NULL) {
- err = _PyCoreConfig_SetWideString(&config->program_name,
- config->program);
+ err = _PyCoreConfig_SetString(&config->program_name, config->program);
if (_Py_INIT_FAILED(err)) {
return err;
}
#else
const wchar_t *default_program_name = L"python3";
#endif
- err = _PyCoreConfig_SetWideString(&config->program_name, default_program_name);
+ err = _PyCoreConfig_SetString(&config->program_name, default_program_name);
if (_Py_INIT_FAILED(err)) {
return err;
}
/* If Py_SetProgramFullPath() was called, use its value */
const wchar_t *program_full_path = _Py_path_config.program_full_path;
if (program_full_path != NULL) {
- _PyInitError err = _PyCoreConfig_SetWideString(&config->executable,
- program_full_path);
+ _PyInitError err = _PyCoreConfig_SetString(&config->executable,
+ program_full_path);
if (_Py_INIT_FAILED(err)) {
return err;
}
/* If Py_SetPythonHome() was called, use its value */
wchar_t *home = _Py_path_config.home;
if (home) {
- _PyInitError err = _PyCoreConfig_SetWideString(&config->home, home);
+ _PyInitError err = _PyCoreConfig_SetString(&config->home, home);
if (_Py_INIT_FAILED(err)) {
return err;
}
}
-static const char *
+static const wchar_t *
config_get_stdio_errors(const _PyCoreConfig *config)
{
#ifndef MS_WINDOWS
if (loc != NULL) {
/* surrogateescape is the default in the legacy C and POSIX locales */
if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
- return "surrogateescape";
+ return L"surrogateescape";
}
#ifdef PY_COERCE_C_LOCALE
/* surrogateescape is the default in locale coercion target locales */
if (_Py_IsLocaleCoercionTarget(loc)) {
- return "surrogateescape";
+ return L"surrogateescape";
}
#endif
}
- return "strict";
+ return L"strict";
#else
/* On Windows, always use surrogateescape by default */
- return "surrogateescape";
+ return L"surrogateescape";
#endif
}
static _PyInitError
-config_get_locale_encoding(char **locale_encoding)
+config_get_locale_encoding(wchar_t **locale_encoding)
{
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
+ return _PyCoreConfig_DecodeLocale(locale_encoding, encoding);
#elif defined(_Py_FORCE_UTF8_LOCALE)
- const char *encoding = "UTF-8";
+ return _PyCoreConfig_SetString(locale_encoding, L"utf-8");
#else
const char *encoding = nl_langinfo(CODESET);
if (!encoding || encoding[0] == '\0') {
return _Py_INIT_ERR("failed to get the locale encoding: "
"nl_langinfo(CODESET) failed");
}
+ /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
+ return CONFIG_DECODE_LOCALE(locale_encoding, encoding,
+ "nl_langinfo(CODESET)");
#endif
-
- assert(*locale_encoding == NULL);
- return _PyCoreConfig_SetString(locale_encoding, encoding);
}
/* If Py_SetStandardStreamEncoding() has been called, use these
parameters. */
if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
- err = _PyCoreConfig_SetString(&config->stdio_encoding,
- _Py_StandardStreamEncoding);
+ err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
+ _Py_StandardStreamEncoding,
+ "_Py_StandardStreamEncoding");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
- err = _PyCoreConfig_SetString(&config->stdio_errors,
- _Py_StandardStreamErrors);
+ err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
+ _Py_StandardStreamErrors,
+ "_Py_StandardStreamErrors");
if (_Py_INIT_FAILED(err)) {
return err;
}
/* PYTHONIOENCODING environment variable */
const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
if (opt) {
- /* _PyCoreConfig_SetString() requires dest to be initialized to NULL */
- char *pythonioencoding = NULL;
- err = _PyCoreConfig_SetString(&pythonioencoding, opt);
- if (_Py_INIT_FAILED(err)) {
- return err;
+ char *pythonioencoding = _PyMem_RawStrdup(opt);
+ if (pythonioencoding == NULL) {
+ return _Py_INIT_NO_MEMORY();
}
char *errors = strchr(pythonioencoding, ':');
/* Does PYTHONIOENCODING contain an encoding? */
if (pythonioencoding[0]) {
if (config->stdio_encoding == NULL) {
- err = _PyCoreConfig_SetString(&config->stdio_encoding,
- pythonioencoding);
+ err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
+ pythonioencoding,
+ "PYTHONIOENCODING environment variable");
if (_Py_INIT_FAILED(err)) {
PyMem_RawFree(pythonioencoding);
return err;
}
if (config->stdio_errors == NULL && errors != NULL) {
- err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
+ err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
+ errors,
+ "PYTHONIOENCODING environment variable");
if (_Py_INIT_FAILED(err)) {
PyMem_RawFree(pythonioencoding);
return err;
/* UTF-8 Mode uses UTF-8/surrogateescape */
if (preconfig->utf8_mode) {
if (config->stdio_encoding == NULL) {
- err = _PyCoreConfig_SetString(&config->stdio_encoding,
- "utf-8");
+ err = _PyCoreConfig_SetString(&config->stdio_encoding, L"utf-8");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->stdio_errors == NULL) {
err = _PyCoreConfig_SetString(&config->stdio_errors,
- "surrogateescape");
+ L"surrogateescape");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
}
if (config->stdio_errors == NULL) {
- const char *errors = config_get_stdio_errors(config);
+ const wchar_t *errors = config_get_stdio_errors(config);
assert(errors != NULL);
err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
if (config->filesystem_encoding == NULL) {
#ifdef _Py_FORCE_UTF8_FS_ENCODING
- err = _PyCoreConfig_SetString(&config->filesystem_encoding,
- "utf-8");
+ err = _PyCoreConfig_SetString(&config->filesystem_encoding, L"utf-8");
#else
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
- "mbcs");
+ L"mbcs");
}
else
#endif
if (preconfig->utf8_mode) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
- "utf-8");
+ L"utf-8");
}
#ifndef MS_WINDOWS
else if (_Py_GetForceASCII()) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
- "ascii");
+ L"ascii");
}
#endif
else {
#ifdef MS_WINDOWS
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
- "utf-8");
+ L"utf-8");
#else
err = config_get_locale_encoding(&config->filesystem_encoding);
#endif
}
if (config->filesystem_errors == NULL) {
- const char *errors;
+ const wchar_t *errors;
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
- errors = "replace";
+ errors = L"replace";
}
else {
- errors = "surrogatepass";
+ errors = L"surrogatepass";
}
#else
- errors = "surrogateescape";
+ errors = L"surrogateescape";
#endif
err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
if (_Py_INIT_FAILED(err)) {
|| wcscmp(_PyOS_optarg, L"never") == 0
|| wcscmp(_PyOS_optarg, L"default") == 0)
{
- err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode,
- _PyOS_optarg);
+ err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode,
+ _PyOS_optarg);
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->check_hash_pycs_mode == NULL) {
- err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, L"default");
+ err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode, L"default");
if (_Py_INIT_FAILED(err)) {
goto done;
}
/* --- File system encoding/errors -------------------------------- */
/* The filesystem encoding is chosen by config_init_fs_encoding(),
- see also initfsencoding(). */
+ see also initfsencoding().
+
+ Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+ are encoded to UTF-8. */
const char *Py_FileSystemDefaultEncoding = NULL;
int Py_HasFileSystemDefaultEncoding = 0;
const char *Py_FileSystemDefaultEncodeErrors = NULL;
static PyObject*
create_stdio(const _PyCoreConfig *config, PyObject* io,
int fd, int write_mode, const char* name,
- const char* encoding, const char* errors)
+ const wchar_t* encoding, const wchar_t* errors)
{
PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res;
const char* mode;
#ifdef MS_WINDOWS
/* Windows console IO is always UTF-8 encoded */
if (PyWindowsConsoleIO_Check(raw))
- encoding = "utf-8";
+ encoding = L"utf-8";
#endif
text = PyUnicode_FromString(name);
newline = "\n";
#endif
- stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssOO",
- buf, encoding, errors,
+ PyObject *encoding_str = PyUnicode_FromWideChar(encoding, -1);
+ if (encoding_str == NULL) {
+ Py_CLEAR(buf);
+ goto error;
+ }
+
+ PyObject *errors_str = PyUnicode_FromWideChar(errors, -1);
+ if (errors_str == NULL) {
+ Py_CLEAR(buf);
+ Py_CLEAR(encoding_str);
+ goto error;
+ }
+
+ stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OOOsOO",
+ buf, encoding_str, errors_str,
newline, line_buffering, write_through);
Py_CLEAR(buf);
+ Py_CLEAR(encoding_str);
+ Py_CLEAR(errors_str);
if (stream == NULL)
goto error;
fd = fileno(stderr);
std = create_stdio(config, iomod, fd, 1, "<stderr>",
config->stdio_encoding,
- "backslashreplace");
+ L"backslashreplace");
if (std == NULL)
goto error;
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
- return PyUnicode_FromString(config->filesystem_encoding);
+ return PyUnicode_FromWideChar(config->filesystem_encoding, -1);
}
/*[clinic input]
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
- return PyUnicode_FromString(config->filesystem_errors);
+ return PyUnicode_FromWideChar(config->filesystem_errors, -1);
}
/*[clinic input]
sys__enablelegacywindowsfsencoding_impl(PyObject *module)
/*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/
{
- PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
- _PyCoreConfig *config = &interp->core_config;
-
- /* Set the filesystem encoding to mbcs/replace (PEP 529) */
- char *encoding = _PyMem_RawStrdup("mbcs");
- char *errors = _PyMem_RawStrdup("replace");
- if (encoding == NULL || errors == NULL) {
- PyMem_Free(encoding);
- PyMem_Free(errors);
- PyErr_NoMemory();
- return NULL;
- }
-
- PyMem_RawFree(config->filesystem_encoding);
- config->filesystem_encoding = encoding;
- PyMem_RawFree(config->filesystem_errors);
- config->filesystem_errors = errors;
-
- if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
- config->filesystem_errors) < 0) {
- PyErr_NoMemory();
+ if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) {
return NULL;
}
-
Py_RETURN_NONE;
}