* Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to
``True``
- * ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding
+ * ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding
the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8
mode (even when it would otherwise activate automatically).
See :envvar:`PYTHONUTF8` for more details.
* ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel
tree rooted at the given directory instead of to the code tree. See also
:envvar:`PYTHONPYCACHEPREFIX`.
+ * ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C
+ locale (:pep:`538`).
+ ``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX
+ locales to a more capable UTF-8 based alternative.
+ ``-X coerce_c_locale=warn`` will cause Python to emit warning messages on
+ ``stderr`` if either the locale coercion activates, or else if a locale
+ that *would* have triggered coercion is still active when the Python
+ runtime is initialized.
+ See :envvar:`PYTHONCOERCECLOCALE` for more details.
It also allows passing arbitrary values and retrieving them through the
:data:`sys._xoptions` dictionary.
.. versionadded:: 3.7
The ``-X importtime``, ``-X dev`` and ``-X utf8`` options.
+ .. versionadded:: 3.7.1
+ The ``-X coerce_c_locale`` option.
+
.. versionadded:: 3.8
The ``-X pycache_prefix`` option.
order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for
system interfaces.
+ Also available as the :option:`-X` ``coerce_c_locale`` option.
+
Availability: \*nix
.. versionadded:: 3.7
while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If
this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before
calling :c:func:`Py_Initialize`.
+
+:c:func:`Py_Initialize` and :c:func:`Py_Main` can no longer enable C locale
+coercion (:pep:`538`): it is always disabled. It can now only be enabled by
+the Python program (``python3``).
+
+Added a new :option:`-X` ``coerce_c_locale`` command line option to control C
+locale coercion (:pep:`538`).
return data
@classmethod
- def get_child_details(cls, env_vars):
+ def get_child_details(cls, env_vars, xoption=None):
"""Retrieves fsencoding and standard stream details from a child process
Returns (encoding_details, stderr_lines):
The child is run in isolated mode if the current interpreter supports
that.
"""
- result, py_cmd = run_python_until_end(
- "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
- **env_vars
- )
+ args = []
+ if xoption:
+ args.extend(("-X", f"coerce_c_locale={xoption}"))
+ args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT))
+ result, py_cmd = run_python_until_end(*args, **env_vars)
if not result.rc == 0:
result.fail(py_cmd)
# All subprocess outputs in this test case should be pure ASCII
expected_fs_encoding,
expected_stream_encoding,
expected_warnings,
- coercion_expected):
+ coercion_expected,
+ xoption=None):
"""Check the C locale handling for the given process environment
Parameters:
expected_stream_encoding: expected encoding for standard streams
expected_warnings: stderr output to expect (if any)
"""
- result = EncodingDetails.get_child_details(env_vars)
+ result = EncodingDetails.get_child_details(env_vars, xoption)
encoding_details, stderr_lines = result
expected_details = EncodingDetails.get_expected_details(
coercion_expected,
coerce_c_locale,
expected_warnings=None,
coercion_expected=True,
+ use_xoption=False,
**extra_vars):
"""Check the C locale handling for various configurations
"PYTHONCOERCECLOCALE": "",
}
base_var_dict.update(extra_vars)
+ xoption = None
if coerce_c_locale is not None:
- base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
+ if use_xoption:
+ xoption = coerce_c_locale
+ else:
+ base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
# Check behaviour for the default locale
with self.subTest(default_locale=True,
fs_encoding,
stream_encoding,
_expected_warnings,
- _coercion_expected)
+ _coercion_expected,
+ xoption=xoption)
# Check behaviour for explicitly configured locales
for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
fs_encoding,
stream_encoding,
expected_warnings,
- coercion_expected)
+ coercion_expected,
+ xoption=xoption)
def test_PYTHONCOERCECLOCALE_not_set(self):
# This should coerce to the first available target locale by default
expected_warnings=[LEGACY_LOCALE_WARNING],
coercion_expected=False)
+ def test_xoption_set_to_1(self):
+ self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1",
+ use_xoption=True)
+
+ def test_xoption_set_to_zero(self):
+ # The setting "0" should result in the locale coercion being disabled
+ self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+ EXPECTED_C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale="0",
+ coercion_expected=False,
+ use_xoption=True)
+ # Setting LC_ALL=C shouldn't make any difference to the behaviour
+ self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+ EXPECTED_C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale="0",
+ LC_ALL="C",
+ coercion_expected=False,
+ use_xoption=True)
+
+ def test_xoption_set_to_warn(self):
+ # -X coerce_c_locale=warn enables runtime warnings for legacy locales
+ self._check_c_locale_coercion("utf-8", "utf-8",
+ coerce_c_locale="warn",
+ expected_warnings=[CLI_COERCION_WARNING],
+ use_xoption=True)
+
def test_main():
test.support.run_unittest(
LocaleConfigurationTests,
env = os.environ.copy()
# Use C locale to get ascii for the locale encoding
env['LC_ALL'] = 'C'
- env['PYTHONCOERCECLOCALE'] = '0'
code = (
b'import locale; '
b'print(ascii("' + undecodable + b'"), '
b'locale.getpreferredencoding())')
p = subprocess.Popen(
- [sys.executable, "-c", code],
+ [sys.executable,
+ # Disable C locale coercion and UTF-8 Mode to not use UTF-8
+ "-X", "coerce_c_locale=0",
+ "-X", "utf8=0",
+ "-c", code],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
env=env)
stdout, stderr = p.communicate()
def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
# Force the POSIX locale
- env = os.environ.copy()
+ env = dict(os.environ)
env["LC_ALL"] = locale
- env["PYTHONCOERCECLOCALE"] = "0"
code = '\n'.join((
'import sys',
'def dump(name):',
'dump("stdout")',
'dump("stderr")',
))
- args = [sys.executable, "-X", "utf8=0", "-c", code]
+ args = [sys.executable,
+ "-X", "utf8=0",
+ "-X", "coerce_c_locale=0",
+ "-c", code]
if isolated:
args.append("-I")
if encoding is not None:
return (loc in POSIX_LOCALES)
def get_output(self, *args, failure=False, **kw):
+ # Always disable the C locale coercion (PEP 538)
+ args = ('-X', 'coerce_c_locale=0', *args)
kw = dict(self.DEFAULT_ENV, **kw)
if failure:
out = assert_python_failure(*args, **kw)
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
# and has the priority over -X utf8 and PYTHONUTF8
out = self.get_output('-X', 'utf8', '-c', code,
- PYTHONUTF8='strict',
PYTHONLEGACYWINDOWSFSENCODING='1')
self.assertEqual(out, 'mbcs/replace')
--- /dev/null
+Add a new :option:`-X` ``coerce_c_locale`` command line option to control C
+locale coercion (:pep:`538`).
return _Py_INIT_OK();
}
+#ifndef MS_WINDOWS
+ /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL
+ && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0))
+ {
+ config->utf8_mode = 1;
+ return _Py_INIT_OK();
+ }
+#endif
+
return _Py_INIT_OK();
}
config->malloc_stats = 1;
}
- const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
- if (env) {
- if (strcmp(env, "0") == 0) {
- if (config->_coerce_c_locale < 0) {
- config->_coerce_c_locale = 0;
- }
- }
- else if (strcmp(env, "warn") == 0) {
- if (config->_coerce_c_locale_warn < 0) {
- config->_coerce_c_locale_warn = 1;
- }
- }
- else {
- if (config->_coerce_c_locale < 0) {
- config->_coerce_c_locale = 1;
- }
- }
- }
-
wchar_t *path;
int res = _PyCoreConfig_GetEnvDup(config, &path,
L"PYTHONPATH", "PYTHONPATH");
}
-static void
-config_init_locale(_PyCoreConfig *config)
+static _PyInitError
+config_init_coerce_c_locale(_PyCoreConfig *config)
{
+ const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale");
+ if (xopt) {
+ wchar_t *sep = wcschr(xopt, L'=');
+ if (sep) {
+ xopt = sep + 1;
+ if (wcscmp(xopt, L"1") == 0) {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 1;
+ }
+ }
+ else if (wcscmp(xopt, L"0") == 0) {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 0;
+ }
+ }
+ else if (wcscmp(xopt, L"warn") == 0) {
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 1;
+ }
+ }
+ else {
+ return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value");
+ }
+ }
+ else {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 1;
+ }
+ }
+
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 0;
+ }
+ }
+
+ const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
+ if (env) {
+ if (strcmp(env, "0") == 0) {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 0;
+ }
+ }
+ else if (strcmp(env, "warn") == 0) {
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 1;
+ }
+ }
+ else {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 1;
+ }
+ }
+
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 0;
+ }
+ }
+
if (config->_coerce_c_locale < 0) {
/* The C locale enables the C locale coercion (PEP 538) */
if (_Py_LegacyLocaleDetected()) {
config->_coerce_c_locale = 1;
+ return _Py_INIT_OK();
}
}
-#ifndef MS_WINDOWS
- if (config->utf8_mode < 0) {
- /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
- const char *ctype_loc = setlocale(LC_CTYPE, NULL);
- if (ctype_loc != NULL
- && (strcmp(ctype_loc, "C") == 0
- || strcmp(ctype_loc, "POSIX") == 0))
- {
- config->utf8_mode = 1;
- }
- }
-#endif
+ return _Py_INIT_OK();
}
}
}
- if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) {
- config_init_locale(config);
+ if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) {
+ err = config_init_coerce_c_locale(config);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
}
if (config->_install_importlib) {
}
assert(config->_coerce_c_locale >= 0);
+ assert(config->_coerce_c_locale_warn >= 0);
assert(config->use_environment >= 0);
assert(config->filesystem_encoding != NULL);
assert(config->filesystem_errors != NULL);