filesystem encoding.
initfsencoding() also displays a better error message if get_codeset() fails.
import of source modules.
+.. envvar:: PYTHONFSENCODING
+
+ If this is set before running the interpreter, it overrides the encoding used
+ for the filesystem (see :func:`sys.getfilesystemencoding`).
+
+ .. versionadded:: 3.2
+
+
.. envvar:: PYTHONIOENCODING
- Overrides the encoding used for stdin/stdout/stderr, in the syntax
- ``encodingname:errorhandler``. The ``:errorhandler`` part is optional and
- has the same meaning as in :func:`str.encode`.
+ If this is set before running the interpreter, it overrides the encoding used
+ for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
+ ``:errorhandler`` part is optional and has the same meaning as in
+ :func:`str.encode`.
For stderr, the ``:errorhandler`` part is ignored; the handler will always be
``'backslashreplace'``.
* Stub
+
+Unicode
+=======
+
+The filesystem encoding can be specified by setting the
+:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
+The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
+
+
IDLE
====
# Is it Unicode-friendly?
if not os.path.supports_unicode_filenames:
- fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
+ fsencoding = sys.getfilesystemencoding()
try:
for name in filenames:
name.encode(fsencoding)
def test_getfilesystemencoding(self):
import codecs
- def check_fsencoding(fs_encoding):
+ def check_fsencoding(fs_encoding, expected=None):
self.assertIsNotNone(fs_encoding)
if sys.platform == 'darwin':
self.assertEqual(fs_encoding, 'utf-8')
codecs.lookup(fs_encoding)
+ if expected:
+ self.assertEqual(fs_encoding, expected)
fs_encoding = sys.getfilesystemencoding()
check_fsencoding(fs_encoding)
- # Even in C locale
+ def get_fsencoding(env):
+ output = subprocess.check_output(
+ [sys.executable, "-c",
+ "import sys; print(sys.getfilesystemencoding())"],
+ env=env)
+ return output.rstrip().decode('ascii')
+
try:
sys.executable.encode('ascii')
except UnicodeEncodeError:
# see issue #8611
pass
else:
+ # Even in C locale
env = os.environ.copy()
env['LANG'] = 'C'
- output = subprocess.check_output(
- [sys.executable, "-c",
- "import sys; print(sys.getfilesystemencoding())"],
- env=env)
- fs_encoding = output.rstrip().decode('ascii')
- check_fsencoding(fs_encoding)
+ try:
+ del env['PYTHONFSENCODING']
+ except KeyError:
+ pass
+ check_fsencoding(get_fsencoding(env), 'ascii')
+
+ # Filesystem encoding is hardcoded on Windows and Mac OS X
+ if sys.platform not in ('win32', 'darwin'):
+ for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
+ env = os.environ.copy()
+ env['PYTHONFSENCODING'] = encoding
+ check_fsencoding(get_fsencoding(env), encoding)
+
def test_setfilesystemencoding(self):
old = sys.getfilesystemencoding()
Core and Builtins
-----------------
+- Issue #8622: Add PYTHONFSENCODING environment variable to override the
+ filesystem encoding.
+
- Issue #5127: The C functions that access the Unicode Database now accept and
return characters from the full Unicode range, even on narrow unicode builds
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference
The default module search path uses %s.\n\
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
+PYTHONFSENCODING: Encoding used for the filesystem.\n\
";
FILE *
return flag;
}
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
static char*
-get_codeset(void)
+get_codec_name(const char *encoding)
{
- char* codeset, *name_str;
+ char *name_utf8, *name_str;
PyObject *codec, *name = NULL;
- codeset = nl_langinfo(CODESET);
- if (!codeset || codeset[0] == '\0')
- return NULL;
-
- codec = _PyCodec_Lookup(codeset);
+ codec = _PyCodec_Lookup(encoding);
if (!codec)
goto error;
if (!name)
goto error;
- name_str = _PyUnicode_AsString(name);
+ name_utf8 = _PyUnicode_AsString(name);
if (name == NULL)
goto error;
- codeset = strdup(name_str);
+ name_str = strdup(name_utf8);
Py_DECREF(name);
- return codeset;
+ if (name_str == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ return name_str;
error:
Py_XDECREF(codec);
Py_XDECREF(name);
return NULL;
}
+
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+static char*
+get_codeset(void)
+{
+ char* codeset = nl_langinfo(CODESET);
+ if (!codeset || codeset[0] == '\0') {
+ PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
+ return NULL;
+ }
+ return get_codec_name(codeset);
+}
#endif
void
{
PyObject *codec;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
- char *codeset;
+ char *codeset = NULL;
if (Py_FileSystemDefaultEncoding == NULL) {
- /* On Unix, set the file system encoding according to the
- user's preference, if the CODESET names a well-known
- Python codec, and Py_FileSystemDefaultEncoding isn't
- initialized by other means. Also set the encoding of
- stdin and stdout if these are terminals. */
- codeset = get_codeset();
+ const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
+ if (env_encoding != NULL) {
+ codeset = get_codec_name(env_encoding);
+ if (!codeset) {
+ fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
+ PyErr_Print();
+ }
+ }
+ if (!codeset) {
+ /* On Unix, set the file system encoding according to the
+ user's preference, if the CODESET names a well-known
+ Python codec, and Py_FileSystemDefaultEncoding isn't
+ initialized by other means. Also set the encoding of
+ stdin and stdout if these are terminals. */
+ codeset = get_codeset();
+ }
if (codeset != NULL) {
Py_FileSystemDefaultEncoding = codeset;
Py_HasFileSystemDefaultEncoding = 0;
return;
+ } else {
+ fprintf(stderr, "Unable to get the locale encoding:\n");
+ PyErr_Print();
}
- PyErr_Clear();
- fprintf(stderr,
- "Unable to get the locale encoding: "
- "fallback to utf-8\n");
+ fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
Py_FileSystemDefaultEncoding = "utf-8";
Py_HasFileSystemDefaultEncoding = 1;
}