Issue #8622: Add PYTHONFSENCODING environment variable to override the filesystem encoding.

author Victor Stinner <victor.stinner@haypocalc.com>

Wed, 18 Aug 2010 21:23:25 +0000 (21:23 +0000)

committer Victor Stinner <victor.stinner@haypocalc.com>

Wed, 18 Aug 2010 21:23:25 +0000 (21:23 +0000)
author Victor Stinner <victor.stinner@haypocalc.com>
Wed, 18 Aug 2010 21:23:25 +0000 (21:23 +0000)
committer Victor Stinner <victor.stinner@haypocalc.com>
Wed, 18 Aug 2010 21:23:25 +0000 (21:23 +0000)
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst

index 81d118b68ef432a16c36dbfcd1a396171e4b0449..c1130e4cd25bf8a5b57e1221e9beb06b02e85afa 100644 (file)
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -442,11 +442,20 @@ These environment variables influence Python's behavior.
     import of source modules.
  
  
+.. envvar:: PYTHONFSENCODING
+
+   If this is set before running the interpreter, it overrides the encoding used
+   for the filesystem encoding (see :func:`sys.getfilesystemencoding`).
+
+   .. versionadded:: 3.2
+
+
  .. envvar:: PYTHONIOENCODING
  
-   Overrides the encoding used for stdin/stdout/stderr, in the syntax
-   ``encodingname:errorhandler``.  The ``:errorhandler`` part is optional and
-   has the same meaning as in :func:`str.encode`.
+   If this is set before running the interpreter, it overrides the encoding used
+   for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
+   ``:errorhandler`` part is optional and has the same meaning as in
+   :func:`str.encode`.
  
     For stderr, the ``:errorhandler`` part is ignored; the handler will always be
     ``'backslashreplace'``.
diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst

index a2b7af4d9347d0218d644ae6b958bc10ff0d0609..1799b700850658a5d63d5ddadd4823d7a7786c2b 100644 (file)
--- a/Doc/whatsnew/3.2.rst
+++ b/Doc/whatsnew/3.2.rst
@@ -232,6 +232,15 @@ Major performance enhancements have been added:
  
  * Stub
  
+
+Unicode
+=======
+
+The filesystem encoding can be specified by setting the
+:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
+The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
+
+
  IDLE
  ====
  
diff --git a/Lib/test/test_pep277.py b/Lib/test/test_pep277.py

index 60d99dbbba0ba1a32db276084e09dafa26acd513..0699317e1eb85520d3fae2366936ea0077bf8540 100644 (file)
--- a/Lib/test/test_pep277.py
+++ b/Lib/test/test_pep277.py
@@ -43,7 +43,7 @@ if sys.platform != 'darwin':
  
  # Is it Unicode-friendly?
  if not os.path.supports_unicode_filenames:
-    fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
+    fsencoding = sys.getfilesystemencoding()
      try:
          for name in filenames:
              name.encode(fsencoding)
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py

index 44ef5c17c0b2488d8830e96208ebe3ff3d510098..d2f5b85d2d9a28a1fd1bf1ffda36c9c862c37d5a 100644 (file)
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -863,16 +863,24 @@ class SizeofTest(unittest.TestCase):
      def test_getfilesystemencoding(self):
          import codecs
  
-        def check_fsencoding(fs_encoding):
+        def check_fsencoding(fs_encoding, expected=None):
              self.assertIsNotNone(fs_encoding)
              if sys.platform == 'darwin':
                  self.assertEqual(fs_encoding, 'utf-8')
              codecs.lookup(fs_encoding)
+            if expected:
+                self.assertEqual(fs_encoding, expected)
  
          fs_encoding = sys.getfilesystemencoding()
          check_fsencoding(fs_encoding)
  
-        # Even in C locale
+        def get_fsencoding(env):
+            output = subprocess.check_output(
+                [sys.executable, "-c",
+                 "import sys; print(sys.getfilesystemencoding())"],
+                env=env)
+            return output.rstrip().decode('ascii')
+
          try:
              sys.executable.encode('ascii')
          except UnicodeEncodeError:
@@ -880,14 +888,22 @@ class SizeofTest(unittest.TestCase):
              # see issue #8611
              pass
          else:
+            # Even in C locale
              env = os.environ.copy()
              env['LANG'] = 'C'
-            output = subprocess.check_output(
-                [sys.executable, "-c",
-                 "import sys; print(sys.getfilesystemencoding())"],
-                env=env)
-            fs_encoding = output.rstrip().decode('ascii')
-            check_fsencoding(fs_encoding)
+            try:
+                del env['PYTHONFSENCODING']
+            except KeyError:
+                pass
+            check_fsencoding(get_fsencoding(env), 'ascii')
+
+            # Filesystem encoding is hardcoded on Windows and Mac OS X
+            if sys.platform not in ('win32', 'darwin'):
+                for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
+                    env = os.environ.copy()
+                    env['PYTHONFSENCODING'] = encoding
+                    check_fsencoding(get_fsencoding(env), encoding)
+
  
      def test_setfilesystemencoding(self):
          old = sys.getfilesystemencoding()
diff --git a/Misc/NEWS b/Misc/NEWS

index 1e7acc6254d69807c36c041e642b8c8938b61909..5b2250873a666665d3601568809a8eccaabd2c1e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 2?
  Core and Builtins
  -----------------
  
+- Issue #8622: Add PYTHONFSENCODING environment variable to override the
+  filesystem encoding.
+
  - Issue #5127: The C functions that access the Unicode Database now accept and
    return characters from the full Unicode range, even on narrow unicode builds
    (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others).  A visible difference
diff --git a/Modules/main.c b/Modules/main.c

index 3e7e065fb84f79faf417fd4cb56312fa690f0520..d129aba074c62574c5b45244588dce6f863f9c73 100644 (file)
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -99,6 +99,7 @@ PYTHONHOME   : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
                 The default module search path uses %s.\n\
  PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
  PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
+PYTHONFSENCODING: Encoding used for the filesystem.\n\
  ";
  
  FILE *
diff --git a/Python/pythonrun.c b/Python/pythonrun.c

index 76a8eef2df4b3a35ce74e16c94a31dbe0dd3b242..fd31974cb85dc7a03eed09ab9a5c711f48d5a696 100644 (file)
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -134,18 +134,13 @@ add_flag(int flag, const char *envs)
      return flag;
  }
  
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
  static char*
-get_codeset(void)
+get_codec_name(const char *encoding)
  {
-    char* codeset, *name_str;
+    char *name_utf8, *name_str;
      PyObject *codec, *name = NULL;
  
-    codeset = nl_langinfo(CODESET);
-    if (!codeset || codeset[0] == '\0')
-        return NULL;
-
-    codec = _PyCodec_Lookup(codeset);
+    codec = _PyCodec_Lookup(encoding);
      if (!codec)
          goto error;
  
@@ -154,18 +149,34 @@ get_codeset(void)
      if (!name)
          goto error;
  
-    name_str = _PyUnicode_AsString(name);
+    name_utf8 = _PyUnicode_AsString(name);
      if (name == NULL)
          goto error;
-    codeset = strdup(name_str);
+    name_str = strdup(name_utf8);
      Py_DECREF(name);
-    return codeset;
+    if (name_str == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    return name_str;
  
  error:
      Py_XDECREF(codec);
      Py_XDECREF(name);
      return NULL;
  }
+
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+static char*
+get_codeset(void)
+{
+    char* codeset = nl_langinfo(CODESET);
+    if (!codeset || codeset[0] == '\0') {
+        PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
+        return NULL;
+    }
+    return get_codec_name(codeset);
+}
  #endif
  
  void
@@ -706,25 +717,35 @@ initfsencoding(void)
  {
      PyObject *codec;
  #if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    char *codeset;
+    char *codeset = NULL;
  
      if (Py_FileSystemDefaultEncoding == NULL) {
-        /* On Unix, set the file system encoding according to the
-           user's preference, if the CODESET names a well-known
-           Python codec, and Py_FileSystemDefaultEncoding isn't
-           initialized by other means. Also set the encoding of
-           stdin and stdout if these are terminals.  */
-        codeset = get_codeset();
+        const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
+        if (env_encoding != NULL) {
+            codeset = get_codec_name(env_encoding);
+            if (!codeset) {
+                fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
+                PyErr_Print();
+            }
+        }
+        if (!codeset) {
+            /* On Unix, set the file system encoding according to the
+               user's preference, if the CODESET names a well-known
+               Python codec, and Py_FileSystemDefaultEncoding isn't
+               initialized by other means. Also set the encoding of
+               stdin and stdout if these are terminals.  */
+            codeset = get_codeset();
+        }
          if (codeset != NULL) {
              Py_FileSystemDefaultEncoding = codeset;
              Py_HasFileSystemDefaultEncoding = 0;
              return;
+        } else {
+            fprintf(stderr, "Unable to get the locale encoding:\n");
+            PyErr_Print();
          }
  
-        PyErr_Clear();
-        fprintf(stderr,
-                "Unable to get the locale encoding: "
-                "fallback to utf-8\n");
+        fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
          Py_FileSystemDefaultEncoding = "utf-8";
          Py_HasFileSystemDefaultEncoding = 1;
      }
author	Victor Stinner <victor.stinner@haypocalc.com>
	Wed, 18 Aug 2010 21:23:25 +0000 (21:23 +0000)
committer	Victor Stinner <victor.stinner@haypocalc.com>
	Wed, 18 Aug 2010 21:23:25 +0000 (21:23 +0000)
Doc/using/cmdline.rst		patch \| blob \| history
Doc/whatsnew/3.2.rst		patch \| blob \| history
Lib/test/test_pep277.py		patch \| blob \| history
Lib/test/test_sys.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Modules/main.c		patch \| blob \| history
Python/pythonrun.c		patch \| blob \| history