granicus.if.org Git - python/commitdiff
Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
author: Victor Stinner <victor.stinner@haypocalc.com>
Fri, 11 Jun 2010 23:56:51 +0000 (23:56 +0000)
committer: Victor Stinner <victor.stinner@haypocalc.com>
Fri, 11 Jun 2010 23:56:51 +0000 (23:56 +0000)
filenames and enable os.fsencode().

Doc/library/os.rst
Lib/os.py
Lib/test/test_ssl.py
Misc/NEWS
Objects/unicodeobject.c

index 970725a1c50d1bafe74b3485cc252527b390b39c..75036972c52eb45f0b6317c02770a32115bc81f2 100644 (file)
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -159,10 +159,10 @@ process and user.
 .. function:: fsencode(value)
 
    Encode *value* to bytes for use in the file system, environment variables or
-   the command line. Uses :func:`sys.getfilesystemencoding` and
-   ``'surrogateescape'`` error handler for strings and returns bytes unchanged.
-
-   Availability: Unix.
+   the command line. Use :func:`sys.getfilesystemencoding` and
+   ``'surrogateescape'`` error handler for strings and return bytes unchanged.
+   On Windows, use ``'strict'`` error handler for strings if the file system
+   encoding is ``'mbcs'`` (which is the default encoding).
 
    .. versionadded:: 3.2
 
index 8f47137f3bf1bc62b942e32a1f01eaa052263374..e9d44cc614bb7cbc9a5a382d4f845310e556d111 100644 (file)
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -533,16 +533,19 @@ if supports_bytes_environ:
         return environb.get(key, default)
     __all__.append("getenvb")
 
-if name != 'nt':
-    def fsencode(value):
-        """Encode value for use in the file system, environment variables
-        or the command line."""
-        if isinstance(value, bytes):
-            return value
-        elif isinstance(value, str):
-            return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
+def fsencode(value):
+    """Encode value for use in the file system, environment variables
+    or the command line."""
+    if isinstance(value, bytes):
+        return value
+    elif isinstance(value, str):
+        encoding = sys.getfilesystemencoding()
+        if encoding == 'mbcs':
+            return value.encode(encoding)
         else:
-            raise TypeError("expect bytes or str, not %s" % type(value).__name__)
+            return value.encode(encoding, 'surrogateescape')
+    else:
+        raise TypeError("expect bytes or str, not %s" % type(value).__name__)
 
 def _exists(name):
     return name in globals()
index c4644401806f80fbb2f9d223217674fd5bf60eb9..9c0d263e0f1e634eeaf746c1ef81e714ec37d5e5 100644 (file)
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -33,16 +33,15 @@ else:
 HOST = support.HOST
 
 data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
-fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
 
 CERTFILE = data_file("keycert.pem")
-BYTES_CERTFILE = fsencode(CERTFILE)
+BYTES_CERTFILE = os.fsencode(CERTFILE)
 ONLYCERT = data_file("ssl_cert.pem")
 ONLYKEY = data_file("ssl_key.pem")
-BYTES_ONLYCERT = fsencode(ONLYCERT)
-BYTES_ONLYKEY = fsencode(ONLYKEY)
+BYTES_ONLYCERT = os.fsencode(ONLYCERT)
+BYTES_ONLYKEY = os.fsencode(ONLYKEY)
 CAPATH = data_file("capath")
-BYTES_CAPATH = fsencode(CAPATH)
+BYTES_CAPATH = os.fsencode(CAPATH)
 
 SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
 
index 5a59310ebf18055b73e6bcf6ae106f49473f60e9..3fd017847b5e4ef7ffd24dd24d22135c13c9342d 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
+  filenames and enable os.fsencode().
+
 - Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
   the interpreter with characters outside the Basic Multilingual Plane
   (higher than 0x10000).
index de92787cc69e8b34dedb40aeaa652c13e73b1755..8d75b205de73d16e6720b433432f432b8f8f49ae 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
 
 PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
-    if (Py_FileSystemDefaultEncoding)
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
+            return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
+                                        PyUnicode_GET_SIZE(unicode),
+                                        NULL);
+#endif
         return PyUnicode_AsEncodedString(unicode,
                                          Py_FileSystemDefaultEncoding,
                                          "surrogateescape");
-    else
+    } else
         return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                      PyUnicode_GET_SIZE(unicode),
                                      "surrogateescape");
@@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
     if (Py_FileSystemDefaultEncoding) {
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
         if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
-            return PyUnicode_DecodeMBCS(s, size, "surrogateescape");
+            return PyUnicode_DecodeMBCS(s, size, NULL);
         }
 #elif defined(__APPLE__)
         if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
@@ -2745,7 +2751,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
 #endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
-    
+
     q = (unsigned char *)s;
     e = q + size;