]> granicus.if.org Git - python/commitdiff
bpo-37412: os.getcwdb() now uses UTF-8 on Windows (GH-14396)
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Wed, 26 Jun 2019 16:14:30 +0000 (09:14 -0700)
committerGitHub <noreply@github.com>
Wed, 26 Jun 2019 16:14:30 +0000 (09:14 -0700)
The os.getcwdb() function now uses the UTF-8 encoding on Windows,
rather than the ANSI code page: see PEP 529 for the rationale. The
function is no longer deprecated on Windows.

os.getcwd() and os.getcwdb() now detect integer overflow on memory
allocations. On Unix, these functions properly report MemoryError on
memory allocation failure.
(cherry picked from commit 689830ee6243126798a6c519c05aa11ba73db7cd)

Co-authored-by: Victor Stinner <vstinner@redhat.com>
Doc/library/os.rst
Doc/whatsnew/3.8.rst
Lib/test/test_os.py
Misc/NEWS.d/next/Library/2019-06-26-16-28-59.bpo-37412.lx0VjC.rst [new file with mode: 0644]
Modules/posixmodule.c

index e7acb356d38bf999eef4f5decc2fcbc4c1b1a0eb..45ce643d88e012b5754862b8593091beca04fa2c 100644 (file)
@@ -1730,6 +1730,11 @@ features:
 
    Return a bytestring representing the current working directory.
 
+   .. versionchanged:: 3.8
+      The function now uses the UTF-8 encoding on Windows, rather than the ANSI
+      code page: see :pep:`529` for the rationale. The function is no longer
+      deprecated on Windows.
+
 
 .. function:: lchflags(path, flags)
 
index 954f77da2999a1d9c5c29da93a12c16109d7c450..8c2f80ce24e0087184e4eeaaf8fb2447ed6b9258 100644 (file)
@@ -1228,6 +1228,11 @@ Changes in Python behavior
 Changes in the Python API
 -------------------------
 
+* The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
+  rather than the ANSI code page: see :pep:`529` for the rationale. The
+  function is no longer deprecated on Windows.
+  (Contributed by Victor Stinner in :issue:`37412`.)
+
 * :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases
   for better performance. On Windows Subsystem for Linux and QEMU User
   Emulation, Popen constructor using :func:`os.posix_spawn` no longer raise an
index 527f81463280ac1dc8336cce8c63fbd65f6b3cf0..18cd78b55f601906aa15fa524abbf9a70e701364 100644 (file)
@@ -82,6 +82,17 @@ def create_file(filename, content=b'content'):
         fp.write(content)
 
 
+class MiscTests(unittest.TestCase):
+    def test_getcwd(self):
+        cwd = os.getcwd()
+        self.assertIsInstance(cwd, str)
+
+    def test_getcwdb(self):
+        cwd = os.getcwdb()
+        self.assertIsInstance(cwd, bytes)
+        self.assertEqual(os.fsdecode(cwd), os.getcwd())
+
+
 # Tests creating TESTFN
 class FileTests(unittest.TestCase):
     def setUp(self):
diff --git a/Misc/NEWS.d/next/Library/2019-06-26-16-28-59.bpo-37412.lx0VjC.rst b/Misc/NEWS.d/next/Library/2019-06-26-16-28-59.bpo-37412.lx0VjC.rst
new file mode 100644 (file)
index 0000000..3ce4102
--- /dev/null
@@ -0,0 +1,3 @@
+The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
+rather than the ANSI code page: see :pep:`529` for the rationale. The function
+is no longer deprecated on Windows.
index c68bab81c7821049ffc47a425e8b5a35c568586c..2d68c9dbb478b38e08378341f1b5ccaa6383590e 100644 (file)
@@ -505,17 +505,6 @@ void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *,
                                             ULONG, struct _Py_stat_struct *);
 #endif
 
-#ifdef MS_WINDOWS
-static int
-win32_warn_bytes_api()
-{
-    return PyErr_WarnEx(PyExc_DeprecationWarning,
-        "The Windows bytes API has been deprecated, "
-        "use Unicode filenames instead",
-        1);
-}
-#endif
-
 
 #ifndef MS_WINDOWS
 PyObject *
@@ -3333,83 +3322,99 @@ os_lchown_impl(PyObject *module, path_t *path, uid_t uid, gid_t gid)
 static PyObject *
 posix_getcwd(int use_bytes)
 {
-    char *buf, *tmpbuf;
-    char *cwd;
-    const size_t chunk = 1024;
-    size_t buflen = 0;
-    PyObject *obj;
-
 #ifdef MS_WINDOWS
-    if (!use_bytes) {
-        wchar_t wbuf[MAXPATHLEN];
-        wchar_t *wbuf2 = wbuf;
-        PyObject *resobj;
-        DWORD len;
-        Py_BEGIN_ALLOW_THREADS
-        len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
-        /* If the buffer is large enough, len does not include the
-           terminating \0. If the buffer is too small, len includes
-           the space needed for the terminator. */
-        if (len >= Py_ARRAY_LENGTH(wbuf)) {
+    wchar_t wbuf[MAXPATHLEN];
+    wchar_t *wbuf2 = wbuf;
+    DWORD len;
+
+    Py_BEGIN_ALLOW_THREADS
+    len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
+    /* If the buffer is large enough, len does not include the
+       terminating \0. If the buffer is too small, len includes
+       the space needed for the terminator. */
+    if (len >= Py_ARRAY_LENGTH(wbuf)) {
+        if (len >= PY_SSIZE_T_MAX / sizeof(wchar_t)) {
             wbuf2 = PyMem_RawMalloc(len * sizeof(wchar_t));
-            if (wbuf2)
-                len = GetCurrentDirectoryW(len, wbuf2);
         }
-        Py_END_ALLOW_THREADS
-        if (!wbuf2) {
-            PyErr_NoMemory();
-            return NULL;
+        else {
+            wbuf2 = NULL;
         }
-        if (!len) {
-            if (wbuf2 != wbuf)
-                PyMem_RawFree(wbuf2);
-            return PyErr_SetFromWindowsErr(0);
+        if (wbuf2) {
+            len = GetCurrentDirectoryW(len, wbuf2);
         }
-        resobj = PyUnicode_FromWideChar(wbuf2, len);
+    }
+    Py_END_ALLOW_THREADS
+
+    if (!wbuf2) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    if (!len) {
         if (wbuf2 != wbuf)
             PyMem_RawFree(wbuf2);
-        return resobj;
+        return PyErr_SetFromWindowsErr(0);
     }
 
-    if (win32_warn_bytes_api())
-        return NULL;
-#endif
+    PyObject *resobj = PyUnicode_FromWideChar(wbuf2, len);
+    if (wbuf2 != wbuf) {
+        PyMem_RawFree(wbuf2);
+    }
+
+    if (use_bytes) {
+        if (resobj == NULL) {
+            return NULL;
+        }
+        Py_SETREF(resobj, PyUnicode_EncodeFSDefault(resobj));
+    }
+
+    return resobj;
+#else
+    const size_t chunk = 1024;
+
+    char *buf = NULL;
+    char *cwd = NULL;
+    size_t buflen = 0;
 
-    buf = cwd = NULL;
     Py_BEGIN_ALLOW_THREADS
     do {
-        buflen += chunk;
-#ifdef MS_WINDOWS
-        if (buflen > INT_MAX) {
-            PyErr_NoMemory();
-            break;
+        char *newbuf;
+        if (buflen <= PY_SSIZE_T_MAX - chunk) {
+            buflen += chunk;
+            newbuf = PyMem_RawRealloc(buf, buflen);
         }
-#endif
-        tmpbuf = PyMem_RawRealloc(buf, buflen);
-        if (tmpbuf == NULL)
+        else {
+            newbuf = NULL;
+        }
+        if (newbuf == NULL) {
+            PyMem_RawFree(buf);
+            buf = NULL;
             break;
+        }
+        buf = newbuf;
 
-        buf = tmpbuf;
-#ifdef MS_WINDOWS
-        cwd = getcwd(buf, (int)buflen);
-#else
         cwd = getcwd(buf, buflen);
-#endif
     } while (cwd == NULL && errno == ERANGE);
     Py_END_ALLOW_THREADS
 
+    if (buf == NULL) {
+        return PyErr_NoMemory();
+    }
     if (cwd == NULL) {
         PyMem_RawFree(buf);
         return posix_error();
     }
 
-    if (use_bytes)
+    PyObject *obj;
+    if (use_bytes) {
         obj = PyBytes_FromStringAndSize(buf, strlen(buf));
-    else
+    }
+    else {
         obj = PyUnicode_DecodeFSDefault(buf);
+    }
     PyMem_RawFree(buf);
 
     return obj;
+#endif   /* !MS_WINDOWS */
 }