granicus.if.org Git - python/commitdiff
bpo-25996: Added support for file descriptors in os.scandir() on Unix. (#502)
author: Serhiy Storchaka <storchaka@gmail.com>
Thu, 30 Mar 2017 06:12:31 +0000 (09:12 +0300)
committer: GitHub <noreply@github.com>
Thu, 30 Mar 2017 06:12:31 +0000 (09:12 +0300)
os.fwalk() is now twice as fast because it uses os.scandir().

Doc/library/os.rst
Doc/whatsnew/3.7.rst
Lib/os.py
Lib/test/test_os.py
Misc/NEWS
Modules/clinic/posixmodule.c.h
Modules/posixmodule.c

index 69dd5c45677345933dcca7469a24973afd3f930a..071d158f372ff614920552a7925af2358935eeb4 100644 (file)
@@ -2029,6 +2029,9 @@ features:
    attributes of each :class:`os.DirEntry` will be ``bytes``; in all other
    circumstances, they will be of type ``str``.
 
+   This function can also support :ref:`specifying a file descriptor
+   <path_fd>`; the file descriptor must refer to a directory.
+
    The :func:`scandir` iterator supports the :term:`context manager` protocol
    and has the following method:
 
@@ -2075,6 +2078,9 @@ features:
 
       The function accepts a :term:`path-like object`.
 
+   .. versionchanged:: 3.7
+      Added support for :ref:`file descriptors <path_fd>` on Unix.
+
 
 .. class:: DirEntry
 
@@ -2114,7 +2120,9 @@ features:
       The entry's full path name: equivalent to ``os.path.join(scandir_path,
       entry.name)`` where *scandir_path* is the :func:`scandir` *path*
       argument.  The path is only absolute if the :func:`scandir` *path*
-      argument was absolute.
+      argument was absolute.  If the :func:`scandir` *path*
+      argument was a :ref:`file descriptor <path_fd>`, the :attr:`path`
+      attribute is the same as the :attr:`name` attribute.
 
       The :attr:`path` attribute will be ``bytes`` if the :func:`scandir`
       *path* argument is of type ``bytes`` and ``str`` otherwise.  Use
index e303dfd4e652fca6a001fa453ca63ea55f6d5188..19e04bb19efc5e9ea3648c6e9991ccf065cf6cdb 100644 (file)
@@ -108,6 +108,9 @@ os
 Added support for :class:`bytes` paths in :func:`~os.fwalk`. (Contributed by
 Serhiy Storchaka in :issue:`28682`.)
 
+Added support for :ref:`file descriptors <path_fd>` in :func:`~os.scandir`
+on Unix.  (Contributed by Serhiy Storchaka in :issue:`25996`.)
+
 unittest.mock
 -------------
 
@@ -148,6 +151,10 @@ Optimizations
   :func:`~math.erfc` in the :mod:`math` module. (Contributed by Serhiy
   Storchaka in :issue:`26121`.)
 
+* The :func:`os.fwalk` function is now twice as fast because it is
+  implemented using the :func:`os.scandir` function.
+  (Contributed by Serhiy Storchaka in :issue:`25996`.)
+
 
 Build and C API Changes
 =======================
index 70857c7e7856204da2b844c3ebd700c08f4f8a9d..e293ecae7fd3a4ed40d5c4846ba147c22203be2f 100644 (file)
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -129,6 +129,7 @@ if _exists("_have_functions"):
     _add("HAVE_FCHMOD",     "chmod")
     _add("HAVE_FCHOWN",     "chown")
     _add("HAVE_FDOPENDIR",  "listdir")
+    _add("HAVE_FDOPENDIR",  "scandir")
     _add("HAVE_FEXECVE",    "execve")
     _set.add(stat) # fstat always works
     _add("HAVE_FTRUNCATE",  "truncate")
@@ -416,7 +417,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
 
 __all__.append("walk")
 
-if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
+if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
 
     def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None):
         """Directory tree generator.
@@ -455,7 +456,8 @@ if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
             top = fspath(top)
         # Note: To guard against symlink races, we use the standard
         # lstat()/open()/fstat() trick.
-        orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
+        if not follow_symlinks:
+            orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
         topfd = open(top, O_RDONLY, dir_fd=dir_fd)
         try:
             if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
@@ -470,35 +472,41 @@ if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
         # necessary, it can be adapted to only require O(1) FDs, see issue
         # #13734.
 
-        names = listdir(topfd)
-        if isbytes:
-            names = map(fsencode, names)
-        dirs, nondirs = [], []
-        for name in names:
+        scandir_it = scandir(topfd)
+        dirs = []
+        nondirs = []
+        entries = None if topdown or follow_symlinks else []
+        for entry in scandir_it:
+            name = entry.name
+            if isbytes:
+                name = fsencode(name)
             try:
-                # Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with
-                # walk() which reports symlinks to directories as directories.
-                # We do however check for symlinks before recursing into
-                # a subdirectory.
-                if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode):
+                if entry.is_dir():
                     dirs.append(name)
+                    if entries is not None:
+                        entries.append(entry)
                 else:
                     nondirs.append(name)
             except OSError:
                 try:
                     # Add dangling symlinks, ignore disappeared files
-                    if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False)
-                                .st_mode):
+                    if entry.is_symlink():
                         nondirs.append(name)
                 except OSError:
-                    continue
+                    pass
 
         if topdown:
             yield toppath, dirs, nondirs, topfd
 
-        for name in dirs:
+        for name in dirs if entries is None else zip(dirs, entries):
             try:
-                orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks)
+                if not follow_symlinks:
+                    if topdown:
+                        orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
+                    else:
+                        assert entries is not None
+                        name, entry = name
+                        orig_st = entry.stat(follow_symlinks=False)
                 dirfd = open(name, O_RDONLY, dir_fd=topfd)
             except OSError as err:
                 if onerror is not None:
index 83932e64a0b94c35696d2ea1a198d51cd03a5e31..746b3f8be8e0bfc2d11fd8fd8ea8717dcf33514e 100644 (file)
@@ -3313,6 +3313,35 @@ class TestScandir(unittest.TestCase):
         self.assertEqual(entry.path,
                          os.fsencode(os.path.join(self.path, 'file.txt')))
 
+    @unittest.skipUnless(os.listdir in os.supports_fd,
+                         'fd support for listdir required for this test.')
+    def test_fd(self):
+        self.assertIn(os.scandir, os.supports_fd)
+        self.create_file('file.txt')
+        expected_names = ['file.txt']
+        if support.can_symlink():
+            os.symlink('file.txt', os.path.join(self.path, 'link'))
+            expected_names.append('link')
+
+        fd = os.open(self.path, os.O_RDONLY)
+        try:
+            with os.scandir(fd) as it:
+                entries = list(it)
+            names = [entry.name for entry in entries]
+            self.assertEqual(sorted(names), expected_names)
+            self.assertEqual(names, os.listdir(fd))
+            for entry in entries:
+                self.assertEqual(entry.path, entry.name)
+                self.assertEqual(os.fspath(entry), entry.name)
+                self.assertEqual(entry.is_symlink(), entry.name == 'link')
+                if os.stat in os.supports_dir_fd:
+                    st = os.stat(entry.name, dir_fd=fd)
+                    self.assertEqual(entry.stat(), st)
+                    st = os.stat(entry.name, dir_fd=fd, follow_symlinks=False)
+                    self.assertEqual(entry.stat(follow_symlinks=False), st)
+        finally:
+            os.close(fd)
+
     def test_empty_path(self):
         self.assertRaises(FileNotFoundError, os.scandir, '')
 
@@ -3328,7 +3357,7 @@ class TestScandir(unittest.TestCase):
         self.assertEqual(len(entries2), 0, entries2)
 
     def test_bad_path_type(self):
-        for obj in [1234, 1.234, {}, []]:
+        for obj in [1.234, {}, []]:
             self.assertRaises(TypeError, os.scandir, obj)
 
     def test_close(self):
index 28751b34f04009dee0f9d1143be59fc8c8502492..b22322f96c5247002080487e52b4221c4f94f6ba 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -294,6 +294,9 @@ Extension Modules
 Library
 -------
 
+- bpo-25996: Added support for file descriptors in os.scandir() on Unix.
+  os.fwalk() is now twice as fast because it uses os.scandir().
+
 - bpo-28699: Fixed a bug in pools in multiprocessing.pool that raising an
   exception at the very first of an iterable may swallow the exception or
   make the program hang. Patch by Davin Potts and Xiang Zhang.
index 39ac7fd54fd90ed4df55cbe15f3bed7f4b8bd7b7..6ef0293efd78ea57b8e9a459e424ebd067c972cc 100644 (file)
@@ -5926,7 +5926,7 @@ os_scandir(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwname
     PyObject *return_value = NULL;
     static const char * const _keywords[] = {"path", NULL};
     static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0};
-    path_t path = PATH_T_INITIALIZE("scandir", "path", 1, 0);
+    path_t path = PATH_T_INITIALIZE("scandir", "path", 1, PATH_HAVE_FDOPENDIR);
 
     if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
         path_converter, &path)) {
@@ -6493,4 +6493,4 @@ exit:
 #ifndef OS_GETRANDOM_METHODDEF
     #define OS_GETRANDOM_METHODDEF
 #endif /* !defined(OS_GETRANDOM_METHODDEF) */
-/*[clinic end generated code: output=5a0be969e3f71660 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=5529857101c08b49 input=a9049054013a1b77]*/
index 0ae06eb53eaef9d86dab7bc33413c29b78f40591..c03fc15bf808c3c58585ce1020708fa9c8535a15 100644 (file)
@@ -11161,6 +11161,7 @@ typedef struct {
     unsigned char d_type;
 #endif
     ino_t d_ino;
+    int dir_fd;
 #endif
 } DirEntry;
 
@@ -11210,19 +11211,31 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks)
     PyObject *ub;
 
 #ifdef MS_WINDOWS
-    if (PyUnicode_FSDecoder(self->path, &ub)) {
-        const wchar_t *path = PyUnicode_AsUnicode(ub);
+    if (!PyUnicode_FSDecoder(self->path, &ub))
+        return NULL;
+    const wchar_t *path = PyUnicode_AsUnicode(ub);
 #else /* POSIX */
-    if (PyUnicode_FSConverter(self->path, &ub)) {
-        const char *path = PyBytes_AS_STRING(ub);
+    if (!PyUnicode_FSConverter(self->path, &ub))
+        return NULL;
+    const char *path = PyBytes_AS_STRING(ub);
+    if (self->dir_fd != DEFAULT_DIR_FD) {
+#ifdef HAVE_FSTATAT
+        result = fstatat(self->dir_fd, path, &st,
+                         follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW);
+#else
+        PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat");
+        return NULL;
+#endif /* HAVE_FSTATAT */
+    }
+    else
 #endif
+    {
         if (follow_symlinks)
             result = STAT(path, &st);
         else
             result = LSTAT(path, &st);
-        Py_DECREF(ub);
-    } else
-        return NULL;
+    }
+    Py_DECREF(ub);
 
     if (result != 0)
         return path_object_error(self->path);
@@ -11633,20 +11646,36 @@ DirEntry_from_posix_info(path_t *path, const char *name, Py_ssize_t name_len,
     entry->stat = NULL;
     entry->lstat = NULL;
 
-    joined_path = join_path_filename(path->narrow, name, name_len);
-    if (!joined_path)
-        goto error;
+    if (path->fd != -1) {
+        entry->dir_fd = path->fd;
+        joined_path = NULL;
+    }
+    else {
+        entry->dir_fd = DEFAULT_DIR_FD;
+        joined_path = join_path_filename(path->narrow, name, name_len);
+        if (!joined_path)
+            goto error;
+    }
 
     if (!path->narrow || !PyBytes_Check(path->object)) {
         entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len);
-        entry->path = PyUnicode_DecodeFSDefault(joined_path);
+        if (joined_path)
+            entry->path = PyUnicode_DecodeFSDefault(joined_path);
     }
     else {
         entry->name = PyBytes_FromStringAndSize(name, name_len);
-        entry->path = PyBytes_FromString(joined_path);
+        if (joined_path)
+            entry->path = PyBytes_FromString(joined_path);
     }
     PyMem_Free(joined_path);
-    if (!entry->name || !entry->path)
+    if (!entry->name)
+        goto error;
+
+    if (path->fd != -1) {
+        entry->path = entry->name;
+        Py_INCREF(entry->path);
+    }
+    else if (!entry->path)
         goto error;
 
 #ifdef HAVE_DIRENT_D_TYPE
@@ -11674,6 +11703,9 @@ typedef struct {
 #else /* POSIX */
     DIR *dirp;
 #endif
+#ifdef HAVE_FDOPENDIR
+    int fd;
+#endif
 } ScandirIterator;
 
 #ifdef MS_WINDOWS
@@ -11758,6 +11790,10 @@ ScandirIterator_closedir(ScandirIterator *iterator)
 
     iterator->dirp = NULL;
     Py_BEGIN_ALLOW_THREADS
+#ifdef HAVE_FDOPENDIR
+    if (iterator->path.fd != -1)
+        rewinddir(dirp);
+#endif
     closedir(dirp);
     Py_END_ALLOW_THREADS
     return;
@@ -11933,7 +11969,7 @@ static PyTypeObject ScandirIteratorType = {
 /*[clinic input]
 os.scandir
 
-    path : path_t(nullable=True) = None
+    path : path_t(nullable=True, allow_fd='PATH_HAVE_FDOPENDIR') = None
 
 Return an iterator of DirEntry objects for given path.
 
@@ -11946,13 +11982,16 @@ If path is None, uses the path='.'.
 
 static PyObject *
 os_scandir_impl(PyObject *module, path_t *path)
-/*[clinic end generated code: output=6eb2668b675ca89e input=e62b08b3cd41f604]*/
+/*[clinic end generated code: output=6eb2668b675ca89e input=b139dc1c57f60846]*/
 {
     ScandirIterator *iterator;
 #ifdef MS_WINDOWS
     wchar_t *path_strW;
 #else
     const char *path_str;
+#ifdef HAVE_FDOPENDIR
+    int fd = -1;
+#endif
 #endif
 
     iterator = PyObject_New(ScandirIterator, &ScandirIteratorType);
@@ -11988,18 +12027,40 @@ os_scandir_impl(PyObject *module, path_t *path)
         goto error;
     }
 #else /* POSIX */
-    if (iterator->path.narrow)
-        path_str = iterator->path.narrow;
+    errno = 0;
+#ifdef HAVE_FDOPENDIR
+    if (path->fd != -1) {
+        /* closedir() closes the FD, so we duplicate it */
+        fd = _Py_dup(path->fd);
+        if (fd == -1)
+            goto error;
+
+        Py_BEGIN_ALLOW_THREADS
+        iterator->dirp = fdopendir(fd);
+        Py_END_ALLOW_THREADS
+    }
     else
-        path_str = ".";
+#endif
+    {
+        if (iterator->path.narrow)
+            path_str = iterator->path.narrow;
+        else
+            path_str = ".";
 
-    errno = 0;
-    Py_BEGIN_ALLOW_THREADS
-    iterator->dirp = opendir(path_str);
-    Py_END_ALLOW_THREADS
+        Py_BEGIN_ALLOW_THREADS
+        iterator->dirp = opendir(path_str);
+        Py_END_ALLOW_THREADS
+    }
 
     if (!iterator->dirp) {
         path_error(&iterator->path);
+#ifdef HAVE_FDOPENDIR
+        if (fd != -1) {
+            Py_BEGIN_ALLOW_THREADS
+            close(fd);
+            Py_END_ALLOW_THREADS
+        }
+#endif
         goto error;
     }
 #endif