From daa4acc2eb60a47d75fcc440312d5d800985e6cf Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 10 Oct 2017 22:19:46 +0000 Subject: [PATCH] Support: Have directory_iterator::status() return FindFirstFileEx/FindNextFile results on Windows. This allows clients to avoid an unnecessary fs::status() call on each directory entry. Because the information returned by FindFirstFileEx is a subset of the information returned by a regular status() call, I needed to extract a base class from file_status that contains only that information. On my machine, this reduces the time required to enumerate a ThinLTO cache directory containing 520k files from almost 4 minutes to less than 2 seconds. Differential Revision: https://reviews.llvm.org/D38716 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315378 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/FileSystem.h | 141 ++++++++++++++++++------------ lib/Support/CachePruning.cpp | 22 ++--- lib/Support/Path.cpp | 19 ++-- lib/Support/Unix/Path.inc | 20 +++-- lib/Support/Windows/Path.inc | 61 ++++++++----- unittests/Support/Path.cpp | 4 +- 6 files changed, 154 insertions(+), 113 deletions(-) diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 09f2e34a488..03015a0ca3b 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -141,65 +141,48 @@ public: uint64_t getFile() const { return File; } }; -/// file_status - Represents the result of a call to stat and friends. It has -/// a platform-specific member to store the result. -class file_status -{ - friend bool equivalent(file_status A, file_status B); - +/// Represents the result of a call to directory_iterator::status(). This is a +/// subset of the information returned by a regular sys::fs::status() call, and +/// represents the information provided by Windows FileFirstFile/FindNextFile. +class basic_file_status { +protected: #if defined(LLVM_ON_UNIX) - dev_t fs_st_dev = 0; - nlink_t fs_st_nlinks = 0; - ino_t fs_st_ino = 0; time_t fs_st_atime = 0; time_t fs_st_mtime = 0; uid_t fs_st_uid = 0; gid_t fs_st_gid = 0; off_t fs_st_size = 0; #elif defined (LLVM_ON_WIN32) - uint32_t NumLinks = 0; uint32_t LastAccessedTimeHigh = 0; uint32_t LastAccessedTimeLow = 0; uint32_t LastWriteTimeHigh = 0; uint32_t LastWriteTimeLow = 0; - uint32_t VolumeSerialNumber = 0; uint32_t FileSizeHigh = 0; uint32_t FileSizeLow = 0; - uint32_t FileIndexHigh = 0; - uint32_t FileIndexLow = 0; #endif file_type Type = file_type::status_error; perms Perms = perms_not_known; public: - #if defined(LLVM_ON_UNIX) - file_status() = default; - - file_status(file_type Type) : Type(Type) {} - - file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino, - time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size) - : fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino), fs_st_atime(ATime), - fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID), fs_st_size(Size), - Type(Type), Perms(Perms) {} - #elif defined(LLVM_ON_WIN32) - file_status() = default; + basic_file_status() = default; - file_status(file_type Type) : Type(Type) {} + explicit basic_file_status(file_type Type) : Type(Type) {} - file_status(file_type Type, perms Perms, uint32_t LinkCount, - uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow, - uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow, - uint32_t VolumeSerialNumber, uint32_t FileSizeHigh, - uint32_t FileSizeLow, uint32_t FileIndexHigh, - uint32_t FileIndexLow) - : NumLinks(LinkCount), LastAccessedTimeHigh(LastAccessTimeHigh), + #if defined(LLVM_ON_UNIX) + basic_file_status(file_type Type, perms Perms, time_t ATime, time_t MTime, + uid_t UID, gid_t GID, off_t Size) + : fs_st_atime(ATime), fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID), + fs_st_size(Size), Type(Type), Perms(Perms) {} +#elif defined(LLVM_ON_WIN32) + basic_file_status(file_type Type, perms Perms, uint32_t LastAccessTimeHigh, + uint32_t LastAccessTimeLow, uint32_t LastWriteTimeHigh, + uint32_t LastWriteTimeLow, uint32_t FileSizeHigh, + uint32_t FileSizeLow) + : LastAccessedTimeHigh(LastAccessTimeHigh), LastAccessedTimeLow(LastAccessTimeLow), LastWriteTimeHigh(LastWriteTimeHigh), - LastWriteTimeLow(LastWriteTimeLow), - VolumeSerialNumber(VolumeSerialNumber), FileSizeHigh(FileSizeHigh), - FileSizeLow(FileSizeLow), FileIndexHigh(FileIndexHigh), - FileIndexLow(FileIndexLow), Type(Type), Perms(Perms) {} + LastWriteTimeLow(LastWriteTimeLow), FileSizeHigh(FileSizeHigh), + FileSizeLow(FileSizeLow), Type(Type), Perms(Perms) {} #endif // getters @@ -207,8 +190,6 @@ public: perms permissions() const { return Perms; } TimePoint<> getLastAccessedTime() const; TimePoint<> getLastModificationTime() const; - UniqueID getUniqueID() const; - uint32_t getLinkCount() const; #if defined(LLVM_ON_UNIX) uint32_t getUser() const { return fs_st_uid; } @@ -233,6 +214,49 @@ public: void permissions(perms p) { Perms = p; } }; +/// Represents the result of a call to sys::fs::status(). +class file_status : public basic_file_status { + friend bool equivalent(file_status A, file_status B); + + #if defined(LLVM_ON_UNIX) + dev_t fs_st_dev = 0; + nlink_t fs_st_nlinks = 0; + ino_t fs_st_ino = 0; + #elif defined (LLVM_ON_WIN32) + uint32_t NumLinks = 0; + uint32_t VolumeSerialNumber = 0; + uint32_t FileIndexHigh = 0; + uint32_t FileIndexLow = 0; + #endif + +public: + file_status() = default; + + explicit file_status(file_type Type) : basic_file_status(Type) {} + + #if defined(LLVM_ON_UNIX) + file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino, + time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size) + : basic_file_status(Type, Perms, ATime, MTime, UID, GID, Size), + fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino) {} + #elif defined(LLVM_ON_WIN32) + file_status(file_type Type, perms Perms, uint32_t LinkCount, + uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow, + uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow, + uint32_t VolumeSerialNumber, uint32_t FileSizeHigh, + uint32_t FileSizeLow, uint32_t FileIndexHigh, + uint32_t FileIndexLow) + : basic_file_status(Type, Perms, LastAccessTimeHigh, LastAccessTimeLow, + LastWriteTimeHigh, LastWriteTimeLow, FileSizeHigh, + FileSizeLow), + NumLinks(LinkCount), VolumeSerialNumber(VolumeSerialNumber), + FileIndexHigh(FileIndexHigh), FileIndexLow(FileIndexLow) {} + #endif + + UniqueID getUniqueID() const; + uint32_t getLinkCount() const; +}; + /// @} /// @name Physical Operators /// @{ @@ -383,10 +407,10 @@ ErrorOr md5_contents(const Twine &Path); /// @brief Does file exist? /// -/// @param status A file_status previously returned from stat. +/// @param status A basic_file_status previously returned from stat. /// @returns True if the file represented by status exists, false if it does /// not. -bool exists(file_status status); +bool exists(const basic_file_status &status); enum class AccessMode { Exist, Write, Execute }; @@ -485,9 +509,9 @@ file_type get_file_type(const Twine &Path, bool Follow = true); /// @brief Does status represent a directory? /// -/// @param status A file_status previously returned from status. +/// @param status A basic_file_status previously returned from status. /// @returns status.type() == file_type::directory_file. -bool is_directory(file_status status); +bool is_directory(const basic_file_status &status); /// @brief Is path a directory? /// @@ -507,9 +531,9 @@ inline bool is_directory(const Twine &Path) { /// @brief Does status represent a regular file? /// -/// @param status A file_status previously returned from status. +/// @param status A basic_file_status previously returned from status. /// @returns status_known(status) && status.type() == file_type::regular_file. -bool is_regular_file(file_status status); +bool is_regular_file(const basic_file_status &status); /// @brief Is path a regular file? /// @@ -531,9 +555,9 @@ inline bool is_regular_file(const Twine &Path) { /// @brief Does status represent a symlink file? /// -/// @param status A file_status previously returned from status. +/// @param status A basic_file_status previously returned from status. /// @returns status_known(status) && status.type() == file_type::symlink_file. -bool is_symlink_file(file_status status); +bool is_symlink_file(const basic_file_status &status); /// @brief Is path a symlink file? /// @@ -556,9 +580,9 @@ inline bool is_symlink_file(const Twine &Path) { /// @brief Does this status represent something that exists but is not a /// directory or regular file? /// -/// @param status A file_status previously returned from status. +/// @param status A basic_file_status previously returned from status. /// @returns exists(s) && !is_regular_file(s) && !is_directory(s) -bool is_other(file_status status); +bool is_other(const basic_file_status &status); /// @brief Is path something that exists but is not a directory, /// regular file, or symlink? @@ -631,7 +655,7 @@ std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time); /// /// @param s Input file status. /// @returns True if status() != status_error. -bool status_known(file_status s); +bool status_known(const basic_file_status &s); /// @brief Is status available? /// @@ -793,24 +817,25 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr); class directory_entry { std::string Path; bool FollowSymlinks; - mutable file_status Status; + basic_file_status Status; public: explicit directory_entry(const Twine &path, bool follow_symlinks = true, - file_status st = file_status()) + basic_file_status st = basic_file_status()) : Path(path.str()), FollowSymlinks(follow_symlinks), Status(st) {} directory_entry() = default; - void assign(const Twine &path, file_status st = file_status()) { + void assign(const Twine &path, basic_file_status st = basic_file_status()) { Path = path.str(); Status = st; } - void replace_filename(const Twine &filename, file_status st = file_status()); + void replace_filename(const Twine &filename, + basic_file_status st = basic_file_status()); const std::string &path() const { return Path; } - std::error_code status(file_status &result) const; + ErrorOr status() const; bool operator==(const directory_entry& rhs) const { return Path == rhs.Path; } bool operator!=(const directory_entry& rhs) const { return !(*this == rhs); } @@ -929,9 +954,9 @@ public: if (State->HasNoPushRequest) State->HasNoPushRequest = false; else { - file_status st; - if ((ec = State->Stack.top()->status(st))) return *this; - if (is_directory(st)) { + ErrorOr st = State->Stack.top()->status(); + if (!st) return *this; + if (is_directory(*st)) { State->Stack.push(directory_iterator(*State->Stack.top(), ec, Follow)); if (ec) return *this; if (State->Stack.top() != end_itr) { diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp index 60d0964f276..5a9580cf440 100644 --- a/lib/Support/CachePruning.cpp +++ b/lib/Support/CachePruning.cpp @@ -182,19 +182,9 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { bool ShouldComputeSize = (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0); - // Keep track of space + // Keep track of space. Needs to be kept ordered by size for determinism. std::set> FileSizes; uint64_t TotalSize = 0; - // Helper to add a path to the set of files to consider for size-based - // pruning, sorted by size. - auto AddToFileListForSizePruning = - [&](StringRef Path) { - if (!ShouldComputeSize) - return; - TotalSize += FileStatus.getSize(); - FileSizes.insert( - std::make_pair(FileStatus.getSize(), std::string(Path))); - }; // Walk the entire directory cache, looking for unused files. std::error_code EC; @@ -212,13 +202,14 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { // Look at this file. If we can't stat it, there's nothing interesting // there. - if (sys::fs::status(File->path(), FileStatus)) { + ErrorOr StatusOrErr = File->status(); + if (!StatusOrErr) { DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n"); continue; } // If the file hasn't been used recently enough, delete it - const auto FileAccessTime = FileStatus.getLastAccessedTime(); + const auto FileAccessTime = StatusOrErr->getLastAccessedTime(); auto FileAge = CurrentTime - FileAccessTime; if (FileAge > Policy.Expiration) { DEBUG(dbgs() << "Remove " << File->path() << " (" @@ -228,7 +219,10 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { } // Leave it here for now, but add it to the list of size-based pruning. - AddToFileListForSizePruning(File->path()); + if (!ShouldComputeSize) + continue; + TotalSize += StatusOrErr->getSize(); + FileSizes.insert({StatusOrErr->getSize(), std::string(File->path())}); } // Prune for size now if needed diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index f30e8a8b0cb..9692acb5283 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -952,11 +952,11 @@ ErrorOr md5_contents(const Twine &Path) { return Result; } -bool exists(file_status status) { +bool exists(const basic_file_status &status) { return status_known(status) && status.type() != file_type::file_not_found; } -bool status_known(file_status s) { +bool status_known(const basic_file_status &s) { return s.type() != file_type::status_error; } @@ -967,7 +967,7 @@ file_type get_file_type(const Twine &Path, bool Follow) { return st.type(); } -bool is_directory(file_status status) { +bool is_directory(const basic_file_status &status) { return status.type() == file_type::directory_file; } @@ -979,7 +979,7 @@ std::error_code is_directory(const Twine &path, bool &result) { return std::error_code(); } -bool is_regular_file(file_status status) { +bool is_regular_file(const basic_file_status &status) { return status.type() == file_type::regular_file; } @@ -991,7 +991,7 @@ std::error_code is_regular_file(const Twine &path, bool &result) { return std::error_code(); } -bool is_symlink_file(file_status status) { +bool is_symlink_file(const basic_file_status &status) { return status.type() == file_type::symlink_file; } @@ -1003,7 +1003,7 @@ std::error_code is_symlink_file(const Twine &path, bool &result) { return std::error_code(); } -bool is_other(file_status status) { +bool is_other(const basic_file_status &status) { return exists(status) && !is_regular_file(status) && !is_directory(status); @@ -1017,17 +1017,14 @@ std::error_code is_other(const Twine &Path, bool &Result) { return std::error_code(); } -void directory_entry::replace_filename(const Twine &filename, file_status st) { +void directory_entry::replace_filename(const Twine &filename, + basic_file_status st) { SmallString<128> path = path::parent_path(Path); path::append(path, filename); Path = path.str(); Status = st; } -std::error_code directory_entry::status(file_status &result) const { - return fs::status(Path, result, FollowSymlinks); -} - ErrorOr getPermissions(const Twine &Path) { file_status Status; if (std::error_code EC = status(Path, Status)) diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index d0bb6a4fffb..781a911ed57 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -217,11 +217,11 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { return ""; } -TimePoint<> file_status::getLastAccessedTime() const { +TimePoint<> basic_file_status::getLastAccessedTime() const { return toTimePoint(fs_st_atime); } -TimePoint<> file_status::getLastModificationTime() const { +TimePoint<> basic_file_status::getLastModificationTime() const { return toTimePoint(fs_st_mtime); } @@ -713,6 +713,13 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) { return std::error_code(); } +ErrorOr directory_entry::status() const { + file_status s; + if (auto EC = fs::status(Path, s, FollowSymlinks)) + return EC; + return s; +} + #if !defined(F_GETPATH) static bool hasProcSelfFD() { // If we have a /proc filesystem mounted, we can quickly establish the @@ -809,12 +816,11 @@ static std::error_code remove_directories_impl(const T &Entry, directory_iterator End; while (Begin != End) { auto &Item = *Begin; - file_status st; - EC = Item.status(st); - if (EC && !IgnoreErrors) - return EC; + ErrorOr st = Item.status(); + if (!st && !IgnoreErrors) + return st.getError(); - if (is_directory(st)) { + if (is_directory(*st)) { EC = remove_directories_impl(Item, IgnoreErrors); if (EC && !IgnoreErrors) return EC; diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index a81cd58cf4b..1ab354bf094 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -168,14 +168,14 @@ ErrorOr disk_space(const Twine &Path) { return SpaceInfo; } -TimePoint<> file_status::getLastAccessedTime() const { +TimePoint<> basic_file_status::getLastAccessedTime() const { FILETIME Time; Time.dwLowDateTime = LastAccessedTimeLow; Time.dwHighDateTime = LastAccessedTimeHigh; return toTimePoint(Time); } -TimePoint<> file_status::getLastModificationTime() const { +TimePoint<> basic_file_status::getLastModificationTime() const { FILETIME Time; Time.dwLowDateTime = LastWriteTimeLow; Time.dwHighDateTime = LastWriteTimeHigh; @@ -569,6 +569,15 @@ static bool isReservedName(StringRef path) { return false; } +static file_type file_type_from_attrs(DWORD Attrs) { + return (Attrs & FILE_ATTRIBUTE_DIRECTORY) ? file_type::directory_file + : file_type::regular_file; +} + +static perms perms_from_attrs(DWORD Attrs) { + return (Attrs & FILE_ATTRIBUTE_READONLY) ? (all_read | all_exe) : all_all; +} + static std::error_code getStatus(HANDLE FileHandle, file_status &Result) { if (FileHandle == INVALID_HANDLE_VALUE) goto handle_status_error; @@ -597,22 +606,14 @@ static std::error_code getStatus(HANDLE FileHandle, file_status &Result) { if (!::GetFileInformationByHandle(FileHandle, &Info)) goto handle_status_error; - { - file_type Type = (Info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - ? file_type::directory_file - : file_type::regular_file; - perms Permissions = (Info.dwFileAttributes & FILE_ATTRIBUTE_READONLY) - ? (all_read | all_exe) - : all_all; - Result = file_status( - Type, Permissions, Info.nNumberOfLinks, - Info.ftLastAccessTime.dwHighDateTime, - Info.ftLastAccessTime.dwLowDateTime, - Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime, - Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow, - Info.nFileIndexHigh, Info.nFileIndexLow); - return std::error_code(); - } + Result = file_status( + file_type_from_attrs(Info.dwFileAttributes), + perms_from_attrs(Info.dwFileAttributes), Info.nNumberOfLinks, + Info.ftLastAccessTime.dwHighDateTime, Info.ftLastAccessTime.dwLowDateTime, + Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime, + Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow, + Info.nFileIndexHigh, Info.nFileIndexLow); + return std::error_code(); handle_status_error: DWORD LastError = ::GetLastError(); @@ -798,6 +799,16 @@ int mapped_file_region::alignment() { return SysInfo.dwAllocationGranularity; } +static basic_file_status status_from_find_data(WIN32_FIND_DATA *FindData) { + return basic_file_status(file_type_from_attrs(FindData->dwFileAttributes), + perms_from_attrs(FindData->dwFileAttributes), + FindData->ftLastAccessTime.dwHighDateTime, + FindData->ftLastAccessTime.dwLowDateTime, + FindData->ftLastWriteTime.dwHighDateTime, + FindData->ftLastWriteTime.dwLowDateTime, + FindData->nFileSizeHigh, FindData->nFileSizeLow); +} + std::error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path, bool follow_symlinks) { @@ -818,7 +829,9 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it, // Get the first directory entry. WIN32_FIND_DATAW FirstFind; - ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind)); + ScopedFindHandle FindHandle(::FindFirstFileExW( + c_str(path_utf16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch, + NULL, FIND_FIRST_EX_LARGE_FETCH)); if (!FindHandle) return mapWindowsError(::GetLastError()); @@ -845,7 +858,8 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it, it.IterationHandle = intptr_t(FindHandle.take()); SmallString<128> directory_entry_path(path); path::append(directory_entry_path, directory_entry_name_utf8); - it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks); + it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks, + status_from_find_data(&FirstFind)); return std::error_code(); } @@ -881,10 +895,15 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) { directory_entry_path_utf8)) return ec; - it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8)); + it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8), + status_from_find_data(&FindData)); return std::error_code(); } +ErrorOr directory_entry::status() const { + return Status; +} + static std::error_code realPathFromHandle(HANDLE H, SmallVectorImpl &RealPath) { RealPath.clear(); diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp index 4de2e648259..a798928e4e5 100644 --- a/unittests/Support/Path.cpp +++ b/unittests/Support/Path.cpp @@ -869,8 +869,8 @@ TEST_F(FileSystemTest, BrokenSymlinkDirectoryIteration) { i != e; i.increment(ec)) { ASSERT_NO_ERROR(ec); - fs::file_status status; - if (i->status(status) == + ErrorOr status = i->status(); + if (status.getError() == std::make_error_code(std::errc::no_such_file_or_directory)) { i.no_push(); continue; -- 2.40.0