From: Douglas Gregor Date: Sat, 26 Jan 2013 00:55:12 +0000 (+0000) Subject: Since we're stuck with realpath for the header <-> module mapping, X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=713b7c011869f177dc76e6df4f7f44b1bd073bb0;p=clang Since we're stuck with realpath for the header <-> module mapping, factor the realpath calls into FileManager::getCanonicalName() so we can cache the results of this epically slow operation. 5% speedup on my modules test, and realpath drops out of the profile. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173542 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/FileManager.h b/include/clang/Basic/FileManager.h index 5914e1608f..6d9e53b7eb 100644 --- a/include/clang/Basic/FileManager.h +++ b/include/clang/Basic/FileManager.h @@ -17,6 +17,7 @@ #include "clang/Basic/FileSystemOptions.h" #include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -152,6 +153,12 @@ class FileManager : public RefCountedBase<FileManager> { /// \see SeenDirEntries llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator> SeenFileEntries; + /// \brief The canonical names of directories. + llvm::DenseMap<const DirectoryEntry *, llvm::StringRef> CanonicalDirNames; + + /// \brief Storage for canonical names that we have computed. + llvm::BumpPtrAllocator CanonicalNameStorage; + /// \brief Each FileEntry we create is assigned a unique ID #. /// unsigned NextFileUID; @@ -257,6 +264,13 @@ public: static void modifyFileEntry(FileEntry *File, off_t Size, time_t ModificationTime); + /// \brief Retrieve the canonical name for a given directory. + /// + /// This is a very expensive operation, despite its results being cached, + /// and should only be used when the physical layout of the file system is + /// required, which is (almost) never. 
+ StringRef getCanonicalName(const DirectoryEntry *Dir); + void PrintStats() const; }; diff --git a/lib/Basic/FileManager.cpp b/lib/Basic/FileManager.cpp index 3a7bdefdaf..19f170e25a 100644 --- a/lib/Basic/FileManager.cpp +++ b/lib/Basic/FileManager.cpp @@ -40,6 +40,11 @@ #define S_ISFIFO(x) (0) #endif #endif +#if defined(LLVM_ON_UNIX) +#if defined(__linux__) +#include <linux/limits.h> +#endif +#endif using namespace clang; // FIXME: Enhance libsystem to support inode and other fields. @@ -620,6 +625,29 @@ void FileManager::modifyFileEntry(FileEntry *File, File->ModTime = ModificationTime; } +StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) { + // FIXME: use llvm::sys::fs::canonical() when it gets implemented +#ifdef LLVM_ON_UNIX + llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known + = CanonicalDirNames.find(Dir); + if (Known != CanonicalDirNames.end()) + return Known->second; + + StringRef CanonicalName(Dir->getName()); + char CanonicalNameBuf[PATH_MAX]; + if (realpath(Dir->getName(), CanonicalNameBuf)) { + unsigned Len = strlen(CanonicalNameBuf); + char *Mem = static_cast<char *>(CanonicalNameStorage.Allocate(Len, 1)); + memcpy(Mem, CanonicalNameBuf, Len); + CanonicalName = StringRef(Mem, Len); + } + + CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName)); + return CanonicalName; +#else + return StringRef(Dir->getName()); +#endif +} void FileManager::PrintStats() const { llvm::errs() << "\n*** File Manager Stats:\n"; diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index 14d44cc3da..6432925518 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -268,6 +268,10 @@ const FileEntry *DirectoryLookup::LookupFile( return Result; } +/// FIXME: HACK HACK HACK! +static llvm::DenseMap + TopFrameworkDirs; + /// \brief Given a framework directory, find the top-most framework directory. /// /// \param FileMgr The file manager to use for directory lookups. 
@@ -280,7 +284,6 @@ getTopFrameworkDir(FileManager &FileMgr, StringRef DirName, assert(llvm::sys::path::extension(DirName) == ".framework" && "Not a framework directory"); -#ifdef LLVM_ON_UNIX // Note: as an egregious but useful hack we use the real path here, because // frameworks moving between top-level frameworks to embedded frameworks tend // to be symlinked, and we base the logical structure of modules on the @@ -295,12 +298,8 @@ getTopFrameworkDir(FileManager &FileMgr, StringRef DirName, // // Similar issues occur when a top-level framework has moved into an // embedded framework. - char RealDirName[PATH_MAX]; - if (realpath(DirName.str().c_str(), RealDirName)) - DirName = RealDirName; -#endif - const DirectoryEntry *TopFrameworkDir = FileMgr.getDirectory(DirName); + DirName = FileMgr.getCanonicalName(TopFrameworkDir); do { // Get the parent directory name. DirName = llvm::sys::path::parent_path(DirName); diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp index 25e5bee9ce..7ad42bef15 100644 --- a/lib/Lex/ModuleMap.cpp +++ b/lib/Lex/ModuleMap.cpp @@ -163,20 +163,12 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { const DirectoryEntry *Dir = File->getDir(); SmallVector<const DirectoryEntry *, 2> SkippedDirs; -#ifdef LLVM_ON_UNIX + // Note: as an egregious but useful hack we use the real path here, because // frameworks moving from top-level frameworks to embedded frameworks tend // to be symlinked from the top-level location to the embedded location, // and we need to resolve lookups as if we had found the embedded location. - char RealDirName[PATH_MAX]; - StringRef DirName; - if (realpath(Dir->getName(), RealDirName)) - DirName = RealDirName; - else - DirName = Dir->getName(); -#else - StringRef DirName = Dir->getName(); -#endif + StringRef DirName = SourceMgr->getFileManager().getCanonicalName(Dir); // Keep walking up the directory hierarchy, looking for a directory with // an umbrella header. 
@@ -420,16 +412,13 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, // a framework module, do so. if (!Parent) { // Determine whether we're allowed to infer a module map. - StringRef FrameworkDirName = FrameworkDir->getName(); -#ifdef LLVM_ON_UNIX + // Note: as an egregious but useful hack we use the real path here, because // we might be looking at an embedded framework that symlinks out to a // top-level framework, and we need to infer as if we were naming the // top-level framework. - char RealFrameworkDirName[PATH_MAX]; - if (realpath(FrameworkDir->getName(), RealFrameworkDirName)) - FrameworkDirName = RealFrameworkDirName; -#endif + StringRef FrameworkDirName + = SourceMgr->getFileManager().getCanonicalName(FrameworkDir); bool canInfer = false; if (llvm::sys::path::has_parent_path(FrameworkDirName)) { @@ -527,29 +516,23 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, // check whether it is actually a subdirectory of the parent directory. // This will not be the case if the 'subframework' is actually a symlink // out to a top-level framework. -#ifdef LLVM_ON_UNIX - char RealSubframeworkDirName[PATH_MAX]; - if (realpath(Dir->path().c_str(), RealSubframeworkDirName)) { - StringRef SubframeworkDirName = RealSubframeworkDirName; - - bool FoundParent = false; - do { - // Get the parent directory name. - SubframeworkDirName - = llvm::sys::path::parent_path(SubframeworkDirName); - if (SubframeworkDirName.empty()) - break; - - if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) { - FoundParent = true; - break; - } - } while (true); + StringRef SubframeworkDirName = FileMgr.getCanonicalName(SubframeworkDir); + bool FoundParent = false; + do { + // Get the parent directory name. 
+ SubframeworkDirName + = llvm::sys::path::parent_path(SubframeworkDirName); + if (SubframeworkDirName.empty()) + break; + + if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) { + FoundParent = true; + break; + } + } while (true); - if (!FoundParent) - continue; - } -#endif + if (!FoundParent) + continue; // FIXME: Do we want to warn about subframeworks without umbrella headers? SmallString<32> NameBuf;