From 188bdcd1aaf5e9f457cec6851707d7dc3e7bbb15 Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Fri, 25 Jan 2013 23:32:03 +0000 Subject: [PATCH] Improve coordination between the module manager and the global module index, optimizing the operation that skips lookup in modules where we know the identifier will not be found. This makes the global module index optimization actually useful, providing an 8.5% speedup over modules without the global module index for -fsyntax-only. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173529 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../clang/Serialization/GlobalModuleIndex.h | 26 ++------- include/clang/Serialization/ModuleManager.h | 34 ++++++++++- lib/Serialization/ASTReader.cpp | 34 +++++------ lib/Serialization/GlobalModuleIndex.cpp | 31 ++-------- lib/Serialization/ModuleManager.cpp | 56 +++++++++++++++++-- 5 files changed, 109 insertions(+), 72 deletions(-) diff --git a/include/clang/Serialization/GlobalModuleIndex.h b/include/clang/Serialization/GlobalModuleIndex.h index 703025ea2e..38ccb26970 100644 --- a/include/clang/Serialization/GlobalModuleIndex.h +++ b/include/clang/Serialization/GlobalModuleIndex.h @@ -93,9 +93,6 @@ class GlobalModuleIndex { /// identifier. unsigned NumIdentifierLookupHits; - /// \brief The number of modules provided via skip sets. - unsigned NumIdentifierModulesSkipped; - /// \brief Internal constructor. Use \c readIndex() to read an index. explicit GlobalModuleIndex(FileManager &FileMgr, llvm::MemoryBuffer *Buffer, llvm::BitstreamCursor Cursor); @@ -142,31 +139,20 @@ public: void getModuleDependencies(const FileEntry *ModuleFile, SmallVectorImpl &Dependencies); + /// \brief A set of module files in which we found a result. + typedef llvm::SmallPtrSet HitSet; + /// \brief Look for all of the module files with a namespace-scope binding /// for the given identifier, e.g., a global function, variable, or type with /// that name, or declare a method with the selector. /// /// \param Name The identifier to look for. /// - /// \param ModuleFiles Will be populated with the list of module + /// \param Hits Will be populated with the set of module /// files that declare entities with the given name. /// - /// \returns true if any module files were found, false otherwise. - bool lookupIdentifier(StringRef Name, - SmallVectorImpl &ModuleFiles); - - /// \brief A set of module files into which name lookup can be skipped, - /// because they are known not to contain any bindings for the given name. - typedef llvm::SmallPtrSet SkipSet; - - /// \brief Compute the "skip set", meaning those known modules that do not - /// have some particular property. - /// - /// \param ModuleFiles The set of module files that has some property. - /// - /// \returns The set of known modules that do not have the property exhibited - /// by the files in \p ModuleFiles. - SkipSet computeSkipSet(const SmallVectorImpl &ModuleFiles); + /// \returns true if the identifier is known to the index, false otherwise. + bool lookupIdentifier(StringRef Name, HitSet &Hits); /// \brief Print statistics to standard error. void printStats(); diff --git a/include/clang/Serialization/ModuleManager.h b/include/clang/Serialization/ModuleManager.h index 465da76787..05d702664e 100644 --- a/include/clang/Serialization/ModuleManager.h +++ b/include/clang/Serialization/ModuleManager.h @@ -21,6 +21,8 @@ namespace clang { +class GlobalModuleIndex; + namespace serialization { /// \brief Manages the set of modules loaded by an AST reader. @@ -42,6 +44,25 @@ class ModuleManager { /// \brief The visitation order. SmallVector VisitOrder; + /// \brief The list of module files that both we and the global module index + /// know about. + /// + /// Either the global index or the module manager may have modules that the + /// other does not know about, because the global index can be out-of-date + /// (in which case the module manager could have modules it does not) and + /// this particular translation unit might not have loaded all of the modules + /// known to the global index. + SmallVector ModulesInCommonWithGlobalIndex; + + /// \brief The global module index, if one is attached. + /// + /// The global module index will actually be owned by the ASTReader; this is + /// just an non-owning pointer. + GlobalModuleIndex *GlobalIndex; + + /// \brief Update the set of modules files we know about known to the global index. + void updateModulesInCommonWithGlobalIndex(); + public: typedef SmallVector::iterator ModuleIterator; typedef SmallVector::const_iterator ModuleConstIterator; @@ -117,7 +138,10 @@ public: /// \brief Add an in-memory buffer the list of known buffers void addInMemoryBuffer(StringRef FileName, llvm::MemoryBuffer *Buffer); - + + /// \brief Set the global module index. + void setGlobalIndex(GlobalModuleIndex *Index); + /// \brief Visit each of the modules. /// /// This routine visits each of the modules, starting with the @@ -136,7 +160,13 @@ public: /// /// \param UserData User data associated with the visitor object, which /// will be passed along to the visitor. - void visit(bool (*Visitor)(ModuleFile &M, void *UserData), void *UserData); + /// + /// \param ModuleFilesHit If non-NULL, contains the set of module files + /// that we know we need to visit because the global module index told us to. + /// Any module that is known to both the global module index and the module + /// manager that is *not* in this set can be skipped. + void visit(bool (*Visitor)(ModuleFile &M, void *UserData), void *UserData, + llvm::SmallPtrSet *ModuleFilesHit = 0); /// \brief Visit each of the modules with a depth-first traversal. /// diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index d8d5e664f0..fd1b8966ee 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -1378,17 +1378,15 @@ namespace { class IdentifierLookupVisitor { StringRef Name; unsigned PriorGeneration; - GlobalModuleIndex::SkipSet &SkipSet; unsigned &NumIdentifierLookups; unsigned &NumIdentifierLookupHits; IdentifierInfo *Found; public: IdentifierLookupVisitor(StringRef Name, unsigned PriorGeneration, - GlobalModuleIndex::SkipSet &SkipSet, unsigned &NumIdentifierLookups, unsigned &NumIdentifierLookupHits) - : Name(Name), PriorGeneration(PriorGeneration), SkipSet(SkipSet), + : Name(Name), PriorGeneration(PriorGeneration), NumIdentifierLookups(NumIdentifierLookups), NumIdentifierLookupHits(NumIdentifierLookupHits), Found() @@ -1403,10 +1401,6 @@ namespace { if (M.Generation <= This->PriorGeneration) return true; - // If this module file is in the skip set, don't bother looking in it. - if (This->SkipSet.count(M.File)) - return false; - ASTIdentifierLookupTable *IdTable = (ASTIdentifierLookupTable *)M.IdentifierLookupTable; if (!IdTable) @@ -1443,18 +1437,18 @@ void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) { // If there is a global index, look there first to determine which modules // provably do not have any results for this identifier. - GlobalModuleIndex::SkipSet SkipSet; + GlobalModuleIndex::HitSet Hits; + GlobalModuleIndex::HitSet *HitsPtr = 0; if (!loadGlobalIndex()) { - SmallVector ModuleFiles; - if (GlobalIndex->lookupIdentifier(II.getName(), ModuleFiles)) { - SkipSet = GlobalIndex->computeSkipSet(ModuleFiles); + if (GlobalIndex->lookupIdentifier(II.getName(), Hits)) { + HitsPtr = &Hits; } } - IdentifierLookupVisitor Visitor(II.getName(), PriorGeneration, SkipSet, + IdentifierLookupVisitor Visitor(II.getName(), PriorGeneration, NumIdentifierLookups, NumIdentifierLookupHits); - ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor); + ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr); markIdentifierUpToDate(&II); } @@ -2695,6 +2689,7 @@ bool ASTReader::loadGlobalIndex() { return true; GlobalIndex.reset(Result.first); + ModuleMgr.setGlobalIndex(GlobalIndex.get()); return false; } @@ -2725,6 +2720,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName, // If we find that any modules are unusable, the global index is going // to be out-of-date. Just remove it. GlobalIndex.reset(); + ModuleMgr.setGlobalIndex(0); return ReadResult; case Success: @@ -5760,17 +5756,17 @@ IdentifierInfo* ASTReader::get(const char *NameStart, const char *NameEnd) { // If there is a global index, look there first to determine which modules // provably do not have any results for this identifier. - GlobalModuleIndex::SkipSet SkipSet; + GlobalModuleIndex::HitSet Hits; + GlobalModuleIndex::HitSet *HitsPtr = 0; if (!loadGlobalIndex()) { - SmallVector ModuleFiles; - if (GlobalIndex->lookupIdentifier(Name, ModuleFiles)) { - SkipSet = GlobalIndex->computeSkipSet(ModuleFiles); + if (GlobalIndex->lookupIdentifier(Name, Hits)) { + HitsPtr = &Hits; } } - IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0, SkipSet, + IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0, NumIdentifierLookups, NumIdentifierLookupHits); - ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor); + ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr); IdentifierInfo *II = Visitor.getIdentifierInfo(); markIdentifierUpToDate(II); return II; diff --git a/lib/Serialization/GlobalModuleIndex.cpp b/lib/Serialization/GlobalModuleIndex.cpp index b192398280..b778a72ba2 100644 --- a/lib/Serialization/GlobalModuleIndex.cpp +++ b/lib/Serialization/GlobalModuleIndex.cpp @@ -127,7 +127,8 @@ struct LoadedModuleInfo { GlobalModuleIndex::GlobalModuleIndex(FileManager &FileMgr, llvm::MemoryBuffer *Buffer, llvm::BitstreamCursor Cursor) - : Buffer(Buffer), IdentifierIndex() + : Buffer(Buffer), IdentifierIndex(), + NumIdentifierLookups(), NumIdentifierLookupHits() { typedef llvm::DenseMap LoadedModulesMap; LoadedModulesMap LoadedModules; @@ -368,10 +369,8 @@ void GlobalModuleIndex::getModuleDependencies( Dependencies = Modules[Known->second].Dependencies; } -bool GlobalModuleIndex::lookupIdentifier( - StringRef Name, - SmallVectorImpl &ModuleFiles) { - ModuleFiles.clear(); +bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) { + Hits.clear(); // If there's no identifier index, there is nothing we can do. if (!IdentifierIndex) @@ -392,29 +391,13 @@ bool GlobalModuleIndex::lookupIdentifier( if (ID >= Modules.size() || !Modules[ID].File) continue; - ModuleFiles.push_back(Modules[ID].File); + Hits.insert(Modules[ID].File); } ++NumIdentifierLookupHits; return true; } -GlobalModuleIndex::SkipSet -GlobalModuleIndex::computeSkipSet( - const SmallVectorImpl &ModuleFiles) { - llvm::SmallPtrSet Found(ModuleFiles.begin(), - ModuleFiles.end()); - - SkipSet Result; - for (unsigned I = 0, N = Modules.size(); I != N; ++I) { - if (Modules[I].File && !Found.count(Modules[I].File)) - Result.insert(Modules[I].File); - } - - NumIdentifierModulesSkipped += Result.size(); - return Result; -} - void GlobalModuleIndex::printStats() { std::fprintf(stderr, "*** Global Module Index Statistics:\n"); if (NumIdentifierLookups) { @@ -422,10 +405,6 @@ void GlobalModuleIndex::printStats() { NumIdentifierLookupHits, NumIdentifierLookups, (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups); } - if (NumIdentifierLookups && NumIdentifierModulesSkipped) { - fprintf(stderr, " %f modules skipped per lookup (on average)\n", - (double)NumIdentifierModulesSkipped/NumIdentifierLookups); - } std::fprintf(stderr, "\n"); } diff --git a/lib/Serialization/ModuleManager.cpp b/lib/Serialization/ModuleManager.cpp index f3fe2b96d5..97c86a7bb8 100644 --- a/lib/Serialization/ModuleManager.cpp +++ b/lib/Serialization/ModuleManager.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// #include "clang/Serialization/ModuleManager.h" +#include "clang/Serialization/GlobalModuleIndex.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" @@ -140,17 +141,45 @@ void ModuleManager::addInMemoryBuffer(StringRef FileName, InMemoryBuffers[Entry] = Buffer; } -ModuleManager::ModuleManager(FileManager &FileMgr) : FileMgr(FileMgr) { } +void ModuleManager::updateModulesInCommonWithGlobalIndex() { + ModulesInCommonWithGlobalIndex.clear(); + + if (!GlobalIndex) + return; + + // Collect the set of modules known to the global index. + SmallVector KnownModules; + GlobalIndex->getKnownModules(KnownModules); + + // Map those modules to AST files known to the module manager. + for (unsigned I = 0, N = KnownModules.size(); I != N; ++I) { + llvm::DenseMap::iterator Known + = Modules.find(KnownModules[I]); + if (Known == Modules.end()) + continue; + + ModulesInCommonWithGlobalIndex.push_back(Known->second); + } +} + +void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) { + GlobalIndex = Index; + updateModulesInCommonWithGlobalIndex(); +} + +ModuleManager::ModuleManager(FileManager &FileMgr) + : FileMgr(FileMgr), GlobalIndex() { } ModuleManager::~ModuleManager() { for (unsigned i = 0, e = Chain.size(); i != e; ++i) delete Chain[e - i - 1]; } -void ModuleManager::visit(bool (*Visitor)(ModuleFile &M, void *UserData), - void *UserData) { - // If the visitation number array is the wrong size, resize it and recompute - // an order. +void +ModuleManager::visit(bool (*Visitor)(ModuleFile &M, void *UserData), + void *UserData, + llvm::SmallPtrSet *ModuleFilesHit) { + // If the visitation order vector is the wrong size, recompute the order. if (VisitOrder.size() != Chain.size()) { unsigned N = size(); VisitOrder.clear(); @@ -196,11 +225,28 @@ void ModuleManager::visit(bool (*Visitor)(ModuleFile &M, void *UserData), } assert(VisitOrder.size() == N && "Visitation order is wrong?"); + + // We may need to update the set of modules we have in common with the + // global module index, since modules could have been added to the module + // manager since we loaded the global module index. + updateModulesInCommonWithGlobalIndex(); } SmallVector Stack; SmallVector Visited(size(), false); + // If the caller has provided us with a hit-set that came from the global + // module index, mark every module file in common with the global module + // index that is *not* in that set as 'visited'. + if (ModuleFilesHit && !ModulesInCommonWithGlobalIndex.empty()) { + for (unsigned I = 0, N = ModulesInCommonWithGlobalIndex.size(); I != N; ++I) + { + ModuleFile *M = ModulesInCommonWithGlobalIndex[I]; + if (!ModuleFilesHit->count(M->File)) + Visited[M->Index] = true; + } + } + for (unsigned I = 0, N = VisitOrder.size(); I != N; ++I) { ModuleFile *CurrentModule = VisitOrder[I]; // Should we skip this module file? -- 2.40.0