From: Richard Smith Date: Wed, 22 Jul 2015 02:08:40 +0000 (+0000) Subject: [modules] Stop performing PCM lookups for all identifiers when building with C++... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eda30f12cd8d8f272bd01aa5f079b2814df288f9;p=clang [modules] Stop performing PCM lookups for all identifiers when building with C++ modules. Instead, serialize a list of interesting identifiers and mark those ones out of date on module import. Avoiding the identifier lookups here gives a 20-30% speedup in builds with large numbers of modules. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@242868 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h index 9836dd8460..f5f0e80758 100644 --- a/include/clang/Serialization/ASTBitCodes.h +++ b/include/clang/Serialization/ASTBitCodes.h @@ -543,7 +543,10 @@ namespace clang { /// macro definition. MACRO_OFFSET = 47, - // ID 48 used to be a table of macros. + /// \brief A list of "interesting" identifiers. Only used in C++ (where we + /// don't normally do lookups into the serialized identifier table). These + /// are eagerly deserialized. + INTERESTING_IDENTIFIERS = 48, /// \brief Record code for undefined but used functions and variables that /// need a definition in this TU. diff --git a/include/clang/Serialization/Module.h b/include/clang/Serialization/Module.h index dae1c4ac39..5ae9ec7d19 100644 --- a/include/clang/Serialization/Module.h +++ b/include/clang/Serialization/Module.h @@ -270,6 +270,10 @@ public: /// IdentifierHashTable. void *IdentifierLookupTable; + /// \brief Offsets of identifiers that we're going to preload within + /// IdentifierTableData. + std::vector PreloadIdentifierOffsets; + // === Macros === /// \brief The cursor to the start of the preprocessor block, which stores diff --git a/include/clang/Serialization/ModuleManager.h b/include/clang/Serialization/ModuleManager.h index ab39aefa94..abd5da5422 100644 --- a/include/clang/Serialization/ModuleManager.h +++ b/include/clang/Serialization/ModuleManager.h @@ -30,14 +30,19 @@ namespace serialization { /// \brief Manages the set of modules loaded by an AST reader. class ModuleManager { - /// \brief The chain of AST files. The first entry is the one named by the - /// user, the last one is the one that doesn't depend on anything further. + /// \brief The chain of AST files, in the order in which we started to load + /// them (this order isn't really useful for anything). SmallVector Chain; + /// \brief The chain of non-module PCH files. The first entry is the one named + /// by the user, the last one is the one that doesn't depend on anything + /// further. + SmallVector PCHChain; + // \brief The roots of the dependency DAG of AST files. This is used // to implement short-circuiting logic when running DFS over the dependencies. SmallVector Roots; - + /// \brief All loaded modules, indexed by name. llvm::DenseMap Modules; @@ -138,6 +143,11 @@ public: ModuleReverseIterator rbegin() { return Chain.rbegin(); } /// \brief Reverse iterator end-point to traverse all loaded modules. ModuleReverseIterator rend() { return Chain.rend(); } + + /// \brief A range covering the PCH and preamble module files loaded. + llvm::iterator_range pch_modules() const { + return llvm::make_range(PCHChain.begin(), PCHChain.end()); + } /// \brief Returns the primary module associated with the manager, that is, /// the first module loaded diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp index cc2d04bf7c..9ee6b819a8 100644 --- a/lib/Sema/Sema.cpp +++ b/lib/Sema/Sema.cpp @@ -154,6 +154,9 @@ void Sema::Initialize() { // will not be able to merge any duplicate __va_list_tag decls correctly. VAListTagName = PP.getIdentifierInfo("__va_list_tag"); + if (!TUScope) + return; + // Initialize predefined 128-bit integer types, if needed. if (Context.getTargetInfo().hasInt128Type()) { // If either of the 128-bit integer types are unavailable to name lookup, diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index cddbd62eb6..c6fe363d7b 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -2564,6 +2564,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { break; } + case INTERESTING_IDENTIFIERS: + F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end()); + break; + case EAGERLY_DESERIALIZED_DECLS: // FIXME: Skip reading this record if our ASTConsumer doesn't care // about "interesting" decls (for instance, if we're building a module). @@ -3410,6 +3414,18 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName, // SourceManager. SourceMgr.getLoadedSLocEntryByID(Index); } + + // Preload all the pending interesting identifiers by marking them out of + // date. + for (auto Offset : F.PreloadIdentifierOffsets) { + const unsigned char *Data = reinterpret_cast( + F.IdentifierTableData + Offset); + + ASTIdentifierLookupTrait Trait(*this, F); + auto KeyDataLen = Trait.ReadKeyDataLength(Data); + auto Key = Trait.ReadKey(Data, KeyDataLen.first); + PP.getIdentifierTable().getOwn(Key).setOutOfDate(true); + } } // Setup the import locations and notify the module manager that we've @@ -3430,13 +3446,20 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName, M->ImportLoc.getRawEncoding()); } - // Mark all of the identifiers in the identifier table as being out of date, - // so that various accessors know to check the loaded modules when the - // identifier is used. - for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(), - IdEnd = PP.getIdentifierTable().end(); - Id != IdEnd; ++Id) - Id->second->setOutOfDate(true); + if (!Context.getLangOpts().CPlusPlus || + (Type != MK_ImplicitModule && Type != MK_ExplicitModule)) { + // Mark all of the identifiers in the identifier table as being out of date, + // so that various accessors know to check the loaded modules when the + // identifier is used. + // + // For C++ modules, we don't need information on many identifiers (just + // those that provide macros or are poisoned), so we mark all of + // the interesting ones via PreloadIdentifierOffsets. + for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(), + IdEnd = PP.getIdentifierTable().end(); + Id != IdEnd; ++Id) + Id->second->setOutOfDate(true); + } // Resolve any unresolved module exports. for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) { @@ -6827,19 +6850,32 @@ IdentifierInfo *ASTReader::get(StringRef Name) { // Note that we are loading an identifier. Deserializing AnIdentifier(this); - // If there is a global index, look there first to determine which modules - // provably do not have any results for this identifier. - GlobalModuleIndex::HitSet Hits; - GlobalModuleIndex::HitSet *HitsPtr = nullptr; - if (!loadGlobalIndex()) { - if (GlobalIndex->lookupIdentifier(Name, Hits)) { - HitsPtr = &Hits; - } - } IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0, NumIdentifierLookups, NumIdentifierLookupHits); - ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr); + + // We don't need to do identifier table lookups in C++ modules (we preload + // all interesting declarations, and don't need to use the scope for name + // lookups). Perform the lookup in PCH files, though, since we don't build + // a complete initial identifier table if we're carrying on from a PCH. + if (Context.getLangOpts().CPlusPlus) { + for (auto F : ModuleMgr.pch_modules()) + if (Visitor.visit(*F, &Visitor)) + break; + } else { + // If there is a global index, look there first to determine which modules + // provably do not have any results for this identifier. + GlobalModuleIndex::HitSet Hits; + GlobalModuleIndex::HitSet *HitsPtr = nullptr; + if (!loadGlobalIndex()) { + if (GlobalIndex->lookupIdentifier(Name, Hits)) { + HitsPtr = &Hits; + } + } + + ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr); + } + IdentifierInfo *II = Visitor.getIdentifierInfo(); markIdentifierUpToDate(II); return II; diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index ba42cd3173..60c9aa4486 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -3104,6 +3104,7 @@ class ASTIdentifierTableTrait { IdentifierResolver &IdResolver; bool IsModule; bool NeedDecls; + ASTWriter::RecordData *InterestingIdentifierOffsets; /// \brief Determines whether this is an "interesting" identifier that needs a /// full IdentifierInfo structure written into the hash table. Notably, this @@ -3131,14 +3132,20 @@ public: typedef unsigned offset_type; ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP, - IdentifierResolver &IdResolver, bool IsModule) + IdentifierResolver &IdResolver, bool IsModule, + ASTWriter::RecordData *InterestingIdentifierOffsets) : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule), - NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus) {} + NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus), + InterestingIdentifierOffsets(InterestingIdentifierOffsets) {} static hash_value_type ComputeHash(const IdentifierInfo* II) { return llvm::HashString(II->getName()); } + bool isInterestingIdentifier(const IdentifierInfo *II) { + auto MacroOffset = Writer.getMacroDirectivesOffset(II); + return isInterestingIdentifier(II, MacroOffset); + } bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) { return isInterestingIdentifier(II, 0); } @@ -3178,6 +3185,12 @@ public: // Record the location of the key data. This is used when generating // the mapping from persistent IDs to strings. Writer.SetIdentifierOffset(II, Out.tell()); + + // Emit the offset of the key/data length information to the interesting + // identifiers table if necessary. + if (InterestingIdentifierOffsets && isInterestingIdentifier(II)) + InterestingIdentifierOffsets->push_back(Out.tell() - 4); + Out.write(II->getNameStart(), KeyLen); } @@ -3238,11 +3251,15 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, bool IsModule) { using namespace llvm; + RecordData InterestingIdents; + // Create and write out the blob that contains the identifier // strings. { llvm::OnDiskChainedHashTableGenerator Generator; - ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule); + ASTIdentifierTableTrait Trait( + *this, PP, IdResolver, IsModule, + (getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr); // Look for any identifiers that were named while processing the // headers, but are otherwise not needed. We add these to the hash @@ -3316,6 +3333,11 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, Record.push_back(FirstIdentID - NUM_PREDEF_IDENT_IDS); Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record, bytes(IdentifierOffsets)); + + // In C++, write the list of interesting identifiers (those that are + // defined as macros, poisoned, or similar unusual things). + if (!InterestingIdents.empty()) + Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents); } //===----------------------------------------------------------------------===// diff --git a/lib/Serialization/ModuleManager.cpp b/lib/Serialization/ModuleManager.cpp index a52abe69b8..254e8e69da 100644 --- a/lib/Serialization/ModuleManager.cpp +++ b/lib/Serialization/ModuleManager.cpp @@ -95,6 +95,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, New->File = Entry; New->ImportLoc = ImportLoc; Chain.push_back(New); + if (!New->isModule()) + PCHChain.push_back(New); if (!ImportedBy) Roots.push_back(New); NewModule = true; @@ -159,6 +161,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, Modules.erase(Entry); assert(Chain.back() == ModuleEntry); Chain.pop_back(); + if (!ModuleEntry->isModule()) + PCHChain.pop_back(); if (Roots.back() == ModuleEntry) Roots.pop_back(); else @@ -225,6 +229,15 @@ void ModuleManager::removeModules( // Remove the modules from the chain. Chain.erase(first, last); + + // Also remove them from the PCH chain. + for (auto I = first; I != last; ++I) { + if (!(*I)->isModule()) { + PCHChain.erase(std::find(PCHChain.begin(), PCHChain.end(), *I), + PCHChain.end()); + break; + } + } } void