[modules] Stop performing PCM lookups for all identifiers when building with C++...

author Richard Smith <richard-llvm@metafoo.co.uk>

Wed, 22 Jul 2015 02:08:40 +0000 (02:08 +0000)

committer Richard Smith <richard-llvm@metafoo.co.uk>

Wed, 22 Jul 2015 02:08:40 +0000 (02:08 +0000)
author Richard Smith <richard-llvm@metafoo.co.uk>
Wed, 22 Jul 2015 02:08:40 +0000 (02:08 +0000)
committer Richard Smith <richard-llvm@metafoo.co.uk>
Wed, 22 Jul 2015 02:08:40 +0000 (02:08 +0000)
diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h

index 9836dd84607159f9a7e70fbcdd8f8863b3d88de2..f5f0e807584b3e2f0908282f0c37bdcf9a6e3c21 100644 (file)
--- a/include/clang/Serialization/ASTBitCodes.h
+++ b/include/clang/Serialization/ASTBitCodes.h
@@ -543,7 +543,10 @@ namespace clang {
        /// macro definition.
        MACRO_OFFSET = 47,
  
-      // ID 48 used to be a table of macros.
+      /// \brief A list of "interesting" identifiers. Only used in C++ (where we
+      /// don't normally do lookups into the serialized identifier table). These
+      /// are eagerly deserialized.
+      INTERESTING_IDENTIFIERS = 48,
  
        /// \brief Record code for undefined but used functions and variables that
        /// need a definition in this TU.
diff --git a/include/clang/Serialization/Module.h b/include/clang/Serialization/Module.h

index dae1c4ac3915f36505db3aff88e6f737430e7a3a..5ae9ec7d19a539d65312ad4a090beee76dc30b7c 100644 (file)
--- a/include/clang/Serialization/Module.h
+++ b/include/clang/Serialization/Module.h
@@ -270,6 +270,10 @@ public:
    /// IdentifierHashTable.
    void *IdentifierLookupTable;
  
+  /// \brief Offsets of identifiers that we're going to preload within
+  /// IdentifierTableData.
+  std::vector<unsigned> PreloadIdentifierOffsets;
+
    // === Macros ===
  
    /// \brief The cursor to the start of the preprocessor block, which stores
diff --git a/include/clang/Serialization/ModuleManager.h b/include/clang/Serialization/ModuleManager.h

index ab39aefa940b5c5f6af2e622efa461f3c59df17a..abd5da5422cc750ca42e8bc240f5d36dd2210a8c 100644 (file)
--- a/include/clang/Serialization/ModuleManager.h
+++ b/include/clang/Serialization/ModuleManager.h
@@ -30,14 +30,19 @@ namespace serialization {
  
  /// \brief Manages the set of modules loaded by an AST reader.
  class ModuleManager {
-  /// \brief The chain of AST files. The first entry is the one named by the
-  /// user, the last one is the one that doesn't depend on anything further.
+  /// \brief The chain of AST files, in the order in which we started to load
+  /// them (this order isn't really useful for anything).
    SmallVector<ModuleFile *, 2> Chain;
  
+  /// \brief The chain of non-module PCH files. The first entry is the one named
+  /// by the user, the last one is the one that doesn't depend on anything
+  /// further.
+  SmallVector<ModuleFile *, 2> PCHChain;
+
    // \brief The roots of the dependency DAG of AST files. This is used
    // to implement short-circuiting logic when running DFS over the dependencies.
    SmallVector<ModuleFile *, 2> Roots;
-  
+
    /// \brief All loaded modules, indexed by name.
    llvm::DenseMap<const FileEntry *, ModuleFile *> Modules;
  
@@ -138,6 +143,11 @@ public:
    ModuleReverseIterator rbegin() { return Chain.rbegin(); }
    /// \brief Reverse iterator end-point to traverse all loaded modules.
    ModuleReverseIterator rend() { return Chain.rend(); }
+
+  /// \brief A range covering the PCH and preamble module files loaded.
+  llvm::iterator_range<ModuleConstIterator> pch_modules() const {
+    return llvm::make_range(PCHChain.begin(), PCHChain.end());
+  }
    
    /// \brief Returns the primary module associated with the manager, that is,
    /// the first module loaded
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp

index cc2d04bf7c4c49d0693b1c5d98ed5f7ebe40234a..9ee6b819a85b2d794cdccf0b4a2f75ff23a783ab 100644 (file)
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -154,6 +154,9 @@ void Sema::Initialize() {
    // will not be able to merge any duplicate __va_list_tag decls correctly.
    VAListTagName = PP.getIdentifierInfo("__va_list_tag");
  
+  if (!TUScope)
+    return;
+
    // Initialize predefined 128-bit integer types, if needed.
    if (Context.getTargetInfo().hasInt128Type()) {
      // If either of the 128-bit integer types are unavailable to name lookup,
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp

index cddbd62eb630b7b4dfdd52ae266a1d7054c0aa15..c6fe363d7b56079b386557e41de48dce848756d9 100644 (file)
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -2564,6 +2564,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
        break;
      }
  
+    case INTERESTING_IDENTIFIERS:
+      F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end());
+      break;
+
      case EAGERLY_DESERIALIZED_DECLS:
        // FIXME: Skip reading this record if our ASTConsumer doesn't care
        // about "interesting" decls (for instance, if we're building a module).
@@ -3410,6 +3414,18 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
        // SourceManager.
        SourceMgr.getLoadedSLocEntryByID(Index);
      }
+
+    // Preload all the pending interesting identifiers by marking them out of
+    // date.
+    for (auto Offset : F.PreloadIdentifierOffsets) {
+      const unsigned char *Data = reinterpret_cast<const unsigned char *>(
+          F.IdentifierTableData + Offset);
+
+      ASTIdentifierLookupTrait Trait(*this, F);
+      auto KeyDataLen = Trait.ReadKeyDataLength(Data);
+      auto Key = Trait.ReadKey(Data, KeyDataLen.first);
+      PP.getIdentifierTable().getOwn(Key).setOutOfDate(true);
+    }
    }
  
    // Setup the import locations and notify the module manager that we've
@@ -3430,13 +3446,20 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
                                         M->ImportLoc.getRawEncoding());
    }
  
-  // Mark all of the identifiers in the identifier table as being out of date,
-  // so that various accessors know to check the loaded modules when the
-  // identifier is used.
-  for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
-                              IdEnd = PP.getIdentifierTable().end();
-       Id != IdEnd; ++Id)
-    Id->second->setOutOfDate(true);
+  if (!Context.getLangOpts().CPlusPlus ||
+      (Type != MK_ImplicitModule && Type != MK_ExplicitModule)) {
+    // Mark all of the identifiers in the identifier table as being out of date,
+    // so that various accessors know to check the loaded modules when the
+    // identifier is used.
+    //
+    // For C++ modules, we don't need information on many identifiers (just
+    // those that provide macros or are poisoned), so we mark all of
+    // the interesting ones via PreloadIdentifierOffsets.
+    for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
+                                IdEnd = PP.getIdentifierTable().end();
+         Id != IdEnd; ++Id)
+      Id->second->setOutOfDate(true);
+  }
    
    // Resolve any unresolved module exports.
    for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
@@ -6827,19 +6850,32 @@ IdentifierInfo *ASTReader::get(StringRef Name) {
    // Note that we are loading an identifier.
    Deserializing AnIdentifier(this);
  
-  // If there is a global index, look there first to determine which modules
-  // provably do not have any results for this identifier.
-  GlobalModuleIndex::HitSet Hits;
-  GlobalModuleIndex::HitSet *HitsPtr = nullptr;
-  if (!loadGlobalIndex()) {
-    if (GlobalIndex->lookupIdentifier(Name, Hits)) {
-      HitsPtr = &Hits;
-    }
-  }
    IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0,
                                    NumIdentifierLookups,
                                    NumIdentifierLookupHits);
-  ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
+
+  // We don't need to do identifier table lookups in C++ modules (we preload
+  // all interesting declarations, and don't need to use the scope for name
+  // lookups). Perform the lookup in PCH files, though, since we don't build
+  // a complete initial identifier table if we're carrying on from a PCH.
+  if (Context.getLangOpts().CPlusPlus) {
+    for (auto F : ModuleMgr.pch_modules())
+      if (Visitor.visit(*F, &Visitor))
+        break;
+  } else {
+    // If there is a global index, look there first to determine which modules
+    // provably do not have any results for this identifier.
+    GlobalModuleIndex::HitSet Hits;
+    GlobalModuleIndex::HitSet *HitsPtr = nullptr;
+    if (!loadGlobalIndex()) {
+      if (GlobalIndex->lookupIdentifier(Name, Hits)) {
+        HitsPtr = &Hits;
+      }
+    }
+
+    ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
+  }
+
    IdentifierInfo *II = Visitor.getIdentifierInfo();
    markIdentifierUpToDate(II);
    return II;
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp

index ba42cd3173dc31bb14ceb2e525081f24807fd523..60c9aa4486cc2e4d02c9fe108a4053b7c3a8f491 100644 (file)
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -3104,6 +3104,7 @@ class ASTIdentifierTableTrait {
    IdentifierResolver &IdResolver;
    bool IsModule;
    bool NeedDecls;
+  ASTWriter::RecordData *InterestingIdentifierOffsets;
    
    /// \brief Determines whether this is an "interesting" identifier that needs a
    /// full IdentifierInfo structure written into the hash table. Notably, this
@@ -3131,14 +3132,20 @@ public:
    typedef unsigned offset_type;
  
    ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
-                          IdentifierResolver &IdResolver, bool IsModule)
+                          IdentifierResolver &IdResolver, bool IsModule,
+                          ASTWriter::RecordData *InterestingIdentifierOffsets)
        : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
-        NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus) {}
+        NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
+        InterestingIdentifierOffsets(InterestingIdentifierOffsets) {}
  
    static hash_value_type ComputeHash(const IdentifierInfo* II) {
      return llvm::HashString(II->getName());
    }
  
+  bool isInterestingIdentifier(const IdentifierInfo *II) {
+    auto MacroOffset = Writer.getMacroDirectivesOffset(II);
+    return isInterestingIdentifier(II, MacroOffset);
+  }
    bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
      return isInterestingIdentifier(II, 0);
    }
@@ -3178,6 +3185,12 @@ public:
      // Record the location of the key data.  This is used when generating
      // the mapping from persistent IDs to strings.
      Writer.SetIdentifierOffset(II, Out.tell());
+
+    // Emit the offset of the key/data length information to the interesting
+    // identifiers table if necessary.
+    if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
+      InterestingIdentifierOffsets->push_back(Out.tell() - 4);
+
      Out.write(II->getNameStart(), KeyLen);
    }
  
@@ -3238,11 +3251,15 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
                                       bool IsModule) {
    using namespace llvm;
  
+  RecordData InterestingIdents;
+
    // Create and write out the blob that contains the identifier
    // strings.
    {
      llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
-    ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
+    ASTIdentifierTableTrait Trait(
+        *this, PP, IdResolver, IsModule,
+        (getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
  
      // Look for any identifiers that were named while processing the
      // headers, but are otherwise not needed. We add these to the hash
@@ -3316,6 +3333,11 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
    Record.push_back(FirstIdentID - NUM_PREDEF_IDENT_IDS);
    Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
                              bytes(IdentifierOffsets));
+
+  // In C++, write the list of interesting identifiers (those that are
+  // defined as macros, poisoned, or similar unusual things).
+  if (!InterestingIdents.empty())
+    Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
  }
  
  //===----------------------------------------------------------------------===//
diff --git a/lib/Serialization/ModuleManager.cpp b/lib/Serialization/ModuleManager.cpp

index a52abe69b81bb12dd211ec2a3c005b518e7e904e..254e8e69da8b068dbbd2ee92b6937fd437ae7b9a 100644 (file)
--- a/lib/Serialization/ModuleManager.cpp
+++ b/lib/Serialization/ModuleManager.cpp
@@ -95,6 +95,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
      New->File = Entry;
      New->ImportLoc = ImportLoc;
      Chain.push_back(New);
+    if (!New->isModule())
+      PCHChain.push_back(New);
      if (!ImportedBy)
        Roots.push_back(New);
      NewModule = true;
@@ -159,6 +161,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
          Modules.erase(Entry);
          assert(Chain.back() == ModuleEntry);
          Chain.pop_back();
+        if (!ModuleEntry->isModule())
+          PCHChain.pop_back();
          if (Roots.back() == ModuleEntry)
            Roots.pop_back();
          else
@@ -225,6 +229,15 @@ void ModuleManager::removeModules(
  
    // Remove the modules from the chain.
    Chain.erase(first, last);
+
+  // Also remove them from the PCH chain.
+  for (auto I = first; I != last; ++I) {
+    if (!(*I)->isModule()) {
+      PCHChain.erase(std::find(PCHChain.begin(), PCHChain.end(), *I),
+                     PCHChain.end());
+      break;
+    }
+  }
  }
  
  void
author	Richard Smith <richard-llvm@metafoo.co.uk>
	Wed, 22 Jul 2015 02:08:40 +0000 (02:08 +0000)
committer	Richard Smith <richard-llvm@metafoo.co.uk>
	Wed, 22 Jul 2015 02:08:40 +0000 (02:08 +0000)
include/clang/Serialization/ASTBitCodes.h		patch \| blob \| history
include/clang/Serialization/Module.h		patch \| blob \| history
include/clang/Serialization/ModuleManager.h		patch \| blob \| history
lib/Sema/Sema.cpp		patch \| blob \| history
lib/Serialization/ASTReader.cpp		patch \| blob \| history
lib/Serialization/ASTWriter.cpp		patch \| blob \| history
lib/Serialization/ModuleManager.cpp		patch \| blob \| history