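PCH optimization for the identifier table, where we separate "interesting" identifiers (kept in the on-disk hash table) from "uninteresting" identifiers (stored after the hash table).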

author Douglas Gregor <dgregor@apple.com>

Sat, 25 Apr 2009 21:04:17 +0000 (21:04 +0000)

committer Douglas Gregor <dgregor@apple.com>

Sat, 25 Apr 2009 21:04:17 +0000 (21:04 +0000)
author Douglas Gregor <dgregor@apple.com>
Sat, 25 Apr 2009 21:04:17 +0000 (21:04 +0000)
committer Douglas Gregor <dgregor@apple.com>
Sat, 25 Apr 2009 21:04:17 +0000 (21:04 +0000)
diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp

index 005436dccf2eacfdc619d5214e5606612310d707..6f1ff3297722dacd4c42ffe24919110d38f06b00 100644 (file)
--- a/lib/Frontend/PCHReader.cpp
+++ b/lib/Frontend/PCHReader.cpp
@@ -1245,8 +1245,8 @@ public:
    static std::pair<unsigned, unsigned>
    ReadKeyDataLength(const unsigned char*& d) {
      using namespace clang::io;
-    unsigned KeyLen = ReadUnalignedLE16(d);
      unsigned DataLen = ReadUnalignedLE16(d);
+    unsigned KeyLen = ReadUnalignedLE16(d);
      return std::make_pair(KeyLen, DataLen);
    }
      
@@ -2842,8 +2842,31 @@ IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) {
    
    if (!IdentifiersLoaded[ID - 1]) {
      uint32_t Offset = IdentifierOffsets[ID - 1];
-    IdentifiersLoaded[ID - 1] 
-      = &Context.Idents.get(IdentifierTableData + Offset);
+
+    // If there is an identifier lookup table, but the offset of this
+    // string is after the identifier table itself, then we know that
+    // this string is not in the on-disk hash table. Therefore,
+    // disable lookup into the hash table when looking for this
+    // identifier.
+    PCHIdentifierLookupTable *IdTable 
+      = (PCHIdentifierLookupTable *)IdentifierLookupTable;
+    bool SkipHashTable = IdTable &&
+      Offset >= uint32_t(IdTable->getBuckets() - IdTable->getBase());
+
+    if (SkipHashTable)
+      PP.getIdentifierTable().setExternalIdentifierLookup(0);
+
+    // All of the strings in the PCH file are preceded by a 16-bit
+    // length. Extract that 16-bit length to avoid having to run
+    // strlen().
+    const char *Str = IdentifierTableData + Offset;
+    const char *StrLenPtr = Str - 2;
+    unsigned StrLen = (((unsigned) StrLenPtr[0])
+                       | (((unsigned) StrLenPtr[1]) << 8)) - 1;
+    IdentifiersLoaded[ID - 1] = &Context.Idents.get(Str, Str + StrLen);
+
+    if (SkipHashTable)
+      PP.getIdentifierTable().setExternalIdentifierLookup(this);
    }
    
    return IdentifiersLoaded[ID - 1];
diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp

index 4ac836419eb836d307fe84d5b96d0cfe72b840dd..bce941a8497b3e9bbd4cdec0cca767f7f38f1798 100644 (file)
--- a/lib/Frontend/PCHWriter.cpp
+++ b/lib/Frontend/PCHWriter.cpp
@@ -2013,7 +2013,6 @@ public:
      EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II, 
                        pch::IdentID ID) {
      unsigned KeyLen = strlen(II->getName()) + 1;
-    clang::io::Emit16(Out, KeyLen);
      unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
                                // 4 bytes for the persistent ID
      if (II->hasMacroDefinition() && 
@@ -2023,7 +2022,13 @@ public:
                                     DEnd = IdentifierResolver::end();
           D != DEnd; ++D)
        DataLen += sizeof(pch::DeclID);
+    // We emit the key length after the data length so that the
+    // "uninteresting" identifiers following the identifier hash table
+    // structure will have the same (key length, key characters)
+    // layout as the keys in the hash table. This also matches the
+    // format for identifiers in pretokenized headers.
      clang::io::Emit16(Out, DataLen);
+    clang::io::Emit16(Out, KeyLen);
      return std::make_pair(KeyLen, DataLen);
    }
    
@@ -2083,12 +2088,33 @@ void PCHWriter::WriteIdentifierTable(Preprocessor &PP) {
    {
      OnDiskChainedHashTableGenerator<PCHIdentifierTableTrait> Generator;
      
+    llvm::SmallVector<const IdentifierInfo *, 32> UninterestingIdentifiers;
+
      // Create the on-disk hash table representation.
      for (llvm::DenseMap<const IdentifierInfo *, pch::IdentID>::iterator
             ID = IdentifierIDs.begin(), IDEnd = IdentifierIDs.end();
           ID != IDEnd; ++ID) {
        assert(ID->first && "NULL identifier in identifier table");
-      Generator.insert(ID->first, ID->second);
+
+      // Classify each identifier as either "interesting" or "not
+      // interesting". Interesting identifiers are those that have
+      // additional information that needs to be read from the PCH
+      // file, e.g., a built-in ID, declaration chain, or macro
+      // definition. These identifiers are placed into the hash table
+      // so that they can be found when looked up in the user program.
+      // All other identifiers are "uninteresting", which means that
+      // the IdentifierInfo built by default has all of the
+      // information we care about. Such identifiers are placed after
+      // the hash table.
+      const IdentifierInfo *II = ID->first;
+      if (II->isPoisoned() ||
+          II->isExtensionToken() ||
+          II->hasMacroDefinition() ||
+          II->getObjCOrBuiltinID() ||
+          II->getFETokenInfo<void>())
+        Generator.insert(ID->first, ID->second);
+      else
+        UninterestingIdentifiers.push_back(II);
      }
  
      // Create the on-disk hash table in a buffer.
@@ -2100,6 +2126,14 @@ void PCHWriter::WriteIdentifierTable(Preprocessor &PP) {
        // Make sure that no bucket is at offset 0
        clang::io::Emit32(Out, 0);
        BucketOffset = Generator.Emit(Out, Trait);
+      
+      for (unsigned I = 0, N = UninterestingIdentifiers.size(); I != N; ++I) {
+        const IdentifierInfo *II = UninterestingIdentifiers[I];
+        unsigned N = II->getLength() + 1;
+        clang::io::Emit16(Out, N);
+        SetIdentifierOffset(II, Out.tell());
+        Out.write(II->getName(), N);
+      }
      }
  
      // Create a blob abbreviation
author	Douglas Gregor <dgregor@apple.com>
	Sat, 25 Apr 2009 21:04:17 +0000 (21:04 +0000)
committer	Douglas Gregor <dgregor@apple.com>
	Sat, 25 Apr 2009 21:04:17 +0000 (21:04 +0000)
lib/Frontend/PCHReader.cpp		patch | blob | history
lib/Frontend/PCHWriter.cpp		patch | blob | history