From: Douglas Gregor Date: Sat, 25 Apr 2009 19:10:14 +0000 (+0000) Subject: Write the identifier offsets array into the PCH file as a blob, so X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2b3a5a83ea30bde7fa8f9d8e9a0cb12623759bfb;p=clang Write the identifier offsets array into the PCH file as a blob, so that the PCH reader does not have to decode the VBR encoding at PCH load time. Also, reduce the size of the identifier offsets from 64 bits down to 32 bits. The identifier table itself isn't going to grow to more than 4GB :) Overall, this results in a 13% speedup in the Cocoa-prefixed "Hello, World" benchmark. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@70063 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Frontend/PCHReader.h b/include/clang/Frontend/PCHReader.h index fe23185bed..0c8520ceb4 100644 --- a/include/clang/Frontend/PCHReader.h +++ b/include/clang/Frontend/PCHReader.h @@ -121,21 +121,28 @@ private: /// \brief Actual data for the on-disk hash table. /// - /// FIXME: This will eventually go away. + // This pointer points into a memory buffer, where the on-disk hash + // table for identifiers actually lives. const char *IdentifierTableData; /// \brief A pointer to an on-disk hash table of opaque type /// IdentifierHashTable. void *IdentifierLookupTable; - /// \brief String data for identifiers, indexed by the identifier ID - /// minus one. + /// \brief Offsets into the identifier table data. /// - /// Each element in this array is either an offset into - /// IdentifierTable that contains the string data (if the lowest bit - /// is set, in which case the offset is shifted left by one) or is - /// an IdentifierInfo* that has already been resolved. - llvm::SmallVector IdentifierData; + /// This array is indexed by the identifier ID (-1), and provides + /// the offset into IdentifierTableData where the string data is + /// stored. 
+ const uint32_t *IdentifierOffsets; + + /// \brief A vector containing identifiers that have already been + /// loaded. + /// + /// If the pointer at index I is non-NULL, then it refers to the + /// IdentifierInfo for the identifier with ID=I+1 that has already + /// been loaded. + std::vector IdentifiersLoaded; /// \brief A pointer to an on-disk hash table of opaque type /// PCHMethodPoolLookupTable. @@ -256,6 +263,7 @@ public: explicit PCHReader(Preprocessor &PP, ASTContext &Context) : SemaObj(0), PP(PP), Context(Context), Consumer(0), IdentifierTableData(0), IdentifierLookupTable(0), + IdentifierOffsets(0), MethodPoolLookupTable(0), MethodPoolLookupTableData(0), TotalSelectorsInMethodPool(0), SelectorOffsets(0), TotalNumSelectors(0), NumStatementsRead(0), NumMacrosRead(0), @@ -349,7 +357,7 @@ public: virtual std::pair ReadMethodPool(Selector Sel); - void SetIdentifierInfo(unsigned ID, const IdentifierInfo *II); + void SetIdentifierInfo(unsigned ID, IdentifierInfo *II); /// \brief Report a diagnostic. DiagnosticBuilder Diag(unsigned DiagID); diff --git a/include/clang/Frontend/PCHWriter.h b/include/clang/Frontend/PCHWriter.h index 41f8ad4af5..a597d68ee3 100644 --- a/include/clang/Frontend/PCHWriter.h +++ b/include/clang/Frontend/PCHWriter.h @@ -95,18 +95,18 @@ private: llvm::DenseMap IdentifierIDs; /// \brief Offsets of each of the identifier IDs into the identifier - /// table, shifted left by one bit with the low bit set. - llvm::SmallVector IdentifierOffsets; + /// table. + std::vector IdentifierOffsets; /// \brief Map that provides the ID numbers of each Selector. llvm::DenseMap SelectorIDs; /// \brief Offset of each selector within the method pool/selector /// table, indexed by the Selector ID (-1). - llvm::SmallVector SelectorOffsets; + std::vector SelectorOffsets; /// \brief A vector of all Selectors (ordered by ID). - llvm::SmallVector SelVector; + std::vector SelVector; /// \brief Offsets of each of the macro identifiers into the /// bitstream. 
diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp index 897a0e7713..005436dccf 100644 --- a/lib/Frontend/PCHReader.cpp +++ b/lib/Frontend/PCHReader.cpp @@ -1837,28 +1837,23 @@ PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) { case pch::IDENTIFIER_TABLE: IdentifierTableData = BlobStart; - IdentifierLookupTable - = PCHIdentifierLookupTable::Create( + if (Record[0]) { + IdentifierLookupTable + = PCHIdentifierLookupTable::Create( (const unsigned char *)IdentifierTableData + Record[0], (const unsigned char *)IdentifierTableData, PCHIdentifierLookupTrait(*this)); - PP.getIdentifierTable().setExternalIdentifierLookup(this); + PP.getIdentifierTable().setExternalIdentifierLookup(this); + } break; case pch::IDENTIFIER_OFFSET: - if (!IdentifierData.empty()) { + if (!IdentifiersLoaded.empty()) { Error("Duplicate IDENTIFIER_OFFSET record in PCH file"); return Failure; } - IdentifierData.swap(Record); -#ifndef NDEBUG - for (unsigned I = 0, N = IdentifierData.size(); I != N; ++I) { - if ((IdentifierData[I] & 0x01) == 0) { - Error("Malformed identifier table in the precompiled header"); - return Failure; - } - } -#endif + IdentifierOffsets = (const uint32_t *)BlobStart; + IdentifiersLoaded.resize(Record[0]); break; case pch::EXTERNAL_DEFINITIONS: @@ -2710,22 +2705,20 @@ void PCHReader::StartTranslationUnit(ASTConsumer *Consumer) { void PCHReader::PrintStats() { std::fprintf(stderr, "*** PCH Statistics:\n"); - unsigned NumTypesLoaded = - TypesLoaded.size() - std::count(TypesLoaded.begin(), TypesLoaded.end(), - (Type *)0); - unsigned NumDeclsLoaded = - DeclsLoaded.size() - std::count(DeclsLoaded.begin(), DeclsLoaded.end(), - (Decl *)0); - unsigned NumIdentifiersLoaded = 0; - for (unsigned I = 0; I < IdentifierData.size(); ++I) { - if ((IdentifierData[I] & 0x01) == 0) - ++NumIdentifiersLoaded; - } - unsigned NumSelectorsLoaded = 0; - for (unsigned I = 0; I < SelectorsLoaded.size(); ++I) { - if (SelectorsLoaded[I].getAsOpaquePtr()) - ++NumSelectorsLoaded; 
- } + unsigned NumTypesLoaded + = TypesLoaded.size() - std::count(TypesLoaded.begin(), TypesLoaded.end(), + (Type *)0); + unsigned NumDeclsLoaded + = DeclsLoaded.size() - std::count(DeclsLoaded.begin(), DeclsLoaded.end(), + (Decl *)0); + unsigned NumIdentifiersLoaded + = IdentifiersLoaded.size() - std::count(IdentifiersLoaded.begin(), + IdentifiersLoaded.end(), + (IdentifierInfo *)0); + unsigned NumSelectorsLoaded + = SelectorsLoaded.size() - std::count(SelectorsLoaded.begin(), + SelectorsLoaded.end(), + Selector()); if (!TypesLoaded.empty()) std::fprintf(stderr, " %u/%u types read (%f%%)\n", @@ -2735,10 +2728,10 @@ void PCHReader::PrintStats() { std::fprintf(stderr, " %u/%u declarations read (%f%%)\n", NumDeclsLoaded, (unsigned)DeclsLoaded.size(), ((float)NumDeclsLoaded/DeclsLoaded.size() * 100)); - if (!IdentifierData.empty()) + if (!IdentifiersLoaded.empty()) std::fprintf(stderr, " %u/%u identifiers read (%f%%)\n", - NumIdentifiersLoaded, (unsigned)IdentifierData.size(), - ((float)NumIdentifiersLoaded/IdentifierData.size() * 100)); + NumIdentifiersLoaded, (unsigned)IdentifiersLoaded.size(), + ((float)NumIdentifiersLoaded/IdentifiersLoaded.size() * 100)); if (TotalNumSelectors) std::fprintf(stderr, " %u/%u selectors read (%f%%)\n", NumSelectorsLoaded, TotalNumSelectors, @@ -2832,27 +2825,28 @@ PCHReader::ReadMethodPool(Selector Sel) { return *Pos; } -void PCHReader::SetIdentifierInfo(unsigned ID, const IdentifierInfo *II) { +void PCHReader::SetIdentifierInfo(unsigned ID, IdentifierInfo *II) { assert(ID && "Non-zero identifier ID required"); - IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(II); + assert(ID <= IdentifiersLoaded.size() && "Identifier ID out of range"); + IdentifiersLoaded[ID - 1] = II; } IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) { if (ID == 0) return 0; - if (!IdentifierTableData || IdentifierData.empty()) { + if (!IdentifierTableData || IdentifiersLoaded.empty()) { Error("No identifier table in PCH file"); return 0; } - if
(IdentifierData[ID - 1] & 0x01) { - uint64_t Offset = IdentifierData[ID - 1] >> 1; - IdentifierData[ID - 1] = reinterpret_cast<uint64_t>( - &Context.Idents.get(IdentifierTableData + Offset)); + if (!IdentifiersLoaded[ID - 1]) { + uint32_t Offset = IdentifierOffsets[ID - 1]; + IdentifiersLoaded[ID - 1] + = &Context.Idents.get(IdentifierTableData + Offset); } - return reinterpret_cast<IdentifierInfo *>(IdentifierData[ID - 1]); + return IdentifiersLoaded[ID - 1]; } Selector PCHReader::DecodeSelector(unsigned ID) { diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp index 91ddbf4182..4ac836419e 100644 --- a/lib/Frontend/PCHWriter.cpp +++ b/lib/Frontend/PCHWriter.cpp @@ -2119,7 +2119,18 @@ void PCHWriter::WriteIdentifierTable(Preprocessor &PP) { } // Write the offsets table for identifier IDs. - Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentifierOffsets); + BitCodeAbbrev *Abbrev = new BitCodeAbbrev(); + Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_OFFSET)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(Abbrev); + + RecordData Record; + Record.push_back(pch::IDENTIFIER_OFFSET); + Record.push_back(IdentifierOffsets.size()); + Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record, + (const char *)&IdentifierOffsets.front(), + IdentifierOffsets.size() * sizeof(uint32_t)); } /// \brief Write a record containing the given attributes. @@ -2253,7 +2264,7 @@ void PCHWriter::AddString(const std::string &Str, RecordData &Record) { /// \brief Note that the identifier II occurs at the given offset /// within the identifier table. void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) { - IdentifierOffsets[IdentifierIDs[II] - 1] = (Offset << 1) | 0x01; + IdentifierOffsets[IdentifierIDs[II] - 1] = Offset; } /// \brief Note that the selector Sel occurs at the given offset