From: Douglas Gregor Date: Mon, 20 Apr 2009 20:36:09 +0000 (+0000) Subject: Write the identifier table into the PCH file as an on-disk hash table X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3251ceb90b3fec68e86d6dcfa58836e20a7205c3;p=clang Write the identifier table into the PCH file as an on-disk hash table that also includes the contents of the IdentifierInfo itself (the various fields and flags, along with the chain of identifiers visible at the top level that have that name). We don't make any use of the hash table yet, except that our identifier ID -> string mapping points into the hash table now. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@69625 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h index cb55d257a1..d4f680494e 100644 --- a/include/clang/Basic/IdentifierTable.h +++ b/include/clang/Basic/IdentifierTable.h @@ -141,7 +141,7 @@ public: return tok::objc_not_keyword; } void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } - + /// getBuiltinID - Return a value indicating whether this is a builtin /// function. 0 is not-built-in. 1 is builtin-for-some-nonprimary-target. /// 2+ are specific builtin functions. @@ -156,7 +156,10 @@ public: assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID && "ID too large for field!"); } - + + unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } + void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } + /// get/setExtension - Initialize information about whether or not this /// language token is an extension. This controls extension warnings, and is /// only valid if a custom token ID is set. 
diff --git a/include/clang/Frontend/PCHReader.h b/include/clang/Frontend/PCHReader.h index e08aa75f08..6ee549abf5 100644 --- a/include/clang/Frontend/PCHReader.h +++ b/include/clang/Frontend/PCHReader.h @@ -124,7 +124,8 @@ private: /// /// Each element in this array is either an offset into /// IdentifierTable that contains the string data (if the lowest bit - /// is set) or is an IdentifierInfo* that has already been resolved. + /// is set, in which case the offset is shifted left by one) or is + /// an IdentifierInfo* that has already been resolved. llvm::SmallVector IdentifierData; /// \brief The set of external definitions stored in the the PCH diff --git a/include/clang/Frontend/PCHWriter.h b/include/clang/Frontend/PCHWriter.h index a7397750db..68003ef971 100644 --- a/include/clang/Frontend/PCHWriter.h +++ b/include/clang/Frontend/PCHWriter.h @@ -94,6 +94,10 @@ private: /// IdentifierInfo. llvm::DenseMap IdentifierIDs; + /// \brief Offsets of each of the identifier IDs into the identifier + /// table, shifted left by one bit with the low bit set. + llvm::SmallVector IdentifierOffsets; + /// \brief Declarations encountered that might be external /// definitions. /// @@ -162,14 +166,22 @@ public: /// \brief Emit a reference to a declaration. void AddDeclRef(const Decl *D, RecordData &Record); + /// \brief Determine the declaration ID of an already-emitted + /// declaration. + pch::DeclID getDeclID(const Decl *D); + /// \brief Emit a declaration name. void AddDeclarationName(DeclarationName Name, RecordData &Record); /// \brief Add a string to the given record. void AddString(const std::string &Str, RecordData &Record); - /// \brief Add the given statement or expression to the queue of statements to - /// emit. + /// \brief Note that the identifier II occurs at the given offset + /// within the identifier table. 
+ void SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset); + + /// \brief Add the given statement or expression to the queue of + /// statements to emit. /// /// This routine should be used when emitting types and declarations /// that have expressions as part of their formulation. Once the diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp index f1cb4d3cdc..e694a140d1 100644 --- a/lib/Frontend/PCHReader.cpp +++ b/lib/Frontend/PCHReader.cpp @@ -2089,9 +2089,10 @@ IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) { } if (IdentifierData[ID - 1] & 0x01) { - uint64_t Offset = IdentifierData[ID - 1]; + uint64_t Offset = IdentifierData[ID - 1] >> 1; IdentifierData[ID - 1] = reinterpret_cast( - &Context.Idents.get(IdentifierTable + Offset)); + &Context.Idents.get(IdentifierTable + Offset)); + // FIXME: also read the contents of the IdentifierInfo. } return reinterpret_cast(IdentifierData[ID - 1]); diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp index 22c52aa0ff..643b8dbf7a 100644 --- a/lib/Frontend/PCHWriter.cpp +++ b/lib/Frontend/PCHWriter.cpp @@ -13,6 +13,7 @@ #include "clang/Frontend/PCHWriter.h" #include "../Sema/Sema.h" // FIXME: move header into include/clang/Sema +#include "../Sema/IdentifierResolver.h" // FIXME: move header #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclContextInternals.h" @@ -23,6 +24,7 @@ #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/OnDiskHashTable.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/SourceManagerInternals.h" #include "clang/Basic/TargetInfo.h" @@ -1616,6 +1618,71 @@ void PCHWriter::WriteDeclsBlock(ASTContext &Context) { Stream.ExitBlock(); } +namespace { +class VISIBILITY_HIDDEN PCHIdentifierTableTrait { + PCHWriter &Writer; + +public: + typedef const IdentifierInfo* key_type; + typedef key_type key_type_ref; + + typedef pch::IdentID 
data_type; + typedef data_type data_type_ref; + + PCHIdentifierTableTrait(PCHWriter &Writer) : Writer(Writer) { } + + static unsigned ComputeHash(const IdentifierInfo* II) { + return clang::BernsteinHash(II->getName()); + } + + static std::pair<unsigned, unsigned> + EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II, + pch::IdentID ID) { + unsigned KeyLen = strlen(II->getName()) + 1; + clang::io::Emit16(Out, KeyLen); + unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags + // 4 bytes for the persistent ID + // 2 bytes for the length of the decl chain + for (IdentifierResolver::iterator D = IdentifierResolver::begin(II), + DEnd = IdentifierResolver::end(); + D != DEnd; ++D) + DataLen += sizeof(pch::DeclID); + return std::make_pair(KeyLen, DataLen); + } + + void EmitKey(llvm::raw_ostream& Out, const IdentifierInfo* II, + unsigned KeyLen) { + // Record the location of the key data. This is used when generating + // the mapping from persistent IDs to strings. + Writer.SetIdentifierOffset(II, Out.tell()); + Out.write(II->getName(), KeyLen); + } + + void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II, + pch::IdentID ID, unsigned) { + uint32_t Bits = 0; + Bits = Bits | (uint32_t)II->getTokenID(); + Bits = (Bits << 8) | (uint32_t)II->getObjCOrBuiltinID(); + Bits = (Bits << 10) | II->hasMacroDefinition(); + Bits = (Bits << 1) | II->isExtensionToken(); + Bits = (Bits << 1) | II->isPoisoned(); + Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword(); + clang::io::Emit32(Out, Bits); + clang::io::Emit32(Out, ID); + + llvm::SmallVector Decls; + for (IdentifierResolver::iterator D = IdentifierResolver::begin(II), + DEnd = IdentifierResolver::end(); + D != DEnd; ++D) + Decls.push_back(Writer.getDeclID(*D)); + + clang::io::Emit16(Out, Decls.size()); + for (unsigned I = 0; I < Decls.size(); ++I) + clang::io::Emit32(Out, Decls[I]); + } +}; +} // end anonymous namespace + /// \brief Write the identifier table into the PCH file.
/// /// The identifier table consists of a blob containing string data @@ -1626,43 +1693,42 @@ void PCHWriter::WriteIdentifierTable() { // Create and write out the blob that contains the identifier // strings. - RecordData IdentOffsets; - IdentOffsets.resize(IdentifierIDs.size()); + IdentifierOffsets.resize(IdentifierIDs.size()); { - // Create the identifier string data. - std::vector Data; - Data.push_back(0); // Data must not be empty. + OnDiskChainedHashTableGenerator Generator; + + // Create the on-disk hash table representation. for (llvm::DenseMap::iterator ID = IdentifierIDs.begin(), IDEnd = IdentifierIDs.end(); ID != IDEnd; ++ID) { assert(ID->first && "NULL identifier in identifier table"); + Generator.insert(ID->first, ID->second); + } - // Make sure we're starting on an odd byte. The PCH reader - // expects the low bit to be set on all of the offsets. - if ((Data.size() & 0x01) == 0) - Data.push_back((char)0); - - IdentOffsets[ID->second - 1] = Data.size(); - Data.insert(Data.end(), - ID->first->getName(), - ID->first->getName() + ID->first->getLength()); - Data.push_back((char)0); + // Create the on-disk hash table in a buffer. + llvm::SmallVector IdentifierTable; + { + PCHIdentifierTableTrait Trait(*this); + llvm::raw_svector_ostream Out(IdentifierTable); + Generator.Emit(Out, Trait); } // Create a blob abbreviation BitCodeAbbrev *Abbrev = new BitCodeAbbrev(); Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE)); - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Triple name + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev); // Write the identifier table RecordData Record; Record.push_back(pch::IDENTIFIER_TABLE); - Stream.EmitRecordWithBlob(IDTableAbbrev, Record, &Data.front(), Data.size()); + Stream.EmitRecordWithBlob(IDTableAbbrev, Record, + &IdentifierTable.front(), + IdentifierTable.size()); } // Write the offsets table for identifier IDs. 
- Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentOffsets); + Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentifierOffsets); } /// \brief Write a record containing the given attributes. @@ -1791,6 +1857,12 @@ void PCHWriter::AddString(const std::string &Str, RecordData &Record) { Record.insert(Record.end(), Str.begin(), Str.end()); } +/// \brief Note that the identifier II occurs at the given offset +/// within the identifier table. +void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) { + IdentifierOffsets[IdentifierIDs[II] - 1] = (Offset << 1) | 0x01; +} + PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream) : Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), NumStatements(0) { } @@ -1930,6 +2002,14 @@ void PCHWriter::AddDeclRef(const Decl *D, RecordData &Record) { Record.push_back(ID); } +pch::DeclID PCHWriter::getDeclID(const Decl *D) { + if (D == 0) + return 0; + + assert(DeclIDs.find(D) != DeclIDs.end() && "Declaration not emitted!"); + return DeclIDs[D]; +} + void PCHWriter::AddDeclarationName(DeclarationName Name, RecordData &Record) { Record.push_back(Name.getNameKind()); switch (Name.getNameKind()) {