From 91b875c229dd68e4ee8a8d86c230ed0aeda17a5b Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 27 Jul 2017 18:25:59 +0000 Subject: [PATCH] [PDB] Write public symbol records and the publics hash table Summary: MSVC link.exe records all external symbol names in the publics stream. It provides similar functionality to an ELF .symtab. Reviewers: zturner, ruiu Subscribers: hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D35871 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309303 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/DebugInfo/CodeView/SymbolRecord.h | 8 +- .../PDB/Native/PublicsStreamBuilder.h | 29 ++- .../PDB/Native/DbiModuleDescriptorBuilder.cpp | 11 +- lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 5 +- .../PDB/Native/PublicsStreamBuilder.cpp | 187 +++++++++++++++--- 5 files changed, 196 insertions(+), 44 deletions(-) diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 934944a9b65..225cdfa47ee 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -363,12 +363,12 @@ public: : SymbolRecord(SymbolRecordKind::PublicSym32), RecordOffset(RecordOffset) {} - PublicSymFlags Flags; - uint32_t Offset; - uint16_t Segment; + PublicSymFlags Flags = PublicSymFlags::None; + uint32_t Offset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_REGISTER diff --git a/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h index 5ab57ebef53..dc78e45a535 100644 --- a/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h @@ -10,15 +10,28 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H #define LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" namespace llvm { + +template <> struct BinaryItemTraits { + static size_t length(const codeview::CVSymbol &Item) { + return Item.RecordData.size(); + } + static ArrayRef bytes(const codeview::CVSymbol &Item) { + return Item.RecordData; + } +}; + namespace msf { class MSFBuilder; } @@ -26,6 +39,14 @@ namespace pdb { class PublicsStream; struct PublicsStreamHeader; +struct GSIHashTableBuilder { + void addSymbols(ArrayRef Symbols); + + std::vector HashRecords; + std::array HashBitmap; + std::vector HashBuckets; +}; + class PublicsStreamBuilder { public: explicit PublicsStreamBuilder(msf::MSFBuilder &Msf); @@ -37,15 +58,19 @@ public: Error finalizeMsfLayout(); uint32_t calculateSerializedLength() const; - Error commit(BinaryStreamWriter &PublicsWriter); + Error commit(BinaryStreamWriter &PublicsWriter, + BinaryStreamWriter &RecWriter); uint32_t getStreamIndex() const { return StreamIdx; } uint32_t getRecordStreamIdx() const { return RecordStreamIdx; } + void addPublicSymbol(const codeview::PublicSym32 &Pub); + private: uint32_t StreamIdx = kInvalidStreamIndex; uint32_t RecordStreamIdx = kInvalidStreamIndex; - std::vector HashRecords; + std::unique_ptr Table; + std::vector Publics; msf::MSFBuilder &Msf; }; } // namespace pdb diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 897f78c5103..557dd4f041e 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryItemStream.h" @@ -26,16 +27,6 @@ using namespace llvm::codeview; using namespace llvm::msf; using namespace llvm::pdb; -namespace llvm { -template <> struct BinaryItemTraits { - static size_t length(const CVSymbol &Item) { return Item.RecordData.size(); } - - static ArrayRef bytes(const CVSymbol &Item) { - return Item.RecordData; - } -}; -} - static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, uint32_t C13Size) { uint32_t Size = sizeof(uint32_t); // Signature diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 9f35fd73629..21e5e4bc0db 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -212,8 +212,11 @@ Error PDBFileBuilder::commit(StringRef Filename) { if (Publics) { auto PS = WritableMappedBlockStream::createIndexedStream( Layout, Buffer, Publics->getStreamIndex(), Allocator); + auto PRS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, Publics->getRecordStreamIdx(), Allocator); BinaryStreamWriter PSWriter(*PS); - if (auto EC = Publics->commit(PSWriter)) + BinaryStreamWriter RecWriter(*PRS); + if (auto EC = Publics->commit(PSWriter, RecWriter)) return EC; } diff --git a/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp index 1b99b5561a2..473cdddd2d6 100644 --- a/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp @@ -8,16 +8,25 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/Support/BinaryItemStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include +#include using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; +using namespace llvm::codeview; -PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf) {} +PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) + : Table(new GSIHashTableBuilder), Msf(Msf) {} PublicsStreamBuilder::~PublicsStreamBuilder() {} @@ -25,63 +34,187 @@ uint32_t PublicsStreamBuilder::calculateSerializedLength() const { uint32_t Size = 0; Size += sizeof(PublicsStreamHeader); Size += sizeof(GSIHashHeader); - Size += HashRecords.size() * sizeof(PSHashRecord); - size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); - uint32_t NumBitmapEntries = BitmapSizeInBits / 8; - Size += NumBitmapEntries; - - // FIXME: Account for hash buckets. For now since we we write a zero-bitmap - // indicating that no hash buckets are valid, we also write zero byets of hash - // bucket data. - Size += 0; + Size += Table->HashRecords.size() * sizeof(PSHashRecord); + Size += Table->HashBitmap.size() * sizeof(uint32_t); + Size += Table->HashBuckets.size() * sizeof(uint32_t); + + Size += Publics.size() * sizeof(uint32_t); // AddrMap + + // FIXME: Add thunk map and section offsets for incremental linking. + return Size; } Error PublicsStreamBuilder::finalizeMsfLayout() { + Table->addSymbols(Publics); + Expected Idx = Msf.addStream(calculateSerializedLength()); if (!Idx) return Idx.takeError(); StreamIdx = *Idx; - Expected RecordIdx = Msf.addStream(0); + uint32_t PublicRecordBytes = 0; + for (auto &Pub : Publics) + PublicRecordBytes += Pub.length(); + + Expected RecordIdx = Msf.addStream(PublicRecordBytes); if (!RecordIdx) return RecordIdx.takeError(); RecordStreamIdx = *RecordIdx; return Error::success(); } -Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter) { +void PublicsStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { + Publics.push_back(SymbolSerializer::writeOneSymbol( + const_cast(Pub), Msf.getAllocator(), + CodeViewContainer::Pdb)); +} + +// FIXME: Put this back in the header. +struct PubSymLayout { + ulittle16_t reclen; + ulittle16_t reckind; + ulittle32_t flags; + ulittle32_t off; + ulittle16_t seg; + char name[1]; +}; + +bool comparePubSymByAddrAndName(const CVSymbol *LS, const CVSymbol *RS) { + assert(LS->length() > sizeof(PubSymLayout) && + RS->length() > sizeof(PubSymLayout)); + auto *L = reinterpret_cast(LS->data().data()); + auto *R = reinterpret_cast(RS->data().data()); + if (L->seg < R->seg) + return true; + if (L->seg > R->seg) + return false; + if (L->off < R->off) + return true; + if (L->off > R->off) + return false; + return strcmp(L->name, R->name) < 0; +} + +static StringRef getSymbolName(const CVSymbol &Sym) { + assert(Sym.kind() == S_PUB32 && "handle other kinds"); + ArrayRef NameBytes = + Sym.data().drop_front(offsetof(PubSymLayout, name)); + return StringRef(reinterpret_cast(NameBytes.data()), + NameBytes.size()) + .trim('\0'); +} + +/// Compute the address map. The address map is an array of symbol offsets +/// sorted so that it can be binary searched by address. +static std::vector computeAddrMap(ArrayRef Publics) { + // Make a vector of pointers to the symbols so we can sort it by address. + // Also gather the symbol offsets while we're at it. + std::vector PublicsByAddr; + std::vector SymOffsets; + PublicsByAddr.reserve(Publics.size()); + uint32_t SymOffset = 0; + for (const CVSymbol &Sym : Publics) { + PublicsByAddr.push_back(&Sym); + SymOffsets.push_back(SymOffset); + SymOffset += Sym.length(); + } + std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(), + comparePubSymByAddrAndName); + + // Fill in the symbol offsets in the appropriate order. + std::vector AddrMap; + AddrMap.reserve(Publics.size()); + for (const CVSymbol *Sym : PublicsByAddr) { + ptrdiff_t Idx = std::distance(Publics.data(), Sym); + assert(Idx >= 0 && size_t(Idx) < Publics.size()); + AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); + } + return AddrMap; +} + +Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter, + BinaryStreamWriter &RecWriter) { + assert(Table->HashRecords.size() == Publics.size()); + PublicsStreamHeader PSH; GSIHashHeader GSH; - // FIXME: Figure out what to put for these values. - PSH.AddrMap = 0; - PSH.ISectThunkTable = 0; - PSH.NumSections = 0; + PSH.AddrMap = Publics.size() * 4; + + // FIXME: Fill these in. They are for incremental linking. PSH.NumThunks = 0; - PSH.OffThunkTable = 0; PSH.SizeOfThunk = 0; - PSH.SymHash = 0; + PSH.ISectThunkTable = 0; + PSH.OffThunkTable = 0; + PSH.NumSections = 0; GSH.VerSignature = GSIHashHeader::HdrSignature; GSH.VerHdr = GSIHashHeader::HdrVersion; - GSH.HrSize = 0; - GSH.NumBuckets = 0; + GSH.HrSize = Table->HashRecords.size() * sizeof(PSHashRecord); + GSH.NumBuckets = Table->HashBitmap.size() * 4 + Table->HashBuckets.size() * 4; + + PSH.SymHash = sizeof(GSH) + GSH.HrSize + GSH.NumBuckets; if (auto EC = PublicsWriter.writeObject(PSH)) return EC; if (auto EC = PublicsWriter.writeObject(GSH)) return EC; - if (auto EC = PublicsWriter.writeArray(makeArrayRef(HashRecords))) + + if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashRecords))) + return EC; + if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashBitmap))) + return EC; + if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashBuckets))) return EC; - size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); - uint32_t NumBitmapEntries = BitmapSizeInBits / 8; - std::vector BitmapData(NumBitmapEntries); - // FIXME: Build an actual bitmap - if (auto EC = PublicsWriter.writeBytes(makeArrayRef(BitmapData))) + std::vector AddrMap = computeAddrMap(Publics); + if (auto EC = PublicsWriter.writeArray(makeArrayRef(AddrMap))) + return EC; + + BinaryItemStream Records(support::endianness::little); + Records.setItems(Publics); + BinaryStreamRef RecordsRef(Records); + if (auto EC = RecWriter.writeStreamRef(RecordsRef)) return EC; - // FIXME: Write actual hash buckets. return Error::success(); } + +void GSIHashTableBuilder::addSymbols(ArrayRef Symbols) { + std::array, IPHR_HASH + 1> TmpBuckets; + uint32_t SymOffset = 0; + for (const CVSymbol &Sym : Symbols) { + PSHashRecord HR; + // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. + HR.Off = SymOffset + 1; + HR.CRef = 1; // Always use a refcount of 1. + + // Hash the name to figure out which bucket this goes into. + StringRef Name = getSymbolName(Sym); + size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; + TmpBuckets[BucketIdx].push_back(HR); // FIXME: Does order matter? + + SymOffset += Sym.length(); + } + + // Compute the three tables: the hash records in bucket and chain order, the + // bucket presence bitmap, and the bucket chain start offsets. + HashRecords.reserve(Symbols.size()); + for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { + auto &Bucket = TmpBuckets[BucketIdx]; + if (Bucket.empty()) + continue; + HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); + + // Calculate what the offset of the first hash record in the chain would be + // if it were inflated to contain 32-bit pointers. On a 32-bit system, each + // record would be 12 bytes. See HROffsetCalc in gsi.h. + const int SizeOfHROffsetCalc = 12; + ulittle32_t ChainStartOff = + ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); + HashBuckets.push_back(ChainStartOff); + for (const auto &HR : Bucket) + HashRecords.push_back(HR); + } +} -- 2.50.1