From: Alexandre Ganea Date: Thu, 7 Feb 2019 15:24:18 +0000 (+0000) Subject: [CodeView] Fix cycles in debug info when merging Types with global hashes X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=29400a2fb2b2c72ede0da96a8c2908391151b71d;p=llvm [CodeView] Fix cycles in debug info when merging Types with global hashes When type streams with forward references were merged using GHashes, cycles were introduced in the debug info. This was caused by GlobalTypeTableBuilder::insertRecordAs() not inserting the record on the second pass, thus yielding an empty ArrayRef at that record slot. Later on, upon PDB emission, TpiStreamBuilder::commit() would skip that empty record, thus offseting all indices that came after in the stream. This solution comes in two steps: 1. Fix the hash calculation, by doing a multiple-step resolution, iff there are forward references in the input stream. 2. Fix merge by resolving with multiple passes, therefore moving records with forward references at the end of the stream. This patch also adds support for llvm-readoj --codeview-ghash. Finally, fix dumpCodeViewMergedTypes() which previously could reference deleted memory. Fixes PR40221 Differential Revision: https://reviews.llvm.org/D57790 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353412 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h index c4a7851befe..a43ce20edde 100644 --- a/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -73,14 +73,30 @@ public: CreateFunc Create) { auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); - if (LLVM_UNLIKELY(Result.second)) { + if (LLVM_UNLIKELY(Result.second /*inserted*/ || + Result.first->second.isSimple())) { uint8_t *Stable = RecordStorage.Allocate(RecordSize); MutableArrayRef Data(Stable, RecordSize); - SeenRecords.push_back(Create(Data)); + ArrayRef StableRecord = Create(Data); + if (StableRecord.empty()) { + // Records with forward references into the Type stream will be deferred + // for insertion at a later time, on the second pass. + Result.first->getSecond() = TypeIndex(SimpleTypeKind::NotTranslated); + return TypeIndex(SimpleTypeKind::NotTranslated); + } + if (Result.first->second.isSimple()) { + assert(Result.first->second.getIndex() == + (uint32_t)SimpleTypeKind::NotTranslated); + // On the second pass, update with index to remapped record. The + // (initially misbehaved) record will now come *after* other records + // resolved in the first pass, with proper *back* references in the + // stream. + Result.first->second = nextTypeIndex(); + } + SeenRecords.push_back(StableRecord); SeenHashes.push_back(Hash); } - // Update the caller's copy of Record to point a stable copy. return Result.first->second; } diff --git a/include/llvm/DebugInfo/CodeView/TypeHashing.h b/include/llvm/DebugInfo/CodeView/TypeHashing.h index 442a65bd86f..c2fbff64ceb 100644 --- a/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ b/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -84,6 +84,8 @@ struct GloballyHashedType { } std::array Hash; + bool empty() const { return *(const uint64_t*)Hash.data() == 0; } + /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids @@ -107,8 +109,33 @@ struct GloballyHashedType { template static std::vector hashTypes(Range &&Records) { std::vector Hashes; - for (const auto &R : Records) - Hashes.push_back(hashType(R, Hashes, Hashes)); + bool UnresolvedRecords = false; + for (const auto &R : Records) { + GloballyHashedType H = hashType(R, Hashes, Hashes); + if (H.empty()) + UnresolvedRecords = true; + Hashes.push_back(H); + } + + // In some rare cases, there might be records with forward references in the + // stream. Several passes might be needed to fully hash each record in the + // Type stream. However this occurs on very small OBJs generated by MASM, + // with a dozen records at most. Therefore this codepath isn't + // time-critical, as it isn't taken in 99% of cases. + while (UnresolvedRecords) { + UnresolvedRecords = false; + auto HashIt = Hashes.begin(); + for (const auto &R : Records) { + if (HashIt->empty()) { + GloballyHashedType H = hashType(R, Hashes, Hashes); + if (H.empty()) + UnresolvedRecords = true; + else + *HashIt = H; + } + ++HashIt; + } + } return Hashes; } diff --git a/lib/DebugInfo/CodeView/TypeHashing.cpp b/lib/DebugInfo/CodeView/TypeHashing.cpp index 612399ddbff..2dbc11a84f0 100644 --- a/lib/DebugInfo/CodeView/TypeHashing.cpp +++ b/lib/DebugInfo/CodeView/TypeHashing.cpp @@ -54,10 +54,16 @@ GloballyHashedType::hashType(ArrayRef RecordData, reinterpret_cast(RefData.data()), Ref.Count); for (TypeIndex TI : Indices) { ArrayRef BytesToHash; - if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) { + if (TI.isSimple() || TI.isNoneType()) { const uint8_t *IndexBytes = reinterpret_cast(&TI); BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex)); } else { + if (TI.toArrayIndex() >= Prev.size() || + Prev[TI.toArrayIndex()].empty()) { + // There are references to yet-unhashed records. Suspend hashing for + // this record until all the other records are processed. + return {}; + } BytesToHash = Prev[TI.toArrayIndex()].Hash; } S.update(BytesToHash); diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index f2a5dcb93d6..6b308453c2d 100644 --- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -152,9 +152,12 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = Writer.writeObject(*Header)) return EC; - for (auto Rec : TypeRecords) + for (auto Rec : TypeRecords) { + assert(!Rec.empty()); // An empty record will not write anything, but it + // would shift all offsets from here on. if (auto EC = Writer.writeBytes(Rec)) return EC; + } if (HashStreamIndex != kInvalidStreamIndex) { auto HVS = WritableMappedBlockStream::createIndexedStream( diff --git a/test/tools/llvm-readobj/codeview-merging-ghash.test b/test/tools/llvm-readobj/codeview-merging-ghash.test new file mode 100644 index 00000000000..04b30ffa7cd --- /dev/null +++ b/test/tools/llvm-readobj/codeview-merging-ghash.test @@ -0,0 +1,132 @@ +# RUN: yaml2obj %s -o=%t.obj +# RUN: llvm-readobj -codeview-merged-types %t.obj | FileCheck %s --check-prefix=MERGED +# RUN: llvm-readobj -codeview-merged-types -codeview-ghash %t.obj | FileCheck %s --check-prefix=MERGED + +# MERGED: Format: COFF-x86-64 +# MERGED-NEXT: Arch: x86_64 +# MERGED-NEXT: AddressSize: 64bit +# MERGED-NEXT: MergedTypeStream [ +# MERGED-NEXT: ArgList (0x1000) { +# MERGED-NEXT: TypeLeafKind: LF_ARGLIST (0x1201) +# MERGED-NEXT: NumArgs: 0 +# MERGED-NEXT: Arguments [ +# MERGED-NEXT: ] +# MERGED-NEXT: } +# MERGED-NEXT: Modifier (0x1001) { +# MERGED-NEXT: TypeLeafKind: LF_MODIFIER (0x1001) +# MERGED-NEXT: ModifiedType: void (0x3) +# MERGED-NEXT: Modifiers [ (0x3) +# MERGED-NEXT: Const (0x1) +# MERGED-NEXT: Volatile (0x2) +# MERGED-NEXT: ] +# MERGED-NEXT: } +# MERGED-NEXT: Procedure (0x1002) { +# MERGED-NEXT: TypeLeafKind: LF_PROCEDURE (0x1008) +# MERGED-NEXT: ReturnType: void (0x3) +# MERGED-NEXT: CallingConvention: NearC (0x0) +# MERGED-NEXT: FunctionOptions [ (0x0) +# MERGED-NEXT: ] +# MERGED-NEXT: NumParameters: 0 +# MERGED-NEXT: ArgListType: () (0x1000) +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1003) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void (0x1001) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1004) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void* const (0x1003) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1005) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void* const* const (0x1004) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1006) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void* const* const* const (0x1005) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: ] +# MERGED-NEXT: MergedIDStream [ +# MERGED-NEXT: ] + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.debug$T' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Types: + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 3 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 4097 + - Kind: LF_ARGLIST + ArgList: + ArgIndices: [ ] + - Kind: LF_POINTER + Pointer: + ReferentType: 4099 + Attrs: 66572 + - Kind: LF_POINTER + Pointer: + ReferentType: 4100 + Attrs: 66572 + - Kind: LF_POINTER + Pointer: + ReferentType: 4101 + Attrs: 66572 + - Kind: LF_MODIFIER + Modifier: + ModifiedType: 3 + Modifiers: [ None, Const, Volatile ] + - Kind: LF_POINTER + Pointer: + ReferentType: 4098 + Attrs: 66572 +symbols: +... diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 2af791ae17d..85bbe8d7b84 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -92,9 +92,11 @@ public: void printCOFFResources() override; void printCOFFLoadConfig() override; void printCodeViewDebugInfo() override; - void - mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, - llvm::codeview::MergingTypeTableBuilder &CVTypes) override; + void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, + llvm::codeview::MergingTypeTableBuilder &CVTypes, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) override; void printStackMap() const override; void printAddrsig() override; private: @@ -1227,7 +1229,10 @@ void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) { } void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs, - MergingTypeTableBuilder &CVTypes) { + MergingTypeTableBuilder &CVTypes, + GlobalTypeTableBuilder &GlobalCVIDs, + GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) { for (const SectionRef &S : Obj->sections()) { StringRef SectionName; error(S.getName(SectionName)); @@ -1248,9 +1253,17 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs, } SmallVector SourceToDest; Optional PCHSignature; - if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, - PCHSignature)) - return error(std::move(EC)); + if (GHash) { + std::vector Hashes = + GloballyHashedType::hashTypes(Types); + if (auto EC = mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest, Types, + Hashes, PCHSignature)) + return error(std::move(EC)); + } else { + if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, + PCHSignature)) + return error(std::move(EC)); + } } } } @@ -1904,16 +1917,10 @@ void COFFDumper::printAddrsig() { } } -void llvm::dumpCodeViewMergedTypes( - ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable, - llvm::codeview::MergingTypeTableBuilder &CVTypes) { - // Flatten it first, then run our dumper on it. - SmallString<0> TypeBuf; - CVTypes.ForEachRecord([&](TypeIndex TI, const CVType &Record) { - TypeBuf.append(Record.RecordData.begin(), Record.RecordData.end()); - }); - - TypeTableCollection TpiTypes(CVTypes.records()); +void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, + ArrayRef> IpiRecords, + ArrayRef> TpiRecords) { + TypeTableCollection TpiTypes(TpiRecords); { ListScope S(Writer, "MergedTypeStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); @@ -1923,7 +1930,7 @@ void llvm::dumpCodeViewMergedTypes( // Flatten the id stream and print it next. The ID stream refers to names from // the type stream. - TypeTableCollection IpiTypes(IDTable.records()); + TypeTableCollection IpiTypes(IpiRecords); { ListScope S(Writer, "MergedIDStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h index 6d57f4c1cb3..5767485a192 100644 --- a/tools/llvm-readobj/ObjDumper.h +++ b/tools/llvm-readobj/ObjDumper.h @@ -22,8 +22,9 @@ class COFFImportFile; class ObjectFile; } namespace codeview { +class GlobalTypeTableBuilder; class MergingTypeTableBuilder; -} +} // namespace codeview class ScopedPrinter; @@ -88,7 +89,10 @@ public: virtual void printCodeViewDebugInfo() { } virtual void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, - llvm::codeview::MergingTypeTableBuilder &CVTypes) {} + llvm::codeview::MergingTypeTableBuilder &CVTypes, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) {} // Only implemented for MachO. virtual void printMachODataInCode() { } @@ -132,9 +136,9 @@ std::error_code createWasmDumper(const object::ObjectFile *Obj, void dumpCOFFImportFile(const object::COFFImportFile *File, ScopedPrinter &Writer); -void dumpCodeViewMergedTypes( - ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable, - llvm::codeview::MergingTypeTableBuilder &TypeTable); +void dumpCodeViewMergedTypes(ScopedPrinter &Writer, + ArrayRef> IpiRecords, + ArrayRef> TpiRecords); } // namespace llvm diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 74f21e30d72..2c1674e064e 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -22,6 +22,7 @@ #include "Error.h" #include "ObjDumper.h" #include "WindowsResourceDumper.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFFImportFile.h" @@ -218,6 +219,12 @@ namespace opts { CodeViewMergedTypes("codeview-merged-types", cl::desc("Display the merged CodeView type stream")); + // -codeview-ghash + cl::opt CodeViewEnableGHash( + "codeview-ghash", + cl::desc( + "Enable global hashing for CodeView type stream de-duplication")); + // -codeview-subsection-bytes cl::opt CodeViewSubsectionBytes( "codeview-subsection-bytes", @@ -416,13 +423,17 @@ static bool isMipsArch(unsigned Arch) { namespace { struct ReadObjTypeTableBuilder { ReadObjTypeTableBuilder() - : Allocator(), IDTable(Allocator), TypeTable(Allocator) {} + : Allocator(), IDTable(Allocator), TypeTable(Allocator), + GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {} llvm::BumpPtrAllocator Allocator; llvm::codeview::MergingTypeTableBuilder IDTable; llvm::codeview::MergingTypeTableBuilder TypeTable; + llvm::codeview::GlobalTypeTableBuilder GlobalIDTable; + llvm::codeview::GlobalTypeTableBuilder GlobalTypeTable; + std::vector> Binaries; }; -} +} // namespace static ReadObjTypeTableBuilder CVTypes; /// Creates an format-specific object file dumper. @@ -542,7 +553,9 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) { if (opts::CodeView) Dumper->printCodeViewDebugInfo(); if (opts::CodeViewMergedTypes) - Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable); + Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable, + CVTypes.GlobalIDTable, CVTypes.GlobalTypeTable, + opts::CodeViewEnableGHash); } if (Obj->isMachO()) { if (opts::MachODataInCode) @@ -631,6 +644,8 @@ static void dumpInput(StringRef File) { dumpWindowsResourceFile(WinRes); else reportError(File, readobj_error::unrecognized_file_format); + + CVTypes.Binaries.push_back(std::move(*BinaryOrErr)); } /// Registers aliases that should only be allowed by readobj. @@ -720,7 +735,12 @@ int main(int argc, const char *argv[]) { if (opts::CodeViewMergedTypes) { ScopedPrinter W(outs()); - dumpCodeViewMergedTypes(W, CVTypes.IDTable, CVTypes.TypeTable); + if (opts::CodeViewEnableGHash) + dumpCodeViewMergedTypes(W, CVTypes.GlobalIDTable.records(), + CVTypes.GlobalTypeTable.records()); + else + dumpCodeViewMergedTypes(W, CVTypes.IDTable.records(), + CVTypes.TypeTable.records()); } return 0;