]> granicus.if.org Git - llvm/commitdiff
[CodeView] Fix cycles in debug info when merging Types with global hashes
authorAlexandre Ganea <alexandre.ganea@ubisoft.com>
Thu, 7 Feb 2019 15:24:18 +0000 (15:24 +0000)
committerAlexandre Ganea <alexandre.ganea@ubisoft.com>
Thu, 7 Feb 2019 15:24:18 +0000 (15:24 +0000)
When type streams with forward references were merged using GHashes, cycles
were introduced in the debug info. This was caused by
GlobalTypeTableBuilder::insertRecordAs() not inserting the record on the second
pass, thus yielding an empty ArrayRef at that record slot. Later on, upon PDB
emission, TpiStreamBuilder::commit() would skip that empty record, thus
offseting all indices that came after in the stream.

This solution comes in two steps:

1. Fix the hash calculation, by doing a multiple-step resolution, iff there are
forward references in the input stream.
2. Fix merge by resolving with multiple passes, therefore moving records with
forward references at the end of the stream.

This patch also adds support for llvm-readoj --codeview-ghash.
Finally, fix dumpCodeViewMergedTypes() which previously could reference deleted
memory.

Fixes PR40221

Differential Revision: https://reviews.llvm.org/D57790

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353412 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
include/llvm/DebugInfo/CodeView/TypeHashing.h
lib/DebugInfo/CodeView/TypeHashing.cpp
lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
test/tools/llvm-readobj/codeview-merging-ghash.test [new file with mode: 0644]
tools/llvm-readobj/COFFDumper.cpp
tools/llvm-readobj/ObjDumper.h
tools/llvm-readobj/llvm-readobj.cpp

index c4a7851befe2b910fd2a91ff06c2c99d16e2d6ed..a43ce20edde67423e62bce856ebe37a85bd5b510 100644 (file)
@@ -73,14 +73,30 @@ public:
                            CreateFunc Create) {
     auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex());
 
-    if (LLVM_UNLIKELY(Result.second)) {
+    if (LLVM_UNLIKELY(Result.second /*inserted*/ ||
+                      Result.first->second.isSimple())) {
       uint8_t *Stable = RecordStorage.Allocate<uint8_t>(RecordSize);
       MutableArrayRef<uint8_t> Data(Stable, RecordSize);
-      SeenRecords.push_back(Create(Data));
+      ArrayRef<uint8_t> StableRecord = Create(Data);
+      if (StableRecord.empty()) {
+        // Records with forward references into the Type stream will be deferred
+        // for insertion at a later time, on the second pass.
+        Result.first->getSecond() = TypeIndex(SimpleTypeKind::NotTranslated);
+        return TypeIndex(SimpleTypeKind::NotTranslated);
+      }
+      if (Result.first->second.isSimple()) {
+        assert(Result.first->second.getIndex() ==
+               (uint32_t)SimpleTypeKind::NotTranslated);
+        // On the second pass, update with index to remapped record. The
+        // (initially misbehaved) record will now come *after* other records
+        // resolved in the first pass, with proper *back* references in the
+        // stream.
+        Result.first->second = nextTypeIndex();
+      }
+      SeenRecords.push_back(StableRecord);
       SeenHashes.push_back(Hash);
     }
 
-    // Update the caller's copy of Record to point a stable copy.
     return Result.first->second;
   }
 
index 442a65bd86f7efd5e39eeea7160bec3c58cb64ce..c2fbff64ceb8569c08df9f4c4f3e2b3820de1c76 100644 (file)
@@ -84,6 +84,8 @@ struct GloballyHashedType {
   }
   std::array<uint8_t, 8> Hash;
 
+  bool empty() const { return *(const uint64_t*)Hash.data() == 0; }\r
+
   /// Given a sequence of bytes representing a record, compute a global hash for
   /// this record.  Due to the nature of global hashes incorporating the hashes
   /// of referenced records, this function requires a list of types and ids
@@ -107,8 +109,33 @@ struct GloballyHashedType {
   template <typename Range>
   static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
     std::vector<GloballyHashedType> Hashes;
-    for (const auto &R : Records)
-      Hashes.push_back(hashType(R, Hashes, Hashes));
+    bool UnresolvedRecords = false;\r
+    for (const auto &R : Records) {\r
+      GloballyHashedType H = hashType(R, Hashes, Hashes);
+      if (H.empty())
+        UnresolvedRecords = true;
+      Hashes.push_back(H);
+    }
+
+    // In some rare cases, there might be records with forward references in the
+    // stream. Several passes might be needed to fully hash each record in the
+    // Type stream. However this occurs on very small OBJs generated by MASM,
+    // with a dozen records at most. Therefore this codepath isn't
+    // time-critical, as it isn't taken in 99% of cases.
+    while (UnresolvedRecords) {
+      UnresolvedRecords = false;
+      auto HashIt = Hashes.begin();
+      for (const auto &R : Records) {
+        if (HashIt->empty()) {
+          GloballyHashedType H = hashType(R, Hashes, Hashes);
+          if (H.empty())
+            UnresolvedRecords = true;
+          else
+            *HashIt = H;
+        }
+        ++HashIt;
+      }
+    }
 
     return Hashes;
   }
index 612399ddbff70264d9dc7b8322c712873ca5ebb2..2dbc11a84f0b03cace938e2a8618e31ce17070da 100644 (file)
@@ -54,10 +54,16 @@ GloballyHashedType::hashType(ArrayRef<uint8_t> RecordData,
         reinterpret_cast<const TypeIndex *>(RefData.data()), Ref.Count);
     for (TypeIndex TI : Indices) {
       ArrayRef<uint8_t> BytesToHash;
-      if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) {
+      if (TI.isSimple() || TI.isNoneType()) {
         const uint8_t *IndexBytes = reinterpret_cast<const uint8_t *>(&TI);
         BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex));
       } else {
+        if (TI.toArrayIndex() >= Prev.size() ||
+            Prev[TI.toArrayIndex()].empty()) {
+          // There are references to yet-unhashed records. Suspend hashing for
+          // this record until all the other records are processed.
+          return {};
+        }
         BytesToHash = Prev[TI.toArrayIndex()].Hash;
       }
       S.update(BytesToHash);
index f2a5dcb93d6161096414273d242a7bde3c80a7dc..6b308453c2dede8156a20271d470929e52dc946f 100644 (file)
@@ -152,9 +152,12 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
   if (auto EC = Writer.writeObject(*Header))
     return EC;
 
-  for (auto Rec : TypeRecords)
+  for (auto Rec : TypeRecords) {
+    assert(!Rec.empty()); // An empty record will not write anything, but it
+                          // would shift all offsets from here on.
     if (auto EC = Writer.writeBytes(Rec))
       return EC;
+  }
 
   if (HashStreamIndex != kInvalidStreamIndex) {
     auto HVS = WritableMappedBlockStream::createIndexedStream(
diff --git a/test/tools/llvm-readobj/codeview-merging-ghash.test b/test/tools/llvm-readobj/codeview-merging-ghash.test
new file mode 100644 (file)
index 0000000..04b30ff
--- /dev/null
@@ -0,0 +1,132 @@
+# RUN: yaml2obj %s -o=%t.obj
+# RUN: llvm-readobj -codeview-merged-types %t.obj | FileCheck %s --check-prefix=MERGED
+# RUN: llvm-readobj -codeview-merged-types -codeview-ghash %t.obj | FileCheck %s --check-prefix=MERGED
+
+# MERGED: Format: COFF-x86-64
+# MERGED-NEXT: Arch: x86_64
+# MERGED-NEXT: AddressSize: 64bit
+# MERGED-NEXT: MergedTypeStream [
+# MERGED-NEXT:   ArgList (0x1000) {
+# MERGED-NEXT:     TypeLeafKind: LF_ARGLIST (0x1201)
+# MERGED-NEXT:     NumArgs: 0
+# MERGED-NEXT:     Arguments [
+# MERGED-NEXT:     ]
+# MERGED-NEXT:   }
+# MERGED-NEXT:   Modifier (0x1001) {
+# MERGED-NEXT:     TypeLeafKind: LF_MODIFIER (0x1001)
+# MERGED-NEXT:     ModifiedType: void (0x3)
+# MERGED-NEXT:     Modifiers [ (0x3)
+# MERGED-NEXT:       Const (0x1)
+# MERGED-NEXT:       Volatile (0x2)
+# MERGED-NEXT:     ]
+# MERGED-NEXT:   }
+# MERGED-NEXT:   Procedure (0x1002) {
+# MERGED-NEXT:     TypeLeafKind: LF_PROCEDURE (0x1008)
+# MERGED-NEXT:     ReturnType: void (0x3)
+# MERGED-NEXT:     CallingConvention: NearC (0x0)
+# MERGED-NEXT:     FunctionOptions [ (0x0)
+# MERGED-NEXT:     ]
+# MERGED-NEXT:     NumParameters: 0
+# MERGED-NEXT:     ArgListType: () (0x1000)
+# MERGED-NEXT:   }
+# MERGED-NEXT:   Pointer (0x1003) {
+# MERGED-NEXT:     TypeLeafKind: LF_POINTER (0x1002)
+# MERGED-NEXT:     PointeeType: const volatile void (0x1001)
+# MERGED-NEXT:     PtrType: Near64 (0xC)
+# MERGED-NEXT:     PtrMode: Pointer (0x0)
+# MERGED-NEXT:     IsFlat: 0
+# MERGED-NEXT:     IsConst: 1
+# MERGED-NEXT:     IsVolatile: 0
+# MERGED-NEXT:     IsUnaligned: 0
+# MERGED-NEXT:     IsRestrict: 0
+# MERGED-NEXT:     IsThisPtr&: 0
+# MERGED-NEXT:     IsThisPtr&&: 0
+# MERGED-NEXT:     SizeOf: 8
+# MERGED-NEXT:   }
+# MERGED-NEXT:   Pointer (0x1004) {
+# MERGED-NEXT:     TypeLeafKind: LF_POINTER (0x1002)
+# MERGED-NEXT:     PointeeType: const volatile void* const (0x1003)
+# MERGED-NEXT:     PtrType: Near64 (0xC)
+# MERGED-NEXT:     PtrMode: Pointer (0x0)
+# MERGED-NEXT:     IsFlat: 0
+# MERGED-NEXT:     IsConst: 1
+# MERGED-NEXT:     IsVolatile: 0
+# MERGED-NEXT:     IsUnaligned: 0
+# MERGED-NEXT:     IsRestrict: 0
+# MERGED-NEXT:     IsThisPtr&: 0
+# MERGED-NEXT:     IsThisPtr&&: 0
+# MERGED-NEXT:     SizeOf: 8
+# MERGED-NEXT:   }
+# MERGED-NEXT:   Pointer (0x1005) {
+# MERGED-NEXT:     TypeLeafKind: LF_POINTER (0x1002)
+# MERGED-NEXT:     PointeeType: const volatile void* const* const (0x1004)
+# MERGED-NEXT:     PtrType: Near64 (0xC)
+# MERGED-NEXT:     PtrMode: Pointer (0x0)
+# MERGED-NEXT:     IsFlat: 0
+# MERGED-NEXT:     IsConst: 1
+# MERGED-NEXT:     IsVolatile: 0
+# MERGED-NEXT:     IsUnaligned: 0
+# MERGED-NEXT:     IsRestrict: 0
+# MERGED-NEXT:     IsThisPtr&: 0
+# MERGED-NEXT:     IsThisPtr&&: 0
+# MERGED-NEXT:     SizeOf: 8
+# MERGED-NEXT:   }
+# MERGED-NEXT:   Pointer (0x1006) {
+# MERGED-NEXT:     TypeLeafKind: LF_POINTER (0x1002)
+# MERGED-NEXT:     PointeeType: const volatile void* const* const* const (0x1005)
+# MERGED-NEXT:     PtrType: Near64 (0xC)
+# MERGED-NEXT:     PtrMode: Pointer (0x0)
+# MERGED-NEXT:     IsFlat: 0
+# MERGED-NEXT:     IsConst: 1
+# MERGED-NEXT:     IsVolatile: 0
+# MERGED-NEXT:     IsUnaligned: 0
+# MERGED-NEXT:     IsRestrict: 0
+# MERGED-NEXT:     IsThisPtr&: 0
+# MERGED-NEXT:     IsThisPtr&&: 0
+# MERGED-NEXT:     SizeOf: 8
+# MERGED-NEXT:   }
+# MERGED-NEXT: ]
+# MERGED-NEXT: MergedIDStream [
+# MERGED-NEXT: ]
+
+--- !COFF
+header:          
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: [  ]
+sections:        
+  - Name:            '.debug$T'
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    Types:           
+      - Kind:            LF_PROCEDURE
+        Procedure:       
+          ReturnType:      3
+          CallConv:        NearC
+          Options:         [ None ]
+          ParameterCount:  0
+          ArgumentList:    4097
+      - Kind:            LF_ARGLIST
+        ArgList:         
+          ArgIndices:      [  ]
+      - Kind:            LF_POINTER
+        Pointer:         
+          ReferentType:    4099
+          Attrs:           66572
+      - Kind:            LF_POINTER
+        Pointer:         
+          ReferentType:    4100
+          Attrs:           66572
+      - Kind:            LF_POINTER
+        Pointer:         
+          ReferentType:    4101
+          Attrs:           66572
+      - Kind:            LF_MODIFIER
+        Modifier:        
+          ModifiedType:    3
+          Modifiers:       [ None, Const, Volatile ]
+      - Kind:            LF_POINTER
+        Pointer:         
+          ReferentType:    4098
+          Attrs:           66572
+symbols:         
+...
index 2af791ae17d271ca6da529a79632a99116071c30..85bbe8d7b84f26d5a464a8155a00343e56b2b846 100644 (file)
@@ -92,9 +92,11 @@ public:
   void printCOFFResources() override;
   void printCOFFLoadConfig() override;
   void printCodeViewDebugInfo() override;
-  void
-  mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
-                     llvm::codeview::MergingTypeTableBuilder &CVTypes) override;
+  void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
+                          llvm::codeview::MergingTypeTableBuilder &CVTypes,
+                          llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs,
+                          llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
+                          bool GHash) override;
   void printStackMap() const override;
   void printAddrsig() override;
 private:
@@ -1227,7 +1229,10 @@ void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) {
 }
 
 void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
-                                    MergingTypeTableBuilder &CVTypes) {
+                                    MergingTypeTableBuilder &CVTypes,
+                                    GlobalTypeTableBuilder &GlobalCVIDs,
+                                    GlobalTypeTableBuilder &GlobalCVTypes,
+                                    bool GHash) {
   for (const SectionRef &S : Obj->sections()) {
     StringRef SectionName;
     error(S.getName(SectionName));
@@ -1248,9 +1253,17 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
       }
       SmallVector<TypeIndex, 128> SourceToDest;
       Optional<uint32_t> PCHSignature;
-      if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
-                                          PCHSignature))
-        return error(std::move(EC));
+      if (GHash) {
+        std::vector<GloballyHashedType> Hashes =
+            GloballyHashedType::hashTypes(Types);
+        if (auto EC = mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest, Types,
+                                            Hashes, PCHSignature))
+          return error(std::move(EC));
+      } else {
+        if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
+                                            PCHSignature))
+          return error(std::move(EC));
+      }
     }
   }
 }
@@ -1904,16 +1917,10 @@ void COFFDumper::printAddrsig() {
   }
 }
 
-void llvm::dumpCodeViewMergedTypes(
-    ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
-    llvm::codeview::MergingTypeTableBuilder &CVTypes) {
-  // Flatten it first, then run our dumper on it.
-  SmallString<0> TypeBuf;
-  CVTypes.ForEachRecord([&](TypeIndex TI, const CVType &Record) {
-    TypeBuf.append(Record.RecordData.begin(), Record.RecordData.end());
-  });
-
-  TypeTableCollection TpiTypes(CVTypes.records());
+void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer,
+                                   ArrayRef<ArrayRef<uint8_t>> IpiRecords,
+                                   ArrayRef<ArrayRef<uint8_t>> TpiRecords) {
+  TypeTableCollection TpiTypes(TpiRecords);
   {
     ListScope S(Writer, "MergedTypeStream");
     TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
@@ -1923,7 +1930,7 @@ void llvm::dumpCodeViewMergedTypes(
 
   // Flatten the id stream and print it next. The ID stream refers to names from
   // the type stream.
-  TypeTableCollection IpiTypes(IDTable.records());
+  TypeTableCollection IpiTypes(IpiRecords);
   {
     ListScope S(Writer, "MergedIDStream");
     TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
index 6d57f4c1cb3b9362f0723b7ec24f89094f4a1817..5767485a19290714f05a338324e2bc92334dce0f 100644 (file)
@@ -22,8 +22,9 @@ class COFFImportFile;
 class ObjectFile;
 }
 namespace codeview {
+class GlobalTypeTableBuilder;
 class MergingTypeTableBuilder;
-}
+} // namespace codeview
 
 class ScopedPrinter;
 
@@ -88,7 +89,10 @@ public:
   virtual void printCodeViewDebugInfo() { }
   virtual void
   mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
-                     llvm::codeview::MergingTypeTableBuilder &CVTypes) {}
+                     llvm::codeview::MergingTypeTableBuilder &CVTypes,
+                     llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs,
+                     llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
+                     bool GHash) {}
 
   // Only implemented for MachO.
   virtual void printMachODataInCode() { }
@@ -132,9 +136,9 @@ std::error_code createWasmDumper(const object::ObjectFile *Obj,
 void dumpCOFFImportFile(const object::COFFImportFile *File,
                         ScopedPrinter &Writer);
 
-void dumpCodeViewMergedTypes(
-    ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
-    llvm::codeview::MergingTypeTableBuilder &TypeTable);
+void dumpCodeViewMergedTypes(ScopedPrinter &Writer,
+                             ArrayRef<ArrayRef<uint8_t>> IpiRecords,
+                             ArrayRef<ArrayRef<uint8_t>> TpiRecords);
 
 } // namespace llvm
 
index 74f21e30d720e68cf7422e3286892b90edfc55bf..2c1674e064eb108908ae601ee11da523452580b5 100644 (file)
@@ -22,6 +22,7 @@
 #include "Error.h"
 #include "ObjDumper.h"
 #include "WindowsResourceDumper.h"
+#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/COFFImportFile.h"
@@ -218,6 +219,12 @@ namespace opts {
       CodeViewMergedTypes("codeview-merged-types",
                           cl::desc("Display the merged CodeView type stream"));
 
+  // -codeview-ghash
+  cl::opt<bool> CodeViewEnableGHash(
+      "codeview-ghash",
+      cl::desc(
+          "Enable global hashing for CodeView type stream de-duplication"));
+
   // -codeview-subsection-bytes
   cl::opt<bool> CodeViewSubsectionBytes(
       "codeview-subsection-bytes",
@@ -416,13 +423,17 @@ static bool isMipsArch(unsigned Arch) {
 namespace {
 struct ReadObjTypeTableBuilder {
   ReadObjTypeTableBuilder()
-      : Allocator(), IDTable(Allocator), TypeTable(Allocator) {}
+      : Allocator(), IDTable(Allocator), TypeTable(Allocator),
+        GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {}
 
   llvm::BumpPtrAllocator Allocator;
   llvm::codeview::MergingTypeTableBuilder IDTable;
   llvm::codeview::MergingTypeTableBuilder TypeTable;
+  llvm::codeview::GlobalTypeTableBuilder GlobalIDTable;
+  llvm::codeview::GlobalTypeTableBuilder GlobalTypeTable;
+  std::vector<OwningBinary<Binary>> Binaries;
 };
-}
+} // namespace
 static ReadObjTypeTableBuilder CVTypes;
 
 /// Creates an format-specific object file dumper.
@@ -542,7 +553,9 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
     if (opts::CodeView)
       Dumper->printCodeViewDebugInfo();
     if (opts::CodeViewMergedTypes)
-      Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable);
+      Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable,
+                                 CVTypes.GlobalIDTable, CVTypes.GlobalTypeTable,
+                                 opts::CodeViewEnableGHash);
   }
   if (Obj->isMachO()) {
     if (opts::MachODataInCode)
@@ -631,6 +644,8 @@ static void dumpInput(StringRef File) {
     dumpWindowsResourceFile(WinRes);
   else
     reportError(File, readobj_error::unrecognized_file_format);
+
+  CVTypes.Binaries.push_back(std::move(*BinaryOrErr));
 }
 
 /// Registers aliases that should only be allowed by readobj.
@@ -720,7 +735,12 @@ int main(int argc, const char *argv[]) {
 
   if (opts::CodeViewMergedTypes) {
     ScopedPrinter W(outs());
-    dumpCodeViewMergedTypes(W, CVTypes.IDTable, CVTypes.TypeTable);
+    if (opts::CodeViewEnableGHash)
+      dumpCodeViewMergedTypes(W, CVTypes.GlobalIDTable.records(),
+                              CVTypes.GlobalTypeTable.records());
+    else
+      dumpCodeViewMergedTypes(W, CVTypes.IDTable.records(),
+                              CVTypes.TypeTable.records());
   }
 
   return 0;