]> granicus.if.org Git - llvm/commitdiff
Parse the TPI (type information) stream of PDB files.
authorZachary Turner <zturner@google.com>
Tue, 3 May 2016 00:28:21 +0000 (00:28 +0000)
committerZachary Turner <zturner@google.com>
Tue, 3 May 2016 00:28:21 +0000 (00:28 +0000)
This parses the TPI stream (stream 2) from the PDB file. This stream
contains some header information followed by a series of codeview records.
There is some additional complexity here in that alongside this stream of
codeview records is a serialized hash table in order to efficiently query
the types. We parse the necessary bookkeeping information to allow us to
reconstruct the hash table, but we do not actually construct it yet as
there are still a few things that need to be understood first.

Differential Revision: http://reviews.llvm.org/D19840
Reviewed By: ruiu, rnk

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268343 91177308-0d34-0410-b5e6-96231b3b80d8

15 files changed:
include/llvm/DebugInfo/PDB/Raw/ByteStream.h
include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h
include/llvm/DebugInfo/PDB/Raw/PDBFile.h
include/llvm/DebugInfo/PDB/Raw/RawConstants.h
include/llvm/DebugInfo/PDB/Raw/StreamInterface.h
include/llvm/DebugInfo/PDB/Raw/StreamReader.h
include/llvm/DebugInfo/PDB/Raw/TpiStream.h [new file with mode: 0644]
lib/DebugInfo/PDB/CMakeLists.txt
lib/DebugInfo/PDB/Raw/ByteStream.cpp
lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp
lib/DebugInfo/PDB/Raw/PDBFile.cpp
lib/DebugInfo/PDB/Raw/StreamReader.cpp
lib/DebugInfo/PDB/Raw/TpiStream.cpp [new file with mode: 0644]
test/DebugInfo/PDB/pdbdump-headers.test
tools/llvm-pdbdump/llvm-pdbdump.cpp

index e9209f91b64bd21109f6af0f278281b33879947a..86e09833aa26896a5c9bf05428c82fec13b97d76 100644 (file)
@@ -35,6 +35,10 @@ public:
 
   std::error_code readBytes(uint32_t Offset,
                             MutableArrayRef<uint8_t> Buffer) const override;
+
+  std::error_code getArrayRef(uint32_t Offset, ArrayRef<uint8_t> &Buffer,
+                              uint32_t Length) const override;
+
   uint32_t getLength() const override;
 
   ArrayRef<uint8_t> data() const { return Data; }
index 8b4aedc260dd83aad718675034b72cde0a44b2f8..1022a8bd5ca7c5ffabfba542c4d1feff465f0bf6 100644 (file)
@@ -27,6 +27,9 @@ public:
 
   std::error_code readBytes(uint32_t Offset,
                             MutableArrayRef<uint8_t> Buffer) const override;
+  std::error_code getArrayRef(uint32_t Offset, ArrayRef<uint8_t> &Buffer,
+                              uint32_t Length) const override;
+
   uint32_t getLength() const override { return StreamLength; }
 
 private:
index d16bb165b0b8ed840463f88f9f8d58f6aa1698a0..9bf165efdabfd9ccdffbaad67ba5959fb7e392c2 100644 (file)
@@ -23,6 +23,7 @@ namespace pdb {
 struct PDBFileContext;
 class DbiStream;
 class InfoStream;
+class TpiStream;
 
 class PDBFile {
 public:
@@ -59,11 +60,13 @@ public:
 
   InfoStream &getPDBInfoStream();
   DbiStream &getPDBDbiStream();
+  TpiStream &getPDBTpiStream();
 
 private:
   std::unique_ptr<PDBFileContext> Context;
   std::unique_ptr<InfoStream> Info;
   std::unique_ptr<DbiStream> Dbi;
+  std::unique_ptr<TpiStream> Tpi;
 };
 }
 }
index 5dd040791d602f76fa60f709afc0ac852173d4f2..0b6d9deae40e282bb8386ae50c914ce88534f5ae 100644 (file)
@@ -35,6 +35,14 @@ enum PdbRaw_DbiVer : uint32_t {
   PdbDbiV110 = 20091201
 };
 
+enum PdbRaw_TpiVer : uint32_t { // Version values of the TPI stream header.
+  PdbTpiV40 = 19950410,
+  PdbTpiV41 = 19951122,
+  PdbTpiV50 = 19961031,
+  PdbTpiV70 = 19990903,
+  PdbTpiV80 = 20040203, // The only version TpiStream::reload() accepts.
+};
+
 enum SpecialStream : uint32_t {
   StreamPDB = 1,
   StreamTPI = 2,
index 4e698f037818ce3013ee0d2270370bf5634c1d7a..f2bc1b2bae245888b2692ae0a1c8691e645a2a9a 100644 (file)
@@ -23,6 +23,10 @@ public:
 
   virtual std::error_code readBytes(uint32_t Offset,
                                     MutableArrayRef<uint8_t> Buffer) const = 0;
+  virtual std::error_code getArrayRef(uint32_t Offset,
+                                      ArrayRef<uint8_t> &Buffer,
+                                      uint32_t Length) const = 0;
+
   virtual uint32_t getLength() const = 0;
 };
 }
index 5a524ffc5af13931f757f910e75fed2e0ae6c852..8f43b18aa808ebc1897eca8d8b627e66d54799ba 100644 (file)
@@ -39,6 +39,8 @@ public:
     return readBytes(Casted);
   }
 
+  std::error_code getArrayRef(ArrayRef<uint8_t> &Array, uint32_t Length);
+
   void setOffset(uint32_t Off) { Offset = Off; }
   uint32_t getOffset() const { return Offset; }
   uint32_t getLength() const { return Stream.getLength(); }
diff --git a/include/llvm/DebugInfo/PDB/Raw/TpiStream.h b/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
new file mode 100644 (file)
index 0000000..f40ef37
--- /dev/null
@@ -0,0 +1,62 @@
+//===- TpiStream.h - PDB Type Info (TPI) Stream 2 Access --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
+
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/DebugInfo/PDB/Raw/ByteStream.h"
+#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+
+namespace llvm {
+namespace pdb {
+class PDBFile;
+
+typedef uint32_t (*HashFunctionType)(uint8_t *, uint32_t);
+
+class TpiStream { // Reader for the PDB TPI stream (StreamTPI).
+  struct HeaderInfo; // Stream header layout; defined in TpiStream.cpp.
+
+public:
+  struct HashedTypeRecord { // One parsed entry of the type record array.
+    uint32_t Hash; // NOTE(review): not assigned by reload() -- confirm.
+    codeview::TypeLeafKind Kind; // Leaf kind taken from the record prefix.
+    ArrayRef<uint8_t> Record; // Record bytes; a view into RecordsBuffer.
+  };
+
+  TpiStream(PDBFile &File); // Binds to File; parsing happens in reload().
+  ~TpiStream();
+  std::error_code reload(); // Parses the header, records and hash buffers.
+
+  PdbRaw_TpiVer getTpiVersion() const; // Header version field.
+
+  uint32_t TypeIndexBegin() const; // Header's first type index.
+  uint32_t TypeIndexEnd() const;   // Header's end type index (exclusive).
+  uint32_t NumTypeRecords() const; // TypeIndexEnd() - TypeIndexBegin().
+
+  ArrayRef<HashedTypeRecord> records() const; // Records filled by reload().
+
+private:
+  PDBFile &Pdb;
+  MappedBlockStream Stream; // The TPI stream inside Pdb.
+  HashFunctionType HashFunction; // Null until reload() picks one.
+
+  ByteStream RecordsBuffer;         // Raw bytes of all type records.
+  ByteStream TypeIndexOffsetBuffer; // Read from the hash stream.
+  ByteStream HashValuesBuffer;      // Read from the hash stream.
+  ByteStream HashAdjBuffer;         // Read from the hash stream.
+
+  std::vector<HashedTypeRecord> TypeRecords; // Indexed from MinTypeIndex.
+  std::unique_ptr<HeaderInfo> Header;        // Set by reload().
+};
+}
+}
+
+#endif
index 2fa74b962466c5c0d03c8357a789abb00c29c052..b11fb306ad41dba8d7772ce8b545c65bba07a425 100644 (file)
@@ -36,7 +36,8 @@ add_pdb_impl_folder(Raw
   Raw/NameHashTable.cpp
   Raw/NameMap.cpp
   Raw/RawSession.cpp
-  Raw/StreamReader.cpp)
+  Raw/StreamReader.cpp
+  Raw/TpiStream.cpp)
 
 list(APPEND LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB")
 
index 89477ead223658c1c290a34ca57eb839e19951fa..0826b21a9b9e24932373c041af70a147ae54765d 100644 (file)
@@ -58,6 +58,15 @@ std::error_code ByteStream::readBytes(uint32_t Offset,
   return std::error_code();
 }
 
+std::error_code ByteStream::getArrayRef(uint32_t Offset,
+                                        ArrayRef<uint8_t> &Buffer,
+                                        uint32_t Length) const {
+  if (Data.size() < Length + Offset)
+    return std::make_error_code(std::errc::bad_address);
+  Buffer = Data.slice(Offset, Length);
+  return std::error_code();
+}
+
 uint32_t ByteStream::getLength() const { return Data.size(); }
 
 StringRef ByteStream::str() const {
index 860f7639a063fbcb64b80c7e7d94f46a39bdfb62..25c28e587a46bbfe660296c3bf6d55253eb2652e 100644 (file)
@@ -51,3 +51,9 @@ MappedBlockStream::readBytes(uint32_t Offset,
 
   return std::error_code();
 }
+
+std::error_code MappedBlockStream::getArrayRef(uint32_t Offset,
+                                               ArrayRef<uint8_t> &Buffer,
+                                               uint32_t Length) const {
+  return std::make_error_code(std::errc::operation_not_supported);
+}
index df47ced8cd6a1403f6517a1a85d44fe295e054cb..05b3dc7fc317f15b04a841edccb59ee7747676cc 100644 (file)
@@ -11,6 +11,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MemoryBuffer.h"
 
@@ -119,6 +120,8 @@ StringRef PDBFile::getBlockData(uint32_t BlockIndex, uint32_t NumBytes) const {
 std::error_code PDBFile::parseFileHeaders() {
   std::error_code EC;
   MemoryBufferRef BufferRef = *Context->Buffer;
+  // Make sure the file is sufficiently large to hold a super block.
+  // Do this before attempting to read the super block.
   if (BufferRef.getBufferSize() < sizeof(SuperBlock))
     return std::make_error_code(std::errc::illegal_byte_sequence);
 
@@ -135,10 +138,6 @@ std::error_code PDBFile::parseFileHeaders() {
   if (BufferRef.getBufferSize() % SB->BlockSize != 0)
     return std::make_error_code(std::errc::illegal_byte_sequence);
 
-  // Make sure the file is sufficiently large to hold a super block.
-  if (BufferRef.getBufferSize() < sizeof(SuperBlock))
-    return std::make_error_code(std::errc::illegal_byte_sequence);
-
   // Check the magic bytes.
   if (memcmp(SB->MagicBytes, Magic, sizeof(Magic)) != 0)
     return std::make_error_code(std::errc::illegal_byte_sequence);
@@ -271,3 +270,11 @@ DbiStream &PDBFile::getPDBDbiStream() {
   }
   return *Dbi;
 }
+
+// Returns the TPI stream for this file, creating and loading it on the first
+// call and handing back the cached object afterwards.
+TpiStream &PDBFile::getPDBTpiStream() {
+  if (Tpi)
+    return *Tpi;
+
+  Tpi.reset(new TpiStream(*this));
+  // NOTE(review): the error code returned by reload() is discarded here, so
+  // callers cannot tell whether the stream was parsed successfully.
+  Tpi->reload();
+  return *Tpi;
+}
index 42fe4521acfbb7b729a100774ef510c3dd8220e0..18d29953e252f63f4c9d8ba2dfead3ce4b89cd7d 100644 (file)
@@ -39,3 +39,11 @@ std::error_code StreamReader::readZeroString(std::string &Dest) {
   } while (C != '\0');
   return std::error_code();
 }
+
+// Asks the underlying stream for a reference to the next Length bytes at the
+// current offset.  The offset only advances when that request succeeds.
+std::error_code StreamReader::getArrayRef(ArrayRef<uint8_t> &Array,
+                                          uint32_t Length) {
+  const std::error_code Failure = Stream.getArrayRef(Offset, Array, Length);
+  if (Failure)
+    return Failure;
+
+  Offset += Length;
+  return std::error_code();
+}
diff --git a/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/lib/DebugInfo/PDB/Raw/TpiStream.cpp
new file mode 100644 (file)
index 0000000..7ee4c60
--- /dev/null
@@ -0,0 +1,143 @@
+//===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
+
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+namespace { // Constants used by TpiStream::reload() and records().
+const uint32_t MinTypeIndex = codeview::TypeIndex::FirstNonSimpleIndex;
+
+const uint32_t MinHashBuckets = 0x1000;  // Smallest bucket count accepted.
+const uint32_t MaxHashBuckets = 0x40000; // Largest bucket count accepted.
+}
+
+static uint32_t HashBufferV8(uint8_t *buffer, uint32_t NumBuckets) {
+  // Placeholder: always returns 0 and ignores both arguments.  The real hash
+  // is probably some variation of CRC32, but the precise implementation must
+  // be confirmed first, otherwise persisted hash values cannot be matched.
+  return 0;
+}
+
+struct TpiStream::HeaderInfo { // Fixed-size header read by reload().
+  struct EmbeddedBuf {  // Location of one sub-buffer.
+    little32_t Off;     // Offset given to the hash stream reader.
+    ulittle32_t Length; // Number of bytes to read at Off.
+  };
+
+  ulittle32_t Version;         // Must equal PdbTpiV80.
+  ulittle32_t HeaderSize;      // Must equal sizeof(HeaderInfo).
+  ulittle32_t TypeIndexBegin;
+  ulittle32_t TypeIndexEnd;
+  ulittle32_t TypeRecordBytes; // Size of the record data after the header.
+
+  ulittle16_t HashStreamIndex;    // Stream holding the three buffers below.
+  ulittle16_t HashAuxStreamIndex; // NOTE(review): not read in this file.
+  ulittle32_t HashKeySize;        // Must equal sizeof(ulittle32_t).
+  ulittle32_t NumHashBuckets;     // In [MinHashBuckets, MaxHashBuckets].
+
+  EmbeddedBuf HashValueBuffer;
+  EmbeddedBuf IndexOffsetBuffer;
+  EmbeddedBuf HashAdjBuffer;
+};
+
+// Binds the reader to File's TPI stream.  Nothing is parsed here; the header
+// and records are read by reload().
+TpiStream::TpiStream(PDBFile &File)
+    : Pdb(File),
+      Stream(StreamTPI, File),
+      HashFunction(nullptr) {}
+
+// Defined in this file, after HeaderInfo is complete, so that the
+// unique_ptr<HeaderInfo> member can be destroyed.
+TpiStream::~TpiStream() = default;
+
+std::error_code TpiStream::reload() {
+  StreamReader Reader(Stream);
+
+  if (Reader.bytesRemaining() < sizeof(HeaderInfo))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  Header.reset(new HeaderInfo());
+  Reader.readObject(Header.get());
+
+  if (Header->Version != PdbTpiV80)
+    return std::make_error_code(std::errc::not_supported);
+
+  if (Header->HeaderSize != sizeof(HeaderInfo))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  if (Header->HashKeySize != sizeof(ulittle32_t))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  if (Header->NumHashBuckets < MinHashBuckets ||
+      Header->NumHashBuckets > MaxHashBuckets)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  HashFunction = HashBufferV8;
+
+  // The actual type records themselves come from this stream
+  RecordsBuffer.initialize(Reader, Header->TypeRecordBytes);
+  TypeRecords.resize(TypeIndexEnd() - ::MinTypeIndex);
+  StreamReader RecordsReader(RecordsBuffer);
+  for (uint32_t I = TypeIndexBegin(); I < TypeIndexEnd(); ++I) {
+    HashedTypeRecord &Record = TypeRecords[I - ::MinTypeIndex];
+    codeview::TypeRecordPrefix Prefix;
+    if (auto EC = RecordsReader.readObject(&Prefix))
+      return EC;
+
+    Record.Kind =
+        static_cast<codeview::TypeLeafKind>(static_cast<uint16_t>(Prefix.Leaf));
+
+    // Since we read this entire buffer into a ByteStream, we are guaranteed
+    // that the entire buffer is contiguous (i.e. there's no longer a chance
+    // that it splits across a page boundary.  So we can request a reference
+    // directly into the stream buffer to avoid unnecessary memory copies.
+    uint32_t RecordSize = Prefix.Len - sizeof(Prefix.Leaf);
+    if (auto EC = RecordsReader.getArrayRef(Record.Record, RecordSize))
+      return EC;
+  }
+
+  // Hash indices, hash values, etc come from the hash stream.
+  MappedBlockStream HS(Header->HashStreamIndex, Pdb);
+  StreamReader HSR(HS);
+  HSR.setOffset(Header->HashValueBuffer.Off);
+  HashValuesBuffer.initialize(HSR, Header->HashValueBuffer.Length);
+
+  HSR.setOffset(Header->HashAdjBuffer.Off);
+  HashAdjBuffer.initialize(HSR, Header->HashAdjBuffer.Length);
+
+  HSR.setOffset(Header->IndexOffsetBuffer.Off);
+  TypeIndexOffsetBuffer.initialize(HSR, Header->IndexOffsetBuffer.Length);
+
+  return std::error_code();
+}
+
+// Returns the header's Version field as a PdbRaw_TpiVer.  Dereferences
+// Header, which reload() allocates.
+PdbRaw_TpiVer TpiStream::getTpiVersion() const {
+  const uint32_t RawVersion = Header->Version;
+  return static_cast<PdbRaw_TpiVer>(RawVersion);
+}
+
+// First type index recorded in the header.
+uint32_t TpiStream::TypeIndexBegin() const {
+  const uint32_t First = Header->TypeIndexBegin;
+  return First;
+}
+
+// End type index recorded in the header; reload() treats it as exclusive.
+uint32_t TpiStream::TypeIndexEnd() const {
+  const uint32_t Last = Header->TypeIndexEnd;
+  return Last;
+}
+
+// Number of type indices in [TypeIndexBegin(), TypeIndexEnd()).
+uint32_t TpiStream::NumTypeRecords() const {
+  const uint32_t End = TypeIndexEnd();
+  const uint32_t Begin = TypeIndexBegin();
+  return End - Begin;
+}
+
+// Returns the parsed type records for [TypeIndexBegin(), TypeIndexEnd()) as a
+// view into TypeRecords.  The view is empty when no records are stored.
+ArrayRef<TpiStream::HashedTypeRecord> TpiStream::records() const {
+  // Indexing an empty vector with operator[] is undefined behaviour, so the
+  // "no records" case (including a reload() that failed before filling
+  // TypeRecords) is answered without touching the storage.
+  if (TypeRecords.empty())
+    return ArrayRef<HashedTypeRecord>();
+
+  const HashedTypeRecord *Begin =
+      TypeRecords.data() + (TypeIndexBegin() - ::MinTypeIndex);
+  return ArrayRef<HashedTypeRecord>(Begin, NumTypeRecords());
+}
index 6b4c61cf8b670e077bd9de1b3fd248d015b6c831..9104f4d7d5bf75882d8c009db3b70bbd72f1323c 100644 (file)
 ; EMPTY-NEXT:     Type Server Index: 0
 ; EMPTY-NEXT:     Has EC Info: 0
 ; EMPTY-NEXT:     0 Contributing Source Files:
+; EMPTY-NEXT: TPI Version: 20040203
+; EMPTY-NEXT: Record count: 75
+; EMPTY-NEXT:   Kind: 0x4609  Bytes: [00 00 00 00]
+; EMPTY-NEXT:   Kind: 0x4104  Bytes: [74 00 00 00 00 00 00 00 00 10 00 00]
+; EMPTY-NEXT:   Kind: 0x4611  Bytes: [02 15 03 00 01 00 61 70 61 72 74 6D 65 6E 74 00
+; EMPTY-NEXT:                         02 15 03 00 02 00 73 69 6E 67 6C 65 00 F3 F2 F1
+; EMPTY-NEXT:                         02 15 03 00 03 00 66 72 65 65 00 F1 02 15 03 00
+; EMPTY-NEXT:                         04 00 6E 65 75 74 72 61 6C 00 F2 F1 02 15 03 00
+; EMPTY-NEXT:                         05 00 62 6F 74 68 00 F1]
 
 BIG:      BlockSize: 4096
 BIG-NEXT: Unknown0: 2
index a1fe8d506818c0159829ebd02a1c8d5aa6a75721..b58ed8f143e0b9c8ac563cf75cd7a7f584cba582 100644 (file)
@@ -43,6 +43,7 @@
 #include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Raw/RawSession.h"
 #include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/FileSystem.h"
@@ -148,6 +149,29 @@ cl::opt<bool> NoEnumDefs("no-enum-definitions",
                          cl::cat(FilterCategory));
 }
 
+static void dumpBytes(raw_ostream &S, ArrayRef<uint8_t> Bytes,
+                      uint32_t BytesPerRow, uint32_t Indent) {
+  S << "[";
+  uint32_t I = 0;
+
+  uint32_t BytesRemaining = Bytes.size();
+  while (BytesRemaining > 0) {
+    uint32_t BytesThisLine = std::min(BytesRemaining, BytesPerRow);
+    for (size_t L = 0; L < BytesThisLine; ++L, ++I) {
+      S << format_hex_no_prefix(Bytes[I], 2, true);
+      if (L + 1 < BytesThisLine)
+        S << ' ';
+    }
+    BytesRemaining -= BytesThisLine;
+    if (BytesRemaining > 0) {
+      S << '\n';
+      S.indent(Indent);
+    }
+  }
+  S << ']';
+  S.flush();
+}
+
 static void dumpStructure(RawSession &RS) {
   PDBFile &File = RS.getPDBFile();
 
@@ -293,6 +317,16 @@ static void dumpStructure(RawSession &RS) {
       outs().indent(8) << File << '\n';
     }
   }
+
+  TpiStream &Tpi = File.getPDBTpiStream();
+  outs() << "TPI Version: " << Tpi.getTpiVersion() << '\n';
+  outs() << "Record count: " << Tpi.NumTypeRecords() << '\n';
+  for (auto &Record : Tpi.records()) {
+    outs().indent(2) << "Kind: 0x" << Record.Kind;
+    outs().indent(2) << "Bytes: ";
+    dumpBytes(outs(), Record.Record, 16, 24);
+    outs() << '\n';
+  }
 }
 
 static void reportError(StringRef Path, PDB_ErrorCode Error) {