[pdb] Add HashTable data structure.

author Zachary Turner <zturner@google.com>

Thu, 19 Jan 2017 23:31:24 +0000 (23:31 +0000)

committer Zachary Turner <zturner@google.com>

Thu, 19 Jan 2017 23:31:24 +0000 (23:31 +0000)
author Zachary Turner <zturner@google.com>
Thu, 19 Jan 2017 23:31:24 +0000 (23:31 +0000)
committer Zachary Turner <zturner@google.com>
Thu, 19 Jan 2017 23:31:24 +0000 (23:31 +0000)
diff --git a/include/llvm/DebugInfo/PDB/Raw/HashTable.h b/include/llvm/DebugInfo/PDB/Raw/HashTable.h

new file mode 100644 (file)

index 0000000..c0c3240
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Raw/HashTable.h
@@ -0,0 +1,102 @@
+//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H
+#define LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <cstdint>
+#include <utility>
+
+namespace llvm {
+namespace pdb {
+
+class HashTable {
+  friend class HashTableIterator;
+  struct Header {
+    support::ulittle32_t Size;
+    support::ulittle32_t Capacity;
+  };
+
+  typedef std::vector<std::pair<uint32_t, uint32_t>> BucketList;
+
+public:
+  HashTable();
+  explicit HashTable(uint32_t Capacity);
+
+  Error load(msf::StreamReader &Stream);
+
+  uint32_t calculateSerializedLength() const;
+  Error commit(msf::StreamWriter &Writer) const;
+
+  uint32_t capacity() const;
+  uint32_t size() const;
+
+  HashTableIterator begin() const;
+  HashTableIterator end() const;
+  HashTableIterator find(uint32_t K);
+
+  void set(uint32_t K, uint32_t V);
+  void remove(uint32_t K);
+  uint32_t get(uint32_t K);
+
+protected:
+  bool isPresent(uint32_t K) const { return Present.test(K); }
+  bool isDeleted(uint32_t K) const { return Deleted.test(K); }
+  BucketList Buckets;
+  mutable SparseBitVector<> Present;
+  mutable SparseBitVector<> Deleted;
+
+private:
+  static uint32_t maxLoad(uint32_t capacity);
+  void grow();
+
+  static Error readSparseBitVector(msf::StreamReader &Stream,
+                                   SparseBitVector<> &V);
+  static Error writeSparseBitVector(msf::StreamWriter &Writer,
+                                    SparseBitVector<> &Vec);
+};
+
+class HashTableIterator
+    : public iterator_facade_base<HashTableIterator, std::forward_iterator_tag,
+                                  std::pair<uint32_t, uint32_t>> {
+  friend class HashTable;
+  HashTableIterator(const HashTable &Map, uint32_t Index, bool IsEnd);
+
+public:
+  HashTableIterator(const HashTable &Map);
+
+  HashTableIterator &operator=(const HashTableIterator &R);
+  bool operator==(const HashTableIterator &R) const;
+  const std::pair<uint32_t, uint32_t> &operator*() const;
+  HashTableIterator &operator++();
+
+private:
+  bool isEnd() const { return IsEnd; }
+  uint32_t index() const { return Index; }
+
+  const HashTable *Map;
+  uint32_t Index;
+  bool IsEnd;
+};
+
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H
diff --git a/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h b/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h

index f5244ac21808f24c25f7383d5e2084bd71ec9e22..048f56e5764b8df41b6f589f6ce1571f74fa633c 100644 (file)
--- a/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h
@@ -10,11 +10,12 @@
  #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAPBUILDER_H
  #define LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAPBUILDER_H
  
-#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo/PDB/Raw/HashTable.h"
  #include "llvm/Support/Error.h"
  
  #include <cstdint>
  #include <memory>
+#include <vector>
  
  namespace llvm {
  namespace msf {
@@ -29,14 +30,14 @@ public:
  
    void addMapping(StringRef Name, uint32_t Mapping);
  
-  Expected<std::unique_ptr<NameMap>> build();
    Error commit(msf::StreamWriter &Writer) const;
  
    uint32_t calculateSerializedLength() const;
  
  private:
-  StringMap<uint32_t> Map;
-  uint32_t StringDataBytes = 0;
+  std::vector<StringRef> Strings;
+  HashTable Map;
+  uint32_t Offset = 0;
  };
  
  } // end namespace pdb
diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt

index a49c06b1e908506d465045b2f12342fe5c2e9b44..43ae30fabb2707c1d6995ecfe6d64c4a1401c9f2 100644 (file)
--- a/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/lib/DebugInfo/PDB/CMakeLists.txt
@@ -34,6 +34,7 @@ add_pdb_impl_folder(Raw
    Raw/GlobalsStream.cpp
    Raw/GSI.cpp
    Raw/Hash.cpp
+  Raw/HashTable.cpp
    Raw/InfoStream.cpp
    Raw/InfoStreamBuilder.cpp
    Raw/ModInfo.cpp
diff --git a/lib/DebugInfo/PDB/Raw/HashTable.cpp b/lib/DebugInfo/PDB/Raw/HashTable.cpp

new file mode 100644 (file)

index 0000000..cc83174
--- /dev/null
+++ b/lib/DebugInfo/PDB/Raw/HashTable.cpp
@@ -0,0 +1,294 @@
+//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/HashTable.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+HashTable::HashTable() : HashTable(8) {}
+
+HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); }
+
+Error HashTable::load(msf::StreamReader &Stream) {
+  const Header *H;
+  if (auto EC = Stream.readObject(H))
+    return EC;
+  if (H->Capacity == 0)
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Invalid Hash Table Capacity");
+  if (H->Size > maxLoad(H->Capacity))
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Invalid Hash Table Size");
+
+  Buckets.resize(H->Capacity);
+
+  if (auto EC = readSparseBitVector(Stream, Present))
+    return EC;
+  if (Present.count() != H->Size)
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Present bit vector does not match size!");
+
+  if (auto EC = readSparseBitVector(Stream, Deleted))
+    return EC;
+  if (Present.intersects(Deleted))
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Present bit vector interesects deleted!");
+
+  for (uint32_t P : Present) {
+    if (auto EC = Stream.readInteger(Buckets[P].first))
+      return EC;
+    if (auto EC = Stream.readInteger(Buckets[P].second))
+      return EC;
+  }
+
+  return Error::success();
+}
+
+uint32_t HashTable::calculateSerializedLength() const {
+  uint32_t Size = sizeof(Header);
+
+  int NumBitsP = Present.find_last() + 1;
+  int NumBitsD = Deleted.find_last() + 1;
+
+  // Present bit set number of words, followed by that many actual words.
+  Size += sizeof(uint32_t);
+  Size += alignTo(NumBitsP, sizeof(uint32_t));
+
+  // Deleted bit set number of words, followed by that many actual words.
+  Size += sizeof(uint32_t);
+  Size += alignTo(NumBitsD, sizeof(uint32_t));
+
+  // One (Key, Value) pair for each entry Present.
+  Size += 2 * sizeof(uint32_t) * size();
+
+  return Size;
+}
+
+Error HashTable::commit(msf::StreamWriter &Writer) const {
+  Header H;
+  H.Size = size();
+  H.Capacity = capacity();
+  if (auto EC = Writer.writeObject(H))
+    return EC;
+
+  if (auto EC = writeSparseBitVector(Writer, Present))
+    return EC;
+
+  if (auto EC = writeSparseBitVector(Writer, Deleted))
+    return EC;
+
+  for (const auto &Entry : *this) {
+    if (auto EC = Writer.writeInteger(Entry.first))
+      return EC;
+    if (auto EC = Writer.writeInteger(Entry.second))
+      return EC;
+  }
+  return Error::success();
+}
+
+uint32_t HashTable::capacity() const { return Buckets.size(); }
+uint32_t HashTable::size() const { return Present.count(); }
+
+HashTableIterator HashTable::begin() const { return HashTableIterator(*this); }
+HashTableIterator HashTable::end() const {
+  return HashTableIterator(*this, 0, true);
+}
+
+HashTableIterator HashTable::find(uint32_t K) {
+  uint32_t H = K % capacity();
+  uint32_t I = H;
+  Optional<uint32_t> FirstUnused;
+  do {
+    if (isPresent(I)) {
+      if (Buckets[I].first == K)
+        return HashTableIterator(*this, I, false);
+    } else {
+      if (!FirstUnused)
+        FirstUnused = I;
+      // Insertion occurs via linear probing from the slot hint, and will be
+      // inserted at the first empty / deleted location.  Therefore, if we are
+      // probing and find a location that is neither present nor deleted, then
+      // nothing must have EVER been inserted at this location, and thus it is
+      // not possible for a matching value to occur later.
+      if (!isDeleted(I))
+        break;
+    }
+    I = (I + 1) % capacity();
+  } while (I != H);
+
+  // The only way FirstUnused would not be set is if every single entry in the
+  // table were Present.  But this would violate the load factor constraints
+  // that we impose, so it should never happen.
+  assert(FirstUnused);
+  return HashTableIterator(*this, *FirstUnused, true);
+}
+
+void HashTable::set(uint32_t K, uint32_t V) {
+  auto Entry = find(K);
+  if (Entry != end()) {
+    assert(isPresent(Entry.index()));
+    assert(Buckets[Entry.index()].first == K);
+    // We're updating, no need to do anything special.
+    Buckets[Entry.index()].second = V;
+    return;
+  }
+
+  auto &B = Buckets[Entry.index()];
+  assert(!isPresent(Entry.index()));
+  assert(Entry.isEnd());
+  B.first = K;
+  B.second = V;
+  Present.set(Entry.index());
+  Deleted.reset(Entry.index());
+
+  grow();
+
+  assert(find(K) != end());
+}
+
+void HashTable::remove(uint32_t K) {
+  auto Iter = find(K);
+  // It wasn't here to begin with, just exit.
+  if (Iter == end())
+    return;
+
+  assert(Present.test(Iter.index()));
+  assert(!Deleted.test(Iter.index()));
+  Deleted.set(Iter.index());
+  Present.reset(Iter.index());
+}
+
+uint32_t HashTable::get(uint32_t K) {
+  auto I = find(K);
+  assert(I != end());
+  return (*I).second;
+}
+
+uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
+
+void HashTable::grow() {
+  uint32_t S = size();
+  if (S < maxLoad(capacity()))
+    return;
+  assert(capacity() != UINT32_MAX, "Can't grow Hash table!");
+
+  uint32_t NewCapacity =
+      (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
+
+  // Growing requires rebuilding the table and re-hashing every item.  Make a
+  // copy with a larger capacity, insert everything into the copy, then swap
+  // it in.
+  HashTable NewMap(NewCapacity);
+  for (auto I : Present) {
+    NewMap.set(Buckets[I].first, Buckets[I].second);
+  }
+
+  Buckets.swap(NewMap.Buckets);
+  std::swap(Present, NewMap.Present);
+  std::swap(Deleted, NewMap.Deleted);
+  assert(capacity() == NewCapacity);
+  assert(size() == S);
+}
+
+Error HashTable::readSparseBitVector(msf::StreamReader &Stream,
+                                     SparseBitVector<> &V) {
+  uint32_t NumWords;
+  if (auto EC = Stream.readInteger(NumWords))
+    return joinErrors(
+        std::move(EC),
+        make_error<RawError>(raw_error_code::corrupt_file,
+                             "Expected hash table number of words"));
+
+  for (uint32_t I = 0; I != NumWords; ++I) {
+    uint32_t Word;
+    if (auto EC = Stream.readInteger(Word))
+      return joinErrors(std::move(EC),
+                        make_error<RawError>(raw_error_code::corrupt_file,
+                                             "Expected hash table word"));
+    for (unsigned Idx = 0; Idx < 32; ++Idx)
+      if (Word & (1U << Idx))
+        V.set((I * 32) + Idx);
+  }
+  return Error::success();
+}
+
+Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer,
+                                      SparseBitVector<> &Vec) {
+  int ReqBits = Vec.find_last() + 1;
+  uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t);
+  if (auto EC = Writer.writeInteger(NumWords))
+    return joinErrors(
+        std::move(EC),
+        make_error<RawError>(raw_error_code::corrupt_file,
+                             "Could not write linear map number of words"));
+
+  uint32_t Idx = 0;
+  for (uint32_t I = 0; I != NumWords; ++I) {
+    uint32_t Word = 0;
+    for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) {
+      if (Vec.test(Idx))
+        Word |= (1 << WordIdx);
+    }
+    if (auto EC = Writer.writeInteger(Word))
+      return joinErrors(std::move(EC), make_error<RawError>(
+                                           raw_error_code::corrupt_file,
+                                           "Could not write linear map word"));
+  }
+  return Error::success();
+}
+
+HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index,
+                                     bool IsEnd)
+    : Map(&Map), Index(Index), IsEnd(IsEnd) {}
+
+HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) {
+  int I = Map.Present.find_first();
+  if (I == -1) {
+    Index = 0;
+    IsEnd = true;
+  } else {
+    Index = static_cast<uint32_t>(I);
+    IsEnd = false;
+  }
+}
+
+HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) {
+  Map = R.Map;
+  return *this;
+}
+
+bool HashTableIterator::operator==(const HashTableIterator &R) const {
+  if (IsEnd && R.IsEnd)
+    return true;
+  if (IsEnd != R.IsEnd)
+    return false;
+
+  return (Map == R.Map) && (Index == R.Index);
+}
+
+const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const {
+  assert(Map->Present.test(Index));
+  return Map->Buckets[Index];
+}
+
+HashTableIterator &HashTableIterator::operator++() {
+  while (Index < Map->Buckets.size()) {
+    ++Index;
+    if (Map->Present.test(Index))
+      return *this;
+  }
+
+  IsEnd = true;
+  return *this;
+}
diff --git a/lib/DebugInfo/PDB/Raw/NameMap.cpp b/lib/DebugInfo/PDB/Raw/NameMap.cpp

index 0f55f58da38173f2132a4ecd5bf51ba1c059d306..b101f90f756ed413761b4d0e6fbc292687988dfd 100644 (file)
--- a/lib/DebugInfo/PDB/Raw/NameMap.cpp
+++ b/lib/DebugInfo/PDB/Raw/NameMap.cpp
@@ -7,12 +7,14 @@
  //
  //===----------------------------------------------------------------------===//
  
+#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
+
  #include "llvm/ADT/SparseBitVector.h"
  #include "llvm/ADT/StringMap.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/iterator_range.h"
  #include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
+#include "llvm/DebugInfo/PDB/Raw/HashTable.h"
  #include "llvm/DebugInfo/PDB/Raw/RawError.h"
  #include "llvm/Support/Error.h"
  #include <algorithm>
@@ -25,123 +27,35 @@ using namespace llvm::pdb;
  NameMap::NameMap() = default;
  
  Error NameMap::load(StreamReader &Stream) {
-  // This is some sort of weird string-set/hash table encoded in the stream.
-  // It starts with the number of bytes in the table.
-  uint32_t NumberOfBytes;
-  if (auto EC = Stream.readInteger(NumberOfBytes))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Expected name map length"));
-  if (Stream.bytesRemaining() < NumberOfBytes)
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Invalid name map length");
-
-  // Following that field is the starting offset of strings in the name table.
-  uint32_t StringsOffset = Stream.getOffset();
-  Stream.setOffset(StringsOffset + NumberOfBytes);
-
-  // This appears to be equivalent to the total number of strings *actually*
-  // in the name table.
-  uint32_t HashSize;
-  if (auto EC = Stream.readInteger(HashSize))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Expected name map hash size"));
-
-  // This appears to be an upper bound on the number of strings in the name
-  // table.
-  uint32_t MaxNumberOfStrings;
-  if (auto EC = Stream.readInteger(MaxNumberOfStrings))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Expected name map max strings"));
-
-  if (MaxNumberOfStrings > (UINT32_MAX / sizeof(uint32_t)))
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Implausible number of strings");
-
-  const uint32_t MaxNumberOfWords = UINT32_MAX / (sizeof(uint32_t) * 8);
-
-  // This appears to be a hash table which uses bitfields to determine whether
-  // or not a bucket is 'present'.
-  uint32_t NumPresentWords;
-  if (auto EC = Stream.readInteger(NumPresentWords))
+  uint32_t StringBufferSize;
+  if (auto EC = Stream.readInteger(StringBufferSize))
      return joinErrors(std::move(EC),
                        make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Expected name map num words"));
+                                           "Expected string buffer size"));
  
-  if (NumPresentWords > MaxNumberOfWords)
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Number of present words is too large");
+  msf::ReadableStreamRef StringsBuffer;
+  if (auto EC = Stream.readStreamRef(StringsBuffer, StringBufferSize))
+    return EC;
  
-  SparseBitVector<> Present;
-  for (uint32_t I = 0; I != NumPresentWords; ++I) {
-    uint32_t Word;
-    if (auto EC = Stream.readInteger(Word))
-      return joinErrors(std::move(EC),
-                        make_error<RawError>(raw_error_code::corrupt_file,
-                                             "Expected name map word"));
-    for (unsigned Idx = 0; Idx < 32; ++Idx)
-      if (Word & (1U << Idx))
-        Present.set((I * 32) + Idx);
-  }
-
-  // This appears to be a hash table which uses bitfields to determine whether
-  // or not a bucket is 'deleted'.
-  uint32_t NumDeletedWords;
-  if (auto EC = Stream.readInteger(NumDeletedWords))
-    return joinErrors(
-        std::move(EC),
-        make_error<RawError>(raw_error_code::corrupt_file,
-                             "Expected name map num deleted words"));
+  HashTable OffsetIndexMap;
+  if (auto EC = OffsetIndexMap.load(Stream))
+    return EC;
  
-  if (NumDeletedWords > MaxNumberOfWords)
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Number of deleted words is too large");
-
-  SparseBitVector<> Deleted;
-  for (uint32_t I = 0; I != NumDeletedWords; ++I) {
-    uint32_t Word;
-    if (auto EC = Stream.readInteger(Word))
-      return joinErrors(std::move(EC),
-                        make_error<RawError>(raw_error_code::corrupt_file,
-                                             "Expected name map word"));
-    for (unsigned Idx = 0; Idx < 32; ++Idx)
-      if (Word & (1U << Idx))
-        Deleted.set((I * 32) + Idx);
-  }
-
-  for (unsigned I : Present) {
-    // For all present entries, dump out their mapping.
-    (void)I;
-
-    // This appears to be an offset relative to the start of the strings.
-    // It tells us where the null-terminated string begins.
-    uint32_t NameOffset;
-    if (auto EC = Stream.readInteger(NameOffset))
-      return joinErrors(std::move(EC),
-                        make_error<RawError>(raw_error_code::corrupt_file,
-                                             "Expected name map name offset"));
-
-    // This appears to be a stream number into the stream directory.
-    uint32_t NameIndex;
-    if (auto EC = Stream.readInteger(NameIndex))
-      return joinErrors(std::move(EC),
-                        make_error<RawError>(raw_error_code::corrupt_file,
-                                             "Expected name map name index"));
+  uint32_t NameOffset;
+  uint32_t NameIndex;
+  for (const auto &Entry : OffsetIndexMap) {
+    std::tie(NameOffset, NameIndex) = Entry;
  
      // Compute the offset of the start of the string relative to the stream.
-    uint32_t StringOffset = StringsOffset + NameOffset;
-    uint32_t OldOffset = Stream.getOffset();
+    msf::StreamReader NameReader(StringsBuffer);
+    NameReader.setOffset(NameOffset);
      // Pump out our c-string from the stream.
      StringRef Str;
-    Stream.setOffset(StringOffset);
-    if (auto EC = Stream.readZeroString(Str))
+    if (auto EC = NameReader.readZeroString(Str))
        return joinErrors(std::move(EC),
                          make_error<RawError>(raw_error_code::corrupt_file,
                                               "Expected name map name"));
  
-    Stream.setOffset(OldOffset);
      // Add this to a string-map from name to stream number.
      Mapping.insert({Str, NameIndex});
    }
diff --git a/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp b/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp

index f570d5931b0f116f92b5de036eba77a4e1836bfc..aead331d99d6df20012df11d02eb7a65b7b54019 100644 (file)
--- a/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp
+++ b/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp
@@ -22,87 +22,39 @@ using namespace llvm::pdb;
  NameMapBuilder::NameMapBuilder() = default;
  
  void NameMapBuilder::addMapping(StringRef Name, uint32_t Mapping) {
-  StringDataBytes += Name.size() + 1;
-  Map.insert({Name, Mapping});
-}
-
-Expected<std::unique_ptr<NameMap>> NameMapBuilder::build() {
-  auto Result = llvm::make_unique<NameMap>();
-  Result->Mapping = Map;
-  return std::move(Result);
+  Strings.push_back(Name);
+  Map.set(Offset, Mapping);
+  Offset += Name.size() + 1;
  }
  
  uint32_t NameMapBuilder::calculateSerializedLength() const {
    uint32_t TotalLength = 0;
  
-  TotalLength += sizeof(support::ulittle32_t); // StringDataBytes value
-  TotalLength += StringDataBytes;              // actual string data
-
-  TotalLength += sizeof(support::ulittle32_t); // Hash Size
-  TotalLength += sizeof(support::ulittle32_t); // Max Number of Strings
-  TotalLength += sizeof(support::ulittle32_t); // Num Present Words
-  // One bitmask word for each present entry
-  TotalLength += Map.size() * sizeof(support::ulittle32_t);
-  TotalLength += sizeof(support::ulittle32_t); // Num Deleted Words
-
-  // For each present word, which we are treating as equivalent to the number of
-  // entries in the table, we have a pair of integers.  An offset into the
-  // string data, and a corresponding stream number.
-  TotalLength += Map.size() * 2 * sizeof(support::ulittle32_t);
+  // Number of bytes of string data.
+  TotalLength += sizeof(support::ulittle32_t);
+  // Followed by that many actual bytes of string data.
+  TotalLength += Offset;
+  // Followed by the mapping from Name to Index.
+  TotalLength += Map.calculateSerializedLength();
  
    return TotalLength;
  }
  
  Error NameMapBuilder::commit(msf::StreamWriter &Writer) const {
-  // The first field is the number of bytes of string data.  So add
-  // up the length of all strings plus a null terminator for each
-  // one.
-  uint32_t NumBytes = 0;
-  for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
-    NumBytes += B->getKeyLength() + 1;
-  }
-
-  if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data
-    return EC;
-  // Now all of the string data itself.
-  for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
-    if (auto EC = Writer.writeZeroString(B->getKey()))
-      return EC;
-  }
-
-  if (auto EC = Writer.writeInteger(Map.size())) // Hash Size
+  // The first field is the number of bytes of string data.  We've already been
+  // keeping a running total of this in `Offset`.
+  if (auto EC = Writer.writeInteger(Offset)) // Number of bytes of string data
      return EC;
  
-  if (auto EC = Writer.writeInteger(Map.size())) // Max Number of Strings
-    return EC;
-
-  if (auto EC = Writer.writeInteger(Map.size())) // Num Present Words
-    return EC;
-
-  // For each entry in the mapping, write a bit mask which represents a bucket
-  // to store it in.  We don't use this, so the value we write isn't important
-  // to us, it just has to be there.
-  for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
-    if (auto EC = Writer.writeInteger(1U))
+  // Now all of the string data itself.
+  for (auto S : Strings) {
+    if (auto EC = Writer.writeZeroString(S))
        return EC;
    }
  
-  if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words
+  // And finally the Linear Map.
+  if (auto EC = Map.commit(Writer))
      return EC;
  
-  // Mappings of each word.
-  uint32_t OffsetSoFar = 0;
-  for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
-    // This is a list of key value pairs where the key is the offset into the
-    // strings buffer, and the value is a stream number.  Write each pair.
-    if (auto EC = Writer.writeInteger(OffsetSoFar))
-      return EC;
-
-    if (auto EC = Writer.writeInteger(B->second))
-      return EC;
-
-    OffsetSoFar += B->getKeyLength() + 1;
-  }
-
    return Error::success();
  }
diff --git a/unittests/DebugInfo/PDB/CMakeLists.txt b/unittests/DebugInfo/PDB/CMakeLists.txt

index 16a69edd81c35db666b122295746abf96ab05b99..06695e3a82b1ce53e5dc342a104c65d6ead80a0d 100644 (file)
--- a/unittests/DebugInfo/PDB/CMakeLists.txt
+++ b/unittests/DebugInfo/PDB/CMakeLists.txt
@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
    )
  
  set(DebugInfoPDBSources
+  HashTableTest.cpp
    MappedBlockStreamTest.cpp
    NameHashTableBuilderTest.cpp
    MSFBuilderTest.cpp
diff --git a/unittests/DebugInfo/PDB/HashTableTest.cpp b/unittests/DebugInfo/PDB/HashTableTest.cpp

new file mode 100644 (file)

index 0000000..70d712f
--- /dev/null
+++ b/unittests/DebugInfo/PDB/HashTableTest.cpp
@@ -0,0 +1,167 @@
+//===- llvm/unittest/DebugInfo/PDB/HashTableTest.cpp ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ErrorChecking.h"
+#include "gtest/gtest.h"
+
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+#include "llvm/DebugInfo/PDB/Raw/HashTable.h"
+
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+namespace {
+class HashTableInternals : public HashTable {
+public:
+  using HashTable::Buckets;
+  using HashTable::Present;
+  using HashTable::Deleted;
+};
+}
+
+TEST(HashTableTest, TestSimple) {
+  HashTable Table;
+  EXPECT_EQ(0u, Table.size());
+  EXPECT_GT(Table.capacity(), 0u);
+
+  Table.set(3, 7);
+  EXPECT_EQ(1u, Table.size());
+  ASSERT_NE(Table.end(), Table.find(3));
+  EXPECT_EQ(7u, Table.get(3));
+}
+
+TEST(HashTableTest, TestCollision) {
+  HashTable Table;
+  EXPECT_EQ(0u, Table.size());
+  EXPECT_GT(Table.capacity(), 0u);
+
+  // We use knowledge of the hash table's implementation details to make sure
+  // to add another value that is the equivalent to the first value modulo the
+  // hash table's capacity.
+  uint32_t N1 = Table.capacity() + 1;
+  uint32_t N2 = 2 * N1;
+
+  Table.set(N1, 7);
+  Table.set(N2, 12);
+  EXPECT_EQ(2u, Table.size());
+  ASSERT_NE(Table.end(), Table.find(N1));
+  ASSERT_NE(Table.end(), Table.find(N2));
+
+  EXPECT_EQ(7u, Table.get(N1));
+  EXPECT_EQ(12u, Table.get(N2));
+}
+
+TEST(HashTableTest, TestRemove) {
+  HashTable Table;
+  EXPECT_EQ(0u, Table.size());
+  EXPECT_GT(Table.capacity(), 0u);
+
+  Table.set(1, 2);
+  Table.set(3, 4);
+  EXPECT_EQ(2u, Table.size());
+  ASSERT_NE(Table.end(), Table.find(1));
+  ASSERT_NE(Table.end(), Table.find(3));
+
+  EXPECT_EQ(2u, Table.get(1));
+  EXPECT_EQ(4u, Table.get(3));
+
+  Table.remove(1u);
+  EXPECT_EQ(1u, Table.size());
+  EXPECT_EQ(Table.end(), Table.find(1));
+  ASSERT_NE(Table.end(), Table.find(3));
+  EXPECT_EQ(4u, Table.get(3));
+}
+
+TEST(HashTableTest, TestCollisionAfterMultipleProbes) {
+  HashTable Table;
+  EXPECT_EQ(0u, Table.size());
+  EXPECT_GT(Table.capacity(), 0u);
+
+  // Probing looks for the first available slot.  A slot may already be filled
+  // as a result of an item with a *different* hash value already being there.
+  // Test that when this happens, the probe still finds the value.
+  uint32_t N1 = Table.capacity() + 1;
+  uint32_t N2 = N1 + 1;
+  uint32_t N3 = 2 * N1;
+
+  Table.set(N1, 7);
+  Table.set(N2, 11);
+  Table.set(N3, 13);
+  EXPECT_EQ(3u, Table.size());
+  ASSERT_NE(Table.end(), Table.find(N1));
+  ASSERT_NE(Table.end(), Table.find(N2));
+  ASSERT_NE(Table.end(), Table.find(N3));
+
+  EXPECT_EQ(7u, Table.get(N1));
+  EXPECT_EQ(11u, Table.get(N2));
+  EXPECT_EQ(13u, Table.get(N3));
+
+  // Remove the one that had been filled in the middle, then insert another one
+  // with a collision.  It should fill the newly emptied slot.
+  Table.remove(N2);
+  uint32_t N4 = N1 * 3;
+  Table.set(N4, 17);
+  EXPECT_EQ(3u, Table.size());
+  ASSERT_NE(Table.end(), Table.find(N1));
+  ASSERT_NE(Table.end(), Table.find(N3));
+  ASSERT_NE(Table.end(), Table.find(N4));
+
+  EXPECT_EQ(7u, Table.get(N1));
+  EXPECT_EQ(13u, Table.get(N3));
+  EXPECT_EQ(17u, Table.get(N4));
+}
+
+TEST(HashTableTest, Grow) {
+  // So that we are independent of the load factor, `capacity` items, which is
+  // guaranteed to trigger a grow.  Then verify that the size is the same, the
+  // capacity is larger, and all the original items are still in the table.
+
+  HashTable Table;
+  uint32_t OldCapacity = Table.capacity();
+  for (uint32_t I = 0; I < OldCapacity; ++I) {
+    Table.set(OldCapacity + I * 2 + 1, I * 2 + 3);
+  }
+  EXPECT_EQ(OldCapacity, Table.size());
+  EXPECT_GT(Table.capacity(), OldCapacity);
+  for (uint32_t I = 0; I < OldCapacity; ++I) {
+    ASSERT_NE(Table.end(), Table.find(OldCapacity + I * 2 + 1));
+    EXPECT_EQ(I * 2 + 3, Table.get(OldCapacity + I * 2 + 1));
+  }
+}
+
+TEST(HashTableTest, Serialization) {
+  HashTableInternals Table;
+  uint32_t Cap = Table.capacity();
+  for (uint32_t I = 0; I < Cap; ++I) {
+    Table.set(Cap + I * 2 + 1, I * 2 + 3);
+  }
+
+  std::vector<uint8_t> Buffer(Table.calculateSerializedLength());
+  msf::MutableByteStream Stream(Buffer);
+  msf::StreamWriter Writer(Stream);
+  EXPECT_NO_ERROR(Table.commit(Writer));
+  // We should have written precisely the number of bytes we calculated earlier.
+  EXPECT_EQ(Buffer.size(), Writer.getOffset());
+
+  HashTableInternals Table2;
+  msf::StreamReader Reader(Stream);
+  EXPECT_NO_ERROR(Table2.load(Reader));
+  // We should have read precisely the number of bytes we calculated earlier.
+  EXPECT_EQ(Buffer.size(), Reader.getOffset());
+
+  EXPECT_EQ(Table.size(), Table2.size());
+  EXPECT_EQ(Table.capacity(), Table2.capacity());
+  EXPECT_EQ(Table.Buckets, Table2.Buckets);
+  EXPECT_EQ(Table.Present, Table2.Present);
+  EXPECT_EQ(Table.Deleted, Table2.Deleted);
+}
author	Zachary Turner <zturner@google.com>
	Thu, 19 Jan 2017 23:31:24 +0000 (23:31 +0000)
committer	Zachary Turner <zturner@google.com>
	Thu, 19 Jan 2017 23:31:24 +0000 (23:31 +0000)
include/llvm/DebugInfo/PDB/Raw/HashTable.h	[new file with mode: 0644]	patch \| blob
include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h		patch \| blob \| history
lib/DebugInfo/PDB/CMakeLists.txt		patch \| blob \| history
lib/DebugInfo/PDB/Raw/HashTable.cpp	[new file with mode: 0644]	patch \| blob
lib/DebugInfo/PDB/Raw/NameMap.cpp		patch \| blob \| history
lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp		patch \| blob \| history
unittests/DebugInfo/PDB/CMakeLists.txt		patch \| blob \| history
unittests/DebugInfo/PDB/HashTableTest.cpp	[new file with mode: 0644]	patch \| blob