From: Rafael Espindola Date: Tue, 4 Oct 2016 22:43:25 +0000 (+0000) Subject: Misc improvements to StringTableBuilder. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2638e45e83af8ae75fcb5ad2ac89feb8a0c1ff99;p=llvm Misc improvements to StringTableBuilder. This patch adds write methods to StringTableBuilder so that it is easier to change the underlying implementation. Using the write methods, avoid creating a temporary buffer when using mmaped output. It also uses a more compact key in the DenseMap. Overall this produces a slightly faster lld: firefox master 6.853419709 patch 6.841968912 1.00167361138x faster chromium master 4.297280174 patch 4.298712163 1.00033323147x slower chromium fast master 1.802335952 patch 1.806872459 1.00251701521x slower the gold plugin master 0.3247149 patch 0.321971644 1.00852017888x faster clang master 0.551279945 patch 0.543733194 1.01387951128x faster llvm-as master 0.032743458 patch 0.032143478 1.01866568391x faster the gold plugin fsds master 0.350814247 patch 0.348571741 1.00643341309x faster clang fsds master 0.6281672 patch 0.621130222 1.01132931187x faster llvm-as fsds master 0.030168899 patch 0.029797155 1.01247582194x faster scylla master 3.104222518 patch 3.059590248 1.01458766252x faster git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283266 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h index f2b8ecd2d99..90d1bc36f16 100644 --- a/include/llvm/MC/StringTableBuilder.h +++ b/include/llvm/MC/StringTableBuilder.h @@ -15,6 +15,24 @@ #include namespace llvm { +class raw_ostream; + +class CachedHashString { + const char *P; + uint32_t Size; + uint32_t Hash; + +public: + CachedHashString(StringRef S) + : CachedHashString(S, DenseMapInfo::getHashValue(S)) {} + CachedHashString(StringRef S, uint32_t Hash) + : P(S.data()), Size(S.size()), Hash(Hash) { + assert(S.size() <= std::numeric_limits::max()); + } + + StringRef val() const { return StringRef(P, Size); } + uint32_t hash() const { return Hash; } +}; /// \brief Utility for building string tables with deduplicated suffixes. class StringTableBuilder { @@ -22,16 +40,18 @@ public: enum Kind { ELF, WinCOFF, MachO, RAW }; private: - SmallString<256> StringTable; - DenseMap, size_t> StringIndexMap; + DenseMap StringIndexMap; size_t Size = 0; Kind K; unsigned Alignment; + bool Finalized = false; void finalizeStringTable(bool Optimize); + void initSize(); public: StringTableBuilder(Kind K, unsigned Alignment = 1); + ~StringTableBuilder(); /// \brief Add a string to the builder. Returns the position of S in the /// table. The position will be changed if finalize is used. @@ -46,28 +66,18 @@ public: /// returned by add will still be valid. void finalizeInOrder(); - /// \brief Retrieve the string table data. Can only be used after the table - /// is finalized. - StringRef data() const { - assert(isFinalized()); - return StringTable; - } - /// \brief Get the offest of a string in the string table. Can only be used /// after the table is finalized. size_t getOffset(StringRef S) const; - const DenseMap, size_t> &getMap() const { - return StringIndexMap; - } - size_t getSize() const { return Size; } void clear(); + void write(raw_ostream &OS) const; + void write(uint8_t *Buf) const; + private: - bool isFinalized() const { - return !StringTable.empty(); - } + bool isFinalized() const { return Finalized; } }; } // end llvm namespace diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index dc21b48ca6f..b115aabc8af 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1127,7 +1127,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm, const MCSectionELF *ELFObjectWriter::createStringTable(MCContext &Ctx) { const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1]; - getStream() << StrTabBuilder.data(); + StrTabBuilder.write(getStream()); return StrtabSection; } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index e39271949d9..ce8e216e916 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -882,7 +882,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, - StringTableOffset, StringTable.data().size()); + StringTableOffset, StringTable.getSize()); writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, FirstExternalSymbol, NumExternalSymbols, @@ -977,7 +977,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, writeNlist(Entry, Layout); // Write the string table. - getStream() << StringTable.data(); + StringTable.write(getStream()); } } diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index 7f69871fd45..3fde526e588 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -11,13 +11,37 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; -StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) - : K(K), Alignment(Alignment) { +namespace llvm { +template <> struct DenseMapInfo { + static CachedHashString getEmptyKey() { + StringRef S = DenseMapInfo::getEmptyKey(); + return {S, 0}; + } + static CachedHashString getTombstoneKey() { + StringRef S = DenseMapInfo::getTombstoneKey(); + return {S, 0}; + } + static unsigned getHashValue(CachedHashString Val) { + assert(!isEqual(Val, getEmptyKey()) && "Cannot hash the empty key!"); + assert(!isEqual(Val, getTombstoneKey()) && + "Cannot hash the tombstone key!"); + return Val.hash(); + } + static bool isEqual(CachedHashString A, CachedHashString B) { + return DenseMapInfo::isEqual(A.val(), B.val()); + } +}; +} + +StringTableBuilder::~StringTableBuilder() {} + +void StringTableBuilder::initSize() { // Account for leading bytes in table so that offsets returned from add are // correct. switch (K) { @@ -26,19 +50,45 @@ StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) break; case MachO: case ELF: + // Start the table with a NUL byte. Size = 1; break; case WinCOFF: + // Make room to write the table size later. Size = 4; break; } } -typedef std::pair, size_t> StringPair; +StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) + : K(K), Alignment(Alignment) { + initSize(); +} + +void StringTableBuilder::write(raw_ostream &OS) const { + assert(isFinalized()); + SmallString<0> Data; + Data.resize(getSize()); + write((uint8_t *)&Data[0]); + OS << Data; +} + +typedef std::pair StringPair; + +void StringTableBuilder::write(uint8_t *Buf) const { + assert(isFinalized()); + for (const StringPair &P : StringIndexMap) { + StringRef Data = P.first.val(); + memcpy(Buf + P.second, Data.data(), Data.size()); + } + if (K != WinCOFF) + return; + support::endian::write32le(Buf, Size); +} // Returns the character at Pos from end of a string. static int charTailAt(StringPair *P, size_t Pos) { - StringRef S = P->first.Val; + StringRef S = P->first.val(); if (Pos >= S.size()) return -1; return (unsigned char)S[S.size() - Pos - 1]; @@ -86,90 +136,49 @@ void StringTableBuilder::finalizeInOrder() { } void StringTableBuilder::finalizeStringTable(bool Optimize) { - std::vector Strings; - Strings.reserve(StringIndexMap.size()); - for (StringPair &P : StringIndexMap) - Strings.push_back(&P); - - if (!Strings.empty()) { - // If we're optimizing, sort by name. If not, sort by previously assigned - // offset. - if (Optimize) { - multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0); - } else { - std::sort(Strings.begin(), Strings.end(), - [](const StringPair *LHS, const StringPair *RHS) { - return LHS->second < RHS->second; - }); - } - } + Finalized = true; - switch (K) { - case RAW: - break; - case ELF: - case MachO: - // Start the table with a NUL byte. - StringTable += '\x00'; - break; - case WinCOFF: - // Make room to write the table size later. - StringTable.append(4, '\x00'); - break; - } + if (Optimize) { + std::vector Strings; + Strings.reserve(StringIndexMap.size()); + for (StringPair &P : StringIndexMap) + Strings.push_back(&P); - StringRef Previous; - for (StringPair *P : Strings) { - StringRef S = P->first.Val; - if (K == WinCOFF) - assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); - - if (Optimize && Previous.endswith(S)) { - size_t Pos = StringTable.size() - S.size() - (K != RAW); - if (!(Pos & (Alignment - 1))) { - P->second = Pos; - continue; - } + if (!Strings.empty()) { + // If we're optimizing, sort by name. If not, sort by previously assigned + // offset. + multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0); } - if (Optimize) { - size_t Start = alignTo(StringTable.size(), Alignment); - P->second = Start; - StringTable.append(Start - StringTable.size(), '\0'); - } else { - assert(P->second == StringTable.size() && - "different strtab offset after finalization"); - } + initSize(); + + StringRef Previous; + for (StringPair *P : Strings) { + StringRef S = P->first.val(); + if (Previous.endswith(S)) { + size_t Pos = Size - S.size() - (K != RAW); + if (!(Pos & (Alignment - 1))) { + P->second = Pos; + continue; + } + } - StringTable += S; - if (K != RAW) - StringTable += '\x00'; - Previous = S; - } + Size = alignTo(Size, Alignment); + P->second = Size; - switch (K) { - case RAW: - case ELF: - break; - case MachO: - // Pad to multiple of 4. - while (StringTable.size() % 4) - StringTable += '\x00'; - break; - case WinCOFF: - // Write the table size in the first word. - assert(StringTable.size() <= std::numeric_limits::max()); - uint32_t Size = static_cast(StringTable.size()); - support::endian::write( - StringTable.data(), Size); - break; + Size += S.size(); + if (K != RAW) + ++Size; + Previous = S; + } } - Size = StringTable.size(); + if (K == MachO) + Size = alignTo(Size, 4); // Pad to multiple of 4. } void StringTableBuilder::clear() { - StringTable.clear(); + Finalized = false; StringIndexMap.clear(); } @@ -181,6 +190,9 @@ size_t StringTableBuilder::getOffset(StringRef S) const { } size_t StringTableBuilder::add(StringRef S) { + if (K == WinCOFF) + assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); + assert(!isFinalized()); size_t Start = alignTo(Size, Alignment); auto P = StringIndexMap.insert(std::make_pair(S, Start)); diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index f316a5af387..819d446d216 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -1082,7 +1082,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, if (Symbol->getIndex() != -1) WriteSymbol(*Symbol); - getStream().write(Strings.data().data(), Strings.data().size()); + Strings.write(getStream()); } MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) diff --git a/tools/yaml2obj/yaml2elf.cpp b/tools/yaml2obj/yaml2elf.cpp index c98093431a7..8fd2bfd1672 100644 --- a/tools/yaml2obj/yaml2elf.cpp +++ b/tools/yaml2obj/yaml2elf.cpp @@ -305,9 +305,8 @@ void ELFState::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name, zero(SHeader); SHeader.sh_name = DotShStrtab.getOffset(Name); SHeader.sh_type = ELF::SHT_STRTAB; - CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign) - << STB.data(); - SHeader.sh_size = STB.data().size(); + STB.write(CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign)); + SHeader.sh_size = STB.getSize(); SHeader.sh_addralign = 1; } diff --git a/unittests/MC/StringTableBuilderTest.cpp b/unittests/MC/StringTableBuilderTest.cpp index f78d3588fff..517ada5ed96 100644 --- a/unittests/MC/StringTableBuilderTest.cpp +++ b/unittests/MC/StringTableBuilderTest.cpp @@ -32,7 +32,11 @@ TEST(StringTableBuilderTest, BasicELF) { Expected += "foo"; Expected += '\x00'; - EXPECT_EQ(Expected, B.data()); + SmallString<64> Data; + raw_svector_ostream OS(Data); + B.write(OS); + + EXPECT_EQ(Expected, Data); EXPECT_EQ(1U, B.getOffset("foobar")); EXPECT_EQ(4U, B.getOffset("bar")); EXPECT_EQ(8U, B.getOffset("foo")); @@ -50,7 +54,7 @@ TEST(StringTableBuilderTest, BasicWinCOFF) { // size_field + "pygmy hippopotamus\0" + "river horse\0" uint32_t ExpectedSize = 4 + 19 + 12; - EXPECT_EQ(ExpectedSize, B.data().size()); + EXPECT_EQ(ExpectedSize, B.getSize()); std::string Expected; @@ -62,7 +66,11 @@ TEST(StringTableBuilderTest, BasicWinCOFF) { Expected += "river horse"; Expected += '\x00'; - EXPECT_EQ(Expected, B.data()); + SmallString<64> Data; + raw_svector_ostream OS(Data); + B.write(OS); + + EXPECT_EQ(Expected, Data); EXPECT_EQ(4U, B.getOffset("pygmy hippopotamus")); EXPECT_EQ(10U, B.getOffset("hippopotamus")); EXPECT_EQ(23U, B.getOffset("river horse")); @@ -85,7 +93,11 @@ TEST(StringTableBuilderTest, ELFInOrder) { Expected += "foobar"; Expected += '\x00'; - EXPECT_EQ(Expected, B.data()); + SmallString<64> Data; + raw_svector_ostream OS(Data); + B.write(OS); + + EXPECT_EQ(Expected, Data); EXPECT_EQ(1U, B.getOffset("foo")); EXPECT_EQ(5U, B.getOffset("bar")); EXPECT_EQ(9U, B.getOffset("foobar"));