From ae734ae0eada22cb4fcef3d18035a9725f5532da Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 31 Aug 2017 20:43:22 +0000 Subject: [PATCH] [llvm-pdbutil] Print detailed S_UDT stats. This adds a new command line option, -udt-stats, which breaks down the stats of S_UDT records. These are one of the biggest contributors to the size of /DEBUG:FASTLINK PDBs, so they need some additional tools to be able to analyze their usage. This option will dig into each S_UDT record and determine what kind of record it points to, and then break down the statistics by the target type. The goal here is to identify how our object files differ from MSVC object files in S_UDT records, so that we can output fewer of them and reach size parity. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312276 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/DebugInfo/PDB/Native/SymbolStream.h | 2 + lib/DebugInfo/PDB/Native/SymbolStream.cpp | 6 +- test/DebugInfo/PDB/module-stats.test | 4 +- test/DebugInfo/PDB/udt-stats.test | 15 ++ tools/llvm-pdbutil/DumpOutputStyle.cpp | 161 +++++++++++++++++- tools/llvm-pdbutil/DumpOutputStyle.h | 3 +- tools/llvm-pdbutil/FormatUtil.cpp | 12 ++ tools/llvm-pdbutil/FormatUtil.h | 1 + tools/llvm-pdbutil/MinimalTypeDumper.cpp | 18 +- tools/llvm-pdbutil/llvm-pdbutil.cpp | 32 ++-- tools/llvm-pdbutil/llvm-pdbutil.h | 3 +- 11 files changed, 221 insertions(+), 36 deletions(-) create mode 100644 test/DebugInfo/PDB/udt-stats.test diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h index 17695f58784..ae9f7d657b7 100644 --- a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h +++ b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h @@ -31,6 +31,8 @@ public: return SymbolRecords; } + codeview::CVSymbol readRecord(uint32_t Offset) const; + iterator_range getSymbols(bool *HadError) const; diff --git a/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/lib/DebugInfo/PDB/Native/SymbolStream.cpp index 
9e9ebd11495..5da1cd54192 100644 --- a/lib/DebugInfo/PDB/Native/SymbolStream.cpp +++ b/lib/DebugInfo/PDB/Native/SymbolStream.cpp @@ -10,7 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" @@ -43,3 +43,7 @@ SymbolStream::getSymbols(bool *HadError) const { } Error SymbolStream::commit() { return Error::success(); } + +codeview::CVSymbol SymbolStream::readRecord(uint32_t Offset) const { + return *SymbolRecords.at(Offset); +} diff --git a/test/DebugInfo/PDB/module-stats.test b/test/DebugInfo/PDB/module-stats.test index 81787090829..c2c58339ae0 100644 --- a/test/DebugInfo/PDB/module-stats.test +++ b/test/DebugInfo/PDB/module-stats.test @@ -1,5 +1,5 @@ -; RUN: llvm-pdbutil dump -mod-stats %p/Inputs/empty.pdb | FileCheck --check-prefix=ALL %s -; RUN: llvm-pdbutil dump -mod-stats -modi=1 %p/Inputs/empty.pdb | FileCheck --check-prefix=ONE %s +; RUN: llvm-pdbutil dump -sym-stats %p/Inputs/empty.pdb | FileCheck --check-prefix=ALL %s +; RUN: llvm-pdbutil dump -sym-stats -modi=1 %p/Inputs/empty.pdb | FileCheck --check-prefix=ONE %s ALL: Mod 0000 | `d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj`: ALL-NEXT: Stream 12, 308 bytes diff --git a/test/DebugInfo/PDB/udt-stats.test b/test/DebugInfo/PDB/udt-stats.test new file mode 100644 index 00000000000..0ffda9c3008 --- /dev/null +++ b/test/DebugInfo/PDB/udt-stats.test @@ -0,0 +1,15 @@ +; RUN: llvm-pdbutil dump -udt-stats %p/Inputs/big-read.pdb | FileCheck %s + +CHECK: S_UDT Record Stats +CHECK-NEXT: ============================================================ +CHECK: Record Kind | Count Size +CHECK-NEXT: ----------------------------- +CHECK-NEXT: LF_ENUM | 3 188 +CHECK-NEXT: LF_POINTER | 39 468 +CHECK-NEXT: LF_UNION | 1 52 
+CHECK-NEXT: | 43 0 +CHECK-NEXT: LF_PROCEDURE | 1 16 +CHECK-NEXT: LF_STRUCTURE | 27 1,788 +CHECK-NEXT: ----------------------------- +CHECK-NEXT: Total (S_UDT) | 114 2,604 +CHECK-NEXT: ----------------------------- diff --git a/tools/llvm-pdbutil/DumpOutputStyle.cpp b/tools/llvm-pdbutil/DumpOutputStyle.cpp index c573f606786..09424bd3df9 100644 --- a/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -59,6 +59,7 @@ #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" +#include #include using namespace llvm; @@ -82,8 +83,14 @@ Error DumpOutputStyle::dump() { P.NewLine(); } - if (opts::dump::DumpModuleStats.getNumOccurrences() > 0) { - if (auto EC = dumpModuleStats()) + if (opts::dump::DumpSymbolStats.getNumOccurrences() > 0) { + if (auto EC = dumpSymbolStats()) + return EC; + P.NewLine(); + } + + if (opts::dump::DumpUdtStats.getNumOccurrences() > 0) { + if (auto EC = dumpUdtStats()) return EC; P.NewLine(); } @@ -557,7 +564,7 @@ Error DumpOutputStyle::dumpModuleFiles() { return Error::success(); } -Error DumpOutputStyle::dumpModuleStats() { +Error DumpOutputStyle::dumpSymbolStats() { printHeader(P, "Module Stats"); ExitOnError Err("Unexpected error processing modules: "); @@ -607,6 +614,154 @@ Error DumpOutputStyle::dumpModuleStats() { return Error::success(); } +static bool isValidNamespaceIdentifier(StringRef S) { + if (S.empty()) + return false; + + if (std::isdigit(S[0])) + return false; + + return llvm::all_of(S, [](char C) { return std::isalnum(C); }); +} + +namespace { +constexpr uint32_t kNoneUdtKind = 0; +constexpr uint32_t kSimpleUdtKind = 1; +constexpr uint32_t kUnknownUdtKind = 2; +const StringRef NoneLabel(""); +const StringRef SimpleLabel(""); +const StringRef UnknownLabel(""); + +} // namespace + +static StringRef getUdtStatLabel(uint32_t Kind) { + if (Kind == kNoneUdtKind) + return NoneLabel; + + if (Kind == kSimpleUdtKind) + return SimpleLabel; + + if (Kind == kUnknownUdtKind) + return 
UnknownLabel; + + return formatTypeLeafKind(static_cast(Kind)); +} + +static uint32_t getLongestTypeLeafName(const StatCollection &Stats) { + size_t L = 0; + for (const auto &Stat : Stats.Individual) { + StringRef Label = getUdtStatLabel(Stat.first); + L = std::max(L, Label.size()); + } + return static_cast(L); +} + +Error DumpOutputStyle::dumpUdtStats() { + printHeader(P, "S_UDT Record Stats"); + + StatCollection UdtStats; + StatCollection UdtTargetStats; + if (!File.hasPDBGlobalsStream()) { + P.printLine("- Error: globals stream not present"); + return Error::success(); + } + + AutoIndent Indent(P, 4); + + auto &SymbolRecords = cantFail(File.getPDBSymbolStream()); + auto &Globals = cantFail(File.getPDBGlobalsStream()); + auto &TpiTypes = cantFail(initializeTypes(StreamTPI)); + + StringMap NamespacedStats; + + P.NewLine(); + + size_t LongestNamespace = 0; + for (uint32_t PubSymOff : Globals.getGlobalsTable()) { + CVSymbol Sym = SymbolRecords.readRecord(PubSymOff); + if (Sym.kind() != SymbolKind::S_UDT) + continue; + UdtStats.update(SymbolKind::S_UDT, Sym.length()); + + UDTSym UDT = cantFail(SymbolDeserializer::deserializeAs(Sym)); + + uint32_t Kind = 0; + uint32_t RecordSize = 0; + if (UDT.Type.isSimple() || + (UDT.Type.toArrayIndex() >= TpiTypes.capacity())) { + if (UDT.Type.isNoneType()) + Kind = kNoneUdtKind; + else if (UDT.Type.isSimple()) + Kind = kSimpleUdtKind; + else + Kind = kUnknownUdtKind; + } else { + CVType T = TpiTypes.getType(UDT.Type); + Kind = T.kind(); + RecordSize = T.length(); + } + + UdtTargetStats.update(Kind, RecordSize); + + size_t Pos = UDT.Name.find("::"); + if (Pos == StringRef::npos) + continue; + + StringRef Scope = UDT.Name.take_front(Pos); + if (Scope.empty() || !isValidNamespaceIdentifier(Scope)) + continue; + + LongestNamespace = std::max(LongestNamespace, Scope.size()); + NamespacedStats[Scope].update(RecordSize); + } + + LongestNamespace += StringRef(" namespace ''").size(); + uint32_t LongestTypeLeafKind = 
getLongestTypeLeafName(UdtTargetStats); + uint32_t FieldWidth = std::max(LongestNamespace, LongestTypeLeafKind); + + // Compute the max number of digits for count and size fields, including comma + // separators. + StringRef CountHeader("Count"); + StringRef SizeHeader("Size"); + uint32_t CD = NumDigits(UdtStats.Totals.Count); + CD += (CD - 1) / 3; + CD = std::max(CD, CountHeader.size()); + + uint32_t SD = NumDigits(UdtStats.Totals.Size); + SD += (SD - 1) / 3; + SD = std::max(SD, SizeHeader.size()); + + uint32_t TableWidth = FieldWidth + 3 + CD + 2 + SD + 1; + + P.formatLine("{0} | {1} {2}", + fmt_align("Record Kind", AlignStyle::Right, FieldWidth), + fmt_align(CountHeader, AlignStyle::Right, CD), + fmt_align(SizeHeader, AlignStyle::Right, SD)); + + P.formatLine("{0}", fmt_repeat('-', TableWidth)); + for (const auto &Stat : UdtTargetStats.Individual) { + StringRef Label = getUdtStatLabel(Stat.first); + P.formatLine("{0} | {1:N} {2:N}", + fmt_align(Label, AlignStyle::Right, FieldWidth), + fmt_align(Stat.second.Count, AlignStyle::Right, CD), + fmt_align(Stat.second.Size, AlignStyle::Right, SD)); + } + P.formatLine("{0}", fmt_repeat('-', TableWidth)); + P.formatLine("{0} | {1:N} {2:N}", + fmt_align("Total (S_UDT)", AlignStyle::Right, FieldWidth), + fmt_align(UdtStats.Totals.Count, AlignStyle::Right, CD), + fmt_align(UdtStats.Totals.Size, AlignStyle::Right, SD)); + P.formatLine("{0}", fmt_repeat('-', TableWidth)); + for (const auto &Stat : NamespacedStats) { + std::string Label = formatv("namespace '{0}'", Stat.getKey()); + P.formatLine("{0} | {1:N} {2:N}", + fmt_align(Label, AlignStyle::Right, FieldWidth), + fmt_align(Stat.second.Count, AlignStyle::Right, CD), + fmt_align(Stat.second.Size, AlignStyle::Right, SD)); + } + return Error::success(); +} + static void typesetLinesAndColumns(PDBFile &File, LinePrinter &P, uint32_t Start, const LineColumnEntry &E) { const uint32_t kMaxCharsPerLineNumber = 4; // 4 digit line number diff --git 
a/tools/llvm-pdbutil/DumpOutputStyle.h b/tools/llvm-pdbutil/DumpOutputStyle.h index 497c51fdbe4..7dd717c3788 100644 --- a/tools/llvm-pdbutil/DumpOutputStyle.h +++ b/tools/llvm-pdbutil/DumpOutputStyle.h @@ -66,7 +66,8 @@ private: Error dumpFileSummary(); Error dumpStreamSummary(); - Error dumpModuleStats(); + Error dumpSymbolStats(); + Error dumpUdtStats(); Error dumpStringTable(); Error dumpLines(); Error dumpInlineeLines(); diff --git a/tools/llvm-pdbutil/FormatUtil.cpp b/tools/llvm-pdbutil/FormatUtil.cpp index eca0751a7ac..f55d478127d 100644 --- a/tools/llvm-pdbutil/FormatUtil.cpp +++ b/tools/llvm-pdbutil/FormatUtil.cpp @@ -157,6 +157,18 @@ std::string llvm::pdb::formatSymbolKind(SymbolKind K) { return formatUnknownEnum(K); } +StringRef llvm::pdb::formatTypeLeafKind(TypeLeafKind K) { + switch (K) { +#define TYPE_RECORD(EnumName, value, name) \ + case EnumName: \ + return #EnumName; +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: + llvm_unreachable("Unknown type leaf kind!"); + } + return ""; +} + std::string llvm::pdb::formatSegmentOffset(uint16_t Segment, uint32_t Offset) { return formatv("{0:4}:{1:4}", Segment, Offset); } diff --git a/tools/llvm-pdbutil/FormatUtil.h b/tools/llvm-pdbutil/FormatUtil.h index 7804a1f0e23..9a003c9285c 100644 --- a/tools/llvm-pdbutil/FormatUtil.h +++ b/tools/llvm-pdbutil/FormatUtil.h @@ -68,6 +68,7 @@ std::string typesetStringList(uint32_t IndentLevel, std::string formatChunkKind(codeview::DebugSubsectionKind Kind, bool Friendly = true); std::string formatSymbolKind(codeview::SymbolKind K); +StringRef formatTypeLeafKind(codeview::TypeLeafKind K); /// Returns the number of digits in the given integer. 
inline int NumDigits(uint64_t N) { diff --git a/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/tools/llvm-pdbutil/MinimalTypeDumper.cpp index 0079b9e7eaa..fae89920e0b 100644 --- a/tools/llvm-pdbutil/MinimalTypeDumper.cpp +++ b/tools/llvm-pdbutil/MinimalTypeDumper.cpp @@ -26,18 +26,6 @@ using namespace llvm; using namespace llvm::codeview; using namespace llvm::pdb; -static StringRef getLeafTypeName(TypeLeafKind K) { - switch (K) { -#define TYPE_RECORD(EnumName, value, name) \ - case EnumName: \ - return #EnumName; -#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" - default: - llvm_unreachable("Unknown type leaf kind!"); - } - return ""; -} - static std::string formatClassOptions(uint32_t IndentLevel, ClassOptions Options) { std::vector Opts; @@ -212,7 +200,7 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { if (!Hashes) { P.formatLine("{0} | {1} [size = {2}]", fmt_align(Index, AlignStyle::Right, Width), - getLeafTypeName(Record.Type), Record.length()); + formatTypeLeafKind(Record.Type), Record.length()); } else { std::string H; if (Index.toArrayIndex() >= HashValues.size()) { @@ -231,7 +219,7 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { } P.formatLine("{0} | {1} [size = {2}, hash = {3}]", fmt_align(Index, AlignStyle::Right, Width), - getLeafTypeName(Record.Type), Record.length(), H); + formatTypeLeafKind(Record.Type), Record.length(), H); } P.Indent(Width + 3); return Error::success(); @@ -246,7 +234,7 @@ Error MinimalTypeDumpVisitor::visitTypeEnd(CVType &Record) { } Error MinimalTypeDumpVisitor::visitMemberBegin(CVMemberRecord &Record) { - P.formatLine("- {0}", getLeafTypeName(Record.Kind)); + P.formatLine("- {0}", formatTypeLeafKind(Record.Kind)); return Error::success(); } diff --git a/tools/llvm-pdbutil/llvm-pdbutil.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp index 045eb71a209..3f77813be9e 100644 --- a/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -422,10 
+422,15 @@ cl::opt<bool> DumpStreamBlocks( "stream-blocks", cl::desc("Add block information to the output of -streams"), cl::cat(MsfOptions), cl::sub(DumpSubcommand)); -cl::opt<bool> - DumpModuleStats("mod-stats", - cl::desc("Dump a detailed size breakdown for each module"), - cl::cat(MsfOptions), cl::sub(DumpSubcommand)); +cl::opt<bool> DumpSymbolStats( + "sym-stats", + cl::desc("Dump a detailed breakdown of symbol usage/size for each module"), + cl::cat(MsfOptions), cl::sub(DumpSubcommand)); + +cl::opt<bool> DumpUdtStats( + "udt-stats", + cl::desc("Dump a detailed breakdown of S_UDT record usage / stats"), + cl::cat(MsfOptions), cl::sub(DumpSubcommand)); // TYPE OPTIONS cl::opt<bool> DumpTypes("types", @@ -1095,27 +1100,28 @@ int main(int argc_, const char *argv_[]) { if (opts::DumpSubcommand) { if (opts::dump::RawAll) { - opts::dump::DumpLines = true; + opts::dump::DumpGlobals = true; opts::dump::DumpInlineeLines = true; - opts::dump::DumpXme = true; - opts::dump::DumpXmi = true; opts::dump::DumpIds = true; - opts::dump::DumpGlobals = true; + opts::dump::DumpIdExtras = true; + opts::dump::DumpLines = true; + opts::dump::DumpModules = true; + opts::dump::DumpModuleFiles = true; opts::dump::DumpPublics = true; opts::dump::DumpSectionContribs = true; + opts::dump::DumpSectionHeaders = true; opts::dump::DumpSectionMap = true; opts::dump::DumpStreams = true; opts::dump::DumpStreamBlocks = true; opts::dump::DumpStringTable = true; - opts::dump::DumpSectionHeaders = true; opts::dump::DumpSummary = true; opts::dump::DumpSymbols = true; - opts::dump::DumpIds = true; - opts::dump::DumpIdExtras = true; + opts::dump::DumpSymbolStats = true; opts::dump::DumpTypes = true; opts::dump::DumpTypeExtras = true; - opts::dump::DumpModules = true; - opts::dump::DumpModuleFiles = true; + opts::dump::DumpUdtStats = true; + opts::dump::DumpXme = true; + opts::dump::DumpXmi = true; } } if (opts::PdbToYamlSubcommand) { diff --git a/tools/llvm-pdbutil/llvm-pdbutil.h b/tools/llvm-pdbutil/llvm-pdbutil.h index 
901d2a8cd61..98619da9513 100644 --- a/tools/llvm-pdbutil/llvm-pdbutil.h +++ b/tools/llvm-pdbutil/llvm-pdbutil.h @@ -127,7 +127,8 @@ namespace dump { extern llvm::cl::opt<bool> DumpSummary; extern llvm::cl::opt<bool> DumpFpm; extern llvm::cl::opt<bool> DumpStreams; -extern llvm::cl::opt<bool> DumpModuleStats; +extern llvm::cl::opt<bool> DumpSymbolStats; +extern llvm::cl::opt<bool> DumpUdtStats; extern llvm::cl::opt<bool> DumpStreamBlocks; extern llvm::cl::opt<bool> DumpLines; -- 2.50.1