granicus.if.org Git - llvm/commitdiff
[Remarks] Add string deduplication using a string table
author: Francis Visoiu Mistrih <francisvm@yahoo.com>
Wed, 24 Apr 2019 00:06:24 +0000 (00:06 +0000)
committer: Francis Visoiu Mistrih <francisvm@yahoo.com>
Wed, 24 Apr 2019 00:06:24 +0000 (00:06 +0000)
* Add support for uniquing strings in the remark streamer and emitting the string table in the remarks section.

* Add parsing support for the string table in the RemarkParser.

From this remark:

```
--- !Missed
Pass:     inline
Name:     NoDefinition
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
            Line: 7, Column: 3 }
Function: printArgsNoRet
Args:
  - Callee:   printf
  - String:   ' will not be inlined into '
  - Caller:   printArgsNoRet
    DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
                Line: 6, Column: 0 }
  - String:   ' because its definition is unavailable'
...
```

to:

```
--- !Missed
Pass: 0
Name: 1
DebugLoc: { File: 3, Line: 7, Column: 3 }
Function: 2
Args:
  - Callee:   4
  - String:   5
  - Caller:   2
    DebugLoc: { File: 3, Line: 6, Column: 0 }
  - String:   6
...
```

And the string table in the .remarks/__remarks section containing:

```
inline\0NoDefinition\0printArgsNoRet\0
test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c\0printf\0
will not be inlined into \0 because its definition is unavailable\0
```

This is mostly intended for testing purposes, but it gives us a 2x
reduction in the remark size, and is an incremental step towards the
updates to the remarks file format.

Differential Revision: https://reviews.llvm.org/D60227

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359050 91177308-0d34-0410-b5e6-96231b3b80d8

19 files changed:
docs/CodeGenerator.rst
include/llvm/IR/RemarkStreamer.h
include/llvm/Remarks/RemarkParser.h
include/llvm/Remarks/RemarkStringTable.h [new file with mode: 0644]
include/llvm/Support/YAMLTraits.h
lib/CodeGen/AsmPrinter/AsmPrinter.cpp
lib/CodeGen/AsmPrinter/LLVMBuild.txt
lib/IR/DiagnosticInfo.cpp
lib/IR/LLVMBuild.txt
lib/IR/RemarkStreamer.cpp
lib/Remarks/CMakeLists.txt
lib/Remarks/RemarkParser.cpp
lib/Remarks/RemarkStringTable.cpp [new file with mode: 0644]
lib/Remarks/YAMLRemarkParser.cpp
lib/Remarks/YAMLRemarkParser.h
test/CodeGen/X86/remarks-section.ll
unittests/Remarks/CMakeLists.txt
unittests/Remarks/RemarksStrTabParsingTest.cpp [new file with mode: 0644]
unittests/Remarks/YAMLRemarksParsingTest.cpp

index 094bd05adb60c2e41d732d61bfe6aa00439ed2ae..b0cee783a912bf8049b6bb5f6d1132b55de6b733 100644 (file)
@@ -1597,6 +1597,10 @@ A section containing metadata on remark diagnostics will be emitted when
 
 * a magic number: "REMARKS\0"
 * the version number: a little-endian uint64_t
+* the string table:
+  * the total size of the string table (the size itself excluded):
+    little-endian uint64_t
+  * a list of null-terminated strings
 * the absolute file path to the serialized remark diagnostics: a
   null-terminated string.
 
index 64de27e372600b62025eafce4ad8871cdfab2850..b4672c269da4ac6e148132642292d478c7937afc 100644 (file)
 #define LLVM_IR_REMARKSTREAMER_H
 
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Remarks/RemarkStringTable.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Regex.h"
 #include <string>
 #include <vector>
 
@@ -34,6 +35,11 @@ class RemarkStreamer {
   /// The YAML streamer.
   yaml::Output YAMLOutput;
 
+  /// The string table containing all the unique strings used in the output.
+  /// The table will be serialized in a section to be consumed after the
+  /// compilation.
+  remarks::StringTable StrTab;
+
 public:
   RemarkStreamer(StringRef Filename, raw_ostream& OS);
   /// Return the filename that the remark diagnostics are emitted to.
@@ -45,6 +51,9 @@ public:
   Error setFilter(StringRef Filter);
   /// Emit a diagnostic through the streamer.
   void emit(const DiagnosticInfoOptimizationBase &Diag);
+  /// The string table used during emission.
+  remarks::StringTable &getStringTable() { return StrTab; }
+  const remarks::StringTable &getStringTable() const { return StrTab; }
 };
 } // end namespace llvm
 
index fb8d4c6021a8982e4c637bcf7a1b327c14cbb87e..aa6f04a51ce4b42060b410336a2798c25ffca107 100644 (file)
@@ -13,6 +13,7 @@
 #ifndef LLVM_REMARKS_REMARK_PARSER_H
 #define LLVM_REMARKS_REMARK_PARSER_H
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Remarks/Remark.h"
 #include "llvm/Support/Error.h"
@@ -32,6 +33,11 @@ struct Parser {
   /// This constructor should be only used for parsing YAML remarks.
   Parser(StringRef Buffer);
 
+  /// Create a parser parsing \p Buffer to Remark objects, using \p StrTabBuf as
+  /// string table.
+  /// This constructor should be only used for parsing YAML remarks.
+  Parser(StringRef Buffer, StringRef StrTabBuf);
+
   // Needed because ParserImpl is an incomplete type.
   ~Parser();
 
@@ -40,6 +46,18 @@ struct Parser {
   Expected<const Remark *> getNext() const;
 };
 
+/// In-memory representation of the string table parsed from a buffer (e.g. the
+/// remarks section).
+struct ParsedStringTable {
+  /// The buffer mapped from the section contents.
+  StringRef Buffer;
+  /// Offset from the beginning of Buffer of each string entry, in the order the
+  /// entries appear in the buffer.
+  SmallVector<size_t, 8> Offsets;
+
+  /// Return the string entry at position \p Index, or an error if \p Index is
+  /// out of bounds.
+  Expected<StringRef> operator[](size_t Index);
+  /// Record the offset of every '\0'-separated entry found in \p Buffer.
+  ParsedStringTable(StringRef Buffer);
+};
+
 } // end namespace remarks
 } // end namespace llvm
 
diff --git a/include/llvm/Remarks/RemarkStringTable.h b/include/llvm/Remarks/RemarkStringTable.h
new file mode 100644 (file)
index 0000000..f9b4fdb
--- /dev/null
@@ -0,0 +1,59 @@
+//===-- RemarkStringTable.h - Serializing string table ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to deduplicate and serialize a string table used for
+// generating remarks.
+//
+// For parsing a string table, use ParsedStringTable in RemarkParser.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_STRING_TABLE_H
+#define LLVM_REMARKS_REMARK_STRING_TABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace remarks {
+
+/// The string table used for serializing remarks.
+/// This table can be for example serialized in a section to be consumed after
+/// the compilation.
+struct StringTable {
+  /// Allocator holding all the memory used by the map.
+  /// It is declared before StrTab because StrTab is constructed with a
+  /// reference to it.
+  BumpPtrAllocator Allocator;
+  /// The string table containing all the unique strings used in the output.
+  /// It maps a string to a unique ID.
+  StringMap<unsigned, BumpPtrAllocator &> StrTab;
+  /// Total size in bytes of the strings when serialized, counting one '\0'
+  /// terminator per string.
+  size_t SerializedSize = 0;
+
+  StringTable() : Allocator(), StrTab(Allocator) {}
+  /// Add a string to the table. It returns the unique ID of the string,
+  /// together with the string as stored in the table.
+  std::pair<unsigned, StringRef> add(StringRef Str);
+  /// Serialize the string table to a stream. It is serialized as a little
+  /// endian uint64 (the size of the table in bytes) followed by a sequence of
+  /// NULL-terminated strings, where the N-th string is the string with the ID N
+  /// in the StrTab map.
+  void serialize(raw_ostream &OS) const;
+  /// Serialize the string table to a vector. This allows users to do the actual
+  /// writing to file/memory/other.
+  /// The string with the ID == N should be the N-th element in the vector.
+  std::vector<StringRef> serialize() const;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_STRING_TABLE_H */
index ec508ecb14f8f2cc6b461baad7854ac8619ff4df..2185cd75ae4f4a04fbae7150d465ca1be4314bab 100644 (file)
@@ -1905,6 +1905,11 @@ struct SequenceTraits<SmallVector<T, N>,
                       typename std::enable_if<CheckIsBool<
                           SequenceElementTraits<T>::flow>::value>::type>
     : SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+// Sequence traits for SmallVectorImpl<T>, mirroring the SmallVector<T, N>
+// specialization, so a sequence can be mapped through a SmallVectorImpl<T>
+// reference.
+template <typename T>
+struct SequenceTraits<SmallVectorImpl<T>,
+                      typename std::enable_if<CheckIsBool<
+                          SequenceElementTraits<T>::flow>::value>::type>
+    : SequenceTraitsImpl<SmallVectorImpl<T>, SequenceElementTraits<T>::flow> {};
 
 // Sequences of fundamental types use flow formatting.
 template <typename T>
index 987d324df2cc5236d3d3bd65bcb25f6363b82c0a..fc5049b90674144110b1f9a309b1e413e16108f1 100644 (file)
@@ -1362,6 +1362,29 @@ void AsmPrinter::emitRemarksSection(Module &M) {
   support::endian::write64le(Version.data(), remarks::Version);
   OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
 
+  // Emit the string table in the section.
+  // Note: we need to use the streamer here to emit it in the section. We can't
+  // just use the serialize function with a raw_ostream because of the way
+  // MCStreamers work.
+  const remarks::StringTable &StrTab = RS->getStringTable();
+  std::vector<StringRef> StrTabStrings = StrTab.serialize();
+  uint64_t StrTabSize = StrTab.SerializedSize;
+  // Emit the total size of the string table (the size itself excluded):
+  // little-endian uint64_t.
+  // The total size is located after the version number.
+  std::array<char, 8> StrTabSizeBuf;
+  support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+  OutStreamer->EmitBinaryData(
+      StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+  // Emit a list of null-terminated strings.
+  // Note: the order is important here: the ID used in the remarks corresponds
+  // to the position of the string in the section.
+  for (StringRef Str : StrTabStrings) {
+    OutStreamer->EmitBytes(Str);
+    // Explicitly emit a '\0'.
+    OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+  }
+
   // Emit the null-terminated absolute path to the remark file.
   // The path is located at the offset 0x4 in the section.
   StringRef FilenameRef = RS->getFilename();
index 56449269681dd40ce7ac0200c8cec6f67f8b3181..44595a18f1b54aa3a83e883b7bb50b1249201ca9 100644 (file)
@@ -18,4 +18,4 @@
 type = Library
 name = AsmPrinter
 parent = Libraries
-required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Support Target
+required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Remarks Support Target
index 14bee35dc2929ec6060c63c8e6fdc1cdb44c8151..7c387e957244a5d2b3899ff014763336a28a6858 100644 (file)
@@ -43,6 +43,8 @@
 
 using namespace llvm;
 
+cl::opt<bool> UseStringTable("remarks-yaml-string-table", cl::init(false));
+
 int llvm::getNextAvailablePluginDiagnosticKind() {
   static std::atomic<int> PluginKindID(DK_FirstPluginKind);
   return ++PluginKindID;
@@ -373,6 +375,20 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}
 
+/// Map the fields shared by every remark, in the order they are written:
+/// Pass, Name, DebugLoc (optional), Function, Hotness (optional) and Args
+/// (optional).
+/// \p T is the type of the pass, remark and function name fields, so they can
+/// be given either as strings or as string table IDs.
+template <typename T>
+static void mapRemarkHeader(
+    yaml::IO &io, T PassName, T RemarkName, DiagnosticLocation DL,
+    T FunctionName, Optional<uint64_t> Hotness,
+    SmallVectorImpl<DiagnosticInfoOptimizationBase::Argument> &Args) {
+  io.mapRequired("Pass", PassName);
+  io.mapRequired("Name", RemarkName);
+
+  // When writing, an invalid location is left out of the output entirely.
+  const bool MapDebugLoc = !io.outputting() || DL.isValid();
+  if (MapDebugLoc)
+    io.mapOptional("DebugLoc", DL);
+
+  io.mapRequired("Function", FunctionName);
+  io.mapOptional("Hotness", Hotness);
+  io.mapOptional("Args", Args);
+}
+
 namespace llvm {
 namespace yaml {
 
@@ -413,13 +429,18 @@ void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
       GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
 
   StringRef PassName(OptDiag->PassName);
-  io.mapRequired("Pass", PassName);
-  io.mapRequired("Name", OptDiag->RemarkName);
-  if (!io.outputting() || DL.isValid())
-    io.mapOptional("DebugLoc", DL);
-  io.mapRequired("Function", FN);
-  io.mapOptional("Hotness", OptDiag->Hotness);
-  io.mapOptional("Args", OptDiag->Args);
+  if (UseStringTable) {
+    remarks::StringTable &StrTab =
+        reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+    unsigned PassID = StrTab.add(PassName).first;
+    unsigned NameID = StrTab.add(OptDiag->RemarkName).first;
+    unsigned FunctionID = StrTab.add(FN).first;
+    mapRemarkHeader(io, PassID, NameID, DL, FunctionID, OptDiag->Hotness,
+                    OptDiag->Args);
+  } else {
+    mapRemarkHeader(io, PassName, OptDiag->RemarkName, DL, FN, OptDiag->Hotness,
+                    OptDiag->Args);
+  }
 }
 
 template <> struct MappingTraits<DiagnosticLocation> {
@@ -430,7 +451,15 @@ template <> struct MappingTraits<DiagnosticLocation> {
     unsigned Line = DL.getLine();
     unsigned Col = DL.getColumn();
 
-    io.mapRequired("File", File);
+    if (UseStringTable) {
+      remarks::StringTable &StrTab =
+          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+      unsigned FileID = StrTab.add(File).first;
+      io.mapRequired("File", FileID);
+    } else {
+      io.mapRequired("File", File);
+    }
+
     io.mapRequired("Line", Line);
     io.mapRequired("Column", Col);
   }
@@ -459,12 +488,18 @@ template <> struct BlockScalarTraits<StringBlockVal> {
 template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
   static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
     assert(io.outputting() && "input not yet implemented");
-    // Emit a string block scalar for multiline strings, to preserve newlines.
-    if (StringRef(A.Val).count('\n') > 1) {
+
+    if (UseStringTable) {
+      remarks::StringTable &StrTab =
+          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+      auto ValueID = StrTab.add(A.Val).first;
+      io.mapRequired(A.Key.data(), ValueID);
+    } else if (StringRef(A.Val).count('\n') > 1) {
       StringBlockVal S(A.Val);
       io.mapRequired(A.Key.data(), S);
-    } else
+    } else {
       io.mapRequired(A.Key.data(), A.Val);
+    }
     if (A.Loc.isValid())
       io.mapOptional("DebugLoc", A.Loc);
   }
index b3dcd413f41d31309801487241e89b468d297cf0..73d97108c400053bb6550313e78d5b4263596408 100644 (file)
@@ -18,4 +18,4 @@
 type = Library
 name = Core
 parent = Libraries
-required_libraries = BinaryFormat Support
+required_libraries = BinaryFormat Remarks Support
index 022c17d6722bf61b1450aaed065e917ca27c8414..d2a4ed4adf49c0e1f19c7b18511cfc414bc66d2c 100644 (file)
@@ -17,7 +17,7 @@ using namespace llvm;
 
 RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS)
     : Filename(Filename), OS(OS),
-      YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+      YAMLOutput(OS, reinterpret_cast<void *>(this)), StrTab() {
   assert(!Filename.empty() && "This needs to be a real filename.");
 }
 
index 2ab7e8476a19bcf4b33aa3a0ff060a2696a5cc83..ccbca7ea4f4cd96ee96f7e8b57bcdc17ca0a4600 100644 (file)
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMRemarks
   Remark.cpp
   RemarkParser.cpp
+  RemarkStringTable.cpp
   YAMLRemarkParser.cpp
 )
index 30de40dd54a09614df6a675b2d7adc1ce3bcf2d4..144f08f6feb97befc2c9de09fea1650b49af2713 100644 (file)
@@ -22,6 +22,9 @@ using namespace llvm::remarks;
 
 Parser::Parser(StringRef Buf) : Impl(llvm::make_unique<YAMLParserImpl>(Buf)) {}
 
+// Create a YAML parser for Buf that uses StrTabBuf as its string table.
+Parser::Parser(StringRef Buf, StringRef StrTabBuf)
+    : Impl(llvm::make_unique<YAMLParserImpl>(Buf, StrTabBuf)) {}
+
 Parser::~Parser() = default;
 
 static Expected<const Remark *> getNextYAML(YAMLParserImpl &Impl) {
@@ -56,6 +59,31 @@ Expected<const Remark *> Parser::getNext() const {
   llvm_unreachable("Get next called with an unknown parsing implementation.");
 }
 
+/// Index the '\0'-separated entries of \p InBuffer by recording where each
+/// one starts.
+ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
+  // Walk the buffer one entry at a time. An entry starts exactly where the
+  // not-yet-parsed remainder starts, so only that position, relative to the
+  // beginning of the buffer, is stored. Splitting on '\0' then skips past the
+  // entry and its separator.
+  for (StringRef Rest = InBuffer; !Rest.empty();
+       Rest = Rest.split('\0').second)
+    Offsets.push_back(Rest.data() - Buffer.data());
+}
+
+Expected<StringRef> ParsedStringTable::operator[](size_t Index) {
+  if (Index >= Offsets.size())
+    return createStringError(
+        std::make_error_code(std::errc::invalid_argument),
+        "String with index %u is out of bounds (size = %u).", Index,
+        Offsets.size());
+
+  size_t Offset = Offsets[Index];
+  // If it's the last offset, we can't use the next offset to know the size of
+  // the string.
+  size_t NextOffset =
+      (Index == Offsets.size() - 1) ? Buffer.size() : Offsets[Index + 1];
+  return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
+}
+
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef)
 
diff --git a/lib/Remarks/RemarkStringTable.cpp b/lib/Remarks/RemarkStringTable.cpp
new file mode 100644 (file)
index 0000000..984aa5b
--- /dev/null
@@ -0,0 +1,48 @@
+//===- RemarkStringTable.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark string table used at remark generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+/// Add \p Str to the table if it is not there yet. Returns the ID of the
+/// string together with the string as stored in the table.
+std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
+  // IDs are handed out sequentially: a new string gets the current count.
+  size_t CandidateID = StrTab.size();
+  auto Inserted = StrTab.insert({Str, CandidateID});
+  const auto &Entry = *Inserted.first;
+  // Only a string seen for the first time grows the serialized table.
+  if (Inserted.second)
+    SerializedSize += Entry.first().size() + 1; // +1 for the '\0'
+  // The stored ID is CandidateID for a new string, or the ID the string
+  // already had if it was in the table.
+  return {Entry.second, Entry.first()};
+}
+
+/// Write the table to \p OS: first the size of the strings in bytes as a
+/// little-endian uint64_t, then every string followed by a '\0'.
+void StringTable::serialize(raw_ostream &OS) const {
+  // Emit the size in bytes of the serialized strings.
+  uint64_t SizeInBytes = SerializedSize;
+  support::endian::write(OS, SizeInBytes, support::little);
+  // Emit the strings, each one explicitly terminated by a '\0'.
+  const std::vector<StringRef> Strings = serialize();
+  for (StringRef Str : Strings)
+    OS << Str << '\0';
+}
+
+/// Return the strings of the table ordered by ID: the string with the ID N is
+/// the N-th element of the vector.
+std::vector<StringRef> StringTable::serialize() const {
+  // Size the vector up front, one empty StringRef per entry. Parentheses are
+  // used on purpose: with braces the size would first be considered as an
+  // element for the initializer-list constructor.
+  std::vector<StringRef> Strings(StrTab.size());
+  // The iteration order of the map is not relied upon: each key is placed at
+  // the position given by its ID.
+  for (const auto &KV : StrTab)
+    Strings[KV.second] = KV.first();
+  return Strings;
+}
index db6b0b25bd14ad23fef4feb7826a58fe8ab6f407..0c265856d28c843a63b357efbc007eac707670f9 100644 (file)
@@ -34,7 +34,19 @@ Error YAMLRemarkParser::parseStr(T &Result, yaml::KeyValueNode &Node) {
   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
   if (!Value)
     return make_error<YAMLParseError>("expected a value of scalar type.", Node);
-  StringRef Tmp = Value->getRawValue();
+  StringRef Tmp;
+  if (!StrTab) {
+    Tmp = Value->getRawValue();
+  } else {
+    // If we have a string table, parse it as an unsigned.
+    unsigned StrID = 0;
+    if (Error E = parseUnsigned(StrID, Node))
+      return E;
+    if (Expected<StringRef> Str = (*StrTab)[StrID])
+      Tmp = *Str;
+    else
+      return Str.takeError();
+  }
 
   if (Tmp.front() == '\'')
     Tmp = Tmp.drop_front();
index 5095a4be730cafd2d7070b15fc3541b172efba46..5fd17865b69bf110226ac2faa41db926607afabf 100644 (file)
@@ -17,6 +17,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
@@ -38,7 +39,8 @@ struct YAMLRemarkParser {
   raw_string_ostream ErrorStream;
   /// Temporary parsing buffer for the arguments.
   SmallVector<Argument, 8> TmpArgs;
-
+  /// The string table used for parsing strings.
+  Optional<ParsedStringTable> StrTab;
   /// The state used by the parser to parse a remark entry. Invalidated with
   /// every call to `parseYAMLElement`.
   struct ParseState {
@@ -57,10 +59,13 @@ struct YAMLRemarkParser {
   /// not be containing any value.
   Optional<ParseState> State;
 
-  YAMLRemarkParser(StringRef Buf)
+  YAMLRemarkParser(StringRef Buf, Optional<StringRef> StrTabBuf = None)
       : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
-        TmpArgs() {
+        TmpArgs(), StrTab() {
     SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this);
+
+    if (StrTabBuf)
+      StrTab.emplace(*StrTabBuf);
   }
 
   /// Parse a YAML element.
@@ -122,8 +127,8 @@ struct YAMLParserImpl : public ParserImpl {
   /// Set to `true` if we had any errors during parsing.
   bool HasErrors = false;
 
-  YAMLParserImpl(StringRef Buf)
-      : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf),
+  YAMLParserImpl(StringRef Buf, Optional<StringRef> StrTabBuf = None)
+      : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf, StrTabBuf),
         YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {}
 
   static bool classof(const ParserImpl *PI) {
index 3780ccae8657dd8978c89c54512d66b474ca734e..6d7957a16256060b8d77fc3a1bfb3f50aec31799 100644 (file)
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-linux -remarks-section -pass-remarks-output=%/t.yaml | FileCheck -DPATH=%/t.yaml %s
 ; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN -DPATH=%/t.yaml %s
+; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -remarks-yaml-string-table -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-STRTAB -DPATH=%/t.yaml %s
 
 ; CHECK-LABEL: func1:
 
 ; The version:
 ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
 ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
 ; The remark file path:
 ; CHECK-NEXT: .ascii "[[PATH]]"
 ; Null-terminator:
 ; The version:
 ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
 ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
 ; The remark file path:
 ; CHECK-DARWIN-NEXT: .ascii "[[PATH]]"
 ; Null-terminator:
 ; CHECK-DARWIN-NEXT: .byte 0
+
+; CHECK-DARWIN-STRTAB: .section __LLVM,__remarks,regular,debug
+; The magic number:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "REMARKS"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The version:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The size of the string table:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x71, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "prologepilog"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "StackSize"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "func1"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 48
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " stack bytes in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "asm-printer"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "InstructionCount"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 49
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " instructions in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The remark file path:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "[[PATH]]"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
 define void @func1() {
   ret void
 }
index b7d22b694ef8d93b9be9fb6f77ee5bd0a4450723..d74960e05582050b5c8065e29d1c3f627f2026c5 100644 (file)
@@ -4,5 +4,6 @@ set(LLVM_LINK_COMPONENTS
   )
 
 add_llvm_unittest(RemarksTests
+  RemarksStrTabParsingTest.cpp
   YAMLRemarksParsingTest.cpp
   )
diff --git a/unittests/Remarks/RemarksStrTabParsingTest.cpp b/unittests/Remarks/RemarksStrTabParsingTest.cpp
new file mode 100644 (file)
index 0000000..f1e9eb8
--- /dev/null
@@ -0,0 +1,39 @@
+//===- unittests/Remarks/RemarksStrTabParsingTest.cpp - StrTab tests ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+TEST(RemarksStrTab, ParsingEmpty) {
+  StringRef Empty("", 0);
+  remarks::ParsedStringTable StrTab(Empty);
+  Expected<StringRef> Nothing = StrTab[0];
+  EXPECT_FALSE(static_cast<bool>(Nothing));
+  EXPECT_EQ(toString(Nothing.takeError()),
+            "String with index 0 is out of bounds (size = 0).");
+}
+
+// Every entry of a well-formed table can be retrieved through its index.
+TEST(RemarksStrTab, ParsingGood) {
+  // The length counts the literal's implicit trailing '\0', so the last entry
+  // is terminated like the others.
+  StringRef Strings("str1\0str2\0str3\0str4", 20);
+  remarks::ParsedStringTable StrTab(Strings);
+
+  const StringRef Wanted[] = {"str1", "str2", "str3", "str4"};
+  size_t Index = 0;
+  for (StringRef Entry : Wanted) {
+    Expected<StringRef> Result = StrTab[Index++];
+    EXPECT_TRUE(static_cast<bool>(Result));
+    EXPECT_EQ(*Result, Entry);
+  }
+}
index 36dbb0c9518c612d89c9396c095abfedc3394f49..1a6267c9785a35ff36921fb3f26776ccf9e43494 100644 (file)
@@ -492,3 +492,105 @@ TEST(YAMLRemarks, ContentsCAPI) {
   EXPECT_FALSE(LLVMRemarkParserHasError(Parser));
   LLVMRemarkParserDispose(Parser);
 }
+
+// Parse a remark whose string values are indices into StrTabBuf and check that
+// the parsed remark exposes the strings those indices refer to.
+TEST(YAMLRemarks, ContentsStrTab) {
+  StringRef Buf = "\n"
+                  "--- !Missed\n"
+                  "Pass: 0\n"
+                  "Name: 1\n"
+                  "DebugLoc: { File: 2, Line: 3, Column: 12 }\n"
+                  "Function: 3\n"
+                  "Hotness: 4\n"
+                  "Args:\n"
+                  "  - Callee: 5\n"
+                  "  - String: 7\n"
+                  "  - Caller: 3\n"
+                  "    DebugLoc: { File: 2, Line: 2, Column: 0 }\n"
+                  "  - String: 8\n"
+                  "\n";
+
+  // Entries by index: 0 "inline", 1 "NoDefinition", 2 "file.c", 3 "foo",
+  // 4 "Callee", 5 "bar", 6 "String", 7 " will not be inlined into ",
+  // 8 " because its definition is unavailable".
+  // The explicit length (115) counts the literal's implicit trailing '\0', so
+  // the embedded '\0' separators are kept and the last entry is terminated.
+  StringRef StrTabBuf =
+      StringRef("inline\0NoDefinition\0file.c\0foo\0Callee\0bar\0String\0 "
+                "will not be inlined into \0 because its definition is "
+                "unavailable",
+                115);
+
+  remarks::Parser Parser(Buf, StrTabBuf);
+  Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
+  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+  EXPECT_TRUE(*RemarkOrErr != nullptr);
+
+  const remarks::Remark &Remark = **RemarkOrErr;
+  EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed);
+  EXPECT_EQ(checkStr(Remark.PassName, 6), "inline");
+  EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition");
+  EXPECT_EQ(checkStr(Remark.FunctionName, 3), "foo");
+  EXPECT_TRUE(Remark.Loc);
+  const remarks::RemarkLocation &RL = *Remark.Loc;
+  EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+  EXPECT_EQ(RL.SourceLine, 3U);
+  EXPECT_EQ(RL.SourceColumn, 12U);
+  EXPECT_TRUE(Remark.Hotness);
+  // The hotness is expected as the number itself, not as entry 4 of the table.
+  EXPECT_EQ(*Remark.Hotness, 4U);
+  EXPECT_EQ(Remark.Args.size(), 4U);
+
+  // Check the arguments in the order they appear in the remark.
+  unsigned ArgID = 0;
+  for (const remarks::Argument &Arg : Remark.Args) {
+    switch (ArgID) {
+    case 0:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "Callee");
+      EXPECT_EQ(checkStr(Arg.Val, 3), "bar");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    case 1:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+      EXPECT_EQ(checkStr(Arg.Val, 26), " will not be inlined into ");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    case 2: {
+      EXPECT_EQ(checkStr(Arg.Key, 6), "Caller");
+      EXPECT_EQ(checkStr(Arg.Val, 3), "foo");
+      EXPECT_TRUE(Arg.Loc);
+      const remarks::RemarkLocation &RL = *Arg.Loc;
+      EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+      EXPECT_EQ(RL.SourceLine, 2U);
+      EXPECT_EQ(RL.SourceColumn, 0U);
+      break;
+    }
+    case 3:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+      EXPECT_EQ(checkStr(Arg.Val, 38),
+                " because its definition is unavailable");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    default:
+      break;
+    }
+    ++ArgID;
+  }
+
+  // There is only one remark in the buffer: the next one is a null pointer.
+  RemarkOrErr = Parser.getNext();
+  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+  EXPECT_EQ(*RemarkOrErr, nullptr);
+}
+
+TEST(YAMLRemarks, ParsingBadStringTableIndex) {
+  StringRef Buf = "\n"
+                  "--- !Missed\n"
+                  "Pass: 50\n"
+                  "\n";
+
+  StringRef StrTabBuf = StringRef("inline");
+
+  remarks::Parser Parser(Buf, StrTabBuf);
+  Expected<const remarks::Remark *> Remark = Parser.getNext();
+  EXPECT_FALSE(Remark); // Expect an error here.
+
+  std::string ErrorStr;
+  raw_string_ostream Stream(ErrorStr);
+  handleAllErrors(Remark.takeError(),
+                  [&](const ErrorInfoBase &EIB) { EIB.log(Stream); });
+  EXPECT_TRUE(
+      StringRef(Stream.str())
+          .contains("String with index 50 is out of bounds (size = 1)."));
+}