granicus.if.org Git - llvm/commitdiff
[llvm-exegesis] Add throughput mode.
author Clement Courbet <courbet@google.com>
Wed, 30 Jan 2019 16:02:20 +0000 (16:02 +0000)
committer Clement Courbet <courbet@google.com>
Wed, 30 Jan 2019 16:02:20 +0000 (16:02 +0000)
Summary:
This just uses the latency benchmark runner on the parallel uops snippet
generator.

Fixes PR37698.

Reviewers: gchatelet

Subscribers: tschuett, RKSimon, llvm-commits

Differential Revision: https://reviews.llvm.org/D57000

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352632 91177308-0d34-0410-b5e6-96231b3b80d8

docs/CommandGuide/llvm-exegesis.rst
test/tools/llvm-exegesis/X86/inverse_throughput-by-opcode-name.s [new file with mode: 0644]
tools/llvm-exegesis/lib/BenchmarkResult.cpp
tools/llvm-exegesis/lib/BenchmarkResult.h
tools/llvm-exegesis/lib/BenchmarkRunner.h
tools/llvm-exegesis/lib/Latency.cpp
tools/llvm-exegesis/lib/Latency.h
tools/llvm-exegesis/lib/Target.cpp
tools/llvm-exegesis/lib/Target.h
tools/llvm-exegesis/llvm-exegesis.cpp

index f27db9e57edc6c4a8d93764ba39178eb6e094911..bbd90563005b687222323c60a147f3eabe6cd587 100644 (file)
@@ -10,13 +10,13 @@ DESCRIPTION
 -----------
 
 :program:`llvm-exegesis` is a benchmarking tool that uses information available
-in LLVM to measure host machine instruction characteristics like latency or port
-decomposition.
+in LLVM to measure host machine instruction characteristics like latency,
+throughput, or port decomposition.
 
 Given an LLVM opcode name and a benchmarking mode, :program:`llvm-exegesis`
 generates a code snippet that makes execution as serial (resp. as parallel) as
-possible so that we can measure the latency (resp. uop decomposition) of the
-instruction.
+possible so that we can measure the latency (resp. inverse throughput/uop decomposition)
+of the instruction.
 The code snippet is jitted and executed on the host subtarget. The time taken
 (resp. resource usage) is measured using hardware performance counters. The
 result is printed out as YAML to the standard output.
@@ -37,11 +37,13 @@ instruction, run:
 
     $ llvm-exegesis -mode=latency -opcode-name=ADD64rr
 
-Measuring the uop decomposition of an instruction works similarly:
+Measuring the uop decomposition or inverse throughput of an instruction works similarly:
 
 .. code-block:: bash
 
     $ llvm-exegesis -mode=uops -opcode-name=ADD64rr
+    $ llvm-exegesis -mode=inverse_throughput -opcode-name=ADD64rr
+
 
 The output is a YAML document (the default is to write to stdout, but you can
 redirect the output to a file using `-benchmarks-file`):
@@ -186,7 +188,7 @@ OPTIONS
   Specify the custom code snippet to measure. See example 2 for details.
   Either `opcode-index`, `opcode-name` or `snippets-file` must be set.
 
-.. option:: -mode=[latency|uops|analysis]
+.. option:: -mode=[latency|uops|inverse_throughput|analysis]
 
  Specify the run mode.
 
@@ -197,8 +199,8 @@ OPTIONS
 
 .. option:: -benchmarks-file=</path/to/file>
 
- File to read (`analysis` mode) or write (`latency`/`uops` modes) benchmark
- results. "-" uses stdin/stdout.
+ File to read (`analysis` mode) or write (`latency`/`uops`/`inverse_throughput`
+ modes) benchmark results. "-" uses stdin/stdout.
 
 .. option:: -analysis-clusters-output-file=</path/to/file>
 
diff --git a/test/tools/llvm-exegesis/X86/inverse_throughput-by-opcode-name.s b/test/tools/llvm-exegesis/X86/inverse_throughput-by-opcode-name.s
new file mode 100644 (file)
index 0000000..49cb847
--- /dev/null
@@ -0,0 +1,8 @@
+# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr | FileCheck %s
+
+CHECK:      ---
+CHECK-NEXT: mode: inverse_throughput
+CHECK-NEXT: key:
+CHECK-NEXT:   instructions:
+CHECK-NEXT:     ADD32rr
+CHECK: key: inverse_throughput
index 01748d3c45ba38eb9f174698718550659b31d49b..e18fdf332ae25dd2cd8273ed92bc0d178d72b8e0 100644 (file)
@@ -209,6 +209,8 @@ struct ScalarEnumerationTraits<exegesis::InstructionBenchmark::ModeE> {
     Io.enumCase(Value, "", exegesis::InstructionBenchmark::Unknown);
     Io.enumCase(Value, "latency", exegesis::InstructionBenchmark::Latency);
     Io.enumCase(Value, "uops", exegesis::InstructionBenchmark::Uops);
+    Io.enumCase(Value, "inverse_throughput",
+                exegesis::InstructionBenchmark::InverseThroughput);
   }
 };
 
index c0050054273202d0259b4bedc8d5329f6d7590b2..0ef4fb3caa95cc0b865f9cc17fabcff0ade20b46 100644 (file)
@@ -57,7 +57,7 @@ struct BenchmarkMeasure {
 // The result of an instruction benchmark.
 struct InstructionBenchmark {
   InstructionBenchmarkKey Key;
-  enum ModeE { Unknown, Latency, Uops };
+  enum ModeE { Unknown, Latency, Uops, InverseThroughput };
   ModeE Mode;
   std::string CpuName;
   std::string LLVMTriple;
index b2637788278373cd6b4c6798610cecd8019a93f9..4387bc8456e858ec03388e6a685beb8a577deb30 100644 (file)
@@ -75,6 +75,7 @@ public:
 
 protected:
   const LLVMState &State;
+  const InstructionBenchmark::ModeE Mode;
 
 private:
   virtual llvm::Expected<std::vector<BenchmarkMeasure>>
@@ -84,7 +85,6 @@ private:
   writeObjectFile(const BenchmarkCode &Configuration,
                   llvm::ArrayRef<llvm::MCInst> Code) const;
 
-  const InstructionBenchmark::ModeE Mode;
 
   const std::unique_ptr<ScratchSpace> Scratch;
 };
index 4be615323d83f093dbbb643f8865b2a63d70d36c..2f3fbaef4f4e5e59491bb00dc0af28eab6de0a5e 100644 (file)
@@ -165,6 +165,14 @@ LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
   return std::move(Results);
 }
 
+LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
+                                               InstructionBenchmark::ModeE Mode)
+    : BenchmarkRunner(State, Mode) {
+  assert((Mode == InstructionBenchmark::Latency ||
+          Mode == InstructionBenchmark::InverseThroughput) &&
+         "invalid mode");
+}
+
 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
 
 llvm::Expected<std::vector<BenchmarkMeasure>>
@@ -184,8 +192,17 @@ LatencyBenchmarkRunner::runMeasurements(
     if (*ExpectedCounterValue < MinValue)
       MinValue = *ExpectedCounterValue;
   }
-  std::vector<BenchmarkMeasure> Result = {
-      BenchmarkMeasure::Create("latency", MinValue)};
+  std::vector<BenchmarkMeasure> Result;
+  switch (Mode) {
+  case InstructionBenchmark::Latency:
+    Result = {BenchmarkMeasure::Create("latency", MinValue)};
+    break;
+  case InstructionBenchmark::InverseThroughput:
+    Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
+    break;
+  default:
+    break;
+  }
   return std::move(Result);
 }
 
index 5ab16df11a2f1cfb058b4b1f36e0bb2b52fbecc5..7d6d96a195efb338015e704d73ebf2c63f54afa8 100644 (file)
@@ -32,8 +32,8 @@ public:
 
 class LatencyBenchmarkRunner : public BenchmarkRunner {
 public:
-  LatencyBenchmarkRunner(const LLVMState &State)
-      : BenchmarkRunner(State, InstructionBenchmark::Latency) {}
+  LatencyBenchmarkRunner(const LLVMState &State,
+                         InstructionBenchmark::ModeE Mode);
   ~LatencyBenchmarkRunner() override;
 
 private:
index 1fb0237d46118e5e896bf33768625d8aaddb7456..c662f1f5566fe18803813c5e932232434757bf2c 100644 (file)
@@ -45,6 +45,7 @@ ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode,
   case InstructionBenchmark::Latency:
     return createLatencySnippetGenerator(State);
   case InstructionBenchmark::Uops:
+  case InstructionBenchmark::InverseThroughput:
     return createUopsSnippetGenerator(State);
   }
   return nullptr;
@@ -57,7 +58,8 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
   case InstructionBenchmark::Unknown:
     return nullptr;
   case InstructionBenchmark::Latency:
-    return createLatencyBenchmarkRunner(State);
+  case InstructionBenchmark::InverseThroughput:
+    return createLatencyBenchmarkRunner(State, Mode);
   case InstructionBenchmark::Uops:
     return createUopsBenchmarkRunner(State);
   }
@@ -74,9 +76,9 @@ ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const {
   return llvm::make_unique<UopsSnippetGenerator>(State);
 }
 
-std::unique_ptr<BenchmarkRunner>
-ExegesisTarget::createLatencyBenchmarkRunner(const LLVMState &State) const {
-  return llvm::make_unique<LatencyBenchmarkRunner>(State);
+std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
+    const LLVMState &State, InstructionBenchmark::ModeE Mode) const {
+  return llvm::make_unique<LatencyBenchmarkRunner>(State, Mode);
 }
 
 std::unique_ptr<BenchmarkRunner>
index da3441e67b270bfbfbd554aa2cc759cc8dc64db0..f3429b79a34beb9a874c02eebbefd465e3abe35e 100644 (file)
@@ -130,7 +130,7 @@ private:
   std::unique_ptr<SnippetGenerator> virtual createUopsSnippetGenerator(
       const LLVMState &State) const;
   std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
-      const LLVMState &State) const;
+      const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
   std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
       const LLVMState &State) const;
 
index 145fea55bebf27a709a509c8f69d910e5d2289f2..ce11fadbf6eaadd4c9dc19f0d23f4e24e8f24b30 100644 (file)
@@ -56,16 +56,19 @@ static cl::opt<std::string> SnippetsFile("snippets-file",
 static cl::opt<std::string> BenchmarkFile("benchmarks-file", cl::desc(""),
                                           cl::init(""));
 
-static cl::opt<exegesis::InstructionBenchmark::ModeE>
-    BenchmarkMode("mode", cl::desc("the mode to run"),
-                  cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency,
-                                        "latency", "Instruction Latency"),
-                             clEnumValN(exegesis::InstructionBenchmark::Uops,
-                                        "uops", "Uop Decomposition"),
-                             // When not asking for a specific benchmark mode,
-                             // we'll analyse the results.
-                             clEnumValN(exegesis::InstructionBenchmark::Unknown,
-                                        "analysis", "Analysis")));
+static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
+    "mode", cl::desc("the mode to run"),
+    cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency, "latency",
+                          "Instruction Latency"),
+               clEnumValN(exegesis::InstructionBenchmark::InverseThroughput,
+                          "inverse_throughput",
+                          "Instruction Inverse Throughput"),
+               clEnumValN(exegesis::InstructionBenchmark::Uops, "uops",
+                          "Uop Decomposition"),
+               // When not asking for a specific benchmark mode,
+               // we'll analyse the results.
+               clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
+                          "Analysis")));
 
 static cl::opt<unsigned>
     NumRepetitions("num-repetitions",