granicus.if.org Git - llvm/commitdiff
[llvm-exegesis] Split Epsilon param into two (PR40787)
author Roman Lebedev <lebedev.ri@gmail.com>
Mon, 25 Feb 2019 09:36:12 +0000 (09:36 +0000)
committer Roman Lebedev <lebedev.ri@gmail.com>
Mon, 25 Feb 2019 09:36:12 +0000 (09:36 +0000)
Summary:
This eps param is used for two distinct things:
* initial point clusterization
* checking clusters against the llvm values

What if one wants to only look at highly different clusters, without changing
the clustering itself? In particular, this helps to weed out noisy measurements
(since the clusterization epsilon is still small, so there is a better chance
that noisy measurements from the same opcode will go into different clusters)

By splitting it into two params it is now possible.

This is nearly-free performance-wise:
Old:
```
$ perf stat -r 25 ./bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency-1.yaml -analysis-inconsistencies-output-file=/tmp/clusters-old.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 10099 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-old.html'
...
 Performance counter stats for './bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency-1.yaml -analysis-inconsistencies-output-file=/tmp/clusters-old.html' (25 runs):

            390.01 msec task-clock                #    0.998 CPUs utilized            ( +-  0.25% )
                12      context-switches          #   31.735 M/sec                    ( +- 27.38% )
                 0      cpu-migrations            #    0.000 K/sec
              4745      page-faults               # 12183.732 M/sec                   ( +-  0.54% )
        1562711900      cycles                    # 4012303.327 GHz                   ( +-  0.24% )  (82.90%)
         185567822      stalled-cycles-frontend   #   11.87% frontend cycles idle     ( +-  0.52% )  (83.30%)
         392106234      stalled-cycles-backend    #   25.09% backend cycles idle      ( +-  1.31% )  (33.79%)
        1839236666      instructions              #    1.18  insn per cycle
                                                  #    0.21  stalled cycles per insn  ( +-  0.15% )  (50.37%)
         407035764      branches                  # 1045074878.710 M/sec              ( +-  0.12% )  (66.80%)
          10896459      branch-misses             #    2.68% of all branches          ( +-  0.17% )  (83.20%)

          0.390629 +- 0.000972 seconds time elapsed  ( +-  0.25% )
```
```
$ perf stat -r 9 ./bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency.yml -analysis-inconsistencies-output-file=/tmp/clusters-old.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 50572 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-old.html'
...
 Performance counter stats for './bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency.yml -analysis-inconsistencies-output-file=/tmp/clusters-old.html' (9 runs):

           6803.36 msec task-clock                #    0.999 CPUs utilized            ( +-  0.96% )
               262      context-switches          #   38.546 M/sec                    ( +- 23.06% )
                 0      cpu-migrations            #    0.065 M/sec                    ( +- 76.03% )
             13287      page-faults               # 1953.206 M/sec                    ( +-  0.32% )
       27252537904      cycles                    # 4006024.257 GHz                   ( +-  0.95% )  (83.31%)
        1496314935      stalled-cycles-frontend   #    5.49% frontend cycles idle     ( +-  0.97% )  (83.32%)
       16128404524      stalled-cycles-backend    #   59.18% backend cycles idle      ( +-  0.30% )  (33.37%)
       17611143370      instructions              #    0.65  insn per cycle
                                                  #    0.92  stalled cycles per insn  ( +-  0.05% )  (50.04%)
        3894906599      branches                  # 572537147.437 M/sec               ( +-  0.03% )  (66.69%)
         116314514      branch-misses             #    2.99% of all branches          ( +-  0.20% )  (83.35%)

            6.8118 +- 0.0689 seconds time elapsed  ( +-  1.01%)
```
New:
```
$ perf stat -r 25 ./bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency-1.yaml -analysis-inconsistencies-output-file=/tmp/clusters-new.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 10099 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-new.html'
...
 Performance counter stats for './bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency-1.yaml -analysis-inconsistencies-output-file=/tmp/clusters-new.html' (25 runs):

            400.14 msec task-clock                #    0.998 CPUs utilized            ( +-  0.66% )
                12      context-switches          #   29.429 M/sec                    ( +- 25.95% )
                 0      cpu-migrations            #    0.100 M/sec                    ( +-100.00% )
              4714      page-faults               # 11796.496 M/sec                   ( +-  0.55% )
        1603131306      cycles                    # 4011840.105 GHz                   ( +-  0.66% )  (82.85%)
         199538509      stalled-cycles-frontend   #   12.45% frontend cycles idle     ( +-  2.40% )  (83.10%)
         402249109      stalled-cycles-backend    #   25.09% backend cycles idle      ( +-  1.19% )  (34.05%)
        1847783963      instructions              #    1.15  insn per cycle
                                                  #    0.22  stalled cycles per insn  ( +-  0.18% )  (50.64%)
         407162722      branches                  # 1018925730.631 M/sec              ( +-  0.12% )  (67.02%)
          10932779      branch-misses             #    2.69% of all branches          ( +-  0.51% )  (83.28%)

           0.40077 +- 0.00267 seconds time elapsed  ( +-  0.67% )

lebedevri@pini-pini:/build/llvm-build-Clang-release$ perf stat -r 9 ./bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency.yml -analysis-inconsistencies-output-file=/tmp/clusters-new.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 50572 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-new.html'
...
 Performance counter stats for './bin/llvm-exegesis -mode=analysis -benchmarks-file=/home/lebedevri/PileDriver-Sched/benchmarks-latency.yml -analysis-inconsistencies-output-file=/tmp/clusters-new.html' (9 runs):

           6947.79 msec task-clock                #    1.000 CPUs utilized            ( +-  0.90% )
               217      context-switches          #   31.236 M/sec                    ( +- 36.16% )
                 1      cpu-migrations            #    0.096 M/sec                    ( +- 50.00% )
             13258      page-faults               # 1908.389 M/sec                    ( +-  0.34% )
       27830796523      cycles                    # 4006032.286 GHz                   ( +-  0.89% )  (83.30%)
        1504554006      stalled-cycles-frontend   #    5.41% frontend cycles idle     ( +-  2.10% )  (83.32%)
       16716574843      stalled-cycles-backend    #   60.07% backend cycles idle      ( +-  0.65% )  (33.38%)
       17755545931      instructions              #    0.64  insn per cycle
                                                  #    0.94  stalled cycles per insn  ( +-  0.09% )  (50.04%)
        3897255686      branches                  # 560980426.597 M/sec               ( +-  0.06% )  (66.70%)
         117045395      branch-misses             #    3.00% of all branches          ( +-  0.47% )  (83.34%)

            6.9507 +- 0.0627 seconds time elapsed  ( +-  0.90% )
```

I.e. it's a +2.6% slowdown for one whole sweep, or +2% for 5 whole sweeps.
Within noise, I'd say.

Should help with [[ https://bugs.llvm.org/show_bug.cgi?id=40787 | PR40787 ]].

Reviewers: courbet, gchatelet

Reviewed By: courbet

Subscribers: tschuett, RKSimon, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58476

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354767 91177308-0d34-0410-b5e6-96231b3b80d8

docs/CommandGuide/llvm-exegesis.rst
test/tools/llvm-exegesis/X86/analysis-cluster-stabilization.test
test/tools/llvm-exegesis/X86/analysis-epsilons.test [new file with mode: 0644]
tools/llvm-exegesis/lib/Analysis.cpp
tools/llvm-exegesis/lib/Analysis.h
tools/llvm-exegesis/lib/Clustering.cpp
tools/llvm-exegesis/lib/Clustering.h
tools/llvm-exegesis/llvm-exegesis.cpp

index b5263973d16ac32642ed9ad643ff9cef0792e007..13ca16a8d7f1616846a14c4267706c8aeaaa92e1 100644 (file)
@@ -219,11 +219,16 @@ OPTIONS
  Specify the numPoints parameters to be used for DBSCAN clustering
  (`analysis` mode).
 
-.. option:: -analysis-epsilon=<dbscan epsilon parameter>
+.. option:: -analysis-clustering-epsilon=<dbscan epsilon parameter>
 
- Specify the numPoints parameters to be used for DBSCAN clustering
+ Specify the epsilon parameter used for clustering of benchmark points
  (`analysis` mode).
 
+.. option:: -analysis-inconsistency-epsilon=<epsilon>
+
+ Specify the epsilon parameter used for detection of when the cluster
+ is different from the LLVM schedule profile values (`analysis` mode).
+
 .. option:: -analysis-display-unstable-clusters
 
  If there is more than one benchmark for an opcode, said benchmarks may end up
index aa14d7b8e86e39e87c65ac868fd9ac0d0323cbd0..1579993c6b7ad506fe77afa3c7e17dc3ef35ec74 100644 (file)
@@ -1,6 +1,6 @@
-# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS %s
-# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-epsilon=0.5 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-STABLE %s
-# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-UNSTABLE %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-STABLE %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-UNSTABLE %s
 
 # We have one ADD32rr measurement, and two measurements for SQRTSSr.
 # The ADD32rr measurement and one of the SQRTSSr measurements are identical,
diff --git a/test/tools/llvm-exegesis/X86/analysis-epsilons.test b/test/tools/llvm-exegesis/X86/analysis-epsilons.test
new file mode 100644 (file)
index 0000000..9d9b09d
--- /dev/null
@@ -0,0 +1,63 @@
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-TWO %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-TWO %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-ONE %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-ONE %s
+
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-FAIL %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-FAIL %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-PASS %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-PASS %s
+
+# CHECK-CLUSTERS-ALL: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
+
+# CHECK-CLUSTERS-TWO: {{^}}0,
+# CHECK-CLUSTERS-TWO-SAME: ,90.00{{$}}
+# CHECK-CLUSTERS-TWO: {{^}}1,
+# CHECK-CLUSTERS-TWO-SAME: ,100.00{{$}}
+
+# CHECK-CLUSTERS-ONE: {{^}}0,
+# CHECK-CLUSTERS-ONE-SAME: ,90.00{{$}}
+# CHECK-CLUSTERS-ONE-NEXT: {{^}}0,
+# CHECK-CLUSTERS-ONE-SAME: ,100.00{{$}}
+
+# CHECK-INCONSISTENCIES-FAIL: contains instructions whose performance characteristics do not match that of LLVM
+# CHECK-INCONSISTENCIES-FAIL: contains instructions whose performance characteristics do not match that of LLVM
+# CHECK-INCONSISTENCIES-FAIL-NOT: contains instructions whose performance characteristics do not match that of LLVM
+
+# CHECK-INCONSISTENCIES-PASS-NOT: contains instructions whose performance characteristics do not match that of LLVM
+
+---
+mode:            latency
+key:
+  instructions:
+    - 'ADD32rr EDX EDX EAX'
+  config:          ''
+  register_initial_values:
+    - 'EDX=0x0'
+    - 'EAX=0x0'
+cpu_name:        bdver2
+llvm_triple:     x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+  - { key: latency, value: 90, per_snippet_value: 90 }
+error:           ''
+info:            Repeating a single implicitly serial instruction
+assembled_snippet: BA00000000B80000000001C201C201C201C201C201C201C201C201C201C201C201C201C201C201C201C2C3
+...
+---
+mode:            latency
+key:
+  instructions:
+    - 'SQRTSSr XMM11 XMM11'
+  config:          ''
+  register_initial_values:
+    - 'XMM11=0x0'
+cpu_name:        bdver2
+llvm_triple:     x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+  - { key: latency, value: 100, per_snippet_value: 100 }
+error:           ''
+info:            Repeating a single explicitly serial instruction
+assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C410F3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBC3
+...
index ba5620e8c4b5265be34c1b0e56eb3c62367c731c..ec964b24a130e3978fff8bd7abb956cf49c20d30 100644 (file)
@@ -170,8 +170,11 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
 Analysis::Analysis(const llvm::Target &Target,
                    std::unique_ptr<llvm::MCInstrInfo> InstrInfo,
                    const InstructionBenchmarkClustering &Clustering,
+                   double AnalysisInconsistencyEpsilon,
                    bool AnalysisDisplayUnstableOpcodes)
     : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
+      AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
+                                           AnalysisInconsistencyEpsilon),
       AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
   if (Clustering.getPoints().empty())
     return;
@@ -301,7 +304,8 @@ void Analysis::printSchedClassClustersHtml(
   OS << "</tr>";
   for (const SchedClassCluster &Cluster : Clusters) {
     OS << "<tr class=\""
-       << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
+       << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_,
+                                     AnalysisInconsistencyEpsilonSquared_)
                ? "good-cluster"
                : "bad-cluster")
        << "\"><td>";
@@ -461,7 +465,8 @@ static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
 
 bool Analysis::SchedClassCluster::measurementsMatch(
     const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
-    const InstructionBenchmarkClustering &Clustering) const {
+    const InstructionBenchmarkClustering &Clustering,
+    const double AnalysisInconsistencyEpsilonSquared_) const {
   const size_t NumMeasurements = Representative.size();
   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
@@ -520,7 +525,8 @@ bool Analysis::SchedClassCluster::measurementsMatch(
     llvm_unreachable("unimplemented measurement matching mode");
     return false;
   }
-  return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
+  return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint,
+                                AnalysisInconsistencyEpsilonSquared_);
 }
 
 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
@@ -689,7 +695,8 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
     if (llvm::all_of(SchedClassClusters,
                      [this, &RSCAndPoints](const SchedClassCluster &C) {
                        return C.measurementsMatch(
-                           *SubtargetInfo_, RSCAndPoints.RSC, Clustering_);
+                           *SubtargetInfo_, RSCAndPoints.RSC, Clustering_,
+                           AnalysisInconsistencyEpsilonSquared_);
                      }))
       continue; // Nothing weird.
 
index d55a9a8c684032814932e38c001d82bacf137475..1a0859ee512d0b1921f3a9dd9fb93032bed1e110 100644 (file)
@@ -38,6 +38,7 @@ public:
   Analysis(const llvm::Target &Target,
            std::unique_ptr<llvm::MCInstrInfo> InstrInfo,
            const InstructionBenchmarkClustering &Clustering,
+           double AnalysisInconsistencyEpsilon,
            bool AnalysisDisplayUnstableOpcodes);
 
   // Prints a csv of instructions for each cluster.
@@ -81,7 +82,8 @@ private:
     bool
     measurementsMatch(const llvm::MCSubtargetInfo &STI,
                       const ResolvedSchedClass &SC,
-                      const InstructionBenchmarkClustering &Clustering) const;
+                      const InstructionBenchmarkClustering &Clustering,
+                      const double AnalysisInconsistencyEpsilonSquared_) const;
 
     void addPoint(size_t PointId,
                   const InstructionBenchmarkClustering &Clustering);
@@ -127,6 +129,7 @@ private:
   std::unique_ptr<llvm::MCAsmInfo> AsmInfo_;
   std::unique_ptr<llvm::MCInstPrinter> InstPrinter_;
   std::unique_ptr<llvm::MCDisassembler> Disasm_;
+  const double AnalysisInconsistencyEpsilonSquared_;
   const bool AnalysisDisplayUnstableOpcodes_;
 };
 
index 0c529c4cbd9a3a871159b0e1497a9b48b9559d04..cc46cb3fe187e194fe880700caf01d202b7324d2 100644 (file)
@@ -46,7 +46,8 @@ void InstructionBenchmarkClustering::rangeQuery(
     const auto &PMeasurements = Points_[P].Measurements;
     if (PMeasurements.empty()) // Error point.
       continue;
-    if (isNeighbour(PMeasurements, QMeasurements)) {
+    if (isNeighbour(PMeasurements, QMeasurements,
+                    AnalysisClusteringEpsilonSquared_)) {
       Neighbors.push_back(P);
     }
   }
@@ -54,8 +55,9 @@ void InstructionBenchmarkClustering::rangeQuery(
 
 InstructionBenchmarkClustering::InstructionBenchmarkClustering(
     const std::vector<InstructionBenchmark> &Points,
-    const double EpsilonSquared)
-    : Points_(Points), EpsilonSquared_(EpsilonSquared),
+    const double AnalysisClusteringEpsilonSquared)
+    : Points_(Points),
+      AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared),
       NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
 
 llvm::Error InstructionBenchmarkClustering::validateAndSetup() {
@@ -245,8 +247,10 @@ void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
 llvm::Expected<InstructionBenchmarkClustering>
 InstructionBenchmarkClustering::create(
     const std::vector<InstructionBenchmark> &Points, const size_t MinPts,
-    const double Epsilon, llvm::Optional<unsigned> NumOpcodes) {
-  InstructionBenchmarkClustering Clustering(Points, Epsilon * Epsilon);
+    const double AnalysisClusteringEpsilon,
+    llvm::Optional<unsigned> NumOpcodes) {
+  InstructionBenchmarkClustering Clustering(
+      Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
   if (auto Error = Clustering.validateAndSetup()) {
     return std::move(Error);
   }
index 70082044f56ea78bf8c2f4c9defec323e97463e9..ad4cab395c3abc788ad3cea6806d9c1fa4989fc8 100644 (file)
@@ -29,7 +29,8 @@ public:
   // for more explanations on the algorithm.
   static llvm::Expected<InstructionBenchmarkClustering>
   create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
-         double Epsilon, llvm::Optional<unsigned> NumOpcodes = llvm::None);
+         double AnalysisClusteringEpsilon,
+         llvm::Optional<unsigned> NumOpcodes = llvm::None);
 
   class ClusterId {
   public:
@@ -103,7 +104,8 @@ public:
 
   // Returns true if the given point is within a distance Epsilon of each other.
   bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
-                   const std::vector<BenchmarkMeasure> &Q) const {
+                   const std::vector<BenchmarkMeasure> &Q,
+                   const double EpsilonSquared_) const {
     double DistanceSquared = 0.0;
     for (size_t I = 0, E = P.size(); I < E; ++I) {
       const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
@@ -114,7 +116,8 @@ public:
 
 private:
   InstructionBenchmarkClustering(
-      const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
+      const std::vector<InstructionBenchmark> &Points,
+      double AnalysisClusteringEpsilonSquared);
 
   llvm::Error validateAndSetup();
   void dbScan(size_t MinPts);
@@ -122,7 +125,7 @@ private:
   void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
 
   const std::vector<InstructionBenchmark> &Points_;
-  const double EpsilonSquared_;
+  const double AnalysisClusteringEpsilonSquared_;
   int NumDimensions_ = 0;
   // ClusterForPoint_[P] is the cluster id for Points[P].
   std::vector<ClusterId> ClusterIdForPoint_;
index 915ffc5863fff99ed886b410bc5ee87615aaa130..0ad058061fd98c1bccfd971c517695899a55b239 100644 (file)
@@ -84,10 +84,15 @@ static cl::opt<unsigned> AnalysisNumPoints(
     "analysis-numpoints",
     cl::desc("minimum number of points in an analysis cluster"), cl::init(3));
 
-static cl::opt<float>
-    AnalysisEpsilon("analysis-epsilon",
-                    cl::desc("dbscan epsilon for analysis clustering"),
-                    cl::init(0.1));
+static cl::opt<float> AnalysisClusteringEpsilon(
+    "analysis-clustering-epsilon",
+    cl::desc("dbscan epsilon for benchmark point clustering"), cl::init(0.1));
+
+static cl::opt<float> AnalysisInconsistencyEpsilon(
+    "analysis-inconsistency-epsilon",
+    cl::desc("epsilon for detection of when the cluster is different from the "
+             "LLVM schedule profile values"),
+    cl::init(0.1));
 
 static cl::opt<std::string>
     AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
@@ -444,9 +449,11 @@ static void analysisMain() {
   std::unique_ptr<llvm::MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
 
   const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
-      Points, AnalysisNumPoints, AnalysisEpsilon, InstrInfo->getNumOpcodes()));
+      Points, AnalysisNumPoints, AnalysisClusteringEpsilon,
+      InstrInfo->getNumOpcodes()));
 
   const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
+                          AnalysisInconsistencyEpsilon,
                           AnalysisDisplayUnstableOpcodes);
 
   maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",