If non-empty, write inconsistencies found during analysis to this file. `-`
prints to stdout. By default, this analysis is not run.
+.. option:: -analysis-clustering=[dbscan,naive]
+
+ Specify the clustering algorithm to use. By default DBSCAN will be used.
+ Naive clustering algorithm is better for doing further work on the
+ `-analysis-inconsistencies-output-file=` output, it will create one cluster
+ per opcode, and check that the cluster is stable (all points are neighbours).
+
.. option:: -analysis-numpoints=<dbscan numPoints parameter>
Specify the numPoints parameters to be used for DBSCAN clustering
- (`analysis` mode).
+ (`analysis` mode, DBSCAN only).
.. option:: -analysis-clustering-epsilon=<dbscan epsilon parameter>
--- /dev/null
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.5 -analysis-numpoints=1 -analysis-clustering=dbscan | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-DBSCAN-05 %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.49 -analysis-numpoints=1 -analysis-clustering=dbscan | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-DBSCAN-049 %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.5 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-NAIVE %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.49 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-NAIVE %s
+
+# CHECK-CLUSTERS-ALL: {{^}}cluster_id,opcode_name,config,sched_class,inverse_throughput{{$}}
+
+# By default with -analysis-clustering-epsilon=0.5 everything ends up in the
+# same cluster, because each next point is a neighbour of the previous point.
+
+# CHECK-CLUSTERS-DBSCAN-05-NEXT: {{^}}0,
+# CHECK-CLUSTERS-DBSCAN-05-SAME: ,1.00{{$}}
+# CHECK-CLUSTERS-DBSCAN-05-NEXT: {{^}}0,
+# CHECK-CLUSTERS-DBSCAN-05-SAME: ,1.50{{$}}
+# CHECK-CLUSTERS-DBSCAN-05-NEXT: {{^}}0,
+# CHECK-CLUSTERS-DBSCAN-05-SAME: ,2.00{{$}}
+# CHECK-CLUSTERS-DBSCAN-05-NEXT: {{^}}0,
+# CHECK-CLUSTERS-DBSCAN-05-SAME: ,2.50{{$}}
+
+# With -analysis-clustering-epsilon=0.49 every point goes into separate cluster.
+
+# CHECK-CLUSTERS-DBSCAN-049-NEXT: {{^}}0,
+# CHECK-CLUSTERS-DBSCAN-049-SAME: ,1.00{{$}}
+# CHECK-CLUSTERS-DBSCAN-049: {{^}}1,
+# CHECK-CLUSTERS-DBSCAN-049-SAME: ,1.50{{$}}
+# CHECK-CLUSTERS-DBSCAN-049: {{^}}2,
+# CHECK-CLUSTERS-DBSCAN-049-SAME: ,2.00{{$}}
+# CHECK-CLUSTERS-DBSCAN-049: {{^}}3,
+# CHECK-CLUSTERS-DBSCAN-049-SAME: ,2.50{{$}}
+
+# And -analysis-clustering=naive every opcode goes into separate cluster.
+
+# CHECK-CLUSTERS-NAIVE-049-NEXT: {{^}}0,
+# CHECK-CLUSTERS-NAIVE-049-SAME: ,1.50{{$}}
+# CHECK-CLUSTERS-NAIVE-049: {{^}}1,
+# CHECK-CLUSTERS-NAIVE-049-SAME: ,2.00{{$}}
+# CHECK-CLUSTERS-NAIVE-049: {{^}}2,
+# CHECK-CLUSTERS-NAIVE-049-SAME: ,2.50{{$}}
+# CHECK-CLUSTERS-NAIVE-049: {{^}}3,
+# CHECK-CLUSTERS-NAIVE-049-SAME: ,1.00{{$}}
+
+# The "value" is manually specified, not measured.
+
+---
+mode: inverse_throughput
+key:
+ instructions:
+ - 'ROL8ri AH AH i_0x1'
+ - 'ROL8ri AL AL i_0x1'
+ - 'ROL8ri BH BH i_0x1'
+ - 'ROL8ri BL BL i_0x1'
+ - 'ROL8ri BPL BPL i_0x1'
+ - 'ROL8ri CH CH i_0x1'
+ - 'ROL8ri CL CL i_0x1'
+ - 'ROL8ri DH DH i_0x1'
+ - 'ROL8ri DIL DIL i_0x1'
+ - 'ROL8ri DL DL i_0x1'
+ - 'ROL8ri SIL SIL i_0x1'
+ - 'ROL8ri R8B R8B i_0x1'
+ - 'ROL8ri R9B R9B i_0x1'
+ - 'ROL8ri R10B R10B i_0x1'
+ - 'ROL8ri R11B R11B i_0x1'
+ - 'ROL8ri R12B R12B i_0x1'
+ - 'ROL8ri R13B R13B i_0x1'
+ - 'ROL8ri R14B R14B i_0x1'
+ - 'ROL8ri R15B R15B i_0x1'
+ config: ''
+ register_initial_values:
+ - 'AH=0x0'
+ - 'AL=0x0'
+ - 'BH=0x0'
+ - 'BL=0x0'
+ - 'BPL=0x0'
+ - 'CH=0x0'
+ - 'CL=0x0'
+ - 'DH=0x0'
+ - 'DIL=0x0'
+ - 'DL=0x0'
+ - 'SIL=0x0'
+ - 'R8B=0x0'
+ - 'R9B=0x0'
+ - 'R10B=0x0'
+ - 'R11B=0x0'
+ - 'R12B=0x0'
+ - 'R13B=0x0'
+ - 'R14B=0x0'
+ - 'R15B=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 1000000
+measurements:
+ - { key: inverse_throughput, value: 1.0000, per_snippet_value: 30.4026 }
+error: ''
+info: instruction has tied variables, using static renaming.
+assembled_snippet: 55415741564155415453B400B000B700B30040B500B500B100B60040B700B20040B60041B00041B10041B20041B30041B40041B50041B60041B700C0C401C0C001C0C701C0C30140C0C501C0C501C0C101C0C60140C0C701C0C20140C0C60141C0C00141C0C10141C0C20141C0C30141C0C40141C0C50141C0C60141C0C7015B415C415D415E415F5DC3
+...
+---
+mode: inverse_throughput
+key:
+ instructions:
+ - 'ROL16ri AX AX i_0x1'
+ - 'ROL16ri BP BP i_0x1'
+ - 'ROL16ri BX BX i_0x1'
+ - 'ROL16ri CX CX i_0x1'
+ - 'ROL16ri DI DI i_0x1'
+ - 'ROL16ri DX DX i_0x1'
+ - 'ROL16ri SI SI i_0x1'
+ - 'ROL16ri R8W R8W i_0x1'
+ - 'ROL16ri R9W R9W i_0x1'
+ - 'ROL16ri R10W R10W i_0x1'
+ - 'ROL16ri R11W R11W i_0x1'
+ - 'ROL16ri R12W R12W i_0x1'
+ - 'ROL16ri R13W R13W i_0x1'
+ - 'ROL16ri R14W R14W i_0x1'
+ - 'ROL16ri R15W R15W i_0x1'
+ config: ''
+ register_initial_values:
+ - 'AX=0x0'
+ - 'BP=0x0'
+ - 'BX=0x0'
+ - 'CX=0x0'
+ - 'DI=0x0'
+ - 'DX=0x0'
+ - 'SI=0x0'
+ - 'R8W=0x0'
+ - 'R9W=0x0'
+ - 'R10W=0x0'
+ - 'R11W=0x0'
+ - 'R12W=0x0'
+ - 'R13W=0x0'
+ - 'R14W=0x0'
+ - 'R15W=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 1000000
+measurements:
+ - { key: inverse_throughput, value: 1.5000, per_snippet_value: 30.154 }
+error: ''
+info: instruction has tied variables, using static renaming.
+assembled_snippet: 5541574156415541545366B8000066BD000066BB000066B9000066BF000066BA000066BE00006641B800006641B900006641BA00006641BB00006641BC00006641BD00006641BE00006641BF000066C1C00166C1C50166C1C30166C1C10166C1C70166C1C20166C1C6016641C1C0016641C1C1016641C1C2016641C1C3016641C1C4016641C1C5016641C1C6016641C1C70166C1C0015B415C415D415E415F5DC3
+...
+---
+mode: inverse_throughput
+key:
+ instructions:
+ - 'ROL32ri EAX EAX i_0x1'
+ - 'ROL32ri EBP EBP i_0x1'
+ - 'ROL32ri EBX EBX i_0x1'
+ - 'ROL32ri ECX ECX i_0x1'
+ - 'ROL32ri EDI EDI i_0x1'
+ - 'ROL32ri EDX EDX i_0x1'
+ - 'ROL32ri ESI ESI i_0x1'
+ - 'ROL32ri R8D R8D i_0x1'
+ - 'ROL32ri R9D R9D i_0x1'
+ - 'ROL32ri R10D R10D i_0x1'
+ - 'ROL32ri R11D R11D i_0x1'
+ - 'ROL32ri R12D R12D i_0x1'
+ - 'ROL32ri R13D R13D i_0x1'
+ - 'ROL32ri R14D R14D i_0x1'
+ - 'ROL32ri R15D R15D i_0x1'
+ config: ''
+ register_initial_values:
+ - 'EAX=0x0'
+ - 'EBP=0x0'
+ - 'EBX=0x0'
+ - 'ECX=0x0'
+ - 'EDI=0x0'
+ - 'EDX=0x0'
+ - 'ESI=0x0'
+ - 'R8D=0x0'
+ - 'R9D=0x0'
+ - 'R10D=0x0'
+ - 'R11D=0x0'
+ - 'R12D=0x0'
+ - 'R13D=0x0'
+ - 'R14D=0x0'
+ - 'R15D=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 1000000
+measurements:
+ - { key: inverse_throughput, value: 2.0000, per_snippet_value: 23.2762 }
+error: ''
+info: instruction has tied variables, using static renaming.
+assembled_snippet: 55415741564155415453B800000000BD00000000BB00000000B900000000BF00000000BA00000000BE0000000041B80000000041B90000000041BA0000000041BB0000000041BC0000000041BD0000000041BE0000000041BF00000000C1C001C1C501C1C301C1C101C1C701C1C201C1C60141C1C00141C1C10141C1C20141C1C30141C1C40141C1C50141C1C60141C1C701C1C0015B415C415D415E415F5DC3
+...
+---
+mode: inverse_throughput
+key:
+ instructions:
+ - 'ROL64ri RAX RAX i_0x1'
+ - 'ROL64ri RBP RBP i_0x1'
+ - 'ROL64ri RBX RBX i_0x1'
+ - 'ROL64ri RCX RCX i_0x1'
+ - 'ROL64ri RDI RDI i_0x1'
+ - 'ROL64ri RDX RDX i_0x1'
+ - 'ROL64ri RSI RSI i_0x1'
+ - 'ROL64ri R8 R8 i_0x1'
+ - 'ROL64ri R9 R9 i_0x1'
+ - 'ROL64ri R10 R10 i_0x1'
+ - 'ROL64ri R11 R11 i_0x1'
+ - 'ROL64ri R12 R12 i_0x1'
+ - 'ROL64ri R13 R13 i_0x1'
+ - 'ROL64ri R14 R14 i_0x1'
+ - 'ROL64ri R15 R15 i_0x1'
+ config: ''
+ register_initial_values:
+ - 'RAX=0x0'
+ - 'RBP=0x0'
+ - 'RBX=0x0'
+ - 'RCX=0x0'
+ - 'RDI=0x0'
+ - 'RDX=0x0'
+ - 'RSI=0x0'
+ - 'R8=0x0'
+ - 'R9=0x0'
+ - 'R10=0x0'
+ - 'R11=0x0'
+ - 'R12=0x0'
+ - 'R13=0x0'
+ - 'R14=0x0'
+ - 'R15=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 1000000
+measurements:
+ - { key: inverse_throughput, value: 2.5000, per_snippet_value: 26.2268 }
+error: ''
+info: instruction has tied variables, using static renaming.
+assembled_snippet: 5541574156415541545348B8000000000000000048BD000000000000000048BB000000000000000048B9000000000000000048BF000000000000000048BA000000000000000048BE000000000000000049B8000000000000000049B9000000000000000049BA000000000000000049BB000000000000000049BC000000000000000049BD000000000000000049BE000000000000000049BF000000000000000048C1C00148C1C50148C1C30148C1C10148C1C70148C1C20148C1C60149C1C00149C1C10149C1C20149C1C30149C1C40149C1C50149C1C60149C1C70148C1C0015B415C415D415E415F5DC3
+...
--- /dev/null
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-05 %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-STABLE-05 %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-UNSTABLE-05 %s
+
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.49 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-049 %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.49 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-STABLE-049 %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.49 -analysis-inconsistency-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-UNSTABLE-049 %s
+
+# CHECK-CLUSTERS-ALL: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
+
+# CHECK-CLUSTERS-NEXT-05: {{^}}0,
+# CHECK-CLUSTERS-SAME-05: ,90.00{{$}}
+# CHECK-CLUSTERS-05: {{^}}0,
+# CHECK-CLUSTERS-SAME-05: ,90.50{{$}}
+
+# CHECK-INCONSISTENCIES-STABLE-05: ADD32rr
+# CHECK-INCONSISTENCIES-STABLE-05: ADD32rr
+# CHECK-INCONSISTENCIES-STABLE-05-NOT: ADD32rr
+
+# CHECK-INCONSISTENCIES-UNSTABLE-05-NOT: ADD32rr
+
+# CHECK-INCONSISTENCIES-STABLE-049-NOT: ADD32rr
+
+# CHECK-INCONSISTENCIES-UNSTABLE-049: ADD32rr
+# CHECK-INCONSISTENCIES-UNSTABLE-049: ADD32rr
+# CHECK-INCONSISTENCIES-UNSTABLE-049-NOT: ADD32rr
+
+---
+mode: latency
+key:
+ instructions:
+ - 'ADD32rr EDX EDX EAX'
+ config: ''
+ register_initial_values:
+ - 'EDX=0x0'
+ - 'EAX=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: latency, value: 90.0000, per_snippet_value: 90.0000 }
+error: ''
+info: Repeating a single implicitly serial instruction
+assembled_snippet: BA00000000B80000000001C201C201C201C201C201C201C201C201C201C201C201C201C201C201C201C2C3
+---
+mode: latency
+key:
+ instructions:
+ - 'ADD32rr EDX EDX EAX'
+ config: ''
+ register_initial_values:
+ - 'EDX=0x0'
+ - 'EAX=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: latency, value: 90.5000, per_snippet_value: 90.5000 }
+error: ''
+info: Repeating a single implicitly serial instruction
+assembled_snippet: BA00000000B80000000001C201C201C201C201C201C201C201C201C201C201C201C201C201C201C201C2C3
+---
+...
--- /dev/null
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-STABLE %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-UNSTABLE %s
+
+# We have two ADD32rr measurements, and two measurements for SQRTSSr.
+# ADD32rr measurements are neighbours.
+# But the measurements of SQRTSSr are not neighbours,
+# so therefore that cluster is marked as unstable.
+
+# By default, we do not show such unstable clusters.
+# If told to show, we *only* show such unstable clusters.
+
+# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
+# CHECK-CLUSTERS-NEXT: {{^}}0,
+# CHECK-CLUSTERS-SAME: ,90.00{{$}}
+# CHECK-CLUSTERS-NEXT: {{^}}0,
+# CHECK-CLUSTERS-SAME: ,90.11{{$}}
+# CHECK-CLUSTERS: {{^}}1,
+# CHECK-CLUSTERS-SAME: ,90.11{{$}}
+# CHECK-CLUSTERS-NEXT: {{^}}1,
+# CHECK-CLUSTERS-SAME: ,100.00{{$}}
+
+# CHECK-INCONSISTENCIES-STABLE: ADD32rr
+# CHECK-INCONSISTENCIES-STABLE: ADD32rr
+# CHECK-INCONSISTENCIES-STABLE-NOT: ADD32rr
+# CHECK-INCONSISTENCIES-STABLE-NOT: SQRTSSr
+
+# CHECK-INCONSISTENCIES-UNSTABLE: SQRTSSr
+# CHECK-INCONSISTENCIES-UNSTABLE: SQRTSSr
+# CHECK-INCONSISTENCIES-UNSTABLE-NOT: SQRTSSr
+# CHECK-INCONSISTENCIES-UNSTABLE-NOT: ADD32rr
+
+---
+mode: latency
+key:
+ instructions:
+ - 'ADD32rr EDX EDX EAX'
+ config: ''
+ register_initial_values:
+ - 'EDX=0x0'
+ - 'EAX=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: latency, value: 90.0000, per_snippet_value: 90.0000 }
+error: ''
+info: Repeating a single implicitly serial instruction
+assembled_snippet: BA00000000B80000000001C201C201C201C201C201C201C201C201C201C201C201C201C201C201C201C2C3
+---
+mode: latency
+key:
+ instructions:
+ - 'ADD32rr EDX EDX EAX'
+ config: ''
+ register_initial_values:
+ - 'EDX=0x0'
+ - 'EAX=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: latency, value: 90.1100, per_snippet_value: 90.1100 }
+error: ''
+info: Repeating a single implicitly serial instruction
+assembled_snippet: BA00000000B80000000001C201C201C201C201C201C201C201C201C201C201C201C201C201C201C201C2C3
+---
+mode: latency
+key:
+ instructions:
+ - 'SQRTSSr XMM11 XMM11'
+ config: ''
+ register_initial_values:
+ - 'XMM11=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: latency, value: 90.1111, per_snippet_value: 90.1111 }
+error: ''
+info: Repeating a single explicitly serial instruction
+assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C410F3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBC3
+...
+---
+mode: latency
+key:
+ instructions:
+ - 'SQRTSSr XMM11 XMM11'
+ config: ''
+ register_initial_values:
+ - 'XMM11=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: latency, value: 100, per_snippet_value: 100 }
+error: ''
+info: Repeating a single explicitly serial instruction
+assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C410F3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBC3
+...
--- /dev/null
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=10 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-DBSCAN %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=10 -analysis-numpoints=1 -analysis-clustering=dbscan | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-DBSCAN %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=10 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-NAIVE %s
+
+# Normally BSR32rr is in WriteBSR and BSF32rr is in WriteBSF sched classes.
+# Here we check that if we have dbscan-clustered these two measurements into the
+# same cluster, we don't split it per the sched classes into two.
+
+# CHECK-CLUSTERS-ALL: {{^}}cluster_id,opcode_name,config,sched_class,inverse_throughput{{$}}
+
+# CHECK-CLUSTERS-DBSCAN-NEXT: {{^}}0,
+# CHECK-CLUSTERS-DBSCAN-SAME: ,4.03{{$}}
+# CHECK-CLUSTERS-DBSCAN-NEXT: {{^}}0,
+# CHECK-CLUSTERS-DBSCAN-SAME: ,3.02{{$}}
+
+# CHECK-CLUSTERS-NAIVE-NEXT: {{^}}0,
+# CHECK-CLUSTERS-NAIVE-SAME: ,3.02{{$}}
+# CHECK-CLUSTERS-NAIVE: {{^}}1,
+# CHECK-CLUSTERS-NAIVE-SAME: ,4.03{{$}}
+
+---
+mode: inverse_throughput
+key:
+ instructions:
+ - 'BSR32rr R11D EDI'
+ config: ''
+ register_initial_values:
+ - 'EDI=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 1000000
+measurements:
+ - { key: inverse_throughput, value: 4.03048, per_snippet_value: 4.03048 }
+error: ''
+info: instruction has no tied variables picking Uses different from defs
+assembled_snippet: BF00000000440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDF440FBDDFC3
+...
+---
+mode: inverse_throughput
+key:
+ instructions:
+ - 'BSF32rr EAX R14D'
+ config: ''
+ register_initial_values:
+ - 'R14D=0x0'
+cpu_name: bdver2
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 1000000
+measurements:
+ - { key: inverse_throughput, value: 3.02186, per_snippet_value: 3.02186 }
+error: ''
+info: instruction has no tied variables picking Uses different from defs
+assembled_snippet: 415641BE00000000410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6410FBCC6415EC3
+...
OS << "</span></li>";
}
OS << "</ul></td>";
- for (const auto &Stats : Cluster.getRepresentative()) {
+ for (const auto &Stats : Cluster.getCentroid().getStats()) {
OS << "<td class=\"measurement\">";
writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
OS << "<br><span class=\"minmax\">[";
size_t PointId, const InstructionBenchmarkClustering &Clustering) {
PointIds.push_back(PointId);
const auto &Point = Clustering.getPoints()[PointId];
- if (ClusterId.isUndef()) {
+ if (ClusterId.isUndef())
ClusterId = Clustering.getClusterIdForPoint(PointId);
- Representative.resize(Point.Measurements.size());
- }
- for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
- Representative[I].push(Point.Measurements[I]);
- }
assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
+
+ Centroid.addPoint(Point.Measurements);
}
// Returns a ProxResIdx by id or name.
const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
const InstructionBenchmarkClustering &Clustering,
const double AnalysisInconsistencyEpsilonSquared_) const {
+ ArrayRef<PerInstructionStats> Representative = Centroid.getStats();
const size_t NumMeasurements = Representative.size();
std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
const std::vector<size_t> &getPointIds() const { return PointIds; }
+ void addPoint(size_t PointId,
+ const InstructionBenchmarkClustering &Clustering);
+
// Return the cluster centroid.
- const std::vector<PerInstructionStats> &getRepresentative() const {
- return Representative;
- }
+ const SchedClassClusterCentroid &getCentroid() const { return Centroid; }
// Returns true if the cluster representative measurements match that of SC.
bool
const InstructionBenchmarkClustering &Clustering,
const double AnalysisInconsistencyEpsilonSquared_) const;
- void addPoint(size_t PointId,
- const InstructionBenchmarkClustering &Clustering);
-
private:
InstructionBenchmarkClustering::ClusterId ClusterId;
std::vector<size_t> PointIds;
// Measurement stats for the points in the SchedClassCluster.
- std::vector<PerInstructionStats> Representative;
+ SchedClassClusterCentroid Centroid;
};
void printInstructionRowCsv(size_t PointId, llvm::raw_ostream &OS) const;
}
}
+// Given a set of points, checks that all the points are neighbours
+// up to AnalysisClusteringEpsilon. This is O(2*N).
+bool InstructionBenchmarkClustering::areAllNeighbours(
+ ArrayRef<size_t> Pts) const {
+ // First, get the centroid of this group of points. This is O(N).
+ SchedClassClusterCentroid G;
+ llvm::for_each(Pts, [this, &G](size_t P) {
+ assert(P < Points_.size());
+ ArrayRef<BenchmarkMeasure> Measurements = Points_[P].Measurements;
+ if (Measurements.empty()) // Error point.
+ return;
+ G.addPoint(Measurements);
+ });
+ const std::vector<BenchmarkMeasure> Centroid = G.getAsPoint();
+
+ // Since we will be comparing with the centroid, we need to halve the epsilon.
+ double AnalysisClusteringEpsilonHalvedSquared =
+ AnalysisClusteringEpsilonSquared_ / 4.0;
+
+ // And now check that every point is a neighbour of the centroid. Also O(N).
+ return llvm::all_of(
+ Pts, [this, &Centroid, AnalysisClusteringEpsilonHalvedSquared](size_t P) {
+ assert(P < Points_.size());
+ const auto &PMeasurements = Points_[P].Measurements;
+ if (PMeasurements.empty()) // Error point.
+ return true; // Pretend that error point is a neighbour.
+ return isNeighbour(PMeasurements, Centroid,
+ AnalysisClusteringEpsilonHalvedSquared);
+ });
+}
+
InstructionBenchmarkClustering::InstructionBenchmarkClustering(
const std::vector<InstructionBenchmark> &Points,
const double AnalysisClusteringEpsilonSquared)
return llvm::Error::success();
}
-void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
+void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) {
std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs.
for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
if (!ClusterIdForPoint_[P].isUndef())
}
}
+void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) {
+ // Given an instruction Opcode, which are the benchmarks of this instruction?
+ std::vector<llvm::SmallVector<size_t, 1>> OpcodeToPoints;
+ OpcodeToPoints.resize(NumOpcodes);
+ size_t NumOpcodesSeen = 0;
+ for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
+ const InstructionBenchmark &Point = Points_[P];
+ const unsigned Opcode = Point.keyInstruction().getOpcode();
+ assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
+ llvm::SmallVectorImpl<size_t> &PointsOfOpcode = OpcodeToPoints[Opcode];
+ if (PointsOfOpcode.empty()) // If we previously have not seen any points of
+ ++NumOpcodesSeen; // this opcode, then naturally this is the new opcode.
+ PointsOfOpcode.emplace_back(P);
+ }
+ assert(OpcodeToPoints.size() == NumOpcodes && "sanity check");
+ assert(NumOpcodesSeen <= NumOpcodes &&
+ "can't see more opcodes than there are total opcodes");
+ assert(NumOpcodesSeen <= Points_.size() &&
+ "can't see more opcodes than there are total points");
+
+ Clusters_.reserve(NumOpcodesSeen); // One cluster per opcode.
+ for (ArrayRef<size_t> PointsOfOpcode : llvm::make_filter_range(
+ OpcodeToPoints, [](ArrayRef<size_t> PointsOfOpcode) {
+ return !PointsOfOpcode.empty(); // Ignore opcodes with no points.
+ })) {
+ // Create a new cluster.
+ Clusters_.emplace_back(ClusterId::makeValid(
+ Clusters_.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode)));
+ Cluster &CurrentCluster = Clusters_.back();
+ // Mark points as belonging to the new cluster.
+ llvm::for_each(PointsOfOpcode, [this, &CurrentCluster](size_t P) {
+ ClusterIdForPoint_[P] = CurrentCluster.Id;
+ });
+ // And add all the points of this opcode to the new cluster.
+ CurrentCluster.PointIndices.reserve(PointsOfOpcode.size());
+ CurrentCluster.PointIndices.assign(PointsOfOpcode.begin(),
+ PointsOfOpcode.end());
+ assert(CurrentCluster.PointIndices.size() == PointsOfOpcode.size());
+ }
+ assert(Clusters_.size() == NumOpcodesSeen);
+}
+
// Given an instruction Opcode, we can make benchmarks (measurements) of the
// instruction characteristics/performance. Then, to facilitate further analysis
// we group the benchmarks with *similar* characteristics into clusters.
llvm::Expected<InstructionBenchmarkClustering>
InstructionBenchmarkClustering::create(
- const std::vector<InstructionBenchmark> &Points, const size_t MinPts,
- const double AnalysisClusteringEpsilon,
+ const std::vector<InstructionBenchmark> &Points, const ModeE Mode,
+ const size_t DbscanMinPts, const double AnalysisClusteringEpsilon,
llvm::Optional<unsigned> NumOpcodes) {
InstructionBenchmarkClustering Clustering(
Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
return Clustering; // Nothing to cluster.
}
- Clustering.dbScan(MinPts);
+ if (Mode == ModeE::Dbscan) {
+ Clustering.clusterizeDbScan(DbscanMinPts);
- if (NumOpcodes.hasValue())
- Clustering.stabilize(NumOpcodes.getValue());
+ if (NumOpcodes.hasValue())
+ Clustering.stabilize(NumOpcodes.getValue());
+ } else /*if(Mode == ModeE::Naive)*/ {
+ if (!NumOpcodes.hasValue())
+ llvm::report_fatal_error(
+ "'naive' clustering mode requires opcode count to be specified");
+ Clustering.clusterizeNaive(NumOpcodes.getValue());
+ }
return Clustering;
}
+void SchedClassClusterCentroid::addPoint(ArrayRef<BenchmarkMeasure> Point) {
+ if (Representative.empty())
+ Representative.resize(Point.size());
+ assert(Representative.size() == Point.size() &&
+ "All points should have identical dimensions.");
+
+ for (const auto &I : llvm::zip(Representative, Point))
+ std::get<0>(I).push(std::get<1>(I));
+}
+
+std::vector<BenchmarkMeasure> SchedClassClusterCentroid::getAsPoint() const {
+ std::vector<BenchmarkMeasure> ClusterCenterPoint(Representative.size());
+ for (const auto &I : llvm::zip(ClusterCenterPoint, Representative))
+ std::get<0>(I).PerInstructionValue = std::get<1>(I).avg();
+ return ClusterCenterPoint;
+}
+
} // namespace exegesis
} // namespace llvm
class InstructionBenchmarkClustering {
public:
+ enum ModeE { Dbscan, Naive };
+
// Clusters `Points` using DBSCAN with the given parameters. See the cc file
// for more explanations on the algorithm.
static llvm::Expected<InstructionBenchmarkClustering>
- create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
- double AnalysisClusteringEpsilon,
+ create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
+ size_t DbscanMinPts, double AnalysisClusteringEpsilon,
llvm::Optional<unsigned> NumOpcodes = llvm::None);
class ClusterId {
public:
static ClusterId noise() { return ClusterId(kNoise); }
static ClusterId error() { return ClusterId(kError); }
- static ClusterId makeValid(size_t Id) { return ClusterId(Id); }
+ static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
+ return ClusterId(Id, IsUnstable);
+ }
static ClusterId makeValidUnstable(size_t Id) {
- return ClusterId(Id, /*IsUnstable=*/true);
+ return makeValid(Id, /*IsUnstable=*/true);
}
ClusterId() : Id_(kUndef), IsUnstable_(false) {}
double AnalysisClusteringEpsilonSquared);
llvm::Error validateAndSetup();
- void dbScan(size_t MinPts);
+
+ void clusterizeDbScan(size_t MinPts);
+ void clusterizeNaive(unsigned NumOpcodes);
+
+ // Stabilization is only needed if dbscan was used to clusterize.
void stabilize(unsigned NumOpcodes);
+
void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
+ bool areAllNeighbours(ArrayRef<size_t> Pts) const;
+
const std::vector<InstructionBenchmark> &Points_;
const double AnalysisClusteringEpsilonSquared_;
+
int NumDimensions_ = 0;
// ClusterForPoint_[P] is the cluster id for Points[P].
std::vector<ClusterId> ClusterIdForPoint_;
Cluster ErrorCluster_;
};
+class SchedClassClusterCentroid {
+public:
+ const std::vector<PerInstructionStats> &getStats() const {
+ return Representative;
+ }
+
+ std::vector<BenchmarkMeasure> getAsPoint() const;
+
+ void addPoint(ArrayRef<BenchmarkMeasure> Point);
+
+private:
+ // Measurement stats for the points in the SchedClassCluster.
+ std::vector<PerInstructionStats> Representative;
+};
+
} // namespace exegesis
} // namespace llvm
cl::cat(Options), cl::init(""));
static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
- "mode", cl::desc("the mode to run"), cl::cat(BenchmarkOptions),
+ "mode", cl::desc("the mode to run"), cl::cat(Options),
cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency, "latency",
"Instruction Latency"),
clEnumValN(exegesis::InstructionBenchmark::InverseThroughput,
cl::desc("ignore instructions that do not define a sched class"),
cl::cat(BenchmarkOptions), cl::init(false));
-static cl::opt<unsigned> AnalysisNumPoints(
+static cl::opt<exegesis::InstructionBenchmarkClustering::ModeE>
+ AnalysisClusteringAlgorithm(
+ "analysis-clustering", cl::desc("the clustering algorithm to use"),
+ cl::cat(AnalysisOptions),
+ cl::values(clEnumValN(exegesis::InstructionBenchmarkClustering::Dbscan,
+ "dbscan", "use DBSCAN/OPTICS algorithm"),
+ clEnumValN(exegesis::InstructionBenchmarkClustering::Naive,
+ "naive", "one cluster per opcode")),
+ cl::init(exegesis::InstructionBenchmarkClustering::Dbscan));
+
+static cl::opt<unsigned> AnalysisDbscanNumPoints(
"analysis-numpoints",
- cl::desc("minimum number of points in an analysis cluster"),
+ cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
cl::cat(AnalysisOptions), cl::init(3));
static cl::opt<float> AnalysisClusteringEpsilon(
"analysis-clustering-epsilon",
- cl::desc("dbscan epsilon for benchmark point clustering"),
+ cl::desc("epsilon for benchmark point clustering"),
cl::cat(AnalysisOptions), cl::init(0.1));
static cl::opt<float> AnalysisInconsistencyEpsilon(
std::unique_ptr<llvm::MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
- Points, AnalysisNumPoints, AnalysisClusteringEpsilon,
- InstrInfo->getNumOpcodes()));
+ Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
+ AnalysisClusteringEpsilon, InstrInfo->getNumOpcodes()));
const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
AnalysisInconsistencyEpsilon,
// Error cluster: points {2}
Points[2].Error = "oops";
- auto Clustering = InstructionBenchmarkClustering::create(Points, 2, 0.25);
+ auto Clustering = InstructionBenchmarkClustering::create(
+ Points, InstructionBenchmarkClustering::ModeE::Dbscan, 2, 0.25);
ASSERT_TRUE((bool)Clustering);
EXPECT_THAT(Clustering.get().getValidClusters(),
UnorderedElementsAre(HasPoints({0, 3}), HasPoints({1, 4})));
{"x", 0.01, 0.0}, {"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
Points[1].Measurements = {{"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
auto Error =
- InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
+ InstructionBenchmarkClustering::create(
+ Points, InstructionBenchmarkClustering::ModeE::Dbscan, 2, 0.25)
+ .takeError();
ASSERT_TRUE((bool)Error);
consumeError(std::move(Error));
}
Points[0].Measurements = {{"x", 0.01, 0.0}, {"y", 1.02, 0.0}};
Points[1].Measurements = {{"y", 1.02, 0.0}, {"x", 1.98, 0.0}};
auto Error =
- InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
+ InstructionBenchmarkClustering::create(
+ Points, InstructionBenchmarkClustering::ModeE::Dbscan, 2, 0.25)
+ .takeError();
ASSERT_TRUE((bool)Error);
consumeError(std::move(Error));
}
Points[2].Measurements = {
{"x", 2.0, 0.0}};
- auto Clustering = InstructionBenchmarkClustering::create(Points, 2, 1.1);
+ auto Clustering = InstructionBenchmarkClustering::create(
+ Points, InstructionBenchmarkClustering::ModeE::Dbscan, 2, 1.1);
ASSERT_TRUE((bool)Clustering);
EXPECT_THAT(Clustering.get().getValidClusters(),
UnorderedElementsAre(HasPoints({0, 1, 2})));
Points[2].Measurements = {
{"x", 1.0, 0.0}};
- auto Clustering = InstructionBenchmarkClustering::create(Points, 2, 1.1);
+ auto Clustering = InstructionBenchmarkClustering::create(
+ Points, InstructionBenchmarkClustering::ModeE::Dbscan, 2, 1.1);
ASSERT_TRUE((bool)Clustering);
EXPECT_THAT(Clustering.get().getValidClusters(),
UnorderedElementsAre(HasPoints({0, 1, 2})));