# have different configs, so they should not be placed in the same cluster by
# stabilization.
-# CHECK-UNSTABLE: SQRTSSr
-# CHECK-UNSTABLE: SQRTSSr
+# CHECK-UNSTABLE-NOT: SQRTSSr
---
mode: latency
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H
#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H
-#include "RegisterValue.h"
+#include "BenchmarkResult.h"
#include "llvm/MC/MCInst.h"
#include <string>
#include <vector>
// A collection of instructions that are to be assembled, executed and measured.
struct BenchmarkCode {
- // The sequence of instructions that are to be repeated.
- std::vector<llvm::MCInst> Instructions;
-
- // Before the code is executed some instructions are added to setup the
- // registers initial values.
- std::vector<RegisterValue> RegisterInitialValues;
+ InstructionBenchmarkKey Key;
// We also need to provide the registers that are live on entry for the
// assembler to generate proper prologue/epilogue.
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H
#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H
-#include "BenchmarkCode.h"
#include "LlvmState.h"
+#include "RegisterValue.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
BenchmarkRunner::~BenchmarkRunner() = default;
-
namespace {
class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
public:
InstrBenchmark.NumRepetitions = NumRepetitions;
InstrBenchmark.Info = BC.Info;
- const std::vector<llvm::MCInst> &Instructions = BC.Instructions;
+ const std::vector<llvm::MCInst> &Instructions = BC.Key.Instructions;
- InstrBenchmark.Key.Instructions = Instructions;
- InstrBenchmark.Key.RegisterInitialValues = BC.RegisterInitialValues;
+ InstrBenchmark.Key = BC.Key;
// Assemble at least kMinInstructionsForSnippet instructions by repeating the
// snippet for debug/analysis. This is so that the user clearly understands
{
llvm::SmallString<0> Buffer;
llvm::raw_svector_ostream OS(Buffer);
- assembleToStream(
- State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
- BC.RegisterInitialValues,
- Repetitor.Repeat(BC.Instructions, kMinInstructionsForSnippet), OS);
+ assembleToStream(State.getExegesisTarget(), State.createTargetMachine(),
+ BC.LiveIns, BC.Key.RegisterInitialValues,
+ Repetitor.Repeat(Instructions, kMinInstructionsForSnippet),
+ OS);
const ExecutableFunction EF(State.createTargetMachine(),
getObjectFromBuffer(OS.str()));
const auto FnBytes = EF.getFunctionBytes();
// Assemble NumRepetitions instructions repetitions of the snippet for
// measurements.
const auto Filler =
- Repetitor.Repeat(BC.Instructions, InstrBenchmark.NumRepetitions);
+ Repetitor.Repeat(Instructions, InstrBenchmark.NumRepetitions);
llvm::object::OwningBinary<llvm::object::ObjectFile> ObjectFile;
if (DumpObjectToDisk) {
llvm::SmallString<0> Buffer;
llvm::raw_svector_ostream OS(Buffer);
assembleToStream(State.getExegesisTarget(), State.createTargetMachine(),
- BC.LiveIns, BC.RegisterInitialValues, Filler, OS);
+ BC.LiveIns, BC.Key.RegisterInitialValues, Filler, OS);
ObjectFile = getObjectFromBuffer(OS.str());
}
// Scale the measurements by instruction.
BM.PerInstructionValue /= InstrBenchmark.NumRepetitions;
// Scale the measurements by snippet.
- BM.PerSnippetValue *= static_cast<double>(BC.Instructions.size()) /
+ BM.PerSnippetValue *= static_cast<double>(Instructions.size()) /
InstrBenchmark.NumRepetitions;
}
return std::move(E);
llvm::raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
assembleToStream(State.getExegesisTarget(), State.createTargetMachine(),
- BC.LiveIns, BC.RegisterInitialValues, FillFunction, OFS);
+ BC.LiveIns, BC.Key.RegisterInitialValues, FillFunction, OFS);
return ResultPath.str();
}
// We shall find every opcode with benchmarks not in just one cluster, and move
// *all* the benchmarks of said Opcode into one new unstable cluster per Opcode.
void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
- // Given an instruction Opcode, in which clusters do benchmarks of this
- // instruction lie? Normally, they all should be in the same cluster.
- std::vector<llvm::SmallSet<ClusterId, 1>> OpcodeToClusterIDs;
- OpcodeToClusterIDs.resize(NumOpcodes);
- // The list of opcodes that have more than one cluster.
- llvm::SetVector<size_t> UnstableOpcodes;
- // Populate OpcodeToClusterIDs and UnstableOpcodes data structures.
+ // Given an instruction Opcode and Config, in which clusters do benchmarks of
+ // this instruction lie? Normally, they all should be in the same cluster.
+ struct OpcodeAndConfig {
+ explicit OpcodeAndConfig(const InstructionBenchmark &IB)
+ : Opcode(IB.keyInstruction().getOpcode()), Config(&IB.Key.Config) {}
+ unsigned Opcode;
+ const std::string *Config;
+
+ auto Tie() const -> auto { return std::tie(Opcode, *Config); }
+
+ bool operator<(const OpcodeAndConfig &O) const { return Tie() < O.Tie(); }
+ bool operator!=(const OpcodeAndConfig &O) const { return Tie() != O.Tie(); }
+ };
+ std::map<OpcodeAndConfig, llvm::SmallSet<ClusterId, 1>>
+ OpcodeConfigToClusterIDs;
+  // Populate the OpcodeConfigToClusterIDs data structure.
assert(ClusterIdForPoint_.size() == Points_.size() && "size mismatch");
for (const auto &Point : zip(Points_, ClusterIdForPoint_)) {
const ClusterId &ClusterIdOfPoint = std::get<1>(Point);
if (!ClusterIdOfPoint.isValid())
continue; // Only process fully valid clusters.
- const unsigned Opcode = std::get<0>(Point).keyInstruction().getOpcode();
- assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
+ const OpcodeAndConfig Key(std::get<0>(Point));
llvm::SmallSet<ClusterId, 1> &ClusterIDsOfOpcode =
- OpcodeToClusterIDs[Opcode];
+ OpcodeConfigToClusterIDs[Key];
ClusterIDsOfOpcode.insert(ClusterIdOfPoint);
- // Is there more than one ClusterID for this opcode?.
- if (ClusterIDsOfOpcode.size() < 2)
- continue; // If not, then at this moment this Opcode is stable.
- // Else let's record this unstable opcode for future use.
- UnstableOpcodes.insert(Opcode);
}
- assert(OpcodeToClusterIDs.size() == NumOpcodes && "sanity check");
- // We know with how many [new] clusters we will end up with.
- const auto NewTotalClusterCount = Clusters_.size() + UnstableOpcodes.size();
- Clusters_.reserve(NewTotalClusterCount);
- for (const size_t UnstableOpcode : UnstableOpcodes.getArrayRef()) {
+ for (const auto &OpcodeConfigToClusterID : OpcodeConfigToClusterIDs) {
const llvm::SmallSet<ClusterId, 1> &ClusterIDs =
- OpcodeToClusterIDs[UnstableOpcode];
- assert(ClusterIDs.size() > 1 &&
- "Should only have Opcodes with more than one cluster.");
+ OpcodeConfigToClusterID.second;
+ const OpcodeAndConfig &Key = OpcodeConfigToClusterID.first;
+ // We only care about unstable instructions.
+ if (ClusterIDs.size() < 2)
+ continue;
// Create a new unstable cluster, one per Opcode.
Clusters_.emplace_back(ClusterId::makeValidUnstable(Clusters_.size()));
// and the rest of the points is for the UnstableOpcode.
const auto it = std::stable_partition(
OldCluster.PointIndices.begin(), OldCluster.PointIndices.end(),
- [this, UnstableOpcode](size_t P) {
- return Points_[P].keyInstruction().getOpcode() != UnstableOpcode;
+ [this, &Key](size_t P) {
+ return OpcodeAndConfig(Points_[P]) != Key;
});
assert(std::distance(it, OldCluster.PointIndices.end()) > 0 &&
"Should have found at least one bad point");
"New unstable cluster should end up with no less points than there "
"was clusters");
}
- assert(Clusters_.size() == NewTotalClusterCount && "sanity check");
}
llvm::Expected<InstructionBenchmarkClustering>
CodeTemplate &operator=(const CodeTemplate &) = delete;
ExecutionMode Execution = ExecutionMode::UNKNOWN;
+  // See InstructionBenchmarkKey::Config.
+ std::string Config;
// Some information about how this template has been created.
std::string Info;
// The list of the instructions for this template.
// instructions.
void EmitInstruction(const MCInst &Instruction,
const MCSubtargetInfo &STI) override {
- Result->Instructions.push_back(Instruction);
+ Result->Key.Instructions.push_back(Instruction);
}
// Implementation of the AsmCommentConsumer.
const StringRef HexValue = Parts[1].trim();
RegVal.Value = APInt(
/* each hex digit is 4 bits */ HexValue.size() * 4, HexValue, 16);
- Result->RegisterInitialValues.push_back(std::move(RegVal));
+ Result->Key.RegisterInitialValues.push_back(std::move(RegVal));
return;
}
if (CommentText.consume_front("LIVEIN")) {
BC.Info = CT.Info;
for (InstructionTemplate &IT : CT.Instructions) {
randomizeUnsetVariables(State.getExegesisTarget(), ForbiddenRegs, IT);
- BC.Instructions.push_back(IT.build());
+ BC.Key.Instructions.push_back(IT.build());
}
if (CT.ScratchSpacePointerInReg)
BC.LiveIns.push_back(CT.ScratchSpacePointerInReg);
- BC.RegisterInitialValues =
+ BC.Key.RegisterInitialValues =
computeRegisterInitialValues(CT.Instructions);
+ BC.Key.Config = CT.Config;
Output.push_back(std::move(BC));
}
}
EXPECT_FALSE((bool)Snippets.takeError());
ASSERT_THAT(*Snippets, SizeIs(1));
const auto &Snippet = (*Snippets)[0];
- ASSERT_THAT(Snippet.Instructions, ElementsAre(HasOpcode(X86::INC64r)));
- ASSERT_THAT(Snippet.RegisterInitialValues,
+ ASSERT_THAT(Snippet.Key.Instructions, ElementsAre(HasOpcode(X86::INC64r)));
+ ASSERT_THAT(Snippet.Key.RegisterInitialValues,
ElementsAre(RegisterInitialValueIs(X86::RAX, 15),
RegisterInitialValueIs(X86::SIL, 0)));
ASSERT_THAT(Snippet.LiveIns, ElementsAre(X86::RDI, X86::DL));