From bf17945077810047d22af3532f389c2af5b395fd Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Wed, 9 Oct 2019 08:49:13 +0000 Subject: [PATCH] [llvm-exegesis] Explore LEA addressing modes. Summary: This will help for PR32326. This shows the well-known issue with `RBP` and `R13` as base registers. Reviewers: gchatelet Subscribers: tschuett, llvm-commits, RKSimon, andreadb Tags: #llvm Differential Revision: https://reviews.llvm.org/D68646 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374146 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-exegesis/X86/latency-LEA64r.s | 16 +++ test/tools/llvm-exegesis/X86/uops-LEA64r.s | 16 +++ tools/llvm-exegesis/lib/RegisterAliasing.h | 7 ++ tools/llvm-exegesis/lib/Uops.cpp | 6 - tools/llvm-exegesis/lib/X86/Target.cpp | 117 ++++++++++++++++-- 5 files changed, 145 insertions(+), 17 deletions(-) create mode 100644 test/tools/llvm-exegesis/X86/latency-LEA64r.s create mode 100644 test/tools/llvm-exegesis/X86/uops-LEA64r.s diff --git a/test/tools/llvm-exegesis/X86/latency-LEA64r.s b/test/tools/llvm-exegesis/X86/latency-LEA64r.s new file mode 100644 index 00000000000..e5d6db2ada2 --- /dev/null +++ b/test/tools/llvm-exegesis/X86/latency-LEA64r.s @@ -0,0 +1,16 @@ +# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s + +CHECK: --- +CHECK-NEXT: mode: latency +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: LEA64r +CHECK-NEXT: config: '0(%[[REG1:[A-Z0-9]+]], %[[REG1]], 1)' + +CHECK: --- +CHECK-NEXT: mode: latency +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: LEA64r +CHECK-NEXT: config: '42(%[[REG2:[A-Z0-9]+]], %[[REG2]], 1)' diff --git a/test/tools/llvm-exegesis/X86/uops-LEA64r.s b/test/tools/llvm-exegesis/X86/uops-LEA64r.s new file mode 100644 index 00000000000..68dde0a3be4 --- /dev/null +++ 
b/test/tools/llvm-exegesis/X86/uops-LEA64r.s @@ -0,0 +1,16 @@ +# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s + +CHECK: --- +CHECK-NEXT: mode: uops +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: LEA64r +CHECK-NEXT: config: '0(%[[REG1:[A-Z0-9]+]], %[[REG2:[A-Z0-9]+]], 1)' + +CHECK: --- +CHECK-NEXT: mode: uops +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: LEA64r +CHECK-NEXT: config: '42(%[[REG3:[A-Z0-9]+]], %[[REG4:[A-Z0-9]+]], 1)' diff --git a/tools/llvm-exegesis/lib/RegisterAliasing.h b/tools/llvm-exegesis/lib/RegisterAliasing.h index 0ddccdd6e52..361acb641f8 100644 --- a/tools/llvm-exegesis/lib/RegisterAliasing.h +++ b/tools/llvm-exegesis/lib/RegisterAliasing.h @@ -103,6 +103,13 @@ private: RegisterClasses; }; +// `a = a & ~b`, optimized for few bit sets in B and no allocation. 
+inline void remove(llvm::BitVector &A, const llvm::BitVector &B) { + assert(A.size() == B.size()); + for (auto I : B.set_bits()) + A.reset(I); +} + } // namespace exegesis } // namespace llvm diff --git a/tools/llvm-exegesis/lib/Uops.cpp b/tools/llvm-exegesis/lib/Uops.cpp index 6823ec8fd17..c9cf06f409b 100644 --- a/tools/llvm-exegesis/lib/Uops.cpp +++ b/tools/llvm-exegesis/lib/Uops.cpp @@ -89,12 +89,6 @@ getVariablesWithTiedOperands(const Instruction &Instr) { return Result; } -static void remove(llvm::BitVector &a, const llvm::BitVector &b) { - assert(a.size() == b.size()); - for (auto I : b.set_bits()) - a.reset(I); -} - UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; UopsSnippetGenerator::~UopsSnippetGenerator() = default; diff --git a/tools/llvm-exegesis/lib/X86/Target.cpp b/tools/llvm-exegesis/lib/X86/Target.cpp index 1532af8ddec..681bf4cebf8 100644 --- a/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/tools/llvm-exegesis/lib/X86/Target.cpp @@ -17,6 +17,7 @@ #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/Support/FormatVariadic.h" namespace llvm { namespace exegesis { @@ -177,6 +178,72 @@ static unsigned getX86FPFlags(const Instruction &Instr) { return Instr.Description->TSFlags & llvm::X86II::FPTypeMask; } +// Helper to fill a memory operand with a value. +static void setMemOp(InstructionTemplate &IT, int OpIdx, + const MCOperand &OpVal) { + const auto Op = IT.Instr.Operands[OpIdx]; + assert(Op.isExplicit() && "invalid memory pattern"); + IT.getValueFor(Op) = OpVal; +}; + +// Common (latency, uops) code for LEA templates. `GetDestReg` takes the +// addressing base and index registers and returns the LEA destination register. 
+static llvm::Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon( + const Instruction &Instr, const BitVector &ForbiddenRegisters, + const LLVMState &State, const SnippetGenerator::Options &Opts, + std::function<unsigned(unsigned, unsigned)> GetDestReg) { + assert(Instr.Operands.size() == 6 && "invalid LEA"); + assert(X86II::getMemoryOperandNo(Instr.Description->TSFlags) == 1 && + "invalid LEA"); + + constexpr const int kDestOp = 0; + constexpr const int kBaseOp = 1; + constexpr const int kIndexOp = 3; + auto PossibleDestRegs = + Instr.Operands[kDestOp].getRegisterAliasing().sourceBits(); + remove(PossibleDestRegs, ForbiddenRegisters); + auto PossibleBaseRegs = + Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits(); + remove(PossibleBaseRegs, ForbiddenRegisters); + auto PossibleIndexRegs = + Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits(); + remove(PossibleIndexRegs, ForbiddenRegisters); + + const auto &RegInfo = State.getRegInfo(); + std::vector<CodeTemplate> Result; + for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) { + for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) { + for (int LogScale = 0; LogScale <= 3; ++LogScale) { + // FIXME: Add an option for controlling how we explore immediates. + for (const int Disp : {0, 42}) { + InstructionTemplate IT(Instr); + const int64_t Scale = 1ull << LogScale; + setMemOp(IT, 1, MCOperand::createReg(BaseReg)); + setMemOp(IT, 2, MCOperand::createImm(Scale)); + setMemOp(IT, 3, MCOperand::createReg(IndexReg)); + setMemOp(IT, 4, MCOperand::createImm(Disp)); + // SegmentReg must be 0 for LEA. + setMemOp(IT, 5, MCOperand::createReg(0)); + + // Output reg is selected by the caller. 
+ setMemOp(IT, 0, MCOperand::createReg(GetDestReg(BaseReg, IndexReg))); + + CodeTemplate CT; + CT.Instructions.push_back(std::move(IT)); + CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg), + RegInfo.getName(IndexReg), Scale, Disp) + .str(); + Result.push_back(std::move(CT)); + if (Result.size() >= Opts.MaxConfigsPerOpcode) + return Result; + } + } + } + } + + return Result; +} + namespace { class X86LatencySnippetGenerator : public LatencySnippetGenerator { public: @@ -194,6 +261,17 @@ X86LatencySnippetGenerator::generateCodeTemplates( if (auto E = IsInvalidOpcode(Instr)) return std::move(E); + // LEA gets special attention. + const auto Opcode = Instr.Description->getOpcode(); + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { + return generateLEATemplatesCommon(Instr, ForbiddenRegisters, State, Opts, + [](unsigned BaseReg, unsigned IndexReg) { + // We just select the same base and + // output register. + return BaseReg; + }); + } + switch (getX86FPFlags(Instr)) { case llvm::X86II::NotFP: return LatencySnippetGenerator::generateCodeTemplates(Instr, @@ -225,6 +303,7 @@ public: generateCodeTemplates(const Instruction &Instr, const BitVector &ForbiddenRegisters) const override; }; + } // namespace llvm::Expected<std::vector<CodeTemplate>> @@ -233,6 +312,28 @@ X86UopsSnippetGenerator::generateCodeTemplates( if (auto E = IsInvalidOpcode(Instr)) return std::move(E); + // LEA gets special attention. + const auto Opcode = Instr.Description->getOpcode(); + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { + // Any destination register that is not used for addressing is fine. 
+ auto PossibleDestRegs = + Instr.Operands[0].getRegisterAliasing().sourceBits(); + remove(PossibleDestRegs, ForbiddenRegisters); + return generateLEATemplatesCommon( + Instr, ForbiddenRegisters, State, Opts, + [this, &PossibleDestRegs](unsigned BaseReg, unsigned IndexReg) { + auto PossibleDestRegsNow = PossibleDestRegs; + remove(PossibleDestRegsNow, + State.getRATC().getRegister(BaseReg).aliasedBits()); + remove(PossibleDestRegsNow, + State.getRATC().getRegister(IndexReg).aliasedBits()); + assert(PossibleDestRegsNow.set_bits().begin() != + PossibleDestRegsNow.set_bits().end() && + "no remaining registers"); + return *PossibleDestRegsNow.set_bits().begin(); + }); + } + switch (getX86FPFlags(Instr)) { case llvm::X86II::NotFP: return UopsSnippetGenerator::generateCodeTemplates(Instr, @@ -548,17 +649,11 @@ void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT, ++MemOpIdx; } } - // Now fill in the memory operands. - const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) { - const auto Op = IT.Instr.Operands[OpIdx]; - assert(Op.isMemory() && Op.isExplicit() && "invalid memory pattern"); - IT.getValueFor(Op) = OpVal; - }; - SetOp(MemOpIdx + 0, MCOperand::createReg(Reg)); // BaseReg - SetOp(MemOpIdx + 1, MCOperand::createImm(1)); // ScaleAmt - SetOp(MemOpIdx + 2, MCOperand::createReg(0)); // IndexReg - SetOp(MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp - SetOp(MemOpIdx + 4, MCOperand::createReg(0)); // Segment + setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg)); // BaseReg + setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1)); // ScaleAmt + setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0)); // IndexReg + setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp + setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0)); // Segment } void ExegesisX86Target::decrementLoopCounterAndJump( -- 2.40.0