From: Craig Topper Date: Tue, 29 Aug 2017 05:14:27 +0000 (+0000) Subject: Mark Knights Landing as having slow two memory operand instructions X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=57c0ea353dd307958106f8edc1954cf3ecca8329;p=llvm Mark Knights Landing as having slow two memory operand instructions Summary: Knights Landing, because it is Atom derived, has slow two memory operand instructions. Mark the Knights Landing CPU model accordingly. Patch by David Zarzycki. Reviewers: craig.topper Reviewed By: craig.topper Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D37224 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311979 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 93521a38858..37a7cdd779d 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -226,14 +226,12 @@ def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; -// TODO: This feature ought to be renamed. -// What it really refers to are CPUs for which certain instructions -// (which ones besides the example below?) are microcoded. -// The best examples of this are the memory forms of CALL and PUSH -// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. -def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", - "CallRegIndirect", "true", - "Call register indirect">; +// On some processors, instructions that implicitly take two memory operands are +// slow. In practice, this means that CALL, PUSH, and POP with memory operands +// should be avoided in favor of a MOV + register CALL/PUSH/POP. 
+def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", + "SlowTwoMemOps", "true", + "Two memory operand instructions are slow">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", @@ -401,7 +399,7 @@ class BonnellProc : ProcessorModel : ProcessorModel : ProcessorModel : ProcModel; def : KnightsLandingProc<"knl">; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 17bf3523032..2fec38d7c40 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -575,7 +575,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && // Only do this when the target doesn't favor a register indirect // call. - ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || + ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index b34b8f7d525..82d44410e65 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -8010,13 +8010,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( unsigned Size, unsigned Align, bool AllowCommute) const { const DenseMap > *OpcodeTablePtr = nullptr; - bool isCallRegIndirect = Subtarget.callRegIndirect(); + bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); bool isTwoAddrFold = false; // For CPUs that favor the register form of a call or push, // do not fold loads into calls or pushes, unless optimizing for size // aggressively. 
- if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && + if (isSlowTwoMemOps && !MF.getFunction()->optForMinSize() && (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r || MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r || MI.getOpcode() == X86::PUSH64r)) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 2972de2e448..f20841bd02e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -912,7 +912,7 @@ let RecomputePerFunction = 1 in { def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; -def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; +def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">; def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index a3a9929d242..6ad6da95d7b 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -351,7 +351,7 @@ void X86Subtarget::initializeEnvironment() { HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; - CallRegIndirect = false; + SlowTwoMemOps = false; LEAUsesAG = false; SlowLEA = false; Slow3OpsLEA = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 2d9eef978df..e1e1cdfb8da 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -245,9 +245,9 @@ protected: /// a stall when returning too early. bool PadShortFunctions; - /// True if the Calls with memory reference should be converted - /// to a register-based indirect call. - bool CallRegIndirect; + /// True if two memory operand instructions should use a temporary register + /// instead. 
+ bool SlowTwoMemOps; /// True if the LEA instruction inputs have to be ready at address generation /// (AG) time. @@ -492,7 +492,7 @@ public: bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } - bool callRegIndirect() const { return CallRegIndirect; } + bool slowTwoMemOps() const { return SlowTwoMemOps; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slow3OpsLEA() const { return Slow3OpsLEA; } diff --git a/test/CodeGen/X86/fold-push.ll b/test/CodeGen/X86/fold-push.ll index 9d3afd1c449..c887b835aab 100644 --- a/test/CodeGen/X86/fold-push.ll +++ b/test/CodeGen/X86/fold-push.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL -; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM +; RUN: llc < %s -mtriple=i686-windows -mattr=slow-two-mem-ops | FileCheck %s -check-prefix=CHECK -check-prefix=SLM declare void @foo(i32 %r)