From 46ff7007477a327809ca1172eee18ab5124f930f Mon Sep 17 00:00:00 2001
From: Justin Hibbits
Date: Wed, 17 Jul 2019 12:30:04 +0000
Subject: [PATCH] PowerPC/SPE: Fix load/store handling for SPE

Summary:
As pointed out in a comment on D49754, register spilling will currently
spill SPE registers at almost any offset. However, the instructions
`evstdd` and `evldd` require a) 8-byte alignment, and b) an offset of
less than 256 (unsigned) bytes from the base register, since the offset
must fit into a 5-bit field that counts double-words (0-31).

The update to the register spill test is taken partially from the test
case shown in D49754.

Additionally, as pointed out by Kei Thomsen, globals will currently use
evldd/evstdd even though the offset isn't known at compile time, and so
may exceed the permitted 8-bit (unsigned) offset. This fixes that as
well, by forcing globals to always be accessed with evlddx/evstddx.

Part of the patch contributed by Kei Thomsen.

Reviewers: nemanjai, hfinkel, joerg

Subscribers: kbarton, jsji, llvm-commits

Differential Revision: https://reviews.llvm.org/D54409

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366318 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelLowering.cpp | 23 +++++++++++++++++++++++
 lib/Target/PowerPC/PPCISelLowering.h   |  5 +++++
 lib/Target/PowerPC/PPCRegisterInfo.cpp |  8 +++++++-
 test/CodeGen/PowerPC/spe.ll            | 25 ++++++++++++++++++++++---
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d014e007095..24d50074860 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2233,6 +2233,25 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
+/// be represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
+                                               SDValue &Index,
+                                               SelectionDAG &DAG) const {
+  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+       UI != E; ++UI) {
+    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+      if (Memop->getMemoryVT() == MVT::f64) {
+        Base = N.getOperand(0);
+        Index = N.getOperand(1);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// SelectAddressRegReg - Given the specified addressed, check to see if it
 /// can be represented as an indexed [r+r] operation. Returns false if it
 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
@@ -2244,6 +2263,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                             unsigned EncodingAlignment) const {
   int16_t imm = 0;
   if (N.getOpcode() == ISD::ADD) {
+    // Is this address used by an SPE load/store (f64)? SPE loads and stores
+    // can only encode an 8-bit unsigned offset, so they must use [r+r] here.
+    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
+      return true;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
         (!EncodingAlignment || !(imm % EncodingAlignment)))
       return false; // r+i
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 73c6dcd7c85..97422c6eda3 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -669,6 +669,11 @@ namespace llvm {
                                     ISD::MemIndexedMode &AM,
                                     SelectionDAG &DAG) const override;
 
+    /// SelectAddressEVXRegReg - Given the specified address, check to see if
+    /// it can be represented as an indexed [r+r] operation.
+    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+                                SelectionDAG &DAG) const;
+
     /// SelectAddressRegReg - Given the specified addressed, check to see if it
     /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
     /// is non-zero, only accept displacement which is not suitable for [r+imm].
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 32b3d7e61d0..04988120502 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -938,6 +938,9 @@ static unsigned offsetMinAlignForOpcode(unsigned OpC) {
   case PPC::STXSD:
   case PPC::STXSSP:
     return 4;
+  case PPC::EVLDD:
+  case PPC::EVSTDD:
+    return 8;
   case PPC::LXV:
   case PPC::STXV:
     return 16;
@@ -1060,7 +1063,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // happen in invalid code.
   assert(OpC != PPC::DBG_VALUE &&
          "This should be handled in a target-independent way");
-  if (!noImmForm && ((isInt<16>(Offset) &&
+  bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
+                              isUInt<8>(Offset) :
+                              isInt<16>(Offset);
+  if (!noImmForm && ((OffsetFitsMnemonic &&
                       ((Offset % offsetMinAlign(MI)) == 0)) ||
                      OpC == TargetOpcode::STACKMAP ||
                      OpC == TargetOpcode::PATCHPOINT)) {
diff --git a/test/CodeGen/PowerPC/spe.ll b/test/CodeGen/PowerPC/spe.ll
index 06915ceb2db..bd03fa30e6a 100644
--- a/test/CodeGen/PowerPC/spe.ll
+++ b/test/CodeGen/PowerPC/spe.ll
@@ -523,18 +523,37 @@ entry:
 ; CHECK: #NO_APP
 }
 
-define double @test_spill(double %a) nounwind {
+declare double @test_spill_spe_regs(double, double);
+define dso_local void @test_func2() #0 {
 entry:
+  ret void
+}
+
+declare void @test_memset(i8* nocapture writeonly, i8, i32, i1)
+@global_var1 = global i32 0, align 4
+define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind {
+entry:
+  %v1 = alloca [13 x i32], align 4
+  %v2 = alloca [11 x i32], align 4
   %0 = fadd double %a, %a
-  call void asm sideeffect "","~{r0},~{r3},~{s4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
   %1 = fadd double %0, 3.14159
+  %2 = bitcast [13 x i32]* %v1 to i8*
+  call void @test_memset(i8* align 4 %2, i8 0, i32 24, i1 true)
+  store i32 0, i32* %a5, align 4
+  call void @test_func2()
+  %3 = bitcast [11 x i32]* %v2 to i8*
+  call void @test_memset(i8* align 4 %3, i8 0, i32 20, i1 true)
   br label %return
 
 return:
   ret double %1
 ; CHECK-LABEL: test_spill
-; CHECK: efdadd
+; CHECK: li [[VREG:[0-9]+]], 256
+; CHECK: evstddx {{[0-9]+}}, {{[0-9]+}}, [[VREG]]
+; CHECK-NOT: evstdd {{[0-9]+}}, 256({{[0-9]+}}
 ; CHECK: evstdd
+; CHECK: efdadd
 ; CHECK: evldd
 }

-- 
2.40.0
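
A supplementary note on the encoding argument in the summary: evldd/evstdd
encode their displacement as a 5-bit unsigned count of double-words, so a
byte offset is encodable only if it is 8-byte aligned and below 256. The
standalone C++ sketch below mirrors the isUInt<8>(Offset) plus
offsetMinAlign check the patch installs in eliminateFrameIndex; the helper
name and the plain-integer interface are illustrative only, not part of
the patch or of LLVM's API.

    #include <cassert>
    #include <cstdint>

    // Sketch of the displacement rule for evldd/evstdd: the byte offset
    // must be 8-byte aligned and fit in 8 unsigned bits, giving an
    // encodable range of 0 to 248 bytes (0 to 31 double-words).
    static bool isEncodableEVXOffset(int64_t Offset) {
      bool FitsMnemonic = Offset >= 0 && Offset < 256; // isUInt<8>(Offset)
      bool Aligned = (Offset % 8) == 0;                // 8-byte alignment
      return FitsMnemonic && Aligned;
    }

    int main() {
      assert(isEncodableEVXOffset(0));    // smallest encodable offset
      assert(isEncodableEVXOffset(248));  // 31 double-words, the maximum
      assert(!isEncodableEVXOffset(256)); // too far: needs evlddx/evstddx
      assert(!isEncodableEVXOffset(12));  // misaligned: needs indexed form
      return 0;
    }

Offsets that fail this check fall back to the indexed forms
(evlddx/evstddx), which take the offset in a register and therefore have
no encoding-range limit; that is why the updated spill test expects
"li [[VREG]], 256" followed by an evstddx using that register.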