From 46ff7007477a327809ca1172eee18ab5124f930f Mon Sep 17 00:00:00 2001
From: Justin Hibbits
Date: Wed, 17 Jul 2019 12:30:04 +0000
Subject: [PATCH] PowerPC/SPE: Fix load/store handling for SPE

Summary:
As pointed out in a comment on D49754, register spilling will currently
spill SPE registers at almost any offset. However, the instructions
`evstdd` and `evldd` require a) 8-byte alignment, and b) an offset of
less than 256 (unsigned) bytes from the base register, since the offset
must fit into a 5-bit field that counts double-words (0-31).

The update to the register spill test is taken partially from the test
case shown in D49754.

Additionally, as pointed out by Kei Thomsen, globals will currently use
evldd/evstdd even though the offset isn't known at compile time, and so
may exceed the permitted 8-bit (unsigned) offset. This fixes that as
well, by forcing globals to always be accessed with evlddx/evstddx.

Part of the patch contributed by Kei Thomsen.

Reviewers: nemanjai, hfinkel, joerg

Subscribers: kbarton, jsji, llvm-commits

Differential Revision: https://reviews.llvm.org/D54409

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366318 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelLowering.cpp | 23 +++++++++++++++++++++++
 lib/Target/PowerPC/PPCISelLowering.h   |  5 +++++
 lib/Target/PowerPC/PPCRegisterInfo.cpp |  8 +++++++-
 test/CodeGen/PowerPC/spe.ll            | 25 ++++++++++++++++++++++---
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d014e007095..24d50074860 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2233,6 +2233,25 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
+/// be represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
+                                               SDValue &Index,
+                                               SelectionDAG &DAG) const {
+  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+       UI != E; ++UI) {
+    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+      if (Memop->getMemoryVT() == MVT::f64) {
+        Base = N.getOperand(0);
+        Index = N.getOperand(1);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// SelectAddressRegReg - Given the specified addressed, check to see if it
 /// can be represented as an indexed [r+r] operation. Returns false if it
 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
@@ -2244,6 +2263,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                             unsigned EncodingAlignment) const {
   int16_t imm = 0;
   if (N.getOpcode() == ISD::ADD) {
+    // Is this address used by an SPE load/store (f64)? SPE loads and stores
+    // can only encode an 8-bit unsigned offset, so they must use [r+r] here.
+    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
+      return true;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
         (!EncodingAlignment || !(imm % EncodingAlignment)))
       return false; // r+i
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 73c6dcd7c85..97422c6eda3 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -669,6 +669,11 @@ namespace llvm {
                                     ISD::MemIndexedMode &AM,
                                     SelectionDAG &DAG) const override;
 
+    /// SelectAddressEVXRegReg - Given the specified address, check to see if
+    /// it can be represented as an indexed [r+r] operation.
+    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+                                SelectionDAG &DAG) const;
+
     /// SelectAddressRegReg - Given the specified addressed, check to see if it
     /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
     /// is non-zero, only accept displacement which is not suitable for [r+imm].
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 32b3d7e61d0..04988120502 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -938,6 +938,9 @@ static unsigned offsetMinAlignForOpcode(unsigned OpC) {
   case PPC::STXSD:
   case PPC::STXSSP:
     return 4;
+  case PPC::EVLDD:
+  case PPC::EVSTDD:
+    return 8;
   case PPC::LXV:
   case PPC::STXV:
     return 16;
@@ -1060,7 +1063,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // happen in invalid code.
   assert(OpC != PPC::DBG_VALUE &&
          "This should be handled in a target-independent way");
-  if (!noImmForm && ((isInt<16>(Offset) &&
+  bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
+                              isUInt<8>(Offset) :
+                              isInt<16>(Offset);
+  if (!noImmForm && ((OffsetFitsMnemonic &&
                       ((Offset % offsetMinAlign(MI)) == 0)) ||
                      OpC == TargetOpcode::STACKMAP ||
                      OpC == TargetOpcode::PATCHPOINT)) {
diff --git a/test/CodeGen/PowerPC/spe.ll b/test/CodeGen/PowerPC/spe.ll
index 06915ceb2db..bd03fa30e6a 100644
--- a/test/CodeGen/PowerPC/spe.ll
+++ b/test/CodeGen/PowerPC/spe.ll
@@ -523,18 +523,37 @@ entry:
 ; CHECK: #NO_APP
 }
 
-define double @test_spill(double %a) nounwind {
+declare double @test_spill_spe_regs(double, double);
+define dso_local void @test_func2() #0 {
 entry:
+  ret void
+}
+
+declare void @test_memset(i8* nocapture writeonly, i8, i32, i1)
+@global_var1 = global i32 0, align 4
+define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind {
+entry:
+  %v1 = alloca [13 x i32], align 4
+  %v2 = alloca [11 x i32], align 4
   %0 = fadd double %a, %a
-  call void asm sideeffect "","~{r0},~{r3},~{s4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
   %1 = fadd double %0, 3.14159
+  %2 = bitcast [13 x i32]* %v1 to i8*
+  call void @test_memset(i8* align 4 %2, i8 0, i32 24, i1 true)
+  store i32 0, i32* %a5, align 4
+  call void @test_func2()
+  %3 = bitcast [11 x i32]* %v2 to i8*
+  call void @test_memset(i8* align 4 %3, i8 0, i32 20, i1 true)
   br label %return
 
 return:
   ret double %1
 ; CHECK-LABEL: test_spill
-; CHECK: efdadd
+; CHECK: li [[VREG:[0-9]+]], 256
+; CHECK: evstddx {{[0-9]+}}, {{[0-9]+}}, [[VREG]]
+; CHECK-NOT: evstdd {{[0-9]+}}, 256({{[0-9]+}}
 ; CHECK: evstdd
+; CHECK: efdadd
 ; CHECK: evldd
 }

-- 
2.40.0
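
A supplementary note on the encoding argument in the summary: evldd/evstdd
encode their displacement as a 5-bit unsigned count of double-words, so a
byte offset is encodable only if it is 8-byte aligned and below 256. The
standalone C++ sketch below mirrors the isUInt<8>(Offset) plus
offsetMinAlign check the patch installs in eliminateFrameIndex; the helper
name and the plain-integer interface are illustrative only, not part of
the patch or of LLVM's API.

    #include <cassert>
    #include <cstdint>

    // Sketch of the displacement rule for evldd/evstdd: the byte offset
    // must be 8-byte aligned and fit in 8 unsigned bits, giving an
    // encodable range of 0 to 248 bytes (0 to 31 double-words).
    static bool isEncodableEVXOffset(int64_t Offset) {
      bool FitsMnemonic = Offset >= 0 && Offset < 256; // isUInt<8>(Offset)
      bool Aligned = (Offset % 8) == 0;                // 8-byte alignment
      return FitsMnemonic && Aligned;
    }

    int main() {
      assert(isEncodableEVXOffset(0));    // smallest encodable offset
      assert(isEncodableEVXOffset(248));  // 31 double-words, the maximum
      assert(!isEncodableEVXOffset(256)); // too far: needs evlddx/evstddx
      assert(!isEncodableEVXOffset(12));  // misaligned: needs indexed form
      return 0;
    }

Offsets that fail this check fall back to the indexed forms
(evlddx/evstddx), which take the offset in a register and therefore have
no encoding-range limit; that is why the updated spill test expects
"li [[VREG]], 256" followed by an evstddx using that register.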