]> granicus.if.org Git - llvm/commitdiff
AMDGPU: Fix folding immediate into readfirstlane through reg_sequence
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Wed, 19 Jun 2019 20:44:15 +0000 (20:44 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Wed, 19 Jun 2019 20:44:15 +0000 (20:44 +0000)
The def instruction for the vreg may not match, because it may be
folding through a reg_sequence. The assert was overly conservative and
not necessary. It's not actually important if DefMI really defined the
register, because the fold that will be done cares about the def of
the value that will be folded.

For some reason copies aren't making it through the reg_sequence,
although they should.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363876 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIFoldOperands.cpp
lib/Target/AMDGPU/SIInstrInfo.cpp
test/CodeGen/AMDGPU/constant-address-space-32bit.ll
test/CodeGen/AMDGPU/fold-readlane.mir

index 4a1fc1332c3686aef3a6d5942d596a6fb3cfb614..3f566884f6b0ced841426a14981a497967b145f3 100644 (file)
@@ -436,9 +436,11 @@ void SIFoldOperands::foldOperand(
     unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
     unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
 
+    MachineRegisterInfo::use_iterator Next;
     for (MachineRegisterInfo::use_iterator
            RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
-         RSUse != RSE; ++RSUse) {
+         RSUse != RSE; RSUse = Next) {
+      Next = std::next(RSUse);
 
       MachineInstr *RSUseMI = RSUse->getParent();
       if (RSUse->getSubReg() != RegSeqDstSubReg)
@@ -523,6 +525,9 @@ void SIFoldOperands::foldOperand(
           return;
 
         UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
+
+        // FIXME: ChangeToImmediate should clear subreg
+        UseMI->getOperand(1).setSubReg(0);
         UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
         UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
         return;
index 56935b35734a9617ddaa0a1f0f00c21606a25e48..5831abb8071c2c0560f89cd167b847b81807a4a0 100644 (file)
@@ -6079,7 +6079,6 @@ bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                       const MachineInstr &DefMI,
                                       const MachineInstr *UseMI) {
   assert(MRI.isSSA() && "Must be run on SSA");
-  assert(DefMI.definesRegister(VReg) && "wrong def instruction");
 
   auto *TRI = MRI.getTargetRegisterInfo();
   auto *DefBB = DefMI.getParent();
index 040bcbc0182724376b8c1395ff21f021813ae38e..e90c85545b00dd058c3405ffcbb9671e22f7ab09 100644 (file)
@@ -279,12 +279,24 @@ define amdgpu_vs float @load_addr_no_fold(i32 addrspace(6)* inreg noalias %p0) #
   ret float %r2
 }
 
+; CHECK-LABEL: {{^}}vgpr_arg_src:
+; CHECK: v_readfirstlane_b32 s[[READLANE:[0-9]+]], v0
+; CHECK: s_mov_b32 s[[ZERO:[0-9]+]]
+; CHECK: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[READLANE]]:[[ZERO]]{{\]}}
+define amdgpu_vs float @vgpr_arg_src(<4 x i32> addrspace(6)* %arg) {
+main_body:
+  %tmp9 = load <4 x i32>, <4 x i32> addrspace(6)* %arg
+  %tmp10 = call nsz float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp9, i32 undef, i32 0, i32 0, i32 0) #1
+  ret float %tmp10
+}
+
 ; Function Attrs: nounwind readnone speculatable
 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
 
 ; Function Attrs: nounwind readonly
 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
 
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #7
 
 !0 = !{}
 
index 55b7a612d777a6a8066ffbeab6022dd8cad8cce1..3c68686aa4a64f7d5060af372196e8e82a8a91c9 100644 (file)
@@ -248,3 +248,126 @@ body:             |
     %1:sreg_32_xm0 = S_MOV_B32 12
     %2:sreg_32_xm0 = V_READLANE_B32 %0, %1, implicit $exec
 ...
+
+# Constant for subreg0
+# GCN-LABEL: name: fold-imm-readfirstlane-regsequence0{{$}}
+
+# GCN: %0:vgpr_32 = COPY $vgpr0
+# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
+# GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec
+# GCN-NEXT: %4:sgpr_32 = S_MOV_B32 0
+---
+name: fold-imm-readfirstlane-regsequence0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
+    %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
+    %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
+...
+
+# Constant for subreg1
+# GCN-LABEL: name: fold-imm-readfirstlane-regsequence1{{$}}
+# GCN: %0:vgpr_32 = COPY $vgpr0
+# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, killed %0, %subreg.sub1
+# GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0
+# GCN-NEXT: %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1, implicit $exec
+
+---
+name: fold-imm-readfirstlane-regsequence1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, killed %0:vgpr_32, %subreg.sub1
+    %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
+    %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
+...
+
+# Different constant regs for each subreg
+# GCN-LABEL: name: fold-imm-readfirstlane-regsequence2{{$}}
+# GCN: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
+# GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0
+# GCN-NEXT: %4:sgpr_32 = S_MOV_B32 1
+---
+name: fold-imm-readfirstlane-regsequence2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
+    %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
+    %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
+...
+
+# Same constant reg for each subreg, so there are multiple constant uses
+# GCN-LABEL: name: fold-imm-readfirstlane-regsequence3{{$}}
+# GCN: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
+# GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0
+# GCN-NEXT: %4:sgpr_32 = S_MOV_B32 0
+---
+name: fold-imm-readfirstlane-regsequence3
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
+    %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
+    %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
+...
+
+# FIXME: This should fold
+# GCN-LABEL: name: fold-copy-readfirstlane-regsequence0{{$}}
+# GCN: %0:vgpr_32 = COPY $sgpr10
+# GCN-NEXT: %1:vgpr_32 = COPY $sgpr11
+# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
+# GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec
+# GCN-NEXT: %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1, implicit $exec
+---
+name: fold-copy-readfirstlane-regsequence0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr10, $sgpr11
+    %0:vgpr_32 = COPY $sgpr10
+    %1:vgpr_32 = COPY $sgpr11
+    %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
+    %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
+    %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
+...
+
+# GCN-LABEL: name: fold-copy-readfirstlane-regsequence1{{$}}
+# GCN: %0:sreg_32_xm0 = COPY $sgpr10
+# GCN-NEXT: %1:sreg_32_xm0 = COPY $sgpr11
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %3:vgpr_32 = COPY %1
+# GCN-NEXT: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, killed %3, %subreg.sub1
+# GCN-NEXT: %5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0, implicit $exec
+# GCN-NEXT: %6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1, implicit $exec
+---
+name: fold-copy-readfirstlane-regsequence1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr10, $sgpr11
+    %0:sreg_32_xm0 = COPY $sgpr10
+    %1:sreg_32_xm0 = COPY $sgpr11
+    %2:vgpr_32 = COPY %0
+    %3:vgpr_32 = COPY %1
+    %4:vreg_64 = REG_SEQUENCE %2:vgpr_32, %subreg.sub0, killed %3:vgpr_32, %subreg.sub1
+    %5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0:vreg_64, implicit $exec
+    %6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1:vreg_64, implicit $exec
+...