]> granicus.if.org Git - llvm/commitdiff
AMDGPU: Inline constant when materalizing FI with add on gfx9
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Thu, 12 Sep 2019 23:46:46 +0000 (23:46 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Thu, 12 Sep 2019 23:46:46 +0000 (23:46 +0000)
This was relying on the SGPR usable for the carry out clobber to also
be used for the input. There was no carry out on gfx9. With no carry
out clobber to worry about, so the literal can just be directly used
with a VOP2 add.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371791 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIInstrInfo.cpp
lib/Target/AMDGPU/SIRegisterInfo.cpp
test/CodeGen/AMDGPU/frame-index-elimination.ll
test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir [new file with mode: 0644]

index fe57435204a8ec3eecde5c454cd993318fcc8864..3386f80beb8dd6de725b811f129f480fb6574f65 100644 (file)
@@ -6110,7 +6110,7 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
                                                Register DestReg,
                                                RegScavenger &RS) const {
   if (ST.hasAddNoCarry())
-    return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
+    return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
 
   Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
   // TODO: Users need to deal with this.
index 235de006a1d9c1068e0a1288d69d0f5bfac25733..7967d9c9fb92a7bc107cd1c36bcb93f9aab5d37e 100644 (file)
@@ -1285,12 +1285,15 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
               .addImm(ST.getWavefrontSizeLog2())
               .addReg(DiffReg, RegState::Kill);
 
+            const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
+
             // TODO: Fold if use instruction is another add of a constant.
-            if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+            if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
               // FIXME: This can fail
               MIB.addImm(Offset);
               MIB.addReg(ScaledReg, RegState::Kill);
-              MIB.addImm(0); // clamp bit
+              if (!IsVOP2)
+                MIB.addImm(0); // clamp bit
             } else {
               Register ConstOffsetReg =
                 RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false);
index 0d8e74568763c7392804eb4070f9bac3bdb44f1e..b25a40f443c6bba24917eae8b4bd90c104a8eca3 100644 (file)
@@ -176,13 +176,13 @@ ret:
 ; Added offset can't be used with VOP3 add
 ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32:
 ; GCN: s_sub_u32 [[SUB:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
-; GCN-DAG: s_movk_i32 [[K:s[0-9]+|vcc_lo|vcc_hi]], 0x200
+; CI-DAG: s_movk_i32 [[K:s[0-9]+|vcc_lo|vcc_hi]], 0x200
 
 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB]], 6
 ; CI: v_add_i32_e32 [[VZ:v[0-9]+]], vcc, [[K]], [[SCALED]]
 
 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[SUB]]
-; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], [[K]], [[SCALED]]
+; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
 
 ; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]]
 ; GCN: ds_write_b32 v0, [[VZ]]
@@ -200,13 +200,13 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
 
 ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
 ; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s32, s33
-; GCN-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x200
+; CI-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x200
 
 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6
 ; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], [[SCALED]]
 
 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[DIFF]]
-; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], [[OFFSET]], [[SCALED]]
+; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
 
 ; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]]
 ; GCN: ds_write_b32 v0, [[VZ]]
diff --git a/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
new file mode 100644 (file)
index 0000000..90afb18
--- /dev/null
@@ -0,0 +1,43 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+
+# Test what happens when an SGPR is unavailable for the unused add. The non-inline constant needs to be folded into the add instruction and not materialized in a register.
+
+---
+name: scavenge_sgpr_pei_no_sgprs
+tracksRegLiveness: true
+
+stack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
+  - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
+
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr34
+  frameOffsetReg:  $sgpr33
+  stackPtrOffsetReg:  $sgpr32
+
+body:             |
+  bb.0:
+    liveins: $vgpr1
+
+    ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
+    ; CHECK: liveins: $vgpr1
+    ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
+    ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
+    ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
+    ; CHECK: $sgpr33 = S_SUB_U32 $sgpr33, $sgpr34, implicit-def $scc
+    ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, killed $sgpr33, implicit $exec
+    ; CHECK: $vgpr2 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
+    ; CHECK: $sgpr33 = S_ADD_U32 $sgpr33, $sgpr34, implicit-def $scc
+    ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+    ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup COPY $sgpr27
+    ; CHECK: S_ENDPGM 0, implicit $vcc
+    S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
+    $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+    S_ENDPGM 0, implicit $vcc
+...