]> granicus.if.org Git - llvm/commitdiff
AMDGPU/GlobalISel: Fix G_GEP with mixed SGPR/VGPR operands
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Tue, 2 Jul 2019 14:40:22 +0000 (14:40 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Tue, 2 Jul 2019 14:40:22 +0000 (14:40 +0000)
The register bank for the destination of the sample argument copy was
wrong. We shouldn't be constraining each source to the result register
bank. Allow constraining the original register to the right size.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364928 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir
test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir

index c7e5f05d401b76cee21d259f6f9dd92d261121f9..c99707ed697bee1541305ad6d5f89c44e46e03b1 100644 (file)
@@ -278,12 +278,15 @@ bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
       .add(Lo1)
       .add(Lo2)
       .addImm(0);
-    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
+    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
       .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
       .add(Hi1)
       .add(Hi2)
       .addReg(CarryReg, RegState::Kill)
       .addImm(0);
+
+    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
+      return false;
   }
 
   BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
@@ -292,9 +295,8 @@ bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
     .addReg(DstHi)
     .addImm(AMDGPU::sub1);
 
-  if (!RBI.constrainGenericRegister(DstReg, RC, MRI) ||
-      !RBI.constrainGenericRegister(I.getOperand(1).getReg(), RC, MRI) ||
-      !RBI.constrainGenericRegister(I.getOperand(2).getReg(), RC, MRI))
+
+  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
     return false;
 
   I.eraseFromParent();
index bc943384b6732cf909ded4c09bc8f7b60e98d50a..0cebbb24be6cfdbaa1d8db8f00ea4f66d31b336a 100644 (file)
@@ -160,7 +160,7 @@ body: |
   bb.0:
     liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
     ; GFX6-LABEL: name: gep_p0_sgpr_vgpr
-    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@@ -172,7 +172,7 @@ body: |
     ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX8-LABEL: name: gep_p0_sgpr_vgpr
     ; GFX8: $vcc_hi = IMPLICIT_DEF
-    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@@ -184,7 +184,7 @@ body: |
     ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX9-LABEL: name: gep_p0_sgpr_vgpr
     ; GFX9: $vcc_hi = IMPLICIT_DEF
-    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@@ -195,7 +195,7 @@ body: |
     ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
     ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr
-    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
     ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@@ -207,7 +207,7 @@ body: |
     ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr
     ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
-    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
     ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@@ -217,7 +217,7 @@ body: |
     ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
     ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
     ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
-    %0:vgpr(p0) = COPY $sgpr0_sgpr1
+    %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:vgpr(s64) = COPY $vgpr0_vgpr1
     %2:vgpr(p0) = G_GEP %0, %1
     S_ENDPGM 0, implicit %2
@@ -319,34 +319,34 @@ body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
     ; GFX6-LABEL: name: gep_p3_sgpr_vgpr
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
     ; GFX6: S_ENDPGM 0, implicit %2
     ; GFX8-LABEL: name: gep_p3_sgpr_vgpr
     ; GFX8: $vcc_hi = IMPLICIT_DEF
-    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
     ; GFX8: S_ENDPGM 0, implicit %2
     ; GFX9-LABEL: name: gep_p3_sgpr_vgpr
     ; GFX9: $vcc_hi = IMPLICIT_DEF
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
     ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
     ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr
-    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
     ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
     ; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
     ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr
     ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
-    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
     ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
     ; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
-    %0:vgpr(p3) = COPY $sgpr0
+    %0:sgpr(p3) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0
     %2:vgpr(p3) = G_GEP %0, %1
     S_ENDPGM 0, implicit %2
index f82ad33a5150c976afc6cc41e9369e4278da23e0..e5512d55276a7b5cd982152ed2834ed94032b042 100644 (file)
@@ -12,7 +12,7 @@ legalized:       true
 regBankSelected: true
 
 # GCN: body:
-# GCN: [[PTR:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
 
 # Immediate offset:
 # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0