From 3940fc1fe6437507724081a74d1d84e4cf40407a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 5 Sep 2019 23:54:35 +0000 Subject: [PATCH] AMDGPU: Allow getMemOperandWithOffset to analyze stack accesses Report soffset as a base register if the scratch resource can be ignored. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371149 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIInstrInfo.cpp | 21 +++++++- test/CodeGen/AMDGPU/byval-frame-setup.ll | 7 +-- test/CodeGen/AMDGPU/call-argument-types.ll | 51 +++++++++---------- .../AMDGPU/callee-special-input-vgprs.ll | 12 ++--- .../CodeGen/AMDGPU/frame-index-elimination.ll | 2 +- test/CodeGen/AMDGPU/insert_vector_elt.ll | 8 +-- 6 files changed, 58 insertions(+), 43 deletions(-) diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 2a3a1b34094..4456f68fad7 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -318,8 +318,25 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, if (isMUBUF(LdSt) || isMTBUF(LdSt)) { const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); - if (SOffset && SOffset->isReg()) - return false; + if (SOffset && SOffset->isReg()) { + // We can only handle this if it's a stack access, as any other resource + // would require reporting multiple base registers. 
+ const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (AddrReg && !AddrReg->isFI()) + return false; + + const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); + const SIMachineFunctionInfo *MFI + = LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>(); + if (RSrc->getReg() != MFI->getScratchRSrcReg()) + return false; + + const MachineOperand *OffsetImm = + getNamedOperand(LdSt, AMDGPU::OpName::offset); + BaseOp = SOffset; + Offset = OffsetImm->getImm(); + return true; + } const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (!AddrReg) diff --git a/test/CodeGen/AMDGPU/byval-frame-setup.ll b/test/CodeGen/AMDGPU/byval-frame-setup.ll index 2ab3327d50b..2fe58159694 100644 --- a/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -133,10 +133,10 @@ entry: ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 @@ -263,10 +263,10 @@ entry: ; GCN-NOT: s_add_u32 s32, s32, 0x800 +; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 -; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 @@ -331,10 +331,11 @@ entry: ; GCN-DAG: buffer_store_dword [[LOAD2]], 
off, s[0:3], s32 offset:8 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16 + ; GCN: s_waitcnt vmcnt(0) ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 diff --git a/test/CodeGen/AMDGPU/call-argument-types.ll b/test/CodeGen/AMDGPU/call-argument-types.ll index bb09c220c27..816e6836c17 100644 --- a/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/test/CodeGen/AMDGPU/call-argument-types.ll @@ -765,17 +765,16 @@ entry: ; GCN-LABEL: {{^}}tail_call_byval_align16: ; GCN-NOT: s32 -; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16 -; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:20 +; GCN-NOT: buffer_store_dword v33 +; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN-NOT: buffer_store_dword v33 +; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:20 ; GCN: s_getpc_b64 -; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} -; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 +; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16 +; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN-NOT: s32 ; GCN: s_setpc_b64 define void @tail_call_byval_align16(<32 x i32> %val, 
double %tmp) #0 { @@ -806,12 +805,12 @@ entry: ; GCN-LABEL: {{^}}stack_12xv3i32: ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 -; GCN: buffer_store_dword [[REG12]], {{.*$}} ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 +; GCN: buffer_store_dword [[REG12]], {{.*$}} +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12 ; GCN: v_mov_b32_e32 v31, 11 ; GCN: s_getpc @@ -835,12 +834,12 @@ entry: ; GCN-LABEL: {{^}}stack_12xv3f32: ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 -; GCN: buffer_store_dword [[REG12]], {{.*$}} ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 -; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 +; GCN: buffer_store_dword [[REG12]], {{.*$}} +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12 ; GCN: v_mov_b32_e32 v31, 0x41300000 ; GCN: s_getpc @@ -865,20 +864,20 @@ entry: ; GCN-LABEL: {{^}}stack_8xv5i32: ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 -; GCN: buffer_store_dword [[REG8]], {{.*$}} ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 -; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 -; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 -; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 +; GCN: buffer_store_dword [[REG8]], {{.*$}} +; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4 +; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8 +; GCN: buffer_store_dword 
[[REG11]], {{.*}} offset:12 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28 ; GCN: v_mov_b32_e32 v31, 7 @@ -899,20 +898,20 @@ entry: ; GCN-LABEL: {{^}}stack_8xv5f32: ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000 -; GCN: buffer_store_dword [[REG8]], {{.*$}} ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000 -; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000 -; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000 -; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 +; GCN: buffer_store_dword [[REG8]], {{.*$}} +; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4 +; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8 +; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 -; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28 ; GCN: v_mov_b32_e32 v31, 0x40e00000 diff --git a/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index d5211ceede3..61528cf504f 100644 --- a/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ 
b/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -489,16 +489,15 @@ define void @too_many_args_use_workitem_id_x_byval( ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval: ; GCN: enable_vgpr_workitem_id = 0 - -; GCN: s_mov_b32 s33, s7 +; GCN-DAG: s_mov_b32 s33, s7 +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} +; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4 +; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4 ; GCN: s_add_u32 s32, s33, 0x400{{$}} ; GCN-NOT: s32 -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} -; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4 -; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]], ; GCN: s_swappc_b64 @@ -521,9 +520,8 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} ; GCN: buffer_store_dword [[K]], off, s[0:3], s34{{$}} -; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4 - ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}} +; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]], ; GCN: s_swappc_b64 diff --git a/test/CodeGen/AMDGPU/frame-index-elimination.ll b/test/CodeGen/AMDGPU/frame-index-elimination.ll index 07ec95ca36d..470b64808e0 100644 --- a/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -231,10 +231,10 @@ declare void @func(<4 x float> addrspace(5)* nocapture) #0 ; GCN-LABEL: {{^}}undefined_stack_store_reg: ; GCN: s_and_saveexec_b64 +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset: ; GCN: buffer_store_dword v0, off, s[0:3], s34 
offset: ; GCN: buffer_store_dword v0, off, s[0:3], s34 offset: ; GCN: buffer_store_dword v0, off, s[0:3], s34 offset: -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset: define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { bb: %tmp = alloca <4 x float>, align 16, addrspace(5) diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll index dd663fa7d53..22600ca1d67 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1650,10 +1650,10 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1) ; SI-NEXT: v_mov_b32_e32 v13, s21 ; SI-NEXT: v_mov_b32_e32 v14, s22 ; SI-NEXT: v_mov_b32_e32 v15, s23 -; SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112 ; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96 -; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80 +; SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64 +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80 ; SI-NEXT: v_or_b32_e32 v16, s4, v16 ; SI-NEXT: v_mov_b32_e32 v0, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0x40200000 @@ -1696,10 +1696,10 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1) ; VI-NEXT: v_mov_b32_e32 v13, s21 ; VI-NEXT: v_mov_b32_e32 v14, s22 ; VI-NEXT: v_mov_b32_e32 v15, s23 -; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112 ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96 -; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80 +; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64 +; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80 ; VI-NEXT: v_or_b32_e32 v16, s4, v16 ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 0x40200000 -- 2.40.0