From: Matt Arsenault Date: Thu, 16 May 2019 15:10:27 +0000 (+0000) Subject: AMDGPU: Introduce TokenFactor for ABI register copies in call sequence X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7d75e77554a4ce8c025b01bb71580c2e6ec8bbd5;p=llvm AMDGPU: Introduce TokenFactor for ABI register copies in call sequence The call was missing chain dependencies on the pre-call copies. I don't think this was causing any real issues however. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360906 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 6f60f534666..c4c0e4047fc 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2592,24 +2592,31 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, if (!IsSibCall) { Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); + SmallVector<SDValue, 4> CopyFromChains; + unsigned OffsetReg = Info->getScratchWaveOffsetReg(); // In the HSA case, this should be an identity copy. SDValue ScratchRSrcReg = DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32); RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg); + CopyFromChains.push_back(ScratchRSrcReg.getValue(1)); // TODO: Don't hardcode these registers and get from the callee function. SDValue ScratchWaveOffsetReg = DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32); RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg); + CopyFromChains.push_back(ScratchWaveOffsetReg.getValue(1)); if (!Info->isEntryFunction()) { // Avoid clobbering this function's FP value. In the current convention // callee will overwrite this, so do save/restore around the call site. 
CallerSavedFP = DAG.getCopyFromReg(Chain, DL, Info->getFrameOffsetReg(), MVT::i32); + CopyFromChains.push_back(CallerSavedFP.getValue(1)); } + + Chain = DAG.getTokenFactor(DL, CopyFromChains); } SmallVector<SDValue, 8> MemOpChains; diff --git a/test/CodeGen/AMDGPU/byval-frame-setup.ll b/test/CodeGen/AMDGPU/byval-frame-setup.ll index b606de17bb6..c4b2561a8f2 100644 --- a/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -78,16 +78,6 @@ entry: ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24 -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 -; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 -; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 - -; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20 -; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32 - ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 @@ -95,11 +85,21 @@ entry: ; GCN-NOT: s_add_u32 s32, s32, 0x800 + ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}} ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 +; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 +; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, 
s[0:3], s5 offset:36 + +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32 ; GCN: s_swappc_b64 ; GCN-NOT: v_readlane_b32 s32 @@ -272,16 +272,6 @@ entry: ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24 -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 -; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 -; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 - -; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 -; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36 - ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 @@ -294,7 +284,15 @@ entry: ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 +; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 +; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, 
s[0:3], s32 offset:36 ; GCN: s_swappc_b64 ; GCN-NOT: v_readlane_b32 s32 diff --git a/test/CodeGen/AMDGPU/call-argument-types.ll b/test/CodeGen/AMDGPU/call-argument-types.ll index 2afe4c867c4..4d05fcc7de2 100644 --- a/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/test/CodeGen/AMDGPU/call-argument-types.ll @@ -805,14 +805,14 @@ entry: } ; GCN-LABEL: {{^}}stack_12xv3i32: -; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 -; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16 -; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12 -; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:4 +; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8 +; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12 +; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 +; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16 ; GCN: v_mov_b32_e32 v31, 11 ; GCN: s_getpc define void @stack_12xv3i32() #0 { @@ -834,14 +834,14 @@ entry: } ; GCN-LABEL: {{^}}stack_12xv3f32: -; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 -; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16 -; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12 -; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 -; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:4 +; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8 +; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12 +; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 +; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16 ; GCN: v_mov_b32_e32 v31, 0x41300000 ; GCN: 
s_getpc define void @stack_12xv3f32() #0 { @@ -863,22 +863,24 @@ entry: } ; GCN-LABEL: {{^}}stack_8xv5i32: -; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 -; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32 -; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28 -; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24 -; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 -; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20 -; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 -; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16 -; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 -; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12 -; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 -; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8 + ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 ; GCN: buffer_store_dword [[REG8]], {{.*}} offset:4 +; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 +; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8 +; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 +; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12 +; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 +; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16 +; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 +; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20 +; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24 +; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28 +; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 +; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32 + ; GCN: v_mov_b32_e32 v31, 7 ; GCN: s_getpc define void @stack_8xv5i32() #0 { @@ -896,22 +898,23 @@ entry: } ; GCN-LABEL: {{^}}stack_8xv5f32: -; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 -; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32 -; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 -; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28 -; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 -; GCN: 
buffer_store_dword [[REG13]], {{.*}} offset:24 -; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 -; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20 -; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000 -; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16 -; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000 -; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12 -; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000 -; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000 ; GCN: buffer_store_dword [[REG8]], {{.*}} offset:4 +; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000 +; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8 +; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000 +; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12 +; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000 +; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16 +; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 +; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20 +; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 +; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24 +; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 +; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28 +; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 +; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32 + ; GCN: v_mov_b32_e32 v31, 0x40e00000 ; GCN: s_getpc define void @stack_8xv5f32() #0 {