From dfa41563195dbafdb33d6defba7780c61e723200 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Tue, 5 Mar 2019 12:21:44 +0000 Subject: [PATCH] [AMDGPU] Fix DPP operand order in atomic optimizer Summary: Ensure order of operands in DPP atomic optimizer final WWM step is appropriate for sub instructions. Change-Id: I631d050e1c00a3b4bc7c11a90437064403c4cf30 Reviewers: sheredom, tpr Reviewed By: sheredom Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D58900 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355394 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 2 +- test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll | 5 +++-- test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll | 4 +++- test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll | 4 +++- test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll | 4 +++- test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll | 4 +++- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 3210df461f5..9b732ac9665 100644 --- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -311,7 +311,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, } LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV); - NewV = B.CreateBinOp(Op, NewV, SetInactive); + NewV = B.CreateBinOp(Op, SetInactive, NewV); // Read the value from the last lane, which has accumlated the values of // each active lane in the wavefront. This will be our new value with which diff --git a/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll b/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll index 88824aa64ee..9a22780b9c4 100644 --- a/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll +++ b/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll @@ -112,7 +112,7 @@ entry: ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32 ; GFX7LESS-NOT: s_bcnt1_i32_b64 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}} -; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v{{[0-9]+}} wave_shr:1 row_mask:0xf bank_mask:0xf +; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:1 row_mask:0xf bank_mask:0xf ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:2 row_mask:0xf bank_mask:0xf ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:3 row_mask:0xf bank_mask:0xf @@ -120,7 +120,8 @@ entry: ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:8 row_mask:0xf bank_mask:0xc ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:15 row_mask:0xa bank_mask:0xf ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}} +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: buffer_atomic_sub v[[value]] define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) { diff --git a/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll index 644814f4fa0..201eac172c2 100644 --- a/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ b/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -133,7 +133,9 @@ entry: ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32 ; GFX7LESS-NOT: s_bcnt1_i32_b64 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}} -; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf +; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}} +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: buffer_atomic_sub v[[value]] define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace(1)* %inout) { diff --git a/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 3ce91e83cf3..94c6ef85436 100644 --- a/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -136,7 +136,9 @@ entry: ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32 ; GFX7LESS-NOT: s_bcnt1_i32_b64 ; GFX7LESS: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf +; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}} +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) { diff --git a/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll b/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll index c2db5547201..f7980cc8691 100644 --- a/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll +++ b/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll @@ -104,7 +104,9 @@ entry: ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32 ; GFX7LESS-NOT: s_bcnt1_i32_b64 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}} -; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf +; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}} +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: buffer_atomic_sub v[[value]] define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) { diff --git a/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll b/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll index eb3f0ab17ac..47fed39cdec 100644 --- a/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll +++ b/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll @@ -117,7 +117,9 @@ entry: ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32 ; GFX7LESS-NOT: s_bcnt1_i32_b64 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}} -; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf +; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}} +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: buffer_atomic_sub v[[value]] define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) { -- 2.50.1