From 953b70393acb4204f19710641ccd1a857b8ac3ff Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 6 Sep 2017 18:29:51 +0000 Subject: [PATCH] [AMDGPU] Fixed encoding of v_pk_mul_f16 in fcanonicalize Differential Revision: https://reviews.llvm.org/D37522 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312660 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIInstructions.td | 2 +- test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 5945f963710..7744f7ae6fd 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1322,7 +1322,7 @@ def : Pat< def : Pat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), - (V_PK_MUL_F16 SRCMODS.OP_SEL_1, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE) + (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE) >; diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index 52b85565176..1dfd2fe1522 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -211,7 +211,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace ; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} ; VI-NOT: v_and_b32 -; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+$}} +; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} op_sel_hi:[0,1]{{$}} ; GFX9: buffer_store_dword [[REG]] define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -232,7 +232,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1) ; VI: v_or_b32 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}} -; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]]{{$}} +; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]] op_sel_hi:[0,1]{{$}} ; GCN: buffer_store_dword define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -251,7 +251,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspa ; VI: v_or_b32 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}} -; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]{{$}} ; GCN: buffer_store_dword define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -264,7 +264,6 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad ret void } -; FIXME: Fold modifier ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16: ; VI: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}} ; VI: v_lshrrev_b32_e32 [[FNEGHI:v[0-9]+]], 16, [[FNEG]] @@ -272,7 +271,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad ; VI-DAG: v_max_f16_e32 [[REG0:v[0-9]+]], [[FNEG]], [[FNEG]] ; VI-NOT: 0xffff -; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} neg_lo:[0,1] neg_hi:[0,1]{{$}} +; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]{{$}} ; GFX9: buffer_store_dword [[REG]] define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -289,7 +288,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa ; VI: v_max_f16_e64 [[REG1:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}} ; VI-NOT: v_and_b32 -; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}} +; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}} op_sel_hi:[0,1]{{$}} ; GFX9: buffer_store_dword [[REG]] define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 { %val = bitcast i32 %val.arg to <2 x half> -- 2.40.0