From 802910f0341e3e9b6fce31d6e8f76c04587ac401 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 Jan 2017 18:31:33 +0000 Subject: [PATCH] Merging r292472: ------------------------------------------------------------------------ r292472 | arsenm | 2017-01-18 22:04:12 -0800 (Wed, 18 Jan 2017) | 5 lines AMDGPU: Remove modifiers from v_div_scale_* They seem to produce nonsense results when used. This should be applied to the release branch. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_40@293317 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 10 ++-------- lib/Target/AMDGPU/VOP3Instructions.td | 6 +++++- test/CodeGen/AMDGPU/inserted-wait-states.mir | 2 +- test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll | 6 ++++-- test/MC/AMDGPU/vop3.s | 8 ++------ 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 2b4fc5397b1..5bf347e4865 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { unsigned Opc = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; - // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, - // omod - SDValue Ops[8]; - - SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); - SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); - SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); - CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 5efa64d25ce..c2a4d4ba99b 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -70,8 +70,10 @@ class VOP3_Profile : VOPProfile { } class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { + // v_div_scale_{f32|f64} do not support input modifiers. + let HasModifiers = 0; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); - let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"; + let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { @@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, AMDGPU def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { let SchedRW = [WriteFloatFMA, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } // Double precision division pre-scale. def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { let SchedRW = [WriteDouble, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir index 7cc9c7c1d92..85cd903a405 100644 --- a/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -63,7 +63,7 @@ body: | S_BRANCH %bb.3 bb.3: - %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec + %vgpr4, %vcc = V_DIV_SCALE_F32 %vgpr1, %vgpr1, %vgpr3, implicit %exec %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec S_ENDPGM diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll index 38e4b8440d3..8e5c62c31db 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -322,7 +322,8 @@ define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float a ; SI-LABEL: {{^}}test_div_scale_f32_fabs_num: ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]| +; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[A]] +; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[ABS_A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { @@ -344,7 +345,8 @@ define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspa ; SI-LABEL: {{^}}test_div_scale_f32_fabs_den: ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]] +; SI: v_and_b32_e32 [[ABS_B:v[0-9]+]], 0x7fffffff, [[B]] +; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[ABS_B]], [[ABS_B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s index 6a62df0d956..21fbc644bb5 100644 --- a/test/MC/AMDGPU/vop3.s +++ b/test/MC/AMDGPU/vop3.s @@ -352,10 +352,6 @@ v_div_scale_f32 v24, vcc, v22, v22, v20 // SICI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x04] // VI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x04] -v_div_scale_f32 v24, vcc, s[10:11], v22, v20 -// SICI: v_div_scale_f32 v24, vcc, s[10:11], v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x0a,0x2c,0x52,0x04] -// VI: v_div_scale_f32 v24, vcc, s[10:11], v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x0a,0x2c,0x52,0x04] - v_div_scale_f32 v24, s[10:11], v22, v22, v20 // SICI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xda,0xd2,0x16,0x2d,0x52,0x04] // VI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04] @@ -365,8 +361,8 @@ v_div_scale_f32 v24, vcc, v22, 1.0, v22 // VI: v_div_scale_f32 v24, vcc, v22, 1.0, v22 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0xe5,0x59,0x04] v_div_scale_f32 v24, vcc, v22, v22, -2.0 -// SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd2,0x83] -// VI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0xd2,0x83] +// SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd6,0x03] +// VI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0xd6,0x03] v_div_scale_f32 v24, vcc, v22, v22, 0xc0000000 // SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd6,0x03] -- 2.49.0