From 208bbb179720222f412df69505810970bc69a359 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 21 Oct 2019 19:53:49 +0000 Subject: [PATCH] AMDGPU: Use CopyToReg for interp intrinsic lowering This doesn't use the default value, so doesn't benefit from the hack to help optimize it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375450 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIISelLowering.cpp | 33 ++++++++++--------- test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll | 8 ++--- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 80eb2be9e65..30fed3b9e01 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5877,34 +5877,35 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_fdiv_fast: return lowerFDIV_FAST(Op, DAG); case Intrinsic::amdgcn_interp_mov: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); - SDValue Glue = M0.getValue(1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(4), SDValue()); return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Glue); + Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1)); } case Intrinsic::amdgcn_interp_p1: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); - SDValue Glue = M0.getValue(1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(4), SDValue()); return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Glue); + Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1)); } case Intrinsic::amdgcn_interp_p2: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); - SDValue Glue = SDValue(M0.getNode(), 1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(5), SDValue()); return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), - Glue); + ToM0.getValue(1)); } case Intrinsic::amdgcn_interp_p1_f16: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); - SDValue Glue = M0.getValue(1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(5), SDValue()); + if (getSubtarget()->getLDSBankCount() == 16) { // 16 bank LDS SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, DAG.getConstant(2, DL, MVT::i32), // P0 Op.getOperand(2), // Attrchan Op.getOperand(3), // Attr - Glue); + ToM0.getValue(1)); SDValue Ops[] = { Op.getOperand(1), // Src0 Op.getOperand(2), // Attrchan @@ -5927,14 +5928,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(4), // high DAG.getTargetConstant(0, DL, MVT::i1), // $clamp DAG.getTargetConstant(0, DL, MVT::i32), // $omod - Glue + ToM0.getValue(1) }; return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops); } } case Intrinsic::amdgcn_interp_p2_f16: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6)); - SDValue Glue = SDValue(M0.getNode(), 1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(6), SDValue()); SDValue Ops[] = { Op.getOperand(2), // Src0 Op.getOperand(3), // Attrchan @@ -5944,7 +5945,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers Op.getOperand(5), // high DAG.getTargetConstant(0, DL, MVT::i1), // $clamp - Glue + ToM0.getValue(1) }; return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops); } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll index 37417a9bc36..5d2e10756c3 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll @@ -6,8 +6,8 @@ define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 { ; GFX9-32BANK-LABEL: interp_f16: ; GFX9-32BANK: ; %bb.0: ; %main_body -; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-32BANK-NEXT: s_mov_b32 m0, s2 +; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y ; GFX9-32BANK-NEXT: v_mov_b32_e32 v2, s1 @@ -20,8 +20,8 @@ define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) ; ; GFX8-32BANK-LABEL: interp_f16: ; GFX8-32BANK: ; %bb.0: ; %main_body -; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-32BANK-NEXT: s_mov_b32 m0, s2 +; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y ; GFX8-32BANK-NEXT: v_mov_b32_e32 v2, s1 @@ -119,8 +119,8 @@ main_body: define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 { ; GFX9-32BANK-LABEL: interp_p2_m0_setup: ; GFX9-32BANK: ; %bb.0: ; %main_body -; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-32BANK-NEXT: s_mov_b32 m0, s2 +; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y ; GFX9-32BANK-NEXT: ;;#ASMSTART @@ -136,8 +136,8 @@ define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 in ; ; GFX8-32BANK-LABEL: interp_p2_m0_setup: ; GFX8-32BANK: ; %bb.0: ; %main_body -; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-32BANK-NEXT: s_mov_b32 m0, s2 +; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y ; GFX8-32BANK-NEXT: ;;#ASMSTART -- 2.40.0