From: Matt Arsenault Date: Thu, 8 Dec 2016 20:14:46 +0000 (+0000) Subject: AMDGPU: Make f16 ConstantFP legal X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=beec226db531a440c63ed48c4b66fd12da24724f;p=llvm AMDGPU: Make f16 ConstantFP legal Not having this legal led to combine failures, resulting in dumb things like bitcasts of constants not being folded away. The only reason I'm leaving the v_mov_b32 hack that f32 already uses is to avoid madak formation test regressions. PeepholeOptimizer has an ordering issue where the immediate fold attempt is into the sgpr->vgpr copy instead of the actual use. Running it twice avoids that problem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289096 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index a0184bfefd0..eeab4821e50 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -277,7 +277,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); // F16 - Constant Actions. - setOperationAction(ISD::ConstantFP, MVT::f16, Custom); + setOperationAction(ISD::ConstantFP, MVT::f16, Legal); // F16 - Load/Store Actions. 
setOperationAction(ISD::LOAD, MVT::f16, Promote); @@ -1848,9 +1848,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG); case ISD::TRAP: return lowerTRAP(Op, DAG); - - case ISD::ConstantFP: - return lowerConstantFP(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); } @@ -2055,15 +2052,6 @@ SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG, DAG.getNode(ISD::FTRUNC, DL, VT, Op); } -SDValue SITargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const { - if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(Op)) { - return DAG.getConstant(FP->getValueAPF().bitcastToAPInt().getZExtValue(), - SDLoc(Op), MVT::i32); - } - - return SDValue(); -} - SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f16 && "Do not know how to custom lower FP_ROUND for non-f16 type"); diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 56d6ef2a0c1..cb6d5364793 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -53,9 +53,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { const SDLoc &DL, EVT VT) const; - /// \brief Custom lowering for ISD::ConstantFP. - SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; - /// \brief Custom lowering for ISD::FP_ROUND for MVT::f16. SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 7ca50968096..93e7bcd02a1 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -706,11 +706,24 @@ def : Pat < (S_MOV_B32 imm:$imm) >; +// FIXME: Workaround for ordering issue with peephole optimizer where +// a register class copy interferes with immediate folding. 
Should +// use s_mov_b32, which can be shrunk to s_movk_i32 +def : Pat < + (VGPRImm<(f16 fpimm)>:$imm), + (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) +>; + def : Pat < (f32 fpimm:$imm), (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm))) >; +def : Pat < + (f16 fpimm:$imm), + (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) +>; + def : Pat < (i32 frameindex:$fi), (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi))) diff --git a/test/CodeGen/AMDGPU/br_cc.f16.ll b/test/CodeGen/AMDGPU/br_cc.f16.ll index 970260412c4..b7584714919 100644 --- a/test/CodeGen/AMDGPU/br_cc.f16.ll +++ b/test/CodeGen/AMDGPU/br_cc.f16.ll @@ -47,8 +47,9 @@ two: ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] ; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]] -; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]] ; SI: s_cbranch_vccz + +; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]] ; VI: s_cbranch_vccnz ; VI: one{{$}} @@ -85,7 +86,7 @@ two: ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]] -; VI: v_cmp_nge_f16_e32 vcc, v[[B_F16]], v[[A_F16]] +; VI: v_cmp_ngt_f16_e32 vcc, v[[B_F16]], v[[A_F16]] ; GCN: s_cbranch_vccnz ; GCN: one{{$}}