From: Matt Arsenault Date: Tue, 31 Jul 2018 13:34:31 +0000 (+0000) Subject: AMDGPU: Fold undef fcanonicalize to qNaN X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8f44f41e0f3c0714984307749b1891cdaaceb9ee;p=llvm AMDGPU: Fold undef fcanonicalize to qNaN We could choose a free 0 for this, but this matches the behavior for fmul undef, 1.0. Also, using NaN is more useful for folding the operations that use the result, although if it is not eliminated it is more expensive in terms of code size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338376 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 97c38e44e40..acde638a006 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6845,8 +6845,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0)); + SDValue N0 = N->getOperand(0); + + // fcanonicalize undef -> qnan + if (N0.isUndef()) { + EVT VT = N->getValueType(0); + APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)); + return DAG.getConstantFP(QNaN, SDLoc(N), VT); + } + ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0); if (!CFP) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType().getScalarType(); @@ -6899,7 +6907,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine( return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT); } - return N->getOperand(0); + return N0; } static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { diff --git a/test/CodeGen/AMDGPU/fcanonicalize.ll b/test/CodeGen/AMDGPU/fcanonicalize.ll index 7c4698b8cfa..6b2d58db804 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -62,6 +62,15 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* ret void } +; GCN-LABEL: 
{{^}}test_fold_canonicalize_undef_f32: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} +; GCN: buffer_store_dword [[REG]] +define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 { + %canonicalized = call float @llvm.canonicalize.f32(float undef) + store float %canonicalized, float addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32: ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} ; GCN: buffer_store_dword [[REG]]