From 8f44f41e0f3c0714984307749b1891cdaaceb9ee Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 31 Jul 2018 13:34:31 +0000 Subject: [PATCH] AMDGPU: Fold undef fcanonicalize to qNaN We could choose a free 0 for this, but this matches the behavior for fmul undef, 1.0. Also, the NaN use is more useful for folding use operations although if it's not eliminated it is more expensive in terms of code size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338376 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIISelLowering.cpp | 12 ++++++++++-- test/CodeGen/AMDGPU/fcanonicalize.ll | 9 +++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 97c38e44e40..acde638a006 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6845,8 +6845,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0)); + SDValue N0 = N->getOperand(0); + + // fcanonicalize undef -> qnan + if (N0.isUndef()) { + EVT VT = N->getValueType(0); + APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)); + return DAG.getConstantFP(QNaN, SDLoc(N), VT); + } + ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0); if (!CFP) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType().getScalarType(); @@ -6899,7 +6907,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine( return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT); } - return N->getOperand(0); + return N0; } static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { diff --git a/test/CodeGen/AMDGPU/fcanonicalize.ll b/test/CodeGen/AMDGPU/fcanonicalize.ll index 7c4698b8cfa..6b2d58db804 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -62,6 +62,15 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* ret void } +; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} +; GCN: buffer_store_dword [[REG]] +define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 { + %canonicalized = call float @llvm.canonicalize.f32(float undef) + store float %canonicalized, float addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32: ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} ; GCN: buffer_store_dword [[REG]] -- 2.50.1