From 8f44f41e0f3c0714984307749b1891cdaaceb9ee Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 31 Jul 2018 13:34:31 +0000
Subject: [PATCH] AMDGPU: Fold undef fcanonicalize to qNaN

We could fold this to 0 instead, which would be free, but
folding to qNaN matches the behavior for fmul undef, 1.0.
A NaN result is also more useful for folding the operations
that use it, although if it is not eliminated it is more
expensive in terms of code size.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338376 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/SIISelLowering.cpp | 12 ++++++++++--
 test/CodeGen/AMDGPU/fcanonicalize.ll |  9 +++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 97c38e44e40..acde638a006 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6845,8 +6845,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
   SDNode *N,
   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
-  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+  SDValue N0 = N->getOperand(0);
+
+  // fcanonicalize undef -> qnan
+  if (N0.isUndef()) {
+    EVT VT = N->getValueType(0);
+    APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+    return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+  }
 
+  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
   if (!CFP) {
     SDValue N0 = N->getOperand(0);
     EVT VT = N0.getValueType().getScalarType();
@@ -6899,7 +6907,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
       return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
   }
 
-  return N->getOperand(0);
+  return N0;
 }
 
 static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.ll b/test/CodeGen/AMDGPU/fcanonicalize.ll
index 7c4698b8cfa..6b2d58db804 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -62,6 +62,15 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)*
   ret void
 }
 
+; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
+  %canonicalized = call float @llvm.canonicalize.f32(float undef)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: buffer_store_dword [[REG]]
-- 
2.50.1