AMDGPU: Fold undef fcanonicalize to qNaN

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 31 Jul 2018 13:34:31 +0000 (13:34 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 31 Jul 2018 13:34:31 +0000 (13:34 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 31 Jul 2018 13:34:31 +0000 (13:34 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 31 Jul 2018 13:34:31 +0000 (13:34 +0000)
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 97c38e44e408a89d47a4c2e7e3463c187b2ca522..acde638a00651c11dc58b9812c185ae3445f3218 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6845,8 +6845,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
    SDNode *N,
    DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
-  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+  SDValue N0 = N->getOperand(0);
+
+  // fcanonicalize undef -> qnan
+  if (N0.isUndef()) {
+    EVT VT = N->getValueType(0);
+    APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+    return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+  }
  
+  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
    if (!CFP) {
      SDValue N0 = N->getOperand(0);
      EVT VT = N0.getValueType().getScalarType();
@@ -6899,7 +6907,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
        return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
    }
  
-  return N->getOperand(0);
+  return N0;
  }
  
  static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.ll b/test/CodeGen/AMDGPU/fcanonicalize.ll

index 7c4698b8cfaf9f51e21a55039659366e3a87a2b3..6b2d58db804ee7c7e94a4205aeedbdcba2e83f2c 100644 (file)
--- a/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -62,6 +62,15 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)*
    ret void
  }
  
+; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
+  %canonicalized = call float @llvm.canonicalize.f32(float undef)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
  ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
  ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
  ; GCN: buffer_store_dword [[REG]]
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 31 Jul 2018 13:34:31 +0000 (13:34 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 31 Jul 2018 13:34:31 +0000 (13:34 +0000)
lib/Target/AMDGPU/SIISelLowering.cpp		patch | blob | history
test/CodeGen/AMDGPU/fcanonicalize.ll		patch | blob | history