From 1f4353fb621051f446c3d0ccc96e3a4a0ded3457 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 11 Jan 2017 00:33:24 +0000
Subject: [PATCH] InstSimplify: Eliminate fabs on known positive

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291624 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/ValueTracking.h         |  8 +-
 lib/Analysis/InstructionSimplify.cpp          | 15 ++-
 lib/Analysis/ValueTracking.cpp                | 71 ++++++++++----
 test/Transforms/InstCombine/fabs.ll           | 50 +++++++++-
 test/Transforms/InstCombine/fast-math.ll      |  8 +-
 .../InstSimplify/floating-point-arithmetic.ll | 92 +++++++++++++++++++
 6 files changed, 212 insertions(+), 32 deletions(-)
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index dd767217345..aaf6f888e06 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -169,8 +169,12 @@ template <typename T> class ArrayRef;
 
   /// Return true if we can prove that the specified FP value is either a NaN or
   /// never less than 0.0.
-  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI,
-                                   unsigned Depth = 0);
+  /// If \p IncludeNeg0 is false, -0.0 is considered less than 0.0.
+  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
+
+  /// \returns true if we can prove that the specified FP value has a 0 sign
+  /// bit.
+  bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
 
   /// If the specified value can be set by repeating the same byte in memory,
   /// return the i8 value that it is represented with. This is true for all i8
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 8da2f0981d0..73dcd713416 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -4308,10 +4308,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
     return nullptr;
 
   // Unary Ops
-  if (NumOperands == 1)
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+  if (NumOperands == 1) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) {
       if (II->getIntrinsicID() == IID)
         return II;
+    }
+
+    switch (IID) {
+    case Intrinsic::fabs: {
+      if (SignBitMustBeZero(*ArgBegin, Q.TLI))
+        return *ArgBegin;
+    }
+    default:
+      break;
+    }
+  }
 
   return nullptr;
 }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index d31472c0d33..b98c9629960 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -2580,51 +2580,70 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
   return false;
 }
 
-bool llvm::CannotBeOrderedLessThanZero(const Value *V,
-                                       const TargetLibraryInfo *TLI,
-                                       unsigned Depth) {
-  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
-    return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
+/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
+/// bit despite comparing equal.
+static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
+                                            const TargetLibraryInfo *TLI,
+                                            bool SignBitOnly,
+                                            unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    return !CFP->getValueAPF().isNegative() ||
+           (!SignBitOnly && CFP->getValueAPF().isZero());
+  }
 
   if (Depth == MaxDepth)
-    return false;  // Limit search depth.
+    return false; // Limit search depth.
 
   const Operator *I = dyn_cast<Operator>(V);
-  if (!I) return false;
+  if (!I)
+    return false;
 
   switch (I->getOpcode()) {
-  default: break;
+  default:
+    break;
   // Unsigned integers are always nonnegative.
   case Instruction::UIToFP:
     return true;
   case Instruction::FMul:
     // x*x is always non-negative or a NaN.
-    if (I->getOperand(0) == I->getOperand(1))
+    if (I->getOperand(0) == I->getOperand(1) &&
+        (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
       return true;
+
     LLVM_FALLTHROUGH;
   case Instruction::FAdd:
   case Instruction::FDiv:
   case Instruction::FRem:
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::Select:
-    return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::FPExt:
   case Instruction::FPTrunc:
     // Widening/narrowing never change sign.
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::Call:
     Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI);
     switch (IID) {
     default:
       break;
     case Intrinsic::maxnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) ||
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) ||
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::minnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::exp:
     case Intrinsic::exp2:
     case Intrinsic::fabs:
@@ -2636,18 +2655,30 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
         if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
           return true;
       }
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::fma:
     case Intrinsic::fmuladd:
       // x*x+y is non-negative if y is non-negative.
       return I->getOperand(0) == I->getOperand(1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+             (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                             Depth + 1);
     }
     break;
   }
   return false;
 }
 
+bool llvm::CannotBeOrderedLessThanZero(const Value *V,
+                                       const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
+}
+
+bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
+}
+
 /// If the specified value can be set by repeating the same byte in memory,
 /// return the i8 value that it is represented with.  This is
 /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
index 6b5f5a94953..aee853ae9ee 100644
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -5,6 +5,8 @@
 declare float @fabsf(float)
 declare double @fabs(double)
 declare fp128 @fabsl(fp128)
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.fmuladd.f32(float, float, float)
 
 define float @square_fabs_call_f32(float %x) {
   %mul = fmul float %x, %x
@@ -80,7 +82,6 @@ define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
 ; CHECK-NEXT: ret fp128 %fabsl
 }
 
-; TODO: This should be able to elimnated the fabs
 define float @square_nnan_fabs_intrinsic_f32(float %x) {
   %mul = fmul nnan float %x, %x
   %fabsf = call float @llvm.fabs.f32(float %mul)
@@ -88,8 +89,7 @@ define float @square_nnan_fabs_intrinsic_f32(float %x) {
 
 ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32(
 ; CHECK-NEXT: %mul = fmul nnan float %x, %x
-; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: ret float %mul
 }
 
 ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
@@ -170,3 +170,47 @@ define float @fabs_select_var_constant_negative(i32 %c, float %x) {
   %fabs = call float @llvm.fabs.f32(float %select)
   ret float %fabs
 }
+
+; The fabs cannot be eliminated because %x may be a NaN
+define float @square_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fma)
+; CHECK-NEXT: ret float %fabsf
+}
+
+; The fabs cannot be eliminated because %x may be a NaN
+define float @square_nnan_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fma
+}
+
+define float @square_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+; CHECK-NEXT: ret float %fabsf
+}
+
+define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fmuladd
+}
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 84f24ca0bf2..ad8a9247e4e 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -241,7 +241,7 @@ define float @fmul2(float %f1) {
 ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses
 @fmul2_external = external global float
 define float @fmul2_disable(float %f1) {
-  %div = fdiv fast float 1.000000e+00, %f1 
+  %div = fdiv fast float 1.000000e+00, %f1
   store float %div, float* @fmul2_external
   %mul = fmul fast float %div, 2.000000e+00
   ret float %mul
@@ -672,8 +672,7 @@ define double @sqrt_intrinsic_arg_4th(double %x) {
 
 ; CHECK-LABEL: sqrt_intrinsic_arg_4th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
-; CHECK-NEXT: ret double %fabs
+; CHECK-NEXT: ret double %mul
 }
 
 define double @sqrt_intrinsic_arg_5th(double %x) {
@@ -685,9 +684,8 @@ define double @sqrt_intrinsic_arg_5th(double %x) {
 
 ; CHECK-LABEL: sqrt_intrinsic_arg_5th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
 ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
-; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
 ; CHECK-NEXT: ret double %1
 }
 
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index 21c9fdde150..dfdb88dcc85 100644
--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -103,3 +103,95 @@ define float @PR22688(float %x) {
   ret float %7
 }
 
+declare float @llvm.fabs.f32(float)
+
+; CHECK-LABEL: @fabs_select_positive_constants(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float %select
+define float @fabs_select_positive_constants(i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float 2.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_constant_variable(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_constant_variable(i32 %c, float %x) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float %x
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_pos0(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+; CHECK-NEXT: ret float %fabs
+define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_neg1(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float -1.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_nan_nan(
+; CHECK: %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+; CHECK-NEXT: ret float %select
+define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_nan(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negnan(
+; CHECK:  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negzero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_zero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
-- 
2.50.1