InstSimplify: Eliminate fabs on known positive

author Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 11 Jan 2017 00:33:24 +0000 (00:33 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 11 Jan 2017 00:33:24 +0000 (00:33 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 11 Jan 2017 00:33:24 +0000 (00:33 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 11 Jan 2017 00:33:24 +0000 (00:33 +0000)
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h

index dd767217345af5f6b2678ea235df650ee93f3e18..aaf6f888e06f5c54e0a678fe34a12571728afb24 100644 (file)
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -169,8 +169,12 @@ template <typename T> class ArrayRef;
  
    /// Return true if we can prove that the specified FP value is either a NaN or
    /// never less than 0.0.
-  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI,
-                                   unsigned Depth = 0);
+  /// If \p IncludeNeg0 is false, -0.0 is considered less than 0.0.
+  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
+
+  /// \returns true if we can prove that the specified FP value has a 0 sign
+  /// bit.
+  bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
  
    /// If the specified value can be set by repeating the same byte in memory,
    /// return the i8 value that it is represented with. This is true for all i8
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp

index 8da2f0981d0ca1c83f1f1311aa21501dc07efeba..73dcd713416a1b9210e1c5b0a791768ab3621af2 100644 (file)
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -4308,10 +4308,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
      return nullptr;
  
    // Unary Ops
-  if (NumOperands == 1)
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+  if (NumOperands == 1) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) {
        if (II->getIntrinsicID() == IID)
          return II;
+    }
+
+    switch (IID) {
+    case Intrinsic::fabs: {
+      if (SignBitMustBeZero(*ArgBegin, Q.TLI))
+        return *ArgBegin;
+    }
+    default:
+      break;
+    }
+  }
  
    return nullptr;
  }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp

index d31472c0d33c1b69d0e74330135399696fd0171a..b98c96299600c69c3abcdfd54710bd4a9bcf5b7f 100644 (file)
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -2580,51 +2580,70 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
    return false;
  }
  
-bool llvm::CannotBeOrderedLessThanZero(const Value *V,
-                                       const TargetLibraryInfo *TLI,
-                                       unsigned Depth) {
-  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
-    return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
+/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
+/// bit despite comparing equal.
+static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
+                                            const TargetLibraryInfo *TLI,
+                                            bool SignBitOnly,
+                                            unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    return !CFP->getValueAPF().isNegative() ||
+           (!SignBitOnly && CFP->getValueAPF().isZero());
+  }
  
    if (Depth == MaxDepth)
-    return false;  // Limit search depth.
+    return false; // Limit search depth.
  
    const Operator *I = dyn_cast<Operator>(V);
-  if (!I) return false;
+  if (!I)
+    return false;
  
    switch (I->getOpcode()) {
-  default: break;
+  default:
+    break;
    // Unsigned integers are always nonnegative.
    case Instruction::UIToFP:
      return true;
    case Instruction::FMul:
      // x*x is always non-negative or a NaN.
-    if (I->getOperand(0) == I->getOperand(1))
+    if (I->getOperand(0) == I->getOperand(1) &&
+        (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
        return true;
+
      LLVM_FALLTHROUGH;
    case Instruction::FAdd:
    case Instruction::FDiv:
    case Instruction::FRem:
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1);
    case Instruction::Select:
-    return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                           Depth + 1);
    case Instruction::FPExt:
    case Instruction::FPTrunc:
      // Widening/narrowing never change sign.
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1);
    case Instruction::Call:
      Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI);
      switch (IID) {
      default:
        break;
      case Intrinsic::maxnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) ||
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) ||
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
      case Intrinsic::minnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
      case Intrinsic::exp:
      case Intrinsic::exp2:
      case Intrinsic::fabs:
@@ -2636,18 +2655,30 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
          if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
            return true;
        }
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1);
      case Intrinsic::fma:
      case Intrinsic::fmuladd:
        // x*x+y is non-negative if y is non-negative.
        return I->getOperand(0) == I->getOperand(1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+             (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                             Depth + 1);
      }
      break;
    }
    return false;
  }
  
+bool llvm::CannotBeOrderedLessThanZero(const Value *V,
+                                       const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
+}
+
+bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
+}
+
  /// If the specified value can be set by repeating the same byte in memory,
  /// return the i8 value that it is represented with.  This is
  /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll

index 6b5f5a949530128d5885b357b8de1f4e2297f342..aee853ae9eeba7bb828d3ded6f407079fff4598a 100644 (file)
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -5,6 +5,8 @@
  declare float @fabsf(float)
  declare double @fabs(double)
  declare fp128 @fabsl(fp128)
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.fmuladd.f32(float, float, float)
  
  define float @square_fabs_call_f32(float %x) {
    %mul = fmul float %x, %x
@@ -80,7 +82,6 @@ define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
  ; CHECK-NEXT: ret fp128 %fabsl
  }
  
-; TODO: This should be able to elimnated the fabs
  define float @square_nnan_fabs_intrinsic_f32(float %x) {
    %mul = fmul nnan float %x, %x
    %fabsf = call float @llvm.fabs.f32(float %mul)
@@ -88,8 +89,7 @@ define float @square_nnan_fabs_intrinsic_f32(float %x) {
  
  ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32(
  ; CHECK-NEXT: %mul = fmul nnan float %x, %x
-; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: ret float %mul
  }
  
  ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
@@ -170,3 +170,47 @@ define float @fabs_select_var_constant_negative(i32 %c, float %x) {
    %fabs = call float @llvm.fabs.f32(float %select)
    ret float %fabs
  }
+
+; The fabs cannot be eliminated because %x may be a NaN
+define float @square_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fma)
+; CHECK-NEXT: ret float %fabsf
+}
+
+; The fabs cannot be eliminated because %x may be a NaN
+define float @square_nnan_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fma
+}
+
+define float @square_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+; CHECK-NEXT: ret float %fabsf
+}
+
+define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fmuladd
+}
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll

index 84f24ca0bf2479ee2dcb51be8372fea846d1086b..ad8a9247e4e1d83c81df68f8f3a70376b896ac5e 100644 (file)
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -241,7 +241,7 @@ define float @fmul2(float %f1) {
  ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses
  @fmul2_external = external global float
  define float @fmul2_disable(float %f1) {
-  %div = fdiv fast float 1.000000e+00, %f1 
+  %div = fdiv fast float 1.000000e+00, %f1
    store float %div, float* @fmul2_external
    %mul = fmul fast float %div, 2.000000e+00
    ret float %mul
@@ -672,8 +672,7 @@ define double @sqrt_intrinsic_arg_4th(double %x) {
  
  ; CHECK-LABEL: sqrt_intrinsic_arg_4th(
  ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
-; CHECK-NEXT: ret double %fabs
+; CHECK-NEXT: ret double %mul
  }
  
  define double @sqrt_intrinsic_arg_5th(double %x) {
@@ -685,9 +684,8 @@ define double @sqrt_intrinsic_arg_5th(double %x) {
  
  ; CHECK-LABEL: sqrt_intrinsic_arg_5th(
  ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
  ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
-; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
  ; CHECK-NEXT: ret double %1
  }
  
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll

index 21c9fdde15069ede72a60715a2313b52c2c85add..dfdb88dcc858025104b238bb12a05fffcba18387 100644 (file)
--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -103,3 +103,95 @@ define float @PR22688(float %x) {
    ret float %7
  }
  
+declare float @llvm.fabs.f32(float)
+
+; CHECK-LABEL: @fabs_select_positive_constants(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float %select
+define float @fabs_select_positive_constants(i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float 2.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_constant_variable(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_constant_variable(i32 %c, float %x) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float %x
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_pos0(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+; CHECK-NEXT: ret float %fabs
+define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_neg1(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float -1.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_nan_nan(
+; CHECK: %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+; CHECK-NEXT: ret float %select
+define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_nan(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negnan(
+; CHECK:  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negzero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_zero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 11 Jan 2017 00:33:24 +0000 (00:33 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 11 Jan 2017 00:33:24 +0000 (00:33 +0000)
include/llvm/Analysis/ValueTracking.h		patch \| blob \| history
lib/Analysis/InstructionSimplify.cpp		patch \| blob \| history
lib/Analysis/ValueTracking.cpp		patch \| blob \| history
test/Transforms/InstCombine/fabs.ll		patch \| blob \| history
test/Transforms/InstCombine/fast-math.ll		patch \| blob \| history
test/Transforms/InstSimplify/floating-point-arithmetic.ll		patch \| blob \| history