From 1f4353fb621051f446c3d0ccc96e3a4a0ded3457 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 11 Jan 2017 00:33:24 +0000 Subject: [PATCH] InstSimplify: Eliminate fabs on known positive git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291624 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/ValueTracking.h | 8 +- lib/Analysis/InstructionSimplify.cpp | 15 ++- lib/Analysis/ValueTracking.cpp | 71 ++++++++++---- test/Transforms/InstCombine/fabs.ll | 50 +++++++++- test/Transforms/InstCombine/fast-math.ll | 8 +- .../InstSimplify/floating-point-arithmetic.ll | 92 +++++++++++++++++++ 6 files changed, 212 insertions(+), 32 deletions(-) diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index dd767217345..aaf6f888e06 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -169,8 +169,12 @@ template class ArrayRef; /// Return true if we can prove that the specified FP value is either a NaN or /// never less than 0.0. - bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); + /// Note that -0.0 compares equal to 0.0, so it is not considered less than 0.0. + bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); + + /// \returns true if we can prove that the specified FP value has a 0 sign + /// bit. + bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. 
This is true for all i8 diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 8da2f0981d0..73dcd713416 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -4308,10 +4308,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, return nullptr; // Unary Ops - if (NumOperands == 1) - if (IntrinsicInst *II = dyn_cast(*ArgBegin)) + if (NumOperands == 1) { + if (IntrinsicInst *II = dyn_cast(*ArgBegin)) { if (II->getIntrinsicID() == IID) return II; + } + + switch (IID) { + case Intrinsic::fabs: { + if (SignBitMustBeZero(*ArgBegin, Q.TLI)) + return *ArgBegin; + } + default: + break; + } + } return nullptr; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index d31472c0d33..b98c9629960 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2580,51 +2580,70 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, return false; } -bool llvm::CannotBeOrderedLessThanZero(const Value *V, - const TargetLibraryInfo *TLI, - unsigned Depth) { - if (const ConstantFP *CFP = dyn_cast(V)) - return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); +/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a +/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign +/// bit despite comparing equal. +static bool cannotBeOrderedLessThanZeroImpl(const Value *V, + const TargetLibraryInfo *TLI, + bool SignBitOnly, + unsigned Depth) { + if (const ConstantFP *CFP = dyn_cast(V)) { + return !CFP->getValueAPF().isNegative() || + (!SignBitOnly && CFP->getValueAPF().isZero()); + } if (Depth == MaxDepth) - return false; // Limit search depth. + return false; // Limit search depth. 
const Operator *I = dyn_cast(V); - if (!I) return false; + if (!I) + return false; switch (I->getOpcode()) { - default: break; + default: + break; // Unsigned integers are always nonnegative. case Instruction::UIToFP: return true; case Instruction::FMul: // x*x is always non-negative or a NaN. - if (I->getOperand(0) == I->getOperand(1)) + if (I->getOperand(0) == I->getOperand(1) && + (!SignBitOnly || cast(I)->hasNoNaNs())) return true; + LLVM_FALLTHROUGH; case Instruction::FAdd: case Instruction::FDiv: case Instruction::FRem: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Instruction::Select: - return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, + Depth + 1); case Instruction::FPExt: case Instruction::FPTrunc: // Widening/narrowing never change sign. 
- return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1); case Instruction::Call: Intrinsic::ID IID = getIntrinsicForCallSite(cast(I), TLI); switch (IID) { default: break; case Intrinsic::maxnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) || - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) || + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Intrinsic::minnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::fabs: @@ -2636,18 +2655,30 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0) return true; } - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1); case Intrinsic::fma: case Intrinsic::fmuladd: // x*x+y is non-negative if y is non-negative. 
return I->getOperand(0) == I->getOperand(1) && - CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); + (!SignBitOnly || cast(I)->hasNoNaNs()) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, + Depth + 1); } break; } return false; } +bool llvm::CannotBeOrderedLessThanZero(const Value *V, + const TargetLibraryInfo *TLI) { + return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0); +} + +bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) { + return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); +} + /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll index 6b5f5a94953..aee853ae9ee 100644 --- a/test/Transforms/InstCombine/fabs.ll +++ b/test/Transforms/InstCombine/fabs.ll @@ -5,6 +5,8 @@ declare float @fabsf(float) declare double @fabs(double) declare fp128 @fabsl(fp128) +declare float @llvm.fma.f32(float, float, float) +declare float @llvm.fmuladd.f32(float, float, float) define float @square_fabs_call_f32(float %x) { %mul = fmul float %x, %x @@ -80,7 +82,6 @@ define fp128 @square_fabs_intrinsic_f128(fp128 %x) { ; CHECK-NEXT: ret fp128 %fabsl } -; TODO: This should be able to elimnated the fabs define float @square_nnan_fabs_intrinsic_f32(float %x) { %mul = fmul nnan float %x, %x %fabsf = call float @llvm.fabs.f32(float %mul) @@ -88,8 +89,7 @@ define float @square_nnan_fabs_intrinsic_f32(float %x) { ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32( ; CHECK-NEXT: %mul = fmul nnan float %x, %x -; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul) -; CHECK-NEXT: ret float %fabsf +; CHECK-NEXT: ret float %mul } ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization. 
@@ -170,3 +170,47 @@ define float @fabs_select_var_constant_negative(i32 %c, float %x) { %fabs = call float @llvm.fabs.f32(float %select) ret float %fabs } + +; The fabs cannot be eliminated because %x may be a NaN +define float @square_fma_fabs_intrinsic_f32(float %x) { + %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fma) + ret float %fabsf + +; CHECK-LABEL: @square_fma_fabs_intrinsic_f32( +; CHECK-NEXT: %fma = call float @llvm.fma.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fma) +; CHECK-NEXT: ret float %fabsf +} + +; The fabs can be eliminated because the nnan flag rules out a NaN result +define float @square_nnan_fma_fabs_intrinsic_f32(float %x) { + %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fma) + ret float %fabsf + +; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32( +; CHECK-NEXT: %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: ret float %fma +} + +define float @square_fmuladd_fabs_intrinsic_f32(float %x) { + %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fmuladd) + ret float %fabsf + +; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32( +; CHECK-NEXT: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fmuladd) +; CHECK-NEXT: ret float %fabsf +} + +define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) { + %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fmuladd) + ret float %fabsf + +; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32( +; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: ret float %fmuladd +} diff --git 
a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll index 84f24ca0bf2..ad8a9247e4e 100644 --- a/test/Transforms/InstCombine/fast-math.ll +++ b/test/Transforms/InstCombine/fast-math.ll @@ -241,7 +241,7 @@ define float @fmul2(float %f1) { ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses @fmul2_external = external global float define float @fmul2_disable(float %f1) { - %div = fdiv fast float 1.000000e+00, %f1 + %div = fdiv fast float 1.000000e+00, %f1 store float %div, float* @fmul2_external %mul = fmul fast float %div, 2.000000e+00 ret float %mul @@ -672,8 +672,7 @@ define double @sqrt_intrinsic_arg_4th(double %x) { ; CHECK-LABEL: sqrt_intrinsic_arg_4th( ; CHECK-NEXT: %mul = fmul fast double %x, %x -; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul) -; CHECK-NEXT: ret double %fabs +; CHECK-NEXT: ret double %mul } define double @sqrt_intrinsic_arg_5th(double %x) { @@ -685,9 +684,8 @@ define double @sqrt_intrinsic_arg_5th(double %x) { ; CHECK-LABEL: sqrt_intrinsic_arg_5th( ; CHECK-NEXT: %mul = fmul fast double %x, %x -; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul) ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x) -; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1 ; CHECK-NEXT: ret double %1 } diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll index 21c9fdde150..dfdb88dcc85 100644 --- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll +++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll @@ -103,3 +103,95 @@ define float @PR22688(float %x) { ret float %7 } +declare float @llvm.fabs.f32(float) + +; CHECK-LABEL: @fabs_select_positive_constants( +; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00 +; CHECK-NEXT: ret float %select +define float @fabs_select_positive_constants(i32 %c) { + %cmp = icmp eq i32 
%c, 0 + %select = select i1 %cmp, float 1.0, float 2.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_constant_variable( +; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x +; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_constant_variable(i32 %c, float %x) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 1.0, float %x + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_neg0_pos0( +; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +; CHECK-NEXT: ret float %fabs +define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -0.0, float 0.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_neg0_neg1( +; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -0.0, float -1.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_nan_nan( +; CHECK: %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000 +; CHECK-NEXT: ret float %select +define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_nan( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_nan(float 
addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_negnan( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_negzero( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_zero( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float 0.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} -- 2.50.1