From 5276f9a93425c3f3220504f0e15b9d3c798466f9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 17 Jan 2017 00:10:40 +0000 Subject: [PATCH] SimplifyLibCalls: Replace fabs libcalls with intrinsics Add missing fabs(fpext) optimization that worked with the call, and also fix it creating a second fpext when there were multiple uses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292172 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/PatternMatch.h | 12 +++ .../InstCombine/InstCombineCalls.cpp | 12 +++ .../InstCombine/InstCombineCasts.cpp | 33 ++++---- lib/Transforms/Utils/SimplifyLibCalls.cpp | 14 ++-- .../InstCombine/double-float-shrink-2.ll | 18 ++++- test/Transforms/InstCombine/fabs-libcall.ll | 21 ++++++ test/Transforms/InstCombine/fabs.ll | 75 +++++++++++-------- .../InstCombine/float-shrink-compare.ll | 32 +++++--- test/Transforms/InstCombine/pow-1.ll | 6 +- test/Transforms/InstCombine/win-math.ll | 4 +- .../InstCombine/zero-point-zero-add.ll | 2 +- 11 files changed, 160 insertions(+), 69 deletions(-) create mode 100644 test/Transforms/InstCombine/fabs-libcall.ll diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index a30fc97e98e..6e45dcfd719 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -826,6 +826,18 @@ inline CastClass_match m_SIToFP(const OpTy &Op) { return CastClass_match(Op); } +/// \brief Matches FPTrunc +template +inline CastClass_match m_FPTrunc(const OpTy &Op) { + return CastClass_match(Op); +} + +/// \brief Matches FPExt +template +inline CastClass_match m_FPExt(const OpTy &Op) { + return CastClass_match(Op); +} + //===----------------------------------------------------------------------===// // Matchers for unary operators // diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index ec2ebaaed88..c68a5ca9134 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ 
b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1631,6 +1631,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return SelectInst::Create(Cond, Call0, Call1); } + Value *ExtSrc; + if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) && + II->getArgOperand(0)->hasOneUse()) { + // fabs (fpext x) -> fpext (fabs x) + Value *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::fabs, + { ExtSrc->getType() }); + CallInst *NewFabs = Builder->CreateCall(F, ExtSrc); + NewFabs->copyFastMathFlags(II); + NewFabs->takeName(II); + return new FPExtInst(NewFabs, II->getType()); + } + break; } case Intrinsic::cos: diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index e74b590e2b7..5ba6fd6fe32 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1392,21 +1392,24 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { IntrinsicInst *II = dyn_cast(CI.getOperand(0)); if (II) { switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::fabs: { - // (fptrunc (fabs x)) -> (fabs (fptrunc x)) - Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), - CI.getType()); - Type *IntrinsicType[] = { CI.getType() }; - Function *Overload = Intrinsic::getDeclaration( - CI.getModule(), II->getIntrinsicID(), IntrinsicType); - - SmallVector OpBundles; - II->getOperandBundlesAsDefs(OpBundles); - - Value *Args[] = { InnerTrunc }; - return CallInst::Create(Overload, Args, OpBundles, II->getName()); - } + default: break; + case Intrinsic::fabs: { + // (fptrunc (fabs x)) -> (fabs (fptrunc x)) + Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), + CI.getType()); + Type *IntrinsicType[] = { CI.getType() }; + Function *Overload = Intrinsic::getDeclaration( + CI.getModule(), II->getIntrinsicID(), IntrinsicType); + + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + + Value *Args[] = { InnerTrunc }; + 
CallInst *NewCI = CallInst::Create(Overload, Args, + OpBundles, II->getName()); + NewCI->copyFastMathFlags(II); + return NewCI; + } } } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8eaeb1073a7..945814e3a86 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1210,11 +1210,15 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - if (Name == "fabs" && hasFloatVersion(Name)) - return optimizeUnaryDoubleFP(CI, B, false); + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); - return nullptr; + // fabs/fabsf -> llvm.fabs.* + Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::fabs, + CI->getType()); + Value *NewCall = B.CreateCall(F, { CI->getArgOperand(0) }); + NewCall->takeName(CI); + return NewCall; } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { @@ -2029,8 +2033,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizePow(CI, Builder); case Intrinsic::exp2: return optimizeExp2(CI, Builder); - case Intrinsic::fabs: - return optimizeFabs(CI, Builder); case Intrinsic::log: return optimizeLog(CI, Builder); case Intrinsic::sqrt: diff --git a/test/Transforms/InstCombine/double-float-shrink-2.ll b/test/Transforms/InstCombine/double-float-shrink-2.ll index 7f6df92c96c..b314fcf2f8a 100644 --- a/test/Transforms/InstCombine/double-float-shrink-2.ll +++ b/test/Transforms/InstCombine/double-float-shrink-2.ll @@ -10,7 +10,8 @@ ; DO-SIMPLIFY: call float @roundf( ; DO-SIMPLIFY: call float @nearbyintf( ; DO-SIMPLIFY: call float @truncf( -; DO-SIMPLIFY: call float @fabsf( +; DO-SIMPLIFY: call float @llvm.fabs.f32( +; DO-SIMPLIFY: call fast float @llvm.fabs.f32( ; C89-SIMPLIFY: call float @floorf( ; 
C89-SIMPLIFY: call float @ceilf( @@ -22,7 +23,10 @@ ; DONT-SIMPLIFY: call double @round( ; DONT-SIMPLIFY: call double @nearbyint( ; DONT-SIMPLIFY: call double @trunc( -; DONT-SIMPLIFY: call double @fabs( + +; This is replaced with the intrinsic, which does the right thing on +; all platforms. +; DONT-SIMPLIFY: call float @llvm.fabs.f32( declare double @floor(double) declare double @ceil(double) @@ -30,6 +34,7 @@ declare double @round(double) declare double @nearbyint(double) declare double @trunc(double) declare double @fabs(double) +declare double @llvm.fabs.f64(double) define float @test_floor(float %C) { %D = fpext float %C to double @@ -78,3 +83,12 @@ define float @test_fabs(float %C) { %F = fptrunc double %E to float ret float %F } + +; Make sure fast math flags are preserved +define float @test_fabs_fast(float %C) { + %D = fpext float %C to double + ; --> fabsf + %E = call fast double @fabs(double %D) + %F = fptrunc double %E to float + ret float %F +} diff --git a/test/Transforms/InstCombine/fabs-libcall.ll b/test/Transforms/InstCombine/fabs-libcall.ll new file mode 100644 index 00000000000..5733badfa8f --- /dev/null +++ b/test/Transforms/InstCombine/fabs-libcall.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | FileCheck %s + +declare x86_fp80 @fabsl(x86_fp80) + +; CHECK-LABEL: @replace_fabs_call_f80( +; CHECK-NEXT: %fabsl = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x) +; CHECK-NEXT: ret x86_fp80 %fabsl +define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { + %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x) + ret x86_fp80 %fabsl + +} + +; CHECK-LABEL: @fmf_replace_fabs_call_f80( +; CHECK-NEXT: %fabsl = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x) +; CHECK-NEXT: ret x86_fp80 %fabsl +define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) { + %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x) + ret x86_fp80 %fabsl +} + diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll index 
aee853ae9ee..a95f7b306b5 100644 --- a/test/Transforms/InstCombine/fabs.ll +++ b/test/Transforms/InstCombine/fabs.ll @@ -1,6 +1,10 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -instcombine -S | FileCheck %s -; Make sure all library calls are eliminated when the input is known positive. +; Make sure libcalls are replaced with intrinsic calls. + +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) +declare fp128 @llvm.fabs.f128(fp128) declare float @fabsf(float) declare double @fabs(double) @@ -8,46 +12,46 @@ declare fp128 @fabsl(fp128) declare float @llvm.fma.f32(float, float, float) declare float @llvm.fmuladd.f32(float, float, float) -define float @square_fabs_call_f32(float %x) { - %mul = fmul float %x, %x - %fabsf = tail call float @fabsf(float %mul) +define float @replace_fabs_call_f32(float %x) { + %fabsf = tail call float @fabsf(float %x) ret float %fabsf -; CHECK-LABEL: square_fabs_call_f32( -; CHECK-NEXT: %mul = fmul float %x, %x -; CHECK-NEXT: %fabsf = tail call float @fabsf(float %mul) +; CHECK-LABEL: @replace_fabs_call_f32( +; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %x) ; CHECK-NEXT: ret float %fabsf } -define double @square_fabs_call_f64(double %x) { - %mul = fmul double %x, %x - %fabs = tail call double @fabs(double %mul) +define double @replace_fabs_call_f64(double %x) { + %fabs = tail call double @fabs(double %x) ret double %fabs -; CHECK-LABEL: square_fabs_call_f64( -; CHECK-NEXT: %mul = fmul double %x, %x -; CHECK-NEXT: %fabs = tail call double @fabs(double %mul) +; CHECK-LABEL: @replace_fabs_call_f64( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) ; CHECK-NEXT: ret double %fabs } -define fp128 @square_fabs_call_f128(fp128 %x) { - %mul = fmul fp128 %x, %x - %fabsl = tail call fp128 @fabsl(fp128 %mul) +define fp128 @replace_fabs_call_f128(fp128 %x) { + %fabsl = tail call fp128 @fabsl(fp128 %x) ret fp128 %fabsl -; CHECK-LABEL: 
square_fabs_call_f128( -; CHECK-NEXT: %mul = fmul fp128 %x, %x -; CHECK-NEXT: %fabsl = tail call fp128 @fabsl(fp128 %mul) +; CHECK-LABEL: replace_fabs_call_f128( +; CHECK-NEXT: %fabsl = call fp128 @llvm.fabs.f128(fp128 %x) ; CHECK-NEXT: ret fp128 %fabsl } +; Make sure fast math flags are preserved when replacing the libcall. +define float @fmf_replace_fabs_call_f32(float %x) { + %fabsf = tail call nnan float @fabsf(float %x) + ret float %fabsf + +; CHECK-LABEL: @fmf_replace_fabs_call_f32( +; CHECK-NEXT: %fabsf = call nnan float @llvm.fabs.f32(float %x) +; CHECK-NEXT: ret float %fabsf +} + ; Make sure all intrinsic calls are eliminated when the input is known ; positive. -declare float @llvm.fabs.f32(float) -declare double @llvm.fabs.f64(double) -declare fp128 @llvm.fabs.f128(fp128) - ; The fabs cannot be eliminated because %x may be a NaN define float @square_fabs_intrinsic_f32(float %x) { %mul = fmul float %x, %x @@ -102,10 +106,8 @@ define float @square_fabs_shrink_call1(float %x) { ret float %trunc ; CHECK-LABEL: square_fabs_shrink_call1( -; CHECK-NEXT: %ext = fpext float %x to double -; CHECK-NEXT: %sq = fmul double %ext, %ext -; CHECK-NEXT: call double @fabs(double %sq) -; CHECK-NEXT: %trunc = fptrunc double %fabs to float +; CHECK-NEXT: fmul float %x, %x +; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float ; CHECK-NEXT: ret float %trunc } @@ -118,8 +120,8 @@ define float @square_fabs_shrink_call2(float %x) { ; CHECK-LABEL: square_fabs_shrink_call2( ; CHECK-NEXT: %sq = fmul float %x, %x -; CHECK-NEXT: %fabsf = call float @fabsf(float %sq) -; CHECK-NEXT: ret float %fabsf +; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float %sq) +; CHECK-NEXT: ret float %trunc } ; CHECK-LABEL: @fabs_select_constant_negative_positive( @@ -214,3 +216,16 @@ define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) { ; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00) ; CHECK-NEXT: ret float %fmuladd } + +; Don't introduce a 
second fpext +; CHECK-LABEL: @multi_use_fabs_fpext( +; CHECK: %fpext = fpext float %x to double +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %fpext) +; CHECK-NEXT: store volatile double %fpext, double* undef, align 8 +; CHECK-NEXT: ret double %fabs +define double @multi_use_fabs_fpext(float %x) { + %fpext = fpext float %x to double + %fabs = call double @llvm.fabs.f64(double %fpext) + store volatile double %fpext, double* undef + ret double %fabs +} diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll index a08f9531d21..d9f4bc2dbb6 100644 --- a/test/Transforms/InstCombine/float-shrink-compare.ll +++ b/test/Transforms/InstCombine/float-shrink-compare.ll @@ -22,8 +22,20 @@ define i32 @test2(float %x, float %y) nounwind uwtable { %5 = zext i1 %4 to i32 ret i32 %5 ; CHECK-LABEL: @test2( -; CHECK-NEXT: %fabsf = call float @fabsf(float %x) -; CHECK-NEXT: fcmp oeq float %fabsf, %y +; CHECK-NEXT: [[FABS:%[0-9]+]] = call float @llvm.fabs.f32(float %x) +; CHECK-NEXT: fcmp oeq float [[FABS]], %y +} + +define i32 @fmf_test2(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call nnan double @fabs(double %1) nounwind readnone + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK-LABEL: @fmf_test2( +; CHECK-NEXT: [[FABS:%[0-9]+]] = call nnan float @llvm.fabs.f32(float %x) +; CHECK-NEXT: fcmp oeq float [[FABS]], %y } define i32 @test3(float %x, float %y) nounwind uwtable { @@ -99,15 +111,15 @@ define i32 @test8(float %x, float %y) nounwind uwtable { } define i32 @test9(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext float %x to double - %3 = call double @fabs(double %2) nounwind readnone - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %fabs = call double @fabs(double %x.ext) nounwind readnone + 
%cmp = fcmp oeq double %y.ext, %fabs + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test9( -; CHECK-NEXT: %fabsf = call float @fabsf(float %x) -; CHECK-NEXT: fcmp oeq float %fabsf, %y +; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x) +; CHECK-NEXT: fcmp oeq float %fabs, %y } define i32 @test10(float %x, float %y) nounwind uwtable { diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll index c9f71fd4572..602c20a1314 100644 --- a/test/Transforms/InstCombine/pow-1.ll +++ b/test/Transforms/InstCombine/pow-1.ll @@ -72,7 +72,7 @@ define float @test_simplify7(float %x) { ; CHECK-LABEL: @test_simplify7( %retval = call float @powf(float %x, float 0.5) ; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO:#[0-9]+]] -; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]] +; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @llvm.fabs.f32(float [[SQRTF]]) ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]] ret float %retval @@ -83,7 +83,7 @@ define double @test_simplify8(double %x) { ; CHECK-LABEL: @test_simplify8( %retval = call double @pow(double %x, double 0.5) ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]] -; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]] +; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]]) ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]] ret double %retval @@ -163,7 +163,7 @@ define double @test_simplify17(double %x) { ; CHECK-LABEL: @test_simplify17( %retval = call double @llvm.pow.f64(double %x, double 0.5) ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) -; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call 
double @fabs(double [[SQRT]]) +; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]]) ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]] ret double %retval diff --git a/test/Transforms/InstCombine/win-math.ll b/test/Transforms/InstCombine/win-math.ll index e6e79e2b84a..47e99607a25 100644 --- a/test/Transforms/InstCombine/win-math.ll +++ b/test/Transforms/InstCombine/win-math.ll @@ -284,11 +284,11 @@ define float @float_powsqrt(float %x) nounwind readnone { ; WIN64: float @powf ; MINGW32-LABEL: @float_powsqrt( ; MINGW32: float @sqrtf -; MINGW32: float @fabsf +; MINGW32: float @llvm.fabs.f32 ; MINGW32-NOT: float @powf ; MINGW64-LABEL: @float_powsqrt( ; MINGW64: float @sqrtf -; MINGW64: float @fabsf +; MINGW64: float @llvm.fabs.f32( ; MINGW64-NOT: float @powf %1 = call float @powf(float %x, float 0.5) ret float %1 diff --git a/test/Transforms/InstCombine/zero-point-zero-add.ll b/test/Transforms/InstCombine/zero-point-zero-add.ll index e466e8ad742..a23db75525e 100644 --- a/test/Transforms/InstCombine/zero-point-zero-add.ll +++ b/test/Transforms/InstCombine/zero-point-zero-add.ll @@ -15,7 +15,7 @@ define double @test(double %X) { define double @test1(double %X) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[Y:%.*]] = call double @fabs(double %X) +; CHECK-NEXT: [[Y:%.*]] = call double @llvm.fabs.f64(double %X) ; CHECK-NEXT: ret double [[Y]] ; %Y = call double @fabs(double %X) -- 2.50.1