}
// Simplify a call to fmin/fmax (or a variant whose name starts with those).
// NOTE(review): this block is a diff hunk. Lines marked '-' are the previous
// implementation, which bailed out with nullptr unless the call was 'fast' or
// 'nnan' and otherwise emitted an fcmp + select. Lines marked '+' are the
// replacement, which emits a call to the minnum/maxnum intrinsic instead.
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
// If we can shrink the call to a float function rather than a double
// function, do that first.
+ Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
// Shrinking is only attempted for the exact names "fmin" and "fmax"; a
// non-null result from the helper is returned as the final answer.
if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
return Ret;
+ // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+ // the intrinsics for improved optimization (for example, vectorization).
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+ // From the C standard draft WG14/N1256:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+ // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
// NOTE(review): the guard presumably restores B's previous fast-math flags
// when it goes out of scope -- confirm against the IRBuilder documentation.
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- if (CI->isFast()) {
- // If the call is 'fast', then anything we create here will also be 'fast'.
- FMF.setFast();
- } else {
- // At a minimum, no-nans-fp-math must be true.
- if (!CI->hasNoNaNs())
- return nullptr;
- // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
- // "Ideally, fmax would be sensitive to the sign of zero, for example
- // fmax(-0. 0, +0. 0) would return +0; however, implementation in software
- // might be impractical."
- FMF.setNoSignedZeros();
- FMF.setNoNaNs();
- }
// Replacement: keep whatever flags the original call carried and add nsz.
+ FastMathFlags FMF = CI->getFastMathFlags();
+ FMF.setNoSignedZeros();
B.setFastMathFlags(FMF);
- // We have a relaxed floating-point environment. We can ignore NaN-handling
- // and transform to a compare and select. We do not have to consider errno or
- // exceptions, because fmin/fmax do not have those.
- Value *Op0 = CI->getArgOperand(0);
- Value *Op1 = CI->getArgOperand(1);
- Value *Cmp = Callee->getName().startswith("fmin") ?
- B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
- return B.CreateSelect(Cmp, Op0, Op1);
// Replacement: a callee name starting with "fmin" selects minnum, anything
// else selects maxnum. The intrinsic is overloaded on the call's result type
// and receives the call's two operands unchanged.
+ Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+ : Intrinsic::maxnum;
+ Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+ return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
}
Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
; flags are propagated for shrunken *binary* double FP calls.
define float @max1(float %a, float %b) {
; CHECK-LABEL: @max1(
-; ISC99-NEXT: [[FMAXF:%.*]] = call arcp float @fmaxf(float [[A:%.*]], float [[B:%.*]])
+; ISC99-NEXT: [[FMAXF:%.*]] = call nsz arcp float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
; ISC99-NEXT: ret float [[FMAXF]]
; ISC89: [[FMAXF:%.*]] = call arcp double @fmax(double [[A:%.*]], double [[B:%.*]])
;
ret float %f
}
-; A function can have a name that matches a common libcall,
-; but with the wrong type(s). Let it be.
+; This is treated as libm 'fmin' - LLVM types do not necessarily
+; correspond to 'C' types, so this is not required to be "fminl".
define float @fake_fmin(float %a, float %b) {
; CHECK-LABEL: @fake_fmin(
; CHECK-NEXT: [[C:%.*]] = fpext float [[A:%.*]] to fp128
; CHECK-NEXT: [[D:%.*]] = fpext float [[B:%.*]] to fp128
-; CHECK-NEXT: [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
+; ISC99-NEXT: [[E:%.*]] = call nsz fp128 @llvm.minnum.f128(fp128 [[C]], fp128 [[D]])
+; ISC89-NEXT: [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
; CHECK-NEXT: [[F:%.*]] = fptrunc fp128 [[E]] to float
; CHECK-NEXT: ret float [[F]]
;
ret float %f
}
-declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
+declare fp128 @fmin(fp128, fp128)
declare double @fmax(double, double)
declare fp128 @fmaxl(fp128, fp128)
declare fp128 @fminl(fp128, fp128)
-; No NaNs is the minimum requirement to replace these calls.
-; This should always be set when unsafe-fp-math is true, but
-; alternate the attributes for additional test coverage.
; 'nsz' is implied by the definition of fmax or fmin itself.
-; Shrink and remove the call.
+; Shrink and replace the call.
define float @max1(float %a, float %b) {
; CHECK-LABEL: @max1(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT: ret float [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
;
%c = fpext float %a to double
%d = fpext float %b to double
define float @fmax_no_fmf(float %a, float %b) {
; CHECK-LABEL: @fmax_no_fmf(
-; CHECK-NEXT: [[C:%.*]] = call float @fmaxf(float [[A:%.*]], float [[B:%.*]])
-; CHECK-NEXT: ret float [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
;
%c = call float @fmaxf(float %a, float %b)
ret float %c
define float @max2(float %a, float %b) {
; CHECK-LABEL: @max2(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT: ret float [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
;
%c = call nnan float @fmaxf(float %a, float %b)
ret float %c
define double @max3(double %a, double %b) {
; CHECK-LABEL: @max3(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], double [[A]], double [[B]]
-; CHECK-NEXT: ret double [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.maxnum.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: ret double [[TMP1]]
;
%c = call fast double @fmax(double %a, double %b)
ret double %c
define fp128 @max4(fp128 %a, fp128 %b) {
; CHECK-LABEL: @max4(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
-; CHECK-NEXT: ret fp128 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz fp128 @llvm.maxnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
+; CHECK-NEXT: ret fp128 [[TMP1]]
;
%c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
ret fp128 %c
; Shrink and replace the call.
define float @min1(float %a, float %b) {
; CHECK-LABEL: @min1(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT: ret float [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
;
%c = fpext float %a to double
%d = fpext float %b to double
define float @fmin_no_fmf(float %a, float %b) {
; CHECK-LABEL: @fmin_no_fmf(
-; CHECK-NEXT: [[C:%.*]] = call float @fminf(float [[A:%.*]], float [[B:%.*]])
-; CHECK-NEXT: ret float [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
;
%c = call float @fminf(float %a, float %b)
ret float %c
define float @min2(float %a, float %b) {
; CHECK-LABEL: @min2(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT: ret float [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
;
%c = call fast float @fminf(float %a, float %b)
ret float %c
define double @min3(double %a, double %b) {
; CHECK-LABEL: @min3(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], double [[A]], double [[B]]
-; CHECK-NEXT: ret double [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz double @llvm.minnum.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: ret double [[TMP1]]
;
%c = call nnan double @fmin(double %a, double %b)
ret double %c
define fp128 @min4(fp128 %a, fp128 %b) {
; CHECK-LABEL: @min4(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
-; CHECK-NEXT: ret fp128 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = call fast fp128 @llvm.minnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
+; CHECK-NEXT: ret fp128 [[TMP1]]
;
%c = call fast fp128 @fminl(fp128 %a, fp128 %b)
ret fp128 %c
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
define i1 @test15(float %x, float %y, float %z) {
; CHECK-LABEL: @test15(
-; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
;
%1 = fpext float %x to double
%2 = fpext float %y to double
define i1 @test16(float %x, float %y, float %z) {
; CHECK-LABEL: @test16(
-; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
;
%1 = fpext float %z to double
%2 = fpext float %x to double
define i1 @test17(float %x, float %y, float %z) {
; CHECK-LABEL: @test17(
-; CHECK-NEXT: [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
;
%1 = fpext float %x to double
%2 = fpext float %y to double
define i1 @test18(float %x, float %y, float %z) {
; CHECK-LABEL: @test18(
-; CHECK-NEXT: [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
;
%1 = fpext float %z to double
%2 = fpext float %x to double
define i1 @test20(float %x, float %y) {
; CHECK-LABEL: @test20(
-; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float 1.000000e+00, float %x) #0
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %y
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
;
%1 = fpext float %y to double
%2 = fpext float %x to double
define i1 @test21(float %x, float %y) {
; CHECK-LABEL: @test21(
-; CHECK-NEXT: [[TMP1:%.*]] = fpext float %y to double
-; CHECK-NEXT: [[TMP2:%.*]] = fpext float %x to double
-; CHECK-NEXT: [[TMP3:%.*]] = call double @fmin(double 1.300000e+00, double [[TMP2]]) #2
+; CHECK-NEXT: [[TMP1:%.*]] = fpext float [[Y:%.*]] to double
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[X:%.*]] to double
+; CHECK-NEXT: [[TMP3:%.*]] = call nsz double @llvm.minnum.f64(double [[TMP2]], double 1.300000e+00)
; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq double [[TMP3]], [[TMP1]]
; CHECK-NEXT: ret i1 [[TMP4]]
;