[InstCombine] canonicalize fmin/fmax to LLVM intrinsics minnum/maxnum

author Sanjay Patel <spatel@rotateright.com>

Sat, 29 Jun 2019 14:28:54 +0000 (14:28 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Sat, 29 Jun 2019 14:28:54 +0000 (14:28 +0000)
author Sanjay Patel <spatel@rotateright.com>
Sat, 29 Jun 2019 14:28:54 +0000 (14:28 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Sat, 29 Jun 2019 14:28:54 +0000 (14:28 +0000)
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp

index 0c95f4ce4857c1938105f5a3b6ce765896f62f85..b5f8b396efec934d5f12f35bb7df4fd6b4f5605a 100644 (file)
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1563,40 +1563,30 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
  }
  
  Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
-  Function *Callee = CI->getCalledFunction();
    // If we can shrink the call to a float function rather than a double
    // function, do that first.
+  Function *Callee = CI->getCalledFunction();
    StringRef Name = Callee->getName();
    if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
      if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
        return Ret;
  
+  // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+  // the intrinsics for improved optimization (for example, vectorization).
+  // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+  // From the C standard draft WG14/N1256:
+  // "Ideally, fmax would be sensitive to the sign of zero, for example
+  // fmax(-0.0, +0.0) would return +0; however, implementation in software
+  // might be impractical."
    IRBuilder<>::FastMathFlagGuard Guard(B);
-  FastMathFlags FMF;
-  if (CI->isFast()) {
-    // If the call is 'fast', then anything we create here will also be 'fast'.
-    FMF.setFast();
-  } else {
-    // At a minimum, no-nans-fp-math must be true.
-    if (!CI->hasNoNaNs())
-      return nullptr;
-    // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
-    // "Ideally, fmax would be sensitive to the sign of zero, for example
-    // fmax(-0. 0, +0. 0) would return +0; however, implementation in software
-    // might be impractical."
-    FMF.setNoSignedZeros();
-    FMF.setNoNaNs();
-  }
+  FastMathFlags FMF = CI->getFastMathFlags();
+  FMF.setNoSignedZeros();
    B.setFastMathFlags(FMF);
  
-  // We have a relaxed floating-point environment. We can ignore NaN-handling
-  // and transform to a compare and select. We do not have to consider errno or
-  // exceptions, because fmin/fmax do not have those.
-  Value *Op0 = CI->getArgOperand(0);
-  Value *Op1 = CI->getArgOperand(1);
-  Value *Cmp = Callee->getName().startswith("fmin") ?
-    B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
-  return B.CreateSelect(Cmp, Op0, Op1);
+  Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+                                                           : Intrinsic::maxnum;
+  Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+  return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
  }
  
  Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll

index e8f7f720b1527351dc57f0967eb5714179b777de..330e0abb36b85039a5757ef78837e020b4019cfd 100644 (file)
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -513,7 +513,7 @@ define double @tanh_test2(float %f) {
  ; flags are propagated for shrunken *binary* double FP calls.
  define float @max1(float %a, float %b) {
  ; CHECK-LABEL: @max1(
-; ISC99-NEXT:    [[FMAXF:%.*]] = call arcp float @fmaxf(float [[A:%.*]], float [[B:%.*]])
+; ISC99-NEXT:    [[FMAXF:%.*]] = call nsz arcp float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
  ; ISC99-NEXT:    ret float [[FMAXF]]
  ; ISC89:         [[FMAXF:%.*]] = call arcp double @fmax(double [[A:%.*]], double [[B:%.*]])
  ;
@@ -524,14 +524,15 @@ define float @max1(float %a, float %b) {
    ret float %f
  }
  
-; A function can have a name that matches a common libcall,
-; but with the wrong type(s). Let it be.
+; This is treated as libm 'fmin' - LLVM types do not necessarily
+; correspond to 'C' types, so this is not required to be "fminl".
  
  define float @fake_fmin(float %a, float %b) {
  ; CHECK-LABEL: @fake_fmin(
  ; CHECK-NEXT:    [[C:%.*]] = fpext float [[A:%.*]] to fp128
  ; CHECK-NEXT:    [[D:%.*]] = fpext float [[B:%.*]] to fp128
-; CHECK-NEXT:    [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
+; ISC99-NEXT:    [[E:%.*]] = call nsz fp128 @llvm.minnum.f128(fp128 [[C]], fp128 [[D]])
+; ISC89-NEXT:    [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
  ; CHECK-NEXT:    [[F:%.*]] = fptrunc fp128 [[E]] to float
  ; CHECK-NEXT:    ret float [[F]]
  ;
@@ -542,7 +543,7 @@ define float @fake_fmin(float %a, float %b) {
    ret float %f
  }
  
-declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
+declare fp128 @fmin(fp128, fp128)
  
  declare double @fmax(double, double)
  
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll

index 632d178412fd60a525acfbdc1f4c82ee248d5781..9563c377dca53e467a0dbd4a66a012287f67eca3 100644 (file)
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -811,17 +811,13 @@ declare float @fminf(float, float)
  declare fp128 @fmaxl(fp128, fp128)
  declare fp128 @fminl(fp128, fp128)
  
-; No NaNs is the minimum requirement to replace these calls.
-; This should always be set when unsafe-fp-math is true, but
-; alternate the attributes for additional test coverage.
  ; 'nsz' is implied by the definition of fmax or fmin itself.
  
-; Shrink and remove the call.
+; Shrink and replace the call.
  define float @max1(float %a, float %b) {
  ; CHECK-LABEL: @max1(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
  ;
    %c = fpext float %a to double
    %d = fpext float %b to double
@@ -832,8 +828,8 @@ define float @max1(float %a, float %b) {
  
  define float @fmax_no_fmf(float %a, float %b) {
  ; CHECK-LABEL: @fmax_no_fmf(
-; CHECK-NEXT:    [[C:%.*]] = call float @fmaxf(float [[A:%.*]], float [[B:%.*]])
-; CHECK-NEXT:    ret float [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
  ;
    %c = call float @fmaxf(float %a, float %b)
    ret float %c
@@ -841,9 +837,8 @@ define float @fmax_no_fmf(float %a, float %b) {
  
  define float @max2(float %a, float %b) {
  ; CHECK-LABEL: @max2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
  ;
    %c = call nnan float @fmaxf(float %a, float %b)
    ret float %c
@@ -852,9 +847,8 @@ define float @max2(float %a, float %b) {
  
  define double @max3(double %a, double %b) {
  ; CHECK-LABEL: @max3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], double [[A]], double [[B]]
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.maxnum.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
  ;
    %c = call fast double @fmax(double %a, double %b)
    ret double %c
@@ -862,9 +856,8 @@ define double @max3(double %a, double %b) {
  
  define fp128 @max4(fp128 %a, fp128 %b) {
  ; CHECK-LABEL: @max4(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
-; CHECK-NEXT:    ret fp128 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz fp128 @llvm.maxnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
+; CHECK-NEXT:    ret fp128 [[TMP1]]
  ;
    %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
    ret fp128 %c
@@ -873,9 +866,8 @@ define fp128 @max4(fp128 %a, fp128 %b) {
-; Shrink and remove the call.
+; Shrink and replace the call.
  define float @min1(float %a, float %b) {
  ; CHECK-LABEL: @min1(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
  ;
    %c = fpext float %a to double
    %d = fpext float %b to double
@@ -886,8 +878,8 @@ define float @min1(float %a, float %b) {
  
  define float @fmin_no_fmf(float %a, float %b) {
  ; CHECK-LABEL: @fmin_no_fmf(
-; CHECK-NEXT:    [[C:%.*]] = call float @fminf(float [[A:%.*]], float [[B:%.*]])
-; CHECK-NEXT:    ret float [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
  ;
    %c = call float @fminf(float %a, float %b)
    ret float %c
@@ -895,9 +887,8 @@ define float @fmin_no_fmf(float %a, float %b) {
  
  define float @min2(float %a, float %b) {
  ; CHECK-LABEL: @min2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
  ;
    %c = call fast float @fminf(float %a, float %b)
    ret float %c
@@ -905,9 +896,8 @@ define float @min2(float %a, float %b) {
  
  define double @min3(double %a, double %b) {
  ; CHECK-LABEL: @min3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], double [[A]], double [[B]]
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz double @llvm.minnum.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
  ;
    %c = call nnan double @fmin(double %a, double %b)
    ret double %c
@@ -915,9 +905,8 @@ define double @min3(double %a, double %b) {
  
  define fp128 @min4(fp128 %a, fp128 %b) {
  ; CHECK-LABEL: @min4(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
-; CHECK-NEXT:    ret fp128 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast fp128 @llvm.minnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
+; CHECK-NEXT:    ret fp128 [[TMP1]]
  ;
    %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
    ret fp128 %c
diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll

index 2cf4df54254b2d2967a061e47bf3bc09825eadc5..ca2f6d1c23cb2e7aa856408a27fd00d91d4926cc 100644 (file)
--- a/test/Transforms/InstCombine/float-shrink-compare.ll
+++ b/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -instcombine < %s | FileCheck %s
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -356,9 +357,9 @@ define i1 @test14_intrin(float %x, float %y) {
  
  define i1 @test15(float %x, float %y, float %z) {
  ; CHECK-LABEL: @test15(
-; CHECK-NEXT:    [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %1 = fpext float %x to double
    %2 = fpext float %y to double
@@ -370,9 +371,9 @@ define i1 @test15(float %x, float %y, float %z) {
  
  define i1 @test16(float %x, float %y, float %z) {
  ; CHECK-LABEL: @test16(
-; CHECK-NEXT:    [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %1 = fpext float %z to double
    %2 = fpext float %x to double
@@ -384,9 +385,9 @@ define i1 @test16(float %x, float %y, float %z) {
  
  define i1 @test17(float %x, float %y, float %z) {
  ; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %1 = fpext float %x to double
    %2 = fpext float %y to double
@@ -398,9 +399,9 @@ define i1 @test17(float %x, float %y, float %z) {
  
  define i1 @test18(float %x, float %y, float %z) {
  ; CHECK-LABEL: @test18(
-; CHECK-NEXT:    [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %1 = fpext float %z to double
    %2 = fpext float %x to double
@@ -426,9 +427,9 @@ define i1 @test19(float %x, float %y, float %z) {
  
  define i1 @test20(float %x, float %y) {
  ; CHECK-LABEL: @test20(
-; CHECK-NEXT:    [[FMINF:%.*]] = call float @fminf(float 1.000000e+00, float %x) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %y
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %1 = fpext float %y to double
    %2 = fpext float %x to double
@@ -441,9 +442,9 @@ define i1 @test20(float %x, float %y) {
  
  define i1 @test21(float %x, float %y) {
  ; CHECK-LABEL: @test21(
-; CHECK-NEXT:    [[TMP1:%.*]] = fpext float %y to double
-; CHECK-NEXT:    [[TMP2:%.*]] = fpext float %x to double
-; CHECK-NEXT:    [[TMP3:%.*]] = call double @fmin(double 1.300000e+00, double [[TMP2]]) #2
+; CHECK-NEXT:    [[TMP1:%.*]] = fpext float [[Y:%.*]] to double
+; CHECK-NEXT:    [[TMP2:%.*]] = fpext float [[X:%.*]] to double
+; CHECK-NEXT:    [[TMP3:%.*]] = call nsz double @llvm.minnum.f64(double [[TMP2]], double 1.300000e+00)
  ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq double [[TMP3]], [[TMP1]]
  ; CHECK-NEXT:    ret i1 [[TMP4]]
  ;
author	Sanjay Patel <spatel@rotateright.com>
	Sat, 29 Jun 2019 14:28:54 +0000 (14:28 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Sat, 29 Jun 2019 14:28:54 +0000 (14:28 +0000)
lib/Transforms/Utils/SimplifyLibCalls.cpp		patch | blob | history
test/Transforms/InstCombine/double-float-shrink-1.ll		patch | blob | history
test/Transforms/InstCombine/fast-math.ll		patch | blob | history
test/Transforms/InstCombine/float-shrink-compare.ll		patch | blob | history