From: Matt Arsenault Date: Mon, 27 Feb 2017 23:08:49 +0000 (+0000) Subject: AMDGPU: Basic folds for fmed3 intrinsic X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f5567ad95f220251564035ccab0a71d79f139648;p=llvm AMDGPU: Basic folds for fmed3 intrinsic Constant fold, canonicalize constants to RHS, reduce to minnum/maxnum when inputs are nan/undef. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296409 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 5a41b5a2ed6..fb26aeb33df 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -560,6 +560,22 @@ public: Type *ResultType, const Twine &Name = ""); + /// Create a call to intrinsic \p ID with 2 operands which is mangled on the + /// first type. + CallInst *CreateBinaryIntrinsic(Intrinsic::ID ID, + Value *LHS, Value *RHS, + const Twine &Name = ""); + + /// Create call to the minnum intrinsic. + CallInst *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") { + return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name); + } + + /// Create call to the maxnum intrinsic. + CallInst *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") { + return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name); + } + private: /// \brief Create a call to a masked intrinsic with given Id. CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef Ops, diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 6e45dcfd719..2cd99d76cc9 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -157,6 +157,19 @@ inline match_combine_or m_AnyZero() { return m_CombineOr(m_Zero(), m_NegZero()); } +struct match_nan { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) { + const APFloat &APF = C->getValueAPF(); + return APF.isNaN(); + } + return false; + } +}; + +/// Match an arbitrary NaN constant. This includes quiet and signalling nans. +inline match_nan m_NaN() { return match_nan(); } + struct apint_match { const APInt *&Res; apint_match(const APInt *&R) : Res(R) {} diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index d3e410d6d03..4671eb06274 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -482,3 +482,11 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint, getInt32(DerivedOffset)}; return createCallHelper(FnGCRelocate, Args, this, Name); } + +CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID, + Value *LHS, Value *RHS, + const Twine &Name) { + Module *M = BB->getParent()->getParent(); + Function *Fn = Intrinsic::getDeclaration(M, ID, { LHS->getType() }); + return createCallHelper(Fn, { LHS, RHS }, this, Name); +} diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 664d113f9c1..58992014809 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1533,6 +1533,27 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) { return true; } +// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs. +// +// A single NaN input is folded to minnum, so we rely on that folding for +// handling NaNs. +static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, + const APFloat &Src2) { + APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2); + + APFloat::cmpResult Cmp0 = Max3.compare(Src0); + assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately"); + if (Cmp0 == APFloat::cmpEqual) + return maxnum(Src1, Src2); + + APFloat::cmpResult Cmp1 = Max3.compare(Src1); + assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately"); + if (Cmp1 == APFloat::cmpEqual) + return maxnum(Src0, Src2); + + return maxnum(Src0, Src1); +} + // Returns true iff the 2 intrinsics have the same operands, limiting the // comparison to the first NumOperands. static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, @@ -3331,6 +3352,61 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return II; break; + + } + case Intrinsic::amdgcn_fmed3: { + // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled + // for the shader. + + Value *Src0 = II->getArgOperand(0); + Value *Src1 = II->getArgOperand(1); + Value *Src2 = II->getArgOperand(2); + + bool Swap = false; + // Canonicalize constants to RHS operands. + // + // fmed3(c0, x, c1) -> fmed3(x, c0, c1) + if (isa(Src0) && !isa(Src1)) { + std::swap(Src0, Src1); + Swap = true; + } + + if (isa(Src1) && !isa(Src2)) { + std::swap(Src1, Src2); + Swap = true; + } + + if (isa(Src0) && !isa(Src1)) { + std::swap(Src0, Src1); + Swap = true; + } + + if (Swap) { + II->setArgOperand(0, Src0); + II->setArgOperand(1, Src1); + II->setArgOperand(2, Src2); + return II; + } + + if (match(Src2, m_NaN()) || isa(Src2)) { + CallInst *NewCall = Builder->CreateMinNum(Src0, Src1); + NewCall->copyFastMathFlags(II); + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); + } + + if (const ConstantFP *C0 = dyn_cast(Src0)) { + if (const ConstantFP *C1 = dyn_cast(Src1)) { + if (const ConstantFP *C2 = dyn_cast(Src2)) { + APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), + C2->getValueAPF()); + return replaceInstUsesWith(*II, + ConstantFP::get(Builder->getContext(), Result)); + } + } + } + + break; } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can diff --git a/test/Transforms/InstCombine/amdgcn-intrinsics.ll b/test/Transforms/InstCombine/amdgcn-intrinsics.ll index 42a36c0ecb6..facc26b9134 100644 --- a/test/Transforms/InstCombine/amdgcn-intrinsics.ll +++ b/test/Transforms/InstCombine/amdgcn-intrinsics.ll @@ -1025,3 +1025,185 @@ define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false) ret void } + +; -------------------------------------------------------------------- +; llvm.amdgcn.fmed3 +; -------------------------------------------------------------------- + +declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone + +; CHECK-LABEL: @fmed3_f32( +; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z) +define float @fmed3_f32(float %x, float %y, float %z) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32( +; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00) +define float @fmed3_canonicalize_x_c0_c1_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32( +; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00) +define float @fmed3_canonicalize_c0_x_c1_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32( +; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00) +define float @fmed3_canonicalize_c0_c1_x_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32( +; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00) +define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32( +; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00) +define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32( +; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00) +define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_undef_x_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_undef_x_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32( +; CHECK: call nnan float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) { + %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_undef_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_undef_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_y_undef_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_y_undef_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan0_x_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_qnan0_x_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_qnan0_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_qnan0_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_y_qnan0_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_y_qnan0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan1_x_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_qnan1_x_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y) + ret float %med3 +} + +; This can return any of the qnans. +; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32( +; CHECK: ret float 0x7FF8002000000000 +define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src0_0_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src0_0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src0_1_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src0_1_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src1_0_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src1_0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src1_1_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src1_1_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src2_0_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src2_0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src2_1_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src2_1_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32( +; CHECK: ret float %x +define float @fmed3_x_qnan0_qnan1_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32( +; CHECK: ret float %x +define float @fmed3_qnan0_x_qnan1_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32( +; CHECK: ret float %x +define float @fmed3_qnan0_qnan1_x_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x) + ret float %med3 +}