From 77366ccba37ac4ce04d863a704f605e87444fc00 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 1 Mar 2019 19:42:40 +0000 Subject: [PATCH] [InstCombine] move add after umin/umax In the motivating cases from PR14613: https://bugs.llvm.org/show_bug.cgi?id=14613 ...moving the add enables us to narrow the min/max which eliminates zext/trunc which enables signficantly better vectorization. But that bug is still not completely fixed. https://rise4fun.com/Alive/5KQ Name: umax Pre: C1 u>= C0 %a = add nuw i8 %x, C0 %cond = icmp ugt i8 %a, C1 %r = select i1 %cond, i8 %a, i8 C1 => %c2 = icmp ugt i8 %x, C1-C0 %u2 = select i1 %c2, i8 %x, i8 C1-C0 %r = add nuw i8 %u2, C0 Name: umin Pre: C1 u>= C0 %a = add nuw i32 %x, C0 %cond = icmp ult i32 %a, C1 %r = select i1 %cond, i32 %a, i32 C1 => %c2 = icmp ult i32 %x, C1-C0 %u2 = select i1 %c2, i32 %x, i32 C1-C0 %r = add nuw i32 %u2, C0 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355221 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineSelect.cpp | 27 +++++++++ test/Transforms/InstCombine/minmax-fold.ll | 59 ++++++++----------- 2 files changed, 53 insertions(+), 33 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 79835ef9316..b39a4648579 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1563,6 +1563,30 @@ static Instruction *foldSelectCmpXchg(SelectInst &SI) { return nullptr; } +static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X, + Value *Y, + InstCombiner::BuilderTy &Builder) { + assert (SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern"); + bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN || + SPF == SelectPatternFlavor::SPF_UMAX; + // TODO: If InstSimplify could fold all cases where C2 <= C1, we could change + // the constant value check to an assert. + Value *A; + const APInt *C1, *C2; + if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) && + match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) { + // umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1 + // umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1 + Value *NewMinMax = createMinMax(Builder, SPF, A, + ConstantInt::get(X->getType(), *C2 - *C1)); + return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax, + ConstantInt::get(X->getType(), *C1)); + } + // TODO: Handle SMIN/SMAX (similar to unsigned, but the signed subtraction of + // the constants must not overflow). + return nullptr; +} + /// Reduce a sequence of min/max with a common operand. static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, Value *RHS, @@ -1962,6 +1986,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *I = moveNotAfterMinMax(RHS, LHS)) return I; + if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder)) + return I; + if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder)) return I; } diff --git a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll index b060fab9b40..44feb5b08ef 100644 --- a/test/Transforms/InstCombine/minmax-fold.ll +++ b/test/Transforms/InstCombine/minmax-fold.ll @@ -909,9 +909,9 @@ define float @not_min_of_min(i8 %i, float %x) { define i32 @add_umin(i32 %x) { ; CHECK-LABEL: @add_umin( -; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 27 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 27 +; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[TMP2]], 15 ; CHECK-NEXT: ret i32 [[R]] ; %a = add nuw i32 %x, 15 @@ -922,9 +922,8 @@ define i32 @add_umin(i32 %x) { define i32 @add_umin_constant_limit(i32 %x) { ; CHECK-LABEL: @add_umin_constant_limit( -; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 41 -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 41, i32 42 ; CHECK-NEXT: ret i32 [[R]] ; %a = add nuw i32 %x, 41 @@ -1008,9 +1007,9 @@ define i32 @add_umin_extra_use(i32 %x, i32* %p) { define <2 x i16> @add_umin_vec(<2 x i16> %x) { ; CHECK-LABEL: @add_umin_vec( -; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i16> [[A]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i16> [[A]], <2 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[X]], <2 x i16> +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP2]], ; CHECK-NEXT: ret <2 x i16> [[R]] ; %a = add nuw <2 x i16> %x, @@ -1021,9 +1020,9 @@ define <2 x i16> @add_umin_vec(<2 x i16> %x) { define i37 @add_umax(i37 %x) { ; CHECK-LABEL: @add_umax( -; CHECK-NEXT: [[A:%.*]] = add nuw i37 [[X:%.*]], 5 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i37 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i37 [[A]], i37 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 37 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 37 +; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 5 ; CHECK-NEXT: ret i37 [[R]] ; %a = add nuw i37 %x, 5 @@ -1034,9 +1033,9 @@ define i37 @add_umax(i37 %x) { define i37 @add_umax_constant_limit(i37 %x) { ; CHECK-LABEL: @add_umax_constant_limit( -; CHECK-NEXT: [[A:%.*]] = add nuw i37 [[X:%.*]], 81 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i37 [[A]], 82 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i37 [[A]], i37 82 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 1 +; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 81 ; CHECK-NEXT: ret i37 [[R]] ; %a = add nuw i37 %x, 81 @@ -1050,9 +1049,7 @@ define i37 @add_umax_constant_limit(i37 %x) { define i37 @add_umax_simplify(i37 %x) { ; CHECK-LABEL: @add_umax_simplify( -; CHECK-NEXT: [[A:%.*]] = add nuw i37 [[X:%.*]], 42 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i37 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i37 [[A]], i37 42 +; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[X:%.*]], 42 ; CHECK-NEXT: ret i37 [[R]] ; %a = add nuw i37 %x, 42 @@ -1124,9 +1121,9 @@ define i32 @add_umax_extra_use(i32 %x, i32* %p) { define <2 x i33> @add_umax_vec(<2 x i33> %x) { ; CHECK-LABEL: @add_umax_vec( -; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i33> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i33> [[A]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i33> [[A]], <2 x i33> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i33> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i33> [[X]], <2 x i33> +; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP2]], ; CHECK-NEXT: ret <2 x i33> [[R]] ; %a = add nuw <2 x i33> %x, @@ -1137,12 +1134,10 @@ define <2 x i33> @add_umax_vec(<2 x i33> %x) { define i8 @PR14613_umin(i8 %x) { ; CHECK-LABEL: @PR14613_umin( -; CHECK-NEXT: [[U4:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[U5:%.*]] = add nuw nsw i32 [[U4]], 15 -; CHECK-NEXT: [[U6:%.*]] = icmp ult i32 [[U5]], 255 -; CHECK-NEXT: [[U7:%.*]] = select i1 [[U6]], i32 [[U5]], i32 255 -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[U7]] to i8 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -16 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16 +; CHECK-NEXT: [[U7:%.*]] = add i8 [[TMP2]], 15 +; CHECK-NEXT: ret i8 [[U7]] ; %u4 = zext i8 %x to i32 %u5 = add nuw nsw i32 %u4, 15 @@ -1154,12 +1149,10 @@ define i8 @PR14613_umin(i8 %x) { define i8 @PR14613_umax(i8 %x) { ; CHECK-LABEL: @PR14613_umax( -; CHECK-NEXT: [[U4:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[U5:%.*]] = add nuw nsw i32 [[U4]], 15 -; CHECK-NEXT: [[U6:%.*]] = icmp ugt i32 [[U5]], 255 -; CHECK-NEXT: [[U7:%.*]] = select i1 [[U6]], i32 [[U5]], i32 255 -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[U7]] to i8 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], -16 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16 +; CHECK-NEXT: [[U7:%.*]] = add nsw i8 [[TMP2]], 15 +; CHECK-NEXT: ret i8 [[U7]] ; %u4 = zext i8 %x to i32 %u5 = add nuw nsw i32 %u4, 15 -- 2.50.1