From 47d34935368d02921c015ef00f0dfb29e40fcd98 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 17 Feb 2019 15:58:48 +0000 Subject: [PATCH] [InstCombine] reduce unsigned saturated add with 'not' op We want to use the sum in the icmp to allow matching with m_UAddWithOverflow and eliminate the 'not'. This is discussed in D51929 and is another step towards solving PR14613: https://bugs.llvm.org/show_bug.cgi?id=14613 (The matching here is incomplete. Trying to take minimal steps to make sure we don't induce infinite looping from existing canonicalizations of the 'select'.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354221 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineSelect.cpp | 39 +++++++++++++------ .../InstCombine/saturating-add-sub.ll | 16 ++++---- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5922e6f7506..8b5dd7923c4 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -677,19 +677,36 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, InstCombiner::BuilderTy &Builder) { - // Match an unsigned saturated add with constant. - Value *X = Cmp->getOperand(0); - const APInt *CmpC, *AddC; - if (!Cmp->hasOneUse() || Cmp->getPredicate() != ICmpInst::ICMP_ULT || - !match(Cmp->getOperand(1), m_APInt(CmpC)) || !match(FVal, m_AllOnes()) || - !match(TVal, m_Add(m_Specific(X), m_APInt(AddC))) || ~(*AddC) != *CmpC) + if (!Cmp->hasOneUse() || Cmp->getPredicate() != ICmpInst::ICMP_ULT) return nullptr; - // Commute compare and select operands: - // select (icmp ult X, C), (add X, ~C), -1 --> - // select (icmp ugt X, C), -1, (add X, ~C) - Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp->getOperand(1)); - return Builder.CreateSelect(NewCmp, FVal, TVal); + // Match unsigned saturated add of 2 variables with an unnecessary 'not'. + // TODO: There are more variations of this pattern. + Value *Cmp0 = Cmp->getOperand(0); + Value *Cmp1 = Cmp->getOperand(1); + Value *X, *Y; + if (match(TVal, m_AllOnes()) && match(Cmp0, m_Not(m_Value(X))) && + match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) { + // Change the comparison to use the sum (false value of the select). That is + // the canonical pattern match form for uadd.with.overflow and eliminates a + // use of the 'not' op: + // (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y) + // (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X) + Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y); + return Builder.CreateSelect(NewCmp, TVal, FVal); + } + + // Match unsigned saturated add with constant. + const APInt *C, *CmpC; + if (match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 && + match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) { + // Commute compare predicate and select operands: + // (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C) + Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1); + return Builder.CreateSelect(NewCmp, FVal, TVal); + } + + return nullptr; } /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single diff --git a/test/Transforms/InstCombine/saturating-add-sub.ll b/test/Transforms/InstCombine/saturating-add-sub.ll index 451f60ad8af..5ee5dcb9557 100644 --- a/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/test/Transforms/InstCombine/saturating-add-sub.ll @@ -641,11 +641,10 @@ define <2 x i8> @test_vector_ssub_neg_nneg(<2 x i8> %a) { define i32 @uadd_sat(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat( -; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[A:%.*]] = add i32 [[Y:%.*]], [[X]] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTX]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[A:%.*]] = add i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %a = add i32 %y, %x @@ -657,11 +656,10 @@ define i32 @uadd_sat(i32 %x, i32 %y) { define i32 @uadd_sat_commute1(i32 %xp, i32 %y) { ; CHECK-LABEL: @uadd_sat_commute1( ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XP:%.*]] -; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1 ; CHECK-NEXT: [[A:%.*]] = add i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTX]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %x = urem i32 42, %xp ; thwart complexity-based-canonicalization %notx = xor i32 %x, -1 -- 2.40.0