From: Sanjay Patel
Date: Sun, 17 Feb 2019 16:48:50 +0000 (+0000)
Subject: [InstCombine] reduce more unsigned saturated add with 'not' op
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6de5879eecb1de26d10e69aebe0df7cb5d60035e;p=llvm

[InstCombine] reduce more unsigned saturated add with 'not' op

We want to use the sum in the icmp to allow matching with
m_UAddWithOverflow and eliminate the 'not'. This is discussed
in D51929 and is another step towards solving PR14613:
https://bugs.llvm.org/show_bug.cgi?id=14613

  Name: not op
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ult i32 %notx, %y
  %r = select i1 %c, i32 -1, i32 %a
  =>
  %a = add i32 %x, %y
  %c2 = icmp ult i32 %a, %y
  %r = select i1 %c2, i32 -1, i32 %a

  Name: not op ugt
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %y, %notx
  %r = select i1 %c, i32 -1, i32 %a
  =>
  %a = add i32 %x, %y
  %c2 = icmp ult i32 %a, %y
  %r = select i1 %c2, i32 -1, i32 %a

https://rise4fun.com/Alive/niom

(The matching here is still incomplete.)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354224 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8b5dd7923c4..3ee418feb0e 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -677,13 +677,23 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
 
 static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
                                        InstCombiner::BuilderTy &Builder) {
-  if (!Cmp->hasOneUse() || Cmp->getPredicate() != ICmpInst::ICMP_ULT)
+  if (!Cmp->hasOneUse())
     return nullptr;
 
-  // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
-  // TODO: There are more variations of this pattern.
+  // Canonicalize to 'ULT' to simplify matching below.
   Value *Cmp0 = Cmp->getOperand(0);
   Value *Cmp1 = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_UGT) {
+    Pred = ICmpInst::ICMP_ULT;
+    std::swap(Cmp0, Cmp1);
+  }
+
+  if (Pred != ICmpInst::ICMP_ULT)
+    return nullptr;
+
+  // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+  // TODO: There are more variations of this pattern.
   Value *X, *Y;
   if (match(TVal, m_AllOnes()) && match(Cmp0, m_Not(m_Value(X))) &&
       match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
diff --git a/test/Transforms/InstCombine/saturating-add-sub.ll b/test/Transforms/InstCombine/saturating-add-sub.ll
index 96848629438..17c0d958838 100644
--- a/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -672,11 +672,10 @@ define i32 @uadd_sat_commute_add(i32 %xp, i32 %y) {
 define i32 @uadd_sat_ugt(i32 %x, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_ugt(
 ; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X]]
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[Y]], [[NOTX]]
-; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
-; CHECK-NEXT: ret i32 [[R]]
+; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
+; CHECK-NEXT: ret i32 [[TMP2]]
 ;
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -690,11 +689,10 @@ define i32 @uadd_sat_ugt_commute_add(i32 %xp, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_ugt_commute_add(
 ; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
 ; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[XP:%.*]]
-; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1
 ; CHECK-NEXT: [[A:%.*]] = add i32 [[X]], [[Y]]
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[Y]], [[NOTX]]
-; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
-; 
CHECK-NEXT: ret i32 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
+; CHECK-NEXT: ret i32 [[TMP2]]
 ;
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
   %x = srem i32 42, %xp ; thwart complexity-based-canonicalization