From 1c1bc8778e6189f073dfe243f68d33d01867203d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 1 Jul 2019 09:41:43 +0000 Subject: [PATCH] [InstCombine] Omit 'urem' where possible This was added in D63390 / rL364286 to backend, but it makes sense to also handle it in middle-end. https://rise4fun.com/Alive/Zsln git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364738 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCompares.cpp | 24 +++++++++++++++---- ...of-two-or-zero-when-comparing-with-zero.ll | 13 ++++------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 29ac729682d..40c3b79e88d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1317,14 +1317,16 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } -// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) +// Handle icmp pred X, 0 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { CmpInst::Predicate Pred = Cmp.getPredicate(); - Value *X = Cmp.getOperand(0); + if (!match(Cmp.getOperand(1), m_Zero())) + return nullptr; - if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) { + // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) + if (Pred == ICmpInst::ICMP_SGT) { Value *A, *B; - SelectPatternResult SPR = matchSelectPattern(X, A, B); + SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B); if (SPR.Flavor == SPF_SMIN) { if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) return new ICmpInst(Pred, B, Cmp.getOperand(1)); @@ -1332,6 +1334,20 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { return new ICmpInst(Pred, A, Cmp.getOperand(1)); } } + + // Given: + // icmp eq/ne (urem %x, %y), 0 + // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': + // icmp eq/ne %x, 0 + Value *X, *Y; + if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) && + ICmpInst::isEquality(Pred)) { + KnownBits XKnown = computeKnownBits(X, 0, &Cmp); + KnownBits YKnown = computeKnownBits(Y, 0, &Cmp); + if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2) + return new ICmpInst(Pred, X, Cmp.getOperand(1)); + } + return nullptr; } diff --git a/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index c9be1039c0d..f15d0a84826 100644 --- a/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -14,8 +14,7 @@ define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) { ; CHECK-LABEL: @p0_scalar_urem_by_const( ; CHECK-NEXT: [[T0:%.*]] = and i32 [[X:%.*]], 128 -; CHECK-NEXT: [[T1:%.*]] = urem i32 [[T0]], 6 -; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T0]], 0 ; CHECK-NEXT: ret i1 [[T2]] ; %t0 = and i32 %x, 128 ; clearly a power-of-two or zero @@ -27,9 +26,7 @@ define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) { define i1 @p1_scalar_urem_by_nonconst(i32 %x, i32 %y) { ; CHECK-LABEL: @p1_scalar_urem_by_nonconst( ; CHECK-NEXT: [[T0:%.*]] = and i32 [[X:%.*]], 128 -; CHECK-NEXT: [[T1:%.*]] = or i32 [[Y:%.*]], 6 -; CHECK-NEXT: [[T2:%.*]] = urem i32 [[T0]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 +; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T0]], 0 ; CHECK-NEXT: ret i1 [[T3]] ; %t0 = and i32 %x, 128 ; clearly a power-of-two or zero @@ -76,8 +73,7 @@ define i1 @p3_scalar_shifted2_urem_by_const(i32 %x, i32 %y) { define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p4_vector_urem_by_const__splat( ; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> [[T0]], -; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T1]], zeroinitializer +; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[T2]] ; %t0 = and <4 x i32> %x, ; clearly a power-of-two or zero @@ -115,8 +111,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32 define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_undef2( ; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> [[T0]], -; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], ; CHECK-NEXT: ret <4 x i1> [[T2]] ; %t0 = and <4 x i32> %x, ; clearly a power-of-two or zero -- 2.50.1