From 44e68c6bb86bdeda4ab9b2d67d324f7912dd8d96 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 10 Sep 2017 17:55:08 +0000 Subject: [PATCH] [InstSimplify] refactor udiv/urem code and add tests; NFCI This removes some duplicated code and makes it easier to support signed div/rem in a similar way if we want to do that. Note that the existing comments were not accurate - we don't need a constant divisor to simplify; icmp simplification does more than that. But as the added tests show, it could go even further. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312885 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 49 ++++++++++++-------- test/Transforms/InstSimplify/div.ll | 68 +++++++++++++++++++++++++++ test/Transforms/InstSimplify/rem.ll | 69 ++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 18 deletions(-) diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 0fb0b7d2449..dbcc7e13f21 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1075,6 +1075,33 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); } +/// Given a predicate and two operands, return true if the comparison is true. +/// This is a helper for div/rem simplification where we return some other value +/// when we can prove a relationship between the operands. +static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { + Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse); + Constant *C = dyn_cast_or_null(V); + return (C && C->isAllOnesValue()); +} + +static Value *simplifyUnsignedDivRem(Value *Op0, Value *Op1, + const SimplifyQuery &Q, + unsigned MaxRecurse, bool IsDiv) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return nullptr; + + // If we can prove that the quotient is unsigned less than the divisor, then + // we know the answer: + // X / Y --> 0 + // X % Y --> X + if (isICmpTrue(ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse)) + return IsDiv ? Constant::getNullValue(Op0->getType()) : Op0; + + return nullptr; +} + /// Given operands for a UDiv, see if we can fold the result. /// If not, this returns null. static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -1082,15 +1109,8 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; - // udiv %V, C -> 0 if %V < C - if (MaxRecurse) { - if (Constant *C = dyn_cast_or_null(SimplifyICmpInst( - ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { - if (C->isAllOnesValue()) { - return Constant::getNullValue(Op0->getType()); - } - } - } + if (Value *V = simplifyUnsignedDivRem(Op0, Op1, Q, MaxRecurse, true)) + return V; return nullptr; } @@ -1198,15 +1218,8 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; - // urem %V, C -> %V if %V < C - if (MaxRecurse) { - if (Constant *C = dyn_cast_or_null(SimplifyICmpInst( - ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { - if (C->isAllOnesValue()) { - return Op0; - } - } - } + if (Value *V = simplifyUnsignedDivRem(Op0, Op1, Q, MaxRecurse, false)) + return V; return nullptr; } diff --git a/test/Transforms/InstSimplify/div.ll b/test/Transforms/InstSimplify/div.ll index f096719359d..be17746c24f 100644 --- a/test/Transforms/InstSimplify/div.ll +++ b/test/Transforms/InstSimplify/div.ll @@ -54,6 +54,74 @@ define <2 x i1> @udiv_bool_vec(<2 x i1> %x, <2 x i1> %y) { ret <2 x i1> %div } +define i32 @udiv_quotient_known_smaller_than_constant_denom(i32 %x) { +; CHECK-LABEL: @udiv_quotient_known_smaller_than_constant_denom( +; CHECK-NEXT: ret i32 0 +; + %and = and i32 %x, 250 + %div = udiv i32 %and, 251 + ret i32 %div +} + +define i32 @not_udiv_quotient_known_smaller_than_constant_denom(i32 %x) { +; CHECK-LABEL: @not_udiv_quotient_known_smaller_than_constant_denom( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 251 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AND]], 251 +; CHECK-NEXT: ret i32 [[DIV]] +; + %and = and i32 %x, 251 + %div = udiv i32 %and, 251 + ret i32 %div +} + +define i32 @udiv_constant_quotient_known_smaller_than_denom(i32 %x) { +; CHECK-LABEL: @udiv_constant_quotient_known_smaller_than_denom( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, 251 + %div = udiv i32 250, %or + ret i32 %div +} + +define i32 @not_udiv_constant_quotient_known_smaller_than_denom(i32 %x) { +; CHECK-LABEL: @not_udiv_constant_quotient_known_smaller_than_denom( +; CHECK-NEXT: [[OR:%.*]] = or i32 %x, 251 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 251, [[OR]] +; CHECK-NEXT: ret i32 [[DIV]] +; + %or = or i32 %x, 251 + %div = udiv i32 251, %or + ret i32 %div +} + +; This would require computing known bits on both x and y. Is it worth doing? + +define i32 @udiv_quotient_known_smaller_than_denom(i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_quotient_known_smaller_than_denom( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 250 +; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 251 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AND]], [[OR]] +; CHECK-NEXT: ret i32 [[DIV]] +; + %and = and i32 %x, 250 + %or = or i32 %y, 251 + %div = udiv i32 %and, %or + ret i32 %div +} + +define i32 @not_udiv_quotient_known_smaller_than_denom(i32 %x, i32 %y) { +; CHECK-LABEL: @not_udiv_quotient_known_smaller_than_denom( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 251 +; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 251 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AND]], [[OR]] +; CHECK-NEXT: ret i32 [[DIV]] +; + %and = and i32 %x, 251 + %or = or i32 %y, 251 + %div = udiv i32 %and, %or + ret i32 %div +} + declare i32 @external() define i32 @div1() { diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll index b7f18f36b4b..05818043bc1 100644 --- a/test/Transforms/InstSimplify/rem.ll +++ b/test/Transforms/InstSimplify/rem.ll @@ -104,6 +104,75 @@ define i32 @rem3(i32 %x, i32 %n) { ret i32 %mod1 } +define i32 @urem_quotient_known_smaller_than_constant_denom(i32 %x) { +; CHECK-LABEL: @urem_quotient_known_smaller_than_constant_denom( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 250 +; CHECK-NEXT: ret i32 [[AND]] +; + %and = and i32 %x, 250 + %r = urem i32 %and, 251 + ret i32 %r +} + +define i32 @not_urem_quotient_known_smaller_than_constant_denom(i32 %x) { +; CHECK-LABEL: @not_urem_quotient_known_smaller_than_constant_denom( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 251 +; CHECK-NEXT: [[R:%.*]] = urem i32 [[AND]], 251 +; CHECK-NEXT: ret i32 [[R]] +; + %and = and i32 %x, 251 + %r = urem i32 %and, 251 + ret i32 %r +} + +define i32 @urem_constant_quotient_known_smaller_than_denom(i32 %x) { +; CHECK-LABEL: @urem_constant_quotient_known_smaller_than_denom( +; CHECK-NEXT: ret i32 250 +; + %or = or i32 %x, 251 + %r = urem i32 250, %or + ret i32 %r +} + +define i32 @not_urem_constant_quotient_known_smaller_than_denom(i32 %x) { +; CHECK-LABEL: @not_urem_constant_quotient_known_smaller_than_denom( +; CHECK-NEXT: [[OR:%.*]] = or i32 %x, 251 +; CHECK-NEXT: [[R:%.*]] = urem i32 251, [[OR]] +; CHECK-NEXT: ret i32 [[R]] +; + %or = or i32 %x, 251 + %r = urem i32 251, %or + ret i32 %r +} + +; This would require computing known bits on both x and y. Is it worth doing? + +define i32 @urem_quotient_known_smaller_than_denom(i32 %x, i32 %y) { +; CHECK-LABEL: @urem_quotient_known_smaller_than_denom( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 250 +; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 251 +; CHECK-NEXT: [[R:%.*]] = urem i32 [[AND]], [[OR]] +; CHECK-NEXT: ret i32 [[R]] +; + %and = and i32 %x, 250 + %or = or i32 %y, 251 + %r = urem i32 %and, %or + ret i32 %r +} + +define i32 @not_urem_quotient_known_smaller_than_denom(i32 %x, i32 %y) { +; CHECK-LABEL: @not_urem_quotient_known_smaller_than_denom( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 251 +; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 251 +; CHECK-NEXT: [[R:%.*]] = urem i32 [[AND]], [[OR]] +; CHECK-NEXT: ret i32 [[R]] +; + %and = and i32 %x, 251 + %or = or i32 %y, 251 + %r = urem i32 %and, %or + ret i32 %r +} + declare i32 @external() define i32 @rem4() { -- 2.50.1