From f7bbb0f66951f6a52e74a36e3ebfd80385d438ca Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 29 Aug 2019 12:48:04 +0000
Subject: [PATCH] [InstSimplify] Drop leftover "division-by-zero guard" around
 `@llvm.umul.with.overflow` inverted overflow bit

Summary:
Now that with D65143/D65144 we've produce `@llvm.umul.with.overflow`,
and with D65147 we've flattened the CFG, we now can see that
the guard may have been there to prevent division by zero is redundant.
We can simply drop it:
```
----------------------------------------
Name: no overflow or zero
  %iszero = icmp eq i4 %y, 0
  %umul = smul_overflow i4 %x, %y
  %umul.ov = extractvalue {i4, i1} %umul, 1
  %umul.ov.not = xor %umul.ov, -1
  %retval.0 = or i1 %iszero, %umul.ov.not
  ret i1 %retval.0
=>
  %iszero = icmp eq i4 %y, 0
  %umul = smul_overflow i4 %x, %y
  %umul.ov = extractvalue {i4, i1} %umul, 1
  %umul.ov.not = xor %umul.ov, -1
  %retval.0 = or i1 %iszero, %umul.ov.not
  ret i1 %umul.ov.not

Done: 1
Optimization is correct!
```
Note that this is inverted from what we have in a previous patch,
here we are looking for the inverted overflow bit.
And that inversion is kinda problematic - given this particular
pattern we neither hoist that `not` closer to `ret` (then the pattern
would have been identical to the one without inversion,
and would have been handled by the previous patch), neither
do the opposite transform. But regardless, we should handle this too.
I've filled [[ https://bugs.llvm.org/show_bug.cgi?id=42720 | PR42720 ]].

Reviewers: nikic, spatel, xbolva00, RKSimon

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65151

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370351 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/InstructionSimplify.cpp          | 71 +++++++++++++++----
 .../div-by-0-guard-before-smul_ov-not.ll      | 12 ++--
 .../div-by-0-guard-before-umul_ov-not.ll      | 12 ++--
 .../unsigned-multiply-overflow-check.ll       | 12 ++--
 4 files changed, 71 insertions(+), 36 deletions(-)
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index d320333aaa0..fa80f1241ca 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1759,6 +1759,27 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
   return nullptr;
 }
 
+/// Check that the Op1 is in expected form, i.e.:
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1,
+                                                          Value *X) {
+  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
+  // We should only be extracting the overflow bit.
+  if (!Extract || !Extract->getIndices().equals(1))
+    return false;
+  Value *Agg = Extract->getAggregateOperand();
+  // This should be a multiplication-with-overflow intrinsic.
+  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
+                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
+    return false;
+  // One of its multipliers should be the value we checked for zero before.
+  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
+                              m_Argument<1>(m_Specific(X)))))
+    return false;
+  return true;
+}
+
 /// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
 /// other form of check, e.g. one that was using division; it may have been
 /// guarded against division-by-zero. We can drop that check now.
@@ -1774,23 +1795,41 @@ static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) {
   if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
       Pred != ICmpInst::Predicate::ICMP_NE)
     return nullptr;
-  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
-  // We should only be extracting the overflow bit.
-  if (!Extract || !Extract->getIndices().equals(1))
-    return nullptr;
-  Value *Agg = Extract->getAggregateOperand();
-  // This should be a multiplication-with-overflow intrinsic.
-  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
-                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
-    return nullptr;
-  // One of its multipliers should be the value we checked for zero before.
-  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
-                              m_Argument<1>(m_Specific(X)))))
+  // Is Op1 in expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
     return nullptr;
   // Can omit 'and', and just return the overflow bit.
   return Op1;
 }
 
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+///   %Op0 = icmp eq i4 %X, 0
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+///   %NotOp1 = xor i1 %Op1, true
+///   %or = or i1 %Op0, %NotOp1
+/// We can just return  %NotOp1
+static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
+                                                            Value *NotOp1) {
+  ICmpInst::Predicate Pred;
+  Value *X;
+  if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+      Pred != ICmpInst::Predicate::ICMP_EQ)
+    return nullptr;
+  // We expect the other hand of an 'or' to be a 'not'.
+  Value *Op1;
+  if (!match(NotOp1, m_Not(m_Value(Op1))))
+    return nullptr;
+  // Is Op1 in expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+    return nullptr;
+  // Can omit 'and', and just return the inverted overflow bit.
+  return NotOp1;
+}
+
 /// Given operands for an And, see if we can fold the result.
 /// If not, this returns null.
 static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2027,6 +2066,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
     return V;
 
+  // If we have a multiplication overflow check that is being 'and'ed with a
+  // check that one of the multipliers is not zero, we can omit the 'and', and
+  // only keep the overflow check.
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1))
+    return V;
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0))
+    return V;
+
   // Try some generic simplifications for associative operations.
   if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
                                           MaxRecurse))
diff --git a/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll b/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll
index a2fd8351d59..6ed3b1e57c8 100644
--- a/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll
+++ b/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll
@@ -5,12 +5,10 @@ declare { i4, i1 } @llvm.smul.with.overflow.i4(i4, i4) #1
 
 define i1 @t0_umul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_umul(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -22,12 +20,10 @@ define i1 @t0_umul(i4 %size, i4 %nmemb) {
 
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[PHITMP]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
diff --git a/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll b/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll
index a34b4202836..c3c28a726f9 100644
--- a/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll
+++ b/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll
@@ -5,12 +5,10 @@ declare { i4, i1 } @llvm.umul.with.overflow.i4(i4, i4) #1
 
 define i1 @t0_umul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_umul(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -22,12 +20,10 @@ define i1 @t0_umul(i4 %size, i4 %nmemb) {
 
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[PHITMP]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
diff --git a/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll b/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll
index cbebaa289c3..6b097da3f14 100644
--- a/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll
+++ b/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll
@@ -124,12 +124,10 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = or i1 [[T0]], [[PHITMP]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[PHITMP]]
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:  bb:
@@ -142,12 +140,10 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T6:%.*]] = or i1 [[T0]], [[PHITMP]]
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    ret i1 [[PHITMP]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0
-- 
2.40.0