From 458204d46507d4613d9536aa2f06293bd1f13943 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Mon, 1 Jul 2019 15:55:15 +0000
Subject: [PATCH] [InstCombine] Shift amount reassociation in bittest (PR42399)

Summary:
Given pattern:
`icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0`
we should move shifts to the same hand of 'and', i.e. rewrite as
`icmp eq/ne (and (x shift (Q+K)), y), 0`  iff `(Q+K) u< bitwidth(x)`

It might be tempting not to restrict this to situations where we know
we'd fold the two shifts together, but I'm not sure what rules there should
be to avoid endless combine loops.

The new shift uses the same opcode that was originally applied to the variable we chose to keep shifting:
https://rise4fun.com/Alive/6x1v

Should fix [[ https://bugs.llvm.org/show_bug.cgi?id=42399 | PR42399]].

Reviewers: spatel, nikic, RKSimon

Reviewed By: spatel

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63829

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364791 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCompares.cpp       |  60 ++++++
 .../shift-amount-reassociation-in-bittest.ll  | 196 ++++++++----------
 2 files changed, 148 insertions(+), 108 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 40c3b79e88d..664edc7c9e5 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3270,6 +3270,63 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
   return T1;
 }
 
+// Given a bit test of two oppositely-shifted values:
+//   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+// move both shifts onto the same operand of the 'and', i.e. rewrite as
+//   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
+// Only opposite logical shifts (shl vs. lshr) are handled; the surviving
+// shift is 'lshr' when possible. Returns nullptr if the fold does not apply.
+static Value *
+foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
+                                           InstCombiner::BuilderTy &Builder) {
+  if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
+      !I.getOperand(0)->hasOneUse())
+    return nullptr;
+
+  auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
+  auto m_AnyLShr = m_LShr(m_Value(), m_Value());
+
+  // Look for an 'and' of two logical shifts (direction is checked below).
+  // The m_OneUse operand is bound to XShift, the other operand to YShift.
+  Value *XShift, *YShift;
+  if (!match(I.getOperand(0),
+             m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
+                     m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+    return nullptr;
+
+  // Prefer 'lshr': if YShift is a single-use 'lshr', swap it into XShift.
+  if (match(YShift, m_OneUse(m_AnyLShr)))
+    std::swap(XShift, YShift);
+
+  // The two shifts must have different opcodes, i.e. opposite directions.
+  Instruction::BinaryOps XShiftOpcode =
+      cast<BinaryOperator>(XShift)->getOpcode();
+  if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+    return nullptr; // Same-direction shifts are not handled here.
+
+  Value *X, *XShAmt, *Y, *YShAmt;
+  match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
+  match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+
+  // Try to simplify (XShAmt+YShAmt); give up if it does not fold.
+  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
+                                  SQ.getWithInstruction(&I));
+  if (!NewShAmt)
+    return nullptr;
+  // The fold is only valid if the new shift amount is u< the bit width.
+  // FIXME: could also rely on ConstantRange.
+  unsigned BitWidth = X->getType()->getScalarSizeInBits();
+  if (!match(NewShAmt, m_SpecificInt_ULT(APInt(BitWidth, BitWidth))))
+    return nullptr;
+  // All good, do the fold. X keeps the shift opcode it originally had.
+  Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
+                  ? Builder.CreateLShr(X, NewShAmt)
+                  : Builder.CreateShl(X, NewShAmt);
+  Value *T1 = Builder.CreateAnd(T0, Y);
+  return Builder.CreateICmp(I.getPredicate(), T1,
+                            Constant::getNullValue(X->getType()));
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3625,6 +3682,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
   if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
+    return replaceInstUsesWith(I, V);
+
   return nullptr;
 }
 
diff --git a/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll b/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
index 7ce0bc8d937..c0bce48b3be 100644
--- a/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
+++ b/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
@@ -11,11 +11,10 @@
 
 define i1 @t0_const_lshr_shl_ne(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t0_const_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -25,11 +24,10 @@ define i1 @t0_const_lshr_shl_ne(i32 %x, i32 %y) {
 }
 define i1 @t1_const_shl_lshr_ne(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t1_const_shl_lshr_ne(
-; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = shl i32 %x, 1
   %t1 = lshr i32 %y, 1
@@ -41,11 +39,10 @@ define i1 @t1_const_shl_lshr_ne(i32 %x, i32 %y) {
 ; We are ok with 'eq' predicate too.
 define i1 @t2_const_lshr_shl_eq(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t2_const_lshr_shl_eq(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp eq i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -58,13 +55,10 @@ define i1 @t2_const_lshr_shl_eq(i32 %x, i32 %y) {
 
 define i1 @t3_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 ; CHECK-LABEL: @t3_const_after_fold_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[Y:%.*]], [[T2]]
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = sub i32 32, %len
   %t1 = lshr i32 %x, %t0
@@ -76,13 +70,10 @@ define i1 @t3_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 }
 define i1 @t4_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 ; CHECK-LABEL: @t4_const_after_fold_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[Y:%.*]], [[T2]]
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 %x, %t0
@@ -128,11 +119,10 @@ define i1 @t6_const_shl_lshr_ne(i32 %x, i32 %y, i32 %shamt0, i32 %shamt1) {
 
 define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t7_const_lshr_shl_ne_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 1, i32 1>
   %t1 = shl <2 x i32> %y, <i32 1, i32 1>
@@ -142,11 +132,10 @@ define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 }
 define <2 x i1> @t8_const_lshr_shl_ne_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t8_const_lshr_shl_ne_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 1, i32 2>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 1, i32 2>
   %t1 = shl <2 x i32> %y, <i32 3, i32 4>
@@ -156,11 +145,10 @@ define <2 x i1> @t8_const_lshr_shl_ne_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 }
 define <3 x i1> @t9_const_lshr_shl_ne_vec_undef0(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t9_const_lshr_shl_ne_vec_undef0(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -170,11 +158,10 @@ define <3 x i1> @t9_const_lshr_shl_ne_vec_undef0(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t10_const_lshr_shl_ne_vec_undef1(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t10_const_lshr_shl_ne_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -184,11 +171,10 @@ define <3 x i1> @t10_const_lshr_shl_ne_vec_undef1(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t11_const_lshr_shl_ne_vec_undef2(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_undef2(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -198,11 +184,10 @@ define <3 x i1> @t11_const_lshr_shl_ne_vec_undef2(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t12_const_lshr_shl_ne_vec_undef3(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t12_const_lshr_shl_ne_vec_undef3(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -212,11 +197,10 @@ define <3 x i1> @t12_const_lshr_shl_ne_vec_undef3(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t13_const_lshr_shl_ne_vec_undef4(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t13_const_lshr_shl_ne_vec_undef4(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -226,11 +210,10 @@ define <3 x i1> @t13_const_lshr_shl_ne_vec_undef4(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t14_const_lshr_shl_ne_vec_undef5(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t14_const_lshr_shl_ne_vec_undef5(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -240,11 +223,10 @@ define <3 x i1> @t14_const_lshr_shl_ne_vec_undef5(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t15_const_lshr_shl_ne_vec_undef6(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t15_const_lshr_shl_ne_vec_undef6(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -260,11 +242,10 @@ declare i32 @gen32()
 define i1 @t16_commutativity0(i32 %x) {
 ; CHECK-LABEL: @t16_commutativity0(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -277,11 +258,10 @@ define i1 @t16_commutativity0(i32 %x) {
 define i1 @t17_commutativity1(i32 %y) {
 ; CHECK-LABEL: @t17_commutativity1(
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -299,10 +279,10 @@ define i1 @t18_const_oneuse0(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t18_const_oneuse0(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   call void @use32(i32 %t0)
@@ -313,12 +293,12 @@ define i1 @t18_const_oneuse0(i32 %x, i32 %y) {
 }
 define i1 @t19_const_oneuse1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t19_const_oneuse1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -548,10 +528,10 @@ define i1 @t32_commutativity0_oneuse0(i32 %x) {
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -564,12 +544,12 @@ define i1 @t32_commutativity0_oneuse0(i32 %x) {
 define i1 @t33_commutativity0_oneuse1(i32 %x) {
 ; CHECK-LABEL: @t33_commutativity0_oneuse1(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -585,10 +565,10 @@ define i1 @t34_commutativity1_oneuse0(i32 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -601,12 +581,12 @@ define i1 @t34_commutativity1_oneuse0(i32 %y) {
 define i1 @t35_commutativity1_oneuse1(i32 %y) {
 ; CHECK-LABEL: @t35_commutativity1_oneuse1(
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1
-- 
2.40.0