[InstCombine] Shift amount reassociation in bittest: trunc-of-lshr (PR42399)

author Roman Lebedev <lebedev.ri@gmail.com>

Thu, 29 Aug 2019 10:26:23 +0000 (10:26 +0000)

committer Roman Lebedev <lebedev.ri@gmail.com>

Thu, 29 Aug 2019 10:26:23 +0000 (10:26 +0000)
author Roman Lebedev <lebedev.ri@gmail.com>
Thu, 29 Aug 2019 10:26:23 +0000 (10:26 +0000)
committer Roman Lebedev <lebedev.ri@gmail.com>
Thu, 29 Aug 2019 10:26:23 +0000 (10:26 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp

index 940e5554bb331a0f9a9ea8859672ff34baac8b19..1117586549e50a0ca5a16183ff3a72798255c17a 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3379,7 +3379,7 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
  // we should move shifts to the same hand of 'and', i.e. rewrite as
  //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
  // We are only interested in opposite logical shifts here.
-// One of the shifts can be truncated. For now, it can only be 'shl'.
+// One of the shifts can be truncated.
  // If we can, we want to end up creating 'lshr' shift.
  static Value *
  foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
@@ -3413,14 +3413,6 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
           "We did not look past any shifts while matching XShift though.");
    bool HadTrunc = WidestTy != I.getOperand(0)->getType();
  
-  if (HadTrunc) {
-    // We did indeed have a truncation. For now, let's only proceed if the 'shl'
-    // was truncated, since that does not require any extra legality checks.
-    // FIXME: trunc-of-lshr.
-    if (!match(YShift, m_Shl(m_Value(), m_Value())))
-      return nullptr;
-  }
-
    // If YShift is a 'lshr', swap the shifts around.
    if (match(YShift, m_LShr(m_Value(), m_Value())))
      std::swap(XShift, YShift);
@@ -3462,16 +3454,68 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
                        /*isNUW=*/false, SQ.getWithInstruction(&I)));
    if (!NewShAmt)
      return nullptr;
+  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
+  unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
+
    // Is the new shift amount smaller than the bit width?
    // FIXME: could also rely on ConstantRange.
-  if (!match(NewShAmt, m_SpecificInt_ICMP(
-                           ICmpInst::Predicate::ICMP_ULT,
-                           APInt(NewShAmt->getType()->getScalarSizeInBits(),
-                                 WidestTy->getScalarSizeInBits()))))
+  if (!match(NewShAmt,
+             m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+                                APInt(WidestBitWidth, WidestBitWidth))))
      return nullptr;
+
+  // An extra legality check is needed if we had trunc-of-lshr.
+  if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
+    auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
+                    WidestShift]() {
+      // It isn't obvious whether it's worth it to analyze non-constants here.
+      // Also, let's basically give up on non-splat cases, pessimizing vectors.
+      // If *any* of these preconditions matches we can perform the fold.
+      Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
+                                    ? NewShAmt->getSplatValue()
+                                    : NewShAmt;
+      // If it's an edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
+      if (NewShAmtSplat &&
+          (NewShAmtSplat->isNullValue() ||
+           NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
+        return true;
+      // We consider *min* leading zeros so a single outlier
+      // blocks the transform as opposed to allowing it.
+      if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
+        KnownBits Known = computeKnownBits(C, SQ.DL);
+        unsigned MinLeadZero = Known.countMinLeadingZeros();
+        // If the shifted value has at most the lowest bit set, we can fold.
+        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+        if (MaxActiveBits <= 1)
+          return true;
+        // Precondition:  NewShAmt u<= countLeadingZeros(C)
+        if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
+          return true;
+      }
+      if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
+        KnownBits Known = computeKnownBits(C, SQ.DL);
+        unsigned MinLeadZero = Known.countMinLeadingZeros();
+        // If the shifted value has at most the lowest bit set, we can fold.
+        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+        if (MaxActiveBits <= 1)
+          return true;
+        // Precondition:  ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
+        if (NewShAmtSplat) {
+          APInt AdjNewShAmt =
+              (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
+          if (AdjNewShAmt.ule(MinLeadZero))
+            return true;
+        }
+      }
+      return false; // Can't tell if it's ok.
+    };
+    if (!CanFold())
+      return nullptr;
+  }
+
    // All good, we can do this fold.
-  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
    X = Builder.CreateZExt(X, WidestTy);
+  Y = Builder.CreateZExt(Y, WidestTy);
    // The shift is the same that was for X.
    Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
                    ? Builder.CreateLShr(X, NewShAmt)
diff --git a/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll b/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll

index 56d4ee78f08261736d3b93f5a45854963117d910..118186f98a9e1964321d0cb8374375f357813059 100644 (file)
--- a/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
+++ b/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
@@ -42,15 +42,9 @@ define i1 @n0(i32 %x, i64 %y, i32 %len) {
  ; New shift amount would be 16, %x has 16 leading zeros - can fold.
  define i1 @t1(i64 %y, i32 %len) {
  ; CHECK-LABEL: @t1(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 65535, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[Y:%.*]], 4294901760
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %t0 = sub i32 32, %len
    %t1 = shl i32 65535, %t0
@@ -65,15 +59,9 @@ define i1 @t1(i64 %y, i32 %len) {
  ; Note that we indeed look at leading zeros!
  define i1 @t1_single_bit(i64 %y, i32 %len) {
  ; CHECK-LABEL: @t1_single_bit(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 32768, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[Y:%.*]], 2147483648
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %t0 = sub i32 32, %len
    %t1 = shl i32 32768, %t0
@@ -112,15 +100,9 @@ define i1 @n2(i64 %y, i32 %len) {
  ; New shift amount would be 16, %y has 47 leading zeros - can fold.
  define i1 @t3(i32 %x, i32 %len) {
  ; CHECK-LABEL: @t3(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 131071, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %t0 = sub i32 32, %len
    %t1 = shl i32 %x, %t0
@@ -135,15 +117,9 @@ define i1 @t3(i32 %x, i32 %len) {
  ; Note that we indeed look at leading zeros!
  define i1 @t3_singlebit(i32 %x, i32 %len) {
  ; CHECK-LABEL: @t3_singlebit(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 65536, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %t0 = sub i32 32, %len
    %t1 = shl i32 %x, %t0
@@ -189,15 +165,10 @@ define i1 @n4(i32 %x, i32 %len) {
  ; New shift amount would be 16, minimal count of leading zeros in %x is 16. Ok.
  define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) {
  ; CHECK-LABEL: @t5_vec(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 32, i32 32>, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> <i32 65535, i32 32767>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -16, i32 -16>
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], <i64 16, i64 16>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], <i64 65535, i64 32767>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
  ;
    %t0 = sub <2 x i32> <i32 32, i32 32>, %len
    %t1 = shl <2 x i32> <i32 65535, i32 32767>, %t0
@@ -236,15 +207,9 @@ define <2 x i1> @n6_vec(<2 x i64> %y, <2 x i32> %len) {
  ; New shift amount would be 16, minimal count of leading zeros in %x is 47. Ok.
  define <2 x i1> @t7_vec(<2 x i32> %x, <2 x i32> %len) {
  ; CHECK-LABEL: @t7_vec(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 32, i32 32>, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -16, i32 -16>
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> <i64 131071, i64 65535>, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
  ;
    %t0 = sub <2 x i32> <i32 32, i32 32>, %len
    %t1 = shl <2 x i32> %x, %t0
@@ -285,15 +250,11 @@ define <2 x i1> @n8_vec(<2 x i32> %x, <2 x i32> %len) {
  ; Ok if the final shift amount is exactly one less than widest bit width.
  define i1 @t9_highest_bit(i32 %x, i64 %y, i32 %len) {
  ; CHECK-LABEL: @t9_highest_bit(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 64, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[Y:%.*]], 63
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
+; CHECK-NEXT:    ret i1 [[TMP4]]
  ;
    %t0 = sub i32 64, %len
    %t1 = shl i32 %x, %t0
@@ -332,15 +293,10 @@ define i1 @t10_almost_highest_bit(i32 %x, i64 %y, i32 %len) {
  ; Ok if the final shift amount is zero.
  define i1 @t11_no_shift(i32 %x, i64 %y, i32 %len) {
  ; CHECK-LABEL: @t11_no_shift(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 64, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -64
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
  ;
    %t0 = sub i32 64, %len
    %t1 = shl i32 %x, %t0
@@ -427,15 +383,9 @@ define <2 x i1> @n12_bad(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) {
  ; Ok if one of the values being shifted is 1
  define i1 @t13_x_is_one(i64 %y, i32 %len) {
  ; CHECK-LABEL: @t13_x_is_one(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 1, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[Y:%.*]], 65536
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
  ;
    %t0 = sub i32 32, %len
    %t1 = shl i32 1, %t0
@@ -449,15 +399,7 @@ define i1 @t13_x_is_one(i64 %y, i32 %len) {
  }
  define i1 @t14_x_is_one(i32 %x, i32 %len) {
  ; CHECK-LABEL: @t14_x_is_one(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 1, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    ret i1 false
  ;
    %t0 = sub i32 32, %len
    %t1 = shl i32 %x, %t0
@@ -472,15 +414,10 @@ define i1 @t14_x_is_one(i32 %x, i32 %len) {
  
  define <2 x i1> @t15_vec_x_is_one_or_zero(<2 x i64> %y, <2 x i32> %len) {
  ; CHECK-LABEL: @t15_vec_x_is_one_or_zero(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 64, i32 64>, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> <i32 1, i32 0>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -16, i32 -16>
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], <i64 48, i64 48>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], <i64 1, i64 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
  ;
    %t0 = sub <2 x i32> <i32 64, i32 64>, %len
    %t1 = shl <2 x i32> <i32 1, i32 0>, %t0
@@ -494,15 +431,7 @@ define <2 x i1> @t15_vec_x_is_one_or_zero(<2 x i64> %y, <2 x i32> %len) {
  }
  define <2 x i1> @t16_vec_y_is_one_or_zero(<2 x i32> %x, <2 x i32> %len) {
  ; CHECK-LABEL: @t16_vec_y_is_one_or_zero(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 64, i32 64>, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -16, i32 -16>
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> <i64 1, i64 0>, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
  ;
    %t0 = sub <2 x i32> <i32 64, i32 64>, %len
    %t1 = shl <2 x i32> %x, %t0
@@ -524,15 +453,8 @@ define <2 x i1> @t16_vec_y_is_one_or_zero(<2 x i32> %x, <2 x i32> %len) {
  ; And that's the main motivational pattern:
  define i1 @rawspeed_signbit(i64 %storage, i32 %nbits) {
  ; CHECK-LABEL: @rawspeed_signbit(
-; CHECK-NEXT:    [[SKIPNBITS:%.*]] = sub nsw i32 64, [[NBITS:%.*]]
-; CHECK-NEXT:    [[SKIPNBITSWIDE:%.*]] = zext i32 [[SKIPNBITS]] to i64
-; CHECK-NEXT:    [[DATAWIDE:%.*]] = lshr i64 [[STORAGE:%.*]], [[SKIPNBITSWIDE]]
-; CHECK-NEXT:    [[DATA:%.*]] = trunc i64 [[DATAWIDE]] to i32
-; CHECK-NEXT:    [[NBITSMINUSONE:%.*]] = add nsw i32 [[NBITS]], -1
-; CHECK-NEXT:    [[BITMASK:%.*]] = shl i32 1, [[NBITSMINUSONE]]
-; CHECK-NEXT:    [[BITMASKED:%.*]] = and i32 [[BITMASK]], [[DATA]]
-; CHECK-NEXT:    [[ISBITUNSET:%.*]] = icmp eq i32 [[BITMASKED]], 0
-; CHECK-NEXT:    ret i1 [[ISBITUNSET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[STORAGE:%.*]], -1
+; CHECK-NEXT:    ret i1 [[TMP1]]
  ;
    %skipnbits = sub nsw i32 64, %nbits
    %skipnbitswide = zext i32 %skipnbits to i64
diff --git a/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll b/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll

index fbad6d1d65190b394848187c1d063b22dc668016..0c3ee460948c004889fc23968d1dccb19c6c6115 100644 (file)
--- a/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
+++ b/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
@@ -6,8 +6,8 @@
  ; we should move shifts to the same hand of 'and', i.e. e.g. rewrite as
  ;   icmp eq/ne (and (((x shift Q) shift K), y)), 0
  ; We are only interested in opposite logical shifts here.
-; We still can handle the case where there is a truncation between a shift
-; and an 'and', but for now only if it's 'shl' - simpler legality check.
+; We still can handle the case where there is a truncation between a shift and
+; an 'and'. If it's trunc-of-shl - no extra legality check is needed.
  
  ;-------------------------------------------------------------------------------
  ; Basic scalar tests
author	Roman Lebedev <lebedev.ri@gmail.com>
	Thu, 29 Aug 2019 10:26:23 +0000 (10:26 +0000)
committer	Roman Lebedev <lebedev.ri@gmail.com>
	Thu, 29 Aug 2019 10:26:23 +0000 (10:26 +0000)
lib/Transforms/InstCombine/InstCombineCompares.cpp		patch \| blob \| history
test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll		patch \| blob \| history
test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll		patch \| blob \| history