return I;
}
- if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
- return I;
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
+ if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
+ return I;
return nullptr;
}
return nullptr;
}
-/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
-Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
- const APInt &C) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
- if (!II || !Cmp.isEquality())
- return nullptr;
-
- // Handle icmp {eq|ne} <intrinsic>, Constant.
+/// Fold an equality icmp with LLVM intrinsic and constant operand.
+Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
return nullptr;
}
+/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
+///
+/// Equality predicates are delegated to foldICmpEqIntrinsicWithConstant;
+/// this routine handles the remaining predicates, currently the unsigned
+/// relational folds for ctlz/cttz, rewriting the comparison on the zero
+/// count into a comparison or mask test on the intrinsic's operand.
+Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
+                                                         IntrinsicInst *II,
+                                                         const APInt &C) {
+  if (Cmp.isEquality())
+    return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
+
+  Type *Ty = II->getType();
+  unsigned BitWidth = C.getBitWidth();
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::ctlz: {
+    // ctlz(x) > C means x has at least C+1 leading zeros, i.e.
+    // x < (1 << (BitWidth - C - 1)). This replaces the icmp without
+    // creating any new instruction, so no one-use check is needed
+    // (unlike the cttz case below, which emits an extra 'and').
+    // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
+    if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+      unsigned Num = C.getLimitedValue();
+      APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
+                             II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+    }
+
+    // ctlz(x) < C means x has at most C-1 leading zeros, i.e.
+    // x > (2^(BitWidth - C) - 1). C == BitWidth degenerates to x != 0.
+    // C must be in [1, BitWidth]: C == 0 is always false and larger C is
+    // handled elsewhere (ctlz's range is [0, BitWidth]).
+    // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
+    if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+        C.uge(1) && C.ule(BitWidth)) {
+      unsigned Num = C.getLimitedValue();
+      APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
+                             II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+    }
+    break;
+  }
+  case Intrinsic::cttz: {
+    // These folds emit an extra 'and', so if the cttz has other users we
+    // would keep the cttz and end up with more instructions than before.
+    // Limit to one use to ensure we don't increase instruction count.
+    if (!II->hasOneUse())
+      return nullptr;
+
+    // cttz(x) > C means the low C+1 bits of x are all zero.
+    // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
+    if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+      APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
+                             Builder.CreateAnd(II->getArgOperand(0), Mask),
+                             ConstantInt::getNullValue(Ty));
+    }
+
+    // cttz(x) < C means at least one of the low C bits of x is set.
+    // C == BitWidth degenerates to x != 0 (mask is all ones).
+    // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
+    if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+        C.uge(1) && C.ule(BitWidth)) {
+      APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
+                             Builder.CreateAnd(II->getArgOperand(0), Mask),
+                             ConstantInt::getNullValue(Ty));
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  return nullptr;
+}
+
/// Handle icmp with constant (but not simple integer constant) RHS.
Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
define i1 @ctlz_ugt_one_i32(i32 %x) {
; CHECK-LABEL: @ctlz_ugt_one_i32(
-; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 1073741824
; CHECK-NEXT: ret i1 [[CMP]]
;
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
define i1 @ctlz_ugt_other_i32(i32 %x) {
; CHECK-LABEL: @ctlz_ugt_other_i32(
-; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32768
; CHECK-NEXT: ret i1 [[CMP]]
;
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
; CHECK-LABEL: @ctlz_ugt_other_multiuse_i32(
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 32768
; CHECK-NEXT: ret i1 [[CMP]]
;
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
define i1 @ctlz_ugt_bw_minus_one_i32(i32 %x) {
; CHECK-LABEL: @ctlz_ugt_bw_minus_one_i32(
-; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 31
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @ctlz_ult_other_v2i32(
-; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32(
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
; CHECK-NEXT: store <2 x i32> [[LZ]], <2 x i32>* [[P:%.*]], align 8
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], <i32 65535, i32 65535>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32(
-; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 31, i32 31>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
define <2 x i1> @ctlz_ult_bitwidth_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @ctlz_ult_bitwidth_v2i32(
-; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 32, i32 32>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
define i1 @cttz_ugt_one_i33(i33 %x) {
; CHECK-LABEL: @cttz_ugt_one_i33(
-; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
define i1 @cttz_ugt_other_i33(i33 %x) {
; CHECK-LABEL: @cttz_ugt_other_i33(
-; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 131071
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
define i1 @cttz_ugt_bw_minus_one_i33(i33 %x) {
; CHECK-LABEL: @cttz_ugt_bw_minus_one_i33(
-; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @cttz_ult_other_v2i32(
-; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32(
-; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 31, i32 31>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
define <2 x i1> @cttz_ult_bitwidth_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @cttz_ult_bitwidth_v2i32(
-; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 32, i32 32>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)