From b1eb7bb6ec1a90b6b7f774098e309a93beda2bd4 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 5 Oct 2017 21:11:49 +0000 Subject: [PATCH] [InstCombine] improve folds for icmp gt/lt (shr X, C1), C2 We can always eliminate the shift in: icmp gt/lt (shr X, C1), C2 --> icmp gt/lt X, C' This patch was supposed to just be an efficiency improvement because we were doing this 3-step process to fold: IC: Visiting: %c = icmp ugt i4 %s, 1 IC: ADD: %s = lshr i4 %x, 1 IC: ADD: %1 = udiv i4 %x, 2 IC: Old = %c = icmp ugt i4 %1, 1 New = = icmp uge i4 %x, 4 IC: ADD: %c = icmp uge i4 %x, 4 IC: ERASE %2 = icmp ugt i4 %1, 1 IC: Visiting: %c = icmp uge i4 %x, 4 IC: Old = %c = icmp uge i4 %x, 4 New = = icmp ugt i4 %x, 3 IC: ADD: %c = icmp ugt i4 %x, 3 IC: ERASE %2 = icmp uge i4 %x, 4 IC: Visiting: %c = icmp ugt i4 %x, 3 IC: DCE: %1 = udiv i4 %x, 2 IC: ERASE %1 = udiv i4 %x, 2 IC: DCE: %s = lshr i4 %x, 1 IC: ERASE %s = lshr i4 %x, 1 IC: Visiting: ret i1 %c When we could go directly to canonical icmp form: IC: Visiting: %c = icmp ugt i4 %s, 1 IC: Old = %c = icmp ugt i4 %s, 1 New = = icmp ugt i4 %x, 3 IC: ADD: %c = icmp ugt i4 %x, 3 IC: ERASE %1 = icmp ugt i4 %s, 1 IC: ADD: %s = lshr i4 %x, 1 IC: DCE: %s = lshr i4 %x, 1 IC: ERASE %s = lshr i4 %x, 1 IC: Visiting: %c = icmp ugt i4 %x, 3 ...but then I noticed that the folds were incomplete too: https://godbolt.org/g/aB2hLE Here are attempts to prove the logic with Alive: https://rise4fun.com/Alive/92o Name: lshr_ult Pre: ((C2 << C1) u>> C1) == C2 %sh = lshr i8 %x, C1 %r = icmp ult i8 %sh, C2 => %r = icmp ult i8 %x, (C2 << C1) Name: ashr_slt Pre: ((C2 << C1) >> C1) == C2 %sh = ashr i8 %x, C1 %r = icmp slt i8 %sh, C2 => %r = icmp slt i8 %x, (C2 << C1) Name: lshr_ugt Pre: (((C2+1) << C1) u>> C1) == (C2+1) %sh = lshr i8 %x, C1 %r = icmp ugt i8 %sh, C2 => %r = icmp ugt i8 %x, ((C2+1) << C1) - 1 Name: ashr_sgt Pre: (C2 != 127) && ((C2+1) << C1 != -128) && (((C2+1) << C1) >> C1) == (C2+1) %sh = ashr i8 %x, C1 %r = icmp sgt i8 %sh, C2 => %r = icmp sgt i8 %x, ((C2+1) << C1) - 1 Name: ashr_exact_sgt Pre: ((C2 << C1) >> C1) == C2 %sh = ashr exact i8 %x, C1 %r = icmp sgt i8 %sh, C2 => %r = icmp sgt i8 %x, (C2 << C1) Name: ashr_exact_slt Pre: ((C2 << C1) >> C1) == C2 %sh = ashr exact i8 %x, C1 %r = icmp slt i8 %sh, C2 => %r = icmp slt i8 %x, (C2 << C1) Name: lshr_exact_ugt Pre: ((C2 << C1) u>> C1) == C2 %sh = lshr exact i8 %x, C1 %r = icmp ugt i8 %sh, C2 => %r = icmp ugt i8 %x, (C2 << C1) Name: lshr_exact_ult Pre: ((C2 << C1) u>> C1) == C2 %sh = lshr exact i8 %x, C1 %r = icmp ult i8 %sh, C2 => %r = icmp ult i8 %x, (C2 << C1) We did something similar for 'shl' in D28406. Differential Revision: https://reviews.llvm.org/D38514 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315021 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCompares.cpp | 77 ++++++++++--------- test/Transforms/InstCombine/icmp-shr-lt-gt.ll | 48 ++++-------- 2 files changed, 56 insertions(+), 69 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 20d8a2785b8..67b5e5c130b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2011,43 +2011,46 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, return nullptr; bool IsAShr = Shr->getOpcode() == Instruction::AShr; - if (!Cmp.isEquality()) { - // If we have an unsigned comparison and an ashr, we can't simplify this. - // Similarly for signed comparisons with lshr. - if (Cmp.isSigned() != IsAShr) - return nullptr; - - // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv - // by a power of 2. Since we already have logic to simplify these, - // transform to div and then simplify the resultant comparison. - if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) - return nullptr; - - // Revisit the shift (to delete it). - Worklist.Add(Shr); - - Constant *DivCst = ConstantInt::get( - Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - - Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact()) - : Builder.CreateUDiv(X, DivCst, "", Shr->isExact()); - - Cmp.setOperand(0, Tmp); - - // If the builder folded the binop, just return it. - BinaryOperator *TheDiv = dyn_cast(Tmp); - if (!TheDiv) - return &Cmp; - - // Otherwise, fold this div/compare. - assert(TheDiv->getOpcode() == Instruction::SDiv || - TheDiv->getOpcode() == Instruction::UDiv); - - Instruction *Res = foldICmpDivConstant(Cmp, TheDiv, C); - assert(Res && "This div/cst should have folded!"); - return Res; + bool IsExact = Shr->isExact(); + Type *ShrTy = Shr->getType(); + // TODO: If we could guarantee that InstSimplify would handle all of the + // constant-value-based preconditions in the folds below, then we could assert + // those conditions rather than checking them. This is difficult because of + // undef/poison (PR34838). + if (IsAShr) { + if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) { + // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC) + // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC) + APInt ShiftedC = C.shl(ShAmtVal); + if (ShiftedC.ashr(ShAmtVal) == C) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } + if (Pred == CmpInst::ICMP_SGT) { + // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1 + APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; + if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() && + (ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } + } else { + if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) { + // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC) + // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC) + APInt ShiftedC = C.shl(ShAmtVal); + if (ShiftedC.lshr(ShAmtVal) == C) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } + if (Pred == CmpInst::ICMP_UGT) { + // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1 + APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; + if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1)) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } } + if (!Cmp.isEquality()) + return nullptr; + // Handle equality comparisons of shift-by-constant. // If the comparison constant changes with the shift, the comparison cannot @@ -2060,14 +2063,14 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // Check if the bits shifted out are known to be zero. If so, we can compare // against the unshifted value: // (X & 4) >> 1 == 2 --> (X & 4) == 4. - Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), C << ShAmtVal); + Constant *ShiftedCmpRHS = ConstantInt::get(ShrTy, C << ShAmtVal); if (Shr->hasOneUse()) { if (Shr->isExact()) return new ICmpInst(Pred, X, ShiftedCmpRHS); // Otherwise strength reduce the shift into an 'and'. APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(Shr->getType(), Val); + Constant *Mask = ConstantInt::get(ShrTy, Val); Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); return new ICmpInst(Pred, And, ShiftedCmpRHS); } diff --git a/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/test/Transforms/InstCombine/icmp-shr-lt-gt.ll index 8a378717f50..bf1a031a412 100644 --- a/test/Transforms/InstCombine/icmp-shr-lt-gt.ll +++ b/test/Transforms/InstCombine/icmp-shr-lt-gt.ll @@ -888,8 +888,7 @@ define i1 @lshrult_03_15(i4 %x) { define i1 @ashrsgt_01_00(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_00( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 0 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 1 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -899,8 +898,7 @@ define i1 @ashrsgt_01_00(i4 %x) { define i1 @ashrsgt_01_01(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_01( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -910,8 +908,7 @@ define i1 @ashrsgt_01_01(i4 %x) { define i1 @ashrsgt_01_02(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_02( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 2 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 5 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1002,8 +999,7 @@ define i1 @ashrsgt_01_11(i4 %x) { define i1 @ashrsgt_01_12(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_12( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -4 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -7 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1013,8 +1009,7 @@ define i1 @ashrsgt_01_12(i4 %x) { define i1 @ashrsgt_01_13(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_13( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -3 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1024,8 +1019,7 @@ define i1 @ashrsgt_01_13(i4 %x) { define i1 @ashrsgt_01_14(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_14( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -2 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -3 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1045,8 +1039,7 @@ define i1 @ashrsgt_01_15(i4 %x) { define i1 @ashrsgt_02_00(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_00( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 0 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1173,8 +1166,7 @@ define i1 @ashrsgt_02_13(i4 %x) { define i1 @ashrsgt_02_14(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_14( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -2 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1349,8 +1341,7 @@ define i1 @ashrslt_01_00(i4 %x) { define i1 @ashrslt_01_01(i4 %x) { ; CHECK-LABEL: @ashrslt_01_01( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 2 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1360,8 +1351,7 @@ define i1 @ashrslt_01_01(i4 %x) { define i1 @ashrslt_01_02(i4 %x) { ; CHECK-LABEL: @ashrslt_01_02( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 2 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1371,8 +1361,7 @@ define i1 @ashrslt_01_02(i4 %x) { define i1 @ashrslt_01_03(i4 %x) { ; CHECK-LABEL: @ashrslt_01_03( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 3 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 6 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1463,8 +1452,7 @@ define i1 @ashrslt_01_12(i4 %x) { define i1 @ashrslt_01_13(i4 %x) { ; CHECK-LABEL: @ashrslt_01_13( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -3 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -6 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1474,8 +1462,7 @@ define i1 @ashrslt_01_13(i4 %x) { define i1 @ashrslt_01_14(i4 %x) { ; CHECK-LABEL: @ashrslt_01_14( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -2 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1485,8 +1472,7 @@ define i1 @ashrslt_01_14(i4 %x) { define i1 @ashrslt_01_15(i4 %x) { ; CHECK-LABEL: @ashrslt_01_15( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -1 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -2 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1506,8 +1492,7 @@ define i1 @ashrslt_02_00(i4 %x) { define i1 @ashrslt_02_01(i4 %x) { ; CHECK-LABEL: @ashrslt_02_01( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1634,8 +1619,7 @@ define i1 @ashrslt_02_14(i4 %x) { define i1 @ashrslt_02_15(i4 %x) { ; CHECK-LABEL: @ashrslt_02_15( -; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2 -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -1 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 -- 2.40.0