return Count;
}
-/// Perform a logical right-shift from Src to Dst, which must be equal or
-/// non-overlapping, of Words words, by Shift, which must be less than 64.
-static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words,
- unsigned Shift) {
- uint64_t Carry = 0;
- for (int I = Words - 1; I >= 0; --I) {
- uint64_t Tmp = Src[I];
- Dst[I] = (Tmp >> Shift) | Carry;
- Carry = Tmp << (64 - Shift);
- }
-}
-
APInt APInt::byteSwap() const {
assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
if (BitWidth == 16)
for (unsigned I = 0, N = getNumWords(); I != N; ++I)
Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
if (Result.BitWidth != BitWidth) {
- lshrNear(Result.pVal, Result.pVal, getNumWords(),
- Result.BitWidth - BitWidth);
+ Result.lshrInPlace(Result.BitWidth - BitWidth);
Result.BitWidth = BitWidth;
}
return Result;
}
APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
- while (!!B) {
- APInt R = A.urem(B);
- A = std::move(B);
- B = std::move(R);
+ // Fast-path a common case.
+ if (A == B) return A;
+
+ // Corner cases: if either operand is zero, the other is the gcd.
+ if (!A) return B;
+ if (!B) return A;
+
+ // Count common powers of 2 and remove all other powers of 2.
+ unsigned Pow2;
+ {
+ unsigned Pow2_A = A.countTrailingZeros();
+ unsigned Pow2_B = B.countTrailingZeros();
+ if (Pow2_A > Pow2_B) {
+ A.lshrInPlace(Pow2_A - Pow2_B);
+ Pow2 = Pow2_B;
+ } else if (Pow2_B > Pow2_A) {
+ B.lshrInPlace(Pow2_B - Pow2_A);
+ Pow2 = Pow2_A;
+ } else {
+ Pow2 = Pow2_A;
+ }
+ }
+
+ // Both operands are odd multiples of 2^Pow_2:
+ //
+ // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
+ //
+ // This is a modified version of Stein's algorithm, taking advantage of
+ // efficient countTrailingZeros().
+ while (A != B) {
+ if (A.ugt(B)) {
+ A -= B;
+ A.lshrInPlace(A.countTrailingZeros() - Pow2);
+ } else {
+ B -= A;
+ B.lshrInPlace(B.countTrailingZeros() - Pow2);
+ }
}
+
return A;
}
return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
}
+/// Perform a logical right-shift from Src to Dst of Words words, by Shift,
+/// which must be less than 64. If the source and destination ranges overlap,
+/// we require that Src >= Dst (put another way, we require that the overall
+/// operation is a right shift on the combined range).
+static void lshrWords(APInt::WordType *Dst, APInt::WordType *Src,
+ unsigned Words, unsigned Shift) {
+ assert(Shift < APInt::APINT_BITS_PER_WORD);
+
+ if (!Words)
+ return;
+
+ if (Shift == 0) {
+ std::memmove(Dst, Src, Words * APInt::APINT_WORD_SIZE);
+ return;
+ }
+
+ uint64_t Low = Src[0];
+ for (unsigned I = 1; I != Words; ++I) {
+ uint64_t High = Src[I];
+ Dst[I - 1] =
+ (Low >> Shift) | (High << (APInt::APINT_BITS_PER_WORD - Shift));
+ Low = High;
+ }
+ Dst[Words - 1] = Low >> Shift;
+}
+
/// Logical right-shift this APInt by shiftAmt.
/// @brief Logical right-shift function.
-APInt APInt::lshr(unsigned shiftAmt) const {
+void APInt::lshrInPlace(unsigned shiftAmt) {
if (isSingleWord()) {
if (shiftAmt >= BitWidth)
- return APInt(BitWidth, 0);
+ VAL = 0;
else
- return APInt(BitWidth, this->VAL >> shiftAmt);
- }
-
- // If all the bits were shifted out, the result is 0. This avoids issues
- // with shifting by the size of the integer type, which produces undefined
- // results. We define these "undefined results" to always be 0.
- if (shiftAmt >= BitWidth)
- return APInt(BitWidth, 0);
-
- // If none of the bits are shifted out, the result is *this. This avoids
- // issues with shifting by the size of the integer type, which produces
- // undefined results in the code below. This is also an optimization.
- if (shiftAmt == 0)
- return *this;
-
- // Create some space for the result.
- uint64_t * val = new uint64_t[getNumWords()];
-
- // If we are shifting less than a word, compute the shift with a simple carry
- if (shiftAmt < APINT_BITS_PER_WORD) {
- lshrNear(val, pVal, getNumWords(), shiftAmt);
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
+ VAL >>= shiftAmt;
+ return;
}
- // Compute some values needed by the remaining shift algorithms
- unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
- unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+ // Don't bother performing a no-op shift.
+ if (!shiftAmt)
+ return;
- // If we are shifting whole words, just move whole words
- if (wordShift == 0) {
- for (unsigned i = 0; i < getNumWords() - offset; ++i)
- val[i] = pVal[i+offset];
- for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
- val[i] = 0;
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
- }
+ // Find number of complete words being shifted out and zeroed.
+ const unsigned Words = getNumWords();
+ const unsigned ShiftFullWords =
+ std::min(shiftAmt / APINT_BITS_PER_WORD, Words);
- // Shift the low order words
- unsigned breakWord = getNumWords() - offset -1;
- for (unsigned i = 0; i < breakWord; ++i)
- val[i] = (pVal[i+offset] >> wordShift) |
- (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
- // Shift the break word.
- val[breakWord] = pVal[breakWord+offset] >> wordShift;
+ // Fill in first Words - ShiftFullWords by shifting.
+ lshrWords(pVal, pVal + ShiftFullWords, Words - ShiftFullWords,
+ shiftAmt - (ShiftFullWords * APINT_BITS_PER_WORD));
- // Remaining words are 0
- for (unsigned i = breakWord+1; i < getNumWords(); ++i)
- val[i] = 0;
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
+ // The remaining high words are all zero.
+ for (unsigned I = Words - ShiftFullWords; I != Words; ++I)
+ pVal[I] = 0;
}
/// Left-shift this APInt by shiftAmt.
Constant *C = Builder->getInt(CI->getValue()-1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
}
+#if 0
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ return nullptr;
+ if (DivRHS->isZero())
+ return nullptr; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+ return nullptr; // The overflow computation also screws up here
+ if (DivRHS->isOne()) {
+ // This eliminates some funny cases with INT_MIN.
+ ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
+ return &ICI;
+ }
+
+ // Compute Prod = CI * DivRHS. We are essentially solving an equation
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ /// If the division is known to be exact, then there is no remainder from the
+ /// divide, so the covered range size is unit, otherwise it is the divisor.
+ ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+ // whether either value in the interval overflows. After analysis each
+ // overflow variable is set to 0 if it's corresponding bound variable is valid
+ // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+ int LoOverflow = 0, HiOverflow = 0;
+ Constant *LoBound = nullptr, *HiBound = nullptr;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow) {
+ // If this is not an exact divide, then many values in the range collapse
+ // to the same result value.
+ HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ }
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+ HiBound = RangeSize;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->isNegative()) { // Divisor is < 0.
+ if (DivI->isExact())
+ RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(RangeSize);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
+ if (HiOverflow == -1) // High bound less than input range.
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+ ConstantInt *ShAmt) {
+ const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = CmpRHSV.getBitWidth();
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return nullptr;
+
+ if (!ICI.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+ return nullptr;
+
+ // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+ // by a power of 2. Since we already have logic to simplify these,
+ // transform to div and then simplify the resultant comparison.
+ if (Shr->getOpcode() == Instruction::AShr &&
+ (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+ return nullptr;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst =
+ ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp =
+ Shr->getOpcode() == Instruction::AShr ?
+ Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+ Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+ ICI.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (!TheDiv)
+ return &ICI;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = CmpRHSV << ShAmtVal;
+ ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
+ if (Shr->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = Builder->getInt1(IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (Shr->hasOneUse() && Shr->isExact())
+ return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+ if (Shr->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = Builder->getInt(Val);
+
+ Value *And = Builder->CreateAnd(Shr->getOperand(0),
+ Mask, Shr->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
+ }
+ return nullptr;
+}
+#endif
+namespace {
+/// Models a check that LHS is divisible by Factor.
+class DivisibilityCheck {
+ // Signedness of the check. A bitwise and is a divisibility check,
+ // if its mask is (equivalent to) a power of 2 mask.
+ enum { DC_Null, DC_SRem, DC_URem, DC_And } Kind;
+ Value *Check;
+ Value *LHS;
+ ConstantInt *Factor;
+
+public:
+ DivisibilityCheck() : Kind(DC_Null) {}
+
+ /// Try to extract a divisibility check from V, on the assumption
+ /// that it is being compared to 0.
+ bool match(Value *V) {
+ Kind = DC_Null;
+ Check = V;
+ if (::match(V, m_SRem(m_Value(LHS), m_ConstantInt(Factor))))
+ Kind = DC_SRem;
+ else if (::match(V, m_URem(m_Value(LHS), m_ConstantInt(Factor))))
+ Kind = DC_URem;
+ else if (::match(V, m_And(m_Value(LHS), m_ConstantInt(Factor))))
+ Kind = DC_And;
+ return Kind != DC_Null;
+ }
+
+ /// Merge another divisibility check into this one.
+ bool merge(const DivisibilityCheck &O) {
+ assert(Kind != DC_Null && O.Kind != DC_Null);
+ if (LHS != O.LHS)
+ // We don't have two divisibility checks on the same operand.
+ return false;
+
+ if (!(Check->hasOneUse() && Kind != DC_And) &&
+ !(O.Check->hasOneUse() && O.Kind != DC_And))
+ // We would not remove a division: bail out.
+ return false;
+
+ // Determine the factors we're checking for.
+ bool Failed = false;
+ APInt LHS = getFactor(O, Failed);
+ APInt RHS = O.getFactor(*this, Failed);
+ if (Failed)
+ return false;
+
+ // If we don't have a single signedness, we can fold the checks
+ // together if one of them is for a power of 2, because
+ // divisibility by a power of 2 is the same for srem and urem.
+ if (Kind != O.Kind && O.Kind != DC_And && LHS.isPowerOf2())
+ Kind = O.Kind;
+ if (Kind != O.Kind && !RHS.isPowerOf2())
+ return false;
+ assert(Kind == DC_SRem || Kind == DC_URem && "bad kind after merging");
+ bool Signed = Kind == DC_SRem;
+
+ // Fold them together.
+ APInt GCD = APIntOps::GreatestCommonDivisor(LHS, RHS);
+ APInt LCM = LHS.udiv(GCD);
+ bool Overflow = false;
+ // Use a negative signed multiplication: producing INT_MIN should not
+ // be considered an overflow here.
+ LCM = Signed ? LCM.smul_ov(-RHS, Overflow) : LCM.umul_ov(RHS, Overflow);
+ // On overflow, there cannot exist a non-zero value that is divisible by
+ // both factors at once.
+ if (Overflow) LCM = 0;
+ Factor = cast<ConstantInt>(ConstantInt::get(Factor->getType(), LCM));
+ return true;
+ }
+
+ Value *create(InstCombiner::BuilderTy *Builder) {
+ // LHS is divisible by zero iff LHS is zero.
+ if (!Factor->getValue())
+ return LHS;
+ // Checking for divisibility by power of 2 doesn't need a division.
+ if (Factor->getValue().isPowerOf2())
+ return Builder->CreateAnd(
+ LHS, ConstantInt::get(Factor->getType(), Factor->getValue() - 1));
+ return Kind == DC_SRem ? Builder->CreateSRem(LHS, Factor)
+ : Builder->CreateURem(LHS, Factor);
+ }
+
+private:
+ /// Get the unsigned multiplicative factor we're checking for.
+ APInt getFactor(const DivisibilityCheck &O, bool &Failed) const {
+ switch (Kind) {
+ case DC_Null:
+ llvm_unreachable("unexpected Kind");
+
+ case DC_SRem:
+ if (Factor->getValue().isNegative())
+ return -Factor->getValue();
+ // Fall through.
+ case DC_URem:
+ return Factor->getValue();
+
+ case DC_And:
+ assert(O.Kind != DC_And && "bad kind pair");
+ // If we're also checking for divisibility by K * 2^N,
+ // the low N bits of the mask are irrelevant.
+ APInt Result =
+ Factor->getValue() |
+ APInt::getLowBitsSet(Factor->getValue().getBitWidth(),
+ O.getFactor(*this, Failed).countTrailingZeros());
+ ++Result;
+ if (!!Result && !Result.isPowerOf2())
+ Failed = true;
+ return Result;
+ }
+ }
+};
+
+struct DivisibilityCheck_match {
+ DivisibilityCheck &Check;
+ DivisibilityCheck_match(DivisibilityCheck &Check) : Check(Check) {}
+ bool match(Value *V) { return Check.match(V); }
+};
+
+/// Matcher for divisibility checks.
+DivisibilityCheck_match m_DivisibilityCheck(DivisibilityCheck &Check) {
+ return DivisibilityCheck_match(Check);
+}
+}
/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
/// (icmp eq/ne A, Log2(AP2/AP1)) ->
if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse())
return nullptr;
+ DivisibilityCheck DivL, DivR;
+ if (match(Or, m_Or(m_DivisibilityCheck(DivL), m_DivisibilityCheck(DivR))) &&
+ DivL.merge(DivR)) {
+ // Simplify icmp eq (or (srem P, M), (srem P, N)), 0
+ // -> icmp eq (srem P, lcm(M, N)), 0
+ return new ICmpInst(Pred, DivL.create(Builder),
+ ConstantInt::getNullValue(Or->getType()));
+ }
+
+#if 0
+ // icmp eq (or X, Y), 0
+ // -> and (icmp eq X, 0), (icmp eq Y, 0)
+ // but only if this allows either subexpression to simplify further.
+ Instruction *ICmpX = nullptr, *ICmpY = nullptr;
+ if (auto *X = dyn_cast<Instruction>(LHSI->getOperand(0)))
+ ICmpX = visitICmpInstWithInstAndIntCst(ICI, X, RHS);
+ if (auto *Y = dyn_cast<Instruction>(LHSI->getOperand(1)))
+ ICmpY = visitICmpInstWithInstAndIntCst(ICI, Y, RHS);
+ if (ICmpX || ICmpX) {
+ Value *NewX, *NewY;
+ if (ICmpX) {
+ Worklist.Add(ICmpX);
+ NewX = Builder->Insert(ICmpX);
+ } else
+ NewX = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(0),
+ RHS);
+ if (ICmpY) {
+ Worklist.Add(ICmpY);
+ NewY = Builder->Insert(ICmpY);
+ } else
+ NewY = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(1),
+ RHS);
+ return BinaryOperator::CreateAnd(NewX, NewY);
+ }
+#endif
+
Value *P, *Q;
if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
// Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
--- /dev/null
+; Test that multiple divisibility checks are merged.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test1(i32 %A) {
+ %B = srem i32 %A, 2
+ %C = srem i32 %A, 3
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: srem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test2(i32 %A) {
+ %B = urem i32 %A, 2
+ %C = urem i32 %A, 3
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: urem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test3(i32 %A) {
+ %B = srem i32 %A, 2
+ %C = urem i32 %A, 3
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: urem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test4(i32 %A) {
+ %B = urem i32 %A, 2
+ %C = srem i32 %A, 3
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: srem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test5(i32 %A) {
+ %B = srem i32 %A, 8
+ %C = srem i32 %A, 12
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: srem i32 %A, 24
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test6(i32 %A) {
+ %B = and i32 %A, 6
+ %C = srem i32 %A, 12
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: srem i32 %A, 24
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test7(i32 %A) {
+ %B = and i32 %A, 8
+ %C = srem i32 %A, 12
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: and i32 %A, 8
+; CHECK-NEXT: srem i32 %A, 12
+; CHECK-NEXT: or
+; CHECK-NEXT: icmp
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test8(i32 %A, i32 %B) {
+ %C = srem i32 %A, 2
+ %D = srem i32 %B, 3
+ %E = or i32 %C, %D
+ %F = icmp eq i32 %E, 0
+ ret i1 %F
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: srem i32 %B, 3
+; CHECK-NEXT: and i32 %A, 1
+; CHECK-NEXT: or
+; CHECK-NEXT: icmp
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test9(i32 %A) {
+ %B = srem i32 %A, 7589
+ %C = srem i32 %A, 395309
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1 %E
+}
+
+define i1 @test10(i32 %A) {
+ ; 7589 and 395309 are prime, and
+ ; 7589 * 395309 == 3000000001 == -1294967295 (2^32)
+ %B = urem i32 %A, 7589
+ %C = urem i32 %A, 395309
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: urem i32 %A, -1294967295
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test11(i32 %A) {
+ %B = urem i32 %A, 65535
+ %C = urem i32 %A, 65537
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: urem i32 %A, -1
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test12(i32 %A) {
+ %B = urem i32 %A, 65536
+ %C = urem i32 %A, 65537
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test13(i32 %A) {
+ %B = srem i32 %A, 65536
+ %C = urem i32 %A, 65535
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: urem i32 %A, -65536
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test14(i32 %A) {
+ %B = srem i32 %A, 95
+ %C = srem i32 %A, 22605091
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: srem i32 %A, 2147483645
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test15(i32 %A) {
+ %B = srem i32 %A, 97
+ %C = srem i32 %A, 22605091
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1
+}
+
+define i32 @test16(i32 %A) {
+ %B = srem i32 %A, 3
+ %C = srem i32 %A, 5
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ %F = zext i1 %E to i32
+ %G = add i32 %B, %F
+ ret i32 %G
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %B = srem i32 %A, 3
+; CHECK-NEXT: %[[REM:.*]] = srem i32 %A, 15
+; CHECK-NEXT: %E = icmp eq i32 %[[REM]], 0
+; CHECK-NEXT: %F = zext i1 %E to i32
+; CHECK-NEXT: %G = add i32 %B, %F
+; CHECK-NEXT: ret i32 %G
+}
+
+define i32 @test17(i32 %A) {
+ %B = srem i32 %A, 3
+ %C = srem i32 %A, 5
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ %F = zext i1 %E to i32
+ %G = add i32 %B, %F
+ %H = add i32 %C, %G
+ ret i32 %H
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: %B = srem i32 %A, 3
+; CHECK-NEXT: %C = srem i32 %A, 5
+; CHECK-NOT: srem
+; CHECK: ret i32
+}
+
+define i32 @test18(i32 %A) {
+ %B = srem i32 %A, 3
+ %C = and i32 %A, 7
+ %D = or i32 %B, %C
+ %E = icmp eq i32 %D, 0
+ %F = zext i1 %E to i32
+ %G = add i32 %C, %F
+ ret i32 %G
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: %C = and i32 %A, 7
+; CHECK-NEXT: %[[REM:.*]] = srem i32 %A, 24
+; CHECK-NEXT: %E = icmp eq i32 %[[REM]], 0
+; CHECK-NEXT: %F = zext i1 %E to i32
+; CHECK-NEXT: %G = add
+; CHECK-NEXT: ret i32 %G
+}
+
+define i1 @test19(i32 %A) {
+ %B = srem i32 %A, 6
+ %C = srem i32 %A, 10
+ %D = icmp eq i32 %B, 0
+ %E = icmp eq i32 %C, 0
+ %F = and i1 %D, %E
+ ret i1 %F
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: %[[REM:.*]] = srem i32 %A, 30
+; CHECK-NEXT: icmp eq i32 %[[REM]], 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test20(i32 %A) {
+ %B = and i32 %A, 1
+ %C = srem i32 %A, 3
+ %D = and i32 %A, 3
+ %E = srem i32 %A, 5
+ %F = srem i32 %A, 6
+ %G = icmp eq i32 %B, 0
+ %H = icmp eq i32 %C, 0
+ %I = icmp eq i32 %D, 0
+ %J = icmp eq i32 %E, 0
+ %K = icmp eq i32 %F, 0
+ %L = and i1 %G, %H
+ %M = and i1 %L, %I
+ %N = and i1 %M, %J
+ %O = and i1 %N, %K
+ ret i1 %O
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: srem i32 %A, 60
+; CHECK-NEXT: icmp eq i32
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test21(i32 %A) {
+ %B = srem i32 %A, -2147483648
+ %C = srem i32 %A, 1024
+ %D = icmp eq i32 %B, 0
+ %E = icmp eq i32 %C, 0
+ %F = and i1 %D, %E
+ ret i1 %F
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: and i32 %A, 2147483647
+; CHECK-NEXT: icmp eq i32
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test22(i32 %A) {
+ %B = srem i32 %A, 1024
+ %C = srem i32 %A, -2147483648
+ %D = icmp eq i32 %B, 0
+ %E = icmp eq i32 %C, 0
+ %F = and i1 %D, %E
+ ret i1 %F
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: and i32 %A, 2147483647
+; CHECK-NEXT: icmp eq i32
+; CHECK-NEXT: ret i1
+}