From 19ce959014b75d99add2509dd5afca7470f1fff4 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 18 Jan 2017 23:56:42 +0000 Subject: [PATCH] [SCEV] Make getUDivExactExpr handle non-nuw multiplies correctly. To avoid regressions, make ScalarEvolution::createSCEV a bit more clever. Also get rid of some useless code in ScalarEvolution::howFarToZero which was hiding this bug. No new testcase because it's impossible to actually expose this bug: we don't have any in-tree users of getUDivExactExpr besides the two functions I just mentioned, and they both dodged the problem. I'll try to add some interesting users in a followup. Differential Revision: https://reviews.llvm.org/D28587 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292449 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 37 ++++++++++++++---------- test/Analysis/ScalarEvolution/pr24757.ll | 2 +- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 81648528cb5..18fcb0e85b6 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -2888,7 +2888,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, // end of this file for inspiration. const SCEVMulExpr *Mul = dyn_cast(LHS); - if (!Mul) + if (!Mul || !Mul->hasNoUnsignedWrap()) return getUDivExpr(LHS, RHS); if (const SCEVConstant *RHSCst = dyn_cast(RHS)) { @@ -5147,12 +5147,27 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { - const SCEV *MulCount = getConstant(ConstantInt::get( - getContext(), APInt::getOneBitSet(BitWidth, TZ))); + const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); + const SCEV *LHS = getSCEV(BO->LHS); + const SCEV *ShiftedLHS = nullptr; + if (auto *LHSMul = dyn_cast(LHS)) { + if (auto *OpC = dyn_cast(LHSMul->getOperand(0))) { + // For an expression like (x * 8) & 8, simplify the multiply. + unsigned MulZeros = OpC->getAPInt().countTrailingZeros(); + unsigned GCD = std::min(MulZeros, TZ); + APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); + SmallVector MulOps; + MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); + MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end()); + auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); + ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); + } + } + if (!ShiftedLHS) + ShiftedLHS = getUDivExpr(LHS, MulCount); return getMulExpr( getZeroExtendExpr( - getTruncateExpr( - getUDivExactExpr(getSCEV(BO->LHS), MulCount), + getTruncateExpr(ShiftedLHS, IntegerType::get(getContext(), BitWidth - LZ - TZ)), BO->LHS->getType()), MulCount); @@ -7276,17 +7291,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) // is i8 1, not i8 -127 - const auto *ModuloResult = getUDivExactExpr(Distance, Step); - - // Since SCEV does not have a URem node, we construct one using a truncate - // and a zero extend. - - unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); - auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); - auto *WideTy = Distance->getType(); - - const SCEV *Limit = - getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + const auto *Limit = getUDivExactExpr(Distance, Step); return ExitLimit(Limit, Limit, false, Predicates); } } diff --git a/test/Analysis/ScalarEvolution/pr24757.ll b/test/Analysis/ScalarEvolution/pr24757.ll index 815adcde0e9..83baade34ad 100644 --- a/test/Analysis/ScalarEvolution/pr24757.ll +++ b/test/Analysis/ScalarEvolution/pr24757.ll @@ -1,6 +1,6 @@ ; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s -; CHECK: Loop %bb1: backedge-taken count is (zext i7 (trunc i8 %a.promoted to i7) to i8) +; CHECK: Loop %bb1: backedge-taken count is ((2 * %a.promoted) /u 2) target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" -- 2.50.1