From 10c34d1b8ddc5591d95ac4b170b04ad77c2a3d6c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 17 Jun 2019 21:06:17 +0000 Subject: [PATCH] Teach getSCEVAtScope how to handle loop phis w/invariant operands in loops w/taken backedges This patch really contains two pieces: Teach SCEV how to fold a phi in the header of a loop to the value on the backedge when a) the backedge is known to execute at least once, and b) the value is safe to use globally within the scope dominated by the original phi. Teach IndVarSimplify's rewriteLoopExitValues to allow loop invariant expressions which already exist (and thus don't need new computation inserted) even in loops where we can't optimize away other uses. Differential Revision: https://reviews.llvm.org/D63224 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363619 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 44 +++++++++++-------- lib/Transforms/Scalar/IndVarSimplify.cpp | 7 ++- .../IndVarSimplify/exit_value_tests.ll | 41 ++++++++++++----- test/Transforms/IndVarSimplify/pr39673.ll | 4 +- 4 files changed, 61 insertions(+), 35 deletions(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index f37581fbded..0a87a084644 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -8122,27 +8122,35 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // count. If so, we may be able to force computation of the exit // value. const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); - if (const SCEVConstant *BTCC = - dyn_cast(BackedgeTakenCount)) { - - // This trivial case can show up in some degenerate cases where - // the incoming IR has not yet been fully simplified. - if (BTCC->getValue()->isZero()) { - Value *InitValue = nullptr; - bool MultipleInitValues = false; - for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { - if (!LI->contains(PN->getIncomingBlock(i))) { - if (!InitValue) - InitValue = PN->getIncomingValue(i); - else if (InitValue != PN->getIncomingValue(i)) { - MultipleInitValues = true; - break; - } + // This trivial case can show up in some degenerate cases where + // the incoming IR has not yet been fully simplified. + if (BackedgeTakenCount->isZero()) { + Value *InitValue = nullptr; + bool MultipleInitValues = false; + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + if (!LI->contains(PN->getIncomingBlock(i))) { + if (!InitValue) + InitValue = PN->getIncomingValue(i); + else if (InitValue != PN->getIncomingValue(i)) { + MultipleInitValues = true; + break; } } - if (!MultipleInitValues && InitValue) - return getSCEV(InitValue); } + if (!MultipleInitValues && InitValue) + return getSCEV(InitValue); + } + // Do we have a loop invariant value flowing around the backedge + // for a loop which must execute the backedge? + if (!isa(BackedgeTakenCount) && + isKnownPositive(BackedgeTakenCount) && + PN->getNumIncomingValues() == 2) { + unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1; + const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred)); + if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent())) + return OnBackedge; + } + if (auto *BTCC = dyn_cast(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 558ea27659c..66b079f6b2e 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -635,9 +635,12 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized - // away. + // away. Avoid doing so unless we know we have a value which computes + // the ExitValue already. TODO: This should be merged into SCEV + // expander to leverage its knowledge of existing expressions. if (ReplaceExitValue != AlwaysRepl && - !isa(ExitValue) && hasHardUserWithinLoop(L, Inst)) + !isa(ExitValue) && !isa(ExitValue) && + hasHardUserWithinLoop(L, Inst)) continue; bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); diff --git a/test/Transforms/IndVarSimplify/exit_value_tests.ll b/test/Transforms/IndVarSimplify/exit_value_tests.ll index 04ee33d9603..619ee4d08ea 100644 --- a/test/Transforms/IndVarSimplify/exit_value_tests.ll +++ b/test/Transforms/IndVarSimplify/exit_value_tests.ll @@ -153,10 +153,34 @@ loopexit: ; preds = %loop define i32 @unroll_phi_select_constant_nonzero(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: @unroll_phi_select_constant_nonzero( ; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[ARG2:%.*]] +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %selector = phi i32 [%arg1, %entry], [%arg2, %loop] + %i.next = add nsw nuw i32 %i, 1 + %c = icmp ult i32 %i, 4 + br i1 %c, label %loop, label %loopexit + +loopexit: + ret i32 %selector +} + +declare i32 @f() + +; After LCSSA formation, there's no LCSSA phi for %f since it isn't directly +; used outside the loop, and thus we can't directly replace %selector w/ %f. +define i32 @neg_unroll_phi_select_constant_nonzero(i32 %arg) { +; CHECK-LABEL: @neg_unroll_phi_select_constant_nonzero( +; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG:%.*]], [[ENTRY]] ], [ [[F:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[F]] = call i32 @f() ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[I]], 4 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]] @@ -169,7 +193,8 @@ entry: loop: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] - %selector = phi i32 [%arg1, %entry], [%arg2, %loop] + %selector = phi i32 [%arg, %entry], [%f, %loop] + %f = call i32 @f() %i.next = add nsw nuw i32 %i, 1 %c = icmp ult i32 %i, 4 br i1 %c, label %loop, label %loopexit @@ -178,6 +203,7 @@ loopexit: ret i32 %selector } + define i32 @unroll_phi_select_constant_zero(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: @unroll_phi_select_constant_zero( ; CHECK-NEXT: entry: @@ -201,16 +227,7 @@ define i32 @unroll_phi_select(i32 %arg1, i32 %arg2, i16 %len) { ; CHECK-LABEL: @unroll_phi_select( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LENGTH:%.*]] = zext i16 [[LEN:%.*]] to i32 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]] -; CHECK: loopexit: -; CHECK-NEXT: [[SELECTOR_LCSSA:%.*]] = phi i32 [ [[SELECTOR]], [[LOOP]] ] -; CHECK-NEXT: ret i32 [[SELECTOR_LCSSA]] +; CHECK-NEXT: ret i32 [[ARG2:%.*]] ; entry: %length = zext i16 %len to i32 diff --git a/test/Transforms/IndVarSimplify/pr39673.ll b/test/Transforms/IndVarSimplify/pr39673.ll index f11c4ab3ad0..4c1bfea0b2b 100644 --- a/test/Transforms/IndVarSimplify/pr39673.ll +++ b/test/Transforms/IndVarSimplify/pr39673.ll @@ -58,15 +58,13 @@ define i16 @dom_argument(i16 %arg1, i16 %arg2) { ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop1: ; CHECK-NEXT: [[L1:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[L1_ADD:%.*]], [[LOOP1]] ] -; CHECK-NEXT: [[SELECTOR:%.*]] = phi i16 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP1]] ] ; CHECK-NEXT: [[L1_ADD]] = add nuw nsw i16 [[L1]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i16 [[L1_ADD]], 2 ; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP1]], label [[LOOP2_PREHEADER:%.*]] ; CHECK: loop2.preheader: -; CHECK-NEXT: [[K1_ADD_LCSSA:%.*]] = phi i16 [ [[SELECTOR]], [[LOOP1]] ] ; CHECK-NEXT: br label [[LOOP2:%.*]] ; CHECK: loop2: -; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[K1_ADD_LCSSA]], [[LOOP2_PREHEADER]] ] +; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[ARG2:%.*]], [[LOOP2_PREHEADER]] ] ; CHECK-NEXT: [[L2:%.*]] = phi i16 [ [[L2_ADD:%.*]], [[LOOP2]] ], [ 0, [[LOOP2_PREHEADER]] ] ; CHECK-NEXT: [[L2_ADD]] = add nuw nsw i16 [[L2]], 1 ; CHECK-NEXT: tail call void @foo(i16 [[K2]]) -- 2.40.0