From 4a1169edbda98cd733139be82909ed35cb8fd4e5 Mon Sep 17 00:00:00 2001 From: Philip Reames <listmail@philipreames.com> Date: Wed, 14 Aug 2019 18:27:57 +0000 Subject: [PATCH] [RLEV] Rewrite loop exit values for multiple exit loops w/o overall loop exit count We already supported rewriting loop exit values for multiple exit loops, but if any of the loop exits were not computable, we gave up on all loop exit values. This patch generalizes the existing code to handle individual computable loop exits where possible. As discussed in the review, this is a starting point for figuring out a better API. The code is a bit ugly, but getting it in lets us test as we go. Differential Revision: https://reviews.llvm.org/D65544 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368898 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 24 ++- test/Transforms/IndVarSimplify/rlev-add-me.ll | 167 ++++++++++++++++++ test/Transforms/LoopUnroll/scevunroll.ll | 2 +- 3 files changed, 188 insertions(+), 5 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/rlev-add-me.ll diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 5d1bb6b9390..246ddfd28f6 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -628,13 +628,29 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it - // contains when the loop exits, if possible. + // contains when the loop exits, if possible. We prefer to start with + // expressions which are true for all exits (so as to maximize + // expression reuse by the SCEVExpander), but resort to per-exit + // evaluation if that fails. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (isa<SCEVCouldNotCompute>(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) - continue; - + !isSafeToExpand(ExitValue, *SE)) { + // TODO: This should probably be sunk into SCEV in some way; maybe a + // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for + // most SCEV expressions and other recurrence types (e.g. shift + // recurrences). Is there existing code we can reuse? + const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); + if (isa<SCEVCouldNotCompute>(ExitCount)) + continue; + if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst))) + ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); + if (isa<SCEVCouldNotCompute>(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) + continue; + } + // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized // away. Avoid doing so unless we know we have a value which computes diff --git a/test/Transforms/IndVarSimplify/rlev-add-me.ll b/test/Transforms/IndVarSimplify/rlev-add-me.ll new file mode 100644 index 00000000000..c41bdf45186 --- /dev/null +++ b/test/Transforms/IndVarSimplify/rlev-add-me.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -indvars < %s | FileCheck %s +target datalayout = "n8:16:32:64" +@G = external global i32 + +; Basic case where we know the value of an induction variable along one +; exit edge, but not another. +define i32 @test(i32 %n) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: ret i32 [[N]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %v = load volatile i32, i32* @G + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %iv +exit2: + ret i32 %iv +} + +define i32 @test2(i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %v = load volatile i32, i32* @G + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %iv +exit2: + ret i32 %iv.next +} + +; TODO: Generalize the code to handle other SCEV expressions +define i32 @test3(i32 %n) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[EXPR:%.*]] = udiv i32 [[IV]], 5 +; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[EXPR_LCSSA:%.*]] = phi i32 [ [[EXPR]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[EXPR_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: [[EXPR_LCSSA1:%.*]] = phi i32 [ [[EXPR]], [[LATCH]] ] +; CHECK-NEXT: ret i32 [[EXPR_LCSSA1]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %expr = udiv i32 %iv, 5 + %v = load volatile i32, i32* @G + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %expr +exit2: + ret i32 %expr +} + + +; A slightly more real example where we're searching for either a) the first +; non-zero element, or b) the end of a memory region. +define i32 @bounded_find(i32 %n) { +; CHECK-LABEL: @bounded_find( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* @G, i32 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ADDR]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: ret i32 [[N]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %addr = getelementptr i32, i32* @G, i32 %iv + %v = load i32, i32* %addr + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %iv +exit2: + ret i32 %iv +} diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll index afee41e11aa..d67e1cfe9d4 100644 --- a/test/Transforms/LoopUnroll/scevunroll.ll +++ b/test/Transforms/LoopUnroll/scevunroll.ll @@ -184,7 +184,7 @@ for.body87: ; CHECK: for.body: ; CHECK: %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ] ; CHECK: return: -; CHECK: %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ 0, %for.cond ] +; CHECK: %b.03.lcssa = phi i32 [ 8, %for.body ], [ 0, %for.cond ] define void @nsw_latch(i32* %a) nounwind { entry: br label %for.body -- 2.40.0