From 3ef6c7c651a704d88907af91a3c5b5d37ea6a38a Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Thu, 26 Jan 2017 01:04:11 +0000 Subject: [PATCH] [LoopUnroll] Properly update loopinfo for runtime unrolling by 2 Even when we don't create a remainder loop (that is, when we unroll by 2), we may duplicate nested loops into the remainder. This is complicated by the fact the remainder may itself be either inserted into an outer loop, or at the top level. In the latter case, we may need to create new top-level loops. Differential Revision: https://reviews.llvm.org/D29156 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293124 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 3 +- lib/Transforms/Utils/LoopUnroll.cpp | 11 ++++--- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 15 ++++++--- test/Transforms/LoopUnroll/revisit.ll | 10 ++++-- test/Transforms/LoopUnroll/runtime-li.ll | 36 ++++++++++++++++++++++ 5 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 test/Transforms/LoopUnroll/runtime-li.ll diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index c2adb7caad6..305122511f5 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1160,7 +1160,8 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, #endif // Unrolling can do several things to introduce new loops into a loop nest: - // - Partial unrolling clones child loops within the current loop. + // - Partial unrolling clones child loops within the current loop. If it + // uses a remainder, then it can also create any number of sibling loops. // - Full unrolling clones child loops within the current loop but then // removes the current loop making all of the children appear to be new // sibling loops. diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 7cd69d2b847..5c3c94008d3 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -199,11 +199,14 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, assert(OriginalBB == OldLoop->getHeader() && "Header should be first in RPO"); + NewLoop = new Loop(); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); - assert(NewLoopParent && - "Expected parent loop before sub-loop in RPO"); - NewLoop = new Loop; - NewLoopParent->addChildLoop(NewLoop); + + if (NewLoopParent) + NewLoopParent->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); return OldLoop; } else { diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index d5d54cf1d84..8406c563a9c 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -311,17 +311,22 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, } NewLoopsMap NewLoops; - NewLoops[L] = NewLoop; + if (NewLoop) + NewLoops[L] = NewLoop; + else if (ParentLoop) + NewLoops[L] = ParentLoop; + // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); - - if (NewLoop) { + + // If we're unrolling the outermost loop, there's no remainder loop, + // and this block isn't in a nested loop, then the new block is not + // in any loop. Otherwise, add it to loopinfo. + if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); - } else if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { diff --git a/test/Transforms/LoopUnroll/revisit.ll b/test/Transforms/LoopUnroll/revisit.ll index 18ae3658385..88c9f7ba21a 100644 --- a/test/Transforms/LoopUnroll/revisit.ll +++ b/test/Transforms/LoopUnroll/revisit.ll @@ -123,7 +123,7 @@ l0.0.latch: ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0 ; CHECK-NOT: LoopUnrollPass ; -; Partial unrolling occurs which introduces new child loops but not new sibling +; Partial unrolling occurs which introduces both new child loops and new sibling ; loops. We only visit the child loops in a special mode, not by default. ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0
; CHECK-CHILDREN-NOT: LoopUnrollPass @@ -137,7 +137,13 @@ l0.0.latch: ; When we revisit children, we also revisit the current loop. ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0
; CHECK-CHILDREN-NOT: LoopUnrollPass - +; +; Revisit the children of the outer loop that are part of the prologue. +; +; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.prol
+; CHECK-NOT: LoopUnrollPass +; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.prol
+; CHECK-NOT: LoopUnrollPass l0.latch: br label %l0 ; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0
diff --git a/test/Transforms/LoopUnroll/runtime-li.ll b/test/Transforms/LoopUnroll/runtime-li.ll new file mode 100644 index 00000000000..5494c8e9da7 --- /dev/null +++ b/test/Transforms/LoopUnroll/runtime-li.ll @@ -0,0 +1,36 @@ +; RUN: opt -S -loop-unroll -unroll-runtime -unroll-count=2 -verify-loop-info -pass-remarks=loop-unroll < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Verify that runtime-unrolling a top-level loop that has nested loops does not +; make the unroller produce invalid loop-info. +; CHECK: remark: {{.*}}: unrolled loop by a factor of 2 with run-time trip count +; CHECK: @widget +; CHECK: ret void +define void @widget(double* %arg, double* %arg1, double* %p, i64* %q1, i64* %q2) local_unnamed_addr { +entry: + br label %header.outer + +header.outer: ; preds = %latch.outer, %entry + %tmp = phi double* [ %tmp8, %latch.outer ], [ %arg, %entry ] + br label %header.inner + +header.inner: ; preds = %latch.inner, %header.outer + br i1 undef, label %latch.inner, label %latch.outer + +latch.inner: ; preds = %header.inner + %tmp5 = load i64, i64* %q1, align 8 + store i64 %tmp5, i64* %q2, align 8 + %tmp6 = icmp eq double* %p, %arg + br label %header.inner + +latch.outer: ; preds = %header.inner + store double 0.0, double* %p, align 8 + %tmp8 = getelementptr inbounds double, double* %tmp, i64 1 + %tmp9 = icmp eq double* %tmp8, %arg1 + br i1 %tmp9, label %exit, label %header.outer + +exit: ; preds = %latch.outer + ret void +} -- 2.50.1