From 5f13aa40626739698a88d18bf851a72064e6e7b8 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 17 Jun 2019 13:39:28 +0000 Subject: [PATCH] [CodeGen] Check for HardwareLoop Latch ExitBlock The HardwareLoops pass finds exit blocks with a scevable exit count. If the target specifies to update the loop counter in a register, through a phi, we need to ensure that the exit block is a latch so that we can insert the phi with the correct value for the incoming edge. Differential Revision: https://reviews.llvm.org/D63336 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363556 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/TargetTransformInfo.h | 10 +-- lib/CodeGen/HardwareLoops.cpp | 16 +++- lib/Target/ARM/ARMTargetTransformInfo.cpp | 4 - .../Transforms/HardwareLoops/ARM/structure.ll | 76 +++++++++++++++++++ .../HardwareLoops/unconditional-latch.ll | 46 +++++++++++ 5 files changed, 139 insertions(+), 13 deletions(-) create mode 100644 test/Transforms/HardwareLoops/unconditional-latch.ll diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 0ef8c08380a..52d4cb73c5b 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -448,9 +448,7 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const; - /// Attributes of a target dependent hardware loop. Here, the term 'element' - /// describes the work performed by an IR loop that has not been vectorized - /// by the compiler. + /// Attributes of a target dependent hardware loop. struct HardwareLoopInfo { HardwareLoopInfo() = delete; HardwareLoopInfo(Loop *L) : L(L) { } @@ -459,10 +457,10 @@ public: BranchInst *ExitBranch = nullptr; const SCEV *ExitCount = nullptr; IntegerType *CountType = nullptr; - Value *LoopDecrement = nullptr; // The maximum number of elements - // processed in the loop body. 
+ Value *LoopDecrement = nullptr; // Decrement the loop counter by this + // value in every iteration. bool IsNestingLegal = false; // Can a hardware loop be a parent to - // another hardware loop. + // another hardware loop? bool CounterInReg = false; // Should loop counter be updated in // the loop via a phi? }; diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp index df063545f28..99191090220 100644 --- a/lib/CodeGen/HardwareLoops.cpp +++ b/lib/CodeGen/HardwareLoops.cpp @@ -235,7 +235,17 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) { for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { - const SCEV *EC = SE->getExitCount(L, *I); + BasicBlock *BB = *I; + + // If we pass the updated counter back through a phi, we need to know + // which latch the updated value will be coming from. + if (!L->isLoopLatch(BB)) { + if ((ForceHardwareLoopPHI.getNumOccurrences() && ForceHardwareLoopPHI) || + HWLoopInfo.CounterInReg) + continue; + } + + const SCEV *EC = SE->getExitCount(L, BB); if (isa<SCEVCouldNotCompute>(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { @@ -251,7 +261,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) { // If this exiting block is contained in a nested loop, it is not eligible // for insertion of the branch-and-decrement since the inner loop would // end up messing up the value in the CTR. - if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(*I) != L && + if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(BB) != L && !ForceNestedLoop) continue; @@ -278,7 +288,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) { continue; // Make sure this blocks ends with a conditional branch. 
- Instruction *TI = (*I)->getTerminator(); + Instruction *TI = BB->getTerminator(); if (!TI) continue; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index a57a4049dbc..ca905dfc7ff 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -702,10 +702,6 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, if (!ST->hasLOB() || DisableLowOverheadLoops) return false; - // For now, for simplicity, only support loops with one exit block. - if (!L->getExitBlock()) - return false; - if (!SE.hasLoopInvariantBackedgeTakenCount(L)) return false; diff --git a/test/Transforms/HardwareLoops/ARM/structure.ll b/test/Transforms/HardwareLoops/ARM/structure.ll index 42726756a53..fa3dbc0d929 100644 --- a/test/Transforms/HardwareLoops/ARM/structure.ll +++ b/test/Transforms/HardwareLoops/ARM/structure.ll @@ -135,6 +135,82 @@ while.end7: ret void } +; CHECK-LABEL: not_rotated +; CHECK-NOT: call void @llvm.set.loop.iterations +; CHECK-NOT: call i32 @llvm.loop.decrement.i32 +define void @not_rotated(i32, i16* nocapture, i16 signext) { + br label %4 + +4: + %5 = phi i32 [ 0, %3 ], [ %19, %18 ] + %6 = icmp eq i32 %5, %0 + br i1 %6, label %20, label %7 + +7: + %8 = mul i32 %5, %0 + br label %9 + +9: + %10 = phi i32 [ %17, %12 ], [ 0, %7 ] + %11 = icmp eq i32 %10, %0 + br i1 %11, label %18, label %12 + +12: + %13 = add i32 %10, %8 + %14 = getelementptr inbounds i16, i16* %1, i32 %13 + %15 = load i16, i16* %14, align 2 + %16 = add i16 %15, %2 + store i16 %16, i16* %14, align 2 + %17 = add i32 %10, 1 + br label %9 + +18: + %19 = add i32 %5, 1 + br label %4 + +20: + ret void +} + +; CHECK-LABEL: multi_latch +; CHECK-NOT: call void @llvm.set.loop.iterations +; CHECK-NOT: call i32 @llvm.loop.decrement +define void @multi_latch(i32* %a, i32* %b, i32 %N) { +entry: + %half = lshr i32 %N, 1 + br label %header + +header: + %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ 
%count.next, %latch.1 ] + %cmp = icmp ult i32 %iv, %half + %addr.a = getelementptr i32, i32* %a, i32 %iv + %addr.b = getelementptr i32, i32* %b, i32 %iv + br i1 %cmp, label %if.then, label %if.else + +if.then: + store i32 %iv, i32* %addr.a + br label %latch.0 + +if.else: + store i32 %iv, i32* %addr.b + br label %latch.0 + +latch.0: + %count.next = add nuw i32 %iv, 1 + %cmp.1 = icmp ult i32 %count.next, %half + br i1 %cmp.1, label %header, label %latch.1 + +latch.1: + %ld = load i32, i32* %addr.a + store i32 %ld, i32* %addr.b + %cmp.2 = icmp ult i32 %count.next, %N + br i1 %cmp.2, label %header, label %latch.1 + +exit: + ret void +} + + declare void @llvm.set.loop.iterations.i32(i32) #0 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 diff --git a/test/Transforms/HardwareLoops/unconditional-latch.ll b/test/Transforms/HardwareLoops/unconditional-latch.ll new file mode 100644 index 00000000000..71479d04393 --- /dev/null +++ b/test/Transforms/HardwareLoops/unconditional-latch.ll @@ -0,0 +1,46 @@ +; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW +; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH + +; CHECK-LABEL: not_rotated +; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations +; CHECK-LATCH-NOT: call i1 @llvm.loop.decrement + +; CHECK-ALLOW: call void @llvm.set.loop.iterations.i32(i32 %4) +; CHECK-ALLOW: br label %10 + +; CHECK-ALLOW: [[CMP:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-ALLOW: br i1 [[CMP]], label %13, label %19 + +define void @not_rotated(i32, i16* nocapture, i16 signext) { + br label %4 + +4: + %5 = phi i32 [ 0, %3 ], [ %19, %18 ] + %6 = icmp eq i32 %5, %0 + br i1 %6, label %20, label %7 + +7: + %8 = mul i32 %5, %0 + 
br label %9 + +9: + %10 = phi i32 [ %17, %12 ], [ 0, %7 ] + %11 = icmp eq i32 %10, %0 + br i1 %11, label %18, label %12 + +12: + %13 = add i32 %10, %8 + %14 = getelementptr inbounds i16, i16* %1, i32 %13 + %15 = load i16, i16* %14, align 2 + %16 = add i16 %15, %2 + store i16 %16, i16* %14, align 2 + %17 = add i32 %10, 1 + br label %9 + +18: + %19 = add i32 %5, 1 + br label %4 + +20: + ret void +} -- 2.40.0