From: Eli Friedman Date: Sat, 11 Jun 2016 21:48:25 +0000 (+0000) Subject: [LICM] Make isGuaranteedToExecute more accurate. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=df383ca8810045b5da4e934529d0afa7f9046776;p=llvm [LICM] Make isGuaranteedToExecute more accurate. Summary: Make isGuaranteedToExecute use the isGuaranteedToTransferExecutionToSuccessor helper, and make that helper a bit more accurate. There's a potential performance impact here from assuming that arbitrary calls might not return. This probably has little impact on loads and stores to a pointer because most things alias analysis can reason about are dereferenceable anyway. The other impacts, like less aggressive hoisting of sdiv by a variable and less aggressive hoisting around volatile memory operations, are unlikely to matter for real code. This also impacts SCEV, which uses the same helper. It's a minor improvement there because we can tell that, for example, memcpy always returns normally. Strictly speaking, it's also introducing a bug, but it's not any worse than everywhere else we assume readonly functions terminate. Fixes http://llvm.org/PR27857. Reviewers: hfinkel, reames, chandlerc, sanjoy Subscribers: broune, llvm-commits Differential Revision: http://reviews.llvm.org/D21167 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272489 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 323fe83bff6..7e09ec74bef 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -3444,19 +3444,45 @@ OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { - // FIXME: This conservative implementation can be relaxed. E.g. most - // atomic operations are guaranteed to terminate on most platforms - // and most functions terminate. - - // Calls can throw and thus not terminate, and invokes may not terminate and - // could throw to non-successor (see bug 24185 for details). - if (isa(I) || isa(I)) - // However, llvm.dbg intrinsics are safe, since they're no-ops. - return isa(I); - - return !I->isAtomic() && // atomics may never succeed on some platforms - !isa(I) && // has no successors - !isa(I); // has no successors + // A memory operation returns normally if it isn't volatile. A volatile + // operation is allowed to trap. + // + // An atomic operation isn't guaranteed to return in a reasonable amount of + // time because it's possible for another thread to interfere with it for an + // arbitrary length of time, but programs aren't allowed to rely on that. + if (const LoadInst *LI = dyn_cast(I)) + return !LI->isVolatile(); + if (const StoreInst *SI = dyn_cast(I)) + return !SI->isVolatile(); + if (const AtomicCmpXchgInst *CXI = dyn_cast(I)) + return !CXI->isVolatile(); + if (const AtomicRMWInst *RMWI = dyn_cast(I)) + return !RMWI->isVolatile(); + if (const MemIntrinsic *MII = dyn_cast(I)) + return !MII->isVolatile(); + + // If there is no successor, then execution can't transfer to it. + if (const auto *CRI = dyn_cast(I)) + return !CRI->unwindsToCaller(); + if (const auto *CatchSwitch = dyn_cast(I)) + return !CatchSwitch->unwindsToCaller(); + if (isa(I)) + return false; + if (isa(I)) + return false; + + // Calls can throw, or contain an infinite loop, or kill the process. + if (CallSite CS = CallSite(const_cast(I))) { + // Calls which don't write to arbitrary memory are safe. + // FIXME: Ignoring infinite loops without any side-effects is too aggressive, + // but it's consistent with other passes. See http://llvm.org/PR965 . + // FIXME: This isn't aggressive enough; a call which only writes to a + // global is guaranteed to return. + return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory(); + } + + // Other instructions return normally. + return true; } bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 8f7cacedfb4..b67bd24ecb8 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -388,7 +388,7 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { // Iterate over header and compute safety info. for (BasicBlock::iterator I = Header->begin(), E = Header->end(); (I != E) && !SafetyInfo->HeaderMayThrow; ++I) - SafetyInfo->HeaderMayThrow |= I->mayThrow(); + SafetyInfo->HeaderMayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; // Iterate over loop instructions and compute safety info. @@ -397,7 +397,7 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { (BB != BBE) && !SafetyInfo->MayThrow; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); (I != E) && !SafetyInfo->MayThrow; ++I) - SafetyInfo->MayThrow |= I->mayThrow(); + SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); // Compute funclet colors if we might sink/hoist in a function with a funclet // personality routine. diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 300931352db..3902c67c6a0 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -962,5 +962,8 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst, if (ExitBlocks.empty()) return false; + // FIXME: In general, we have to prove that the loop isn't an infinite loop. + // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is + // just a special case of this.) return true; } diff --git a/test/Transforms/LICM/hoist-nounwind.ll b/test/Transforms/LICM/hoist-nounwind.ll new file mode 100644 index 00000000000..efa4b8dceca --- /dev/null +++ b/test/Transforms/LICM/hoist-nounwind.ll @@ -0,0 +1,71 @@ +; RUN: opt -S -basicaa -licm < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @f() nounwind + +; Don't hoist load past nounwind call. +define i32 @test1(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test1( +entry: + br label %for.body + +; CHECK: tail call void @f() +; CHECK-NEXT: load i32 +for.body: + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + tail call void @f() nounwind + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %x.05 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add +} + +; Don't hoist division past nounwind call. +define i32 @test2(i32 %N, i32 %c) nounwind uwtable { +; CHECK-LABEL: @test2( +entry: + %cmp4 = icmp sgt i32 %N, 0 + br i1 %cmp4, label %for.body, label %for.cond.cleanup + +; CHECK: tail call void @f() +; CHECK-NEXT: sdiv i32 +for.body: + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + tail call void @f() nounwind + %div = sdiv i32 5, %c + %add = add i32 %i.05, 1 + %inc = add i32 %add, %div + %cmp = icmp slt i32 %inc, %N + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret i32 0 +} + +; Don't hoist load past volatile load. +define i32 @test3(i32* noalias nocapture readonly %a, i32* %v) nounwind uwtable { +; CHECK-LABEL: @test3( +entry: + br label %for.body + +; CHECK: load volatile i32 +; CHECK-NEXT: load i32 +for.body: + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %xxx = load volatile i32, i32* %v, align 4 + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %x.05 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add +} diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index 8609407cc59..e5de7a8f91e 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -37,16 +37,21 @@ define i32 @test2(i1 %c) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: load i32, i32* @X ; CHECK-NEXT: %B = sdiv i32 4, %A - %A = load i32, i32* @X ; [#uses=2] - br label %Loop + %A = load i32, i32* @X + br label %Loop + Loop: - ;; Should have hoisted this div! - %B = sdiv i32 4, %A ; [#uses=2] - call void @foo2( i32 %B ) - br i1 %c, label %Loop, label %Out -Out: ; preds = %Loop - %C = sub i32 %A, %B ; [#uses=1] - ret i32 %C + ;; Should have hoisted this div! + %B = sdiv i32 4, %A + br label %loop2 + +loop2: + call void @foo2( i32 %B ) + br i1 %c, label %Loop, label %Out + +Out: + %C = sub i32 %A, %B + ret i32 %C } diff --git a/test/Transforms/LICM/preheader-safe.ll b/test/Transforms/LICM/preheader-safe.ll index 260a5f653b7..92bd76073b0 100644 --- a/test/Transforms/LICM/preheader-safe.ll +++ b/test/Transforms/LICM/preheader-safe.ll @@ -14,6 +14,9 @@ entry: loop: ; preds = %entry, %for.inc %div = udiv i64 %x, %y + br label %loop2 + +loop2: call void @use_nothrow(i64 %div) br label %loop } diff --git a/test/Transforms/LICM/promote-tls.ll b/test/Transforms/LICM/promote-tls.ll index 7763114ef45..571d5258001 100644 --- a/test/Transforms/LICM/promote-tls.ll +++ b/test/Transforms/LICM/promote-tls.ll @@ -25,7 +25,7 @@ for.header: %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] %old = load i32, i32* %addr, align 4 ; deliberate impossible to analyze branch - %guard = load volatile i8*, i8** @p + %guard = load atomic i8*, i8** @p monotonic, align 8 %exitcmp = icmp eq i8* %guard, null br i1 %exitcmp, label %for.body, label %early-exit diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll index 6ef4bac39bb..9acf8f8c32a 100644 --- a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar_promote.ll @@ -135,7 +135,7 @@ Loop: ; preds = %Loop, %0 %x2 = add i32 %x, 1 ; [#uses=1] store i32 %x2, i32* @X - store volatile i32* @X, i32** %P2 + store atomic i32* @X, i32** %P2 monotonic, align 8 %Next = add i32 %j, 1 ; [#uses=2] %cond = icmp eq i32 %Next, 0 ; [#uses=1] diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll index fda930df933..5e599da12fc 100644 --- a/test/Transforms/LICM/volatile-alias.ll +++ b/test/Transforms/LICM/volatile-alias.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; Function Attrs: nounwind uwtable -define i32 @foo(i32* %p, i32* %q, i32 %n) #0 { +define i32 @foo(i32* dereferenceable(4) nonnull %p, i32* %q, i32 %n) #0 { entry: %p.addr = alloca i32*, align 8 %q.addr = alloca i32*, align 8