Summary:
Make isGuaranteedToExecute use the
isGuaranteedToTransferExecutionToSuccessor helper, and make that helper
a bit more accurate.
There's a potential performance impact here from assuming that arbitrary
calls might not return. This probably has little impact on loads and
stores to a pointer because most things alias analysis can reason about
are dereferenceable anyway. The other impacts, like less aggressive
hoisting of sdiv by a variable and less aggressive hoisting around
volatile memory operations, are unlikely to matter for real code.
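For illustration (a reduced sketch in the spirit of the new LICM tests added
below; @g and the value names are made up), a division sitting after a call is
no longer treated as guaranteed to execute, so LICM keeps it inside the loop
instead of hoisting it into the preheader:

declare void @g() nounwind

define i32 @sketch(i32 %n, i32 %c) nounwind {
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  ; Even though @g is nounwind, it could loop forever or call exit(), so
  ; nothing after it in the body is guaranteed to execute on every iteration.
  call void @g()
  ; sdiv may trap when %c is 0; hoisting it above the call could introduce a
  ; trap the original program never executed.
  %d = sdiv i32 5, %c
  %i.next = add i32 %i, %d
  %cmp = icmp slt i32 %i.next, %n
  br i1 %cmp, label %loop, label %exit

exit:
  ret i32 0
}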
This also impacts SCEV, which uses the same helper. It's a minor
improvement there because we can tell that, for example, memcpy always
returns normally. Strictly speaking, it also introduces a bug, but it's no
worse than all the other places where we assume readonly functions terminate.
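As a concrete example of what the helper can now report (a minimal sketch; the
function and value names are illustrative), a non-volatile llvm.memcpy is a
MemIntrinsic, so it is known to return normally, and a loop containing one no
longer has to be treated as having a possible abnormal exit:

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)

define void @copy_loop(i8* %dst, i8* %src, i64 %n) {
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Non-volatile memcpy: guaranteed to transfer execution to its successor,
  ; unlike an arbitrary call.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i32 1, i1 false)
  %i.next = add i64 %i, 1
  %cmp = icmp ult i64 %i.next, %n
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}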
Fixes http://llvm.org/PR27857.
Reviewers: hfinkel, reames, chandlerc, sanjoy
Subscribers: broune, llvm-commits
Differential Revision: http://reviews.llvm.org/D21167
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272489 91177308-0d34-0410-b5e6-96231b3b80d8
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
- // FIXME: This conservative implementation can be relaxed. E.g. most
- // atomic operations are guaranteed to terminate on most platforms
- // and most functions terminate.
-
- // Calls can throw and thus not terminate, and invokes may not terminate and
- // could throw to non-successor (see bug 24185 for details).
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- // However, llvm.dbg intrinsics are safe, since they're no-ops.
- return isa<DbgInfoIntrinsic>(I);
-
- return !I->isAtomic() && // atomics may never succeed on some platforms
- !isa<ResumeInst>(I) && // has no successors
- !isa<ReturnInst>(I); // has no successors
+ // A memory operation returns normally if it isn't volatile. A volatile
+ // operation is allowed to trap.
+ //
+ // An atomic operation isn't guaranteed to return in a reasonable amount of
+ // time because it's possible for another thread to interfere with it for an
+ // arbitrary length of time, but programs aren't allowed to rely on that.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return !LI->isVolatile();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
+ return !CXI->isVolatile();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
+ return !RMWI->isVolatile();
+ if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
+ return !MII->isVolatile();
+
+ // If there is no successor, then execution can't transfer to it.
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
+ return !CRI->unwindsToCaller();
+ if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
+ return !CatchSwitch->unwindsToCaller();
+ if (isa<ResumeInst>(I))
+ return false;
+ if (isa<ReturnInst>(I))
+ return false;
+
+ // Calls can throw, or contain an infinite loop, or kill the process.
+ if (auto CS = ImmutableCallSite(I)) {
+ // Calls which don't write to arbitrary memory are safe.
+ // FIXME: Ignoring infinite loops without any side-effects is too aggressive,
+ // but it's consistent with other passes. See http://llvm.org/PR965 .
+ // FIXME: This isn't aggressive enough; a call which only writes to a
+ // global is guaranteed to return.
+ return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory();
+ }
+
+ // Other instructions return normally.
+ return true;
}
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
// Iterate over header and compute safety info.
for (BasicBlock::iterator I = Header->begin(), E = Header->end();
(I != E) && !SafetyInfo->HeaderMayThrow; ++I)
- SafetyInfo->HeaderMayThrow |= I->mayThrow();
+ SafetyInfo->HeaderMayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);
SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
// Iterate over loop instructions and compute safety info.
(BB != BBE) && !SafetyInfo->MayThrow; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
(I != E) && !SafetyInfo->MayThrow; ++I)
- SafetyInfo->MayThrow |= I->mayThrow();
+ SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);
// Compute funclet colors if we might sink/hoist in a function with a funclet
// personality routine.
if (ExitBlocks.empty())
return false;
+ // FIXME: In general, we have to prove that the loop isn't an infinite loop.
+ // See http://llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
+ // just a special case of this.)
return true;
}
--- /dev/null
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f() nounwind
+
+; Don't hoist load past nounwind call.
+define i32 @test1(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test1(
+entry:
+ br label %for.body
+
+; CHECK: tail call void @f()
+; CHECK-NEXT: load i32
+for.body:
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ tail call void @f() nounwind
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %x.05
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+}
+
+; Don't hoist division past nounwind call.
+define i32 @test2(i32 %N, i32 %c) nounwind uwtable {
+; CHECK-LABEL: @test2(
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+; CHECK: tail call void @f()
+; CHECK-NEXT: sdiv i32
+for.body:
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ tail call void @f() nounwind
+ %div = sdiv i32 5, %c
+ %add = add i32 %i.05, 1
+ %inc = add i32 %add, %div
+ %cmp = icmp slt i32 %inc, %N
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret i32 0
+}
+
+; Don't hoist load past volatile load.
+define i32 @test3(i32* noalias nocapture readonly %a, i32* %v) nounwind uwtable {
+; CHECK-LABEL: @test3(
+entry:
+ br label %for.body
+
+; CHECK: load volatile i32
+; CHECK-NEXT: load i32
+for.body:
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %xxx = load volatile i32, i32* %v, align 4
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %x.05
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+}
; CHECK-LABEL: @test2(
; CHECK-NEXT: load i32, i32* @X
; CHECK-NEXT: %B = sdiv i32 4, %A
- %A = load i32, i32* @X ; <i32> [#uses=2]
- br label %Loop
+ %A = load i32, i32* @X
+ br label %Loop
+
Loop:
- ;; Should have hoisted this div!
- %B = sdiv i32 4, %A ; <i32> [#uses=2]
- call void @foo2( i32 %B )
- br i1 %c, label %Loop, label %Out
-Out: ; preds = %Loop
- %C = sub i32 %A, %B ; <i32> [#uses=1]
- ret i32 %C
+ ;; Should have hoisted this div!
+ %B = sdiv i32 4, %A
+ br label %loop2
+
+loop2:
+ call void @foo2( i32 %B )
+ br i1 %c, label %Loop, label %Out
+
+Out:
+ %C = sub i32 %A, %B
+ ret i32 %C
}
loop: ; preds = %entry, %for.inc
%div = udiv i64 %x, %y
+ br label %loop2
+
+loop2:
call void @use_nothrow(i64 %div)
br label %loop
}
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%old = load i32, i32* %addr, align 4
; deliberate impossible to analyze branch
- %guard = load volatile i8*, i8** @p
+ %guard = load atomic i8*, i8** @p monotonic, align 8
%exitcmp = icmp eq i8* %guard, null
br i1 %exitcmp, label %for.body, label %early-exit
%x2 = add i32 %x, 1 ; <i32> [#uses=1]
store i32 %x2, i32* @X
- store volatile i32* @X, i32** %P2
+ store atomic i32* @X, i32** %P2 monotonic, align 8
%Next = add i32 %j, 1 ; <i32> [#uses=2]
%cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Function Attrs: nounwind uwtable
-define i32 @foo(i32* %p, i32* %q, i32 %n) #0 {
+define i32 @foo(i32* dereferenceable(4) nonnull %p, i32* %q, i32 %n) #0 {
entry:
%p.addr = alloca i32*, align 8
%q.addr = alloca i32*, align 8