ScalarEvolution *SE;
DominatorTree *DT;
+ // Does the module have any calls to the llvm.experimental.guard intrinsic
+ // at all? If not, we can avoid scanning instructions looking for guards.
+ bool HasGuards;
+
// Result
PHINode *WidePhi;
Instruction *WideInc;
public:
WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
ScalarEvolution *SEv, DominatorTree *DTree,
- SmallVectorImpl<WeakVH> &DI) :
+ SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
+ HasGuards(HasGuards),
WidePhi(nullptr),
WideInc(nullptr),
WideIncExpr(nullptr),
updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
};
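+ // A guard that precedes Ctx in its block has already executed by the time
+ // Ctx runs, so its condition is known to hold there and can be used to
+ // narrow the post-inc range the same way a branch condition is.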
+ auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
+ if (!HasGuards)
+ return;
+
+ for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
+ Ctx->getParent()->rend())) {
+ Value *C = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
+ UpdateRangeFromCondition(C, /*TrueDest=*/true);
+ }
+ };
+
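+ // First pick up guards that appear before NarrowUser in its own block.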
+ UpdateRangeFromGuards(NarrowUser);
+
BasicBlock *NarrowUserBB = NarrowUser->getParent();
// If NarrowUserBB is statically unreachable, asking dominator queries may
// yield surprising results (e.g. the block may not have a dom tree node).
DTB = DTB->getIDom()) {
auto *BB = DTB->getBlock();
auto *TI = BB->getTerminator();
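+ // Every guard in a dominating block has executed before NarrowUser, so
+ // scan the whole block by starting from its terminator.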
+ UpdateRangeFromGuards(TI);
auto *BI = dyn_cast<BranchInst>(TI);
if (!BI || !BI->isConditional())
LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
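+ // One cheap check up front: if the declaration of llvm.experimental.guard
+ // has no uses, the module contains no guards and WidenIV can skip scanning
+ // for them.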
+ auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ bool HasGuards = GuardDecl && !GuardDecl->use_empty();
+
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
LoopPhis.push_back(cast<PHINode>(I));
} while(!LoopPhis.empty());
for (; !WideIVs.empty(); WideIVs.pop_back()) {
- WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
+ WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
exit:
ret void
}
+
+declare void @llvm.experimental.guard(i1, ...)
+
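+; In every test below the CHECK-NOT lines verify that widening the i32 IV
+; to i64 leaves behind no trunc and no narrow (i32) signed compare.
+
+; Guard and IV increment live together in the single loop block.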
+define void @test_guard_one_bb(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_one_bb(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.body ]
+ %within_limits = icmp ult i32 %i, 64
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}
+
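+; The guard sits in the same block as the IV increment (for.inc), not in
+; the loop header where %i is defined.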
+define void @test_guard_in_the_same_bb(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_in_the_same_bb(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+ %within_limits = icmp ult i32 %i, 64
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc:
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}
+
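+; The guard is in for.body, the immediate dominator of for.inc, which holds
+; the IV increment.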
+define void @test_guard_in_idom(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_in_idom(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+ %within_limits = icmp ult i32 %i, 64
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc:
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}
+
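+; Two guards constrain %i; this exercises merging the range information
+; from both.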
+define void @test_guard_merge_ranges(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_merge_ranges(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.body ]
+ %within_limits.1 = icmp ult i32 %i, 64
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits.1) [ "deopt"() ]
+ %within_limits.2 = icmp ult i32 %i, 2147483647
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits.2) [ "deopt"() ]
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}