void initializeLoopInterchangePass(PassRegistry &);
void initializeLoopLoadEliminationPass(PassRegistry&);
void initializeLoopPassPass(PassRegistry&);
+void initializeLoopPredicationLegacyPassPass(PassRegistry&);
void initializeLoopRerollPass(PassRegistry&);
void initializeLoopRotateLegacyPassPass(PassRegistry&);
void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&);
(void) llvm::createLazyValueInfoPass();
(void) llvm::createLoopExtractorPass();
(void) llvm::createLoopInterchangePass();
+ (void) llvm::createLoopPredicationPass();
(void) llvm::createLoopSimplifyPass();
(void) llvm::createLoopSimplifyCFGPass();
(void) llvm::createLoopStrengthReducePass();
//
Pass *createLoopSinkPass();
+//===----------------------------------------------------------------------===//
+//
+// LoopPredication - This pass does loop predication on guards.
+//
+Pass *createLoopPredicationPass();
+
//===----------------------------------------------------------------------===//
//
// LoopInterchange - This pass interchanges loops to provide a more
--- /dev/null
+//===- LoopPredication.h - Guard based loop predication pass ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to convert loop variant range checks to loop invariant by
+// widening checks across loop iterations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+/// Performs Loop Predication Pass.
+class LoopPredicationPass : public PassInfoMixin<LoopPredicationPass> {
+public:
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopPredication.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopSink.h"
LOOP_PASS("unroll", LoopUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
+LOOP_PASS("loop-predication", LoopPredicationPass())
#undef LOOP_PASS
LoopInterchange.cpp
LoopLoadElimination.cpp
LoopPassManager.cpp
+ LoopPredication.cpp
LoopRerollPass.cpp
LoopRotation.cpp
LoopSimplifyCFG.cpp
--- /dev/null
+//===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LoopPredication pass tries to convert loop variant range checks to loop
+// invariant by widening checks across loop iterations. For example, it will
+// convert
+//
+// for (i = 0; i < n; i++) {
+// guard(i < len);
+// ...
+// }
+//
+// to
+//
+// for (i = 0; i < n; i++) {
+// guard(n - 1 < len);
+// ...
+// }
+//
+// After this transformation the condition of the guard is loop invariant, so
+// loop-unswitch can later unswitch the loop by this condition which basically
+// predicates the loop by the widened condition:
+//
+// if (n - 1 < len)
+// for (i = 0; i < n; i++) {
+// ...
+// }
+// else
+// deoptimize
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "loop-predication"
+
+using namespace llvm;
+
+namespace {
+class LoopPredication {
+ ScalarEvolution *SE;
+
+ Loop *L;
+ const DataLayout *DL;
+ BasicBlock *Preheader;
+
+ Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
+ IRBuilder<> &Builder);
+ bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
+
+public:
+ LoopPredication(ScalarEvolution *SE) : SE(SE){};
+ bool runOnLoop(Loop *L);
+};
+
+class LoopPredicationLegacyPass : public LoopPass {
+public:
+ static char ID;
+ LoopPredicationLegacyPass() : LoopPass(ID) {
+ initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ getLoopAnalysisUsage(AU);
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LoopPredication LP(SE);
+ return LP.runOnLoop(L);
+ }
+};
+
+char LoopPredicationLegacyPass::ID = 0;
+} // end namespace llvm
+
+INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
+ "Loop predication", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
+ "Loop predication", false, false)
+
+Pass *llvm::createLoopPredicationPass() {
+ return new LoopPredicationLegacyPass();
+}
+
+PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ LoopPredication LP(&AR.SE);
+ if (!LP.runOnLoop(&L))
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+/// If ICI can be widened to a loop invariant condition emits the loop
+/// invariant condition in the loop preheader and return it, otherwise
+/// returns None.
+Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
+ SCEVExpander &Expander,
+ IRBuilder<> &Builder) {
+ DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
+ DEBUG(ICI->dump());
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ const SCEV *LHSS = SE->getSCEV(LHS);
+ if (isa<SCEVCouldNotCompute>(LHSS))
+ return None;
+ const SCEV *RHSS = SE->getSCEV(RHS);
+ if (isa<SCEVCouldNotCompute>(RHSS))
+ return None;
+
+ // Canonicalize RHS to be loop invariant bound, LHS - a loop computable index
+ if (SE->isLoopInvariant(LHSS, L)) {
+ std::swap(LHS, RHS);
+ std::swap(LHSS, RHSS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ if (!SE->isLoopInvariant(RHSS, L))
+ return None;
+
+ Value *Bound = RHS;
+ const SCEVAddRecExpr *IndexAR = dyn_cast<SCEVAddRecExpr>(LHSS);
+ if (!IndexAR || IndexAR->getLoop() != L)
+ return None;
+
+ DEBUG(dbgs() << "IndexAR: ");
+ DEBUG(IndexAR->dump());
+
+ bool IsIncreasing = false;
+ if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing))
+ return None;
+
+ // If the predicate is increasing the condition can change from false to true
+ // as the loop progresses, in this case take the value on the first iteration
+ // for the widened check. Otherwise the condition can change from true to
+ // false as the loop progresses, so take the value on the last iteration.
+ const SCEV *NewLHSS = IsIncreasing
+ ? IndexAR->getStart()
+ : SE->getSCEVAtScope(IndexAR, L->getParentLoop());
+ if (NewLHSS == IndexAR) {
+ DEBUG(dbgs() << "Can't compute NewLHSS!");
+ return None;
+ }
+
+ DEBUG(dbgs() << "NewLHSS: ");
+ DEBUG(NewLHSS->dump());
+
+ if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE))
+ return None;
+
+ DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
+
+ Value *NewLHS = Expander.expandCodeFor(NewLHSS, Bound->getType(),
+ Preheader->getTerminator());
+ return Builder.CreateICmp(Pred, NewLHS, Bound);
+}
+
+bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
+ SCEVExpander &Expander) {
+ DEBUG(dbgs() << "Processing guard:\n");
+ DEBUG(Guard->dump());
+
+ IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
+
+ // The guard condition is expected to be in form of:
+ // cond1 && cond2 && cond3 ...
+ // Iterate over subconditions looking for for icmp conditions which can be
+ // widened across loop iterations. Widening these conditions remember the
+ // resulting list of subconditions in Checks vector.
+ SmallVector<Value *, 4> Worklist(1, Guard->getOperand(0));
+ SmallPtrSet<Value *, 4> Visited;
+
+ SmallVector<Value *, 4> Checks;
+
+ unsigned NumWidened = 0;
+ do {
+ Value *Condition = Worklist.pop_back_val();
+ if (!Visited.insert(Condition).second)
+ continue;
+
+ Value *LHS, *RHS;
+ using namespace llvm::PatternMatch;
+ if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
+ Worklist.push_back(LHS);
+ Worklist.push_back(RHS);
+ continue;
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+ if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) {
+ Checks.push_back(NewRangeCheck.getValue());
+ NumWidened++;
+ continue;
+ }
+ }
+
+ // Save the condition as is if we can't widen it
+ Checks.push_back(Condition);
+ } while (Worklist.size() != 0);
+
+ if (NumWidened == 0)
+ return false;
+
+ // Emit the new guard condition
+ Builder.SetInsertPoint(Guard);
+ Value *LastCheck = nullptr;
+ for (auto *Check : Checks)
+ if (!LastCheck)
+ LastCheck = Check;
+ else
+ LastCheck = Builder.CreateAnd(LastCheck, Check);
+ Guard->setOperand(0, LastCheck);
+
+ DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+ return true;
+}
+
+bool LoopPredication::runOnLoop(Loop *Loop) {
+ L = Loop;
+
+ DEBUG(dbgs() << "Analyzing ");
+ DEBUG(L->dump());
+
+ Module *M = L->getHeader()->getModule();
+
+ // There is nothing to do if the module doesn't use guards
+ auto *GuardDecl =
+ M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return false;
+
+ DL = &M->getDataLayout();
+
+ Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ // Collect all the guards into a vector and process later, so as not
+ // to invalidate the instruction iterator.
+ SmallVector<IntrinsicInst *, 4> Guards;
+ for (const auto BB : L->blocks())
+ for (auto &I : *BB)
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::experimental_guard)
+ Guards.push_back(II);
+
+ SCEVExpander Expander(*SE, *DL, "loop-predication");
+
+ bool Changed = false;
+ for (auto *Guard : Guards)
+ Changed |= widenGuardConditions(Guard, Expander);
+
+ return Changed;
+}
initializeLoopAccessLegacyAnalysisPass(Registry);
initializeLoopInstSimplifyLegacyPassPass(Registry);
initializeLoopInterchangePass(Registry);
+ initializeLoopPredicationLegacyPassPass(Registry);
initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopRerollPass(Registry);
--- /dev/null
+; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i32 @unsigned_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @unsigned_loop_0_to_n_ult_check
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds = icmp ult i32 %i, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @unsigned_loop_0_to_n_ugt_check(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @unsigned_loop_0_to_n_ugt_check
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds = icmp ugt i32 %length, %i
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+
+define i32 @two_range_checks(i32* %array.1, i32 %length.1,
+ i32* %array.2, i32 %length.2, i32 %n) {
+; CHECK-LABEL: @two_range_checks
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds.1 = icmp ult i32 %i, %length.1
+ %within.bounds.2 = icmp ult i32 %i, %length.2
+ %within.bounds = and i1 %within.bounds.1, %within.bounds.2
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+ %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+ %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+ %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+ %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc.1, %array.2.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @three_range_checks(i32* %array.1, i32 %length.1,
+ i32* %array.2, i32 %length.2,
+ i32* %array.3, i32 %length.3, i32 %n) {
+; CHECK-LABEL: @three_range_checks
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds.1 = icmp ult i32 %i, %length.1
+ %within.bounds.2 = icmp ult i32 %i, %length.2
+ %within.bounds.3 = icmp ult i32 %i, %length.3
+ %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2
+ %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+ %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+ %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+ %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+ %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+ %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
+
+ %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
+ %array.3.i = load i32, i32* %array.3.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @three_guards(i32* %array.1, i32 %length.1,
+ i32* %array.2, i32 %length.2,
+ i32* %array.3, i32 %length.3, i32 %n) {
+; CHECK-LABEL: @three_guards
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2
+; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ]
+
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+
+ %within.bounds.1 = icmp ult i32 %i, %length.1
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+ %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+ %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+ %within.bounds.2 = icmp ult i32 %i, %length.2
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ]
+
+ %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+ %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+ %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
+
+ %within.bounds.3 = icmp ult i32 %i, %length.3
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ]
+
+ %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
+ %array.3.i = load i32, i32* %array.3.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+ %within.bounds = icmp sge i32 %i, 0
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nsw i32 %i, 1
+ %continue = icmp slt i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1
+; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
+; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
+; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+ %within.bounds = icmp slt i32 %i, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nsw i32 %i, 1
+ %continue = icmp slt i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_both_checks
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0
+; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1
+; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
+; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
+; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
+; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+ %within.bounds.1 = icmp slt i32 %i, %length
+ %within.bounds.2 = icmp sge i32 %i, 0
+ %within.bounds = and i1 %within.bounds.1, %within.bounds.2
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nsw i32 %i, 1
+ %continue = icmp slt i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @unsigned_loop_0_to_n_unrelated_condition(i32* %array, i32 %length, i32 %n, i32 %x) {
+; CHECK-LABEL: @unsigned_loop_0_to_n_unrelated_condition
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %unrelated.cond = icmp ult i32 %x, %length
+; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds = icmp ult i32 %i, %length
+ %unrelated.cond = icmp ult i32 %x, %length
+ %guard.cond = and i1 %within.bounds, %unrelated.cond
+ call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+; Don't change the guard condition if there were no widened subconditions
+define i32 @test_no_widened_conditions(i32* %array, i32 %length, i32 %n, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: @test_no_widened_conditions
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %unrelated.cond.1 = icmp eq i32 %x1, %i
+; CHECK-NEXT: %unrelated.cond.2 = icmp eq i32 %x2, %i
+; CHECK-NEXT: %unrelated.cond.3 = icmp eq i32 %x3, %i
+; CHECK-NEXT: %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2
+; CHECK-NEXT: %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %unrelated.cond.1 = icmp eq i32 %x1, %i
+ %unrelated.cond.2 = icmp eq i32 %x2, %i
+ %unrelated.cond.3 = icmp eq i32 %x3, %i
+ %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2
+ %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3
+
+ call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @signed_loop_start_to_n_loop_variant_bound(i32* %array, i32 %x, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_loop_variant_bound
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %bound = add i32 %i, %x
+; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+ %bound = add i32 %i, %x
+ %within.bounds = icmp slt i32 %i, %bound
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nsw i32 %i, 1
+ %continue = icmp slt i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @signed_loop_start_to_n_non_monotonic_predicate(i32* %array, i32 %x, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_non_monotonic_predicate
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %guard.cond = icmp eq i32 %i, %x
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+ %guard.cond = icmp eq i32 %i, %x
+ call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nsw i32 %i, 1
+ %continue = icmp slt i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
\ No newline at end of file
--- /dev/null
+; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i32 @signed_loop_0_to_n_nested_0_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) {
+; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_inner_index_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %outer.loop.preheader
+
+outer.loop.preheader:
+; CHECK: outer.loop.preheader:
+; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1
+ br label %outer.loop
+
+outer.loop:
+ %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %tmp6 = icmp sle i32 %l, 0
+ br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+
+inner.loop.preheader:
+; CHECK: inner.loop.preheader:
+; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+ br label %inner.loop
+
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+ %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
+
+ %within.bounds = icmp slt i32 %j, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %j.i64 = zext i32 %j to i64
+ %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
+ %array.j = load i32, i32* %array.j.ptr, align 4
+ %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
+
+ %j.next = add nsw i32 %j, 1
+ %inner.continue = icmp slt i32 %j.next, %l
+ br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+
+outer.loop.inc:
+ %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+ %i.next = add nsw i32 %i, 1
+ %outer.continue = icmp slt i32 %i.next, %n
+ br i1 %outer.continue, label %outer.loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+ ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_nested_0_to_l_outer_index_check(i32* %array, i32 %length, i32 %n, i32 %l) {
+; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_outer_index_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %outer.loop.preheader
+
+outer.loop.preheader:
+; CHECK: outer.loop.preheader:
+; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
+; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+ br label %outer.loop
+
+outer.loop:
+ %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %tmp6 = icmp sle i32 %l, 0
+ br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+
+inner.loop.preheader:
+ br label %inner.loop
+
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+
+ %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+ %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
+
+ %within.bounds = icmp slt i32 %i, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %inner.loop.acc.next = add i32 %inner.loop.acc, %array.i
+
+ %j.next = add nsw i32 %j, 1
+ %inner.continue = icmp slt i32 %j.next, %l
+ br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+
+outer.loop.inc:
+ %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+ %i.next = add nsw i32 %i, 1
+ %outer.continue = icmp slt i32 %i.next, %n
+ br i1 %outer.continue, label %outer.loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+ ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_nested_i_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) {
+; CHECK-LABEL: @signed_loop_0_to_n_nested_i_to_l_inner_index_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %outer.loop.preheader
+
+outer.loop.preheader:
+ br label %outer.loop
+
+outer.loop:
+; CHECK: outer.loop:
+; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1
+; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]]
+; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]]
+; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1
+ %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %tmp6 = icmp sle i32 %l, 0
+ br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+
+inner.loop.preheader:
+; CHECK: inner.loop.preheader:
+; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length
+ br label %inner.loop
+
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+ %j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ]
+
+ %within.bounds = icmp slt i32 %j, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %j.i64 = zext i32 %j to i64
+ %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
+ %array.j = load i32, i32* %array.j.ptr, align 4
+ %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
+
+ %j.next = add nsw i32 %j, 1
+ %inner.continue = icmp slt i32 %j.next, %l
+ br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+
+outer.loop.inc:
+ %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+ %i.next = add nsw i32 %i, 1
+ %outer.continue = icmp slt i32 %i.next, %n
+ br i1 %outer.continue, label %outer.loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+ ret i32 %result
+}
\ No newline at end of file
--- /dev/null
+; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i32 @test_visited(i32* %array, i32 %length, i32 %n, i32 %x) {
+; CHECK-LABEL: @test_visited
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %unrelated.cond = icmp eq i32 %x, %i
+; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds = icmp ult i32 %i, %length
+ %unrelated.cond = icmp eq i32 %x, %i
+ %guard.cond.2 = and i1 %within.bounds, %unrelated.cond
+ %guard.cond.3 = and i1 %guard.cond.2, %unrelated.cond
+ %guard.cond.4 = and i1 %guard.cond.3, %guard.cond.2
+ %guard.cond.5 = and i1 %guard.cond.4, %guard.cond.3
+ %guard.cond.6 = and i1 %guard.cond.5, %guard.cond.4
+ %guard.cond.7 = and i1 %guard.cond.6, %guard.cond.5
+ %guard.cond.8 = and i1 %guard.cond.7, %guard.cond.6
+ %guard.cond.9 = and i1 %guard.cond.8, %guard.cond.7
+ %guard.cond.10 = and i1 %guard.cond.9, %guard.cond.8
+ %guard.cond.11 = and i1 %guard.cond.10, %guard.cond.9
+ %guard.cond.12 = and i1 %guard.cond.11, %guard.cond.10
+ %guard.cond.13 = and i1 %guard.cond.12, %guard.cond.11
+ %guard.cond.14 = and i1 %guard.cond.13, %guard.cond.12
+ %guard.cond.15 = and i1 %guard.cond.14, %guard.cond.13
+ %guard.cond.16 = and i1 %guard.cond.15, %guard.cond.14
+ %guard.cond.17 = and i1 %guard.cond.16, %guard.cond.15
+ %guard.cond.18 = and i1 %guard.cond.17, %guard.cond.16
+ %guard.cond.19 = and i1 %guard.cond.18, %guard.cond.17
+ %guard.cond.20 = and i1 %guard.cond.19, %guard.cond.18
+ %guard.cond.21 = and i1 %guard.cond.20, %guard.cond.19
+ %guard.cond.22 = and i1 %guard.cond.21, %guard.cond.20
+ %guard.cond.23 = and i1 %guard.cond.22, %guard.cond.21
+ %guard.cond.24 = and i1 %guard.cond.23, %guard.cond.22
+ %guard.cond.25 = and i1 %guard.cond.24, %guard.cond.23
+ %guard.cond.26 = and i1 %guard.cond.25, %guard.cond.24
+ %guard.cond.27 = and i1 %guard.cond.26, %guard.cond.25
+ %guard.cond.28 = and i1 %guard.cond.27, %guard.cond.26
+ %guard.cond.29 = and i1 %guard.cond.28, %guard.cond.27
+ %guard.cond.30 = and i1 %guard.cond.29, %guard.cond.28
+ %guard.cond.31 = and i1 %guard.cond.30, %guard.cond.29
+ %guard.cond.32 = and i1 %guard.cond.31, %guard.cond.30
+ %guard.cond.33 = and i1 %guard.cond.32, %guard.cond.31
+ %guard.cond.34 = and i1 %guard.cond.33, %guard.cond.32
+ %guard.cond.35 = and i1 %guard.cond.34, %guard.cond.33
+ %guard.cond.36 = and i1 %guard.cond.35, %guard.cond.34
+ %guard.cond.37 = and i1 %guard.cond.36, %guard.cond.35
+ %guard.cond.38 = and i1 %guard.cond.37, %guard.cond.36
+ %guard.cond.39 = and i1 %guard.cond.38, %guard.cond.37
+ %guard.cond.40 = and i1 %guard.cond.39, %guard.cond.38
+ %guard.cond.41 = and i1 %guard.cond.40, %guard.cond.39
+ %guard.cond.42 = and i1 %guard.cond.41, %guard.cond.40
+ %guard.cond.43 = and i1 %guard.cond.42, %guard.cond.41
+ %guard.cond.44 = and i1 %guard.cond.43, %guard.cond.42
+ %guard.cond.45 = and i1 %guard.cond.44, %guard.cond.43
+ %guard.cond.46 = and i1 %guard.cond.45, %guard.cond.44
+ %guard.cond.47 = and i1 %guard.cond.46, %guard.cond.45
+ %guard.cond.48 = and i1 %guard.cond.47, %guard.cond.46
+ %guard.cond.49 = and i1 %guard.cond.48, %guard.cond.47
+ %guard.cond.50 = and i1 %guard.cond.49, %guard.cond.48
+ %guard.cond.51 = and i1 %guard.cond.50, %guard.cond.49
+ %guard.cond.52 = and i1 %guard.cond.51, %guard.cond.50
+ %guard.cond.53 = and i1 %guard.cond.52, %guard.cond.51
+ %guard.cond.54 = and i1 %guard.cond.53, %guard.cond.52
+ %guard.cond.55 = and i1 %guard.cond.54, %guard.cond.53
+ %guard.cond.56 = and i1 %guard.cond.55, %guard.cond.54
+ %guard.cond.57 = and i1 %guard.cond.56, %guard.cond.55
+ %guard.cond.58 = and i1 %guard.cond.57, %guard.cond.56
+ %guard.cond.59 = and i1 %guard.cond.58, %guard.cond.57
+ %guard.cond.60 = and i1 %guard.cond.59, %guard.cond.58
+ %guard.cond.61 = and i1 %guard.cond.60, %guard.cond.59
+ %guard.cond.62 = and i1 %guard.cond.61, %guard.cond.60
+ %guard.cond.63 = and i1 %guard.cond.62, %guard.cond.61
+ %guard.cond.64 = and i1 %guard.cond.63, %guard.cond.62
+ %guard.cond.65 = and i1 %guard.cond.64, %guard.cond.63
+ %guard.cond.66 = and i1 %guard.cond.65, %guard.cond.64
+ %guard.cond.67 = and i1 %guard.cond.66, %guard.cond.65
+ %guard.cond.68 = and i1 %guard.cond.67, %guard.cond.66
+ %guard.cond.69 = and i1 %guard.cond.68, %guard.cond.67
+ %guard.cond.70 = and i1 %guard.cond.69, %guard.cond.68
+ %guard.cond.71 = and i1 %guard.cond.70, %guard.cond.69
+ %guard.cond.72 = and i1 %guard.cond.71, %guard.cond.70
+ %guard.cond.73 = and i1 %guard.cond.72, %guard.cond.71
+ %guard.cond.74 = and i1 %guard.cond.73, %guard.cond.72
+ %guard.cond.75 = and i1 %guard.cond.74, %guard.cond.73
+ %guard.cond.76 = and i1 %guard.cond.75, %guard.cond.74
+ %guard.cond.77 = and i1 %guard.cond.76, %guard.cond.75
+ %guard.cond.78 = and i1 %guard.cond.77, %guard.cond.76
+ %guard.cond.79 = and i1 %guard.cond.78, %guard.cond.77
+ %guard.cond.80 = and i1 %guard.cond.79, %guard.cond.78
+ %guard.cond.81 = and i1 %guard.cond.80, %guard.cond.79
+ %guard.cond.82 = and i1 %guard.cond.81, %guard.cond.80
+ %guard.cond.83 = and i1 %guard.cond.82, %guard.cond.81
+ %guard.cond.84 = and i1 %guard.cond.83, %guard.cond.82
+ %guard.cond.85 = and i1 %guard.cond.84, %guard.cond.83
+ %guard.cond.86 = and i1 %guard.cond.85, %guard.cond.84
+ %guard.cond.87 = and i1 %guard.cond.86, %guard.cond.85
+ %guard.cond.88 = and i1 %guard.cond.87, %guard.cond.86
+ %guard.cond.89 = and i1 %guard.cond.88, %guard.cond.87
+ %guard.cond.90 = and i1 %guard.cond.89, %guard.cond.88
+ %guard.cond.91 = and i1 %guard.cond.90, %guard.cond.89
+ %guard.cond.92 = and i1 %guard.cond.91, %guard.cond.90
+ %guard.cond.93 = and i1 %guard.cond.92, %guard.cond.91
+ %guard.cond.94 = and i1 %guard.cond.93, %guard.cond.92
+ %guard.cond.95 = and i1 %guard.cond.94, %guard.cond.93
+ %guard.cond.96 = and i1 %guard.cond.95, %guard.cond.94
+ %guard.cond.97 = and i1 %guard.cond.96, %guard.cond.95
+ %guard.cond.98 = and i1 %guard.cond.97, %guard.cond.96
+ %guard.cond.99 = and i1 %guard.cond.98, %guard.cond.97
+ call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond.99, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
\ No newline at end of file