[Guards] Introduce loop-predication pass

author Artur Pilipenko <apilipenko@azulsystems.com>

Wed, 25 Jan 2017 16:00:44 +0000 (16:00 +0000)

committer Artur Pilipenko <apilipenko@azulsystems.com>

Wed, 25 Jan 2017 16:00:44 +0000 (16:00 +0000)
author Artur Pilipenko <apilipenko@azulsystems.com>
Wed, 25 Jan 2017 16:00:44 +0000 (16:00 +0000)
committer Artur Pilipenko <apilipenko@azulsystems.com>
Wed, 25 Jan 2017 16:00:44 +0000 (16:00 +0000)
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h

index a34ebaf18a03032edee8aeb09c67c5b1950e320f..c59dfee9693899cbc913c43454a7ccf1c85b1d31 100644 (file)
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -199,6 +199,7 @@ void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);
  void initializeLoopInterchangePass(PassRegistry &);
  void initializeLoopLoadEliminationPass(PassRegistry&);
  void initializeLoopPassPass(PassRegistry&);
+void initializeLoopPredicationLegacyPassPass(PassRegistry&);
  void initializeLoopRerollPass(PassRegistry&);
  void initializeLoopRotateLegacyPassPass(PassRegistry&);
  void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h

index e50137f8e02e24459f37b69ae280a3c74e5314a0..98dac1256e01ed6aa19f63c2a57fe92747524ba5 100644 (file)
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -116,6 +116,7 @@ namespace {
        (void) llvm::createLazyValueInfoPass();
        (void) llvm::createLoopExtractorPass();
        (void) llvm::createLoopInterchangePass();
+      (void) llvm::createLoopPredicationPass();
        (void) llvm::createLoopSimplifyPass();
        (void) llvm::createLoopSimplifyCFGPass();
        (void) llvm::createLoopStrengthReducePass();
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h

index 92558937d04764bfc944200f46b02b8a68b2203b..d4215412b149577e695cc3a6c4089ed826c604f6 100644 (file)
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -145,6 +145,12 @@ Pass *createLICMPass();
  //
  Pass *createLoopSinkPass();
  
+//===----------------------------------------------------------------------===//
+//
+// LoopPredication - This pass does loop predication on guards.
+//
+Pass *createLoopPredicationPass();
+
  //===----------------------------------------------------------------------===//
  //
  // LoopInterchange - This pass interchanges loops to provide a more
diff --git a/include/llvm/Transforms/Scalar/LoopPredication.h b/include/llvm/Transforms/Scalar/LoopPredication.h

new file mode 100644 (file)

index 0000000..57398bd
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/LoopPredication.h
@@ -0,0 +1,32 @@
+//===- LoopPredication.h - Guard based loop predication pass ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to convert loop variant range checks to loop invariant by
+// widening checks across loop iterations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+/// New pass manager loop pass that performs loop predication, i.e. tries to
+/// convert loop variant range checks to loop invariant ones by widening the
+/// checks across loop iterations.
+class LoopPredicationPass : public PassInfoMixin<LoopPredicationPass> {
+public:
+  /// Runs loop predication on \p L. Returns PreservedAnalyses::all() when the
+  /// loop was not changed.
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp

index 7d00096df4deefdfff87e032c388a342de0d7253..e26a8e2f1ed670818cbccb55443ffc596e760beb 100644 (file)
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -105,6 +105,7 @@
  #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
  #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
  #include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopPredication.h"
  #include "llvm/Transforms/Scalar/LoopRotation.h"
  #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
  #include "llvm/Transforms/Scalar/LoopSink.h"
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def

index d93fbdb2c722cc8027a64ef5a5861f80ae60360a..bfa1868692d586c902c646d5d961d6b543a70382 100644 (file)
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -227,4 +227,5 @@ LOOP_PASS("indvars", IndVarSimplifyPass())
  LOOP_PASS("unroll", LoopUnrollPass())
  LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
  LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
+LOOP_PASS("loop-predication", LoopPredicationPass())
  #undef LOOP_PASS
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt

index 06d3d6a73954eeb9b1b256d5ebbcddb388856edf..b2579a813e62bf39f708e8a32926ee1df2094d88 100644 (file)
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_library(LLVMScalarOpts
    LoopInterchange.cpp
    LoopLoadElimination.cpp
    LoopPassManager.cpp
+  LoopPredication.cpp
    LoopRerollPass.cpp
    LoopRotation.cpp
    LoopSimplifyCFG.cpp
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp

new file mode 100644 (file)

index 0000000..4f68314
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -0,0 +1,280 @@
+//===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LoopPredication pass tries to convert loop variant range checks to loop
+// invariant by widening checks across loop iterations. For example, it will
+// convert
+//
+//   for (i = 0; i < n; i++) {
+//     guard(i < len);
+//     ...
+//   }
+//
+// to
+//
+//   for (i = 0; i < n; i++) {
+//     guard(n - 1 < len);
+//     ...
+//   }
+//
+// After this transformation the condition of the guard is loop invariant, so
+// loop-unswitch can later unswitch the loop by this condition which basically
+// predicates the loop by the widened condition:
+//
+//   if (n - 1 < len)
+//     for (i = 0; i < n; i++) {
+//       ...
+//     }
+//   else
+//     deoptimize
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "loop-predication"
+
+using namespace llvm;
+
+namespace {
+/// Shared implementation of loop predication, used by both the legacy and the
+/// new pass manager wrappers. An instance is created for a single loop and
+/// driven through runOnLoop().
+class LoopPredication {
+  ScalarEvolution *SE;
+
+  // Per-loop state. It is assigned by runOnLoop() before any of the widening
+  // helpers run; initialized to null so a premature read is never
+  // indeterminate.
+  Loop *L = nullptr;
+  const DataLayout *DL = nullptr;
+  BasicBlock *Preheader = nullptr;
+
+  /// Tries to widen \p ICI to a loop invariant condition emitted in the loop
+  /// preheader. Returns None if the condition can't be widened.
+  Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
+                                        IRBuilder<> &Builder);
+  /// Widens the widenable subconditions of the guard \p II. Returns true if
+  /// the guard condition was changed.
+  bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
+
+public:
+  explicit LoopPredication(ScalarEvolution *SE) : SE(SE) {}
+  /// Processes every guard found in \p L. Returns true if any guard was
+  /// changed.
+  bool runOnLoop(Loop *L);
+};
+
+/// Legacy pass manager wrapper around LoopPredication.
+class LoopPredicationLegacyPass : public LoopPass {
+public:
+  static char ID;
+
+  LoopPredicationLegacyPass() : LoopPass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeLoopPredicationLegacyPassPass(Registry);
+  }
+
+  // Delegates to getLoopAnalysisUsage() for the analysis requirements.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    getLoopAnalysisUsage(AU);
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+    // Bail out if skipLoop() says this loop should not be processed.
+    if (skipLoop(L))
+      return false;
+
+    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+    return LoopPredication(&SE).runOnLoop(L);
+  }
+};
+
+char LoopPredicationLegacyPass::ID = 0;
+} // end anonymous namespace
+
+// Register the legacy pass under the "loop-predication" name and declare its
+// dependency on LoopPass.
+INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
+                      "Loop predication", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
+                    "Loop predication", false, false)
+
+/// Creates a new instance of the legacy pass manager loop predication pass.
+Pass *llvm::createLoopPredicationPass() {
+  return new LoopPredicationLegacyPass();
+}
+
+/// New pass manager entry point: runs loop predication on \p L using the
+/// ScalarEvolution result from \p AR.
+PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
+                                           LoopStandardAnalysisResults &AR,
+                                           LPMUpdater &U) {
+  LoopPredication Predicator(&AR.SE);
+  const bool Changed = Predicator.runOnLoop(&L);
+
+  // When no guard was rewritten every analysis is reported as preserved.
+  return Changed ? getLoopPassPreservedAnalyses() : PreservedAnalyses::all();
+}
+
+/// Tries to widen the range check \p ICI to a loop invariant condition.
+///
+/// The check is widened only if, after canonicalization, it compares an add
+/// recurrence of the current loop against a loop invariant bound and the
+/// predicate is monotonic over that recurrence. On success the new left-hand
+/// side is expanded before the preheader terminator, the widened comparison
+/// is created with \p Builder and returned. Otherwise returns None.
+Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
+                                                       SCEVExpander &Expander,
+                                                       IRBuilder<> &Builder) {
+  DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
+  DEBUG(ICI->dump());
+
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+  Value *LHS = ICI->getOperand(0);
+  Value *RHS = ICI->getOperand(1);
+  const SCEV *LHSS = SE->getSCEV(LHS);
+  if (isa<SCEVCouldNotCompute>(LHSS))
+    return None;
+  const SCEV *RHSS = SE->getSCEV(RHS);
+  if (isa<SCEVCouldNotCompute>(RHSS))
+    return None;
+
+  // Canonicalize RHS to be the loop invariant bound and LHS to be the loop
+  // computable index, swapping the predicate accordingly.
+  if (SE->isLoopInvariant(LHSS, L)) {
+    std::swap(LHS, RHS);
+    std::swap(LHSS, RHSS);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+  if (!SE->isLoopInvariant(RHSS, L))
+    return None;
+
+  // The index has to be an add recurrence of the loop being processed.
+  Value *Bound = RHS;
+  const SCEVAddRecExpr *IndexAR = dyn_cast<SCEVAddRecExpr>(LHSS);
+  if (!IndexAR || IndexAR->getLoop() != L)
+    return None;
+
+  DEBUG(dbgs() << "IndexAR: ");
+  DEBUG(IndexAR->dump());
+
+  bool IsIncreasing = false;
+  if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing))
+    return None;
+
+  // If the predicate is increasing the condition can change from false to true
+  // as the loop progresses, in this case take the value on the first iteration
+  // for the widened check. Otherwise the condition can change from true to
+  // false as the loop progresses, so take the value on the last iteration.
+  const SCEV *NewLHSS = IsIncreasing
+                            ? IndexAR->getStart()
+                            : SE->getSCEVAtScope(IndexAR, L->getParentLoop());
+  // Getting the recurrence itself back means no replacement value was
+  // computed.
+  if (NewLHSS == IndexAR) {
+    DEBUG(dbgs() << "Can't compute NewLHSS!");
+    return None;
+  }
+
+  DEBUG(dbgs() << "NewLHSS: ");
+  DEBUG(NewLHSS->dump());
+
+  if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE))
+    return None;
+
+  DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
+
+  // Materialize the new left-hand side in the preheader, using the type of the
+  // bound, and compare it against the bound with the canonicalized predicate.
+  Value *NewLHS = Expander.expandCodeFor(NewLHSS, Bound->getType(),
+                                         Preheader->getTerminator());
+  return Builder.CreateICmp(Pred, NewLHS, Bound);
+}
+
+/// Tries to widen the condition of \p Guard across loop iterations.
+///
+/// The condition is decomposed into its `and`-ed subconditions; every icmp
+/// subcondition that widenICmpRangeCheck() can widen is replaced by the
+/// widened check, all other subconditions are kept as is. If at least one
+/// subcondition was widened, the guard operand is replaced by the conjunction
+/// of the resulting checks. Returns true if the guard was changed.
+bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
+                                           SCEVExpander &Expander) {
+  DEBUG(dbgs() << "Processing guard:\n");
+  DEBUG(Guard->dump());
+
+  // Widened comparisons are created in front of the preheader terminator.
+  IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
+
+  // The guard condition is expected to be in form of:
+  //   cond1 && cond2 && cond3 ...
+  // Iterate over subconditions looking for icmp conditions which can be
+  // widened across loop iterations. While widening these conditions, remember
+  // the resulting list of subconditions in the Checks vector.
+  SmallVector<Value *, 4> Worklist(1, Guard->getOperand(0));
+  SmallPtrSet<Value *, 4> Visited;
+
+  SmallVector<Value *, 4> Checks;
+
+  unsigned NumWidened = 0;
+  do {
+    Value *Condition = Worklist.pop_back_val();
+    // Process every distinct subcondition only once.
+    if (!Visited.insert(Condition).second)
+      continue;
+
+    // An `and` is not a check on its own: queue both of its operands instead.
+    Value *LHS, *RHS;
+    using namespace llvm::PatternMatch;
+    if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
+      Worklist.push_back(LHS);
+      Worklist.push_back(RHS);
+      continue;
+    }
+
+    if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+      if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) {
+        Checks.push_back(NewRangeCheck.getValue());
+        NumWidened++;
+        continue;
+      }
+    }
+
+    // Save the condition as is if we can't widen it
+    Checks.push_back(Condition);
+  } while (Worklist.size() != 0);
+
+  // Leave the guard untouched if nothing could be widened.
+  if (NumWidened == 0)
+    return false;
+
+  // Emit the new guard condition: the checks are and-ed together right before
+  // the guard.
+  Builder.SetInsertPoint(Guard);
+  Value *LastCheck = nullptr;
+  for (auto *Check : Checks)
+    if (!LastCheck)
+      LastCheck = Check;
+    else
+      LastCheck = Builder.CreateAnd(LastCheck, Check);
+  Guard->setOperand(0, LastCheck);
+
+  DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+  return true;
+}
+
+/// Runs loop predication on \p Loop: collects the guard intrinsic calls in the
+/// loop and tries to widen the condition of each of them. Returns true if any
+/// guard was changed.
+bool LoopPredication::runOnLoop(Loop *Loop) {
+  L = Loop;
+
+  DEBUG(dbgs() << "Analyzing ");
+  DEBUG(L->dump());
+
+  Module *Mod = L->getHeader()->getModule();
+
+  // Without a used declaration of the guard intrinsic in the module there is
+  // nothing to widen.
+  Function *GuardFn =
+      Mod->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
+  const bool ModuleUsesGuards = GuardFn && !GuardFn->use_empty();
+  if (!ModuleUsesGuards)
+    return false;
+
+  DL = &Mod->getDataLayout();
+
+  // The widened checks are emitted in the preheader, so one is required.
+  Preheader = L->getLoopPreheader();
+  if (!Preheader)
+    return false;
+
+  // Gather the guards first and rewrite them afterwards, so that the
+  // instruction iterators used below are not invalidated.
+  SmallVector<IntrinsicInst *, 4> GuardCalls;
+  for (BasicBlock *Block : L->blocks())
+    for (Instruction &Inst : *Block) {
+      auto *Call = dyn_cast<IntrinsicInst>(&Inst);
+      if (Call && Call->getIntrinsicID() == Intrinsic::experimental_guard)
+        GuardCalls.push_back(Call);
+    }
+
+  SCEVExpander Expander(*SE, *DL, "loop-predication");
+
+  bool AnyWidened = false;
+  for (IntrinsicInst *GuardCall : GuardCalls)
+    if (widenGuardConditions(GuardCall, Expander))
+      AnyWidened = true;
+
+  return AnyWidened;
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp

index afe7483006ae6439c9ecae92e2b8c627fa68d7d2..f2727b088818c1ae18b38fe2361aa40b87031756 100644 (file)
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -58,6 +58,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
    initializeLoopAccessLegacyAnalysisPass(Registry);
    initializeLoopInstSimplifyLegacyPassPass(Registry);
    initializeLoopInterchangePass(Registry);
+  initializeLoopPredicationLegacyPassPass(Registry);
    initializeLoopRotateLegacyPassPass(Registry);
    initializeLoopStrengthReducePass(Registry);
    initializeLoopRerollPass(Registry);
diff --git a/test/Transforms/LoopPredication/basic.ll b/test/Transforms/LoopPredication/basic.ll

new file mode 100644 (file)

index 0000000..a347e61
--- /dev/null
+++ b/test/Transforms/LoopPredication/basic.ll
@@ -0,0 +1,496 @@
+; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; Unsigned loop from 0 to %n guarded by `%i ult %length`: the guard condition
+; is expected to become `(%n - 1) ult %length`, computed in the preheader.
+define i32 @unsigned_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @unsigned_loop_0_to_n_ult_check
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; Same loop as above, but the guard is written as `%length ugt %i` with the
+; loop invariant operand on the left. The expected widened condition is still
+; `(%n - 1) ult %length`.
+define i32 @unsigned_loop_0_to_n_ugt_check(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @unsigned_loop_0_to_n_ugt_check
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ugt i32 %length, %i
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+
+; A single guard on the `and` of two range checks: both checks are expected to
+; be widened in the preheader and combined again with one `and` in the loop.
+define i32 @two_range_checks(i32* %array.1, i32 %length.1,
+                             i32* %array.2, i32 %length.2, i32 %n) {
+; CHECK-LABEL: @two_range_checks
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds.1 = icmp ult i32 %i, %length.1
+  %within.bounds.2 = icmp ult i32 %i, %length.2
+  %within.bounds = and i1 %within.bounds.1, %within.bounds.2
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+  %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+  %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+  %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+  %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc.1, %array.2.i
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; A single guard on a chain of two `and`s over three range checks: all three
+; checks are expected to be widened in the preheader and and-ed together again
+; in the loop.
+define i32 @three_range_checks(i32* %array.1, i32 %length.1,
+                               i32* %array.2, i32 %length.2,
+                               i32* %array.3, i32 %length.3, i32 %n) {
+; CHECK-LABEL: @three_range_checks
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds.1 = icmp ult i32 %i, %length.1
+  %within.bounds.2 = icmp ult i32 %i, %length.2
+  %within.bounds.3 = icmp ult i32 %i, %length.3
+  %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2
+  %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+  %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+  %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+  %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+  %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+  %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
+
+  %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
+  %array.3.i = load i32, i32* %array.3.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; Three separate guards in one loop body, each with its own range check: every
+; guard is expected to get its own widened condition from the preheader.
+define i32 @three_guards(i32* %array.1, i32 %length.1,
+                         i32* %array.2, i32 %length.2,
+                         i32* %array.3, i32 %length.3, i32 %n) {
+; CHECK-LABEL: @three_guards
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2
+; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ]
+
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+
+  %within.bounds.1 = icmp ult i32 %i, %length.1
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+  %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+  %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+  %within.bounds.2 = icmp ult i32 %i, %length.2
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ]
+
+  %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+  %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+  %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
+
+  %within.bounds.3 = icmp ult i32 %i, %length.3
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ]
+
+  %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
+  %array.3.i = load i32, i32* %array.3.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; Signed loop starting at %start guarded by the lower bound check `%i sge 0`:
+; the guard condition is expected to become `%start sge 0`.
+define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+  %within.bounds = icmp sge i32 %i, 0
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nsw i32 %i, 1
+  %continue = icmp slt i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; Signed loop starting at %start guarded by the upper bound check
+; `%i slt %length`: the guard condition is expected to become
+; `(smax(%n, %start + 1) - 1) slt %length`.
+define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1
+; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
+; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
+; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+  %within.bounds = icmp slt i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nsw i32 %i, 1
+  %continue = icmp slt i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; Signed loop starting at %start with one guard combining the upper bound
+; check `%i slt %length` and the lower bound check `%i sge 0`: both checks are
+; expected to be widened in the preheader and and-ed together in the loop.
+define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_both_checks
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0
+; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1
+; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
+; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
+; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
+; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+  %within.bounds.1 = icmp slt i32 %i, %length
+  %within.bounds.2 = icmp sge i32 %i, 0
+  %within.bounds = and i1 %within.bounds.1, %within.bounds.2
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nsw i32 %i, 1
+  %continue = icmp slt i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; The guard combines a widenable range check on %i with a condition on %x that
+; does not involve the index: only the range check is expected to be widened,
+; the other condition is expected to stay in the loop as is.
+define i32 @unsigned_loop_0_to_n_unrelated_condition(i32* %array, i32 %length, i32 %n, i32 %x) {
+; CHECK-LABEL: @unsigned_loop_0_to_n_unrelated_condition
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %unrelated.cond = icmp ult i32 %x, %length
+; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  %unrelated.cond = icmp ult i32 %x, %length
+  %guard.cond = and i1 %within.bounds, %unrelated.cond
+  call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; Don't change the guard condition if there were no widened subconditions.
+; All three subconditions here are `icmp eq` comparisons against %i; the guard
+; and the `and` chain feeding it are expected to be left exactly as written.
+define i32 @test_no_widened_conditions(i32* %array, i32 %length, i32 %n, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: @test_no_widened_conditions
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %unrelated.cond.1 = icmp eq i32 %x1, %i
+; CHECK-NEXT: %unrelated.cond.2 = icmp eq i32 %x2, %i
+; CHECK-NEXT: %unrelated.cond.3 = icmp eq i32 %x3, %i
+; CHECK-NEXT: %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2
+; CHECK-NEXT: %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %unrelated.cond.1 = icmp eq i32 %x1, %i
+  %unrelated.cond.2 = icmp eq i32 %x2, %i
+  %unrelated.cond.3 = icmp eq i32 %x3, %i
+  %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2
+  %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3
+
+  call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; The bound of the range check (%i + %x) is itself loop variant: the guard is
+; expected to be left unchanged and nothing is expected in the preheader.
+define i32 @signed_loop_start_to_n_loop_variant_bound(i32* %array, i32 %x, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_loop_variant_bound
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %bound = add i32 %i, %x
+; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+  %bound = add i32 %i, %x
+  %within.bounds = icmp slt i32 %i, %bound
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nsw i32 %i, 1
+  %continue = icmp slt i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+
+; The guard condition is an `icmp eq` between the index and %x, which is not a
+; monotonic predicate: the guard is expected to be left unchanged.
+define i32 @signed_loop_start_to_n_non_monotonic_predicate(i32* %array, i32 %x, i32 %start, i32 %n) {
+; CHECK-LABEL: @signed_loop_start_to_n_non_monotonic_predicate
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %guard.cond = icmp eq i32 %i, %x
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
+  %guard.cond = icmp eq i32 %i, %x
+  call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ]
+
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+
+  %i.next = add nsw i32 %i, 1
+  %continue = icmp slt i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+\ No newline at end of file
diff --git a/test/Transforms/LoopPredication/nested.ll b/test/Transforms/LoopPredication/nested.ll

new file mode 100644 (file)

index 0000000..6b40cde
--- /dev/null
+++ b/test/Transforms/LoopPredication/nested.ll
@@ -0,0 +1,160 @@
+; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i32 @signed_loop_0_to_n_nested_0_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { ; Nested loops; the guard range-checks the inner induction variable %j against %length.
+; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_inner_index_check
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %outer.loop.preheader
+; The outer loop is bypassed when %n <= 0 (signed). Expected output: %l + -1 is computed following the outer preheader label.
+outer.loop.preheader:
+; CHECK: outer.loop.preheader:
+; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1
+  br label %outer.loop
+; Outer loop: %i counts up from 0; the inner loop is bypassed when %l <= 0 (signed).
+outer.loop:
+  %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+  %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+  %tmp6 = icmp sle i32 %l, 0
+  br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+  ; Expected output: the widened condition (%l + -1) <s %length is emitted following the inner preheader label.
+inner.loop.preheader:
+; CHECK: inner.loop.preheader:
+; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+  br label %inner.loop
+; Expected output: the guard inside the inner loop takes the widened condition as its operand.
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+  %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
+; Input form of the guard: a per-iteration signed range check of %j against %length.
+  %within.bounds = icmp slt i32 %j, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  ; Load array[%j] (index zero-extended to i64) and add it to the running sum.
+  %j.i64 = zext i32 %j to i64
+  %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
+  %array.j = load i32, i32* %array.j.ptr, align 4
+  %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
+; Inner latch: step %j by one and keep looping while the next value is signed-less-than %l.
+  %j.next = add nsw i32 %j, 1
+  %inner.continue = icmp slt i32 %j.next, %l
+  br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+; Outer latch: merge the sum from either path, step %i by one, keep looping while the next value is signed-less-than %n.
+outer.loop.inc:
+  %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+  %i.next = add nsw i32 %i, 1
+  %outer.continue = icmp slt i32 %i.next, %n
+  br i1 %outer.continue, label %outer.loop, label %exit
+; %result is 0 when the outer loop was bypassed from entry, otherwise the final sum.
+exit:
+  %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+  ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_nested_0_to_l_outer_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { ; Nested loops; the guard sits in the inner loop but range-checks the outer induction variable %i.
+; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_outer_index_check
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %outer.loop.preheader
+; The outer loop is bypassed when %n <= 0 (signed). Expected output: both %n + -1 and the widened compare against %length follow the outer preheader label.
+outer.loop.preheader:
+; CHECK: outer.loop.preheader:
+; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
+; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+  br label %outer.loop
+; Outer loop: %i counts up from 0; the inner loop is bypassed when %l <= 0 (signed).
+outer.loop:
+  %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+  %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+  %tmp6 = icmp sle i32 %l, 0
+  br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+  ; No expectation is placed on the inner preheader in this test.
+inner.loop.preheader:
+  br label %inner.loop
+; Expected output: the guard inside the inner loop takes the widened condition as its operand.
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+; %j only drives the inner trip count; it is not used by the guard or the load.
+  %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+  %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
+; Input form of the guard: a signed range check of the outer %i against %length.
+  %within.bounds = icmp slt i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  ; Load array[%i] (index zero-extended to i64) and add it to the running sum.
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %inner.loop.acc.next = add i32 %inner.loop.acc, %array.i
+; Inner latch: step %j by one and keep looping while the next value is signed-less-than %l.
+  %j.next = add nsw i32 %j, 1
+  %inner.continue = icmp slt i32 %j.next, %l
+  br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+; Outer latch: merge the sum from either path, step %i by one, keep looping while the next value is signed-less-than %n.
+outer.loop.inc:
+  %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+  %i.next = add nsw i32 %i, 1
+  %outer.continue = icmp slt i32 %i.next, %n
+  br i1 %outer.continue, label %outer.loop, label %exit
+; %result is 0 when the outer loop was bypassed from entry, otherwise the final sum.
+exit:
+  %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+  ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_nested_i_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { ; Nested loops; the inner induction variable %j starts at the outer %i and is range-checked against %length.
+; CHECK-LABEL: @signed_loop_0_to_n_nested_i_to_l_inner_index_check
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %outer.loop.preheader
+; The outer loop is bypassed when %n <= 0 (signed). No expectation is placed on the outer preheader in this test.
+outer.loop.preheader:
+  br label %outer.loop
+; Expected output in the outer loop: %i + 1, a signed max of %l and %i + 1 (compare plus select), then that max + -1 as the largest %j.
+outer.loop:
+; CHECK: outer.loop:
+; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1
+; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]]
+; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]]
+; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1
+  %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+  %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+  %tmp6 = icmp sle i32 %l, 0 ; entry test compares %l with 0, not with %i, so the inner body can run once with %j = %i even when %i >= %l
+  br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+  ; Expected output: the widened condition (largest %j) <s %length is emitted following the inner preheader label.
+inner.loop.preheader:
+; CHECK: inner.loop.preheader:
+; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length
+  br label %inner.loop
+; Expected output: the guard inside the inner loop takes the widened condition as its operand.
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+  %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+  %j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ]
+; Input form of the guard: a per-iteration signed range check of %j against %length.
+  %within.bounds = icmp slt i32 %j, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  ; Load array[%j] (index zero-extended to i64) and add it to the running sum.
+  %j.i64 = zext i32 %j to i64
+  %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
+  %array.j = load i32, i32* %array.j.ptr, align 4
+  %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
+; Inner latch: step %j by one and keep looping while the next value is signed-less-than %l.
+  %j.next = add nsw i32 %j, 1
+  %inner.continue = icmp slt i32 %j.next, %l
+  br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+; Outer latch: merge the sum from either path, step %i by one, keep looping while the next value is signed-less-than %n.
+outer.loop.inc:
+  %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+  %i.next = add nsw i32 %i, 1
+  %outer.continue = icmp slt i32 %i.next, %n
+  br i1 %outer.continue, label %outer.loop, label %exit
+; %result is 0 when the outer loop was bypassed from entry, otherwise the final sum.
+exit:
+  %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+  ret i32 %result
+}
+\ No newline at end of file
diff --git a/test/Transforms/LoopPredication/visited.ll b/test/Transforms/LoopPredication/visited.ll

new file mode 100644 (file)

index 0000000..e9aae77
--- /dev/null
+++ b/test/Transforms/LoopPredication/visited.ll
@@ -0,0 +1,140 @@
+; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i32 @test_visited(i32* %array, i32 %length, i32 %n, i32 %x) { ; Unsigned loop whose guard condition is a deep chain of 'and's with heavily shared operands.
+; CHECK-LABEL: @test_visited
+entry:
+  %tmp5 = icmp eq i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+; The loop is bypassed when %n == 0. Expected output: %n + -1 and an unsigned compare of it against %length sit in the preheader, directly before its branch.
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length
+; CHECK-NEXT: br label %loop
+  br label %loop
+; Expected output: the guard takes a single 'and' of %unrelated.cond with the widened condition.
+loop:
+; CHECK: loop:
+; CHECK: %unrelated.cond = icmp eq i32 %x, %i
+; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ]
+  %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length ; unsigned range check of %i against %length
+  %unrelated.cond = icmp eq i32 %x, %i ; equality compare; the expected output keeps it as a guard operand
+  %guard.cond.2 = and i1 %within.bounds, %unrelated.cond
+  %guard.cond.3 = and i1 %guard.cond.2, %unrelated.cond
+  %guard.cond.4 = and i1 %guard.cond.3, %guard.cond.2 ; from here on every value ands the two values defined just before it; NOTE(review): presumably this shared-operand chain exercises the pass's handling of already-visited conditions (test name) - confirm against LoopPredication.cpp
+  %guard.cond.5 = and i1 %guard.cond.4, %guard.cond.3
+  %guard.cond.6 = and i1 %guard.cond.5, %guard.cond.4
+  %guard.cond.7 = and i1 %guard.cond.6, %guard.cond.5
+  %guard.cond.8 = and i1 %guard.cond.7, %guard.cond.6
+  %guard.cond.9 = and i1 %guard.cond.8, %guard.cond.7
+  %guard.cond.10 = and i1 %guard.cond.9, %guard.cond.8
+  %guard.cond.11 = and i1 %guard.cond.10, %guard.cond.9
+  %guard.cond.12 = and i1 %guard.cond.11, %guard.cond.10
+  %guard.cond.13 = and i1 %guard.cond.12, %guard.cond.11
+  %guard.cond.14 = and i1 %guard.cond.13, %guard.cond.12
+  %guard.cond.15 = and i1 %guard.cond.14, %guard.cond.13
+  %guard.cond.16 = and i1 %guard.cond.15, %guard.cond.14
+  %guard.cond.17 = and i1 %guard.cond.16, %guard.cond.15
+  %guard.cond.18 = and i1 %guard.cond.17, %guard.cond.16
+  %guard.cond.19 = and i1 %guard.cond.18, %guard.cond.17
+  %guard.cond.20 = and i1 %guard.cond.19, %guard.cond.18
+  %guard.cond.21 = and i1 %guard.cond.20, %guard.cond.19
+  %guard.cond.22 = and i1 %guard.cond.21, %guard.cond.20
+  %guard.cond.23 = and i1 %guard.cond.22, %guard.cond.21
+  %guard.cond.24 = and i1 %guard.cond.23, %guard.cond.22
+  %guard.cond.25 = and i1 %guard.cond.24, %guard.cond.23
+  %guard.cond.26 = and i1 %guard.cond.25, %guard.cond.24
+  %guard.cond.27 = and i1 %guard.cond.26, %guard.cond.25
+  %guard.cond.28 = and i1 %guard.cond.27, %guard.cond.26
+  %guard.cond.29 = and i1 %guard.cond.28, %guard.cond.27
+  %guard.cond.30 = and i1 %guard.cond.29, %guard.cond.28
+  %guard.cond.31 = and i1 %guard.cond.30, %guard.cond.29
+  %guard.cond.32 = and i1 %guard.cond.31, %guard.cond.30
+  %guard.cond.33 = and i1 %guard.cond.32, %guard.cond.31
+  %guard.cond.34 = and i1 %guard.cond.33, %guard.cond.32
+  %guard.cond.35 = and i1 %guard.cond.34, %guard.cond.33
+  %guard.cond.36 = and i1 %guard.cond.35, %guard.cond.34
+  %guard.cond.37 = and i1 %guard.cond.36, %guard.cond.35
+  %guard.cond.38 = and i1 %guard.cond.37, %guard.cond.36
+  %guard.cond.39 = and i1 %guard.cond.38, %guard.cond.37
+  %guard.cond.40 = and i1 %guard.cond.39, %guard.cond.38
+  %guard.cond.41 = and i1 %guard.cond.40, %guard.cond.39
+  %guard.cond.42 = and i1 %guard.cond.41, %guard.cond.40
+  %guard.cond.43 = and i1 %guard.cond.42, %guard.cond.41
+  %guard.cond.44 = and i1 %guard.cond.43, %guard.cond.42
+  %guard.cond.45 = and i1 %guard.cond.44, %guard.cond.43
+  %guard.cond.46 = and i1 %guard.cond.45, %guard.cond.44
+  %guard.cond.47 = and i1 %guard.cond.46, %guard.cond.45
+  %guard.cond.48 = and i1 %guard.cond.47, %guard.cond.46
+  %guard.cond.49 = and i1 %guard.cond.48, %guard.cond.47
+  %guard.cond.50 = and i1 %guard.cond.49, %guard.cond.48
+  %guard.cond.51 = and i1 %guard.cond.50, %guard.cond.49
+  %guard.cond.52 = and i1 %guard.cond.51, %guard.cond.50
+  %guard.cond.53 = and i1 %guard.cond.52, %guard.cond.51
+  %guard.cond.54 = and i1 %guard.cond.53, %guard.cond.52
+  %guard.cond.55 = and i1 %guard.cond.54, %guard.cond.53
+  %guard.cond.56 = and i1 %guard.cond.55, %guard.cond.54
+  %guard.cond.57 = and i1 %guard.cond.56, %guard.cond.55
+  %guard.cond.58 = and i1 %guard.cond.57, %guard.cond.56
+  %guard.cond.59 = and i1 %guard.cond.58, %guard.cond.57
+  %guard.cond.60 = and i1 %guard.cond.59, %guard.cond.58
+  %guard.cond.61 = and i1 %guard.cond.60, %guard.cond.59
+  %guard.cond.62 = and i1 %guard.cond.61, %guard.cond.60
+  %guard.cond.63 = and i1 %guard.cond.62, %guard.cond.61
+  %guard.cond.64 = and i1 %guard.cond.63, %guard.cond.62
+  %guard.cond.65 = and i1 %guard.cond.64, %guard.cond.63
+  %guard.cond.66 = and i1 %guard.cond.65, %guard.cond.64
+  %guard.cond.67 = and i1 %guard.cond.66, %guard.cond.65
+  %guard.cond.68 = and i1 %guard.cond.67, %guard.cond.66
+  %guard.cond.69 = and i1 %guard.cond.68, %guard.cond.67
+  %guard.cond.70 = and i1 %guard.cond.69, %guard.cond.68
+  %guard.cond.71 = and i1 %guard.cond.70, %guard.cond.69
+  %guard.cond.72 = and i1 %guard.cond.71, %guard.cond.70
+  %guard.cond.73 = and i1 %guard.cond.72, %guard.cond.71
+  %guard.cond.74 = and i1 %guard.cond.73, %guard.cond.72
+  %guard.cond.75 = and i1 %guard.cond.74, %guard.cond.73
+  %guard.cond.76 = and i1 %guard.cond.75, %guard.cond.74
+  %guard.cond.77 = and i1 %guard.cond.76, %guard.cond.75
+  %guard.cond.78 = and i1 %guard.cond.77, %guard.cond.76
+  %guard.cond.79 = and i1 %guard.cond.78, %guard.cond.77
+  %guard.cond.80 = and i1 %guard.cond.79, %guard.cond.78
+  %guard.cond.81 = and i1 %guard.cond.80, %guard.cond.79
+  %guard.cond.82 = and i1 %guard.cond.81, %guard.cond.80
+  %guard.cond.83 = and i1 %guard.cond.82, %guard.cond.81
+  %guard.cond.84 = and i1 %guard.cond.83, %guard.cond.82
+  %guard.cond.85 = and i1 %guard.cond.84, %guard.cond.83
+  %guard.cond.86 = and i1 %guard.cond.85, %guard.cond.84
+  %guard.cond.87 = and i1 %guard.cond.86, %guard.cond.85
+  %guard.cond.88 = and i1 %guard.cond.87, %guard.cond.86
+  %guard.cond.89 = and i1 %guard.cond.88, %guard.cond.87
+  %guard.cond.90 = and i1 %guard.cond.89, %guard.cond.88
+  %guard.cond.91 = and i1 %guard.cond.90, %guard.cond.89
+  %guard.cond.92 = and i1 %guard.cond.91, %guard.cond.90
+  %guard.cond.93 = and i1 %guard.cond.92, %guard.cond.91
+  %guard.cond.94 = and i1 %guard.cond.93, %guard.cond.92
+  %guard.cond.95 = and i1 %guard.cond.94, %guard.cond.93
+  %guard.cond.96 = and i1 %guard.cond.95, %guard.cond.94
+  %guard.cond.97 = and i1 %guard.cond.96, %guard.cond.95
+  %guard.cond.98 = and i1 %guard.cond.97, %guard.cond.96
+  %guard.cond.99 = and i1 %guard.cond.98, %guard.cond.97
+  call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond.99, i32 9) [ "deopt"() ]
+; Load array[%i] (index zero-extended to i64) and add it to the running sum.
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  %loop.acc.next = add i32 %loop.acc, %array.i
+; Latch: step %i by one (nuw) and keep looping while the next value is unsigned-less-than %n.
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+; %result is 0 when the loop was bypassed from entry, otherwise the final sum.
+exit:
+  %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+  ret i32 %result
+}
+\ No newline at end of file
author	Artur Pilipenko <apilipenko@azulsystems.com>
	Wed, 25 Jan 2017 16:00:44 +0000 (16:00 +0000)
committer	Artur Pilipenko <apilipenko@azulsystems.com>
	Wed, 25 Jan 2017 16:00:44 +0000 (16:00 +0000)
include/llvm/InitializePasses.h		patch \| blob \| history
include/llvm/LinkAllPasses.h		patch \| blob \| history
include/llvm/Transforms/Scalar.h		patch \| blob \| history
include/llvm/Transforms/Scalar/LoopPredication.h	[new file with mode: 0644]	patch \| blob
lib/Passes/PassBuilder.cpp		patch \| blob \| history
lib/Passes/PassRegistry.def		patch \| blob \| history
lib/Transforms/Scalar/CMakeLists.txt		patch \| blob \| history
lib/Transforms/Scalar/LoopPredication.cpp	[new file with mode: 0644]	patch \| blob
lib/Transforms/Scalar/Scalar.cpp		patch \| blob \| history
test/Transforms/LoopPredication/basic.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/LoopPredication/nested.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/LoopPredication/visited.ll	[new file with mode: 0644]	patch \| blob