// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap};
+ LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*IsSink=*/true};
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+ Flags.IsSink = false;
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
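
(Not part of the diff, for reference: the aggregate initializer above fills in the
new IsSink member of SinkAndHoistLICMFlags, declared in
llvm/include/llvm/Transforms/Utils/LoopUtils.h. A rough sketch of the struct as of
this patch; see the header for the exact types and order:)

struct SinkAndHoistLICMFlags {
  bool NoOfMemAccTooLarge;
  unsigned LicmMssaOptCounter;
  unsigned LicmMssaOptCap;
  unsigned LicmMssaNoAccForPromotionCap;
  bool IsSink; // New with this patch: true when sinking, false when hoisting.
};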
// Could do better here, but this is conservatively correct.
// TODO: Cache set of Uses on the first walk in runOnLoop, update when
// moving accesses. Can also extend to dominating uses.
+ auto *SIMD = MSSA->getMemoryAccess(SI);
for (auto *BB : CurLoop->getBlocks())
if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
for (const auto &MA : *Accesses)
  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
    auto *MD = MU->getDefiningAccess();
    if (!MSSA->isLiveOnEntryDef(MD) &&
        CurLoop->contains(MD->getBlock()))
      return false;
+ // Disable hoisting past potentially interfering loads. Optimized
+ // Uses may point to an access outside the loop, as getClobbering
+ // checks the previous iteration when walking the backedge.
+ // FIXME: More precise: no Uses that alias SI.
+ if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
+ return false;
} else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
(void)LI; // Silence warning.
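
(Not part of the diff, for orientation: this hunk sits in the StoreInst branch of
canSinkOrHoistInst when MemorySSA is in use. The block-access walk above is only
reached after quota checks roughly along these lines; a sketch only, see LICM.cpp
for the exact guards:)

if (Flags->NoOfMemAccTooLarge)
  return false; // Too many accesses to walk; stay conservative.
if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
  return false; // Out of clobber-walk quota.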
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop,
SinkAndHoistLICMFlags &Flags) {
- MemoryAccess *Source;
- // See declaration of SetLicmMssaOptCap for usage details.
- if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
- Source = MU->getDefiningAccess();
- else {
- Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
- Flags.LicmMssaOptCounter++;
+ // For hoisting, use the walker to determine safety
+ if (!Flags.IsSink) {
+ MemoryAccess *Source;
+ // See declaration of SetLicmMssaOptCap for usage details.
+ if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
+ Source = MU->getDefiningAccess();
+ else {
+ Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+ Flags.LicmMssaOptCounter++;
+ }
+ return !MSSA->isLiveOnEntryDef(Source) &&
+ CurLoop->contains(Source->getBlock());
}
- return !MSSA->isLiveOnEntryDef(Source) &&
- CurLoop->contains(Source->getBlock());
+
+ // For sinking, we'd need to check all Defs below this use. The getClobbering
+ // call will look on the backedge of the loop, but will check aliasing with
+ // the instructions on the previous iteration.
+ // For example:
+ // for (i ... )
+ //   load a[i] (a Use, with LoE as its defining access)
+ //   store a[i] (1 = Def(2), with 2 = MemoryPhi for the loop)
+ //   i++;
+ // The load sees no clobbering inside the loop, as the backedge alias check
+ // does phi translation and checks aliasing against store a[i-1].
+ // However, sinking the load out of the loop, below the store, is incorrect.
+
+ // For now, only sink if every Def in the loop is in the same block as the
+ // Use and precedes it.
+ // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
+ // needs PostDominatorTreeAnalysis.
+ // FIXME: More precise: no Defs that alias this Use.
+ if (Flags.NoOfMemAccTooLarge)
+ return true;
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockDefs(BB))
+ for (const auto &MA : *Accesses)
+ if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ if (MU->getBlock() != MD->getBlock() ||
+ !MSSA->locallyDominates(MD, MU))
+ return true;
+ return false;
}
/// Little predicate that returns true if the specified basic block is in
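
(Not part of the diff, for context: canSinkOrHoistInst reaches this predicate when
it considers moving a load; a rough sketch of the call site, with names as in
LICM.cpp:)

bool Invalidated = pointerInvalidatedByLoopWithMSSA(
    MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);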
--- /dev/null
+; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=true -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
+; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=false -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
+; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=true | FileCheck %s
+; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=false | FileCheck %s
+
+; LICM: Using
+; LICM-NOT: LICM sinking instruction: %.pre = load i8, i8* %arrayidx.phi.trans.insert
+
+; CHECK-LABEL: @fn1
+; CHECK-LABEL: entry:
+; CHECK: br i1 true, label %[[END:.*]], label %[[PH:.*]]
+; CHECK: [[PH]]:
+; CHECK: br label %[[CRIT:.*]]
+; CHECK: [[CRIT]]:
+; CHECK: load i8
+; CHECK: store i8
+; CHECK: br i1 true, label %[[ENDCRIT:.*]], label %[[CRIT]]
+; CHECK: [[ENDCRIT]]:
+; CHECK-NOT: load i8
+; CHECK: br label %[[END]]
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @fn1() {
+entry:
+ %g = alloca [9 x i8], align 1
+ br label %for.body
+
+for.body: ; preds = %for.body.for.body_crit_edge, %entry
+ %0 = phi i64 [ 0, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
+ %arrayidx = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv
+ store i8 2, i8* %arrayidx, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br i1 undef, label %for.end18, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge: ; preds = %for.body
+ %arrayidx.phi.trans.insert = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv.next
+ %.pre = load i8, i8* %arrayidx.phi.trans.insert, align 1
+ %phitmp = zext i8 %.pre to i64
+ br label %for.body
+
+for.end18: ; preds = %for.body
+ store i64 %0, i64* undef, align 8
+ ret void
+}
+