From: Anna Thomas Date: Tue, 6 Jun 2017 14:54:01 +0000 (+0000) Subject: [IRCE] Canonicalize pre/post loops after the blocks are added into parent loop X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=958169b1f83d5a1826f202efddc99085e21fb012;p=llvm [IRCE] Canonicalize pre/post loops after the blocks are added into parent loop Summary: We were canonicalizing the pre loop (into loop-simplify form) before the post loop blocks were added into parent loop. This is incorrect when IRCE is done on a subloop. The post-loop blocks are created, but not yet added to the parent loop. So, loop-simplification on the pre-loop incorrectly updates LoopInfo. This patch corrects the ordering so that pre and post loop blocks are added to parent loop (if any), and then the loops are canonicalized to LCSSA and LoopSimplifyForm. Reviewers: reames, sanjoy, apilipenko Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D33846 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304800 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 5017a08ed1e..2f96c3064b8 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1371,28 +1371,35 @@ bool LoopConstrainer::run() { DT.recalculate(F); + // We need to first add all the pre and post loop blocks into the loop + // structures (as part of createClonedLoopStructure), and then update the + // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating + // LI when LoopSimplifyForm is generated. 
+ Loop *PreL = nullptr, *PostL = nullptr; if (!PreLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PreL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map); - formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Pre loops are slow paths, we do not need to perform any loop - // optimizations on them. - DisableAllLoopOptsOnLoop(*L); } if (!PostLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PostL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map); + } + + // This function canonicalizes the loop into Loop-Simplify and LCSSA forms. + auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) { formLCSSARecursively(*L, DT, &LI, &SE); simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Post loops are slow paths, we do not need to perform any loop + // Pre/post loops are slow paths, we do not need to perform any loop // optimizations on them. - DisableAllLoopOptsOnLoop(*L); - } - - formLCSSARecursively(OriginalLoop, DT, &LI, &SE); - simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true); + if (!IsOriginalLoop) + DisableAllLoopOptsOnLoop(*L); + }; + if (PreL) + CanonicalizeLoop(PreL, false); + if (PostL) + CanonicalizeLoop(PostL, false); + CanonicalizeLoop(&OriginalLoop, true); return true; } diff --git a/test/Transforms/IRCE/correct-loop-info.ll b/test/Transforms/IRCE/correct-loop-info.ll new file mode 100644 index 00000000000..3c26b47f154 --- /dev/null +++ b/test/Transforms/IRCE/correct-loop-info.ll @@ -0,0 +1,182 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -irce < %s -S | FileCheck %s + +; REQUIRES: asserts + +; IRCE creates the pre and post loop, and invokes the +; canonicalization of these loops to LCSSA and loop-simplify structure. Make sure that the update to the loopinfo does not +; incorrectly change the header while canonicalizing these pre/post loops. 
We +; were incorrectly updating LI when the split loop is a subloop as in the case below. +source_filename = "correct-loop-info.ll" + +define void @baz() personality i32* ()* @ham { +; CHECK-LABEL: @baz( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[OUTERHEADER:%.*]] +; CHECK: outerheader: +; CHECK-NEXT: [[TMP:%.*]] = icmp slt i32 undef, 84 +; CHECK-NEXT: br i1 [[TMP]], label [[BB2:%.*]], label [[BB16:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br i1 false, label [[INNERHEADER_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] +; CHECK: innerheader.preloop.preheader: +; CHECK-NEXT: br label [[INNERHEADER_PRELOOP:%.*]] +; CHECK: mainloop: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], -1 +; CHECK-NEXT: br i1 [[TMP0]], label [[INNERHEADER_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: innerheader.preheader: +; CHECK-NEXT: br label [[INNERHEADER:%.*]] +; CHECK: innerheader: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP6:%.*]], [[BB8:%.*]] ], [ [[TMP4_PRELOOP_COPY:%.*]], [[INNERHEADER_PREHEADER]] ] +; CHECK-NEXT: invoke void @pluto() +; CHECK-NEXT: to label [[BB5:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit.split-lp +; CHECK: bb5: +; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 true, label [[BB8]], label [[EXIT3_LOOPEXIT5:%.*]] +; CHECK: bb8: +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP6]], 84 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP6]], -1 +; CHECK-NEXT: br i1 [[TMP1]], label [[INNERHEADER]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[TMP6_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[BB8]] ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP6_LCSSA]], 84 +; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[BB13:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[TMP4_COPY:%.*]] = phi i32 [ [[TMP4_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: 
[[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: outer_exiting.loopexit: +; CHECK-NEXT: [[LPAD_LOOPEXIT:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label [[OUTER_EXITING:%.*]] +; CHECK: outer_exiting.loopexit.split-lp.loopexit: +; CHECK-NEXT: [[LPAD_LOOPEXIT2:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp +; CHECK: outer_exiting.loopexit.split-lp.loopexit.split-lp: +; CHECK-NEXT: %lpad.loopexit.split-lp3 = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp +; CHECK: outer_exiting.loopexit.split-lp: +; CHECK-NEXT: br label [[OUTER_EXITING]] +; CHECK: outer_exiting: +; CHECK-NEXT: switch i32 undef, label [[EXIT2:%.*]] [ +; CHECK-NEXT: i32 142, label [[BB14:%.*]] +; CHECK-NEXT: i32 448, label [[EXIT:%.*]] +; CHECK-NEXT: ] +; CHECK: exit3.loopexit: +; CHECK-NEXT: br label [[EXIT3:%.*]] +; CHECK: exit3.loopexit4: +; CHECK-NEXT: br label [[EXIT3]] +; CHECK: exit3.loopexit5: +; CHECK-NEXT: br label [[EXIT3]] +; CHECK: exit3: +; CHECK-NEXT: ret void +; CHECK: bb13.loopexit: +; CHECK-NEXT: br label [[BB13]] +; CHECK: bb13: +; CHECK-NEXT: unreachable +; CHECK: bb14: +; CHECK-NEXT: br label [[OUTERHEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: bb16: +; CHECK-NEXT: ret void +; CHECK: exit2: +; CHECK-NEXT: ret void +; CHECK: innerheader.preloop: +; CHECK-NEXT: [[TMP4_PRELOOP:%.*]] = phi i32 [ [[TMP6_PRELOOP:%.*]], [[BB8_PRELOOP:%.*]] ], [ undef, [[INNERHEADER_PRELOOP_PREHEADER]] ] +; CHECK-NEXT: invoke void @pluto() +; CHECK-NEXT: to label [[BB5_PRELOOP:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT:%.*]] +; CHECK: bb5.preloop: +; CHECK-NEXT: [[TMP6_PRELOOP]] = add i32 [[TMP4_PRELOOP]], 1 +; CHECK-NEXT: [[TMP7_PRELOOP:%.*]] = icmp ult i32 [[TMP6_PRELOOP]], 0 +; CHECK-NEXT: br i1 [[TMP7_PRELOOP]], label 
[[BB8_PRELOOP]], label [[EXIT3_LOOPEXIT:%.*]] +; CHECK: bb8.preloop: +; CHECK-NEXT: [[TMP9_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 84 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], -1 +; CHECK-NEXT: br i1 [[TMP3]], label [[INNERHEADER_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop !0, !irce.loop.clone !5 +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[TMP6_PRELOOP_LCSSA:%.*]] = phi i32 [ [[TMP6_PRELOOP]], [[BB8_PRELOOP]] ] +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP6_PRELOOP_LCSSA]], 84 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRELOOP_PSEUDO_EXIT]], label [[BB13]] +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: [[TMP4_PRELOOP_COPY]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[MAINLOOP]] +; CHECK: postloop: +; CHECK-NEXT: br label [[INNERHEADER_POSTLOOP:%.*]] +; CHECK: innerheader.postloop: +; CHECK-NEXT: [[TMP4_POSTLOOP:%.*]] = phi i32 [ [[TMP6_POSTLOOP:%.*]], [[BB8_POSTLOOP:%.*]] ], [ [[TMP4_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: invoke void @pluto() +; CHECK-NEXT: to label [[BB5_POSTLOOP:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit +; CHECK: bb5.postloop: +; CHECK-NEXT: [[TMP6_POSTLOOP]] = add i32 [[TMP4_POSTLOOP]], 1 +; CHECK-NEXT: [[TMP7_POSTLOOP:%.*]] = icmp ult i32 [[TMP6_POSTLOOP]], 0 +; CHECK-NEXT: br i1 [[TMP7_POSTLOOP]], label [[BB8_POSTLOOP]], label [[EXIT3_LOOPEXIT4:%.*]] +; CHECK: bb8.postloop: +; CHECK-NEXT: [[TMP9_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 84 +; CHECK-NEXT: br i1 [[TMP9_POSTLOOP]], label [[INNERHEADER_POSTLOOP]], label [[BB13_LOOPEXIT:%.*]], !llvm.loop !6, !irce.loop.clone !5 +; +bb: + br label %outerheader + +outerheader: ; preds = %bb14, %bb + %tmp = icmp slt i32 undef, 84 + br i1 %tmp, label %bb2, label %bb16 + +bb2: ; preds = %outerheader + br label %innerheader + +innerheader: ; preds = %bb8, 
%bb2 + %tmp4 = phi i32 [ %tmp6, %bb8 ], [ undef, %bb2 ] + invoke void @pluto() + to label %bb5 unwind label %outer_exiting + +bb5: ; preds = %innerheader + %tmp6 = add i32 %tmp4, 1 + %tmp7 = icmp ult i32 %tmp6, 0 + br i1 %tmp7, label %bb8, label %exit3 + +bb8: ; preds = %bb5 + %tmp9 = icmp slt i32 %tmp6, 84 + br i1 %tmp9, label %innerheader, label %bb13 + +outer_exiting: ; preds = %innerheader + %tmp11 = landingpad { i8*, i32 } + cleanup + switch i32 undef, label %exit2 [ + i32 142, label %bb14 + i32 448, label %exit + ] + +exit3: ; preds = %bb5 + ret void + +bb13: ; preds = %bb8 + unreachable + +bb14: ; preds = %outer_exiting + br label %outerheader + +exit: ; preds = %outer_exiting + ret void + +bb16: ; preds = %outerheader + ret void + +exit2: ; preds = %outer_exiting + ret void +} + +declare i32* @ham() + +declare void @pluto() + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = !{!"llvm.loop.unroll.disable"} +!2 = !{!"llvm.loop.vectorize.enable", i1 false} +!3 = !{!"llvm.loop.licm_versioning.disable"} +!4 = !{!"llvm.loop.distribute.enable", i1 false} +!5 = !{} +!6 = distinct !{!6, !1, !2, !3, !4}