From: Hans Wennborg Date: Tue, 10 May 2016 21:45:55 +0000 (+0000) Subject: Loop unroller: set thresholds for optsize and minsize functions to zero X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9ee5a28c8c359314e4019b6f09ba668d2f4b3136;p=llvm Loop unroller: set thresholds for optsize and minsize functions to zero Before r268509, Clang would disable the loop unroll pass when optimizing for size. That commit enabled it to be able to support unroll pragmas in -Os builds. However, this regressed binary size in one of Chromium's DLLs by ~100 KB. This restores the original behaviour of no unrolling at -Os, but doing it in LLVM instead of Clang makes more sense, and also allows the pragmas to keep working. Differential revision: http://reviews.llvm.org/D20115 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269124 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 2e5c55e73a4..95ec12d5cb8 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -111,9 +111,9 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.Threshold = 150; UP.PercentDynamicCostSavedThreshold = 20; UP.DynamicCostSavingsDiscount = 2000; - UP.OptSizeThreshold = 50; + UP.OptSizeThreshold = 0; UP.PartialThreshold = UP.Threshold; - UP.PartialOptSizeThreshold = UP.OptSizeThreshold; + UP.PartialOptSizeThreshold = 0; UP.Count = 0; UP.MaxCount = UINT_MAX; UP.FullUnrollMaxCount = UINT_MAX; diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll index bf2b9cf40dd..77e92bd7506 100644 --- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll +++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -1,30 +1,5 @@ ; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S 
-mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG -define void @unroll_opt_for_size() nounwind optsize { -entry: - br label %loop - -loop: - %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] - %inc = add i32 %iv, 1 - %exitcnd = icmp uge i32 %inc, 1024 - br i1 %exitcnd, label %exit, label %loop - -exit: - ret void -} - -; EPILOG-LABEL: @unroll_opt_for_size -; EPILOG: add -; EPILOG-NEXT: add -; EPILOG-NEXT: add -; EPILOG: icmp - -; PROLOG-LABEL: @unroll_opt_for_size -; PROLOG: add -; PROLOG-NEXT: add -; PROLOG-NEXT: add -; PROLOG: icmp define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { entry: diff --git a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll index 7a50fc0a4f4..c9677d83e37 100644 --- a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll +++ b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll @@ -1,53 +1,4 @@ ; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s -define void @unroll_opt_for_size() nounwind optsize { -entry: - br label %loop - -loop: - %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] - %inc = add i32 %iv, 1 - %exitcnd = icmp uge i32 %inc, 1024 - br i1 %exitcnd, label %exit, label %loop - -exit: - ret void -} - -; CHECK-LABEL: @unroll_opt_for_size -; CHECK: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: icmp - 
define void @unroll_default() nounwind { entry: br label %loop diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll deleted file mode 100644 index e5e0151761b..00000000000 --- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s -; RUN: sed -e 's/optsize/minsize/' %s | opt -S -loop-unroll -unroll-allow-partial | FileCheck %s - -; Loop size = 3, when the function has the optsize attribute, the -; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled -; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not). -define void @unroll_opt_for_size() nounwind optsize { -entry: - br label %loop - -loop: - %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] - %inc = add i32 %iv, 1 - %exitcnd = icmp uge i32 %inc, 1024 - br i1 %exitcnd, label %exit, label %loop - -exit: - ret void -} - -; CHECK: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: icmp diff --git a/test/Transforms/LoopUnroll/unroll-opt-attribute.ll b/test/Transforms/LoopUnroll/unroll-opt-attribute.ll index 334162f5605..f4a76c71368 100644 --- a/test/Transforms/LoopUnroll/unroll-opt-attribute.ll +++ b/test/Transforms/LoopUnroll/unroll-opt-attribute.ll @@ -4,10 +4,10 @@ ;///////////////////// TEST 1 ////////////////////////////// -; This test shows that with optsize 
attribute, the loop is unrolled -; according to the specified unroll factor. +; This test shows that the loop is unrolled according to the specified +; unroll factor. -define void @Test1() nounwind optsize { +define void @Test1() nounwind { entry: br label %loop @@ -32,38 +32,10 @@ exit: ;///////////////////// TEST 2 ////////////////////////////// -; This test shows that with minsize attribute, the loop is unrolled -; according to the specified unroll factor. - -define void @Test2() nounwind minsize { -entry: - br label %loop - -loop: - %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] - %inc = add i32 %iv, 1 - %exitcnd = icmp uge i32 %inc, 1024 - br i1 %exitcnd, label %exit, label %loop - -exit: - ret void -} - -; CHECK_COUNT4-LABEL: @Test2 -; CHECK_COUNT4: phi -; CHECK_COUNT4-NEXT: add -; CHECK_COUNT4-NEXT: add -; CHECK_COUNT4-NEXT: add -; CHECK_COUNT4-NEXT: add -; CHECK_COUNT4-NEXT: icmp - - -;///////////////////// TEST 3 ////////////////////////////// - ; This test shows that with optnone attribute, the loop is not unrolled ; even if an unroll factor was specified. -define void @Test3() nounwind optnone noinline { +define void @Test2() nounwind optnone noinline { entry: br label %loop @@ -77,20 +49,19 @@ exit: ret void } -; CHECK_COUNT4-LABEL: @Test3 +; CHECK_COUNT4-LABEL: @Test2 ; CHECK_COUNT4: phi ; CHECK_COUNT4-NEXT: add ; CHECK_COUNT4-NEXT: icmp -;///////////////////// TEST 4 ////////////////////////////// +;///////////////////// TEST 3 ////////////////////////////// -; This test shows that without any attribute, this loop is fully unrolled -; by default. +; This test shows that this loop is fully unrolled by default. 
@tab = common global [24 x i32] zeroinitializer, align 4 -define i32 @Test4() { +define i32 @Test3() { entry: br label %for.body @@ -106,7 +77,7 @@ for.end: ; preds = %for.body ret i32 42 } -; CHECK_NOCOUNT-LABEL: @Test4 +; CHECK_NOCOUNT-LABEL: @Test3 ; CHECK_NOCOUNT: store ; CHECK_NOCOUNT-NEXT: store ; CHECK_NOCOUNT-NEXT: store @@ -134,12 +105,11 @@ for.end: ; preds = %for.body ; CHECK_NOCOUNT-NEXT: ret -;///////////////////// TEST 5 ////////////////////////////// +;///////////////////// TEST 4 ////////////////////////////// -; This test shows that with optsize attribute, this loop is not unrolled -; by default. +; This test shows that with optsize attribute, this loop is not unrolled. -define i32 @Test5() optsize { +define i32 @Test4() optsize { entry: br label %for.body @@ -155,6 +125,6 @@ for.end: ; preds = %for.body ret i32 42 } -; CHECK_NOCOUNT-LABEL: @Test5 +; CHECK_NOCOUNT-LABEL: @Test4 ; CHECK_NOCOUNT: phi ; CHECK_NOCOUNT: icmp diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index 88f32c92d69..5b405a030a1 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -108,6 +108,29 @@ for.end: ; preds = %for.body !3 = !{!3, !4} !4 = !{!"llvm.loop.unroll.full"} +; #pragma clang loop unroll(full) +; Loop should be fully unrolled, even for optsize. 
+; +; CHECK-LABEL: @loop64_with_full_optsize( +; CHECK-NOT: br i1 +define void @loop64_with_full_optsize(i32* nocapture %a) optsize { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 + +for.end: ; preds = %for.body + ret void +} + ; #pragma clang loop unroll_count(4) ; Loop should be unrolled 4 times. ;