cl::init(true), cl::Hidden);
static cl::opt<int>
- MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
- cl::desc("Code size threshold for outlining within a "
- "single BB (as a multiple of TCC_Basic)"));
+ SplittingThreshold("hotcoldsplit-threshold", cl::init(3), cl::Hidden,
+ cl::desc("Code size threshold for splitting cold code "
+ "(as a multiple of TCC_Basic)"));
namespace {
/// Check whether \p Region is profitable to outline.
static bool isProfitableToOutline(const BlockSequence &Region,
TargetTransformInfo &TTI) {
+ // If the splitting threshold is set at or below zero, skip the usual
+ // profitability check.
+ if (SplittingThreshold <= 0)
+ return true;
+
if (Region.size() > 1)
return true;
Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
- if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
+ if (Cost >= (SplittingThreshold * TargetTransformInfo::TCC_Basic))
return true;
}
return false;
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
-; RUN: opt -passes=hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=2 -S < %s | FileCheck %s
+; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=2 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
br i1 undef, label %if.then, label %if.end
if.then: ; preds = %entry
- call void @sink()
call void @sink()
ret void
br label %loop
loop:
- call void @sink()
- call void @sink()
call void @sink()
br label %loop
}
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=1 < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
; The magic number 6 comes from (1 * TCC_Expensive) + (1 * CostOfCallX86).
-; RUN: opt -hotcoldsplit -min-outlining-thresh=6 -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=6 -S < %s | FileCheck %s
; Test that we outline even though there are only two cold instructions. TTI
; should determine that they are expensive in terms of code size.
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
; We expect this block to be outlined. That kills the definition of %var.
%var = add i32 0, 0, !dbg !11
call void @sink()
- call void @sink()
- call void @sink()
br label %cleanup
cleanup:
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
continue_exception:
call void @sideeffect(i32 0)
- call void @sideeffect(i32 1)
call void @sink()
ret void
exception:
; Note: EH pads are not candidates for region entry points.
%cleanup = landingpad i8 cleanup
- ret void
+ br label %trivial-eh-handler
+
+trivial-eh-handler:
+ call void @sideeffect(i32 1)
+ br label %normal
normal:
call void @sideeffect(i32 0)
- call void @sideeffect(i32 1)
ret void
}
; CHECK-LABEL: define {{.*}}@foo.cold.1(
; CHECK: sideeffect(i32 0)
-; CHECK: sideeffect(i32 1)
; CHECK: sink
declare void @sideeffect(i32)
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
; Do not outline calls to @llvm.eh.typeid.for. See llvm.org/PR39545.
if.else:
%t = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
call void @sink()
- call void @sink()
- call void @sink()
ret void
}
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
-; RUN: opt -S -hotcoldsplit < %s 2>&1 | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
; Source:
;
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
ret void
if.else:
- call void @sink()
- call void @sink()
call void @sink()
ret void
}
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
; Source:
;
}
; CHECK-LABEL: define {{.*}}@foo.cold.1(
-; TODO: Eliminate this unnecessary unconditional branch.
; CHECK: br
; CHECK: [[exit1Stub:.*]]:
; CHECK-NEXT: ret i1 true
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
br i1 %2, label %sink, label %exit
sink:
- tail call void @_Z10sideeffectv()
- tail call void @_Z10sideeffectv()
tail call void @_Z10sideeffectv()
call void @llvm.trap()
unreachable
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
if.else:
call void asm "", ""()
call void @sink()
- call void @sink()
- call void @sink()
ret void
}
-; RUN: opt -S -hotcoldsplit < %s 2>&1 | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s
; CHECK-LABEL: define {{.*}}@fun
; CHECK: call {{.*}}@fun.cold.2(
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
; Source:
;
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
; Source:
;
; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.1
; CHECK: call void @_Z10sideeffecti(i32 1)
-; CHECK: call void @_Z10sideeffecti(i32 11)
; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.2
; CHECK: call void @_Z10sideeffecti(i32 0)
-; CHECK: call void @_Z10sideeffecti(i32 10)
; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.3
; CHECK: call void @_Z4sinkv
; <label>:8: ; preds = %5
call void @_Z10sideeffecti(i32 0)
- call void @_Z10sideeffecti(i32 10)
br label %14
; <label>:9: ; preds = %5
; <label>:12: ; preds = %9
call void @_Z10sideeffecti(i32 1)
- call void @_Z10sideeffecti(i32 11)
br label %14
; <label>:13: ; preds = %9
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
; Source:
;
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
coldbb:
call void @sink()
call void @sideeffect()
- call void @sideeffect()
br i1 undef, label %if.end, label %coldbb2
coldbb2:
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
; Source:
;
; if (cond1) {
; if (cond2) { // This is the first cold region we visit.
; sideeffect(0);
-; sideeffect(10);
; sink(0);
; }
;
; // There's a larger, overlapping cold region here. But we ignore it.
; // This could be improved.
; sideeffect(1);
-; sideeffect(11);
; sink(1);
; }
; }
; <label>:10: ; preds = %7
call void @_Z10sideeffecti(i32 0)
- call void @_Z10sideeffecti(i32 10)
call void @_Z4sinki(i32 0) #3
br label %11
; <label>:11: ; preds = %10, %7
call void @_Z10sideeffecti(i32 1)
- call void @_Z10sideeffecti(i32 11)
call void @_Z4sinki(i32 1) #3
br label %12
; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.1
; CHECK: call void @_Z10sideeffecti(i32 0)
-; CHECK: call void @_Z10sideeffecti(i32 10)
declare void @_Z10sideeffecti(i32)
-; RUN: opt -hotcoldsplit -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
-; RUN: opt -passes=hotcoldsplit -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit-threshold=0 -passes=hotcoldsplit -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
; Make sure this compiles. This test used to fail with an invalid phi node: the
; two predecessors were outlined and the SSA representation was invalid.
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
if.end: ; preds = %entry
call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
call void @sink()
- call void @sink()
- call void @sink()
ret void
}
-; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
coldbb:
call void @sink()
call void @sideeffect()
- call void @sideeffect()
br i1 undef, label %if.end, label %coldbb2
coldbb2:
coldbb:
call void @sink()
call void @sideeffect()
- call void @sideeffect()
br i1 undef, label %if.end, label %coldbb2
coldbb2:
-; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
continue_exception:
call void @sideeffect(i32 0)
- call void @sideeffect(i32 1)
call void @sink()
resume i32 undef