Summary: With an accurate sample profile, we can do more aggressive size optimization. For some size-critical applications, this can reduce the text size by 20%.
Reviewers: davidxl, rsmith
Reviewed By: davidxl, rsmith
Subscribers: mehdi_amini, eraman, sanjoy, cfe-commits
Differential Revision: https://reviews.llvm.org/D37091
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@311707 91177308-0d34-0410-b5e6-96231b3b80d8
def fprofile_sample_use_EQ : Joined<["-"], "fprofile-sample-use=">,
Group<f_Group>, Flags<[DriverOption, CC1Option]>,
HelpText<"Enable sample-based profile guided optimizations">;
+def fprofile_sample_accurate : Flag<["-"], "fprofile-sample-accurate">,
+ Group<f_Group>, Flags<[DriverOption, CC1Option]>,
+ HelpText<"Specifies that the sample profile is accurate">,
+ DocBrief<[{Specifies that the sample profile is accurate. If the sample
+ profile is accurate, callsites without profile samples are marked
+ as cold. Otherwise, treat callsites without profile samples as if
+ we have no profile}]>;
+def fno_profile_sample_accurate : Flag<["-"], "fno-profile-sample-accurate">,
+ Group<f_Group>, Flags<[DriverOption]>;
def fauto_profile : Flag<["-"], "fauto-profile">, Group<f_Group>,
Alias<fprofile_sample_use>;
def fno_auto_profile : Flag<["-"], "fno-auto-profile">, Group<f_Group>,
Alias<fno_profile_sample_use>;
def fauto_profile_EQ : Joined<["-"], "fauto-profile=">,
Alias<fprofile_sample_use_EQ>;
+def fauto_profile_accurate : Flag<["-"], "fauto-profile-accurate">,
+ Group<f_Group>, Alias<fprofile_sample_accurate>;
+def fno_auto_profile_accurate : Flag<["-"], "fno-auto-profile-accurate">,
+ Group<f_Group>, Alias<fno_profile_sample_accurate>;
def fdebug_info_for_profiling : Flag<["-"], "fdebug-info-for-profiling">, Group<f_Group>,
Flags<[CC1Option]>,
HelpText<"Emit extra debug info to make sample profile more accurate.">;
CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables.
CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
+CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
/// Attempt to use register sized accesses to bit-fields in structures, when
/// possible.
Fn->addFnAttr("no-jump-tables",
llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables));
+ // Tag the function with the "profile-sample-accurate" attribute when the
+ // ProfileSampleAccurate codegen option is enabled.
+ if (CGM.getCodeGenOpts().ProfileSampleAccurate)
+ Fn->addFnAttr("profile-sample-accurate");
+
if (getLangOpts().OpenCL) {
// Add metadata for a kernel function.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
true))
CmdArgs.push_back("-fno-jump-tables");
+ if (Args.hasFlag(options::OPT_fprofile_sample_accurate,
+ options::OPT_fno_profile_sample_accurate, false))
+ CmdArgs.push_back("-fprofile-sample-accurate");
+
if (!Args.hasFlag(options::OPT_fpreserve_as_comments,
options::OPT_fno_preserve_as_comments, true))
CmdArgs.push_back("-fno-preserve-as-comments");
Opts.NoUseJumpTables = Args.hasArg(OPT_fno_jump_tables);
+ Opts.ProfileSampleAccurate = Args.hasArg(OPT_fprofile_sample_accurate);
+
Opts.PrepareForLTO = Args.hasArg(OPT_flto, OPT_flto_EQ);
Opts.EmitSummaryIndex = false;
if (Arg *A = Args.getLastArg(OPT_flto_EQ)) {
--- /dev/null
+// Test to ensure -fprofile-sample-accurate is honored by clang: the emitted
+// LLVM IR must carry the "profile-sample-accurate" function attribute.
+// RUN: %clang -S -emit-llvm %s -fprofile-sample-accurate -o - | FileCheck %s
+
+// CHECK: define void @foo()
+// CHECK: attributes {{.*}} "profile-sample-accurate"
+void foo() {
+}
// CHECK-REROLL-LOOPS: "-freroll-loops"
// CHECK-NO-REROLL-LOOPS-NOT: "-freroll-loops"
+// RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
+// CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"
+
// RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-SAMPLE-PROFILE %s
// CHECK-SAMPLE-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
--- /dev/null
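+// Test to ensure -fprofile-sample-accurate is honored in ThinLTO: the
+// "profile-sample-accurate" function attribute must still be present in the
+// IR emitted when compiling the bitcode against the ThinLTO index.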
+// RUN: %clang -O2 %s -flto=thin -fprofile-sample-accurate -c -o %t.o
+// RUN: llvm-lto -thinlto -o %t %t.o
+// RUN: %clang_cc1 -O2 -x ir %t.o -fthinlto-index=%t.thinlto.bc -emit-llvm -o - | FileCheck %s
+
+// CHECK: define void @foo()
+// CHECK: attributes {{.*}} "profile-sample-accurate"
+void foo() {
+}