const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
- // FIXME: This is likely too limiting as it will include subtarget features
- // that we might not care about for inlining, but it is conservatively
- // correct.
- return (CallerBits & CalleeBits) == CalleeBits;
+ FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
+ FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
+ return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
}
const X86TTIImpl::TTI::MemCmpExpansionOptions *
const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
+ const FeatureBitset InlineFeatureIgnoreList = {
+ // This indicates the CPU is 64-bit capable, not that we are in 64-bit mode.
+ X86::Feature64Bit,
+
+ // These features have no associated intrinsics and no ABI effect.
+ X86::FeatureNOPL,
+ X86::FeatureCMPXCHG16B,
+ X86::FeatureLAHFSAHF,
+
+ // Codegen control options.
+ X86::FeatureFast11ByteNOP,
+ X86::FeatureFast15ByteNOP,
+ X86::FeatureFastBEXTR,
+ X86::FeatureFastHorizontalOps,
+ X86::FeatureFastLZCNT,
+ X86::FeatureFastPartialYMMorZMMWrite,
+ X86::FeatureFastScalarFSQRT,
+ X86::FeatureFastSHLDRotate,
+ X86::FeatureFastVariableShuffle,
+ X86::FeatureFastVectorFSQRT,
+ X86::FeatureLEAForSP,
+ X86::FeatureLEAUsesAG,
+ X86::FeatureLZCNTFalseDeps,
+ X86::FeatureMacroFusion,
+ X86::FeatureMergeToThreeWayBranch,
+ X86::FeaturePadShortFunctions,
+ X86::FeaturePOPCNTFalseDeps,
+ X86::FeatureSSEUnalignedMem,
+ X86::FeatureSlow3OpsLEA,
+ X86::FeatureSlowDivide32,
+ X86::FeatureSlowDivide64,
+ X86::FeatureSlowIncDec,
+ X86::FeatureSlowLEA,
+ X86::FeatureSlowPMADDWD,
+ X86::FeatureSlowPMULLD,
+ X86::FeatureSlowSHLD,
+ X86::FeatureSlowTwoMemOps,
+ X86::FeatureSlowUAMem16,
+
+ // Perf-tuning flags.
+ X86::FeatureHasFastGather,
+ X86::FeatureSlowUAMem32,
+
+ // Set based on whether the user passed the -mprefer-vector-width
+ // command-line option.
+ X86::FeaturePrefer256Bit,
+
+ // CPU name enums. These just follow the CPU string.
+ X86::ProcIntelAtom,
+ X86::ProcIntelGLM,
+ X86::ProcIntelGLP,
+ X86::ProcIntelSLM,
+ X86::ProcIntelTRM,
+ };
+
public:
explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
--- /dev/null
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -inline | FileCheck %s
+
+define i32 @func_target_cpu_base() #0 {
+ ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_k8_call_target_cpu_base() #1 {
+ %call = call i32 @func_target_cpu_base()
+ ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 {
+ %call = call i32 @func_target_cpu_base()
+ ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_goldmont_call_target_cpu_base() #3 {
+ %call = call i32 @func_target_cpu_base()
+ ret i32 %call
+}
+
+define i32 @func_target_cpu_nocona() #4 {
+ ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 {
+ %call = call i32 @func_target_cpu_nocona()
+ ret i32 %call
+}
+
+attributes #0 = { nounwind "target-cpu"="x86-64" }
+attributes #1 = { nounwind "target-cpu"="k8" }
+attributes #2 = { nounwind "target-cpu"="nehalem" }
+attributes #3 = { nounwind "target-cpu"="goldmont" }
+attributes #4 = { nounwind "target-cpu"="nocona" "target-features"="-sse3" }