From: Craig Topper Date: Tue, 19 Feb 2019 17:05:11 +0000 (+0000) Subject: [X86] Filter out tuning feature flags and a few ISA feature flags when checking for... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7300bbb81417143f104b75d2491a96e4f2437dcd;p=llvm [X86] Filter out tuning feature flags and a few ISA feature flags when checking for function inline compatibility. Tuning flags don't have any effect on the available instructions so aren't a good reason to prevent inlining. There are also some ISA flags that don't have any intrinsics or ABI requirements that we can exclude. I've put only the most basic ones like cmpxchg16b and lahfsahf. These are interesting because they aren't present in all 64-bit CPUs, but we have codegen workarounds when they aren't present. Loosening these checks can help with scenarios where a caller has a more specific CPU than a callee. The default tuning flags on our generic 'x86-64' CPU can currently make it inline incompatible with other CPUs. I've also added an example test for 'nocona' and 'prescott' where 'nocona' is just a 64-bit capable version of 'prescott' but in 32-bit mode they should be completely compatible. I've based the implementation here on the similar code in AMDGPU. Differential Revision: https://reviews.llvm.org/D58371 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354355 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 9ae4a928fb5..a7ecfc2e586 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3065,10 +3065,9 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, const FeatureBitset &CalleeBits = TM.getSubtargetImpl(*Callee)->getFeatureBits(); - // FIXME: This is likely too limiting as it will include subtarget features - // that we might not care about for inlining, but it is conservatively - // correct. 
- return (CallerBits & CalleeBits) == CalleeBits; + FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; + FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; + return (RealCallerBits & RealCalleeBits) == RealCalleeBits; } const X86TTIImpl::TTI::MemCmpExpansionOptions * diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index b2e3e05cbe3..5035818fde9 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -35,6 +35,60 @@ class X86TTIImpl : public BasicTTIImplBase { const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } + const FeatureBitset InlineFeatureIgnoreList = { + // This indicates the CPU is 64 bit capable not that we are in 64-bit mode. + X86::Feature64Bit, + + // These features don't have any intrinsics or ABI effect. + X86::FeatureNOPL, + X86::FeatureCMPXCHG16B, + X86::FeatureLAHFSAHF, + + // Codegen control options. + X86::FeatureFast11ByteNOP, + X86::FeatureFast15ByteNOP, + X86::FeatureFastBEXTR, + X86::FeatureFastHorizontalOps, + X86::FeatureFastLZCNT, + X86::FeatureFastPartialYMMorZMMWrite, + X86::FeatureFastScalarFSQRT, + X86::FeatureFastSHLDRotate, + X86::FeatureFastVariableShuffle, + X86::FeatureFastVectorFSQRT, + X86::FeatureLEAForSP, + X86::FeatureLEAUsesAG, + X86::FeatureLZCNTFalseDeps, + X86::FeatureMacroFusion, + X86::FeatureMergeToThreeWayBranch, + X86::FeaturePadShortFunctions, + X86::FeaturePOPCNTFalseDeps, + X86::FeatureSSEUnalignedMem, + X86::FeatureSlow3OpsLEA, + X86::FeatureSlowDivide32, + X86::FeatureSlowDivide64, + X86::FeatureSlowIncDec, + X86::FeatureSlowLEA, + X86::FeatureSlowPMADDWD, + X86::FeatureSlowPMULLD, + X86::FeatureSlowSHLD, + X86::FeatureSlowTwoMemOps, + X86::FeatureSlowUAMem16, + + // Perf-tuning flags. + X86::FeatureHasFastGather, + X86::FeatureSlowUAMem32, + + // Based on whether user set the -mprefer-vector-width command line. 
+ X86::FeaturePrefer256Bit, + + // CPU name enums. These just follow CPU string. + X86::ProcIntelAtom, + X86::ProcIntelGLM, + X86::ProcIntelGLP, + X86::ProcIntelSLM, + X86::ProcIntelTRM, + }; + public: explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/test/Transforms/Inline/X86/inline-target-cpu-i686.ll b/test/Transforms/Inline/X86/inline-target-cpu-i686.ll new file mode 100644 index 00000000000..a0325441ed9 --- /dev/null +++ b/test/Transforms/Inline/X86/inline-target-cpu-i686.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -mtriple=i686-unknown-unknown -S -inline | FileCheck %s + +define i32 @func_target_cpu_nocona() #0 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_prescott_call_target_cpu_nocona() #1 { + %call = call i32 @func_target_cpu_nocona() + ret i32 %call +} + +attributes #0 = { nounwind "target-cpu"="nocona" } +attributes #1 = { nounwind "target-cpu"="prescott" } diff --git a/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll b/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll new file mode 100644 index 00000000000..fa04a77d4a5 --- /dev/null +++ b/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -inline | FileCheck %s + +define i32 @func_target_cpu_base() #0 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_k8_call_target_cpu_base() #1 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 
@target_cpu_target_goldmont_call_target_cpu_base() #3 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +define i32 @func_target_cpu_nocona() #4 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 { + %call = call i32 @func_target_cpu_nocona() + ret i32 %call +} + +attributes #0 = { nounwind "target-cpu"="x86-64" } +attributes #1 = { nounwind "target-cpu"="k8" } +attributes #2 = { nounwind "target-cpu"="nehalem" } +attributes #3 = { nounwind "target-cpu"="goldmont" } +attributes #4 = { nounwind "target-cpu"="nocona" "target-features"="-sse3" }