]> granicus.if.org Git - llvm/commitdiff
[X86] Filter out tuning feature flags and a few ISA feature flags when checking for...
author: Craig Topper <craig.topper@intel.com>
Tue, 19 Feb 2019 17:05:11 +0000 (17:05 +0000)
committer: Craig Topper <craig.topper@intel.com>
Tue, 19 Feb 2019 17:05:11 +0000 (17:05 +0000)
Tuning flags don't have any effect on the available instructions so aren't a good reason to prevent inlining.

There are also some ISA flags that don't have any intrinsics or ABI requirements that we can exclude. I've included only the most basic ones like cmpxchg16b and lahfsahf. These are interesting because they aren't present in all 64-bit CPUs, but we have codegen workarounds when they aren't present.

Loosening these checks can help with scenarios where a caller has a more specific CPU than a callee. The default tuning flags on our generic 'x86-64' CPU can currently make it inline incompatible with other CPUs. I've also added an example test for 'nocona' and 'prescott' where 'nocona' is just a 64-bit capable version of 'prescott', but in 32-bit mode they should be completely compatible.

I've based the implementation here on the similar code in AMDGPU.

Differential Revision: https://reviews.llvm.org/D58371

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354355 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86TargetTransformInfo.cpp
lib/Target/X86/X86TargetTransformInfo.h
test/Transforms/Inline/X86/inline-target-cpu-i686.ll [new file with mode: 0644]
test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll [new file with mode: 0644]

index 9ae4a928fb5c547df9727acda9a06d4c1753721d..a7ecfc2e58680c6d748247106aeb2fa4a7b9d2ea 100644 (file)
@@ -3065,10 +3065,9 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
   const FeatureBitset &CalleeBits =
       TM.getSubtargetImpl(*Callee)->getFeatureBits();
 
-  // FIXME: This is likely too limiting as it will include subtarget features
-  // that we might not care about for inlining, but it is conservatively
-  // correct.
-  return (CallerBits & CalleeBits) == CalleeBits;
+  FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
+  FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
+  return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
 }
 
 const X86TTIImpl::TTI::MemCmpExpansionOptions *
index b2e3e05cbe32dc95def2e368f9d412e529e3d21a..5035818fde990d88f86835963118562f3815d503 100644 (file)
@@ -35,6 +35,60 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
   const X86Subtarget *getST() const { return ST; }
   const X86TargetLowering *getTLI() const { return TLI; }
 
+  const FeatureBitset InlineFeatureIgnoreList = {
+    // This indicates the CPU is 64 bit capable not that we are in 64-bit mode.
+    X86::Feature64Bit,
+
+    // These features don't have any intrinsics or ABI effect.
+    X86::FeatureNOPL,
+    X86::FeatureCMPXCHG16B,
+    X86::FeatureLAHFSAHF,
+
+    // Codegen control options.
+    X86::FeatureFast11ByteNOP,
+    X86::FeatureFast15ByteNOP,
+    X86::FeatureFastBEXTR,
+    X86::FeatureFastHorizontalOps,
+    X86::FeatureFastLZCNT,
+    X86::FeatureFastPartialYMMorZMMWrite,
+    X86::FeatureFastScalarFSQRT,
+    X86::FeatureFastSHLDRotate,
+    X86::FeatureFastVariableShuffle,
+    X86::FeatureFastVectorFSQRT,
+    X86::FeatureLEAForSP,
+    X86::FeatureLEAUsesAG,
+    X86::FeatureLZCNTFalseDeps,
+    X86::FeatureMacroFusion,
+    X86::FeatureMergeToThreeWayBranch,
+    X86::FeaturePadShortFunctions,
+    X86::FeaturePOPCNTFalseDeps,
+    X86::FeatureSSEUnalignedMem,
+    X86::FeatureSlow3OpsLEA,
+    X86::FeatureSlowDivide32,
+    X86::FeatureSlowDivide64,
+    X86::FeatureSlowIncDec,
+    X86::FeatureSlowLEA,
+    X86::FeatureSlowPMADDWD,
+    X86::FeatureSlowPMULLD,
+    X86::FeatureSlowSHLD,
+    X86::FeatureSlowTwoMemOps,
+    X86::FeatureSlowUAMem16,
+
+    // Perf-tuning flags.
+    X86::FeatureHasFastGather,
+    X86::FeatureSlowUAMem32,
+
+    // Based on whether user set the -mprefer-vector-width command line.
+    X86::FeaturePrefer256Bit,
+
+    // CPU name enums. These just follow CPU string.
+    X86::ProcIntelAtom,
+    X86::ProcIntelGLM,
+    X86::ProcIntelGLP,
+    X86::ProcIntelSLM,
+    X86::ProcIntelTRM,
+  };
+
 public:
   explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/test/Transforms/Inline/X86/inline-target-cpu-i686.ll b/test/Transforms/Inline/X86/inline-target-cpu-i686.ll
new file mode 100644 (file)
index 0000000..a032544
--- /dev/null
@@ -0,0 +1,15 @@
+; RUN: opt < %s -mtriple=i686-unknown-unknown -S -inline | FileCheck %s
+
+define i32 @func_target_cpu_nocona() #0 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_prescott_call_target_cpu_nocona() #1 {
+  %call = call i32 @func_target_cpu_nocona()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind "target-cpu"="nocona" }
+attributes #1 = { nounwind "target-cpu"="prescott" }
diff --git a/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll b/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll
new file mode 100644 (file)
index 0000000..fa04a77
--- /dev/null
@@ -0,0 +1,43 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -inline | FileCheck %s
+
+define i32 @func_target_cpu_base() #0 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_k8_call_target_cpu_base() #1 {
+  %call = call i32 @func_target_cpu_base()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 {
+  %call = call i32 @func_target_cpu_base()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_goldmont_call_target_cpu_base() #3 {
+  %call = call i32 @func_target_cpu_base()
+  ret i32 %call
+}
+
+define i32 @func_target_cpu_nocona() #4 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 {
+  %call = call i32 @func_target_cpu_nocona()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind "target-cpu"="x86-64" }
+attributes #1 = { nounwind "target-cpu"="k8" }
+attributes #2 = { nounwind "target-cpu"="nehalem" }
+attributes #3 = { nounwind "target-cpu"="goldmont" }
+attributes #4 = { nounwind "target-cpu"="nocona" "target-features"="-sse3" }