]> granicus.if.org Git - llvm/commitdiff
[X86] Remove the SlowBTMem feature flag entirely
author: Craig Topper <craig.topper@intel.com>
Sun, 15 Oct 2017 16:57:33 +0000 (16:57 +0000)
committer: Craig Topper <craig.topper@intel.com>
Sun, 15 Oct 2017 16:57:33 +0000 (16:57 +0000)
Turns out we have no patterns on the instructions that were using this feature flag for other reasons. These instructions are slow on all modern CPUs so it seems unlikely that we will spend any effort supporting these instructions going forward. So we might as well just kill off the feature flag and just fix up the comments.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315862 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86.td
lib/Target/X86/X86InstrInfo.td
lib/Target/X86/X86Subtarget.cpp
lib/Target/X86/X86Subtarget.h

index 2e7902a606472a62df7b59f3c0d0b5c8f79b5e2e..03b5a6064c9fce9f966f32fcc99c5af859260afb 100644 (file)
@@ -95,8 +95,6 @@ def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                                       "64-bit with cmpxchg16b",
                                       [Feature64Bit]>;
-def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
-                                       "Bit testing of memory is slow">;
 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                                        "SHLD instruction is slow">;
 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
@@ -336,7 +334,7 @@ def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 def : Proc<"pentium3",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
                                FeatureSSE1, FeatureFXSR]>;
 def : Proc<"pentium3m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>;
+                               FeatureSSE1, FeatureFXSR]>;
 
 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
 // The intent is to enable it for pentium4 which is the current default
@@ -350,7 +348,7 @@ def : Proc<"pentium3m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 
 def : ProcessorModel<"pentium-m", GenericPostRAModel,
                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+                      FeatureSSE2, FeatureFXSR]>;
 
 def : ProcessorModel<"pentium4", GenericPostRAModel,
                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
@@ -358,7 +356,7 @@ def : ProcessorModel<"pentium4", GenericPostRAModel,
 
 def : ProcessorModel<"pentium4m", GenericPostRAModel,
                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+                      FeatureSSE2, FeatureFXSR]>;
 
 // Intel Quark.
 def : Proc<"lakemont",        []>;
@@ -366,20 +364,19 @@ def : Proc<"lakemont",        []>;
 // Intel Core Duo.
 def : ProcessorModel<"yonah", SandyBridgeModel,
                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
-                      FeatureFXSR, FeatureSlowBTMem]>;
+                      FeatureFXSR]>;
 
 // NetBurst.
 def : ProcessorModel<"prescott", GenericPostRAModel,
                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
-                      FeatureFXSR, FeatureSlowBTMem]>;
+                      FeatureFXSR]>;
 def : ProcessorModel<"nocona", GenericPostRAModel, [
   FeatureX87,
   FeatureSlowUAMem16,
   FeatureMMX,
   FeatureSSE3,
   FeatureFXSR,
-  FeatureCMPXCHG16B,
-  FeatureSlowBTMem
+  FeatureCMPXCHG16B
 ]>;
 
 // Intel Core 2 Solo/Duo.
@@ -390,7 +387,6 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
   FeatureSSSE3,
   FeatureFXSR,
   FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
   FeatureLAHFSAHF,
   FeatureMacroFusion
 ]>;
@@ -401,7 +397,6 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
   FeatureSSE41,
   FeatureFXSR,
   FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
   FeatureLAHFSAHF,
   FeatureMacroFusion
 ]>;
@@ -416,7 +411,6 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
   FeatureFXSR,
   FeatureCMPXCHG16B,
   FeatureMOVBE,
-  FeatureSlowBTMem,
   FeatureLEAForSP,
   FeatureSlowDivide32,
   FeatureSlowDivide64,
@@ -444,7 +438,6 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
   FeaturePRFCHW,
   FeatureSlowLEA,
   FeatureSlowIncDec,
-  FeatureSlowBTMem,
   FeatureSlowPMULLD,
   FeatureLAHFSAHF
 ]>;
@@ -466,7 +459,6 @@ class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [
   FeatureSlowTwoMemOps,
   FeatureSlowLEA,
   FeatureSlowIncDec,
-  FeatureSlowBTMem,
   FeatureLAHFSAHF,
   FeatureMPX,
   FeatureSHA,
@@ -488,7 +480,6 @@ class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
   FeatureSSE42,
   FeatureFXSR,
   FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
   FeaturePOPCNT,
   FeatureLAHFSAHF,
   FeatureMacroFusion
@@ -504,7 +495,6 @@ class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
   FeatureSSE42,
   FeatureFXSR,
   FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -547,7 +537,6 @@ def SNBFeatures : ProcessorFeatures<[], [
 
 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
                                                SNBFeatures.Value, [
-  FeatureSlowBTMem,
   FeatureSlowUAMem32
 ]>;
 def : SandyBridgeProc<"sandybridge">;
@@ -561,7 +550,6 @@ def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
 
 class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
                                              IVBFeatures.Value, [
-  FeatureSlowBTMem,
   FeatureSlowUAMem32
 ]>;
 def : IvyBridgeProc<"ivybridge">;
@@ -579,8 +567,7 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
 
 class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
                                            HSWFeatures.Value, [
-  ProcIntelHSW,
-  FeatureSlowBTMem
+  ProcIntelHSW
 ]>;
 def : HaswellProc<"haswell">;
 def : HaswellProc<"core-avx2">; // Legacy alias.
@@ -591,8 +578,7 @@ def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
 ]>;
 class BroadwellProc<string Name> : ProcModel<Name, HaswellModel,
                                              BDWFeatures.Value, [
-  ProcIntelBDW,
-  FeatureSlowBTMem
+  ProcIntelBDW
 ]>;
 def : BroadwellProc<"broadwell">;
 
@@ -608,8 +594,7 @@ def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
 
 class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
                                                  SKLFeatures.Value, [
-  ProcIntelSKL,
-  FeatureSlowBTMem
+  ProcIntelSKL
 ]>;
 def : SkylakeClientProc<"skylake">;
 
@@ -632,7 +617,6 @@ def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
 class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
                                                   KNLFeatures.Value, [
   ProcIntelKNL,
-  FeatureSlowBTMem,
   FeatureSlowTwoMemOps,
   FeatureFastPartialYMMorZMMWrite
 ]>;
@@ -641,7 +625,6 @@ def : KnightsLandingProc<"knl">;
 class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
                                                KNLFeatures.Value, [
   ProcIntelKNL,
-  FeatureSlowBTMem,
   FeatureSlowTwoMemOps,
   FeatureFastPartialYMMorZMMWrite
 ]>;
@@ -659,8 +642,7 @@ def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
 
 class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
                                                  SKXFeatures.Value, [
-  ProcIntelSKX,
-  FeatureSlowBTMem
+  ProcIntelSKX
 ]>;
 def : SkylakeServerProc<"skylake-avx512">;
 def : SkylakeServerProc<"skx">; // Legacy alias.
@@ -673,8 +655,7 @@ def CNLFeatures : ProcessorFeatures<SKXFeatures.Value, [
 
 class CannonlakeProc<string Name> : ProcModel<Name, HaswellModel,
                                               CNLFeatures.Value, [
-  ProcIntelCNL,
-  FeatureSlowBTMem
+  ProcIntelCNL
 ]>;
 def : CannonlakeProc<"cannonlake">;
 
@@ -684,46 +665,43 @@ def : Proc<"k6",              [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
 def : Proc<"k6-2",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 def : Proc<"k6-3",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 def : Proc<"athlon",          [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"athlon-tbird",    [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-4",        [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
-                               Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
                                FeatureSlowSHLD]>;
+def : Proc<"athlon-4",        [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+                               Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
 def : Proc<"athlon-xp",       [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
-                               Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
-                               FeatureSlowSHLD]>;
+                               Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
 def : Proc<"athlon-mp",       [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
-                               Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
-                               FeatureSlowSHLD]>;
+                               Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
 def : Proc<"k8",              [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
                                Feature3DNowA, FeatureFXSR, Feature64Bit,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"opteron",         [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
                                Feature3DNowA, FeatureFXSR, Feature64Bit,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"athlon64",        [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
                                Feature3DNowA, FeatureFXSR, Feature64Bit,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"athlon-fx",       [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
                                Feature3DNowA, FeatureFXSR, Feature64Bit,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"k8-sse3",         [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
                                Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"opteron-sse3",    [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
                                Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"athlon64-sse3",   [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
                                Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureSlowSHLD]>;
+                               FeatureSlowSHLD]>;
 def : Proc<"amdfam10",        [FeatureX87, FeatureSSE4A, Feature3DNowA,
                                FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
+                               FeaturePOPCNT, FeatureSlowSHLD,
                                FeatureLAHFSAHF]>;
 def : Proc<"barcelona",       [FeatureX87, FeatureSSE4A, Feature3DNowA,
                                FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
+                               FeaturePOPCNT, FeatureSlowSHLD,
                                FeatureLAHFSAHF]>;
 
 // Bobcat
@@ -929,7 +907,6 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [
   FeatureFXSR,
   Feature64Bit,
   FeatureSlow3OpsLEA,
-  FeatureSlowBTMem,
   FeatureSlowIncDec,
   FeatureMacroFusion
 ]>;
index d5e2caa8f281d7b75f3ca22d70ad1cd4501f65d7..2cde6c02e95cd608974c23b4555207ff606945c5 100644 (file)
@@ -904,7 +904,6 @@ let RecomputePerFunction = 1 in {
                             "MF->getFunction()->optForSize()">;
 }
 
-def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr  : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
 def FavorMemIndirectCall  : Predicate<"!Subtarget->slowTwoMemOps()">;
 def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
@@ -1672,25 +1671,20 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
 // Unlike with the register+register form, the memory+register form of the
 // bt instruction does not ignore the high bits of the index. From ISel's
 // perspective, this is pretty bizarre. Make these instructions disassembly
-// only for now.
+// only for now. These instructions are also slow on modern CPUs so that's
+// another reason to avoid generating them.
 
 let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
   def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                  "bt{w}\t{$src2, $src1|$src1, $src2}",
-  //               [(X86bt (loadi16 addr:$src1), GR16:$src2),
-  //                (implicit EFLAGS)]
                  [], IIC_BT_MR
-                 >, OpSize16, TB, Requires<[FastBTMem]>, NotMemoryFoldable;
+                 >, OpSize16, TB, NotMemoryFoldable;
   def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                  "bt{l}\t{$src2, $src1|$src1, $src2}",
-  //               [(X86bt (loadi32 addr:$src1), GR32:$src2),
-  //                (implicit EFLAGS)]
                  [], IIC_BT_MR
-                 >, OpSize32, TB, Requires<[FastBTMem]>, NotMemoryFoldable;
+                 >, OpSize32, TB, NotMemoryFoldable;
   def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  "bt{q}\t{$src2, $src1|$src1, $src2}",
-  //               [(X86bt (loadi64 addr:$src1), GR64:$src2),
-  //                (implicit EFLAGS)]
                   [], IIC_BT_MR
                   >, TB, NotMemoryFoldable;
 }
@@ -1710,9 +1704,8 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                 IIC_BT_RI>, TB;
 } // SchedRW
 
-// Note that these instructions don't need FastBTMem because that
-// only applies when the other operand is in a register. When it's
-// an immediate, bt is still fast.
+// Note that these instructions aren't slow because that only applies when the
+// other operand is in a register. When it's an immediate, bt is still fast.
 let SchedRW = [WriteALU] in {
 def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
index 13062ca8cfe28dd5049517098c0a2c4a9c1ec6fb..0de5619cff28dd17096acbee4d4f3e8f714f602d 100644 (file)
@@ -345,7 +345,6 @@ void X86Subtarget::initializeEnvironment() {
   HasSGX = false;
   HasCLFLUSHOPT = false;
   HasCLWB = false;
-  IsBTMemSlow = false;
   IsPMULLDSlow = false;
   IsSHLDSlow = false;
   IsUAMem16Slow = false;
index 5816e860e0162f28ae827b6507a71cd785373fb1..d866ecc2ef95ae2e45707858bed0d7679477c992 100644 (file)
@@ -193,9 +193,6 @@ protected:
   /// Processor has Prefetch with intent to Write instruction
   bool HasPFPREFETCHWT1;
 
-  /// True if BT (bit test) of memory instructions are slow.
-  bool IsBTMemSlow;
-
   /// True if SHLD instructions are slow.
   bool IsSHLDSlow;
 
@@ -489,7 +486,6 @@ public:
   bool hasLAHFSAHF() const { return HasLAHFSAHF; }
   bool hasMWAITX() const { return HasMWAITX; }
   bool hasCLZERO() const { return HasCLZERO; }
-  bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isSHLDSlow() const { return IsSHLDSlow; }
   bool isPMULLDSlow() const { return IsPMULLDSlow; }
   bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }