def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
-def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
- "Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
def : Proc<"pentium3", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
FeatureSSE1, FeatureFXSR]>;
def : Proc<"pentium3m", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureSSE1, FeatureFXSR]>;
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
def : ProcessorModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureSSE2, FeatureFXSR]>;
def : ProcessorModel<"pentium4", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
def : ProcessorModel<"pentium4m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureSSE2, FeatureFXSR]>;
// Intel Quark.
def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureFXSR]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureFXSR]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem
+ FeatureCMPXCHG16B
]>;
// Intel Core 2 Solo/Duo.
FeatureSSSE3,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
FeatureSSE41,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureMOVBE,
- FeatureSlowBTMem,
FeatureLEAForSP,
FeatureSlowDivide32,
FeatureSlowDivide64,
FeaturePRFCHW,
FeatureSlowLEA,
FeatureSlowIncDec,
- FeatureSlowBTMem,
FeatureSlowPMULLD,
FeatureLAHFSAHF
]>;
FeatureSlowTwoMemOps,
FeatureSlowLEA,
FeatureSlowIncDec,
- FeatureSlowBTMem,
FeatureLAHFSAHF,
FeatureMPX,
FeatureSHA,
FeatureSSE42,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeaturePOPCNT,
FeatureLAHFSAHF,
FeatureMacroFusion
FeatureSSE42,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
SNBFeatures.Value, [
- FeatureSlowBTMem,
FeatureSlowUAMem32
]>;
def : SandyBridgeProc<"sandybridge">;
class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
IVBFeatures.Value, [
- FeatureSlowBTMem,
FeatureSlowUAMem32
]>;
def : IvyBridgeProc<"ivybridge">;
class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
HSWFeatures.Value, [
- ProcIntelHSW,
- FeatureSlowBTMem
+ ProcIntelHSW
]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
]>;
class BroadwellProc<string Name> : ProcModel<Name, HaswellModel,
BDWFeatures.Value, [
- ProcIntelBDW,
- FeatureSlowBTMem
+ ProcIntelBDW
]>;
def : BroadwellProc<"broadwell">;
class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
SKLFeatures.Value, [
- ProcIntelSKL,
- FeatureSlowBTMem
+ ProcIntelSKL
]>;
def : SkylakeClientProc<"skylake">;
class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
ProcIntelKNL,
- FeatureSlowBTMem,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite
]>;
class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
ProcIntelKNL,
- FeatureSlowBTMem,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite
]>;
class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
SKXFeatures.Value, [
- ProcIntelSKX,
- FeatureSlowBTMem
+ ProcIntelSKX
]>;
def : SkylakeServerProc<"skylake-avx512">;
def : SkylakeServerProc<"skx">; // Legacy alias.
class CannonlakeProc<string Name> : ProcModel<Name, HaswellModel,
CNLFeatures.Value, [
- ProcIntelCNL,
- FeatureSlowBTMem
+ ProcIntelCNL
]>;
def : CannonlakeProc<"cannonlake">;
def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"athlon", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon-tbird", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
- Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
FeatureSlowSHLD]>;
+def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+ Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
def : Proc<"athlon-xp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
- Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
+ Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
def : Proc<"athlon-mp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
- Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
+ Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
def : Proc<"k8", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"opteron", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon64", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon-fx", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"k8-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"opteron-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon64-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"amdfam10", [FeatureX87, FeatureSSE4A, Feature3DNowA,
FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
+ FeaturePOPCNT, FeatureSlowSHLD,
FeatureLAHFSAHF]>;
def : Proc<"barcelona", [FeatureX87, FeatureSSE4A, Feature3DNowA,
FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
+ FeaturePOPCNT, FeatureSlowSHLD,
FeatureLAHFSAHF]>;
// Bobcat
FeatureFXSR,
Feature64Bit,
FeatureSlow3OpsLEA,
- FeatureSlowBTMem,
FeatureSlowIncDec,
FeatureMacroFusion
]>;
"MF->getFunction()->optForSize()">;
}
-def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
-// only for now.
+// only for now. These instructions are also slow on modern CPUs so that's
+// another reason to avoid generating them.
let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
- // [(X86bt (loadi16 addr:$src1), GR16:$src2),
- // (implicit EFLAGS)]
[], IIC_BT_MR
- >, OpSize16, TB, Requires<[FastBTMem]>, NotMemoryFoldable;
+ >, OpSize16, TB, NotMemoryFoldable;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
- // [(X86bt (loadi32 addr:$src1), GR32:$src2),
- // (implicit EFLAGS)]
[], IIC_BT_MR
- >, OpSize32, TB, Requires<[FastBTMem]>, NotMemoryFoldable;
+ >, OpSize32, TB, NotMemoryFoldable;
def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- // [(X86bt (loadi64 addr:$src1), GR64:$src2),
- // (implicit EFLAGS)]
[], IIC_BT_MR
>, TB, NotMemoryFoldable;
}
IIC_BT_RI>, TB;
} // SchedRW
-// Note that these instructions don't need FastBTMem because that
-// only applies when the other operand is in a register. When it's
-// an immediate, bt is still fast.
+// Note that these instructions aren't slow because that only applies when the
+// other operand is in a register. When it's an immediate, bt is still fast.
let SchedRW = [WriteALU] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",