From: Simon Pilgrim Date: Tue, 31 Jul 2018 13:00:51 +0000 (+0000) Subject: Revert r338365: [X86] Improved sched models for X86 BT*rr instructions. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=947573c21d9f1a13219cafa2631f50808b39a65a;p=llvm Revert r338365: [X86] Improved sched models for X86 BT*rr instructions. https://reviews.llvm.org/D49243 Contains WIP code that should not have been included. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338369 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index bc7afd32d49..7509b312c10 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1750,7 +1750,7 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags // Bit tests instructions: BT, BTS, BTR, BTC. let Defs = [EFLAGS] in { -let SchedRW = [WriteBitTest] in { +let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, @@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { []>, TB, NotMemoryFoldable; } -let SchedRW = [WriteBitTest] in { +let SchedRW = [WriteALU] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, @@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), } // SchedRW let hasSideEffects = 0 in { -let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), NotMemoryFoldable; } -let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), @@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), Requires<[In64BitMode]>; } -let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), NotMemoryFoldable; } -let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; @@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), Requires<[In64BitMode]>; } -let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), NotMemoryFoldable; } -let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td index a1945c668d8..3aeb2da2448 100755 --- a/lib/Target/X86/X86SchedBroadwell.td +++ b/lib/Target/X86/X86SchedBroadwell.td @@ -137,7 +137,6 @@ def : WriteRes { let NumMicroOps = 3; } def : WriteRes; -def : WriteRes; // Bit Test instrs // Bit counts. defm : BWWriteResPair; @@ -604,6 +603,14 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> { let ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>; +def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8", + "BT(16|32|64)rr", + "BTC(16|32|64)ri8", + "BTC(16|32|64)rr", + "BTR(16|32|64)ri8", + "BTR(16|32|64)rr", + "BTS(16|32|64)ri8", + "BTS(16|32|64)rr")>; def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> { let Latency = 1; diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 9abff9c704e..824e9e28a61 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -150,7 +150,6 @@ def : WriteRes { let NumMicroOps = 3; } def : WriteRes; -def : WriteRes; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on @@ -896,6 +895,14 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> { let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>; +def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8", + "BT(16|32|64)rr", + "BTC(16|32|64)ri8", + "BTC(16|32|64)rr", + "BTR(16|32|64)ri8", + "BTR(16|32|64)rr", + "BTS(16|32|64)ri8", + "BTS(16|32|64)rr")>; def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> { let Latency = 1; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index 060da449a05..d43c4e3cb0f 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -145,7 +145,6 @@ def : WriteRes { let NumMicroOps = 3; } def : WriteRes; -def : WriteRes; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on @@ -571,6 +570,14 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> { let ResourceCycles = [1]; } def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>; +def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8", + "BT(16|32|64)rr", + "BTC(16|32|64)ri8", + "BTC(16|32|64)rr", + "BTR(16|32|64)ri8", + "BTR(16|32|64)rr", + "BTS(16|32|64)ri8", + "BTS(16|32|64)rr")>; def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> { let Latency = 1; diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td index f64f3816a99..53d8e63152a 100644 --- a/lib/Target/X86/X86SchedSkylakeClient.td +++ b/lib/Target/X86/X86SchedSkylakeClient.td @@ -136,7 +136,6 @@ def : WriteRes { let NumMicroOps = 3; } def : WriteRes; -def : WriteRes; // // Bit counts. defm : SKLWriteResPair; @@ -606,6 +605,14 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>; +def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8", + "BT(16|32|64)rr", + "BTC(16|32|64)ri8", + "BTC(16|32|64)rr", + "BTR(16|32|64)ri8", + "BTR(16|32|64)rr", + "BTS(16|32|64)ri8", + "BTS(16|32|64)rr")>; def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> { let Latency = 1; diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td index 0e2615e3755..129fc2e7f46 100755 --- a/lib/Target/X86/X86SchedSkylakeServer.td +++ b/lib/Target/X86/X86SchedSkylakeServer.td @@ -136,7 +136,6 @@ def : WriteRes { let NumMicroOps = 3; } def : WriteRes; -def : WriteRes; // // Integer shifts and rotates. defm : SKXWriteResPair; @@ -619,6 +618,14 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>; +def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8", + "BT(16|32|64)rr", + "BTC(16|32|64)ri8", + "BTC(16|32|64)rr", + "BTR(16|32|64)ri8", + "BTR(16|32|64)rr", + "BTS(16|32|64)ri8", + "BTS(16|32|64)rr")>; def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> { let Latency = 1; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d1ec8c66d20..44079bf566d 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -142,7 +142,6 @@ def WriteFCMOV : SchedWrite; // X87 conditional move. def WriteSETCC : SchedWrite; // Set register based on condition code. def WriteSETCCStore : SchedWrite; def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH. -def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support // Integer shifts and rotates. defm WriteShift : X86SchedWritePair; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 86cb0aaa507..2880d47ce2d 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -108,7 +108,6 @@ def : WriteRes { let Latency = 2; let ResourceCycles = [2]; } -def : WriteRes; defm : X86WriteResUnsupported; diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 499e3454c03..44687e31984 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -188,7 +188,6 @@ defm : X86WriteRes; // x87 conditional m def : WriteRes; // Setcc. def : WriteRes; def : WriteRes; -def : WriteRes; // This is for simple LEAs with one or two input operands. def : WriteRes; diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index d45136c479e..9107e9fdba2 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -120,7 +120,6 @@ def : WriteRes { let ResourceCycles = [2,1]; } def : WriteRes; -def : WriteRes; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td index 8c13dc13128..5c8ed43b2a5 100644 --- a/lib/Target/X86/X86ScheduleZnver1.td +++ b/lib/Target/X86/X86ScheduleZnver1.td @@ -198,7 +198,6 @@ defm : ZnWriteResPair; def : WriteRes; def : WriteRes; defm : X86WriteRes; -def : WriteRes; // Bit counts. defm : ZnWriteResPair; diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp index ebfdd36577a..9331fadf409 100644 --- a/utils/TableGen/CodeGenSchedule.cpp +++ b/utils/TableGen/CodeGenSchedule.cpp @@ -21,7 +21,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" @@ -34,16 +33,6 @@ using namespace llvm; #define DEBUG_TYPE "subtarget-emitter" -#ifdef EXPENSIVE_CHECKS -// FIXME: TableGen is failed iff EXPENSIVE_CHECKS defined -static constexpr bool OptCheckSchedClasses = true; -#else -// FIXME: the default value should be false -static cl::opt OptCheckSchedClasses( - "check-sched-class-table", cl::init(true), cl::Hidden, - cl::desc("Check sched class table on different types of inconsistencies")); -#endif - #ifndef NDEBUG static void dumpIdxVec(ArrayRef V) { for (unsigned Idx : V) @@ -234,7 +223,6 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK, collectOptionalProcessorInfo(); checkCompleteness(); - checkSchedClasses(); } void CodeGenSchedModels::collectRetireControlUnits() { @@ -711,86 +699,6 @@ void CodeGenSchedModels::collectSchedClasses() { } } -void CodeGenSchedModels::checkSchedClasses() { - if (!OptCheckSchedClasses) - return; - - std::string str; - raw_string_ostream OS(str); - - // Check each instruction for each model to see if its overridden too often. - // Iff YES it's a candidate for more fine-grained Sched Class. - for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { - StringRef InstName = Inst->TheDef->getName(); - unsigned SCIdx = getSchedClassIdx(*Inst); - if (!SCIdx) - continue; - CodeGenSchedClass &SC = getSchedClass(SCIdx); - if (SC.Writes.empty()) - continue; - const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs; - if (RWDefs.empty()) - continue; - // FIXME: what should be threshold here? - if (RWDefs.size() > (ProcModels.size() / 2)) { - // FIXME: this dump hangs the execution !!! - // SC.dump(&Target.getSchedModels()); - OS << "SchedRW machine model for inst '" << InstName << "' ("; - for (auto I : SC.Writes) - OS << " " << SchedWrites[I].Name; - for (auto I : SC.Reads) - OS << " " << SchedReads[I].Name; - OS << " ) should be updated /improvedbecause it's overriden " << RWDefs.size() - << " times out of " << ProcModels.size() << " models:\n\t"; - for (Record *RWDef : RWDefs) - OS << " " << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName; - PrintWarning(OS.str()); - str.clear(); - } - - // TODO: here we should check latency/uop in SC vs. RWDef. Maybe we - // should do it iff RWDefs.size() == 1 only. - // Iff latency/uop are the same then warn about unnecessary redefine. - if (RWDefs.size()) { - for (Record *RWDef : RWDefs) { - IdxVec Writes; - IdxVec Reads; - findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes, - Reads); - - if ((Writes.size() == SC.Writes.size()) && - (Reads.size() == SC.Reads.size())) { - // TODO: do we need sorting Write & Reads? - for (unsigned I = 0, S = SC.Writes.size(); I < S; I++) { - auto SCSchedW = SchedWrites[SC.Writes[I]]; - auto SchedW = SchedWrites[Writes[I]]; - if (!SCSchedW.TheDef || !SchedW.TheDef) - continue; - const RecordVal *R = SCSchedW.TheDef->getValue("Latency"); - // FIXME: We should deal with default Latency here - if (!R || !R->getValue()) - continue; - auto SCLat = SCSchedW.TheDef->getValueAsInt("Latency"); - auto SCuOp = SCSchedW.TheDef->getValueAsInt("NumMicroOps"); - auto Lat = SchedW.TheDef->getValueAsInt("Latency"); - auto uOp = SchedW.TheDef->getValueAsInt("NumMicroOps"); - if ((SCLat == Lat) && (SCuOp == uOp)) - OS << "Overridden verion of inst '" << InstName - << "' has the same latency & uOp values as the original one " - "for model '" - << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName - << "'\n"; - } - if (!str.empty()) { - PrintWarning(OS.str()); - str.clear(); - } - } - } - } - } -} - // Get the SchedClass index for an instruction. unsigned CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const { diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h index ce53d67bd81..07c11596ade 100644 --- a/utils/TableGen/CodeGenSchedule.h +++ b/utils/TableGen/CodeGenSchedule.h @@ -443,8 +443,6 @@ private: void collectSchedClasses(); - void checkSchedClasses(); - void collectRetireControlUnits(); void collectRegisterFiles();