From: Craig Topper Date: Fri, 5 Apr 2019 19:27:41 +0000 (+0000) Subject: [X86] Merge the different CMOV instructions for each condition code into single instr... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d8490747ade4067a4bcfcf45ee95d8265c975394;p=llvm [X86] Merge the different CMOV instructions for each condition code into single instructions that store the condition code as an immediate. Summary: Reorder the condition code enum to match their encodings. Move it to MC layer so it can be used by the scheduler models. This avoids needing an isel pattern for each condition code. And it removes translation switches for converting between CMOV instructions and condition codes. Now the printer, encoder and disassembler take care of converting the immediate. We use InstAliases to handle the assembly matching. But we print using the asm string in the instruction definition. The instruction itself is marked IsCodeGenOnly=1 to hide it from the assembly parser. This does complicate the scheduler models a little since we can't assign the A and BE instructions to a separate class now. I plan to make similar changes for SETcc and Jcc. 
Reviewers: RKSimon, spatel, lebedev.ri, andreadb, courbet Reviewed By: RKSimon Subscribers: gchatelet, hiraditya, kristina, lebedev.ri, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60041 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357800 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Support/X86DisassemblerDecoderCommon.h b/include/llvm/Support/X86DisassemblerDecoderCommon.h index c4b7edb4afb..0ee0661a9e7 100644 --- a/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -393,6 +393,7 @@ enum ModRMDecisionType { ENUM_ENTRY(ENCODING_IRC, "Immediate for static rounding control") \ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \ "opcode byte") \ + ENUM_ENTRY(ENCODING_CC, "Condition code encoded in opcode") \ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ "in type") \ ENUM_ENTRY(ENCODING_SI, "Source index; encoded in OpSize/Adsize prefix") \ diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index b990a67b133..2463e04c36a 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -781,6 +781,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_Rv: translateRegister(mcInst, insn.opcodeRegister); return false; + case ENCODING_CC: + mcInst.addOperand(MCOperand::createImm(insn.immediates[0])); + return false; case ENCODING_FP: translateFPRegister(mcInst, insn.modRM & 7); return false; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 9a7bcc8bd5d..97341a0d027 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1846,6 +1846,9 @@ static int readOperands(struct 
InternalInstruction* insn) { if (readOpcodeRegister(insn, 0)) return -1; break; + case ENCODING_CC: + insn->immediates[0] = insn->opcode & 0xf; + break; case ENCODING_FP: break; case ENCODING_VVVV: diff --git a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp index 133119b0a86..85d0675154e 100644 --- a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp +++ b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp @@ -24,6 +24,30 @@ using namespace llvm; +void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op, + raw_ostream &O) { + int64_t Imm = MI->getOperand(Op).getImm(); + switch (Imm) { + default: llvm_unreachable("Invalid condcode argument!"); + case 0: O << "o"; break; + case 1: O << "no"; break; + case 2: O << "b"; break; + case 3: O << "ae"; break; + case 4: O << "e"; break; + case 5: O << "ne"; break; + case 6: O << "be"; break; + case 7: O << "a"; break; + case 8: O << "s"; break; + case 9: O << "ns"; break; + case 0xa: O << "p"; break; + case 0xb: O << "np"; break; + case 0xc: O << "l"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "le"; break; + case 0xf: O << "g"; break; + } +} + void X86InstPrinterCommon::printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O) { int64_t Imm = MI->getOperand(Op).getImm(); diff --git a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h index 489be73b650..c00d320a8f0 100644 --- a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h +++ b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h @@ -23,6 +23,7 @@ public: using MCInstPrinter::MCInstPrinter; virtual void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) = 0; + void printCondCode(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS); void printVPCOMMnemonic(const MCInst *MI, raw_ostream &OS); void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS); diff --git 
a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 7df587273c8..f13254ffa54 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -66,6 +66,39 @@ namespace X86 { enum OperandType : unsigned { /// AVX512 embedded rounding control. This should only have values 0-3. OPERAND_ROUNDING_CONTROL = MCOI::OPERAND_FIRST_TARGET, + OPERAND_COND_CODE, + }; + + // X86 specific condition code. These correspond to X86_*_COND in + // X86InstrInfo.td. They must be kept in synch. + enum CondCode { + COND_O = 0, + COND_NO = 1, + COND_B = 2, + COND_AE = 3, + COND_E = 4, + COND_NE = 5, + COND_BE = 6, + COND_A = 7, + COND_S = 8, + COND_NS = 9, + COND_P = 10, + COND_NP = 11, + COND_L = 12, + COND_GE = 13, + COND_LE = 14, + COND_G = 15, + LAST_VALID_COND = COND_G, + + // Artificial condition codes. These are used by AnalyzeBranch + // to indicate a block terminated with two conditional branches that together + // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, + // which can't be represented on x86 with a single condition. These + // are never used in MachineInstrs and are inverses of one another. + COND_NE_OR_P, + COND_E_AND_NP, + + COND_INVALID }; } // end namespace X86; @@ -313,6 +346,11 @@ namespace X86II { /// MRMSrcMemOp4 = 35, + /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM + /// byte to specify the operands and also encodes a condition code. + /// + MRMSrcMemCC = 36, + /// MRMXm - This form is used for instructions that use the Mod/RM byte /// to specify a memory source, but doesn't use the middle field. 
/// @@ -342,6 +380,11 @@ namespace X86II { /// MRMSrcRegOp4 = 51, + /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM + /// byte to specify the operands and also encodes a condition code + /// + MRMSrcRegCC = 52, + /// MRMXr - This form is used for instructions that use the Mod/RM byte /// to specify a register source, but doesn't use the middle field. /// @@ -727,10 +770,15 @@ namespace X86II { case X86II::MRMSrcMemOp4: // Skip registers encoded in reg, VEX_VVVV, and I8IMM. return 3; + case X86II::MRMSrcMemCC: + // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a + // mask register. + return 1; case X86II::MRMDestReg: case X86II::MRMSrcReg: case X86II::MRMSrcReg4VOp3: case X86II::MRMSrcRegOp4: + case X86II::MRMSrcRegCC: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 353f52a8c20..2f3cbcfc4c8 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1060,16 +1060,17 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B break; case X86II::MRMSrcReg: + case X86II::MRMSrcRegCC: REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B break; - case X86II::MRMSrcMem: { + case X86II::MRMSrcMem: + case X86II::MRMSrcMemCC: REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X CurOp += X86::AddrNumOperands; break; - } case X86II::MRMDestReg: REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R @@ -1436,6 +1437,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, CurOp = SrcRegNum + 1; 
break; } + case X86II::MRMSrcRegCC: { + unsigned FirstOp = CurOp++; + unsigned SecondOp = CurOp++; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + + EmitRegModRMByte(MI.getOperand(SecondOp), + GetX86RegNum(MI.getOperand(FirstOp)), CurByte, OS); + break; + } case X86II::MRMSrcMem: { unsigned FirstMemOp = CurOp+1; @@ -1481,6 +1493,18 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, CurOp = FirstMemOp + X86::AddrNumOperands; break; } + case X86II::MRMSrcMemCC: { + unsigned RegOp = CurOp++; + unsigned FirstMemOp = CurOp; + CurOp = FirstMemOp + X86::AddrNumOperands; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + + emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(RegOp)), + TSFlags, Rex, CurByte, OS, Fixups, STI); + break; + } case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp index e8f6e8f61fc..8039dd668f0 100644 --- a/lib/Target/X86/X86CmovConversion.cpp +++ b/lib/Target/X86/X86CmovConversion.cpp @@ -290,7 +290,7 @@ bool X86CmovConverterPass::collectCmovCandidates( // Skip debug instructions. if (I.isDebugInstr()) continue; - X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode()); + X86::CondCode CC = X86::getCondFromCMov(I); // Check if we found a X86::CMOVrr instruction. 
if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { if (Group.empty()) { @@ -545,7 +545,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates( } unsigned CondCost = - DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth; + DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth; unsigned ValCost = getDepthOfOptCmov( DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); @@ -593,7 +593,7 @@ static bool checkEFLAGSLive(MachineInstr *MI) { /// move all debug instructions to after the last CMOV instruction, making the /// CMOV group consecutive. static void packCmovGroup(MachineInstr *First, MachineInstr *Last) { - assert(X86::getCondFromCMovOpc(Last->getOpcode()) != X86::COND_INVALID && + assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID && "Last instruction in a CMOV group must be a CMOV instruction"); SmallVector DBGInstructions; @@ -651,14 +651,14 @@ void X86CmovConverterPass::convertCmovInstsToBranches( MachineInstr *LastCMOV = Group.back(); DebugLoc DL = MI.getDebugLoc(); - X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode())); + X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI)); X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); // Potentially swap the condition codes so that any memory operand to a CMOV // is in the *false* position instead of the *true* position. We can invert // any non-memory operand CMOV instructions to cope with this and we ensure // memory operand CMOVs are only included with a single condition code. if (llvm::any_of(Group, [&](MachineInstr *I) { - return I->mayLoad() && X86::getCondFromCMovOpc(I->getOpcode()) == CC; + return I->mayLoad() && X86::getCondFromCMov(*I) == CC; })) std::swap(CC, OppCC); @@ -712,8 +712,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( if (!MI.mayLoad()) { // Remember the false-side register input. 
unsigned FalseReg = - MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2) - .getReg(); + MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg(); // Walk back through any intermediate cmovs referenced. while (true) { auto FRIt = FalseBBRegRewriteTable.find(FalseReg); @@ -728,7 +727,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // The condition must be the *opposite* of the one we've decided to branch // on as the branch will go *around* the load and the load should happen // when the CMOV condition is false. - assert(X86::getCondFromCMovOpc(MI.getOpcode()) == OppCC && + assert(X86::getCondFromCMov(MI) == OppCC && "Can only handle memory-operand cmov instructions with a condition " "opposite to the selected branch direction."); @@ -767,7 +766,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // Move the new CMOV to just before the old one and reset any impacted // iterator. auto *NewCMOV = NewMIs.pop_back_val(); - assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC && + assert(X86::getCondFromCMov(*NewCMOV) == OppCC && "Last new instruction isn't the expected CMOV!"); LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump()); MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV); @@ -819,7 +818,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // If this CMOV we are processing is the opposite condition from the jump we // generated, then we have to swap the operands for the PHI that is going to // be generated. 
- if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC) + if (X86::getCondFromCMov(*MIIt) == OppCC) std::swap(Op1Reg, Op2Reg); auto Op1Itr = RegRewriteTable.find(Op1Reg); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 8f34f49444f..bf1a6c6e9e0 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2144,9 +2144,9 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { return false; const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); - unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); - unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, - LHSReg, LHSIsKill); + unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8); + unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, + LHSReg, LHSIsKill, CC); updateValueMap(I, ResultReg); return true; } diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp index 6d77170f709..52ae70a60dc 100644 --- a/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -599,7 +599,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { } // Otherwise we can just rewrite in-place. - if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) { + if (X86::getCondFromCMov(MI) != X86::COND_INVALID) { rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) { @@ -841,7 +841,7 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, MachineOperand &FlagUse, CondRegArray &CondRegs) { // First get the register containing this specific condition. 
- X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode()); + X86::CondCode Cond = X86::getCondFromCMov(CMovI); unsigned CondReg; bool Inverted; std::tie(CondReg, Inverted) = @@ -852,12 +852,10 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, // Insert a direct test of the saved register. insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); - // Rewrite the CMov to use the !ZF flag from the test (but match register - // size and memory operand), and then kill its use of the flags afterward. - auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg()); - CMovI.setDesc(TII->get(X86::getCMovFromCond( - Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8, - !CMovI.memoperands_empty()))); + // Rewrite the CMov to use the !ZF flag from the test, and then kill its use + // of the flags afterward. + CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1) + .setImm(Inverted ? X86::COND_E : X86::COND_NE); FlagUse.setIsKill(true); LLVM_DEBUG(dbgs() << " fixed cmov: "; CMovI.dump()); } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 6d8bcd7cd01..b5bab764ee4 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -653,9 +653,10 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) .addReg(CopyReg) .addReg(SizeReg); - BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg) + BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg) .addReg(TestReg) - .addReg(ZeroReg); + .addReg(ZeroReg) + .addImm(X86::COND_B); // FinalReg now holds final stack pointer value, or zero if // allocation would overflow. 
Compare against the current stack diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index f16d7e15c1a..eeffc4d7080 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2321,14 +2321,21 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { CR->getSignedMax().slt(1ull << Width); } -static X86::CondCode getCondFromOpc(unsigned Opc) { +static X86::CondCode getCondFromNode(SDNode *N) { + assert(N->isMachineOpcode() && "Unexpected node"); X86::CondCode CC = X86::COND_INVALID; if (CC == X86::COND_INVALID) - CC = X86::getCondFromBranchOpc(Opc); + CC = X86::getCondFromBranchOpc(N->getMachineOpcode()); if (CC == X86::COND_INVALID) - CC = X86::getCondFromSETOpc(Opc); - if (CC == X86::COND_INVALID) - CC = X86::getCondFromCMovOpc(Opc); + CC = X86::getCondFromSETOpc(N->getMachineOpcode()); + if (CC == X86::COND_INVALID) { + unsigned Opc = N->getMachineOpcode(); + if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || Opc == X86::CMOV64rr) + CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2)); + else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || + Opc == X86::CMOV64rm) + CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6)); + } return CC; } @@ -2354,7 +2361,7 @@ bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { // Anything unusual: assume conservatively. if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); switch (CC) { // Comparisons which only use the zero flag. @@ -2390,7 +2397,7 @@ bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { // Anything unusual: assume conservatively. if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. 
- X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); switch (CC) { // Comparisons which don't examine the SF flag. @@ -2451,7 +2458,7 @@ static bool mayUseCarryFlag(X86::CondCode CC) { if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); if (mayUseCarryFlag(CC)) return false; diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 2edb25abc11..2c41169bf67 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -13,67 +13,79 @@ // CMOV instructions. -multiclass CMOV opc, string Mnemonic, X86FoldableSchedWrite Sched, - PatLeaf CondNode> { - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1, SchedRW = [Sched] in { - def NAME#16rr - : I, - TB, OpSize16; - def NAME#32rr - : I, - TB, OpSize32; - def NAME#64rr - :RI, TB; - } +let isCodeGenOnly = 1, ForceDisassemble = 1 in { +let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + isCommutable = 1, SchedRW = [WriteCMOV] in { + def CMOV16rr + : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond), + "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, + (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>, + TB, OpSize16; + def CMOV32rr + : I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond), + "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, + (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>, + TB, OpSize32; + def CMOV64rr + :RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond), + "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, + (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB; +} - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints 
= "$src1 = $dst", - SchedRW = [Sched.Folded, Sched.ReadAfterFold] in { - def NAME#16rm - : I, TB, OpSize16; - def NAME#32rm - : I, TB, OpSize32; - def NAME#64rm - :RI, TB; - } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" -} // end multiclass +let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in { + def CMOV16rm + : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond), + "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + imm:$cond, EFLAGS))]>, TB, OpSize16; + def CMOV32rm + : I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond), + "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + imm:$cond, EFLAGS))]>, TB, OpSize32; + def CMOV64rm + :RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond), + "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + imm:$cond, EFLAGS))]>, TB; +} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" +} // isCodeGenOnly = 1, ForceDisassemble = 1 +multiclass CMOV_Aliases { + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; +} -// Conditional Moves. 
-defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>; -defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>; -defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>; -defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>; -defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>; -defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>; -defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>; -defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>; -defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>; -defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>; -defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>; -defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>; -defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>; -defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>; -defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>; -defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>; +defm : CMOV_Aliases<"cmovo" , 0>; +defm : CMOV_Aliases<"cmovno", 1>; +defm : CMOV_Aliases<"cmovb" , 2>; +defm : CMOV_Aliases<"cmovae", 3>; +defm : CMOV_Aliases<"cmove" , 4>; +defm : CMOV_Aliases<"cmovne", 5>; +defm : CMOV_Aliases<"cmovbe", 6>; +defm : CMOV_Aliases<"cmova" , 7>; +defm : CMOV_Aliases<"cmovs" , 8>; +defm : CMOV_Aliases<"cmovns", 9>; +defm : CMOV_Aliases<"cmovp" , 10>; +defm : CMOV_Aliases<"cmovnp", 11>; +defm : CMOV_Aliases<"cmovl" , 12>; +defm : CMOV_Aliases<"cmovge", 13>; +defm : CMOV_Aliases<"cmovle", 14>; +defm : CMOV_Aliases<"cmovg" , 15>; // SetCC instructions. 
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 7aab8f8f377..03494f3689f 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1236,37 +1236,23 @@ def : Pat<(X86cmp GR32:$src1, 0), def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC), + SDLoc(N), MVT::i8); +}]>; + // Conditional moves with folded loads with operands swapped and conditions // inverted. -multiclass CMOVmr { - let Predicates = [HasCMov] in { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), - (Inst16 GR16:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), - (Inst32 GR32:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), - (Inst64 GR64:$src2, addr:$src1)>; - } +let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS), + (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS), + (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS), + (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; } -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; - // zextload bool -> zextload byte // i1 stored in one byte in zero-extended form. // Upper bits cleanup should be executed before Store. 
diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp index d8c21f60137..e0be42b4d4a 100644 --- a/lib/Target/X86/X86InstrFoldTables.cpp +++ b/lib/Target/X86/X86InstrFoldTables.cpp @@ -1249,54 +1249,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, - { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, - { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, - { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, - { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, - { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, - { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, - { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, - { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, - { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, - { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, - { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, - { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, - { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, - { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, - { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, - { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, - { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, - { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, - { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, - { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, - { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, - { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, - { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, - { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, - { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, - { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, - { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, - { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, - { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, - { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, - { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, - { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, - { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, - { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, - { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, - { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, - { X86::CMOVNS16rr, 
X86::CMOVNS16rm, 0 }, - { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, - { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, - { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, - { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, - { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, - { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, - { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, - { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, - { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, - { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, - { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMOV16rr, X86::CMOV16rm, 0 }, + { X86::CMOV32rr, X86::CMOV32rm, 0 }, + { X86::CMOV64rr, X86::CMOV64rm, 0 }, { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, { X86::CMPSDrr, X86::CMPSDrm, 0 }, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index d8a442dc9d6..5b25101b33d 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -30,6 +30,7 @@ def MRMDestMem : Format<32>; def MRMSrcMem : Format<33>; def MRMSrcMem4VOp3 : Format<34>; def MRMSrcMemOp4 : Format<35>; +def MRMSrcMemCC : Format<36>; def MRMXm : Format<39>; def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>; def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>; @@ -38,6 +39,7 @@ def MRMDestReg : Format<48>; def MRMSrcReg : Format<49>; def MRMSrcReg4VOp3 : Format<50>; def MRMSrcRegOp4 : Format<51>; +def MRMSrcRegCC : Format<52>; def MRMXr : Format<55>; def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>; def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 3ed88d9840b..911b6b0ec5a 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1641,76 +1641,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } - case 
X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: - case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: - case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: - case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: - case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: - case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: - case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: - case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: - case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: - case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: - case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: - case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: - case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: - case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: - case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: - case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { - unsigned Opc; - switch (MI.getOpcode()) { - default: llvm_unreachable("Unreachable!"); - case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; - case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; - case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; - case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; - case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; - case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; - case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; - case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; - case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; - case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; - case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; - case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; - case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; - case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; - case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; - case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; - 
case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; - case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; - case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; - case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; - case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; - case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break; - case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break; - case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break; - case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break; - case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break; - case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break; - case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break; - case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break; - case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; - case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; - case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; - case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; - case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; - case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; - case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; - case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break; - case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; - case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; - case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break; - case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; - case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; - case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; - case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; - case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; - case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; - case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; - case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; - } + case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: { auto &WorkingMI = cloneIfNew(MI); - WorkingMI.setDesc(get(Opc)); + unsigned OpNo = MI.getDesc().getNumOperands() - 1; + X86::CondCode CC = static_cast(MI.getOperand(OpNo).getImm()); + 
WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC)); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -2090,57 +2025,13 @@ X86::CondCode X86::getCondFromSETOpc(unsigned Opc) { } /// Return condition code of a CMov opcode. -X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) { - switch (Opc) { +X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return X86::COND_INVALID; - case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: - case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr: - return X86::COND_A; - case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm: - case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr: - return X86::COND_AE; - case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm: - case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr: - return X86::COND_B; - case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm: - case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr: - return X86::COND_BE; - case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm: - case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr: - return X86::COND_E; - case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm: - case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr: - return X86::COND_G; - case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm: - case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr: - return X86::COND_GE; - case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm: - case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr: - return X86::COND_L; - case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm: - case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr: - return X86::COND_LE; - case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm: - case X86::CMOVNE32rr: case X86::CMOVNE64rm: 
case X86::CMOVNE64rr: - return X86::COND_NE; - case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm: - case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr: - return X86::COND_NO; - case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm: - case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr: - return X86::COND_NP; - case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm: - case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr: - return X86::COND_NS; - case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm: - case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr: - return X86::COND_O; - case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm: - case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr: - return X86::COND_P; - case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm: - case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr: - return X86::COND_S; + case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: + case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm: + return static_cast( + MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); } } @@ -2252,74 +2143,35 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) { /// whether it has memory operand. 
unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { static const uint16_t Opc[16][2] = { - { X86::SETAr, X86::SETAm }, - { X86::SETAEr, X86::SETAEm }, + { X86::SETOr, X86::SETOm }, + { X86::SETNOr, X86::SETNOm }, { X86::SETBr, X86::SETBm }, - { X86::SETBEr, X86::SETBEm }, + { X86::SETAEr, X86::SETAEm }, { X86::SETEr, X86::SETEm }, - { X86::SETGr, X86::SETGm }, - { X86::SETGEr, X86::SETGEm }, - { X86::SETLr, X86::SETLm }, - { X86::SETLEr, X86::SETLEm }, { X86::SETNEr, X86::SETNEm }, - { X86::SETNOr, X86::SETNOm }, - { X86::SETNPr, X86::SETNPm }, + { X86::SETBEr, X86::SETBEm }, + { X86::SETAr, X86::SETAm }, + { X86::SETSr, X86::SETSm }, { X86::SETNSr, X86::SETNSm }, - { X86::SETOr, X86::SETOm }, { X86::SETPr, X86::SETPm }, - { X86::SETSr, X86::SETSm } + { X86::SETNPr, X86::SETNPm }, + { X86::SETLr, X86::SETLm }, + { X86::SETGEr, X86::SETGEm }, + { X86::SETLEr, X86::SETLEm }, + { X86::SETGr, X86::SETGm }, }; assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes"); return Opc[CC][HasMemoryOperand ? 1 : 0]; } -/// Return a cmov opcode for the given condition, -/// register size in bytes, and operand type. 
-unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes, - bool HasMemoryOperand) { - static const uint16_t Opc[32][3] = { - { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, - { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, - { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, - { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr }, - { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr }, - { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr }, - { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr }, - { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr }, - { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr }, - { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr }, - { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr }, - { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr }, - { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr }, - { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr }, - { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr }, - { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }, - { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm }, - { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm }, - { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm }, - { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm }, - { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm }, - { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm }, - { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm }, - { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm }, - { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm }, - { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm }, - { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm }, - { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm }, - { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm }, - { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm }, - { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm }, - { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm } - }; - - assert(CC < 16 && "Can only handle standard cond codes"); - unsigned Idx = 
HasMemoryOperand ? 16+CC : CC; +/// Return a cmov opcode for the given register size in bytes, and operand type. +unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) { switch(RegBytes) { default: llvm_unreachable("Illegal register size!"); - case 2: return Opc[Idx][0]; - case 4: return Opc[Idx][1]; - case 8: return Opc[Idx][2]; + case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr; + case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr; + case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr; } } @@ -2870,10 +2722,12 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB, const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); const TargetRegisterClass &RC = *MRI.getRegClass(DstReg); assert(Cond.size() == 1 && "Invalid Cond array"); - unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), - TRI.getRegSizeInBits(RC) / 8, - false /*HasMemoryOperand*/); - BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); + unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8, + false /*HasMemoryOperand*/); + BuildMI(MBB, I, DL, get(Opc), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addImm(Cond[0].getImm()); } /// Test if the given register is a physical h register. 
@@ -3728,7 +3582,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (OldCC != X86::COND_INVALID) OpcIsSET = true; else - OldCC = X86::getCondFromCMovOpc(Instr.getOpcode()); + OldCC = X86::getCondFromCMov(Instr); } if (OldCC == X86::COND_INVALID) return false; } @@ -3781,10 +3635,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, else if(OpcIsSET) NewOpc = getSETFromCond(ReplacementCC, HasMemoryOperand); else { - unsigned DstReg = Instr.getOperand(0).getReg(); - const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - NewOpc = getCMovFromCond(ReplacementCC, TRI->getRegSizeInBits(*DstRC)/8, - HasMemoryOperand); + NewOpc = ReplacementCC; } // Push the MachineInstr to OpsToUpdate. @@ -3844,8 +3695,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, CmpInstr.eraseFromParent(); // Modify the condition code of instructions in OpsToUpdate. - for (auto &Op : OpsToUpdate) - Op.first->setDesc(get(Op.second)); + for (auto &Op : OpsToUpdate) { + if (X86::getCondFromCMov(*Op.first) != X86::COND_INVALID) + Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1) + .setImm(Op.second); + else + Op.first->setDesc(get(Op.second)); + } return true; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f95681b14e2..b0e83527247 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -35,38 +35,6 @@ enum AsmComments { AC_EVEX_2_VEX = MachineInstr::TAsmComments }; -// X86 specific condition code. These correspond to X86_*_COND in -// X86InstrInfo.td. They must be kept in synch. -enum CondCode { - COND_A = 0, - COND_AE = 1, - COND_B = 2, - COND_BE = 3, - COND_E = 4, - COND_G = 5, - COND_GE = 6, - COND_L = 7, - COND_LE = 8, - COND_NE = 9, - COND_NO = 10, - COND_NP = 11, - COND_NS = 12, - COND_O = 13, - COND_P = 14, - COND_S = 15, - LAST_VALID_COND = COND_S, - - // Artificial condition codes. 
These are used by AnalyzeBranch - // to indicate a block terminated with two conditional branches that together - // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, - // which can't be represented on x86 with a single condition. These - // are never used in MachineInstrs and are inverses of one another. - COND_NE_OR_P, - COND_E_AND_NP, - - COND_INVALID -}; - // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); @@ -78,10 +46,8 @@ std::pair getX86ConditionCode(CmpInst::Predicate Predicate); /// a memory operand. unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); -/// Return a cmov opcode for the given condition, register size in -/// bytes, and operand type. -unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, - bool HasMemoryOperand = false); +/// Return a cmov opcode for the given register size in bytes, and operand type. +unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false); // Turn jCC opcode into condition code. CondCode getCondFromBranchOpc(unsigned Opc); @@ -90,7 +56,7 @@ CondCode getCondFromBranchOpc(unsigned Opc); CondCode getCondFromSETOpc(unsigned Opc); // Turn CMov opcode into condition code. -CondCode getCondFromCMovOpc(unsigned Opc); +CondCode getCondFromCMov(const MachineInstr &MI); /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. 
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f5ff8d24303..dc5e04cdd67 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -602,6 +602,12 @@ def offset64_32 : X86MemOffsOperand; +def ccode : Operand { + let PrintMethod = "printCondCode"; + let OperandNamespace = "X86"; + let OperandType = "OPERAND_COND_CODE"; +} + class ImmSExtAsmOperandClass : AsmOperandClass { let SuperClasses = [ImmAsmOperand]; let RenderMethod = "addImmOperands"; @@ -956,22 +962,22 @@ include "X86InstrFormats.td" // X86 specific condition code. These correspond to CondCode in // X86InstrInfo.h. They must be kept in synch. -def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE -def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC +def X86_COND_O : PatLeaf<(i8 0)>; +def X86_COND_NO : PatLeaf<(i8 1)>; def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C -def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA +def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z -def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE -def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL -def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE -def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG -def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ -def X86_COND_NO : PatLeaf<(i8 10)>; +def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ +def X86_COND_BE : PatLeaf<(i8 6)>; // alt. COND_NA +def X86_COND_A : PatLeaf<(i8 7)>; // alt. COND_NBE +def X86_COND_S : PatLeaf<(i8 8)>; +def X86_COND_NS : PatLeaf<(i8 9)>; +def X86_COND_P : PatLeaf<(i8 10)>; // alt. COND_PE def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO -def X86_COND_NS : PatLeaf<(i8 12)>; -def X86_COND_O : PatLeaf<(i8 13)>; -def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE -def X86_COND_S : PatLeaf<(i8 15)>; +def X86_COND_L : PatLeaf<(i8 12)>; // alt. COND_NGE +def X86_COND_GE : PatLeaf<(i8 13)>; // alt. COND_NL +def X86_COND_LE : PatLeaf<(i8 14)>; // alt. 
COND_NG +def X86_COND_G : PatLeaf<(i8 15)>; // alt. COND_NLE def i16immSExt8 : ImmLeaf(Imm); }]>; def i32immSExt8 : ImmLeaf(Imm); }]>; diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td index 6d69cb01eee..4e76a8e2a6e 100644 --- a/lib/Target/X86/X86SchedBroadwell.td +++ b/lib/Target/X86/X86SchedBroadwell.td @@ -160,7 +160,6 @@ defm : BWWriteResPair; def : WriteRes; // LEA instructions can't fold loads. defm : BWWriteResPair; // Conditional move. -defm : BWWriteResPair; // // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. @@ -1602,4 +1601,30 @@ def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>; def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. +def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 2; + let ResourceCycles = [1,1]; + let NumMicroOps = 2; +} + +def BWWriteCMOVA_CMOVBErm : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { + let Latency = 7; + let ResourceCycles = [1,1,1]; + let NumMicroOps = 3; +} + +def BWCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [BWWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def BWCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [BWWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[BWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[BWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index cb0e56432fb..a8855f0f39a 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -165,7 +165,6 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Conditional move. -defm : HWWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. 
def : WriteRes { @@ -1886,4 +1885,30 @@ def HWWriteADC : SchedWriteVariant<[ def : InstRW<[HWWriteADC], (instrs ADC16ri8, ADC32ri8, ADC64ri8, SBB16ri8, SBB32ri8, SBB64ri8)>; +// CMOVs that use both Z and C flag require an extra uop. +def HWWriteCMOVA_CMOVBErr : SchedWriteRes<[HWPort06,HWPort0156]> { + let Latency = 3; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def HWWriteCMOVA_CMOVBErm : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> { + let Latency = 8; + let ResourceCycles = [1,1,2]; + let NumMicroOps = 4; +} + +def HWCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [HWWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def HWCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [HWWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[HWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[HWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/lib/Target/X86/X86SchedPredicates.td b/lib/Target/X86/X86SchedPredicates.td index 62ed351b194..c1e5ad0834d 100644 --- a/lib/Target/X86/X86SchedPredicates.td +++ b/lib/Target/X86/X86SchedPredicates.td @@ -60,3 +60,15 @@ def IsThreeOperandsLEABody : // X86GenInstrInfo. def IsThreeOperandsLEAFn : TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>; + +// A predicate to check for COND_A and COND_BE CMOVs which have an extra uop +// on recent Intel CPUs. +def IsCMOVArr_Or_CMOVBErr : CheckAny<[ + CheckImmOperand_s<3, "X86::COND_A">, + CheckImmOperand_s<3, "X86::COND_BE"> +]>; + +def IsCMOVArm_Or_CMOVBErm : CheckAny<[ + CheckImmOperand_s<7, "X86::COND_A">, + CheckImmOperand_s<7, "X86::COND_BE"> +]>; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index 503b905842b..234f3dcb552 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -160,7 +160,6 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // Conditional move. 
-defm : SBWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -1173,4 +1172,30 @@ def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ ]>; def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>; +// CMOVs that use both Z and C flag require an extra uop. +def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> { + let Latency = 3; + let ResourceCycles = [2,1]; + let NumMicroOps = 3; +} + +def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> { + let Latency = 8; + let ResourceCycles = [1,2,1]; + let NumMicroOps = 4; +} + +def SBCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SBWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SBCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SBWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td index ff6a0e35c5c..87dc88f482f 100644 --- a/lib/Target/X86/X86SchedSkylakeClient.td +++ b/lib/Target/X86/X86SchedSkylakeClient.td @@ -158,7 +158,6 @@ defm : SKLWriteResPair; def : WriteRes; // LEA instructions can't fold loads. defm : SKLWriteResPair; // Conditional move. -defm : SKLWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -1757,4 +1756,30 @@ def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>; def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. 
+def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKLWriteCMOVA_CMOVBErm : SchedWriteRes<[SKLPort23,SKLPort06]> { + let Latency = 7; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def SKLCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SKLWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SKLCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SKLWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SKLCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SKLCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td index 0fdeba7cef3..b532e7ae817 100644 --- a/lib/Target/X86/X86SchedSkylakeServer.td +++ b/lib/Target/X86/X86SchedSkylakeServer.td @@ -159,7 +159,6 @@ defm : SKXWriteResPair; def : WriteRes; // LEA instructions can't fold loads. defm : SKXWriteResPair; // Conditional move. -defm : SKXWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -2473,4 +2472,30 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>; def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. 
+def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> { + let Latency = 7; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def SKXCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SKXWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SKXCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SKXWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index f50cb621046..55ca85ec1e3 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -163,7 +163,6 @@ defm WritePOPCNT : X86SchedWritePair; // Bit population count. defm WriteLZCNT : X86SchedWritePair; // Leading zero count. defm WriteTZCNT : X86SchedWritePair; // Trailing zero count. defm WriteCMOV : X86SchedWritePair; // Conditional move. -defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move. def WriteFCMOV : SchedWrite; // X87 conditional move. def WriteSETCC : SchedWrite; // Set register based on condition code. def WriteSETCCStore : SchedWrite; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 3ebd885753b..b0334655de7 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -113,7 +113,6 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; defm : X86WriteRes; // x87 conditional move. 
def : WriteRes; diff --git a/lib/Target/X86/X86ScheduleBdVer2.td b/lib/Target/X86/X86ScheduleBdVer2.td index 82920ad43c6..6e11ac25172 100644 --- a/lib/Target/X86/X86ScheduleBdVer2.td +++ b/lib/Target/X86/X86ScheduleBdVer2.td @@ -444,12 +444,24 @@ def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> { def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>; defm : PdWriteResExPair; // Conditional move. -defm : PdWriteResExPair; // Conditional (CF + ZF flag) move. -def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm, - CMOVGE16rm, CMOVGE32rm, CMOVGE64rm, - CMOVL16rm, CMOVL32rm, CMOVL64rm, - CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>; +def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> { + let Latency = 5; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; +} + +def PdWriteCMOVmVar : SchedWriteVariant<[ + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar +]>; + +def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; defm : PdWriteRes; // x87 conditional move. diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 7931a956b52..2d26232b413 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -221,7 +221,6 @@ defm : JWriteResIntPair; defm : JWriteResIntPair; defm : JWriteResIntPair; // Conditional move. -defm : JWriteResIntPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes; diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index fc150fca545..34c251a5c5b 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -131,7 +131,6 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; defm : X86WriteRes; // x87 conditional move. 
def : WriteRes; def : WriteRes { diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td index 1a75281cf0c..65f6d89df61 100644 --- a/lib/Target/X86/X86ScheduleZnver1.td +++ b/lib/Target/X86/X86ScheduleZnver1.td @@ -215,7 +215,6 @@ defm : ZnWriteResPair; defm : ZnWriteResFpuPair; defm : ZnWriteResPair; -defm : ZnWriteResPair; def : WriteRes; def : WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/lib/Target/X86/X86SpeculativeLoadHardening.cpp index 1d94f2a80a0..289c5f19b06 100644 --- a/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -751,7 +751,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( for (X86::CondCode Cond : Conds) { int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); // Note that we intentionally use an empty debug location so that @@ -759,7 +759,8 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(CurStateReg) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(Cond); // If this is the last cmov and the EFLAGS weren't originally // live-in, mark them as killed. if (!LiveEFLAGS && Cond == Conds.back()) @@ -1176,12 +1177,13 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( // Now cmov over the predicate if the comparison wasn't equal. 
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(PS->InitialReg) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(X86::COND_NE); CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); @@ -2545,12 +2547,13 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( // Now conditionally update the predicate state we just extracted if we ended // up at a different return address than expected. int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg) .addReg(NewStateReg, RegState::Kill) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(X86::COND_NE); CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); diff --git a/test/CodeGen/X86/flags-copy-lowering.mir b/test/CodeGen/X86/flags-copy-lowering.mir index daa4f2305da..3f009429bd5 100644 --- a/test/CodeGen/X86/flags-copy-lowering.mir +++ b/test/CodeGen/X86/flags-copy-lowering.mir @@ -283,19 +283,19 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %2 - %3:gr64 = CMOVA64rr %0, %1, implicit $eflags - %4:gr64 = CMOVB64rr %0, %1, implicit $eflags - %5:gr64 = CMOVE64rr %0, %1, implicit $eflags - %6:gr64 = CMOVNE64rr %0, %1, 
implicit killed $eflags + %3:gr64 = CMOV64rr %0, %1, 7, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 2, implicit $eflags + %5:gr64 = CMOV64rr %0, %1, 4, implicit $eflags + %6:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %6:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %3 MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 @@ -396,12 +396,12 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %3 - %4:gr64 = CMOVE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 4, implicit $eflags %5:gr64 = MOV64ri32 42 %6:gr64 = ADCX64rr %2, %5, implicit-def $eflags, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: %5:gr64 = MOV64ri32 42 ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit 
killed $eflags @@ -435,12 +435,12 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %3 - %4:gr64 = CMOVE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 4, implicit $eflags %5:gr64 = MOV64ri32 42 %6:gr64 = ADOX64rr %2, %5, implicit-def $eflags, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: %5:gr64 = MOV64ri32 42 ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def $eflags ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags @@ -628,30 +628,30 @@ body: | bb.1: liveins: $eflags - %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags $rax = COPY %3 RET 0, $rax bb.2: liveins: $eflags - %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %4 RET 0, $rax bb.3: liveins: $eflags - %5:gr64 = CMOVS64rr %0, %1, implicit killed $eflags + %5:gr64 = CMOV64rr %0, %1, 8, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[S_REG]], %[[S_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %5 RET 0, 
$rax @@ -703,10 +703,10 @@ body: | bb.1: liveins: $eflags - %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags $rax = COPY %3 RET 0, $rax @@ -728,30 +728,30 @@ body: | bb.3: liveins: $eflags - %4:gr64 = CMOVNE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 5, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %4 RET 0, $rax bb.4: liveins: $eflags - %5:gr64 = CMOVP64rr %0, %1, implicit $eflags + %5:gr64 = CMOV64rr %0, %1, 10, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[P_REG]], %[[P_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %5 RET 0, $rax bb.5: liveins: $eflags - %6:gr64 = CMOVS64rr %0, %1, implicit killed $eflags + %6:gr64 = CMOV64rr %0, %1, 8, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[S_REG]], %[[S_REG]], implicit-def $eflags - ; CHECK-NEXT: %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %6:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %6 RET 0, $rax @@ -876,11 +876,11 @@ body: | liveins: $eflags ; Just use $eflags on this side of the diamond. 
- %4:gr64 = CMOVA64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 7, implicit $eflags ; CHECK: bb.5: ; CHECK-NOT: COPY{{( killed)?}} $eflags ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 JMP_1 %bb.7 @@ -890,21 +890,21 @@ body: | liveins: $eflags ; Use, copy, and then use $eflags again. - %5:gr64 = CMOVA64rr %0, %1, implicit $eflags + %5:gr64 = CMOV64rr %0, %1, 7, implicit $eflags ; CHECK: bb.6: ; CHECK-NOT: COPY{{( killed)?}} $eflags ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 %6:gr64 = COPY $eflags $eflags = COPY %6:gr64 - %7:gr64 = CMOVA64rr %0, %1, implicit $eflags + %7:gr64 = CMOV64rr %0, %1, 7, implicit $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %7:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %7:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7 JMP_1 %bb.7 @@ -940,12 +940,12 @@ body: | liveins: $eflags ; And we're done. - %8:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + %8:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags $rax = COPY %8 RET 0, $rax ; CHECK: bb.9: ; CHECK-NOT: $eflags - ; CHECK: %8:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK: %8:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags ... 
--- diff --git a/test/CodeGen/X86/non-value-mem-operand.mir b/test/CodeGen/X86/non-value-mem-operand.mir index a290b04a184..ce55db19e22 100644 --- a/test/CodeGen/X86/non-value-mem-operand.mir +++ b/test/CodeGen/X86/non-value-mem-operand.mir @@ -217,7 +217,7 @@ body: | $rax = MOV64ri @global.1 $rax = MOV64rm killed $rax, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from @global.1) TEST64rr $rax, $rax, implicit-def $eflags - $rax = CMOVE64rr undef $rax, killed $rax, implicit killed $eflags + $rax = CMOV64rr undef $rax, killed $rax, 4, implicit killed $eflags $ecx = MOV32rm undef $rax, 1, $noreg, 0, $noreg :: (load 4 from `i32* undef`) $rdx = MOV64rm $r12, 8, $r14, 0, $noreg :: (load 8 from %ir.tmp3) $r15 = LEA64r $rdx, 1, $noreg, 1, _ diff --git a/test/CodeGen/X86/post-ra-sched-with-debug.mir b/test/CodeGen/X86/post-ra-sched-with-debug.mir index 079374752b1..f045b0b4183 100644 --- a/test/CodeGen/X86/post-ra-sched-with-debug.mir +++ b/test/CodeGen/X86/post-ra-sched-with-debug.mir @@ -283,13 +283,13 @@ body: | $edx = XOR32rr undef $edx, undef $edx, implicit-def dead $eflags, implicit-def $rdx TEST64rr $rcx, $rcx, implicit-def $eflags $esi = MOV32ri @o, implicit-def $rsi - $rsi = CMOVNE64rr killed $rsi, $rdx, implicit killed $eflags + $rsi = CMOV64rr killed $rsi, $rdx, 5, implicit killed $eflags $rsi = OR64rr killed $rsi, killed $rcx, implicit-def $eflags $rcx = LEA64r $rbp, 1, $noreg, -20, $noreg DBG_VALUE $rcx, $noreg, !46, !17, debug-location !48 DBG_VALUE $rcx, $noreg, !39, !17, debug-location !44 DBG_VALUE $rbp, -20, !29, !17, debug-location !36 - $rcx = CMOVNE64rr killed $rcx, killed $rdx, implicit killed $eflags + $rcx = CMOV64rr killed $rcx, killed $rdx, 5, implicit killed $eflags $rcx = OR64rr killed $rcx, killed $rsi, implicit-def dead $eflags $rdx = MOVSX64rm32 $rbx, 1, $noreg, 0, $noreg :: (load 4, align 8) TEST32mr killed $rcx, 4, killed $rdx, 0, $noreg, killed $eax, implicit-def $eflags :: (load 4) diff --git a/test/CodeGen/X86/tail-call-conditional.mir 
b/test/CodeGen/X86/tail-call-conditional.mir index 77d1e4676da..c3ac5c09c25 100644 --- a/test/CodeGen/X86/tail-call-conditional.mir +++ b/test/CodeGen/X86/tail-call-conditional.mir @@ -48,7 +48,7 @@ body: | ; CHECK-NEXT: $rdi = COPY $rsi ; CHECK-NEXT: $rsi = COPY $rax ; CHECK-NEXT: CMP64ri8 $rax, 9, implicit-def $eflags - ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 3, csr_64, implicit $rsp, implicit $eflags, implicit $ssp, implicit $rsp, implicit $rdi, implicit $rsi, implicit $rdi, implicit-def $rdi, implicit $hsi, implicit-def $hsi, implicit $sih, implicit-def $sih, implicit $sil, implicit-def $sil, implicit $si, implicit-def $si, implicit $esi, implicit-def $esi, implicit $rsi, implicit-def $rsi, implicit $hdi, implicit-def $hdi, implicit $dih, implicit-def $dih, implicit $dil, implicit-def $dil, implicit $di, implicit-def $di, implicit $edi, implicit-def $edi + ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 6, csr_64, implicit $rsp, implicit $eflags, implicit $ssp, implicit $rsp, implicit $rdi, implicit $rsi, implicit $rdi, implicit-def $rdi, implicit $hsi, implicit-def $hsi, implicit $sih, implicit-def $sih, implicit $sil, implicit-def $sil, implicit $si, implicit-def $si, implicit $esi, implicit-def $esi, implicit $rsi, implicit-def $rsi, implicit $hdi, implicit-def $hdi, implicit $dih, implicit-def $dih, implicit $dil, implicit-def $dil, implicit $di, implicit-def $di, implicit $edi, implicit-def $edi bb.1: successors: %bb.2, %bb.3 diff --git a/tools/llvm-exegesis/lib/X86/Target.cpp b/tools/llvm-exegesis/lib/X86/Target.cpp index 369ed2f97d7..3acde820c37 100644 --- a/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/tools/llvm-exegesis/lib/X86/Target.cpp @@ -32,6 +32,7 @@ static Error isInvalidMemoryInstr(const Instruction &Instr) { case X86II::MRMSrcReg: case X86II::MRMSrcReg4VOp3: case X86II::MRMSrcRegOp4: + case X86II::MRMSrcRegCC: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: @@ -118,6 +119,7 @@ static Error isInvalidMemoryInstr(const Instruction &Instr) { case 
X86II::MRMSrcMem: case X86II::MRMSrcMem4VOp3: case X86II::MRMSrcMemOp4: + case X86II::MRMSrcMemCC: case X86II::MRMXm: case X86II::MRM0m: case X86II::MRM1m: diff --git a/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp index 0f8dc1f39c6..8b110f8c8a5 100644 --- a/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -227,19 +227,20 @@ TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { } TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { - // CMOVA32rr has tied variables, we enumerate the possible values to execute + // CMOV32rr has tied variables, we enumerate the possible values to execute // as many in parallel as possible. - // - CMOVA32rr + // - CMOV32rr // - Op0 Explicit Def RegClass(GR32) // - Op1 Explicit Use RegClass(GR32) TiedToOp0 // - Op2 Explicit Use RegClass(GR32) + // - Op3 Explicit Use Immediate // - Op3 Implicit Use Reg(EFLAGS) // - Var0 [Op0,Op1] // - Var1 [Op2] // - hasTiedRegisters (execution is always serial) // - hasAliasingRegisters - const unsigned Opcode = llvm::X86::CMOVA32rr; + const unsigned Opcode = llvm::X86::CMOV32rr; const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; @@ -249,7 +250,7 @@ TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount)); std::unordered_set AllDefRegisters; for (const auto &IT : CT.Instructions) { - ASSERT_THAT(IT.VariableValues, SizeIs(2)); + ASSERT_THAT(IT.VariableValues, SizeIs(3)); AllDefRegisters.insert(IT.VariableValues[0].getReg()); } EXPECT_THAT(AllDefRegisters, SizeIs(kInstructionCount)) diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 3c537e9176c..3f1ef968ef7 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -580,6 +580,13 @@ 
void RecognizableInstr::emitInstructionSpecifier() { HANDLE_OPERAND(rmRegister) HANDLE_OPTIONAL(immediate) break; + case X86Local::MRMSrcRegCC: + assert(numPhysicalOperands == 3 && + "Unexpected number of operands for MRMSrcRegCC"); + HANDLE_OPERAND(roRegister) + HANDLE_OPERAND(rmRegister) + HANDLE_OPERAND(opcodeModifier) + break; case X86Local::MRMSrcMem: // Operand 1 is a register operand in the Reg/Opcode field. // Operand 2 is a memory operand (possibly SIB-extended) @@ -620,6 +627,13 @@ void RecognizableInstr::emitInstructionSpecifier() { HANDLE_OPERAND(memory) HANDLE_OPTIONAL(immediate) break; + case X86Local::MRMSrcMemCC: + assert(numPhysicalOperands == 3 && + "Unexpected number of operands for MRMSrcMemCC"); + HANDLE_OPERAND(roRegister) + HANDLE_OPERAND(memory) + HANDLE_OPERAND(opcodeModifier) + break; case X86Local::MRMXr: case X86Local::MRM0r: case X86Local::MRM1r: @@ -729,6 +743,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcReg: case X86Local::MRMSrcReg4VOp3: case X86Local::MRMSrcRegOp4: + case X86Local::MRMSrcRegCC: case X86Local::MRMXr: filter = llvm::make_unique(true); break; @@ -736,6 +751,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcMem: case X86Local::MRMSrcMem4VOp3: case X86Local::MRMSrcMemOp4: + case X86Local::MRMSrcMemCC: case X86Local::MRMXm: filter = llvm::make_unique(false); break; @@ -768,14 +784,14 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { assert(opcodeType && "Opcode type not set"); assert(filter && "Filter not set"); - if (Form == X86Local::AddRegFrm) { - assert(((opcodeToSet & 7) == 0) && - "ADDREG_FRM opcode not aligned"); + if (Form == X86Local::AddRegFrm || Form == X86Local::MRMSrcRegCC || + Form == X86Local::MRMSrcMemCC) { + unsigned Count = Form == X86Local::AddRegFrm ? 
8 : 16; + assert(((opcodeToSet % Count) == 0) && "ADDREG_FRM opcode not aligned"); uint8_t currentOpcode; - for (currentOpcode = opcodeToSet; - currentOpcode < opcodeToSet + 8; + for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + Count; ++currentOpcode) tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter, UID, Is32Bit, OpPrefix == 0, @@ -850,6 +866,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("i64i32imm_pcrel", TYPE_REL) TYPE("i16imm_pcrel", TYPE_REL) TYPE("i32imm_pcrel", TYPE_REL) + TYPE("ccode", TYPE_IMM) TYPE("AVX512RC", TYPE_IMM) TYPE("brtarget32", TYPE_REL) TYPE("brtarget16", TYPE_REL) @@ -1165,6 +1182,7 @@ RecognizableInstr::opcodeModifierEncodingFromString(const std::string &s, ENCODING("GR64", ENCODING_RO) ENCODING("GR16", ENCODING_Rv) ENCODING("GR8", ENCODING_RB) + ENCODING("ccode", ENCODING_CC) errs() << "Unhandled opcode modifier encoding " << s << "\n"; llvm_unreachable("Unhandled opcode modifier encoding"); } diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h index 7b703fcfca0..73ae6f3c6d6 100644 --- a/utils/TableGen/X86RecognizableInstr.h +++ b/utils/TableGen/X86RecognizableInstr.h @@ -105,6 +105,7 @@ namespace X86Local { MRMSrcMem = 33, MRMSrcMem4VOp3 = 34, MRMSrcMemOp4 = 35, + MRMSrcMemCC = 36, MRMXm = 39, MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47, @@ -112,6 +113,7 @@ namespace X86Local { MRMSrcReg = 49, MRMSrcReg4VOp3 = 50, MRMSrcRegOp4 = 51, + MRMSrcRegCC = 52, MRMXr = 55, MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63,