From: Craig Topper Date: Wed, 3 Apr 2019 23:28:30 +0000 (+0000) Subject: [X86] Remove CustomInserter pseudos for MONITOR/MONITORX/CLZERO. Use custom instructi... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=364edc291c54c9a276e2f4fe75e0935146a16ea4;p=llvm [X86] Remove CustomInserter pseudos for MONITOR/MONITORX/CLZERO. Use custom instruction selection instead. This custom inserter existed so we could do a weird thing where we pretended that the instructions support a full address mode instead of taking a pointer in EAX/RAX. I think this was largely so we could be pointer size agnostic in the isel pattern. To make this work we would then put the address into an LEA into EAX/RAX in front of the instruction after isel. But the LEA is overkill when we just have a base pointer. So we end up using the LEA as a slower MOV instruction. With this change we now just do custom selection during isel instead and just assign the incoming address of the intrinsic into EAX/RAX based on its size. After the intrinsic is selected, we can let isel take care of selecting an LEA or other operation to do any address computation needed in this basic block. I've also split the instruction into a 32-bit mode version and a 64-bit mode version so the implicit use is properly sized based on the pointer. Without this we get comments in the assembly output about killing eax and defining rax or vice versa depending on whether we define the instruction to use EAX/RAX. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357652 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 8d18a1f8138..272be22996f 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3420,6 +3420,61 @@ void X86DAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { default: break; + case ISD::INTRINSIC_VOID: { + unsigned IntNo = Node->getConstantOperandVal(1); + switch (IntNo) { + default: break; + case Intrinsic::x86_sse3_monitor: + case Intrinsic::x86_monitorx: + case Intrinsic::x86_clzero: { + bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64; + + unsigned Opc = 0; + switch (IntNo) { + case Intrinsic::x86_sse3_monitor: + if (!Subtarget->hasSSE3()) + break; + Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr; + break; + case Intrinsic::x86_monitorx: + if (!Subtarget->hasMWAITX()) + break; + Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr; + break; + case Intrinsic::x86_clzero: + if (!Subtarget->hasCLZERO()) + break; + Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r; + break; + } + + if (Opc) { + unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX; + SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg, + Node->getOperand(2), SDValue()); + SDValue InFlag = Chain.getValue(1); + + if (IntNo == Intrinsic::x86_sse3_monitor || + IntNo == Intrinsic::x86_monitorx) { + // Copy the other two operands to ECX and EDX. 
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3), + InFlag); + InFlag = Chain.getValue(1); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4), + InFlag); + InFlag = Chain.getValue(1); + } + + MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, + { Chain, InFlag}); + ReplaceNode(Node, CNode); + return; + } + } + } + + break; + } case ISD::BRIND: { if (Subtarget->isTargetNaCl()) // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a05fd557ca2..99cf13262f3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -28306,49 +28306,6 @@ static MachineBasicBlock *emitRDPKRU(MachineInstr &MI, MachineBasicBlock *BB, return BB; } -static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB, - const X86Subtarget &Subtarget, - unsigned Opc) { - DebugLoc dl = MI.getDebugLoc(); - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - // Address into RAX/EAX, other two args into ECX, EDX. - unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r; - unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); - for (int i = 0; i < X86::AddrNumOperands; ++i) - MIB.add(MI.getOperand(i)); - - unsigned ValOps = X86::AddrNumOperands; - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) - .addReg(MI.getOperand(ValOps).getReg()); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX) - .addReg(MI.getOperand(ValOps + 1).getReg()); - - // The instruction doesn't actually take any operands though. - BuildMI(*BB, MI, dl, TII->get(Opc)); - - MI.eraseFromParent(); // The pseudo is gone now. 
- return BB; -} - -static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB, - const X86Subtarget &Subtarget) { - DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - // Address into RAX/EAX - unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r; - unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); - for (int i = 0; i < X86::AddrNumOperands; ++i) - MIB.add(MI->getOperand(i)); - - // The instruction doesn't actually take any operands though. - BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr)); - - MI->eraseFromParent(); // The pseudo is gone now. - return BB; -} - MachineBasicBlock * @@ -30460,15 +30417,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } - // Thread synchronization. - case X86::MONITOR: - return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr); - case X86::MONITORX: - return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr); - - // Cache line zero - case X86::CLZERO: - return emitClzero(&MI, BB, Subtarget); // PKU feature case X86::WRPKRU: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f9e729077f3..e399f8b862e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2647,16 +2647,12 @@ defm LWPVAL64 : lwpval_intr, VEX_W; // MONITORX/MWAITX Instructions // let SchedRW = [ WriteSystem ] in { - let usesCustomInserter = 1 in { - def MONITORX : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), - [(int_x86_monitorx addr:$src1, GR32:$src2, GR32:$src3)]>, - Requires<[ HasMWAITX ]>; - } - - let Uses = [ EAX, ECX, EDX ] in { - def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>, - TB, Requires<[ HasMWAITX ]>; - } + let Uses = [ EAX, ECX, EDX ] in + def MONITORX32rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>, + TB, Requires<[ 
HasMWAITX, Not64BitMode ]>; + let Uses = [ RAX, ECX, EDX ] in + def MONITORX64rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>, + TB, Requires<[ HasMWAITX, In64BitMode ]>; let Uses = [ ECX, EAX, EBX ] in { def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", @@ -2670,9 +2666,9 @@ def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>, def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>, Requires<[ In64BitMode ]>; -def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>, +def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORX32rrr)>, Requires<[ Not64BitMode ]>; -def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>, +def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORX64rrr)>, Requires<[ In64BitMode ]>; //===----------------------------------------------------------------------===// @@ -2736,17 +2732,15 @@ def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), // let SchedRW = [WriteSystem] in { let Uses = [EAX] in - def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, - TB, Requires<[HasCLZERO]>; - - let usesCustomInserter = 1 in { - def CLZERO : PseudoI<(outs), (ins i32mem:$src1), - [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>; - } + def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, + TB, Requires<[HasCLZERO, Not64BitMode]>; + let Uses = [RAX] in + def CLZERO64r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, + TB, Requires<[HasCLZERO, In64BitMode]>; } // SchedRW -def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>; -def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>; +def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>; +def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Pattern fragments to auto generate TBM 
instructions. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c3f471edb5a..5bc9b3e26ec 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5044,15 +5044,12 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in //===---------------------------------------------------------------------===// let SchedRW = [WriteSystem] in { -let usesCustomInserter = 1 in { -def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), - [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, - Requires<[HasSSE3]>; -} - let Uses = [EAX, ECX, EDX] in -def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, - TB, Requires<[HasSSE3]>; +def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, + TB, Requires<[HasSSE3, Not64BitMode]>; +let Uses = [RAX, ECX, EDX] in +def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, + TB, Requires<[HasSSE3, In64BitMode]>; let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", @@ -5062,9 +5059,9 @@ def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>; def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>; -def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>, +def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>, Requires<[Not64BitMode]>; -def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>, +def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index bf50aeee1df..3ebd885753b 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -741,7 +741,7 @@ def AtomWrite01_45 : 
SchedWriteRes<[AtomPort01]> { let Latency = 45; let ResourceCycles = [45]; } -def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>; +def : InstRW<[AtomWrite01_45], (instrs MONITOR32rrr, MONITOR64rrr)>; def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> { let Latency = 46; diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll index 859f33ad357..38866c7cd45 100644 --- a/test/CodeGen/X86/apm.ll +++ b/test/CodeGen/X86/apm.ll @@ -8,23 +8,22 @@ define void @foo(i8* %P, i32 %E, i32 %H) nounwind { ; X86-LABEL: foo: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: monitor ; X86-NEXT: retl ; ; X64-LABEL: foo: ; X64: # %bb.0: # %entry ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: leaq (%rdi), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: monitor ; X64-NEXT: retq ; ; WIN64-LABEL: foo: ; WIN64: # %bb.0: # %entry -; WIN64-NEXT: leaq (%rcx), %rax +; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: movl %edx, %ecx ; WIN64-NEXT: movl %r8d, %edx ; WIN64-NEXT: monitor diff --git a/test/CodeGen/X86/clzero.ll b/test/CodeGen/X86/clzero.ll index d08470dda92..a185cb7d05b 100644 --- a/test/CodeGen/X86/clzero.ll +++ b/test/CodeGen/X86/clzero.ll @@ -5,14 +5,13 @@ define void @foo(i8* %p) #0 { ; X64-LABEL: foo: ; X64: # %bb.0: # %entry -; X64-NEXT: leaq (%rdi), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: clzero ; X64-NEXT: retq ; ; X32-LABEL: foo: ; X32: # %bb.0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: leal (%eax), %eax ; X32-NEXT: clzero ; X32-NEXT: retl entry: diff --git a/test/CodeGen/X86/mwaitx.ll b/test/CodeGen/X86/mwaitx.ll index 4895297966d..202a360ff2c 100644 --- a/test/CodeGen/X86/mwaitx.ll +++ b/test/CodeGen/X86/mwaitx.ll @@ -8,13 +8,13 @@ define void @foo(i8* %P, i32 %E, i32 %H) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: leaq (%rdi), %rax +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: monitorx ; CHECK-NEXT: retq ; ; WIN64-LABEL: foo: ; WIN64: # %bb.0: # %entry -; WIN64-NEXT: leaq (%rcx), %rax +; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: movl %edx, %ecx ; WIN64-NEXT: movl %r8d, %edx ; WIN64-NEXT: monitorx diff --git a/test/CodeGen/X86/sse3-intrinsics-x86.ll b/test/CodeGen/X86/sse3-intrinsics-x86.ll index dfeb3cf378e..7124712ae49 100644 --- a/test/CodeGen/X86/sse3-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse3-intrinsics-x86.ll @@ -134,17 +134,16 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly define void @monitor(i8* %P, i32 %E, i32 %H) nounwind { ; X86-LABEL: monitor: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c] -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: leal (%eax), %eax ## encoding: [0x8d,0x00] +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: monitor ## encoding: [0x0f,0x01,0xc8] ; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: monitor: ; X64: ## %bb.0: ; X64-NEXT: movl %esi, %ecx ## encoding: [0x89,0xf1] -; X64-NEXT: leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07] +; X64-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] ; X64-NEXT: monitor ## encoding: [0x0f,0x01,0xc8] ; X64-NEXT: retq ## encoding: [0xc3] tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)