From 0a2d836fd998ef546fb78e24ebb04d210cb8a948 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Aug 2019 18:23:37 +0000 Subject: [PATCH] [X86] Add custom type legalization for bitcasting mmx to v2i32/v4i16/v8i8 to use movq2dq instead of going through memory. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369031 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 14 ++ lib/Target/X86/X86ISelLowering.h | 4 + lib/Target/X86/X86InstrMMX.td | 7 ++ test/CodeGen/X86/3dnow-intrinsics.ll | 24 ++-- test/CodeGen/X86/mmx-arg-passing-x86-64.ll | 9 +- test/CodeGen/X86/mmx-arith.ll | 126 ++++++++------------- test/CodeGen/X86/mmx-cvt.ll | 16 +-- test/CodeGen/X86/vec_extract-mmx.ll | 16 +-- test/CodeGen/X86/vec_insert-7.ll | 5 +- 9 files changed, 99 insertions(+), 122 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7e36a05e8b1..fa6cc53df3d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -27823,6 +27823,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } + if (DstVT.isVector() && SrcVT == MVT::x86mmx) { + assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector && + "Unexpected type action!"); + EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT); + // X86ISD::MOVQ2DQ is declared in X86InstrMMX.td as producing v2i64, so + // create it with that type and bitcast to the widened result type. + SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, + N->getOperand(0)); + Res = DAG.getBitcast(WideVT, Res); + Results.push_back(Res); + return; + } + return; } case ISD::MGATHER: { @@ -27934,6 +27947,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; + case X86ISD::MOVQ2DQ: return "X86ISD::MOVQ2DQ"; case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q"; case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W"; case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D"; diff --git a/lib/Target/X86/X86ISelLowering.h 
b/lib/Target/X86/X86ISelLowering.h index 09b0f6bc42b..3b39c85a6a8 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -144,6 +144,10 @@ namespace llvm { /// relative displacements. WrapperRIP, + /// Copies a 64-bit value from an MMX vector to the low quadword + /// of an XMM vector, with the high quadword zero filled. + MOVQ2DQ, + /// Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. MOVDQ2Q, diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 54d4757dbd2..5125edf7834 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -567,6 +567,13 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (int_x86_mmx_pmovmskb VR64:$src))]>, Sched<[WriteMMXMOVMSK]>; +// MMX to XMM for vector types +def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, + [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; + +def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), + (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; + // Low word of XMM to MMX. 
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; diff --git a/test/CodeGen/X86/3dnow-intrinsics.ll b/test/CodeGen/X86/3dnow-intrinsics.ll index cb23655c039..611ba11c510 100644 --- a/test/CodeGen/X86/3dnow-intrinsics.ll +++ b/test/CodeGen/X86/3dnow-intrinsics.ll @@ -13,8 +13,7 @@ define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind rea ; X64-LABEL: test_pavgusb: ; X64: # %bb.0: # %entry ; X64-NEXT: pavgusb %mm1, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast x86_mmx %a.coerce to <8 x i8> @@ -50,8 +49,7 @@ define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone { ; X64: # %bb.0: # %entry ; X64-NEXT: movdq2q %xmm0, %mm0 ; X64-NEXT: pf2id %mm0, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx @@ -166,8 +164,7 @@ define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone ; X64-NEXT: movdq2q %xmm1, %mm0 ; X64-NEXT: movdq2q %xmm0, %mm1 ; X64-NEXT: pfcmpeq %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm1, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx @@ -205,8 +202,7 @@ define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone ; X64-NEXT: movdq2q %xmm1, %mm0 ; X64-NEXT: movdq2q %xmm0, %mm1 ; X64-NEXT: pfcmpge %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm1, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx @@ -244,8 +240,7 @@ define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone ; X64-NEXT: movdq2q %xmm1, %mm0 ; X64-NEXT: movdq2q %xmm0, %mm1 ; X64-NEXT: pfcmpgt %mm0, %mm1 
-; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm1, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx @@ -679,8 +674,7 @@ define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind re ; X64-LABEL: test_pmulhrw: ; X64: # %bb.0: # %entry ; X64-NEXT: pmulhrw %mm1, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast x86_mmx %a.coerce to <4 x i16> @@ -716,8 +710,7 @@ define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone { ; X64: # %bb.0: # %entry ; X64-NEXT: movdq2q %xmm0, %mm0 ; X64-NEXT: pf2iw %mm0, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx @@ -891,8 +884,7 @@ define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone { ; X64: # %bb.0: # %entry ; X64-NEXT: movdq2q %xmm0, %mm0 ; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: retq entry: %0 = bitcast <2 x i32> %a to x86_mmx diff --git a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll index 1f654109ab8..85fa9bb2a8f 100644 --- a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll +++ b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll @@ -22,12 +22,11 @@ define void @t3() nounwind { define void @t4(x86_mmx %v1, x86_mmx %v2) nounwind { ; X86-64-LABEL: t4: ; X86-64: ## %bb.0: -; X86-64-NEXT: movdq2q %xmm0, %mm0 -; X86-64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) ; X86-64-NEXT: movdq2q %xmm1, %mm0 -; X86-64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X86-64-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 -; X86-64-NEXT: paddb -{{[0-9]+}}(%rsp), %xmm0 +; X86-64-NEXT: movdq2q %xmm0, %mm1 +; X86-64-NEXT: movq2dq %mm1, %xmm1 +; 
X86-64-NEXT: movq2dq %mm0, %xmm0 +; X86-64-NEXT: paddb %xmm1, %xmm0 ; X86-64-NEXT: movb $1, %al ; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL %v1a = bitcast x86_mmx %v1 to <8 x i8> diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll index 0654606108d..e9c86af6503 100644 --- a/test/CodeGen/X86/mmx-arith.ll +++ b/test/CodeGen/X86/mmx-arith.ll @@ -8,15 +8,8 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) { ; X32-LABEL: test0: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl 12(%ebp), %ecx -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: paddb %xmm0, %xmm1 @@ -25,9 +18,8 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) { ; X32-NEXT: paddsb (%ecx), %mm0 ; X32-NEXT: movq %mm0, (%eax) ; X32-NEXT: paddusb (%ecx), %mm0 -; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp) -; X32-NEXT: movdqa {{[0-9]+}}(%esp), %xmm0 ; X32-NEXT: movq %mm0, (%eax) +; X32-NEXT: movq2dq %mm0, %xmm0 ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: psubb %xmm1, %xmm0 ; X32-NEXT: movdq2q %xmm0, %mm0 @@ -35,29 +27,25 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) { ; X32-NEXT: psubsb (%ecx), %mm0 ; X32-NEXT: movq %mm0, (%eax) ; X32-NEXT: psubusb (%ecx), %mm0 -; X32-NEXT: movq %mm0, (%esp) -; X32-NEXT: movdqa (%esp), %xmm0 ; X32-NEXT: movq %mm0, (%eax) +; X32-NEXT: movq2dq %mm0, %xmm0 ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-NEXT: pmullw %xmm1, %xmm0 -; X32-NEXT: 
pand {{\.LCPI.*}}, %xmm0 -; X32-NEXT: packuswb %xmm0, %xmm0 -; X32-NEXT: movq %xmm0, (%eax) -; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: pand %xmm0, %xmm1 +; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-NEXT: pmullw %xmm0, %xmm1 +; X32-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-NEXT: packuswb %xmm1, %xmm1 ; X32-NEXT: movq %xmm1, (%eax) ; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: por %xmm1, %xmm0 +; X32-NEXT: pand %xmm1, %xmm0 ; X32-NEXT: movq %xmm0, (%eax) ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: pxor %xmm0, %xmm1 +; X32-NEXT: por %xmm0, %xmm1 ; X32-NEXT: movq %xmm1, (%eax) +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: pxor %xmm1, %xmm0 +; X32-NEXT: movq %xmm0, (%eax) ; X32-NEXT: emms -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; ; X64-LABEL: test0: @@ -70,9 +58,8 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) { ; X64-NEXT: paddsb (%rsi), %mm0 ; X64-NEXT: movq %mm0, (%rdi) ; X64-NEXT: paddusb (%rsi), %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 ; X64-NEXT: movq %mm0, (%rdi) +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: psubb %xmm1, %xmm0 ; X64-NEXT: movdq2q %xmm0, %mm0 @@ -80,25 +67,24 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) { ; X64-NEXT: psubsb (%rsi), %mm0 ; X64-NEXT: movq %mm0, (%rdi) ; X64-NEXT: psubusb (%rsi), %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 ; X64-NEXT: movq %mm0, (%rdi) +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X64-NEXT: pmullw %xmm1, %xmm0 -; X64-NEXT: pand {{.*}}(%rip), %xmm0 -; X64-NEXT: packuswb %xmm0, %xmm0 -; X64-NEXT: movq %xmm0, (%rdi) -; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-NEXT: pand %xmm0, %xmm1 +; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-NEXT: pmullw %xmm0, %xmm1 +; X64-NEXT: pand {{.*}}(%rip), %xmm1 +; X64-NEXT: packuswb %xmm1, %xmm1 ; X64-NEXT: movq %xmm1, (%rdi) ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: por %xmm1, %xmm0 +; X64-NEXT: pand %xmm1, %xmm0 ; X64-NEXT: movq %xmm0, (%rdi) ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-NEXT: pxor %xmm0, %xmm1 +; X64-NEXT: por %xmm0, %xmm1 ; X64-NEXT: movq %xmm1, (%rdi) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: pxor %xmm1, %xmm0 +; X64-NEXT: movq %xmm0, (%rdi) ; X64-NEXT: emms ; X64-NEXT: retq entry: @@ -253,15 +239,8 @@ entry: define void @test2(x86_mmx* %A, x86_mmx* %B) { ; X32-LABEL: test2: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $64, %esp -; X32-NEXT: movl 12(%ebp), %ecx -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: paddw %xmm0, %xmm1 @@ -270,9 +249,8 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) { ; X32-NEXT: paddsw (%ecx), %mm0 ; X32-NEXT: movq %mm0, (%eax) ; X32-NEXT: paddusw (%ecx), %mm0 -; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp) -; X32-NEXT: movdqa {{[0-9]+}}(%esp), %xmm0 ; X32-NEXT: movq %mm0, (%eax) +; X32-NEXT: movq2dq %mm0, %xmm0 ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: psubw %xmm1, %xmm0 ; X32-NEXT: movdq2q 
%xmm0, %mm0 @@ -280,30 +258,27 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) { ; X32-NEXT: psubsw (%ecx), %mm0 ; X32-NEXT: movq %mm0, (%eax) ; X32-NEXT: psubusw (%ecx), %mm0 -; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp) ; X32-NEXT: movq %mm0, (%eax) -; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: pmullw {{[0-9]+}}(%esp), %xmm0 -; X32-NEXT: movdq2q %xmm0, %mm0 -; X32-NEXT: movq %xmm0, (%eax) +; X32-NEXT: movq2dq %mm0, %xmm0 +; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X32-NEXT: pmullw %xmm0, %xmm1 +; X32-NEXT: movdq2q %xmm1, %mm0 +; X32-NEXT: movq %xmm1, (%eax) ; X32-NEXT: pmulhw (%ecx), %mm0 ; X32-NEXT: movq %mm0, (%eax) ; X32-NEXT: pmaddwd (%ecx), %mm0 -; X32-NEXT: movq %mm0, (%esp) ; X32-NEXT: movq %mm0, (%eax) -; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: andps (%esp), %xmm0 -; X32-NEXT: movlps %xmm0, (%eax) +; X32-NEXT: movq2dq %mm0, %xmm0 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: orps %xmm0, %xmm1 +; X32-NEXT: andps %xmm0, %xmm1 ; X32-NEXT: movlps %xmm1, (%eax) ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: xorps %xmm1, %xmm0 +; X32-NEXT: orps %xmm1, %xmm0 ; X32-NEXT: movlps %xmm0, (%eax) +; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X32-NEXT: xorps %xmm0, %xmm1 +; X32-NEXT: movlps %xmm1, (%eax) ; X32-NEXT: emms -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; ; X64-LABEL: test2: @@ -316,9 +291,8 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) { ; X64-NEXT: paddsw (%rsi), %mm0 ; X64-NEXT: movq %mm0, (%rdi) ; X64-NEXT: paddusw (%rsi), %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 ; X64-NEXT: movq %mm0, (%rdi) +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: psubw %xmm1, %xmm0 ; X64-NEXT: movdq2q %xmm0, %mm0 @@ -326,26 +300,26 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) { ; X64-NEXT: psubsw (%rsi), %mm0 ; X64-NEXT: movq %mm0, (%rdi) ; X64-NEXT: psubusw (%rsi), 
%mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq %mm0, (%rdi) -; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: pmullw -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: movdq2q %xmm0, %mm0 -; X64-NEXT: movq %xmm0, (%rdi) +; X64-NEXT: movq2dq %mm0, %xmm0 +; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: pmullw %xmm0, %xmm1 +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movq %xmm1, (%rdi) ; X64-NEXT: pmulhw (%rsi), %mm0 ; X64-NEXT: movq %mm0, (%rdi) ; X64-NEXT: pmaddwd (%rsi), %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq %mm0, (%rdi) -; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: andps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: movlps %xmm0, (%rdi) +; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X64-NEXT: orps %xmm0, %xmm1 +; X64-NEXT: andps %xmm0, %xmm1 ; X64-NEXT: movlps %xmm1, (%rdi) ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: xorps %xmm1, %xmm0 +; X64-NEXT: orps %xmm1, %xmm0 ; X64-NEXT: movlps %xmm0, (%rdi) +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: xorps %xmm0, %xmm1 +; X64-NEXT: movlps %xmm1, (%rdi) ; X64-NEXT: emms ; X64-NEXT: retq entry: diff --git a/test/CodeGen/X86/mmx-cvt.ll b/test/CodeGen/X86/mmx-cvt.ll index 5f6a8885b61..339df30892a 100644 --- a/test/CodeGen/X86/mmx-cvt.ll +++ b/test/CodeGen/X86/mmx-cvt.ll @@ -294,26 +294,20 @@ define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind { define <4 x float> @sitofp_v2i32_v2f32(<1 x i64>*) nounwind { ; X86-LABEL: sitofp_v2i32_v2f32: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp -; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movq (%eax), %mm0 ; X86-NEXT: paddd %mm0, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: movq2dq %mm0, %xmm0 +; X86-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; X86-NEXT: cvtdq2ps %xmm0, %xmm0 -; X86-NEXT: movl 
%ebp, %esp -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: sitofp_v2i32_v2f32: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %mm0 ; X64-NEXT: paddd %mm0, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: movq2dq %mm0, %xmm0 +; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; X64-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq %2 = bitcast <1 x i64>* %0 to x86_mmx* diff --git a/test/CodeGen/X86/vec_extract-mmx.ll b/test/CodeGen/X86/vec_extract-mmx.ll index e6a7232adbf..5ce632d0aa1 100644 --- a/test/CodeGen/X86/vec_extract-mmx.ll +++ b/test/CodeGen/X86/vec_extract-mmx.ll @@ -113,20 +113,16 @@ define i32 @test3(x86_mmx %a) nounwind { define i32 @test4(x86_mmx %a) nounwind { ; X32-LABEL: test4: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $32, %esp -; X32-NEXT: movq %mm0, (%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp +; X32-NEXT: movq2dq %mm0, %xmm0 +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X32-NEXT: movd %xmm0, %eax ; X32-NEXT: retl ; ; X64-LABEL: test4: ; X64: # %bb.0: -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movq2dq %mm0, %xmm0 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X64-NEXT: movd %xmm0, %eax ; X64-NEXT: retq %tmp0 = bitcast x86_mmx %a to <2 x i32> %tmp1 = extractelement <2 x i32> %tmp0, i32 1 diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll index 2c719e61c57..52d6e7ca7a2 100644 --- a/test/CodeGen/X86/vec_insert-7.ll +++ b/test/CodeGen/X86/vec_insert-7.ll @@ -8,15 +8,12 @@ define x86_mmx @mmx_movzl(x86_mmx %x) nounwind { ; X32-LABEL: mmx_movzl: ; X32: ## %bb.0: -; X32-NEXT: subl $28, %esp -; X32-NEXT: movq %mm0, (%esp) -; X32-NEXT: movdqa (%esp), %xmm0 +; X32-NEXT: movq2dq %mm0, %xmm0 ; X32-NEXT: movl $32, %eax ; X32-NEXT: pinsrd $0, %eax, %xmm0 ; X32-NEXT: pxor %xmm1, %xmm1 ; 
X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] ; X32-NEXT: movdq2q %xmm1, %mm0 -; X32-NEXT: addl $28, %esp ; X32-NEXT: retl ; ; X64-LABEL: mmx_movzl: -- 2.40.0