From 8201c54c00281f1707b9f51333b50e19ce8e9835 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 17 Feb 2019 19:23:49 +0000
Subject: [PATCH] [X86] In FP_TO_INTHelper, when moving data from SSE register
 to X87 register file via the stack, use the same stack slot we use for the
 integer conversion.

No need for a separate stack slot. The lifetimes don't overlap.

Also fix the MachinePointerInfo for the final load after the integer
conversion to indicate it came from the stack slot.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354234 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp   |  22 ++--
 test/CodeGen/X86/scalar-fp-to-i64.ll | 136 +++++++++++++--------------
 test/CodeGen/X86/vec_cast3.ll        |  10 +-
 3 files changed, 82 insertions(+), 86 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 068253d85c5..e033c211943 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -18104,7 +18104,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
   // We lower FP->int64 into FISTP64 followed by a load from a temporary
   // stack slot.
   MachineFunction &MF = DAG.getMachineFunction();
-  unsigned MemSize = DstTy.getSizeInBits()/8;
+  unsigned MemSize = DstTy.getStoreSize();
   int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);

@@ -18160,37 +18160,33 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
   }

+  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
+
   // FIXME This causes a redundant load/store if the SSE-class value is already
   // in memory, such as if it is on the callstack.
   if (isScalarFPTypeInSSEReg(TheVT)) {
     assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
-    Chain = DAG.getStore(Chain, DL, Value, StackSlot,
-                         MachinePointerInfo::getFixedStack(MF, SSFI));
+    Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
     SDVTList Tys = DAG.getVTList(TheVT, MVT::Other);
     SDValue Ops[] = { Chain, StackSlot };

     unsigned FLDSize = TheVT.getStoreSize();
     assert(FLDSize <= MemSize && "Stack slot not big enough");
-    MachineMemOperand *MMO =
-        MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
-                                MachineMemOperand::MOLoad, FLDSize, FLDSize);
+    MachineMemOperand *MMO = MF.getMachineMemOperand(
+        MPI, MachineMemOperand::MOLoad, FLDSize, FLDSize);
     Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO);
     Chain = Value.getValue(1);
-    SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
-    StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
   }

   // Build the FP_TO_INT*_IN_MEM
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
-                              MachineMemOperand::MOStore, MemSize, MemSize);
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MPI, MachineMemOperand::MOStore, MemSize, MemSize);

   SDValue Ops[] = { Chain, Value, StackSlot };
   SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
                                          DAG.getVTList(MVT::Other), Ops, DstTy, MMO);
-  SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot,
-                            MachinePointerInfo());
+  SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI);

   // If we need an unsigned fixup, XOR the result with adjust.
   if (UnsignedFixup)
diff --git a/test/CodeGen/X86/scalar-fp-to-i64.ll b/test/CodeGen/X86/scalar-fp-to-i64.ll
index 402ff12e7de..ec9156b1a94 100644
--- a/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -85,14 +85,14 @@ define i64 @f_to_u64(float %a) nounwind {
 ; AVX512F_32_WIN-NEXT: pushl %ebp
 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp
 ; AVX512F_32_WIN-NEXT: andl $-8, %esp
-; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: subl $8, %esp
 ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX512F_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
 ; AVX512F_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
 ; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512F_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
-; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: vmovss %xmm2, (%esp)
+; AVX512F_32_WIN-NEXT: flds (%esp)
 ; AVX512F_32_WIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_WIN-NEXT: xorl %edx, %edx
 ; AVX512F_32_WIN-NEXT: vucomiss %xmm0, %xmm1
@@ -106,14 +106,14 @@ define i64 @f_to_u64(float %a) nounwind {
 ;
 ; AVX512F_32_LIN-LABEL: f_to_u64:
 ; AVX512F_32_LIN: # %bb.0:
-; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: subl $12, %esp
 ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX512F_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
 ; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
 ; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512F_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
-; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: vmovss %xmm2, (%esp)
+; AVX512F_32_LIN-NEXT: flds (%esp)
 ; AVX512F_32_LIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_LIN-NEXT: xorl %edx, %edx
 ; AVX512F_32_LIN-NEXT: vucomiss %xmm0, %xmm1
@@ -121,7 +121,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ; AVX512F_32_LIN-NEXT: shll $31, %edx
 ; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax
-; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: addl $12, %esp
 ; AVX512F_32_LIN-NEXT: retl
 ;
 ; SSE3_32_WIN-LABEL: f_to_u64:
@@ -129,7 +129,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ; SSE3_32_WIN-NEXT: pushl %ebp
 ; SSE3_32_WIN-NEXT: movl %esp, %ebp
 ; SSE3_32_WIN-NEXT: andl $-8, %esp
-; SSE3_32_WIN-NEXT: subl $16, %esp
+; SSE3_32_WIN-NEXT: subl $8, %esp
 ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE3_32_WIN-NEXT: movaps %xmm0, %xmm2
@@ -141,8 +141,8 @@ define i64 @f_to_u64(float %a) nounwind {
 ; SSE3_32_WIN-NEXT: subss %xmm1, %xmm0
 ; SSE3_32_WIN-NEXT: andnps %xmm0, %xmm3
 ; SSE3_32_WIN-NEXT: orps %xmm3, %xmm2
-; SSE3_32_WIN-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
-; SSE3_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
+; SSE3_32_WIN-NEXT: movss %xmm2, (%esp)
+; SSE3_32_WIN-NEXT: flds (%esp)
 ; SSE3_32_WIN-NEXT: fisttpll (%esp)
 ; SSE3_32_WIN-NEXT: setbe %dl
 ; SSE3_32_WIN-NEXT: shll $31, %edx
@@ -154,7 +154,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ;
 ; SSE3_32_LIN-LABEL: f_to_u64:
 ; SSE3_32_LIN: # %bb.0:
-; SSE3_32_LIN-NEXT: subl $20, %esp
+; SSE3_32_LIN-NEXT: subl $12, %esp
 ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE3_32_LIN-NEXT: movaps %xmm0, %xmm2
@@ -166,14 +166,14 @@ define i64 @f_to_u64(float %a) nounwind {
 ; SSE3_32_LIN-NEXT: subss %xmm1, %xmm0
 ; SSE3_32_LIN-NEXT: andnps %xmm0, %xmm3
 ; SSE3_32_LIN-NEXT: orps %xmm3, %xmm2
-; SSE3_32_LIN-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
-; SSE3_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
+; SSE3_32_LIN-NEXT: movss %xmm2, (%esp)
+; SSE3_32_LIN-NEXT: flds (%esp)
 ; SSE3_32_LIN-NEXT: fisttpll (%esp)
 ; SSE3_32_LIN-NEXT: setbe %dl
 ; SSE3_32_LIN-NEXT: shll $31, %edx
 ; SSE3_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
 ; SSE3_32_LIN-NEXT: movl (%esp), %eax
-; SSE3_32_LIN-NEXT: addl $20, %esp
+; SSE3_32_LIN-NEXT: addl $12, %esp
 ; SSE3_32_LIN-NEXT: retl
 ;
 ; SSE3_64-LABEL: f_to_u64:
@@ -194,7 +194,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ; SSE2_32_WIN-NEXT: pushl %ebp
 ; SSE2_32_WIN-NEXT: movl %esp, %ebp
 ; SSE2_32_WIN-NEXT: andl $-8, %esp
-; SSE2_32_WIN-NEXT: subl $24, %esp
+; SSE2_32_WIN-NEXT: subl $16, %esp
 ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2_32_WIN-NEXT: movaps %xmm0, %xmm2
@@ -226,7 +226,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ;
 ; SSE2_32_LIN-LABEL: f_to_u64:
 ; SSE2_32_LIN: # %bb.0:
-; SSE2_32_LIN-NEXT: subl $28, %esp
+; SSE2_32_LIN-NEXT: subl $20, %esp
 ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2_32_LIN-NEXT: movaps %xmm0, %xmm2
@@ -252,7 +252,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ; SSE2_32_LIN-NEXT: shll $31, %edx
 ; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE2_32_LIN-NEXT: addl $28, %esp
+; SSE2_32_LIN-NEXT: addl $20, %esp
 ; SSE2_32_LIN-NEXT: retl
 ;
 ; SSE2_64-LABEL: f_to_u64:
@@ -388,10 +388,10 @@ define i64 @f_to_s64(float %a) nounwind {
 ; AVX512F_32_WIN-NEXT: pushl %ebp
 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp
 ; AVX512F_32_WIN-NEXT: andl $-8, %esp
-; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: subl $8, %esp
 ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512F_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
-; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: vmovss %xmm0, (%esp)
+; AVX512F_32_WIN-NEXT: flds (%esp)
 ; AVX512F_32_WIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_WIN-NEXT: movl (%esp), %eax
 ; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -401,14 +401,14 @@ define i64 @f_to_s64(float %a) nounwind {
 ;
 ; AVX512F_32_LIN-LABEL: f_to_s64:
 ; AVX512F_32_LIN: # %bb.0:
-; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: subl $12, %esp
 ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512F_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
-; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: vmovss %xmm0, (%esp)
+; AVX512F_32_LIN-NEXT: flds (%esp)
 ; AVX512F_32_LIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax
 ; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: addl $12, %esp
 ; AVX512F_32_LIN-NEXT: retl
 ;
 ; SSE3_32_WIN-LABEL: f_to_s64:
@@ -416,10 +416,10 @@ define i64 @f_to_s64(float %a) nounwind {
 ; SSE3_32_WIN-NEXT: pushl %ebp
 ; SSE3_32_WIN-NEXT: movl %esp, %ebp
 ; SSE3_32_WIN-NEXT: andl $-8, %esp
-; SSE3_32_WIN-NEXT: subl $16, %esp
+; SSE3_32_WIN-NEXT: subl $8, %esp
 ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE3_32_WIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
-; SSE3_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
+; SSE3_32_WIN-NEXT: movss %xmm0, (%esp)
+; SSE3_32_WIN-NEXT: flds (%esp)
 ; SSE3_32_WIN-NEXT: fisttpll (%esp)
 ; SSE3_32_WIN-NEXT: movl (%esp), %eax
 ; SSE3_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -429,14 +429,14 @@ define i64 @f_to_s64(float %a) nounwind {
 ;
 ; SSE3_32_LIN-LABEL: f_to_s64:
 ; SSE3_32_LIN: # %bb.0:
-; SSE3_32_LIN-NEXT: subl $20, %esp
+; SSE3_32_LIN-NEXT: subl $12, %esp
 ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE3_32_LIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
-; SSE3_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
+; SSE3_32_LIN-NEXT: movss %xmm0, (%esp)
+; SSE3_32_LIN-NEXT: flds (%esp)
 ; SSE3_32_LIN-NEXT: fisttpll (%esp)
 ; SSE3_32_LIN-NEXT: movl (%esp), %eax
 ; SSE3_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; SSE3_32_LIN-NEXT: addl $20, %esp
+; SSE3_32_LIN-NEXT: addl $12, %esp
 ; SSE3_32_LIN-NEXT: retl
 ;
 ; SSE3_64-LABEL: f_to_s64:
@@ -449,7 +449,7 @@ define i64 @f_to_s64(float %a) nounwind {
 ; SSE2_32_WIN-NEXT: pushl %ebp
 ; SSE2_32_WIN-NEXT: movl %esp, %ebp
 ; SSE2_32_WIN-NEXT: andl $-8, %esp
-; SSE2_32_WIN-NEXT: subl $24, %esp
+; SSE2_32_WIN-NEXT: subl $16, %esp
 ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2_32_WIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
@@ -468,7 +468,7 @@ define i64 @f_to_s64(float %a) nounwind {
 ;
 ; SSE2_32_LIN-LABEL: f_to_s64:
 ; SSE2_32_LIN: # %bb.0:
-; SSE2_32_LIN-NEXT: subl $28, %esp
+; SSE2_32_LIN-NEXT: subl $20, %esp
 ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2_32_LIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
@@ -481,7 +481,7 @@ define i64 @f_to_s64(float %a) nounwind {
 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; SSE2_32_LIN-NEXT: addl $28, %esp
+; SSE2_32_LIN-NEXT: addl $20, %esp
 ; SSE2_32_LIN-NEXT: retl
 ;
 ; SSE2_64-LABEL: f_to_s64:
@@ -575,14 +575,14 @@ define i64 @d_to_u64(double %a) nounwind {
 ; AVX512F_32_WIN-NEXT: pushl %ebp
 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp
 ; AVX512F_32_WIN-NEXT: andl $-8, %esp
-; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: subl $8, %esp
 ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; AVX512F_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
 ; AVX512F_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
 ; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512F_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
-; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: vmovsd %xmm2, (%esp)
+; AVX512F_32_WIN-NEXT: fldl (%esp)
 ; AVX512F_32_WIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_WIN-NEXT: xorl %edx, %edx
 ; AVX512F_32_WIN-NEXT: vucomisd %xmm0, %xmm1
@@ -596,14 +596,14 @@ define i64 @d_to_u64(double %a) nounwind {
 ;
 ; AVX512F_32_LIN-LABEL: d_to_u64:
 ; AVX512F_32_LIN: # %bb.0:
-; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: subl $12, %esp
 ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; AVX512F_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
 ; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
 ; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512F_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
-; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: vmovsd %xmm2, (%esp)
+; AVX512F_32_LIN-NEXT: fldl (%esp)
 ; AVX512F_32_LIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_LIN-NEXT: xorl %edx, %edx
 ; AVX512F_32_LIN-NEXT: vucomisd %xmm0, %xmm1
@@ -611,7 +611,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; AVX512F_32_LIN-NEXT: shll $31, %edx
 ; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax
-; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: addl $12, %esp
 ; AVX512F_32_LIN-NEXT: retl
 ;
 ; SSE3_32_WIN-LABEL: d_to_u64:
@@ -619,7 +619,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE3_32_WIN-NEXT: pushl %ebp
 ; SSE3_32_WIN-NEXT: movl %esp, %ebp
 ; SSE3_32_WIN-NEXT: andl $-8, %esp
-; SSE3_32_WIN-NEXT: subl $16, %esp
+; SSE3_32_WIN-NEXT: subl $8, %esp
 ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE3_32_WIN-NEXT: movapd %xmm0, %xmm2
@@ -631,8 +631,8 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE3_32_WIN-NEXT: subsd %xmm1, %xmm0
 ; SSE3_32_WIN-NEXT: andnpd %xmm0, %xmm3
 ; SSE3_32_WIN-NEXT: orpd %xmm3, %xmm2
-; SSE3_32_WIN-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
-; SSE3_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE3_32_WIN-NEXT: movsd %xmm2, (%esp)
+; SSE3_32_WIN-NEXT: fldl (%esp)
 ; SSE3_32_WIN-NEXT: fisttpll (%esp)
 ; SSE3_32_WIN-NEXT: setbe %dl
 ; SSE3_32_WIN-NEXT: shll $31, %edx
@@ -644,7 +644,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ;
 ; SSE3_32_LIN-LABEL: d_to_u64:
 ; SSE3_32_LIN: # %bb.0:
-; SSE3_32_LIN-NEXT: subl $20, %esp
+; SSE3_32_LIN-NEXT: subl $12, %esp
 ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE3_32_LIN-NEXT: movapd %xmm0, %xmm2
@@ -656,14 +656,14 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE3_32_LIN-NEXT: subsd %xmm1, %xmm0
 ; SSE3_32_LIN-NEXT: andnpd %xmm0, %xmm3
 ; SSE3_32_LIN-NEXT: orpd %xmm3, %xmm2
-; SSE3_32_LIN-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
-; SSE3_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE3_32_LIN-NEXT: movsd %xmm2, (%esp)
+; SSE3_32_LIN-NEXT: fldl (%esp)
 ; SSE3_32_LIN-NEXT: fisttpll (%esp)
 ; SSE3_32_LIN-NEXT: setbe %dl
 ; SSE3_32_LIN-NEXT: shll $31, %edx
 ; SSE3_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
 ; SSE3_32_LIN-NEXT: movl (%esp), %eax
-; SSE3_32_LIN-NEXT: addl $20, %esp
+; SSE3_32_LIN-NEXT: addl $12, %esp
 ; SSE3_32_LIN-NEXT: retl
 ;
 ; SSE3_64-LABEL: d_to_u64:
@@ -684,7 +684,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE2_32_WIN-NEXT: pushl %ebp
 ; SSE2_32_WIN-NEXT: movl %esp, %ebp
 ; SSE2_32_WIN-NEXT: andl $-8, %esp
-; SSE2_32_WIN-NEXT: subl $24, %esp
+; SSE2_32_WIN-NEXT: subl $16, %esp
 ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE2_32_WIN-NEXT: movapd %xmm0, %xmm2
@@ -716,7 +716,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ;
 ; SSE2_32_LIN-LABEL: d_to_u64:
 ; SSE2_32_LIN: # %bb.0:
-; SSE2_32_LIN-NEXT: subl $28, %esp
+; SSE2_32_LIN-NEXT: subl $20, %esp
 ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE2_32_LIN-NEXT: movapd %xmm0, %xmm2
@@ -742,7 +742,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE2_32_LIN-NEXT: shll $31, %edx
 ; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE2_32_LIN-NEXT: addl $28, %esp
+; SSE2_32_LIN-NEXT: addl $20, %esp
 ; SSE2_32_LIN-NEXT: retl
 ;
 ; SSE2_64-LABEL: d_to_u64:
@@ -878,10 +878,10 @@ define i64 @d_to_s64(double %a) nounwind {
 ; AVX512F_32_WIN-NEXT: pushl %ebp
 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp
 ; AVX512F_32_WIN-NEXT: andl $-8, %esp
-; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: subl $8, %esp
 ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512F_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
-; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: vmovsd %xmm0, (%esp)
+; AVX512F_32_WIN-NEXT: fldl (%esp)
 ; AVX512F_32_WIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_WIN-NEXT: movl (%esp), %eax
 ; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -891,14 +891,14 @@ define i64 @d_to_s64(double %a) nounwind {
 ;
 ; AVX512F_32_LIN-LABEL: d_to_s64:
 ; AVX512F_32_LIN: # %bb.0:
-; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: subl $12, %esp
 ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512F_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
-; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: vmovsd %xmm0, (%esp)
+; AVX512F_32_LIN-NEXT: fldl (%esp)
 ; AVX512F_32_LIN-NEXT: fisttpll (%esp)
 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax
 ; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: addl $12, %esp
 ; AVX512F_32_LIN-NEXT: retl
 ;
 ; SSE3_32_WIN-LABEL: d_to_s64:
@@ -906,10 +906,10 @@ define i64 @d_to_s64(double %a) nounwind {
 ; SSE3_32_WIN-NEXT: pushl %ebp
 ; SSE3_32_WIN-NEXT: movl %esp, %ebp
 ; SSE3_32_WIN-NEXT: andl $-8, %esp
-; SSE3_32_WIN-NEXT: subl $16, %esp
+; SSE3_32_WIN-NEXT: subl $8, %esp
 ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE3_32_WIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
-; SSE3_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE3_32_WIN-NEXT: movsd %xmm0, (%esp)
+; SSE3_32_WIN-NEXT: fldl (%esp)
 ; SSE3_32_WIN-NEXT: fisttpll (%esp)
 ; SSE3_32_WIN-NEXT: movl (%esp), %eax
 ; SSE3_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -919,14 +919,14 @@ define i64 @d_to_s64(double %a) nounwind {
 ;
 ; SSE3_32_LIN-LABEL: d_to_s64:
 ; SSE3_32_LIN: # %bb.0:
-; SSE3_32_LIN-NEXT: subl $20, %esp
+; SSE3_32_LIN-NEXT: subl $12, %esp
 ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE3_32_LIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
-; SSE3_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE3_32_LIN-NEXT: movsd %xmm0, (%esp)
+; SSE3_32_LIN-NEXT: fldl (%esp)
 ; SSE3_32_LIN-NEXT: fisttpll (%esp)
 ; SSE3_32_LIN-NEXT: movl (%esp), %eax
 ; SSE3_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; SSE3_32_LIN-NEXT: addl $20, %esp
+; SSE3_32_LIN-NEXT: addl $12, %esp
 ; SSE3_32_LIN-NEXT: retl
 ;
 ; SSE3_64-LABEL: d_to_s64:
@@ -939,7 +939,7 @@ define i64 @d_to_s64(double %a) nounwind {
 ; SSE2_32_WIN-NEXT: pushl %ebp
 ; SSE2_32_WIN-NEXT: movl %esp, %ebp
 ; SSE2_32_WIN-NEXT: andl $-8, %esp
-; SSE2_32_WIN-NEXT: subl $24, %esp
+; SSE2_32_WIN-NEXT: subl $16, %esp
 ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE2_32_WIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
@@ -958,7 +958,7 @@ define i64 @d_to_s64(double %a) nounwind {
 ;
 ; SSE2_32_LIN-LABEL: d_to_s64:
 ; SSE2_32_LIN: # %bb.0:
-; SSE2_32_LIN-NEXT: subl $28, %esp
+; SSE2_32_LIN-NEXT: subl $20, %esp
 ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE2_32_LIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
@@ -971,7 +971,7 @@ define i64 @d_to_s64(double %a) nounwind {
 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; SSE2_32_LIN-NEXT: addl $28, %esp
+; SSE2_32_LIN-NEXT: addl $20, %esp
 ; SSE2_32_LIN-NEXT: retl
 ;
 ; SSE2_64-LABEL: d_to_s64:
diff --git a/test/CodeGen/X86/vec_cast3.ll b/test/CodeGen/X86/vec_cast3.ll
index 6b45c853523..f89cf47c2fb 100644
--- a/test/CodeGen/X86/vec_cast3.ll
+++ b/test/CodeGen/X86/vec_cast3.ll
@@ -190,8 +190,8 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
 define <2 x i32> @cvt_v2f32_v2u32(<2 x float> %src) {
 ; CHECK-LABEL: cvt_v2f32_v2u32:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: subl $68, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 72
+; CHECK-NEXT: subl $36, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 40
 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT: vucomiss %xmm1, %xmm2
@@ -199,8 +199,8 @@ define <2 x i32> @cvt_v2f32_v2u32(<2 x float> %src) {
 ; CHECK-NEXT: ## %bb.1:
 ; CHECK-NEXT: vsubss %xmm1, %xmm2, %xmm2
 ; CHECK-NEXT: LBB11_2:
-; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: vmovss %xmm2, (%esp)
+; CHECK-NEXT: flds (%esp)
 ; CHECK-NEXT: fisttpll (%esp)
 ; CHECK-NEXT: setae %al
 ; CHECK-NEXT: movzbl %al, %eax
@@ -222,7 +222,7 @@ define <2 x i32> @cvt_v2f32_v2u32(<2 x float> %src) {
 ; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
 ; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: addl $68, %esp
+; CHECK-NEXT: addl $36, %esp
 ; CHECK-NEXT: retl
 ;
 ; CHECK-WIDE-LABEL: cvt_v2f32_v2u32:
-- 
2.40.0
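
For reference, the code path in FP_TO_INTHelper that results from the hunks above, condensed into one place. This is a sketch rather than a verbatim excerpt: it assumes the enclosing function provides DAG, MF, Chain, Value, Op, DL, PtrVT, TheVT and DstTy exactly as in the patch context, and it only repeats calls that appear in the diff, with comments added to show where the single stack slot is shared.

  // One stack object, sized for the integer result. Its MachinePointerInfo
  // is reused for every access below; the FLD reload's use of the slot ends
  // before the FIST store begins, so the lifetimes don't overlap.
  unsigned MemSize = DstTy.getStoreSize();
  int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);

  if (isScalarFPTypeInSSEReg(TheVT)) {
    // Spill the SSE value into the slot, then reload it onto the X87 stack.
    Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
    SDVTList Tys = DAG.getVTList(TheVT, MVT::Other);
    SDValue Ops[] = { Chain, StackSlot };
    unsigned FLDSize = TheVT.getStoreSize();
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MPI, MachineMemOperand::MOLoad, FLDSize, FLDSize);
    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO);
    Chain = Value.getValue(1);
  }

  // FIST the value into the same slot ...
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MPI, MachineMemOperand::MOStore, MemSize, MemSize);
  SDValue Ops[] = { Chain, Value, StackSlot };
  SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
                                         DAG.getVTList(MVT::Other), Ops,
                                         DstTy, MMO);
  // ... and load the integer result back, tagged with the slot's
  // MachinePointerInfo so later passes know it reads the fixed stack slot.
  SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI);

Because the FLD store/reload and the FIST store/load use one slot, the 32-bit tests above now need 8 fewer bytes of stack in each affected function, which is what the updated subl/addl immediates check.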