From d11a7fa9a6eb1d2da7294c8c908ad02df4501059 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 11 Apr 2019 19:04:38 +0000
Subject: [PATCH] Revert r358211 "[X86] Use FILD/FIST to implement i64 atomic
 load on 32-bit targets with X87, but no SSE2"

I seem to have messed up the test checks.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358212 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 70 +--
 lib/Target/X86/X86ISelLowering.h | 12 +-
 lib/Target/X86/X86InstrFPStack.td | 13 -
 test/CodeGen/X86/atomic-fp.ll | 65 +--
 test/CodeGen/X86/atomic-load-store-wide.ll | 58 +--
 test/CodeGen/X86/atomic-mi.ll | 431 ++++++++----------
 test/CodeGen/X86/atomic-non-integer.ll | 54 ++-
 .../X86/misched_phys_reg_assign_order.ll | 2 +-
 8 files changed, 323 insertions(+), 382 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 216fd5b8d14..2258e0ce24e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -25584,18 +25584,17 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
 // TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
+// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   Type *MemType = LI->getType();
 
   // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
-  // can use movq to do the load. If we have X87 we can load into an 80-bit
-  // X87 register and store it to a stack temporary.
+  // can use movq to do the load.
   bool NoImplicitFloatOps =
       LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
   if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-      (Subtarget.hasSSE2() || Subtarget.hasX87()))
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
     return AtomicExpansionKind::None;
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -27441,57 +27440,23 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     bool NoImplicitFloatOps =
         DAG.getMachineFunction().getFunction().hasFnAttribute(
             Attribute::NoImplicitFloat);
-    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
+    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+        Subtarget.hasSSE2()) {
       auto *Node = cast<AtomicSDNode>(N);
-      if (Subtarget.hasSSE2()) {
-        // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
-        // lower 64-bits.
-        SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
-        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
-        SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
-                                             MVT::i64, Node->getMemOperand());
-        SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
-                                  DAG.getIntPtrConstant(0, dl));
-        Results.push_back(Res);
-        Results.push_back(Ld.getValue(1));
-        return;
-      }
-      if (Subtarget.hasX87()) {
-        // First load this into an 80-bit X87 register. This will put the whole
-        // integer into the significand.
-        // FIXME: Do we need to glue? See FIXME comment in BuildFILD.
-        SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
-        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
-        SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
-                                                 dl, Tys, Ops, MVT::i64,
-                                                 Node->getMemOperand());
-        SDValue Chain = Result.getValue(1);
-        SDValue InFlag = Result.getValue(2);
-
-        // Now store the X87 register to a stack temporary and convert to i64.
-        // This store is not atomic and doesn't need to be.
-        // FIXME: We don't need a stack temporary if the result of the load
-        // is already being stored. We could just directly store there.
-        SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
-        int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
-        MachinePointerInfo MPI =
-            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
-        SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
-        Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
-                                        DAG.getVTList(MVT::Other), StoreOps,
-                                        MVT::i64, MPI, 0 /*Align*/,
-                                        MachineMemOperand::MOStore);
-
-        // Finally load the value back from the stack temporary and return it.
-        // This load is not atomic and doesn't need to be.
-        // This load will be further type legalized.
-        Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
-        Results.push_back(Result);
-        Results.push_back(Result.getValue(1));
-        return;
-      }
+      // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
+      // 64-bits.
+      SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+      SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+      SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                           MVT::i64, Node->getMemOperand());
+      SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                DAG.getIntPtrConstant(0, dl));
+      Results.push_back(Res);
+      Results.push_back(Ld.getValue(1));
+      return;
     }
     // TODO: Use MOVLPS when SSE1 is available?
+    // TODO: Use FILD/FISTP when X87 is available?
     // Delegate to generic TypeLegalization. Situations we can really handle
     // should have already been dealt with by AtomicExpandPass.cpp.
     break;
@@ -27684,7 +27649,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FXOR: return "X86ISD::FXOR";
   case X86ISD::FILD: return "X86ISD::FILD";
   case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
-  case X86ISD::FIST: return "X86ISD::FIST";
   case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM";
   case X86ISD::FLD: return "X86ISD::FLD";
   case X86ISD::FST: return "X86ISD::FST";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 193d04094e2..b46fb8ef6fc 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -608,22 +608,16 @@ namespace llvm {
       FILD,
       FILD_FLAG,
 
-      /// This instruction implements a fp->int store from FP stack
-      /// slots. This corresponds to the fist instruction. It takes a
-      /// chain operand, value to store, address, and glue. The memory VT
-      /// specifies the type to store as.
-      FIST,
-
       /// This instruction implements an extending load to FP stack slots.
       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
       /// operand, and ptr to load from. The memory VT specifies the type to
       /// load from.
       FLD,
 
-      /// This instruction implements a truncating store from FP stack
+      /// This instruction implements a truncating store to FP stack
       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
-      /// chain operand, value to store, and glue. The memory VT
-      /// specifies the type to store as.
+      /// chain operand, value to store, and address. The memory VT specifies
+      /// the type to store as.
       FST,
 
       /// This instruction grabs the address of the next argument
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 2ec6d50f970..8a756bfc3b1 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -21,7 +21,6 @@ def SDTX86Fld : SDTypeProfile<1, 1, [SDTCisFP<0>,
 def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
                                      SDTCisPtrTy<1>]>;
 def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
-def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
 def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
@@ -36,9 +35,6 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
 def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
                          [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
                           SDNPMemOperand]>;
-def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
-                     [SDNPHasChain, SDNPInGlue, SDNPMayStore,
-                      SDNPMemOperand]>;
 def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
 def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
                           [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -83,11 +79,6 @@ def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
 }]>;
 
-def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
-                        (X86fist node:$val, node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
 def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
@@ -769,10 +760,6 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
 // Used to conv. i64 to f64 since there isn't a SSE version.
 def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
 
-// Used to conv. between f80 and i64 for i64 atomic loads.
-def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
-def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
-
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, diff --git a/test/CodeGen/X86/atomic-fp.ll b/test/CodeGen/X86/atomic-fp.ll index faeba24abdc..23b5b1ecfe1 100644 --- a/test/CodeGen/X86/atomic-fp.ll +++ b/test/CodeGen/X86/atomic-fp.ll @@ -77,13 +77,14 @@ define void @fadd_64r(double* %loc, double %val) nounwind { ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $24, %esp +; X86-NOSSE-NEXT: subl $16, %esp ; X86-NOSSE-NEXT: movl 8(%ebp), %esi -; X86-NOSSE-NEXT: fildll (%esi) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: faddl 12(%ebp) @@ -282,12 +283,13 @@ define void @fadd_64g() nounwind { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp -; X86-NOSSE-NEXT: fildll glob64 -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: subl $24, %esp +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b glob64 +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) @@ -482,12 +484,13 @@ define void @fadd_64imm() nounwind { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp -; X86-NOSSE-NEXT: fildll -559038737 -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: subl $24, %esp +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b -559038737 +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) @@ -688,12 +691,13 @@ define void @fadd_64stack() nounwind { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) @@ -827,14 +831,15 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind { ; 
X86-NOSSE-NEXT: pushl %edi ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: movl 20(%ebp), %esi ; X86-NOSSE-NEXT: movl 8(%ebp), %edi -; X86-NOSSE-NEXT: fildll (%edi,%esi,8) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: faddl 12(%ebp) diff --git a/test/CodeGen/X86/atomic-load-store-wide.ll b/test/CodeGen/X86/atomic-load-store-wide.ll index 7be6cb2738c..d1b280661a3 100644 --- a/test/CodeGen/X86/atomic-load-store-wide.ll +++ b/test/CodeGen/X86/atomic-load-store-wide.ll @@ -45,21 +45,22 @@ define i64 @test2(i64* %ptr) { ; ; NOSSE-LABEL: test2: ; NOSSE: # %bb.0: -; NOSSE-NEXT: pushl %ebp +; NOSSE-NEXT: pushl %ebx ; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: .cfi_offset %ebp, -8 -; NOSSE-NEXT: movl %esp, %ebp -; NOSSE-NEXT: .cfi_def_cfa_register %ebp -; NOSSE-NEXT: andl $-8, %esp -; NOSSE-NEXT: subl $8, %esp -; NOSSE-NEXT: movl 8(%ebp), %eax -; NOSSE-NEXT: fildll (%eax) -; NOSSE-NEXT: fistpll (%esp) -; NOSSE-NEXT: movl (%esp), %eax -; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; NOSSE-NEXT: movl %ebp, %esp -; NOSSE-NEXT: popl %ebp -; NOSSE-NEXT: .cfi_def_cfa %esp, 4 +; NOSSE-NEXT: pushl %esi +; NOSSE-NEXT: .cfi_def_cfa_offset 12 +; NOSSE-NEXT: .cfi_offset %esi, -12 +; NOSSE-NEXT: .cfi_offset %ebx, -8 +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; NOSSE-NEXT: xorl %eax, %eax +; NOSSE-NEXT: xorl %edx, %edx +; NOSSE-NEXT: xorl %ecx, %ecx +; NOSSE-NEXT: xorl %ebx, %ebx +; NOSSE-NEXT: lock cmpxchg8b (%esi) +; NOSSE-NEXT: popl %esi +; NOSSE-NEXT: .cfi_def_cfa_offset 8 +; NOSSE-NEXT: popl %ebx +; NOSSE-NEXT: .cfi_def_cfa_offset 4 ; NOSSE-NEXT: retl %val = load atomic i64, i64* %ptr seq_cst, align 8 ret i64 %val @@ -101,21 +102,22 @@ define i64 @test4(i64* %ptr) { ; ; NOSSE-LABEL: test4: ; NOSSE: # %bb.0: -; NOSSE-NEXT: pushl %ebp +; NOSSE-NEXT: pushl %ebx +; NOSSE-NEXT: .cfi_def_cfa_offset 8 +; NOSSE-NEXT: pushl %esi +; NOSSE-NEXT: .cfi_def_cfa_offset 12 +; NOSSE-NEXT: .cfi_offset %esi, -12 +; NOSSE-NEXT: .cfi_offset %ebx, -8 +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; NOSSE-NEXT: xorl %eax, %eax +; NOSSE-NEXT: xorl %edx, %edx +; NOSSE-NEXT: xorl %ecx, %ecx +; NOSSE-NEXT: xorl %ebx, %ebx +; NOSSE-NEXT: lock cmpxchg8b (%esi) +; NOSSE-NEXT: popl %esi ; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: .cfi_offset %ebp, -8 -; NOSSE-NEXT: movl %esp, %ebp -; NOSSE-NEXT: .cfi_def_cfa_register %ebp -; NOSSE-NEXT: andl $-8, %esp -; NOSSE-NEXT: subl $8, %esp -; NOSSE-NEXT: movl 8(%ebp), %eax -; NOSSE-NEXT: fildll (%eax) -; NOSSE-NEXT: fistpll (%esp) -; NOSSE-NEXT: movl (%esp), %eax -; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; NOSSE-NEXT: movl %ebp, %esp -; NOSSE-NEXT: popl %ebp -; NOSSE-NEXT: .cfi_def_cfa %esp, 4 +; NOSSE-NEXT: popl %ebx +; NOSSE-NEXT: .cfi_def_cfa_offset 4 ; NOSSE-NEXT: retl %val = load atomic volatile i64, i64* %ptr seq_cst, align 8 ret i64 %val diff --git a/test/CodeGen/X86/atomic-mi.ll b/test/CodeGen/X86/atomic-mi.ll index f660d3311fd..492d7ae8f2d 100644 --- a/test/CodeGen/X86/atomic-mi.ll +++ 
b/test/CodeGen/X86/atomic-mi.ll @@ -331,22 +331,20 @@ define void @add_64i(i64* %p) { ; ; X32-LABEL: add_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl $2, %ebx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl (%esi), %eax @@ -357,11 +355,10 @@ define void @add_64i(i64* %p) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB14_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -378,24 +375,22 @@ define void @add_64r(i64* %p, i64 %v) { ; ; X32-LABEL: add_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: addl 12(%ebp), %ebx -; X32-NEXT: adcl 16(%ebp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -404,11 +399,10 @@ define void @add_64r(i64* %p, i64 %v) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB15_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -571,24 +565,22 @@ define void @sub_64r(i64* %p, i64 %v) { ; ; X32-LABEL: sub_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: subl 12(%ebp), %ebx -; X32-NEXT: sbbl 16(%ebp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -597,11 +589,10 @@ define void @sub_64r(i64* %p, i64 %v) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB23_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'subq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -746,21 +737,19 @@ define void @and_64i(i64* %p) { ; ; X32-LABEL: and_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: andl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx @@ -771,11 +760,10 @@ define void @and_64i(i64* %p) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB31_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -792,24 +780,22 @@ define void @and_64r(i64* %p, i64 %v) { ; ; X32-LABEL: and_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: andl 16(%ebp), %ecx -; X32-NEXT: andl 12(%ebp), %ebx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -818,11 +804,10 @@ define void @and_64r(i64* %p, i64 %v) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB32_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -988,22 +973,20 @@ define void @or_64i(i64* %p) { ; ; X32-LABEL: or_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: orl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx @@ -1013,11 +996,10 @@ define void @or_64i(i64* %p) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB41_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1034,24 +1016,22 @@ define void @or_64r(i64* %p, i64 %v) { ; ; X32-LABEL: or_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: orl 16(%ebp), %ecx -; X32-NEXT: orl 12(%ebp), %ebx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: orl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -1060,11 +1040,10 @@ define void @or_64r(i64* %p, i64 %v) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB42_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -1230,22 +1209,20 @@ define void @xor_64i(i64* %p) { ; ; X32-LABEL: xor_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: xorl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx @@ -1255,11 +1232,10 @@ define void @xor_64i(i64* %p) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB51_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1276,24 +1252,22 @@ define void @xor_64r(i64* %p, i64 %v) { ; ; X32-LABEL: xor_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: xorl 16(%ebp), %ecx -; X32-NEXT: xorl 12(%ebp), %ebx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: xorl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -1302,11 +1276,10 @@ define void @xor_64r(i64* %p, i64 %v) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB52_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -1433,22 +1406,20 @@ define void @inc_64(i64* %p) { ; ; X32-LABEL: inc_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl $1, %ebx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl (%esi), %eax @@ -1459,11 +1430,10 @@ define void @inc_64(i64* %p) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB58_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_64: @@ -1581,22 +1551,20 @@ define void @dec_64(i64* %p) { ; ; X32-LABEL: dec_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; 
X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl $-1, %ebx ; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl (%esi), %eax @@ -1607,11 +1575,10 @@ define void @dec_64(i64* %p) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB63_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_64: @@ -1714,22 +1681,20 @@ define void @not_64(i64* %p) { ; ; X32-LABEL: not_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: notl %ecx ; X32-NEXT: notl %ebx ; X32-NEXT: movl (%esi), %eax @@ -1740,11 +1705,10 @@ define void @not_64(i64* %p) { ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB68_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'notq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1839,37 +1803,40 @@ define void @neg_64(i64* %p) { ; ; X32-LABEL: neg_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: pushl %edi +; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: fildll (%esi) -; X32-NEXT: fistpll (%esp) +; X32-NEXT: .cfi_offset %edi, -12 +; X32-NEXT: .cfi_offset %ebx, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: xorl %esi, %esi +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: subl (%esp), %ebx -; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl (%esi), %eax -; X32-NEXT: movl 4(%esi), %edx +; X32-NEXT: lock cmpxchg8b (%edi) +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: negl %ebx +; X32-NEXT: sbbl %edx, %esi +; X32-NEXT: movl (%edi), %eax +; X32-NEXT: movl 4(%edi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB73_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: lock cmpxchg8b (%esi) +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: lock cmpxchg8b (%edi) ; X32-NEXT: jne .LBB73_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: popl %edi +; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do neg check X86-32 as it canneg do 'negq'. 
%1 = load atomic i64, i64* %p acquire, align 8 diff --git a/test/CodeGen/X86/atomic-non-integer.ll b/test/CodeGen/X86/atomic-non-integer.ll index 5d54eca05b3..a0ede060eb3 100644 --- a/test/CodeGen/X86/atomic-non-integer.ll +++ b/test/CodeGen/X86/atomic-non-integer.ll @@ -448,17 +448,28 @@ define double @load_double(double* %fptr) { ; ; X86-NOSSE-LABEL: load_double: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: subl $20, %esp +; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 +; X86-NOSSE-NEXT: subl $12, %esp ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24 -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: fildll (%eax) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: .cfi_offset %esi, -12 +; X86-NOSSE-NEXT: .cfi_offset %ebx, -8 +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fldl (%esp) -; X86-NOSSE-NEXT: addl $20, %esp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: popl %ebx ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 ; X86-NOSSE-NEXT: retl ; @@ -816,17 +827,28 @@ define double @load_double_seq_cst(double* %fptr) { ; ; X86-NOSSE-LABEL: load_double_seq_cst: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: subl $20, %esp +; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 +; X86-NOSSE-NEXT: subl $12, %esp ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24 -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: fildll (%eax) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: .cfi_offset %esi, -12 +; X86-NOSSE-NEXT: .cfi_offset %ebx, -8 +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fldl (%esp) -; X86-NOSSE-NEXT: addl $20, %esp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: popl %ebx ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 ; X86-NOSSE-NEXT: retl ; diff --git a/test/CodeGen/X86/misched_phys_reg_assign_order.ll b/test/CodeGen/X86/misched_phys_reg_assign_order.ll index fd40d7f92e9..d9548b98839 100644 --- a/test/CodeGen/X86/misched_phys_reg_assign_order.ll +++ b/test/CodeGen/X86/misched_phys_reg_assign_order.ll @@ -49,4 +49,4 @@ k.end: ; preds = %entry declare i32 @m() -attributes #0 = { noimplicitfloat "no-frame-pointer-elim-non-leaf" } +attributes #0 = { "no-frame-pointer-elim-non-leaf" } -- 2.50.1