// Note: this turns large loads into lock cmpxchg8b/16b.
// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
-// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
Type *MemType = LI->getType();
// If this is a 64-bit atomic load on a 32-bit target and SSE2 is enabled, we
- // can use movq to do the load.
+ // can use movq to do the load. If we have X87 we can load into an 80-bit
+ // X87 register and store it to a stack temporary.
bool NoImplicitFloatOps =
LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
- !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ (Subtarget.hasSSE2() || Subtarget.hasX87()))
return AtomicExpansionKind::None;
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- Subtarget.hasSSE2()) {
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
auto *Node = cast<AtomicSDNode>(N);
- // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
- // 64-bits.
- SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
- SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
- SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
- MVT::i64, Node->getMemOperand());
- SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Ld.getValue(1));
- return;
+ if (Subtarget.hasSSE2()) {
+ // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
+ // lower 64-bits.
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ MVT::i64, Node->getMemOperand());
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Ld.getValue(1));
+ return;
+ }
+ if (Subtarget.hasX87()) {
+ // First load this into an 80-bit X87 register. This will put the whole
+ // integer into the significand.
+ // FIXME: Do we need to glue? See FIXME comment in BuildFILD.
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
+ dl, Tys, Ops, MVT::i64,
+ Node->getMemOperand());
+ SDValue Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // Now store the X87 register to a stack temporary and convert to i64.
+ // This store is not atomic and doesn't need to be.
+ // FIXME: We don't need a stack temporary if the result of the load
+ // is already being stored. We could just directly store there.
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
+ DAG.getVTList(MVT::Other), StoreOps,
+ MVT::i64, MPI, 0 /*Align*/,
+ MachineMemOperand::MOStore);
+
+ // Finally load the value back from the stack temporary and return it.
+ // This load is not atomic and doesn't need to be.
+ // This load will be further type legalized.
+ Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
+ Results.push_back(Result);
+ Results.push_back(Result.getValue(1));
+ return;
+ }
}
// TODO: Use MOVLPS when SSE1 is available?
- // TODO: Use FILD/FISTP when X87 is available?
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FIST: return "X86ISD::FIST";
case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
FILD,
FILD_FLAG,
+ /// This instruction implements an fp->int store from FP stack
+ /// slots. This corresponds to the fist instruction. It takes a
+ /// chain operand, value to store, address, and glue. The memory VT
+ /// specifies the type to store as.
+ FIST,
+
/// This instruction implements an extending load to FP stack slots.
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
/// operand, and ptr to load from. The memory VT specifies the type to
/// load from.
FLD,
- /// This instruction implements a truncating store to FP stack
+ /// This instruction implements a truncating store from FP stack
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
- /// chain operand, value to store, and address. The memory VT specifies
- /// the type to store as.
+ /// chain operand, value to store, address, and glue. The memory VT
+ /// specifies the type to store as.
FST,
/// This instruction grabs the address of the next argument
def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>]>;
def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
+def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;
+def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fist node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
(X86fp_to_mem node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
// Used to conv. i64 to f64 since there isn't a SSE version.
def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
+// Used to conv. between f80 and i64 for i64 atomic loads.
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
+def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
+
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $16, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %esi
-; X86-NOSSE-NEXT: xorl %eax, %eax
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: xorl %ebx, %ebx
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esi)
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
; X86-SSE1-NEXT: pushl %ebx
; X86-SSE1-NEXT: pushl %esi
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $16, %esp
+; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %esi
-; X86-SSE1-NEXT: xorl %eax, %eax
-; X86-SSE1-NEXT: xorl %edx, %edx
-; X86-SSE1-NEXT: xorl %ecx, %ecx
-; X86-SSE1-NEXT: xorl %ebx, %ebx
-; X86-SSE1-NEXT: lock cmpxchg8b (%esi)
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: fildll (%esi)
+; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $24, %esp
-; X86-NOSSE-NEXT: xorl %eax, %eax
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: xorl %ebx, %ebx
-; X86-NOSSE-NEXT: lock cmpxchg8b glob64
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: fildll glob64
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: pushl %ebx
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $24, %esp
-; X86-SSE1-NEXT: xorl %eax, %eax
-; X86-SSE1-NEXT: xorl %edx, %edx
-; X86-SSE1-NEXT: xorl %ecx, %ecx
-; X86-SSE1-NEXT: xorl %ebx, %ebx
-; X86-SSE1-NEXT: lock cmpxchg8b glob64
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: subl $32, %esp
+; X86-SSE1-NEXT: fildll glob64
+; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $24, %esp
-; X86-NOSSE-NEXT: xorl %eax, %eax
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: xorl %ebx, %ebx
-; X86-NOSSE-NEXT: lock cmpxchg8b -559038737
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: fildll -559038737
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: pushl %ebx
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $24, %esp
-; X86-SSE1-NEXT: xorl %eax, %eax
-; X86-SSE1-NEXT: xorl %edx, %edx
-; X86-SSE1-NEXT: xorl %ecx, %ecx
-; X86-SSE1-NEXT: xorl %ebx, %ebx
-; X86-SSE1-NEXT: lock cmpxchg8b -559038737
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: subl $32, %esp
+; X86-SSE1-NEXT: fildll -559038737
+; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
-; X86-NOSSE-NEXT: xorl %eax, %eax
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: xorl %ebx, %ebx
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: pushl %ebx
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $32, %esp
-; X86-SSE1-NEXT: xorl %eax, %eax
-; X86-SSE1-NEXT: xorl %edx, %edx
-; X86-SSE1-NEXT: xorl %ecx, %ecx
-; X86-SSE1-NEXT: xorl %ebx, %ebx
-; X86-SSE1-NEXT: lock cmpxchg8b (%esp)
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: subl $40, %esp
+; X86-SSE1-NEXT: fildll (%esp)
+; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $24, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %esi
; X86-NOSSE-NEXT: movl 8(%ebp), %edi
-; X86-NOSSE-NEXT: xorl %eax, %eax
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: xorl %ebx, %ebx
-; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%edi,%esi,8)
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
; X86-SSE1-NEXT: pushl %edi
; X86-SSE1-NEXT: pushl %esi
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $24, %esp
+; X86-SSE1-NEXT: subl $32, %esp
; X86-SSE1-NEXT: movl 20(%ebp), %esi
; X86-SSE1-NEXT: movl 8(%ebp), %edi
-; X86-SSE1-NEXT: xorl %eax, %eax
-; X86-SSE1-NEXT: xorl %edx, %edx
-; X86-SSE1-NEXT: xorl %ecx, %ecx
-; X86-SSE1-NEXT: xorl %ebx, %ebx
-; X86-SSE1-NEXT: lock cmpxchg8b (%edi,%esi,8)
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: fildll (%edi,%esi,8)
+; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
;
; NOSSE-LABEL: test2:
; NOSSE: # %bb.0:
-; NOSSE-NEXT: pushl %ebx
+; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: .cfi_def_cfa_offset 8
-; NOSSE-NEXT: pushl %esi
-; NOSSE-NEXT: .cfi_def_cfa_offset 12
-; NOSSE-NEXT: .cfi_offset %esi, -12
-; NOSSE-NEXT: .cfi_offset %ebx, -8
-; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; NOSSE-NEXT: xorl %eax, %eax
-; NOSSE-NEXT: xorl %edx, %edx
-; NOSSE-NEXT: xorl %ecx, %ecx
-; NOSSE-NEXT: xorl %ebx, %ebx
-; NOSSE-NEXT: lock cmpxchg8b (%esi)
-; NOSSE-NEXT: popl %esi
-; NOSSE-NEXT: .cfi_def_cfa_offset 8
-; NOSSE-NEXT: popl %ebx
-; NOSSE-NEXT: .cfi_def_cfa_offset 4
+; NOSSE-NEXT: .cfi_offset %ebp, -8
+; NOSSE-NEXT: movl %esp, %ebp
+; NOSSE-NEXT: .cfi_def_cfa_register %ebp
+; NOSSE-NEXT: andl $-8, %esp
+; NOSSE-NEXT: subl $8, %esp
+; NOSSE-NEXT: movl 8(%ebp), %eax
+; NOSSE-NEXT: fildll (%eax)
+; NOSSE-NEXT: fistpll (%esp)
+; NOSSE-NEXT: movl (%esp), %eax
+; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; NOSSE-NEXT: movl %ebp, %esp
+; NOSSE-NEXT: popl %ebp
+; NOSSE-NEXT: .cfi_def_cfa %esp, 4
; NOSSE-NEXT: retl
%val = load atomic i64, i64* %ptr seq_cst, align 8
ret i64 %val
;
; NOSSE-LABEL: test4:
; NOSSE: # %bb.0:
-; NOSSE-NEXT: pushl %ebx
-; NOSSE-NEXT: .cfi_def_cfa_offset 8
-; NOSSE-NEXT: pushl %esi
-; NOSSE-NEXT: .cfi_def_cfa_offset 12
-; NOSSE-NEXT: .cfi_offset %esi, -12
-; NOSSE-NEXT: .cfi_offset %ebx, -8
-; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; NOSSE-NEXT: xorl %eax, %eax
-; NOSSE-NEXT: xorl %edx, %edx
-; NOSSE-NEXT: xorl %ecx, %ecx
-; NOSSE-NEXT: xorl %ebx, %ebx
-; NOSSE-NEXT: lock cmpxchg8b (%esi)
-; NOSSE-NEXT: popl %esi
+; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: .cfi_def_cfa_offset 8
-; NOSSE-NEXT: popl %ebx
-; NOSSE-NEXT: .cfi_def_cfa_offset 4
+; NOSSE-NEXT: .cfi_offset %ebp, -8
+; NOSSE-NEXT: movl %esp, %ebp
+; NOSSE-NEXT: .cfi_def_cfa_register %ebp
+; NOSSE-NEXT: andl $-8, %esp
+; NOSSE-NEXT: subl $8, %esp
+; NOSSE-NEXT: movl 8(%ebp), %eax
+; NOSSE-NEXT: fildll (%eax)
+; NOSSE-NEXT: fistpll (%esp)
+; NOSSE-NEXT: movl (%esp), %eax
+; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; NOSSE-NEXT: movl %ebp, %esp
+; NOSSE-NEXT: popl %ebp
+; NOSSE-NEXT: .cfi_def_cfa %esp, 4
; NOSSE-NEXT: retl
%val = load atomic volatile i64, i64* %ptr seq_cst, align 8
ret i64 %val
;
; X32-LABEL: add_64i:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $2, %ebx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB14_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'addq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: add_64r:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl 12(%ebp), %ebx
+; X32-NEXT: adcl 16(%ebp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB15_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'addq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: sub_64r:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: subl 12(%ebp), %ebx
+; X32-NEXT: sbbl 16(%ebp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB23_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'subq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: and_64i:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: andl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB31_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'andq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: and_64r:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: andl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl 16(%ebp), %ecx
+; X32-NEXT: andl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB32_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'andq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: or_64i:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB41_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'orq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: or_64r:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: orl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: orl 16(%ebp), %ecx
+; X32-NEXT: orl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB42_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'orq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: xor_64i:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB51_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'xorq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: xor_64r:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xorl 16(%ebp), %ecx
+; X32-NEXT: xorl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB52_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'xorq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: inc_64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $1, %ebx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB58_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
;
; SLOW_INC-LABEL: inc_64:
;
; X32-LABEL: dec_64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $-1, %ebx
; X32-NEXT: adcl $-1, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB63_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
;
; SLOW_INC-LABEL: dec_64:
;
; X32-LABEL: not_64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
+; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: notl %ecx
; X32-NEXT: notl %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB68_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'notq'.
%1 = load atomic i64, i64* %p acquire, align 8
;
; X32-LABEL: neg_64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: pushl %edi
-; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %edi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: xorl %esi, %esi
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: movl 8(%ebp), %esi
+; X32-NEXT: fildll (%esi)
+; X32-NEXT: fistpll (%esp)
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: lock cmpxchg8b (%edi)
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: negl %ebx
-; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl (%edi), %eax
-; X32-NEXT: movl 4(%edi), %edx
+; X32-NEXT: subl (%esp), %ebx
+; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%esi), %eax
+; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB73_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: lock cmpxchg8b (%edi)
+; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB73_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: popl %edi
-; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'negq'.
%1 = load atomic i64, i64* %p acquire, align 8
define double @load_double(double* %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl %ebx
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT: pushl %esi
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE1-NEXT: subl $12, %esp
+; X86-SSE1-NEXT: subl $20, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 24
-; X86-SSE1-NEXT: .cfi_offset %esi, -12
-; X86-SSE1-NEXT: .cfi_offset %ebx, -8
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT: xorl %eax, %eax
-; X86-SSE1-NEXT: xorl %edx, %edx
-; X86-SSE1-NEXT: xorl %ecx, %ecx
-; X86-SSE1-NEXT: xorl %ebx, %ebx
-; X86-SSE1-NEXT: lock cmpxchg8b (%esi)
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: fildll (%eax)
+; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: fldl (%esp)
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE1-NEXT: popl %esi
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT: popl %ebx
+; X86-SSE1-NEXT: addl $20, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
;
; X86-NOSSE-LABEL: load_double:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT: subl $12, %esp
+; X86-NOSSE-NEXT: subl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24
-; X86-NOSSE-NEXT: .cfi_offset %esi, -12
-; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: xorl %eax, %eax
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: xorl %ebx, %ebx
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: fildll (%eax)
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fldl (%esp)
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: addl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
define double @load_double_seq_cst(double* %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl %ebx
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT: pushl %esi
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE1-NEXT: subl $12, %esp
+; X86-SSE1-NEXT: subl $20, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 24
-; X86-SSE1-NEXT: .cfi_offset %esi, -12
-; X86-SSE1-NEXT: .cfi_offset %ebx, -8
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT: xorl %eax, %eax
-; X86-SSE1-NEXT: xorl %edx, %edx
-; X86-SSE1-NEXT: xorl %ecx, %ecx
-; X86-SSE1-NEXT: xorl %ebx, %ebx
-; X86-SSE1-NEXT: lock cmpxchg8b (%esi)
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: fildll (%eax)
+; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: fldl (%esp)
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE1-NEXT: popl %esi
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT: popl %ebx
+; X86-SSE1-NEXT: addl $20, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
;
; X86-NOSSE-LABEL: load_double_seq_cst:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT: subl $12, %esp
+; X86-NOSSE-NEXT: subl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24
-; X86-NOSSE-NEXT: .cfi_offset %esi, -12
-; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: xorl %eax, %eax
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: xorl %ebx, %ebx
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: fildll (%eax)
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fldl (%esp)
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: addl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
declare i32 @m()
-attributes #0 = { "no-frame-pointer-elim-non-leaf" }
+attributes #0 = { noimplicitfloat "no-frame-pointer-elim-non-leaf" }