From c5f25a94d877a47148a9970082468c6082ae62dc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 10 Apr 2019 22:35:32 +0000 Subject: [PATCH] [X86] Add SSE1 command line to atomic-fp.ll and atomic-non-integer.ll. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358141 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/atomic-fp.ll | 537 +++++++++++++++++-------- test/CodeGen/X86/atomic-non-integer.ll | 171 +++++--- 2 files changed, 505 insertions(+), 203 deletions(-) diff --git a/test/CodeGen/X86/atomic-fp.ll b/test/CodeGen/X86/atomic-fp.ll index 30d2078fda0..23b5b1ecfe1 100644 --- a/test/CodeGen/X86/atomic-fp.ll +++ b/test/CodeGen/X86/atomic-fp.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-SSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-SSE --check-prefix X86-SSE1 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-SSE --check-prefix X86-SSE2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-AVX --check-prefix X86-AVX1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-AVX --check-prefix X86-AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix X64-SSE @@ -24,13 +25,27 @@ define void @fadd_32r(float* %loc, float %val) nounwind { ; X86-NOSSE-NEXT: addl $8, %esp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_32r: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: addss (%eax), %xmm0 -; X86-SSE-NEXT: movss %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_32r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl (%eax), %ecx +; X86-SSE1-NEXT: movl %ecx, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: addss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl %ecx, (%eax) +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_32r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: addss (%eax), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%eax) +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_32r: ; X86-AVX: # %bb.0: @@ -90,33 +105,68 @@ define void @fadd_64r(double* %loc, double %val) nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_64r: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: andl $-8, %esp -; X86-SSE-NEXT: subl $8, %esp -; X86-SSE-NEXT: movl 8(%ebp), %esi -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: addsd 12(%ebp), %xmm0 -; X86-SSE-NEXT: movsd %xmm0, (%esp) -; X86-SSE-NEXT: movl (%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl (%esi), %eax -; X86-SSE-NEXT: movl 4(%esi), %edx -; X86-SSE-NEXT: .p2align 4, 0x90 -; X86-SSE-NEXT: .LBB1_1: # %atomicrmw.start -; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-SSE-NEXT: lock cmpxchg8b (%esi) -; X86-SSE-NEXT: jne .LBB1_1 -; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end -; X86-SSE-NEXT: leal -8(%ebp), %esp -; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_64r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: pushl %esi +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: movl 8(%ebp), %esi +; X86-SSE1-NEXT: xorl %eax, %eax +; X86-SSE1-NEXT: xorl %edx, %edx +; X86-SSE1-NEXT: xorl %ecx, %ecx +; X86-SSE1-NEXT: xorl %ebx, %ebx +; X86-SSE1-NEXT: lock cmpxchg8b (%esi) +; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: faddl 12(%ebp) +; X86-SSE1-NEXT: fstpl (%esp) +; X86-SSE1-NEXT: movl (%esp), %ebx +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl (%esi), %eax +; X86-SSE1-NEXT: movl 4(%esi), %edx +; X86-SSE1-NEXT: .p2align 4, 0x90 +; X86-SSE1-NEXT: .LBB1_1: # %atomicrmw.start +; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE1-NEXT: lock cmpxchg8b (%esi) +; X86-SSE1-NEXT: jne .LBB1_1 +; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE1-NEXT: leal -8(%ebp), %esp +; X86-SSE1-NEXT: popl %esi +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_64r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movl 8(%ebp), %esi +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movl (%esp), %ebx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl (%esi), %eax +; X86-SSE2-NEXT: movl 4(%esi), %edx +; X86-SSE2-NEXT: .p2align 4, 0x90 +; X86-SSE2-NEXT: .LBB1_1: # %atomicrmw.start +; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE2-NEXT: lock cmpxchg8b (%esi) +; X86-SSE2-NEXT: jne .LBB1_1 +; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE2-NEXT: leal -8(%ebp), %esp +; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_64r: ; X86-AVX: # %bb.0: @@ -178,12 +228,25 @@ define void @fadd_32g() nounwind { ; X86-NOSSE-NEXT: addl $8, %esp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_32g: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: addss glob32, %xmm0 -; X86-SSE-NEXT: movss %xmm0, glob32 -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_32g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl glob32, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: addss {{\.LCPI.*}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, glob32 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_32g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: addss glob32, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, glob32 +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_32g: ; X86-AVX: # %bb.0: @@ -246,30 +309,62 @@ define void @fadd_64g() nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_64g: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: andl $-8, %esp -; X86-SSE-NEXT: subl $16, %esp -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: addsd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movsd %xmm0, (%esp) -; X86-SSE-NEXT: movl (%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl glob64+4, %edx -; X86-SSE-NEXT: movl glob64, %eax -; X86-SSE-NEXT: .p2align 4, 0x90 -; X86-SSE-NEXT: .LBB3_1: # %atomicrmw.start -; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-SSE-NEXT: lock cmpxchg8b glob64 -; X86-SSE-NEXT: jne .LBB3_1 -; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end -; X86-SSE-NEXT: leal -4(%ebp), %esp -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_64g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $24, %esp +; X86-SSE1-NEXT: xorl %eax, %eax +; X86-SSE1-NEXT: xorl %edx, %edx +; X86-SSE1-NEXT: xorl %ecx, %ecx +; X86-SSE1-NEXT: xorl %ebx, %ebx +; X86-SSE1-NEXT: lock cmpxchg8b glob64 +; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fld1 +; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fstpl (%esp) +; X86-SSE1-NEXT: movl (%esp), %ebx +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl glob64+4, %edx +; X86-SSE1-NEXT: movl glob64, %eax +; X86-SSE1-NEXT: .p2align 4, 0x90 +; X86-SSE1-NEXT: .LBB3_1: # %atomicrmw.start +; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE1-NEXT: lock cmpxchg8b glob64 +; X86-SSE1-NEXT: jne .LBB3_1 +; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE1-NEXT: leal -4(%ebp), %esp +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_64g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movl (%esp), %ebx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl glob64+4, %edx +; X86-SSE2-NEXT: movl glob64, %eax +; X86-SSE2-NEXT: .p2align 4, 0x90 +; X86-SSE2-NEXT: .LBB3_1: # %atomicrmw.start +; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE2-NEXT: lock cmpxchg8b glob64 +; X86-SSE2-NEXT: jne .LBB3_1 +; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE2-NEXT: leal -4(%ebp), %esp +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_64g: ; X86-AVX: # %bb.0: @@ -332,12 +427,25 @@ define void @fadd_32imm() nounwind { ; X86-NOSSE-NEXT: addl $8, %esp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_32imm: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: addss -559038737, %xmm0 -; X86-SSE-NEXT: movss %xmm0, -559038737 -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_32imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl -559038737, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: addss {{\.LCPI.*}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, -559038737 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_32imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: addss -559038737, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, -559038737 +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_32imm: ; X86-AVX: # %bb.0: @@ -402,30 +510,62 @@ define void @fadd_64imm() nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_64imm: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: andl $-8, %esp -; X86-SSE-NEXT: subl $16, %esp -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: addsd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movsd %xmm0, (%esp) -; X86-SSE-NEXT: movl (%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl -559038737, %eax -; X86-SSE-NEXT: movl -559038733, %edx -; X86-SSE-NEXT: .p2align 4, 0x90 -; X86-SSE-NEXT: .LBB5_1: # %atomicrmw.start -; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-SSE-NEXT: lock cmpxchg8b -559038737 -; X86-SSE-NEXT: jne .LBB5_1 -; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end -; X86-SSE-NEXT: leal -4(%ebp), %esp -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_64imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $24, %esp +; X86-SSE1-NEXT: xorl %eax, %eax +; X86-SSE1-NEXT: xorl %edx, %edx +; X86-SSE1-NEXT: xorl %ecx, %ecx +; X86-SSE1-NEXT: xorl %ebx, %ebx +; X86-SSE1-NEXT: lock cmpxchg8b -559038737 +; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fld1 +; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fstpl (%esp) +; X86-SSE1-NEXT: movl (%esp), %ebx +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl -559038737, %eax +; X86-SSE1-NEXT: movl -559038733, %edx +; X86-SSE1-NEXT: .p2align 4, 0x90 +; X86-SSE1-NEXT: .LBB5_1: # %atomicrmw.start +; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE1-NEXT: lock cmpxchg8b -559038737 +; X86-SSE1-NEXT: jne .LBB5_1 +; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE1-NEXT: leal -4(%ebp), %esp +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_64imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movl (%esp), %ebx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl -559038737, %eax +; X86-SSE2-NEXT: movl -559038733, %edx +; X86-SSE2-NEXT: .p2align 4, 0x90 +; X86-SSE2-NEXT: .LBB5_1: # %atomicrmw.start +; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE2-NEXT: lock cmpxchg8b -559038737 +; X86-SSE2-NEXT: jne .LBB5_1 +; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE2-NEXT: leal -4(%ebp), %esp +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_64imm: ; X86-AVX: # %bb.0: @@ -490,14 +630,27 @@ define void @fadd_32stack() nounwind { ; X86-NOSSE-NEXT: addl $12, %esp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_32stack: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: addss (%esp), %xmm0 -; X86-SSE-NEXT: movss %xmm0, (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_32stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $12, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: addss {{\.LCPI.*}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: addl $12, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_32stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: addss (%esp), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_32stack: ; X86-AVX: # %bb.0: @@ -564,30 +717,62 @@ define void @fadd_64stack() nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_64stack: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: andl $-8, %esp -; X86-SSE-NEXT: subl $24, %esp -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: addsd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl (%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: .p2align 4, 0x90 -; X86-SSE-NEXT: .LBB7_1: # %atomicrmw.start -; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-SSE-NEXT: lock cmpxchg8b (%esp) -; X86-SSE-NEXT: jne .LBB7_1 -; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end -; X86-SSE-NEXT: leal -4(%ebp), %esp -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_64stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $32, %esp +; X86-SSE1-NEXT: xorl %eax, %eax +; X86-SSE1-NEXT: xorl %edx, %edx +; X86-SSE1-NEXT: xorl %ecx, %ecx +; X86-SSE1-NEXT: xorl %ebx, %ebx +; X86-SSE1-NEXT: lock cmpxchg8b (%esp) +; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fld1 +; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl (%esp), %eax +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE1-NEXT: .p2align 4, 0x90 +; X86-SSE1-NEXT: .LBB7_1: # %atomicrmw.start +; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE1-NEXT: lock cmpxchg8b (%esp) +; X86-SSE1-NEXT: jne .LBB7_1 +; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE1-NEXT: leal -4(%ebp), %esp +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_64stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $24, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl (%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: .p2align 4, 0x90 +; X86-SSE2-NEXT: .LBB7_1: # %atomicrmw.start +; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE2-NEXT: lock cmpxchg8b (%esp) +; X86-SSE2-NEXT: jne .LBB7_1 +; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE2-NEXT: leal -4(%ebp), %esp +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_64stack: ; X86-AVX: # %bb.0: @@ -676,36 +861,74 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE-LABEL: fadd_array: -; X86-SSE: # %bb.0: # %bb -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %edi -; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: andl $-8, %esp -; X86-SSE-NEXT: subl $16, %esp -; X86-SSE-NEXT: movl 20(%ebp), %esi -; X86-SSE-NEXT: movl 8(%ebp), %edi -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: addsd 12(%ebp), %xmm0 -; X86-SSE-NEXT: movsd %xmm0, (%esp) -; X86-SSE-NEXT: movl (%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl (%edi,%esi,8), %eax -; X86-SSE-NEXT: movl 4(%edi,%esi,8), %edx -; X86-SSE-NEXT: .p2align 4, 0x90 -; X86-SSE-NEXT: .LBB8_1: # %atomicrmw.start -; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-SSE-NEXT: lock cmpxchg8b (%edi,%esi,8) -; X86-SSE-NEXT: jne .LBB8_1 -; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end -; X86-SSE-NEXT: leal -12(%ebp), %esp -; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: fadd_array: +; X86-SSE1: # %bb.0: # %bb +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: pushl %edi +; X86-SSE1-NEXT: pushl %esi +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $24, %esp +; X86-SSE1-NEXT: movl 20(%ebp), %esi +; X86-SSE1-NEXT: movl 8(%ebp), %edi +; X86-SSE1-NEXT: xorl %eax, %eax +; X86-SSE1-NEXT: xorl %edx, %edx +; X86-SSE1-NEXT: xorl %ecx, %ecx +; X86-SSE1-NEXT: xorl %ebx, %ebx +; X86-SSE1-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: faddl 12(%ebp) +; X86-SSE1-NEXT: fstpl (%esp) +; X86-SSE1-NEXT: movl (%esp), %ebx +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl (%edi,%esi,8), %eax +; X86-SSE1-NEXT: movl 4(%edi,%esi,8), %edx +; X86-SSE1-NEXT: .p2align 4, 0x90 +; X86-SSE1-NEXT: .LBB8_1: # %atomicrmw.start +; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE1-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-SSE1-NEXT: jne .LBB8_1 +; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE1-NEXT: leal -12(%ebp), %esp +; X86-SSE1-NEXT: popl %esi +; X86-SSE1-NEXT: popl %edi +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fadd_array: +; X86-SSE2: # %bb.0: # %bb +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %edi +; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movl 20(%ebp), %esi +; X86-SSE2-NEXT: movl 8(%ebp), %edi +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movl (%esp), %ebx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl (%edi,%esi,8), %eax +; X86-SSE2-NEXT: movl 4(%edi,%esi,8), %edx +; X86-SSE2-NEXT: .p2align 4, 0x90 +; X86-SSE2-NEXT: .LBB8_1: # %atomicrmw.start +; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE2-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-SSE2-NEXT: jne .LBB8_1 +; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE2-NEXT: leal -12(%ebp), %esp +; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: popl %edi +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_array: ; X86-AVX: # %bb.0: # %bb diff --git a/test/CodeGen/X86/atomic-non-integer.ll b/test/CodeGen/X86/atomic-non-integer.ll index 637082f1702..a0ede060eb3 100644 --- a/test/CodeGen/X86/atomic-non-integer.ll +++ b/test/CodeGen/X86/atomic-non-integer.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE +; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE1 +; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2 ; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1 ; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX512 ; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE @@ -332,17 +333,29 @@ define half @load_half(half* %fptr) { } define float @load_float(float* %fptr) { -; X86-SSE-LABEL: load_float: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd (%eax), %xmm0 -; X86-SSE-NEXT: movd %xmm0, (%esp) -; X86-SSE-NEXT: flds (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: .cfi_def_cfa_offset 4 -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: load_float: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %eax +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl (%eax), %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: flds (%esp) +; X86-SSE1-NEXT: popl %eax +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: load_float: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movd (%eax), %xmm0 +; X86-SSE2-NEXT: movd %xmm0, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: load_float: ; X86-AVX: # %bb.0: @@ -382,17 +395,44 @@ define float @load_float(float* %fptr) { } define double @load_double(double* %fptr) { -; X86-SSE-LABEL: load_double: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: subl $12, %esp -; X86-SSE-NEXT: .cfi_def_cfa_offset 16 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlps %xmm0, (%esp) -; X86-SSE-NEXT: fldl (%esp) -; X86-SSE-NEXT: addl $12, %esp -; X86-SSE-NEXT: .cfi_def_cfa_offset 4 -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: load_double: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: pushl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: subl $12, %esp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 24 +; X86-SSE1-NEXT: .cfi_offset %esi, -12 +; X86-SSE1-NEXT: .cfi_offset %ebx, -8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE1-NEXT: xorl %eax, %eax +; X86-SSE1-NEXT: xorl %edx, %edx +; X86-SSE1-NEXT: xorl %ecx, %ecx +; X86-SSE1-NEXT: xorl %ebx, %ebx +; X86-SSE1-NEXT: lock cmpxchg8b (%esi) +; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: addl $12, %esp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: popl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: load_double: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $12, %esp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%esp) +; X86-SSE2-NEXT: fldl (%esp) +; X86-SSE2-NEXT: addl $12, %esp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: load_double: ; X86-AVX: # %bb.0: @@ -668,18 +708,30 @@ define void @store_double_seq_cst(double* %fptr, double %v) { } define float @load_float_seq_cst(float* %fptr) { -; X86-SSE-LABEL: load_float_seq_cst: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl (%eax), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movd %xmm0, (%esp) -; X86-SSE-NEXT: flds (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: .cfi_def_cfa_offset 4 -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: load_float_seq_cst: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %eax +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl (%eax), %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: flds (%esp) +; X86-SSE1-NEXT: popl %eax +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: load_float_seq_cst: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl (%eax), %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: movd %xmm0, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: load_float_seq_cst: ; X86-AVX: # %bb.0: @@ -722,17 +774,44 @@ define float @load_float_seq_cst(float* %fptr) { } define double @load_double_seq_cst(double* %fptr) { -; X86-SSE-LABEL: load_double_seq_cst: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: subl $12, %esp -; X86-SSE-NEXT: .cfi_def_cfa_offset 16 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlps %xmm0, (%esp) -; X86-SSE-NEXT: fldl (%esp) -; X86-SSE-NEXT: addl $12, %esp -; X86-SSE-NEXT: .cfi_def_cfa_offset 4 -; X86-SSE-NEXT: retl +; X86-SSE1-LABEL: load_double_seq_cst: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: pushl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: subl $12, %esp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 24 +; X86-SSE1-NEXT: .cfi_offset %esi, -12 +; X86-SSE1-NEXT: .cfi_offset %ebx, -8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE1-NEXT: xorl %eax, %eax +; X86-SSE1-NEXT: xorl %edx, %edx +; X86-SSE1-NEXT: xorl %ecx, %ecx +; X86-SSE1-NEXT: xorl %ebx, %ebx +; X86-SSE1-NEXT: lock cmpxchg8b (%esi) +; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: addl $12, %esp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: popl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: load_double_seq_cst: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $12, %esp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%esp) +; X86-SSE2-NEXT: fldl (%esp) +; X86-SSE2-NEXT: addl $12, %esp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: load_double_seq_cst: ; X86-AVX: # %bb.0: -- 2.50.1