From 064d9fea373539e630299b0ad302bf87740f7ef8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 8 Apr 2019 01:54:27 +0000 Subject: [PATCH] [X86] Split floating point tests out of atomic-mi.ll into atomic-fp.ll. Add avx and avx512f command lines. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357882 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/atomic-fp.ll | 754 ++++++++++++++++++++++++++++++++++ test/CodeGen/X86/atomic-mi.ll | 417 ------------------- 2 files changed, 754 insertions(+), 417 deletions(-) create mode 100644 test/CodeGen/X86/atomic-fp.ll diff --git a/test/CodeGen/X86/atomic-fp.ll b/test/CodeGen/X86/atomic-fp.ll new file mode 100644 index 00000000000..30d2078fda0 --- /dev/null +++ b/test/CodeGen/X86/atomic-fp.ll @@ -0,0 +1,754 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-SSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-AVX --check-prefix X86-AVX1 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix X86 --check-prefix X86-AVX --check-prefix X86-AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix X64-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix X64-AVX --check-prefix X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix X64-AVX --check-prefix X64-AVX512 + +; ----- FADD ----- + +define void @fadd_32r(float* %loc, float %val) nounwind { +; X86-NOSSE-LABEL: fadd_32r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl (%eax), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fadds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%eax) +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_32r: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: addss (%eax), %xmm0 +; X86-SSE-NEXT: movss %xmm0, (%eax) +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_32r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: addss (%eax), %xmm0 +; X86-AVX-NEXT: movss %xmm0, (%eax) +; X86-AVX-NEXT: retl +; +; X64-LABEL: fadd_32r: +; X64: # %bb.0: +; X64-NEXT: addss (%rdi), %xmm0 +; X64-NEXT: movss %xmm0, (%rdi) +; X64-NEXT: retq + %floc = bitcast float* %loc to i32* + %1 = load atomic i32, i32* %floc seq_cst, align 4 + %2 = bitcast i32 %1 to float + %add = fadd float %2, %val + %3 = bitcast float %add to i32 + store atomic i32 %3, i32* %floc release, align 4 + ret void +} + +define void @fadd_64r(double* %loc, double %val) nounwind { +; X86-NOSSE-LABEL: fadd_64r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl 
%ebx +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $16, %esp +; X86-NOSSE-NEXT: movl 8(%ebp), %esi +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: faddl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %ebx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl (%esi), %eax +; X86-NOSSE-NEXT: movl 4(%esi), %edx +; X86-NOSSE-NEXT: .p2align 4, 0x90 +; X86-NOSSE-NEXT: .LBB1_1: # %atomicrmw.start +; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) +; X86-NOSSE-NEXT: jne .LBB1_1 +; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOSSE-NEXT: leal -8(%ebp), %esp +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_64r: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: movl %esp, %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: andl $-8, %esp +; X86-SSE-NEXT: subl $8, %esp +; X86-SSE-NEXT: movl 8(%ebp), %esi +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: addsd 12(%ebp), %xmm0 +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: movl (%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl (%esi), %eax +; X86-SSE-NEXT: movl 4(%esi), %edx +; X86-SSE-NEXT: .p2align 4, 0x90 +; X86-SSE-NEXT: .LBB1_1: # %atomicrmw.start +; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE-NEXT: lock cmpxchg8b (%esi) +; X86-SSE-NEXT: jne .LBB1_1 +; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE-NEXT: leal -8(%ebp), %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_64r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: pushl %ebx +; X86-AVX-NEXT: pushl %esi +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: movl 8(%ebp), %esi +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: movl (%esp), %ebx +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX-NEXT: movl (%esi), %eax +; X86-AVX-NEXT: movl 4(%esi), %edx +; X86-AVX-NEXT: .p2align 4, 0x90 +; X86-AVX-NEXT: .LBB1_1: # %atomicrmw.start +; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-AVX-NEXT: lock cmpxchg8b (%esi) +; X86-AVX-NEXT: jne .LBB1_1 +; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end +; X86-AVX-NEXT: leal -8(%ebp), %esp +; X86-AVX-NEXT: popl %esi +; X86-AVX-NEXT: popl %ebx +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-LABEL: fadd_64r: +; X64: # %bb.0: +; X64-NEXT: addsd (%rdi), %xmm0 +; X64-NEXT: movsd %xmm0, (%rdi) +; X64-NEXT: retq + %floc = bitcast double* %loc to i64* + %1 = load atomic i64, i64* %floc seq_cst, align 8 + %2 = bitcast i64 %1 to double + %add = fadd double %2, %val + %3 = bitcast double %add to i64 + store atomic i64 %3, i64* %floc release, align 8 + ret void +} + +@glob32 = global float 0.000000e+00, align 4 +@glob64 = global double 0.000000e+00, align 8 + +; Floating-point add to a global using an immediate. 
+define void @fadd_32g() nounwind { +; X86-NOSSE-LABEL: fadd_32g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl glob32, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fadds (%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, glob32 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_32g: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: addss glob32, %xmm0 +; X86-SSE-NEXT: movss %xmm0, glob32 +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_32g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: addss glob32, %xmm0 +; X86-AVX-NEXT: movss %xmm0, glob32 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fadd_32g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-SSE-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: movss %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fadd_32g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: movss %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 + %f = bitcast i32 %i to float + %add = fadd float %f, 1.000000e+00 + %s = bitcast float %add to i32 + store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 + ret void +} + +define void @fadd_64g() nounwind { +; X86-NOSSE-LABEL: fadd_64g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $24, %esp +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b glob64 +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %ebx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl glob64+4, %edx +; X86-NOSSE-NEXT: movl glob64, %eax +; X86-NOSSE-NEXT: .p2align 4, 0x90 +; X86-NOSSE-NEXT: .LBB3_1: # %atomicrmw.start +; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOSSE-NEXT: lock cmpxchg8b glob64 +; X86-NOSSE-NEXT: jne .LBB3_1 +; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOSSE-NEXT: leal -4(%ebp), %esp +; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_64g: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: movl %esp, %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: andl $-8, %esp +; X86-SSE-NEXT: subl $16, %esp +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: addsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: movl (%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl glob64+4, %edx +; X86-SSE-NEXT: movl glob64, %eax +; X86-SSE-NEXT: .p2align 4, 0x90 +; X86-SSE-NEXT: .LBB3_1: # %atomicrmw.start +; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE-NEXT: lock cmpxchg8b glob64 +; X86-SSE-NEXT: jne .LBB3_1 +; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE-NEXT: leal -4(%ebp), %esp +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl 
%ebp +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_64g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: pushl %ebx +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $16, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: movl (%esp), %ebx +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX-NEXT: movl glob64+4, %edx +; X86-AVX-NEXT: movl glob64, %eax +; X86-AVX-NEXT: .p2align 4, 0x90 +; X86-AVX-NEXT: .LBB3_1: # %atomicrmw.start +; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-AVX-NEXT: lock cmpxchg8b glob64 +; X86-AVX-NEXT: jne .LBB3_1 +; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end +; X86-AVX-NEXT: leal -4(%ebp), %esp +; X86-AVX-NEXT: popl %ebx +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fadd_64g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-SSE-NEXT: addsd {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fadd_64g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX-NEXT: addsd {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: movsd %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 + %f = bitcast i64 %i to double + %add = fadd double %f, 1.000000e+00 + %s = bitcast double %add to i64 + store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 + ret void +} + +; Floating-point add to a hard-coded immediate location using an immediate. +define void @fadd_32imm() nounwind { +; X86-NOSSE-LABEL: fadd_32imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl -559038737, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fadds (%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, -559038737 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_32imm: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: addss -559038737, %xmm0 +; X86-SSE-NEXT: movss %xmm0, -559038737 +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_32imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: addss -559038737, %xmm0 +; X86-AVX-NEXT: movss %xmm0, -559038737 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fadd_32imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: addss (%rax), %xmm0 +; X64-SSE-NEXT: movss %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fadd_32imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: addss (%rax), %xmm0 +; X64-AVX-NEXT: movss %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 + %f = bitcast i32 %i to float + %add = fadd float %f, 1.000000e+00 + %s = bitcast float %add to i32 + store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 + ret void +} + +define void @fadd_64imm() nounwind { +; X86-NOSSE-LABEL: fadd_64imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl 
%ebx +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $24, %esp +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b -559038737 +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %ebx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl -559038737, %eax +; X86-NOSSE-NEXT: movl -559038733, %edx +; X86-NOSSE-NEXT: .p2align 4, 0x90 +; X86-NOSSE-NEXT: .LBB5_1: # %atomicrmw.start +; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOSSE-NEXT: lock cmpxchg8b -559038737 +; X86-NOSSE-NEXT: jne .LBB5_1 +; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOSSE-NEXT: leal -4(%ebp), %esp +; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_64imm: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: movl %esp, %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: andl $-8, %esp +; X86-SSE-NEXT: subl $16, %esp +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: addsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: movl (%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl -559038737, %eax +; X86-SSE-NEXT: movl -559038733, %edx +; X86-SSE-NEXT: .p2align 4, 0x90 +; X86-SSE-NEXT: .LBB5_1: # %atomicrmw.start +; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE-NEXT: lock cmpxchg8b -559038737 +; X86-SSE-NEXT: jne .LBB5_1 +; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE-NEXT: leal -4(%ebp), %esp +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_64imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: pushl %ebx +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $16, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: movl (%esp), %ebx +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX-NEXT: movl -559038737, %eax +; X86-AVX-NEXT: movl -559038733, %edx +; X86-AVX-NEXT: .p2align 4, 0x90 +; X86-AVX-NEXT: .LBB5_1: # %atomicrmw.start +; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-AVX-NEXT: lock cmpxchg8b -559038737 +; X86-AVX-NEXT: jne .LBB5_1 +; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end +; X86-AVX-NEXT: leal -4(%ebp), %esp +; X86-AVX-NEXT: popl %ebx +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fadd_64imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: addsd (%rax), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fadd_64imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: addsd (%rax), %xmm0 +; X64-AVX-NEXT: movsd %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 + %f = bitcast i64 %i to double + %add = fadd double %f, 1.000000e+00 + %s = bitcast double %add to i64 + store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 + ret void +} + +; Floating-point add to a stack 
location. +define void @fadd_32stack() nounwind { +; X86-NOSSE-LABEL: fadd_32stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $12, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fadds (%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_32stack: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %eax +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: addss (%esp), %xmm0 +; X86-SSE-NEXT: movss %xmm0, (%esp) +; X86-SSE-NEXT: popl %eax +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_32stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: addss (%esp), %xmm0 +; X86-AVX-NEXT: movss %xmm0, (%esp) +; X86-AVX-NEXT: popl %eax +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fadd_32stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-SSE-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fadd_32stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0 +; X64-AVX-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i32, align 4 + %bc3 = bitcast i32* %ptr to float* + %load = load atomic i32, i32* %ptr acquire, align 4 + %bc0 = bitcast i32 %load to float + %fadd = fadd float 1.000000e+00, %bc0 + %bc1 = bitcast float %fadd to i32 + store atomic i32 %bc1, i32* %ptr release, align 4 + ret void +} + +define void @fadd_64stack() nounwind { +; X86-NOSSE-LABEL: fadd_64stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: .p2align 4, 0x90 +; X86-NOSSE-NEXT: .LBB7_1: # %atomicrmw.start +; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOSSE-NEXT: lock cmpxchg8b (%esp) +; X86-NOSSE-NEXT: jne .LBB7_1 +; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOSSE-NEXT: leal -4(%ebp), %esp +; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_64stack: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: movl %esp, %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: andl $-8, %esp +; X86-SSE-NEXT: subl $24, %esp +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: addsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl (%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: .p2align 4, 0x90 +; X86-SSE-NEXT: 
.LBB7_1: # %atomicrmw.start +; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE-NEXT: lock cmpxchg8b (%esp) +; X86-SSE-NEXT: jne .LBB7_1 +; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE-NEXT: leal -4(%ebp), %esp +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_64stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: pushl %ebx +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $24, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX-NEXT: movl (%esp), %eax +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX-NEXT: .p2align 4, 0x90 +; X86-AVX-NEXT: .LBB7_1: # %atomicrmw.start +; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-AVX-NEXT: lock cmpxchg8b (%esp) +; X86-AVX-NEXT: jne .LBB7_1 +; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end +; X86-AVX-NEXT: leal -4(%ebp), %esp +; X86-AVX-NEXT: popl %ebx +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fadd_64stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-SSE-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fadd_64stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0 +; X64-AVX-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i64, align 8 + %bc3 = bitcast i64* %ptr to double* + %load = load atomic i64, i64* %ptr acquire, align 8 + %bc0 = bitcast i64 %load to double + %fadd = fadd double 1.000000e+00, %bc0 + %bc1 = bitcast double %fadd to i64 + store atomic i64 %bc1, i64* %ptr release, align 8 + ret void +} + +define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind { +; X86-NOSSE-LABEL: fadd_array: +; X86-NOSSE: # %bb.0: # %bb +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: pushl %edi +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $24, %esp +; X86-NOSSE-NEXT: movl 20(%ebp), %esi +; X86-NOSSE-NEXT: movl 8(%ebp), %edi +; X86-NOSSE-NEXT: xorl %eax, %eax +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: xorl %ebx, %ebx +; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: faddl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %ebx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl (%edi,%esi,8), %eax +; X86-NOSSE-NEXT: movl 4(%edi,%esi,8), %edx +; X86-NOSSE-NEXT: .p2align 4, 0x90 +; X86-NOSSE-NEXT: .LBB8_1: # %atomicrmw.start +; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-NOSSE-NEXT: jne .LBB8_1 +; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOSSE-NEXT: leal -12(%ebp), %esp +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: popl %edi +; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: fadd_array: +; X86-SSE: # %bb.0: # %bb +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: movl %esp, %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl 
%edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: andl $-8, %esp +; X86-SSE-NEXT: subl $16, %esp +; X86-SSE-NEXT: movl 20(%ebp), %esi +; X86-SSE-NEXT: movl 8(%ebp), %edi +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: addsd 12(%ebp), %xmm0 +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: movl (%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl (%edi,%esi,8), %eax +; X86-SSE-NEXT: movl 4(%edi,%esi,8), %edx +; X86-SSE-NEXT: .p2align 4, 0x90 +; X86-SSE-NEXT: .LBB8_1: # %atomicrmw.start +; X86-SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-SSE-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-SSE-NEXT: jne .LBB8_1 +; X86-SSE-NEXT: # %bb.2: # %atomicrmw.end +; X86-SSE-NEXT: leal -12(%ebp), %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: fadd_array: +; X86-AVX: # %bb.0: # %bb +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: pushl %ebx +; X86-AVX-NEXT: pushl %edi +; X86-AVX-NEXT: pushl %esi +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $16, %esp +; X86-AVX-NEXT: movl 20(%ebp), %esi +; X86-AVX-NEXT: movl 8(%ebp), %edi +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: movl (%esp), %ebx +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX-NEXT: movl (%edi,%esi,8), %eax +; X86-AVX-NEXT: movl 4(%edi,%esi,8), %edx +; X86-AVX-NEXT: .p2align 4, 0x90 +; X86-AVX-NEXT: .LBB8_1: # %atomicrmw.start +; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-AVX-NEXT: lock cmpxchg8b (%edi,%esi,8) +; X86-AVX-NEXT: jne .LBB8_1 +; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end +; X86-AVX-NEXT: leal -12(%ebp), %esp +; X86-AVX-NEXT: popl %esi +; X86-AVX-NEXT: popl %edi +; X86-AVX-NEXT: popl %ebx +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-LABEL: fadd_array: +; X64: # %bb.0: # %bb +; X64-NEXT: addsd (%rdi,%rsi,8), %xmm0 +; X64-NEXT: movsd %xmm0, (%rdi,%rsi,8) +; X64-NEXT: retq +bb: + %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2 + %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8 + %tmp7 = bitcast i64 %tmp6 to double + %tmp8 = fadd double %tmp7, %arg1 + %tmp9 = bitcast double %tmp8 to i64 + store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8 + ret void +} diff --git a/test/CodeGen/X86/atomic-mi.ll b/test/CodeGen/X86/atomic-mi.ll index 353580bbf7e..492d7ae8f2d 100644 --- a/test/CodeGen/X86/atomic-mi.ll +++ b/test/CodeGen/X86/atomic-mi.ll @@ -1866,420 +1866,3 @@ define void @neg_32_seq_cst(i32* %p) { ret void } -; ----- FADD ----- - -define void @fadd_32r(float* %loc, float %val) { -; X64-LABEL: fadd_32r: -; X64: # %bb.0: -; X64-NEXT: addss (%rdi), %xmm0 -; X64-NEXT: movss %xmm0, (%rdi) -; X64-NEXT: retq -; -; X32-LABEL: fadd_32r: -; X32: # %bb.0: -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl (%eax), %ecx -; X32-NEXT: movl %ecx, (%esp) -; X32-NEXT: flds (%esp) -; X32-NEXT: fadds {{[0-9]+}}(%esp) -; X32-NEXT: fstps {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, (%eax) -; X32-NEXT: addl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 4 -; X32-NEXT: retl -; Don't check x86-32. -; LLVM's SSE handling is conservative on x86-32 even without using atomics. 
- %floc = bitcast float* %loc to i32* - %1 = load atomic i32, i32* %floc seq_cst, align 4 - %2 = bitcast i32 %1 to float - %add = fadd float %2, %val - %3 = bitcast float %add to i32 - store atomic i32 %3, i32* %floc release, align 4 - ret void -} - -define void @fadd_64r(double* %loc, double %val) { -; X64-LABEL: fadd_64r: -; X64: # %bb.0: -; X64-NEXT: addsd (%rdi), %xmm0 -; X64-NEXT: movsd %xmm0, (%rdi) -; X64-NEXT: retq -; -; X32-LABEL: fadd_64r: -; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $16, %esp -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: fldl {{[0-9]+}}(%esp) -; X32-NEXT: faddl 12(%ebp) -; X32-NEXT: fstpl (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl (%esi), %eax -; X32-NEXT: movl 4(%esi), %edx -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB76_1: # %atomicrmw.start -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: jne .LBB76_1 -; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -8(%ebp), %esp -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). - %floc = bitcast double* %loc to i64* - %1 = load atomic i64, i64* %floc seq_cst, align 8 - %2 = bitcast i64 %1 to double - %add = fadd double %2, %val - %3 = bitcast double %add to i64 - store atomic i64 %3, i64* %floc release, align 8 - ret void -} - -@glob32 = global float 0.000000e+00, align 4 -@glob64 = global double 0.000000e+00, align 8 - -; Floating-point add to a global using an immediate. -define void @fadd_32g() { -; X64-LABEL: fadd_32g: -; X64: # %bb.0: -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: addss {{.*}}(%rip), %xmm0 -; X64-NEXT: movss %xmm0, {{.*}}(%rip) -; X64-NEXT: retq -; -; X32-LABEL: fadd_32g: -; X32: # %bb.0: -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: movl glob32, %eax -; X32-NEXT: movl %eax, (%esp) -; X32-NEXT: fld1 -; X32-NEXT: fadds (%esp) -; X32-NEXT: fstps {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, glob32 -; X32-NEXT: addl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). 
- %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 - %f = bitcast i32 %i to float - %add = fadd float %f, 1.000000e+00 - %s = bitcast float %add to i32 - store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 - ret void -} - -define void @fadd_64g() { -; X64-LABEL: fadd_64g: -; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: addsd {{.*}}(%rip), %xmm0 -; X64-NEXT: movsd %xmm0, {{.*}}(%rip) -; X64-NEXT: retq -; -; X32-LABEL: fadd_64g: -; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $24, %esp -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b glob64 -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: fld1 -; X32-NEXT: faddl {{[0-9]+}}(%esp) -; X32-NEXT: fstpl (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl glob64+4, %edx -; X32-NEXT: movl glob64, %eax -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB78_1: # %atomicrmw.start -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: lock cmpxchg8b glob64 -; X32-NEXT: jne .LBB78_1 -; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -4(%ebp), %esp -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). - %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 - %f = bitcast i64 %i to double - %add = fadd double %f, 1.000000e+00 - %s = bitcast double %add to i64 - store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 - ret void -} - -; Floating-point add to a hard-coded immediate location using an immediate. -define void @fadd_32imm() { -; X64-LABEL: fadd_32imm: -; X64: # %bb.0: -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF -; X64-NEXT: addss (%rax), %xmm0 -; X64-NEXT: movss %xmm0, (%rax) -; X64-NEXT: retq -; -; X32-LABEL: fadd_32imm: -; X32: # %bb.0: -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: movl -559038737, %eax -; X32-NEXT: movl %eax, (%esp) -; X32-NEXT: fld1 -; X32-NEXT: fadds (%esp) -; X32-NEXT: fstps {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, -559038737 -; X32-NEXT: addl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). 
- %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 - %f = bitcast i32 %i to float - %add = fadd float %f, 1.000000e+00 - %s = bitcast float %add to i32 - store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 - ret void -} - -define void @fadd_64imm() { -; X64-LABEL: fadd_64imm: -; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF -; X64-NEXT: addsd (%rax), %xmm0 -; X64-NEXT: movsd %xmm0, (%rax) -; X64-NEXT: retq -; -; X32-LABEL: fadd_64imm: -; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $24, %esp -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b -559038737 -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: fld1 -; X32-NEXT: faddl {{[0-9]+}}(%esp) -; X32-NEXT: fstpl (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl -559038737, %eax -; X32-NEXT: movl -559038733, %edx -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB80_1: # %atomicrmw.start -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: lock cmpxchg8b -559038737 -; X32-NEXT: jne .LBB80_1 -; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -4(%ebp), %esp -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). - %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 - %f = bitcast i64 %i to double - %add = fadd double %f, 1.000000e+00 - %s = bitcast double %add to i64 - store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 - ret void -} - -; Floating-point add to a stack location. -define void @fadd_32stack() { -; X64-LABEL: fadd_32stack: -; X64: # %bb.0: -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: retq -; -; X32-LABEL: fadd_32stack: -; X32: # %bb.0: -; X32-NEXT: subl $12, %esp -; X32-NEXT: .cfi_def_cfa_offset 16 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, (%esp) -; X32-NEXT: fld1 -; X32-NEXT: fadds (%esp) -; X32-NEXT: fstps {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: addl $12, %esp -; X32-NEXT: .cfi_def_cfa_offset 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). 
- %ptr = alloca i32, align 4 - %bc3 = bitcast i32* %ptr to float* - %load = load atomic i32, i32* %ptr acquire, align 4 - %bc0 = bitcast i32 %load to float - %fadd = fadd float 1.000000e+00, %bc0 - %bc1 = bitcast float %fadd to i32 - store atomic i32 %bc1, i32* %ptr release, align 4 - ret void -} - -define void @fadd_64stack() { -; X64-LABEL: fadd_64stack: -; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: retq -; -; X32-LABEL: fadd_64stack: -; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $32, %esp -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esp) -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: fld1 -; X32-NEXT: faddl {{[0-9]+}}(%esp) -; X32-NEXT: fstpl {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl (%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB82_1: # %atomicrmw.start -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: lock cmpxchg8b (%esp) -; X32-NEXT: jne .LBB82_1 -; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -4(%ebp), %esp -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). - %ptr = alloca i64, align 8 - %bc3 = bitcast i64* %ptr to double* - %load = load atomic i64, i64* %ptr acquire, align 8 - %bc0 = bitcast i64 %load to double - %fadd = fadd double 1.000000e+00, %bc0 - %bc1 = bitcast double %fadd to i64 - store atomic i64 %bc1, i64* %ptr release, align 8 - ret void -} - -define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) { -; X64-LABEL: fadd_array: -; X64: # %bb.0: # %bb -; X64-NEXT: addsd (%rdi,%rsi,8), %xmm0 -; X64-NEXT: movsd %xmm0, (%rdi,%rsi,8) -; X64-NEXT: retq -; -; X32-LABEL: fadd_array: -; X32: # %bb.0: # %bb -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $24, %esp -; X32-NEXT: .cfi_offset %esi, -20 -; X32-NEXT: .cfi_offset %edi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: movl 20(%ebp), %esi -; X32-NEXT: movl 8(%ebp), %edi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%edi,%esi,8) -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: fldl {{[0-9]+}}(%esp) -; X32-NEXT: faddl 12(%ebp) -; X32-NEXT: fstpl (%esp) -; X32-NEXT: movl (%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl (%edi,%esi,8), %eax -; X32-NEXT: movl 4(%edi,%esi,8), %edx -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB83_1: # %atomicrmw.start -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: lock cmpxchg8b (%edi,%esi,8) -; X32-NEXT: jne .LBB83_1 -; X32-NEXT: # %bb.2: # %atomicrmw.end -; X32-NEXT: leal -12(%ebp), %esp -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: 
popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 -; X32-NEXT: retl -; Don't check x86-32 (see comment above). -bb: - %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2 - %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8 - %tmp7 = bitcast i64 %tmp6 to double - %tmp8 = fadd double %tmp7, %arg1 - %tmp9 = bitcast double %tmp8 to i64 - store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8 - ret void -} -- 2.50.1
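
For context, every test in the new atomic-fp.ll exercises the same IR idiom: an atomic integer load, a bitcast to floating point, an fadd, a bitcast back, and an atomic integer store. The C sketch below is illustrative only and is not part of the commit; the function name and the use of the GCC/Clang __atomic builtins are assumptions for the example, chosen because they lower to roughly the IR in fadd_32r.

#include <stdint.h>
#include <string.h>

/* Illustrative counterpart of fadd_32r: read the float's bit pattern with a
 * seq_cst atomic load, add in the FP domain, and publish the result with a
 * release store -- mirroring the load atomic i32 / bitcast / fadd / bitcast /
 * store atomic i32 sequence in the test. */
void fadd_32r_sketch(float *loc, float val) {
  uint32_t bits = __atomic_load_n((uint32_t *)loc, __ATOMIC_SEQ_CST);
  float f;
  memcpy(&f, &bits, sizeof f);     /* bitcast i32 -> float */
  f += val;
  memcpy(&bits, &f, sizeof bits);  /* bitcast float -> i32 */
  __atomic_store_n((uint32_t *)loc, bits, __ATOMIC_RELEASE);
}

At -O2 the X86 backend is expected to fold this pattern into the plain addss/movss sequence the X64 CHECK lines show, since aligned 4-byte loads and stores are naturally atomic on x86 and a release store needs no extra fencing there; the 64-bit cases on 32-bit targets instead fall back to the cmpxchg8b loops visible in the X86 CHECK lines.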