From 2d9526eb168cf34a76a315ea197e91be4435f868 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 20 Oct 2017 14:13:02 +0000 Subject: [PATCH] [X86][AVX512] Regenerate regcall tests. As part of tracking down machine verifier issues (PR27481) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316213 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx512-regcall-Mask.ll | 1041 ++++++++++++--- test/CodeGen/X86/avx512-regcall-NoMask.ll | 1390 +++++++++++++++------ 2 files changed, 1860 insertions(+), 571 deletions(-) diff --git a/test/CodeGen/X86/avx512-regcall-Mask.ll b/test/CodeGen/X86/avx512-regcall-Mask.ll index 36fe322d982..bb541f46567 100644 --- a/test/CodeGen/X86/avx512-regcall-Mask.ll +++ b/test/CodeGen/X86/avx512-regcall-Mask.ll @@ -1,72 +1,85 @@ -; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=X32 %s -; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=CHECK64 --check-prefix=WIN64 %s -; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=CHECK64 --check-prefix=LINUXOSX64 %s - -; X32-LABEL: test_argv64i1: -; X32: kmovd %edx, %k0 -; X32: kmovd %edi, %k1 -; X32: kmovd %eax, %k1 -; X32: kmovd %ecx, %k2 -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x -; X32: retl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64 +; Test regcall when receiving arguments of v64i1 type +define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) { +; X32-LABEL: test_argv64i1: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $16, %esp +; X32-NEXT: kmovd %edx, %k0 +; X32-NEXT: kmovd %edi, %k1 +; X32-NEXT: kunpckdq %k0, %k1, %k0 +; X32-NEXT: kmovd %eax, %k1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kunpckdq %k1, %k2, %k1 +; X32-NEXT: kmovq %k1, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; 
X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: kmovq %k0, (%esp) +; X32-NEXT: addl (%esp), %eax +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: addl 8(%ebp), %eax +; X32-NEXT: adcl 12(%ebp), %ecx +; X32-NEXT: addl 16(%ebp), %eax +; X32-NEXT: adcl 20(%ebp), %ecx +; X32-NEXT: addl 24(%ebp), %eax +; X32-NEXT: adcl 28(%ebp), %ecx +; X32-NEXT: addl 32(%ebp), %eax +; X32-NEXT: adcl 36(%ebp), %ecx +; X32-NEXT: addl 40(%ebp), %eax +; X32-NEXT: adcl 44(%ebp), %ecx +; X32-NEXT: addl 48(%ebp), %eax +; X32-NEXT: adcl 52(%ebp), %ecx +; X32-NEXT: addl 56(%ebp), %eax +; X32-NEXT: adcl 60(%ebp), %ecx +; X32-NEXT: addl 64(%ebp), %eax +; X32-NEXT: adcl 68(%ebp), %ecx +; X32-NEXT: addl 72(%ebp), %eax +; X32-NEXT: adcl 76(%ebp), %ecx +; X32-NEXT: addl 80(%ebp), %eax +; X32-NEXT: adcl 84(%ebp), %ecx +; X32-NEXT: addl 88(%ebp), %eax +; X32-NEXT: adcl 92(%ebp), %ecx +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; ; WIN64-LABEL: test_argv64i1: -; WIN64: addq %rcx, %rax -; WIN64: addq %rdx, %rax -; WIN64: addq %rdi, %rax -; WIN64: addq %rsi, %rax -; WIN64: addq %r8, %rax -; WIN64: addq %r9, %rax -; WIN64: addq %r10, %rax -; WIN64: addq %r11, %rax -; WIN64: addq %r12, %rax -; WIN64: addq %r14, %rax -; WIN64: addq %r15, %rax -; WIN64: addq {{([0-9])*}}(%rsp), %rax -; WIN64: retq - +; WIN64: # BB#0: +; WIN64-NEXT: addq %rcx, %rax +; WIN64-NEXT: addq %rdx, %rax +; WIN64-NEXT: addq %rdi, %rax +; WIN64-NEXT: addq %rsi, %rax +; WIN64-NEXT: addq %r8, %rax +; WIN64-NEXT: addq %r9, %rax +; WIN64-NEXT: addq %r10, %rax +; WIN64-NEXT: addq %r11, %rax +; WIN64-NEXT: addq %r12, %rax +; WIN64-NEXT: addq %r14, %rax +; WIN64-NEXT: addq %r15, %rax +; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax +; WIN64-NEXT: retq +; ; LINUXOSX64-LABEL: test_argv64i1: -; LINUXOSX64: addq %rcx, %rax -; LINUXOSX64: addq %rdx, %rax -; LINUXOSX64: addq %rdi, %rax -; LINUXOSX64: addq %rsi, %rax -; LINUXOSX64: addq %r8, %rax -; LINUXOSX64: addq %r9, %rax -; LINUXOSX64: addq %r12, %rax -; LINUXOSX64: addq %r13, %rax -; LINUXOSX64: addq %r14, %rax -; LINUXOSX64: addq %r15, %rax -; LINUXOSX64: addq {{([0-9])*}}(%rsp), %rax -; LINUXOSX64: addq {{([0-9])*}}(%rsp), %rax -; LINUXOSX64: retq - -; Test regcall when receiving arguments of v64i1 type -define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, - <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, - <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, - <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, - <64 x i1> %x12) { +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: addq %rcx, %rax +; LINUXOSX64-NEXT: addq %rdx, %rax +; LINUXOSX64-NEXT: addq %rdi, %rax +; LINUXOSX64-NEXT: addq %rsi, %rax +; LINUXOSX64-NEXT: addq %r8, %rax +; LINUXOSX64-NEXT: addq %r9, %rax +; LINUXOSX64-NEXT: addq %r12, %rax +; LINUXOSX64-NEXT: addq %r13, %rax +; LINUXOSX64-NEXT: addq %r14, %rax +; LINUXOSX64-NEXT: addq %r15, %rax +; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax +; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax +; LINUXOSX64-NEXT: retq %y0 = bitcast <64 x i1> %x0 to i64 %y1 = bitcast <64 x i1> %x1 to i64 %y2 = bitcast <64 x i1> %x2 to i64 @@ -95,54 +108,114 @@ define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> ret i64 %add12 } -; X32-LABEL: caller_argv64i1: -; X32: pushl %edi -; X32: subl $88, %esp -; X32: vmovaps __xmm@00000001000000020000000100000002, %xmm0 # xmm0 = [2,1,2,1] -; X32: vmovups %xmm0, 64(%esp) -; X32: vmovaps LCPI1_1, %zmm0 # zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1] -; X32: vmovups %zmm0, (%esp) -; X32: movl $1, 84(%esp) -; X32: movl $2, 
80(%esp) -; X32: movl $2, %eax -; X32: movl $1, %ecx -; X32: movl $2, %edx -; X32: movl $1, %edi -; X32: vzeroupper -; X32: calll _test_argv64i1 - -; WIN64-LABEL: caller_argv64i1: -; WIN64: movabsq $4294967298, %rax -; WIN64: movq %rax, (%rsp) -; WIN64: movq %rax, %rcx -; WIN64: movq %rax, %rdx -; WIN64: movq %rax, %rdi -; WIN64: movq %rax, %rsi -; WIN64: movq %rax, %r8 -; WIN64: movq %rax, %r9 -; WIN64: movq %rax, %r10 -; WIN64: movq %rax, %r11 -; WIN64: movq %rax, %r12 -; WIN64: movq %rax, %r14 -; WIN64: movq %rax, %r15 -; WIN64: callq test_argv64i1 - -; LINUXOSX64-LABEL: caller_argv64i1: -; LINUXOSX64: movabsq $4294967298, %rax -; LINUXOSX64: movq %rax, %rcx -; LINUXOSX64: movq %rax, %rdx -; LINUXOSX64: movq %rax, %rdi -; LINUXOSX64: movq %rax, %rsi -; LINUXOSX64: movq %rax, %r8 -; LINUXOSX64: movq %rax, %r9 -; LINUXOSX64: movq %rax, %r12 -; LINUXOSX64: movq %rax, %r13 -; LINUXOSX64: movq %rax, %r14 -; LINUXOSX64: movq %rax, %r15 -; LINUXOSX64: call{{.*}} test_argv64i1 - ; Test regcall when passing arguments of v64i1 type define i64 @caller_argv64i1() #0 { +; X32-LABEL: caller_argv64i1: +; X32: # BB#0: # %entry +; X32-NEXT: pushl %edi +; X32-NEXT: subl $88, %esp +; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2,1,2,1] +; X32-NEXT: vmovups %xmm0, {{[0-9]+}}(%esp) +; X32-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1] +; X32-NEXT: vmovups %zmm0, (%esp) +; X32-NEXT: movl $1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $2, %eax +; X32-NEXT: movl $1, %ecx +; X32-NEXT: movl $2, %edx +; X32-NEXT: movl $1, %edi +; X32-NEXT: vzeroupper +; X32-NEXT: calll _test_argv64i1 +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: addl $88, %esp +; X32-NEXT: popl %edi +; X32-NEXT: retl +; +; WIN64-LABEL: caller_argv64i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %r15 +; WIN64-NEXT: .seh_pushreg 15 +; WIN64-NEXT: pushq %r14 +; WIN64-NEXT: .seh_pushreg 14 +; WIN64-NEXT: pushq %r12 +; WIN64-NEXT: .seh_pushreg 12 +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $48, %rsp +; WIN64-NEXT: .seh_stackalloc 48 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 32 +; WIN64-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 16 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002 +; WIN64-NEXT: movq %rax, (%rsp) +; WIN64-NEXT: movq %rax, %rcx +; WIN64-NEXT: movq %rax, %rdx +; WIN64-NEXT: movq %rax, %rdi +; WIN64-NEXT: movq %rax, %rsi +; WIN64-NEXT: movq %rax, %r8 +; WIN64-NEXT: movq %rax, %r9 +; WIN64-NEXT: movq %rax, %r10 +; WIN64-NEXT: movq %rax, %r11 +; WIN64-NEXT: movq %rax, %r12 +; WIN64-NEXT: movq %rax, %r14 +; WIN64-NEXT: movq %rax, %r15 +; WIN64-NEXT: callq test_argv64i1 +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $48, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: popq %r12 +; WIN64-NEXT: popq %r14 +; WIN64-NEXT: popq %r15 +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_argv64i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %r15 +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: pushq %r14 +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24 +; LINUXOSX64-NEXT: pushq %r13 +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32 +; LINUXOSX64-NEXT: pushq %r12 +; 
LINUXOSX64-NEXT: .cfi_def_cfa_offset 40 +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48 +; LINUXOSX64-NEXT: .cfi_offset %r12, -40 +; LINUXOSX64-NEXT: .cfi_offset %r13, -32 +; LINUXOSX64-NEXT: .cfi_offset %r14, -24 +; LINUXOSX64-NEXT: .cfi_offset %r15, -16 +; LINUXOSX64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002 +; LINUXOSX64-NEXT: movq %rax, %rcx +; LINUXOSX64-NEXT: movq %rax, %rdx +; LINUXOSX64-NEXT: movq %rax, %rdi +; LINUXOSX64-NEXT: movq %rax, %rsi +; LINUXOSX64-NEXT: movq %rax, %r8 +; LINUXOSX64-NEXT: movq %rax, %r9 +; LINUXOSX64-NEXT: movq %rax, %r12 +; LINUXOSX64-NEXT: movq %rax, %r13 +; LINUXOSX64-NEXT: movq %rax, %r14 +; LINUXOSX64-NEXT: movq %rax, %r15 +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8 +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8 +; LINUXOSX64-NEXT: callq test_argv64i1 +; LINUXOSX64-NEXT: addq $24, %rsp +; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset -16 +; LINUXOSX64-NEXT: popq %r12 +; LINUXOSX64-NEXT: popq %r13 +; LINUXOSX64-NEXT: popq %r14 +; LINUXOSX64-NEXT: popq %r15 +; LINUXOSX64-NEXT: retq entry: %v0 = bitcast i64 4294967298 to <64 x i1> %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0, @@ -153,83 +226,294 @@ entry: ret i64 %call } -; X32-LABEL: test_retv64i1: -; X32: mov{{.*}} $2, %eax -; X32: mov{{.*}} $1, %ecx -; X32: ret{{.*}} - -; CHECK64-LABEL: test_retv64i1: -; CHECK64: mov{{.*}} $4294967298, %rax -; CHECK64: ret{{.*}} - ; Test regcall when returning v64i1 type define x86_regcallcc <64 x i1> @test_retv64i1() { +; X32-LABEL: test_retv64i1: +; X32: # BB#0: +; X32-NEXT: movl $2, %eax +; X32-NEXT: movl $1, %ecx +; X32-NEXT: retl +; +; CHECK64-LABEL: test_retv64i1: +; CHECK64: # BB#0: +; CHECK64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002 +; CHECK64-NEXT: retq %a = bitcast i64 4294967298 to <64 x i1> ret <64 x i1> %a } -; X32-LABEL: caller_retv64i1: -; X32: call{{.*}} _test_retv64i1 -; X32: kmov{{.*}} %eax, %k0 -; X32: kmov{{.*}} %ecx, %k1 -; X32: kunpckdq %k0, %k1, %k0 - -; CHECK64-LABEL: caller_retv64i1: -; CHECK64: call{{.*}} {{_*}}test_retv64i1 -; CHECK64: kmovq %rax, %k0 -; CHECK64: ret{{.*}} - ; Test regcall when processing result of v64i1 type define <64 x i1> @caller_retv64i1() #0 { +; X32-LABEL: caller_retv64i1: +; X32: # BB#0: # %entry +; X32-NEXT: calll _test_retv64i1 +; X32-NEXT: kmovd %eax, %k0 +; X32-NEXT: kmovd %ecx, %k1 +; X32-NEXT: kunpckdq %k0, %k1, %k0 +; X32-NEXT: vpmovm2b %k0, %zmm0 +; X32-NEXT: retl +; +; WIN64-LABEL: caller_retv64i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 16 +; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: callq test_retv64i1 +; WIN64-NEXT: kmovq %rax, %k0 +; WIN64-NEXT: vpmovm2b %k0, %zmm0 +; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $40, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_retv64i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: 
callq test_retv64i1 +; LINUXOSX64-NEXT: kmovq %rax, %k0 +; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm0 +; LINUXOSX64-NEXT: popq %rax +; LINUXOSX64-NEXT: retq entry: %call = call x86_regcallcc <64 x i1> @test_retv64i1() ret <64 x i1> %call } -; CHECK-LABEL: test_argv32i1: -; CHECK: kmovd %edx, %k{{[0-9]+}} -; CHECK: kmovd %ecx, %k{{[0-9]+}} -; CHECK: kmovd %eax, %k{{[0-9]+}} -; CHECK: ret{{l|q}} - ; Test regcall when receiving arguments of v32i1 type declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) { +; X32-LABEL: test_argv32i1: +; X32: # BB#0: # %entry +; X32-NEXT: pushl %esp +; X32-NEXT: subl $72, %esp +; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill +; X32-NEXT: kmovd %edx, %k0 +; X32-NEXT: kmovd %ecx, %k1 +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: vpmovm2b %k2, %zmm0 +; X32-NEXT: vpmovm2b %k1, %zmm1 +; X32-NEXT: vpmovm2b %k0, %zmm2 +; X32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; X32-NEXT: # kill: %YMM1 %YMM1 %ZMM1 +; X32-NEXT: # kill: %YMM2 %YMM2 %ZMM2 +; X32-NEXT: calll _test_argv32i1helper +; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esp +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; WIN64-LABEL: test_argv32i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %r11 +; WIN64-NEXT: .seh_pushreg 11 +; WIN64-NEXT: pushq %r10 +; WIN64-NEXT: .seh_pushreg 10 +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $32, %rsp +; WIN64-NEXT: .seh_stackalloc 32 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: kmovd %edx, %k0 +; WIN64-NEXT: kmovd %ecx, %k1 +; WIN64-NEXT: kmovd %eax, %k2 +; WIN64-NEXT: vpmovm2b %k2, %zmm0 +; WIN64-NEXT: vpmovm2b %k1, %zmm1 +; WIN64-NEXT: vpmovm2b %k0, %zmm2 +; WIN64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; WIN64-NEXT: # kill: %YMM1 %YMM1 %ZMM1 +; WIN64-NEXT: # kill: %YMM2 %YMM2 %ZMM2 +; WIN64-NEXT: callq test_argv32i1helper +; WIN64-NEXT: nop +; WIN64-NEXT: addq $32, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %r10 +; WIN64-NEXT: popq %r11 +; WIN64-NEXT: vzeroupper +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_argv32i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $128, %rsp +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144 +; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128 +; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112 +; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96 +; 
LINUXOSX64-NEXT: .cfi_offset %xmm12, -80 +; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64 +; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48 +; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32 +; LINUXOSX64-NEXT: kmovd %edx, %k0 +; LINUXOSX64-NEXT: kmovd %ecx, %k1 +; LINUXOSX64-NEXT: kmovd %eax, %k2 +; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0 +; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1 +; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2 +; LINUXOSX64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; LINUXOSX64-NEXT: # kill: %YMM1 %YMM1 %ZMM1 +; LINUXOSX64-NEXT: # kill: %YMM2 %YMM2 %ZMM2 +; LINUXOSX64-NEXT: callq test_argv32i1helper +; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: addq $128, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: vzeroupper +; LINUXOSX64-NEXT: retq entry: %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) ret i32 %res } -; CHECK-LABEL: caller_argv32i1: -; CHECK: mov{{.*}} $1, %eax -; CHECK: mov{{.*}} $1, %ecx -; CHECK: mov{{.*}} $1, %edx -; CHECK: call{{.*}} {{_*}}test_argv32i1 - ; Test regcall when passing arguments of v32i1 type define i32 @caller_argv32i1() #0 { +; X32-LABEL: caller_argv32i1: +; X32: # BB#0: # %entry +; X32-NEXT: movl $1, %eax +; X32-NEXT: movl $1, %ecx +; X32-NEXT: movl $1, %edx +; X32-NEXT: calll _test_argv32i1 +; X32-NEXT: retl +; +; WIN64-LABEL: caller_argv32i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 16 +; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: movl $1, %eax +; WIN64-NEXT: movl $1, %ecx +; WIN64-NEXT: movl $1, %edx +; WIN64-NEXT: callq test_argv32i1 +; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $40, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_argv32i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: movl $1, %eax +; LINUXOSX64-NEXT: movl $1, %ecx +; LINUXOSX64-NEXT: movl $1, %edx +; LINUXOSX64-NEXT: callq test_argv32i1 +; LINUXOSX64-NEXT: popq %rcx +; LINUXOSX64-NEXT: retq entry: %v0 = bitcast i32 1 to <32 x i1> %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0) ret i32 %call } -; CHECK-LABEL: test_retv32i1: -; CHECK: movl $1, %eax -; CHECK: ret{{l|q}} - ; Test regcall when returning v32i1 type define x86_regcallcc <32 x i1> @test_retv32i1() { +; X32-LABEL: test_retv32i1: +; X32: # BB#0: +; X32-NEXT: movl $1, %eax +; X32-NEXT: retl +; +; CHECK64-LABEL: test_retv32i1: +; CHECK64: # BB#0: +; CHECK64-NEXT: movl $1, %eax +; CHECK64-NEXT: retq %a = bitcast i32 1 to 
<32 x i1> ret <32 x i1> %a } -; CHECK-LABEL: caller_retv32i1: -; CHECK: call{{.*}} {{_*}}test_retv32i1 -; CHECK: incl %eax - ; Test regcall when processing result of v32i1 type define i32 @caller_retv32i1() #0 { +; X32-LABEL: caller_retv32i1: +; X32: # BB#0: # %entry +; X32-NEXT: calll _test_retv32i1 +; X32-NEXT: incl %eax +; X32-NEXT: retl +; +; WIN64-LABEL: caller_retv32i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 16 +; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: callq test_retv32i1 +; WIN64-NEXT: incl %eax +; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $40, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_retv32i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: callq test_retv32i1 +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: popq %rcx +; LINUXOSX64-NEXT: retq entry: %call = call x86_regcallcc <32 x i1> @test_retv32i1() %c = bitcast <32 x i1> %call to i32 @@ -237,49 +521,232 @@ entry: ret i32 %add } -; CHECK-LABEL: test_argv16i1: -; CHECK: kmovd %edx, %k{{[0-9]+}} -; CHECK: kmovd %ecx, %k{{[0-9]+}} -; CHECK: kmovd %eax, %k{{[0-9]+}} -; CHECK: ret{{l|q}} - ; Test regcall when receiving arguments of v16i1 type declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) { +; X32-LABEL: test_argv16i1: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: subl $72, %esp +; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill +; X32-NEXT: kmovd %edx, %k0 +; X32-NEXT: kmovd %ecx, %k1 +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: vpmovm2b %k2, %zmm0 +; X32-NEXT: vpmovm2b %k1, %zmm1 +; X32-NEXT: vpmovm2b %k0, %zmm2 +; X32-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; X32-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; X32-NEXT: # kill: %XMM2 %XMM2 %ZMM2 +; X32-NEXT: vzeroupper +; X32-NEXT: calll _test_argv16i1helper +; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_argv16i1: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %r11 +; WIN64-NEXT: .seh_pushreg 11 +; WIN64-NEXT: pushq %r10 +; WIN64-NEXT: .seh_pushreg 10 +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $32, %rsp +; WIN64-NEXT: .seh_stackalloc 32 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: kmovd %edx, %k0 +; WIN64-NEXT: kmovd %ecx, %k1 +; WIN64-NEXT: kmovd %eax, %k2 +; WIN64-NEXT: vpmovm2b %k2, %zmm0 +; WIN64-NEXT: vpmovm2b %k1, %zmm1 +; WIN64-NEXT: vpmovm2b %k0, %zmm2 +; WIN64-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; WIN64-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; 
WIN64-NEXT: # kill: %XMM2 %XMM2 %ZMM2 +; WIN64-NEXT: vzeroupper +; WIN64-NEXT: callq test_argv16i1helper +; WIN64-NEXT: nop +; WIN64-NEXT: addq $32, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %r10 +; WIN64-NEXT: popq %r11 +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_argv16i1: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $128, %rsp +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144 +; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128 +; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112 +; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96 +; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80 +; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64 +; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48 +; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32 +; LINUXOSX64-NEXT: kmovd %edx, %k0 +; LINUXOSX64-NEXT: kmovd %ecx, %k1 +; LINUXOSX64-NEXT: kmovd %eax, %k2 +; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0 +; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1 +; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2 +; LINUXOSX64-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; LINUXOSX64-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; LINUXOSX64-NEXT: # kill: %XMM2 %XMM2 %ZMM2 +; LINUXOSX64-NEXT: vzeroupper +; LINUXOSX64-NEXT: callq test_argv16i1helper +; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: addq $128, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) ret i16 %res } -; CHECK-LABEL: caller_argv16i1: -; CHECK: movl $1, %eax -; CHECK: movl $1, %ecx -; CHECK: movl $1, %edx -; CHECK: call{{l|q}} {{_*}}test_argv16i1 - ; Test regcall when passing arguments of v16i1 type define i16 @caller_argv16i1() #0 { +; X32-LABEL: caller_argv16i1: +; X32: # BB#0: # %entry +; X32-NEXT: movl $1, %eax +; X32-NEXT: movl $1, %ecx +; X32-NEXT: movl $1, %edx +; X32-NEXT: calll _test_argv16i1 +; X32-NEXT: retl +; +; WIN64-LABEL: caller_argv16i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 16 +; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: 
movl $1, %eax +; WIN64-NEXT: movl $1, %ecx +; WIN64-NEXT: movl $1, %edx +; WIN64-NEXT: callq test_argv16i1 +; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $40, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_argv16i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: movl $1, %eax +; LINUXOSX64-NEXT: movl $1, %ecx +; LINUXOSX64-NEXT: movl $1, %edx +; LINUXOSX64-NEXT: callq test_argv16i1 +; LINUXOSX64-NEXT: popq %rcx +; LINUXOSX64-NEXT: retq entry: %v0 = bitcast i16 1 to <16 x i1> %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0) ret i16 %call } -; CHECK-LABEL: test_retv16i1: -; CHECK: movw $1, %ax -; CHECK: ret{{l|q}} - ; Test regcall when returning v16i1 type define x86_regcallcc <16 x i1> @test_retv16i1() { +; X32-LABEL: test_retv16i1: +; X32: # BB#0: +; X32-NEXT: movw $1, %ax +; X32-NEXT: retl +; +; CHECK64-LABEL: test_retv16i1: +; CHECK64: # BB#0: +; CHECK64-NEXT: movw $1, %ax +; CHECK64-NEXT: retq %a = bitcast i16 1 to <16 x i1> ret <16 x i1> %a } -; CHECK-LABEL: caller_retv16i1: -; CHECK: call{{l|q}} {{_*}}test_retv16i1 -; CHECK: incl %eax - ; Test regcall when processing result of v16i1 type define i16 @caller_retv16i1() #0 { +; X32-LABEL: caller_retv16i1: +; X32: # BB#0: # %entry +; X32-NEXT: calll _test_retv16i1 +; X32-NEXT: # kill: %AX %AX %EAX +; X32-NEXT: incl %eax +; X32-NEXT: # kill: %AX %AX %EAX +; X32-NEXT: retl +; +; WIN64-LABEL: caller_retv16i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 16 +; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: callq test_retv16i1 +; WIN64-NEXT: # kill: %AX %AX %EAX +; WIN64-NEXT: incl %eax +; WIN64-NEXT: # kill: %AX %AX %EAX +; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $40, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_retv16i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: callq test_retv16i1 +; LINUXOSX64-NEXT: # kill: %AX %AX %EAX +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: # kill: %AX %AX %EAX +; LINUXOSX64-NEXT: popq %rcx +; LINUXOSX64-NEXT: retq entry: %call = call x86_regcallcc <16 x i1> @test_retv16i1() %c = bitcast <16 x i1> %call to i16 @@ -287,50 +754,238 @@ entry: ret i16 %add } -; CHECK-LABEL: test_argv8i1: -; CHECK: kmovd %edx, %k{{[0-9]+}} -; CHECK: kmovd %ecx, %k{{[0-9]+}} -; CHECK: kmovd %eax, %k{{[0-9]+}} -; CHECK: ret{{l|q}} - ; Test regcall when receiving arguments of v8i1 type declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) { +; X32-LABEL: test_argv8i1: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: subl $72, %esp +; X32-NEXT: 
vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill +; X32-NEXT: kmovd %edx, %k0 +; X32-NEXT: kmovd %ecx, %k1 +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: vpmovm2w %k2, %zmm0 +; X32-NEXT: vpmovm2w %k1, %zmm1 +; X32-NEXT: vpmovm2w %k0, %zmm2 +; X32-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; X32-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; X32-NEXT: # kill: %XMM2 %XMM2 %ZMM2 +; X32-NEXT: vzeroupper +; X32-NEXT: calll _test_argv8i1helper +; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_argv8i1: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %r11 +; WIN64-NEXT: .seh_pushreg 11 +; WIN64-NEXT: pushq %r10 +; WIN64-NEXT: .seh_pushreg 10 +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $32, %rsp +; WIN64-NEXT: .seh_stackalloc 32 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: kmovd %edx, %k0 +; WIN64-NEXT: kmovd %ecx, %k1 +; WIN64-NEXT: kmovd %eax, %k2 +; WIN64-NEXT: vpmovm2w %k2, %zmm0 +; WIN64-NEXT: vpmovm2w %k1, %zmm1 +; WIN64-NEXT: vpmovm2w %k0, %zmm2 +; WIN64-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; WIN64-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; WIN64-NEXT: # kill: %XMM2 %XMM2 %ZMM2 +; WIN64-NEXT: vzeroupper +; WIN64-NEXT: callq test_argv8i1helper +; WIN64-NEXT: nop +; WIN64-NEXT: addq $32, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %r10 +; WIN64-NEXT: popq %r11 +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_argv8i1: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $128, %rsp +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144 +; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128 +; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112 +; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96 +; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80 +; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64 +; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48 +; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32 +; LINUXOSX64-NEXT: kmovd %edx, %k0 +; LINUXOSX64-NEXT: kmovd %ecx, %k1 +; LINUXOSX64-NEXT: kmovd %eax, %k2 +; LINUXOSX64-NEXT: vpmovm2w %k2, %zmm0 +; LINUXOSX64-NEXT: vpmovm2w %k1, %zmm1 +; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm2 +; LINUXOSX64-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; LINUXOSX64-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; LINUXOSX64-NEXT: # kill: %XMM2 %XMM2 %ZMM2 +; LINUXOSX64-NEXT: vzeroupper +; LINUXOSX64-NEXT: callq test_argv8i1helper +; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload +; 
LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: addq $128, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) ret i8 %res } -; CHECK-LABEL: caller_argv8i1: -; CHECK: movl $1, %eax -; CHECK: movl $1, %ecx -; CHECK: movl $1, %edx -; CHECK: call{{l|q}} {{_*}}test_argv8i1 - ; Test regcall when passing arguments of v8i1 type define i8 @caller_argv8i1() #0 { +; X32-LABEL: caller_argv8i1: +; X32: # BB#0: # %entry +; X32-NEXT: movl $1, %eax +; X32-NEXT: movl $1, %ecx +; X32-NEXT: movl $1, %edx +; X32-NEXT: calll _test_argv8i1 +; X32-NEXT: retl +; +; WIN64-LABEL: caller_argv8i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 16 +; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: movl $1, %eax +; WIN64-NEXT: movl $1, %ecx +; WIN64-NEXT: movl $1, %edx +; WIN64-NEXT: callq test_argv8i1 +; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $40, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_argv8i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: movl $1, %eax +; LINUXOSX64-NEXT: movl $1, %ecx +; LINUXOSX64-NEXT: movl $1, %edx +; LINUXOSX64-NEXT: callq test_argv8i1 +; LINUXOSX64-NEXT: popq %rcx +; LINUXOSX64-NEXT: retq entry: %v0 = bitcast i8 1 to <8 x i1> %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0) ret i8 %call } -; CHECK-LABEL: test_retv8i1: -; CHECK: movb $1, %al -; CHECK: ret{{q|l}} - ; Test regcall when returning v8i1 type define x86_regcallcc <8 x i1> @test_retv8i1() { +; X32-LABEL: test_retv8i1: +; X32: # BB#0: +; X32-NEXT: movb $1, %al +; X32-NEXT: retl +; +; CHECK64-LABEL: test_retv8i1: +; CHECK64: # BB#0: +; CHECK64-NEXT: movb $1, %al +; CHECK64-NEXT: retq %a = bitcast i8 1 to <8 x i1> ret <8 x i1> %a } -; CHECK-LABEL: caller_retv8i1: -; CHECK: call{{l|q}} {{_*}}test_retv8i1 -; CHECK: kmovd %eax, %k{{[0-9]+}} -; CHECK: ret{{l|q}} - ; Test regcall when processing result of v8i1 type define <8 x i1> @caller_retv8i1() #0 { +; X32-LABEL: caller_retv8i1: +; X32: # BB#0: # %entry +; X32-NEXT: calll _test_retv8i1 +; X32-NEXT: # kill: %AL %AL %EAX +; X32-NEXT: kmovd %eax, %k0 +; X32-NEXT: vpmovm2w %k0, %zmm0 +; X32-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; WIN64-LABEL: caller_retv8i1: +; WIN64: # BB#0: # %entry +; WIN64-NEXT: pushq %rsi +; WIN64-NEXT: .seh_pushreg 6 +; WIN64-NEXT: pushq %rdi +; WIN64-NEXT: .seh_pushreg 7 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 
16-byte Spill +; WIN64-NEXT: .seh_savexmm 7, 16 +; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 6, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: callq test_retv8i1 +; WIN64-NEXT: # kill: %AL %AL %EAX +; WIN64-NEXT: kmovd %eax, %k0 +; WIN64-NEXT: vpmovm2w %k0, %zmm0 +; WIN64-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: addq $40, %rsp +; WIN64-NEXT: popq %rdi +; WIN64-NEXT: popq %rsi +; WIN64-NEXT: vzeroupper +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: caller_retv8i1: +; LINUXOSX64: # BB#0: # %entry +; LINUXOSX64-NEXT: pushq %rax +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: callq test_retv8i1 +; LINUXOSX64-NEXT: # kill: %AL %AL %EAX +; LINUXOSX64-NEXT: kmovd %eax, %k0 +; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm0 +; LINUXOSX64-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; LINUXOSX64-NEXT: popq %rax +; LINUXOSX64-NEXT: vzeroupper +; LINUXOSX64-NEXT: retq entry: %call = call x86_regcallcc <8 x i1> @test_retv8i1() ret <8 x i1> %call diff --git a/test/CodeGen/X86/avx512-regcall-NoMask.ll b/test/CodeGen/X86/avx512-regcall-NoMask.ll index f43d5b3e11d..43a1871245b 100644 --- a/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ b/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -1,307 +1,617 @@ -; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck --check-prefix=ALL --check-prefix=X32 %s -; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck --check-prefix=ALL --check-prefix=WIN64 %s -; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck --check-prefix=LINUXOSX64 %s - -; ALL-LABEL: test_argReti1: -; ALL: incb %al -; ALL: ret{{.*}} +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=WIN64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=LINUXOSX64 ; Test regcall when receiving/returning i1 define x86_regcallcc i1 @test_argReti1(i1 %a) { +; X32-LABEL: test_argReti1: +; X32: # BB#0: +; X32-NEXT: incb %al +; X32-NEXT: # kill: %AL %AL %EAX +; X32-NEXT: retl +; +; WIN64-LABEL: test_argReti1: +; WIN64: # BB#0: +; WIN64-NEXT: incb %al +; WIN64-NEXT: # kill: %AL %AL %EAX +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argReti1: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: # kill: %AL %AL %EAX +; LINUXOSX64-NEXT: retq %add = add i1 %a, 1 ret i1 %add } -; ALL-LABEL: test_CallargReti1: -; ALL: movzbl %al, %eax -; ALL: call{{.*}}test_argReti1 -; ALL: incb %al -; ALL: ret{{.*}} - ; Test regcall when passing/retrieving i1 define x86_regcallcc i1 @test_CallargReti1(i1 %a) { +; X32-LABEL: test_CallargReti1: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: incb %al +; X32-NEXT: movzbl %al, %eax +; X32-NEXT: calll _test_argReti1 +; X32-NEXT: incb %al +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargReti1: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; 
WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: incb %al +; WIN64-NEXT: movzbl %al, %eax +; WIN64-NEXT: callq test_argReti1 +; WIN64-NEXT: incb %al +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargReti1: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: movzbl %al, %eax +; LINUXOSX64-NEXT: callq test_argReti1 +; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = add i1 %a, 1 %c = call x86_regcallcc i1 @test_argReti1(i1 %b) %d = add i1 %c, 1 ret i1 %d } -; X32-LABEL: test_argReti8: -; X32: incb %al -; X32: ret{{.*}} - -; WIN64-LABEL: test_argReti8: -; WIN64: incb %al -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning i8 define x86_regcallcc i8 @test_argReti8(i8 %a) { +; X32-LABEL: test_argReti8: +; X32: # BB#0: +; X32-NEXT: incb %al +; X32-NEXT: # kill: %AL %AL %EAX +; X32-NEXT: retl +; +; WIN64-LABEL: test_argReti8: +; WIN64: # BB#0: +; WIN64-NEXT: incb %al +; WIN64-NEXT: # kill: %AL %AL %EAX +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argReti8: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: # kill: %AL %AL %EAX +; LINUXOSX64-NEXT: retq %add = add i8 %a, 1 ret i8 %add } -; X32-LABEL: test_CallargReti8: -; X32: incb %al -; X32: call{{.*}} {{.*}}test_argReti8 -; X32: incb %al -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargReti8: -; WIN64: incb %al -; WIN64: call{{.*}} {{.*}}test_argReti8 -; WIN64: incb %al -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving i8 define x86_regcallcc i8 @test_CallargReti8(i8 %a) { +; X32-LABEL: test_CallargReti8: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: incb %al +; X32-NEXT: movzbl %al, %eax +; X32-NEXT: calll _test_argReti8 +; X32-NEXT: incb %al +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargReti8: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: incb %al +; WIN64-NEXT: movzbl %al, %eax +; WIN64-NEXT: callq test_argReti8 +; WIN64-NEXT: incb %al +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargReti8: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: movzbl %al, %eax +; LINUXOSX64-NEXT: callq test_argReti8 +; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = add i8 %a, 1 %c = call x86_regcallcc i8 @test_argReti8(i8 %b) %d = add i8 %c, 1 ret i8 %d } -; X32-LABEL: test_argReti16: -; X32: incl %eax -; X32: ret{{.*}} - -; WIN64-LABEL: test_argReti16: -; WIN64: incl %eax -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning i16 define x86_regcallcc i16 @test_argReti16(i16 %a) { +; X32-LABEL: test_argReti16: +; X32: # BB#0: +; X32-NEXT: incl %eax +; X32-NEXT: # kill: %AX %AX %EAX +; X32-NEXT: retl +; +; WIN64-LABEL: test_argReti16: +; WIN64: # BB#0: +; WIN64-NEXT: incl %eax +; WIN64-NEXT: # kill: %AX %AX %EAX +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argReti16: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: # kill: %AX %AX %EAX +; LINUXOSX64-NEXT: retq %add = add i16 %a, 1 ret i16 
%add } -; X32-LABEL: test_CallargReti16: -; X32: incl %eax -; X32: call{{.*}} {{.*}}test_argReti16 -; X32: incl %eax -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargReti16: -; WIN64: incl %eax -; WIN64: call{{.*}} {{.*}}test_argReti16 -; WIN64: incl %eax -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving i16 define x86_regcallcc i16 @test_CallargReti16(i16 %a) { +; X32-LABEL: test_CallargReti16: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: incl %eax +; X32-NEXT: calll _test_argReti16 +; X32-NEXT: # kill: %AX %AX %EAX +; X32-NEXT: incl %eax +; X32-NEXT: # kill: %AX %AX %EAX +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargReti16: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: incl %eax +; WIN64-NEXT: callq test_argReti16 +; WIN64-NEXT: # kill: %AX %AX %EAX +; WIN64-NEXT: incl %eax +; WIN64-NEXT: # kill: %AX %AX %EAX +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargReti16: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: callq test_argReti16 +; LINUXOSX64-NEXT: # kill: %AX %AX %EAX +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: # kill: %AX %AX %EAX +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = add i16 %a, 1 %c = call x86_regcallcc i16 @test_argReti16(i16 %b) %d = add i16 %c, 1 ret i16 %d } -; X32-LABEL: test_argReti32: -; X32: incl %eax -; X32: ret{{.*}} - -; WIN64-LABEL: test_argReti32: -; WIN64: incl %eax -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning i32 define x86_regcallcc i32 @test_argReti32(i32 %a) { +; X32-LABEL: test_argReti32: +; X32: # BB#0: +; X32-NEXT: incl %eax +; X32-NEXT: retl +; +; WIN64-LABEL: test_argReti32: +; WIN64: # BB#0: +; WIN64-NEXT: incl %eax +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argReti32: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: retq %add = add i32 %a, 1 ret i32 %add } -; X32-LABEL: test_CallargReti32: -; X32: incl %eax -; X32: call{{.*}} {{.*}}test_argReti32 -; X32: incl %eax -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargReti32: -; WIN64: incl %eax -; WIN64: call{{.*}} {{.*}}test_argReti32 -; WIN64: incl %eax -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving i32 define x86_regcallcc i32 @test_CallargReti32(i32 %a) { +; X32-LABEL: test_CallargReti32: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: incl %eax +; X32-NEXT: calll _test_argReti32 +; X32-NEXT: incl %eax +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargReti32: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: incl %eax +; WIN64-NEXT: callq test_argReti32 +; WIN64-NEXT: incl %eax +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargReti32: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: callq test_argReti32 +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = add i32 %a, 1 %c = call x86_regcallcc i32 @test_argReti32(i32 %b) %d = add i32 %c, 1 ret i32 %d } -; X32-LABEL: 
test_argReti64: -; X32: addl $3, %eax -; X32: adcl $1, %ecx -; X32: ret{{.*}} - -; WIN64-LABEL: test_argReti64: -; WIN64: movabsq $4294967299, %r{{.*}} -; WIN64: addq %r{{.*}}, %rax -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning i64 define x86_regcallcc i64 @test_argReti64(i64 %a) { +; X32-LABEL: test_argReti64: +; X32: # BB#0: +; X32-NEXT: addl $3, %eax +; X32-NEXT: adcl $1, %ecx +; X32-NEXT: retl +; +; WIN64-LABEL: test_argReti64: +; WIN64: # BB#0: +; WIN64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003 +; WIN64-NEXT: addq %rcx, %rax +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argReti64: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003 +; LINUXOSX64-NEXT: addq %rcx, %rax +; LINUXOSX64-NEXT: retq %add = add i64 %a, 4294967299 ret i64 %add } -; X32-LABEL: test_CallargReti64: -; X32: add{{.*}} $1, %eax -; X32: adcl $0, {{%e(cx|dx|si|di|bx|bp)}} -; X32: call{{.*}} {{.*}}test_argReti64 -; X32: add{{.*}} $1, %eax -; X32: adcl $0, {{%e(cx|dx|si|di|bx|bp)}} -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargReti64: -; WIN64: incq %rax -; WIN64: call{{.*}} {{.*}}test_argReti64 -; WIN64: incq %rax -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving i64 define x86_regcallcc i64 @test_CallargReti64(i64 %a) { +; X32-LABEL: test_CallargReti64: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: addl $1, %eax +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: calll _test_argReti64 +; X32-NEXT: addl $1, %eax +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargReti64: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: incq %rax +; WIN64-NEXT: callq test_argReti64 +; WIN64-NEXT: incq %rax +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargReti64: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: incq %rax +; LINUXOSX64-NEXT: callq test_argReti64 +; LINUXOSX64-NEXT: incq %rax +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = add i64 %a, 1 %c = call x86_regcallcc i64 @test_argReti64(i64 %b) %d = add i64 %c, 1 ret i64 %d } -; X32-LABEL: test_argRetFloat: -; X32: vadd{{.*}} {{.*}}, %xmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_argRetFloat: -; WIN64: vadd{{.*}} {{.*}}, %xmm0 -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning float define x86_regcallcc float @test_argRetFloat(float %a) { +; X32-LABEL: test_argRetFloat: +; X32: # BB#0: +; X32-NEXT: vaddss __real@3f800000, %xmm0, %xmm0 +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRetFloat: +; WIN64: # BB#0: +; WIN64-NEXT: vaddss __real@{{.*}}(%rip), %xmm0, %xmm0 +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRetFloat: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 +; LINUXOSX64-NEXT: retq %add = fadd float 1.0, %a ret float %add } -; X32-LABEL: test_CallargRetFloat: -; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0 -; X32: call{{.*}} {{.*}}test_argRetFloat -; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargRetFloat: -; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0 -; WIN64: call{{.*}} {{.*}}test_argRetFloat -; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0 -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving float define x86_regcallcc float 
@test_CallargRetFloat(float %a) { +; X32-LABEL: test_CallargRetFloat: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: subl $24, %esp +; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill +; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0 +; X32-NEXT: calll _test_argRetFloat +; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0 +; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload +; X32-NEXT: addl $24, %esp +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargRetFloat: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $16, %rsp +; WIN64-NEXT: .seh_stackalloc 16 +; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 8, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; WIN64-NEXT: vaddss %xmm8, %xmm0, %xmm0 +; WIN64-NEXT: callq test_argRetFloat +; WIN64-NEXT: vaddss %xmm8, %xmm0, %xmm0 +; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; WIN64-NEXT: addq $16, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargRetFloat: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $16, %rsp +; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32 +; LINUXOSX64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; LINUXOSX64-NEXT: vaddss %xmm8, %xmm0, %xmm0 +; LINUXOSX64-NEXT: callq test_argRetFloat +; LINUXOSX64-NEXT: vaddss %xmm8, %xmm0, %xmm0 +; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; LINUXOSX64-NEXT: addq $16, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = fadd float 1.0, %a %c = call x86_regcallcc float @test_argRetFloat(float %b) %d = fadd float 1.0, %c ret float %d } -; X32-LABEL: test_argRetDouble: -; X32: vadd{{.*}} {{.*}}, %xmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_argRetDouble: -; WIN64: vadd{{.*}} {{.*}}, %xmm0 -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning double define x86_regcallcc double @test_argRetDouble(double %a) { +; X32-LABEL: test_argRetDouble: +; X32: # BB#0: +; X32-NEXT: vaddsd __real@3ff0000000000000, %xmm0, %xmm0 +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRetDouble: +; WIN64: # BB#0: +; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm0, %xmm0 +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRetDouble: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 +; LINUXOSX64-NEXT: retq %add = fadd double %a, 1.0 ret double %add } -; X32-LABEL: test_CallargRetDouble: -; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0 -; X32: call{{.*}} {{.*}}test_argRetDouble -; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargRetDouble: -; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0 -; WIN64: call{{.*}} {{.*}}test_argRetDouble -; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0 -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving double define x86_regcallcc double @test_CallargRetDouble(double %a) { +; X32-LABEL: test_CallargRetDouble: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: subl $24, %esp +; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill +; X32-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero +; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0 +; X32-NEXT: calll _test_argRetDouble +; X32-NEXT: 
vaddsd %xmm4, %xmm0, %xmm0 +; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload +; X32-NEXT: addl $24, %esp +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargRetDouble: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $16, %rsp +; WIN64-NEXT: .seh_stackalloc 16 +; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 8, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero +; WIN64-NEXT: vaddsd %xmm8, %xmm0, %xmm0 +; WIN64-NEXT: callq test_argRetDouble +; WIN64-NEXT: vaddsd %xmm8, %xmm0, %xmm0 +; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; WIN64-NEXT: addq $16, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargRetDouble: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $16, %rsp +; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32 +; LINUXOSX64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero +; LINUXOSX64-NEXT: vaddsd %xmm8, %xmm0, %xmm0 +; LINUXOSX64-NEXT: callq test_argRetDouble +; LINUXOSX64-NEXT: vaddsd %xmm8, %xmm0, %xmm0 +; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; LINUXOSX64-NEXT: addq $16, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = fadd double 1.0, %a %c = call x86_regcallcc double @test_argRetDouble(double %b) %d = fadd double 1.0, %c ret double %d } -; X32: test_argRetf80 -; X32-NOT: fldt -; X32: fadd %st(0), %st(0) -; X32: retl - -; WIN64: test_argRetf80 -; WIN64-NOT: fldt -; WIN64: fadd %st(0), %st(0) -; WIN64: retq - ; Test regcall when receiving/returning long double define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind { +; X32-LABEL: test_argRetf80: +; X32: # BB#0: +; X32-NEXT: fadd %st(0), %st(0) +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRetf80: +; WIN64: # BB#0: +; WIN64-NEXT: fadd %st(0), %st(0) +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRetf80: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: fadd %st(0), %st(0) +; LINUXOSX64-NEXT: retq %r0 = fadd x86_fp80 %a0, %a0 ret x86_fp80 %r0 } -; X32: test_CallargRetf80 -; X32-NOT: fldt -; X32: fadd %st({{[0-7]}}), %st({{[0-7]}}) -; X32: call{{.*}} {{.*}}test_argRetf80 -; X32: fadd{{.*}} %st({{[0-7]}}) -; X32: retl - -; WIN64: test_CallargRetf80 -; WIN64-NOT: fldt -; WIN64: fadd %st({{[0-7]}}), %st({{[0-7]}}) -; WIN64: call{{.*}} {{.*}}test_argRetf80 -; WIN64: fadd{{.*}} %st({{[0-7]}}) -; WIN64: retq - ; Test regcall when passing/retrieving long double define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) { +; X32-LABEL: test_CallargRetf80: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: fadd %st(0), %st(0) +; X32-NEXT: calll _test_argRetf80 +; X32-NEXT: fadd %st(0), %st(0) +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargRetf80: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: fadd %st(0), %st(0) +; WIN64-NEXT: callq test_argRetf80 +; WIN64-NEXT: fadd %st(0), %st(0) +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargRetf80: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 
16 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: fadd %st(0), %st(0) +; LINUXOSX64-NEXT: callq test_argRetf80 +; LINUXOSX64-NEXT: fadd %st(0), %st(0) +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = fadd x86_fp80 %a, %a %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b) %d = fadd x86_fp80 %c, %c ret x86_fp80 %d } -; X32-LABEL: test_argRetPointer: -; X32: incl %eax -; X32: ret{{.*}} - -; WIN64-LABEL: test_argRetPointer: -; WIN64: incl %eax -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning pointer define x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a) { +; X32-LABEL: test_argRetPointer: +; X32: # BB#0: +; X32-NEXT: incl %eax +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRetPointer: +; WIN64: # BB#0: +; WIN64-NEXT: incl %eax +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRetPointer: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: retq %b = ptrtoint [4 x i32]* %a to i32 %c = add i32 %b, 1 %d = inttoptr i32 %c to [4 x i32]* ret [4 x i32]* %d } -; X32-LABEL: test_CallargRetPointer: -; X32: incl %eax -; X32: call{{.*}} {{.*}}test_argRetPointer -; X32: incl %eax -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargRetPointer: -; WIN64: incl %eax -; WIN64: call{{.*}} {{.*}}test_argRetPointer -; WIN64: incl %eax -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving pointer define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) { +; X32-LABEL: test_CallargRetPointer: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: incl %eax +; X32-NEXT: calll _test_argRetPointer +; X32-NEXT: incl %eax +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargRetPointer: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: incl %eax +; WIN64-NEXT: callq test_argRetPointer +; WIN64-NEXT: incl %eax +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargRetPointer: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: callq test_argRetPointer +; LINUXOSX64-NEXT: incl %eax +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = ptrtoint [4 x i32]* %a to i32 %c = add i32 %b, 1 %d = inttoptr i32 %c to [4 x i32]* @@ -312,144 +622,276 @@ define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) { ret [4 x i32]* %h } -; X32-LABEL: test_argRet128Vector: -; X32: vpblend{{.*}} %xmm0, %xmm1, %xmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_argRet128Vector: -; WIN64: vpblend{{.*}} %xmm0, %xmm1, %xmm0 -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning 128 bit vector define x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %b) { +; X32-LABEL: test_argRet128Vector: +; X32: # BB#0: +; X32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRet128Vector: +; WIN64: # BB#0: +; WIN64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRet128Vector: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; LINUXOSX64-NEXT: retq %d = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b ret <4 x i32> %d } -; X32-LABEL: test_CallargRet128Vector: -; X32: vmov{{.*}} %xmm0, {{%xmm([0-7])}} -; X32: call{{.*}} {{.*}}test_argRet128Vector -; X32: vmovdqa{{.*}} {{%xmm([0-7])}}, 
%xmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargRet128Vector: -; WIN64: vmov{{.*}} %xmm0, {{%xmm([0-9]+)}} -; WIN64: call{{.*}} {{.*}}test_argRet128Vector -; WIN64: vmovdqa{{.*}} {{%xmm([0-9]+)}}, %xmm0 -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving 128 bit vector define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a) { +; X32-LABEL: test_CallargRet128Vector: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: subl $24, %esp +; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill +; X32-NEXT: vmovdqa %xmm0, %xmm4 +; X32-NEXT: vmovdqa %xmm4, %xmm1 +; X32-NEXT: calll _test_argRet128Vector +; X32-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1} +; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload +; X32-NEXT: addl $24, %esp +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargRet128Vector: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $16, %rsp +; WIN64-NEXT: .seh_stackalloc 16 +; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; WIN64-NEXT: .seh_savexmm 8, 0 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: vmovdqa %xmm0, %xmm8 +; WIN64-NEXT: vmovdqa %xmm8, %xmm1 +; WIN64-NEXT: callq test_argRet128Vector +; WIN64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1} +; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; WIN64-NEXT: addq $16, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargRet128Vector: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $16, %rsp +; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32 +; LINUXOSX64-NEXT: vmovdqa %xmm0, %xmm8 +; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm1 +; LINUXOSX64-NEXT: callq test_argRet128Vector +; LINUXOSX64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1} +; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload +; LINUXOSX64-NEXT: addq $16, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %a) %c = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b ret <4 x i32> %c } -; X32-LABEL: test_argRet256Vector: -; X32: vpblend{{.*}} %ymm0, %ymm1, %ymm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_argRet256Vector: -; WIN64: vpblend{{.*}} %ymm0, %ymm1, %ymm0 -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning 256 bit vector define x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %b) { +; X32-LABEL: test_argRet256Vector: +; X32: # BB#0: +; X32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRet256Vector: +; WIN64: # BB#0: +; WIN64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRet256Vector: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; LINUXOSX64-NEXT: retq %d = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b ret <8 x i32> %d } -; X32-LABEL: test_CallargRet256Vector: -; X32: vmov{{.*}} %ymm0, %ymm1 -; X32: call{{.*}} {{.*}}test_argRet256Vector -; X32: vmovdqa{{.*}} %ymm1, %ymm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargRet256Vector: -; WIN64: vmov{{.*}} %ymm0, %ymm1 -; WIN64: call{{.*}} {{.*}}test_argRet256Vector -; WIN64: vmovdqa{{.*}} %ymm1, %ymm0 -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving 256 bit vector define 
x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i32> %a) { +; X32-LABEL: test_CallargRet256Vector: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: subl $56, %esp +; X32-NEXT: vmovdqu %ymm0, (%esp) # 32-byte Spill +; X32-NEXT: vmovdqa %ymm0, %ymm1 +; X32-NEXT: calll _test_argRet256Vector +; X32-NEXT: vmovdqu (%esp), %ymm1 # 32-byte Reload +; X32-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} +; X32-NEXT: addl $56, %esp +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; WIN64-LABEL: test_CallargRet256Vector: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $48, %rsp +; WIN64-NEXT: .seh_stackalloc 48 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill +; WIN64-NEXT: vmovdqa %ymm0, %ymm1 +; WIN64-NEXT: callq test_argRet256Vector +; WIN64-NEXT: vmovdqu (%rsp), %ymm1 # 32-byte Reload +; WIN64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} +; WIN64-NEXT: addq $48, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargRet256Vector: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $48, %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 64 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill +; LINUXOSX64-NEXT: vmovdqa %ymm0, %ymm1 +; LINUXOSX64-NEXT: callq test_argRet256Vector +; LINUXOSX64-NEXT: vmovdqu (%rsp), %ymm1 # 32-byte Reload +; LINUXOSX64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} +; LINUXOSX64-NEXT: addq $48, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %a) %c = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b ret <8 x i32> %c } -; X32-LABEL: test_argRet512Vector: -; X32: vpblend{{.*}} %zmm0, %zmm1, %zmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_argRet512Vector: -; WIN64: vpblend{{.*}} %zmm0, %zmm1, %zmm0 -; WIN64: ret{{.*}} - ; Test regcall when receiving/returning 512 bit vector define x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %b) { +; X32-LABEL: test_argRet512Vector: +; X32: # BB#0: +; X32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRet512Vector: +; WIN64: # BB#0: +; WIN64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRet512Vector: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; LINUXOSX64-NEXT: retq %d = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b ret <16 x i32> %d } -; X32-LABEL: test_CallargRet512Vector: -; X32: vmov{{.*}} %zmm0, %zmm1 -; X32: call{{.*}} {{.*}}test_argRet512Vector -; X32: movdqa{{.*}} %zmm1, %zmm0 -; X32: ret{{.*}} - -; WIN64-LABEL: test_CallargRet512Vector: -; WIN64: vmov{{.*}} %zmm0, %zmm1 -; WIN64: call{{.*}} {{.*}}test_argRet512Vector -; WIN64: vmovdqa{{.*}} %zmm1, %zmm0 -; WIN64: ret{{.*}} - ; Test regcall when passing/retrieving 512 bit vector define x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i32> %a) { +; X32-LABEL: test_CallargRet512Vector: +; X32: # BB#0: +; X32-NEXT: pushl %esp +; X32-NEXT: subl $120, %esp +; X32-NEXT: vmovdqu64 %zmm0, (%esp) # 64-byte Spill +; X32-NEXT: vmovdqa64 %zmm0, %zmm1 +; X32-NEXT: calll _test_argRet512Vector +; X32-NEXT: vmovdqu64 (%esp), %zmm1 # 64-byte Reload +; X32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X32-NEXT: addl $120, %esp +; X32-NEXT: popl %esp +; X32-NEXT: retl +; +; 
WIN64-LABEL: test_CallargRet512Vector: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rsp +; WIN64-NEXT: .seh_pushreg 4 +; WIN64-NEXT: subq $112, %rsp +; WIN64-NEXT: .seh_stackalloc 112 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: vmovdqu64 %zmm0, (%rsp) # 64-byte Spill +; WIN64-NEXT: vmovdqa64 %zmm0, %zmm1 +; WIN64-NEXT: callq test_argRet512Vector +; WIN64-NEXT: vmovdqu64 (%rsp), %zmm1 # 64-byte Reload +; WIN64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; WIN64-NEXT: addq $112, %rsp +; WIN64-NEXT: popq %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX64-LABEL: test_CallargRet512Vector: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX64-NEXT: subq $112, %rsp +; LINUXOSX64-NEXT: .cfi_def_cfa_offset 128 +; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 +; LINUXOSX64-NEXT: vmovdqu64 %zmm0, (%rsp) # 64-byte Spill +; LINUXOSX64-NEXT: vmovdqa64 %zmm0, %zmm1 +; LINUXOSX64-NEXT: callq test_argRet512Vector +; LINUXOSX64-NEXT: vmovdqu64 (%rsp), %zmm1 # 64-byte Reload +; LINUXOSX64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; LINUXOSX64-NEXT: addq $112, %rsp +; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: retq %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %a) %c = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b ret <16 x i32> %c } -; WIN64-LABEL: testf32_inp -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; WIN64: retq - -; X32-LABEL: testf32_inp -; X32: vmovups {{%xmm([0-7])}}, {{.*(%esp).*}} {{#+}} 16-byte Spill -; X32: vmovups {{%xmm([0-7])}}, {{.*(%esp).*}} {{#+}} 16-byte Spill -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}} -; X32: vmovups {{.*(%esp).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload -; X32: vmovups {{.*(%esp).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload -; X32: retl - -; LINUXOSX64-LABEL: testf32_inp -; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: {{.*}} 
{{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}} -; LINUXOSX64: retq - ; Test regcall when running multiple input parameters - callee saved XMMs define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind { +; X32-LABEL: testf32_inp: +; X32: # BB#0: +; X32-NEXT: subl $44, %esp +; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, (%esp) # 16-byte Spill +; X32-NEXT: vaddps %zmm2, %zmm0, %zmm6 +; X32-NEXT: vaddps %zmm3, %zmm1, %zmm7 +; X32-NEXT: vmulps %zmm2, %zmm0, %zmm0 +; X32-NEXT: vsubps %zmm0, %zmm6, %zmm0 +; X32-NEXT: vmulps %zmm3, %zmm1, %zmm1 +; X32-NEXT: vsubps %zmm1, %zmm7, %zmm1 +; X32-NEXT: vaddps %zmm4, %zmm0, %zmm0 +; X32-NEXT: vaddps %zmm5, %zmm1, %zmm1 +; X32-NEXT: vmovups (%esp), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: addl $44, %esp +; X32-NEXT: retl +; +; WIN64-LABEL: testf32_inp: +; WIN64: # BB#0: +; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm6 +; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm7 +; WIN64-NEXT: vmulps %zmm2, %zmm0, %zmm0 +; WIN64-NEXT: vsubps %zmm0, %zmm6, %zmm0 +; WIN64-NEXT: vmulps %zmm3, %zmm1, %zmm1 +; WIN64-NEXT: vsubps %zmm1, %zmm7, %zmm1 +; WIN64-NEXT: vaddps %zmm4, %zmm0, %zmm0 +; WIN64-NEXT: vaddps %zmm5, %zmm1, %zmm1 +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: testf32_inp: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm6 +; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm7 +; LINUXOSX64-NEXT: vmulps %zmm2, %zmm0, %zmm0 +; LINUXOSX64-NEXT: vsubps %zmm0, %zmm6, %zmm0 +; LINUXOSX64-NEXT: vmulps %zmm3, %zmm1, %zmm1 +; LINUXOSX64-NEXT: vsubps %zmm1, %zmm7, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm4, %zmm0, %zmm0 +; LINUXOSX64-NEXT: vaddps %zmm5, %zmm1, %zmm1 +; LINUXOSX64-NEXT: retq %x1 = fadd <32 x float> %a, %b %x2 = fmul <32 x float> %a, %b %x3 = fsub <32 x float> %x1, %x2 @@ -457,32 +899,136 @@ define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, ret <32 x float> %x4 } -; X32-LABEL: testi32_inp -; X32: pushl {{%e(bx|bp)}} -; X32: pushl {{%e(bx|bp)}} -; X32: popl {{%e(bx|bp)}} -; X32: popl {{%e(bx|bp)}} -; X32: retl - -; WIN64-LABEL: testi32_inp -; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} -; WIN64: retq - -; LINUXOSX64-LABEL: testi32_inp -; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} -; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} -; LINUXOSX64: retq - ; Test regcall when running multiple input parameters - callee saved GPRs -define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, - i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind { +define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind { +; X32-LABEL: testi32_inp: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: subl $20, %esp +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: 
movl %eax, %edx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: subl %ecx, %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: imull %ebp, %edx +; X32-NEXT: subl %esi, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: addl %ecx, %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: subl {{[0-9]+}}(%esp), %eax +; X32-NEXT: imull %ebp, %eax +; X32-NEXT: addl %eax, %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: addl {{[0-9]+}}(%esp), %edi +; X32-NEXT: imull %eax, %edi +; X32-NEXT: addl {{[0-9]+}}(%esp), %esi +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: addl %ecx, %edx +; X32-NEXT: movl %edx, %eax +; X32-NEXT: addl $20, %esp +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; WIN64-LABEL: testi32_inp: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %r13 +; WIN64-NEXT: pushq %rbp +; WIN64-NEXT: pushq %rbx +; WIN64-NEXT: movl %eax, %r13d +; WIN64-NEXT: subl %ecx, %eax +; WIN64-NEXT: movl %edx, %ebp +; WIN64-NEXT: subl %edi, %ebp +; WIN64-NEXT: movl %r9d, %ebx +; WIN64-NEXT: subl %r10d, %ebx +; WIN64-NEXT: imull %ebx, %eax +; WIN64-NEXT: movl %r11d, %ebx +; WIN64-NEXT: subl %r12d, %ebx +; WIN64-NEXT: imull %ebp, %ebx +; WIN64-NEXT: movl %esi, %ebp +; WIN64-NEXT: subl %r8d, %ebp +; WIN64-NEXT: addl %ebx, %eax +; WIN64-NEXT: movl %r14d, %ebx +; WIN64-NEXT: subl %r15d, %ebx +; WIN64-NEXT: imull %ebp, %ebx +; WIN64-NEXT: addl %ebx, %eax +; WIN64-NEXT: addl %ecx, %r13d +; WIN64-NEXT: addl %edi, %edx +; WIN64-NEXT: addl %r8d, %esi +; WIN64-NEXT: addl %r10d, %r9d +; WIN64-NEXT: imull %r13d, %r9d +; WIN64-NEXT: addl %r12d, %r11d +; WIN64-NEXT: imull %edx, %r11d +; WIN64-NEXT: addl %r9d, %r11d +; WIN64-NEXT: addl %r15d, %r14d +; WIN64-NEXT: imull %esi, %r14d +; WIN64-NEXT: addl %r11d, %r14d +; WIN64-NEXT: addl %r14d, %eax +; WIN64-NEXT: popq %rbx +; WIN64-NEXT: popq %rbp +; WIN64-NEXT: popq %r13 +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: testi32_inp: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rbp +; LINUXOSX64-NEXT: pushq %rbx +; LINUXOSX64-NEXT: movl %eax, %r10d +; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %r11d +; LINUXOSX64-NEXT: subl %ecx, %eax +; LINUXOSX64-NEXT: movl %edx, %ebx +; LINUXOSX64-NEXT: subl %edi, %ebx +; LINUXOSX64-NEXT: movl %r9d, %ebp +; LINUXOSX64-NEXT: subl %r12d, %ebp +; LINUXOSX64-NEXT: imull %ebp, %eax +; LINUXOSX64-NEXT: movl %r13d, %ebp +; LINUXOSX64-NEXT: subl %r14d, %ebp +; LINUXOSX64-NEXT: imull %ebx, %ebp +; LINUXOSX64-NEXT: movl %esi, %ebx +; LINUXOSX64-NEXT: subl %r8d, %ebx +; LINUXOSX64-NEXT: addl %ebp, %eax +; LINUXOSX64-NEXT: movl %r15d, %ebp +; LINUXOSX64-NEXT: subl %r11d, %ebp +; LINUXOSX64-NEXT: imull %ebx, %ebp +; LINUXOSX64-NEXT: addl %ebp, %eax +; LINUXOSX64-NEXT: addl %ecx, %r10d +; LINUXOSX64-NEXT: addl %edi, %edx +; LINUXOSX64-NEXT: addl %r8d, %esi +; LINUXOSX64-NEXT: addl %r12d, %r9d +; LINUXOSX64-NEXT: imull %r10d, %r9d 
+; LINUXOSX64-NEXT: addl %r14d, %r13d +; LINUXOSX64-NEXT: imull %edx, %r13d +; LINUXOSX64-NEXT: addl %r9d, %r13d +; LINUXOSX64-NEXT: addl %r11d, %r15d +; LINUXOSX64-NEXT: imull %esi, %r15d +; LINUXOSX64-NEXT: addl %r13d, %r15d +; LINUXOSX64-NEXT: addl %r15d, %eax +; LINUXOSX64-NEXT: popq %rbx +; LINUXOSX64-NEXT: popq %rbp +; LINUXOSX64-NEXT: retq %x1 = sub i32 %a1, %a2 %x2 = sub i32 %a3, %a4 %x3 = sub i32 %a5, %a6 @@ -509,48 +1055,85 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a ret i32 %r1 } -; X32-LABEL: testf32_stack -; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-1])}} -; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-1])}} -; X32: retl - -; LINUXOSX64-LABEL: testf32_stack -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}} -; LINUXOSX64: vaddps {{([0-9])+}}(%rbp), {{%zmm([0-9]+)}}, {{%zmm([0-1])}} -; LINUXOSX64: vaddps {{([0-9])+}}(%rbp), {{%zmm([0-9]+)}}, {{%zmm([0-1])}} -; LINUXOSX64: retq - ; Test that parameters, overflowing register capacity, are passed through the stack -define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, - <32 x float> %a1, <32 x float> %b1, <32 x float> %c1, - <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind { +define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, <32 x float> %a1, <32 x float> %b1, <32 x float> %c1, <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind { +; X32-LABEL: testf32_stack: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: 
andl $-64, %esp +; X32-NEXT: subl $64, %esp +; X32-NEXT: vaddps %zmm3, %zmm1, %zmm1 +; X32-NEXT: vaddps %zmm2, %zmm0, %zmm0 +; X32-NEXT: vaddps %zmm0, %zmm4, %zmm0 +; X32-NEXT: vaddps %zmm1, %zmm5, %zmm1 +; X32-NEXT: vaddps %zmm1, %zmm7, %zmm1 +; X32-NEXT: vaddps %zmm0, %zmm6, %zmm0 +; X32-NEXT: vaddps 8(%ebp), %zmm0, %zmm0 +; X32-NEXT: vaddps 72(%ebp), %zmm1, %zmm1 +; X32-NEXT: vaddps 200(%ebp), %zmm1, %zmm1 +; X32-NEXT: vaddps 136(%ebp), %zmm0, %zmm0 +; X32-NEXT: vaddps 264(%ebp), %zmm0, %zmm0 +; X32-NEXT: vaddps 328(%ebp), %zmm1, %zmm1 +; X32-NEXT: vaddps 456(%ebp), %zmm1, %zmm1 +; X32-NEXT: vaddps 392(%ebp), %zmm0, %zmm0 +; X32-NEXT: vaddps 520(%ebp), %zmm0, %zmm0 +; X32-NEXT: vaddps 584(%ebp), %zmm1, %zmm1 +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; WIN64-LABEL: testf32_stack: +; WIN64: # BB#0: +; WIN64-NEXT: pushq %rbp +; WIN64-NEXT: subq $48, %rsp +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; WIN64-NEXT: andq $-64, %rsp +; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm1 +; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm0 +; WIN64-NEXT: vaddps %zmm0, %zmm4, %zmm0 +; WIN64-NEXT: vaddps %zmm1, %zmm5, %zmm1 +; WIN64-NEXT: vaddps %zmm1, %zmm7, %zmm1 +; WIN64-NEXT: vaddps %zmm0, %zmm6, %zmm0 +; WIN64-NEXT: vaddps %zmm0, %zmm8, %zmm0 +; WIN64-NEXT: vaddps %zmm1, %zmm9, %zmm1 +; WIN64-NEXT: vaddps %zmm1, %zmm11, %zmm1 +; WIN64-NEXT: vaddps %zmm0, %zmm10, %zmm0 +; WIN64-NEXT: vaddps %zmm0, %zmm12, %zmm0 +; WIN64-NEXT: vaddps %zmm1, %zmm13, %zmm1 +; WIN64-NEXT: vaddps %zmm1, %zmm15, %zmm1 +; WIN64-NEXT: vaddps %zmm0, %zmm14, %zmm0 +; WIN64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0 +; WIN64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1 +; WIN64-NEXT: movq %rbp, %rsp +; WIN64-NEXT: popq %rbp +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: testf32_stack: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: pushq %rbp +; LINUXOSX64-NEXT: movq %rsp, %rbp +; LINUXOSX64-NEXT: andq $-64, %rsp +; LINUXOSX64-NEXT: subq $64, %rsp +; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm0 +; LINUXOSX64-NEXT: vaddps %zmm0, %zmm4, %zmm0 +; LINUXOSX64-NEXT: vaddps %zmm1, %zmm5, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm1, %zmm7, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm0, %zmm6, %zmm0 +; LINUXOSX64-NEXT: vaddps %zmm0, %zmm8, %zmm0 +; LINUXOSX64-NEXT: vaddps %zmm1, %zmm9, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm1, %zmm11, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm0, %zmm10, %zmm0 +; LINUXOSX64-NEXT: vaddps %zmm0, %zmm12, %zmm0 +; LINUXOSX64-NEXT: vaddps %zmm1, %zmm13, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm1, %zmm15, %zmm1 +; LINUXOSX64-NEXT: vaddps %zmm0, %zmm14, %zmm0 +; LINUXOSX64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0 +; LINUXOSX64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1 +; LINUXOSX64-NEXT: movq %rbp, %rsp +; LINUXOSX64-NEXT: popq %rbp +; LINUXOSX64-NEXT: retq %x1 = fadd <32 x float> %a0, %b0 %x2 = fadd <32 x float> %c0, %x1 %x3 = fadd <32 x float> %a1, %x2 @@ -562,25 +1145,69 @@ define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> ret <32 x float> %x8 } -; X32-LABEL: vmovd %edx, {{%xmm([0-9])}} -; X32: vcvtsi2sdl %eax, {{%xmm([0-9])}}, {{%xmm([0-9])}} -; X32: vcvtsi2sdl %ecx, {{%xmm([0-9])}}, {{%xmm([0-9])}} -; X32: vcvtsi2sdl %esi, {{%xmm([0-9])}}, {{%xmm([0-9])}} -; X32: vaddsd %xmm1, %xmm0, %xmm0 -; X32: vcvttsd2si %xmm0, %eax -; X32: retl - -; LINUXOSX64-LABEL: test_argRetMixTypes -; LINUXOSX64: vcvtss2sd %xmm1, %xmm1, %xmm1 -; LINUXOSX64: vcvtsi2sdl %eax, {{%xmm([0-9])}}, {{%xmm([0-9])}} -; LINUXOSX64: vcvtsi2sdl %ecx, {{%xmm([0-9])}}, {{%xmm([0-9])}} -; LINUXOSX64: vcvtsi2sdq 
%rdx, {{%xmm([0-9])}}, {{%xmm([0-9])}} -; LINUXOSX64: vcvtsi2sdl %edi, {{%xmm([0-9])}}, {{%xmm([0-9])}} -; LINUXOSX64: vcvtsi2sdl (%rsi), {{%xmm([0-9])}}, {{%xmm([0-9])}} -; LINUXOSX64: vcvttsd2si {{%xmm([0-9])}}, %eax - ; Test regcall when passing/retrieving mixed types define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 { +; X32-LABEL: test_argRetMixTypes: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $16, %esp +; X32-NEXT: vmovd %edx, %xmm2 +; X32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2 +; X32-NEXT: movl 8(%ebp), %edx +; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; X32-NEXT: vcvtsi2sdl %eax, %xmm3, %xmm1 +; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X32-NEXT: vcvtsi2sdl %ecx, %xmm3, %xmm1 +; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X32-NEXT: vmovq %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: fildll {{[0-9]+}}(%esp) +; X32-NEXT: fstpl (%esp) +; X32-NEXT: vaddsd (%esp), %xmm0, %xmm0 +; X32-NEXT: vcvtsi2sdl %esi, %xmm3, %xmm1 +; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X32-NEXT: vcvtsi2sdl (%edx), %xmm3, %xmm1 +; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X32-NEXT: vcvttsd2si %xmm0, %eax +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; WIN64-LABEL: test_argRetMixTypes: +; WIN64: # BB#0: +; WIN64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; WIN64-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; WIN64-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1 +; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; WIN64-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1 +; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; WIN64-NEXT: vcvtsi2sdq %rdx, %xmm2, %xmm1 +; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; WIN64-NEXT: vcvtsi2sdl %edi, %xmm2, %xmm1 +; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; WIN64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1 +; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; WIN64-NEXT: vcvttsd2si %xmm0, %eax +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argRetMixTypes: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; LINUXOSX64-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1 +; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; LINUXOSX64-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1 +; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; LINUXOSX64-NEXT: vcvtsi2sdq %rdx, %xmm2, %xmm1 +; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; LINUXOSX64-NEXT: vcvtsi2sdl %edi, %xmm2, %xmm1 +; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; LINUXOSX64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1 +; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; LINUXOSX64-NEXT: vcvttsd2si %xmm0, %eax +; LINUXOSX64-NEXT: retq %8 = fpext float %1 to double %9 = fadd double %8, %0 %10 = sitofp i8 %2 to double @@ -600,23 +1227,31 @@ define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i6 %struct.complex = type { float, double, i32, i8, i64} - -; X32-LABEL: test_argMultiRet -; X32: vaddsd {{.*}}, %xmm1, %xmm1 -; X32: movl $4, %eax -; X32: movb $7, %cl -; X32: movl $999, %edx -; X32: xorl %edi, %edi -; X32: retl - -; LINUXOSX64-LABEL: test_argMultiRet -; LINUXOSX64: vaddsd {{.*}}, %xmm1, %xmm1 -; LINUXOSX64: movl $4, %eax -; LINUXOSX64: movb $7, %cl -; LINUXOSX64: movl $999, %edx -; LINUXOSX64: retq - define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 { +; X32-LABEL: test_argMultiRet: +; X32: # BB#0: +; X32-NEXT: vaddsd __real@4014000000000000, %xmm1, %xmm1 +; X32-NEXT: movl $4, %eax +; X32-NEXT: movb $7, %cl +; 
X32-NEXT: movl $999, %edx # imm = 0x3E7 +; X32-NEXT: xorl %edi, %edi +; X32-NEXT: retl +; +; WIN64-LABEL: test_argMultiRet: +; WIN64: # BB#0: +; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm1, %xmm1 +; WIN64-NEXT: movl $4, %eax +; WIN64-NEXT: movb $7, %cl +; WIN64-NEXT: movl $999, %edx # imm = 0x3E7 +; WIN64-NEXT: retq +; +; LINUXOSX64-LABEL: test_argMultiRet: +; LINUXOSX64: # BB#0: +; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 +; LINUXOSX64-NEXT: movl $4, %eax +; LINUXOSX64-NEXT: movb $7, %cl +; LINUXOSX64-NEXT: movl $999, %edx # imm = 0x3E7 +; LINUXOSX64-NEXT: retq %6 = fadd double %1, 5.000000e+00 %7 = insertvalue %struct.complex undef, float %0, 0 %8 = insertvalue %struct.complex %7, double %6, 1 @@ -625,4 +1260,3 @@ define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i %11 = insertvalue %struct.complex %10, i64 999, 4 ret %struct.complex %11 } - -- 2.40.0
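For reference, a minimal source-level sketch of the convention these tests exercise, not part of the patch itself: clang exposes LLVM's x86_regcallcc as the regcall attribute (added for ICC compatibility). The function below is a made-up example, assuming a reasonably recent clang targeting x86:

    /* regcall passes as many arguments as possible in registers
       (GPRs, XMM/YMM/ZMM, and AVX-512 mask registers for i1 vectors),
       which is why the checked assembly above reads v64i1 masks out of
       k-registers and GPRs rather than off the stack. */
    __attribute__((regcall)) int add3(int a, int b, int c) {
      return a + b + c;
    }

    int main(void) {
      /* The caller uses the same convention; clang derives it from the
         callee's declared type. */
      return add3(1, 2, 3) == 6 ? 0 : 1;
    }

Compiling with clang -m32 -O2 -S, the integer arguments should arrive in EAX, ECX, and EDX (the first i386 regcall GPRs) rather than on the stack, consistent with the register traffic in the X32 check lines above.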