From b0ed82c8f925c45edeadf6f003f25dffcaeec0b3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 11 Apr 2019 19:57:24 +0000 Subject: [PATCH] [X86] Add 32-bit command line to extractelement-fp.ll so I can add a test case for a 32-bit only crasher. NFC This is a bit ugly for ABI reasons about how floats/doubles are returned. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358217 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/extractelement-fp.ll | 1052 +++++++++++++++++++------ 1 file changed, 806 insertions(+), 246 deletions(-) diff --git a/test/CodeGen/X86/extractelement-fp.ll b/test/CodeGen/X86/extractelement-fp.ll index 028f9a61fac..f25cce89372 100644 --- a/test/CodeGen/X86/extractelement-fp.ll +++ b/test/CodeGen/X86/extractelement-fp.ll @@ -1,129 +1,266 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2,fma | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2,fma | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-- -mattr=avx2,fma | FileCheck %s --check-prefixes=CHECK,X86 define float @fneg_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: fneg_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fneg_v4f32: +; X64: # %bb.0: +; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fneg_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; X86-NEXT: vxorps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = fneg <4 x float> %x %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fneg_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: fneg_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fneg_v4f64: +; X64: # %bb.0: +; X64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fneg_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; X86-NEXT: vxorps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovlps %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = fneg <4 x double> %x %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fadd_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: fadd_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fadd_v4f32: +; X64: # %bb.0: +; X64-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fadd_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = fadd <4 x float> %x, %y %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fadd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: fadd_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fadd_v4f64: +; X64: # %bb.0: +; X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fadd_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = fadd <4 x double> %x, %y %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fsub_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: fsub_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fsub_v4f32: +; X64: # %bb.0: +; X64-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fsub_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = fsub <4 x float> %x, %y %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fsub_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: fsub_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fsub_v4f64: +; X64: # %bb.0: +; X64-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fsub_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = fsub <4 x double> %x, %y %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fmul_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: fmul_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fmul_v4f32: +; X64: # %bb.0: +; X64-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fmul_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = fmul <4 x float> %x, %y %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fmul_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: fmul_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fmul_v4f64: +; X64: # %bb.0: +; X64-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fmul_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = fmul <4 x double> %x, %y %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fdiv_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: fdiv_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fdiv_v4f32: +; X64: # %bb.0: +; X64-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fdiv_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = fdiv <4 x float> %x, %y %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fdiv_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: fdiv_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fdiv_v4f64: +; X64: # %bb.0: +; X64-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fdiv_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = fdiv <4 x double> %x, %y %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @frem_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: frem_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: jmp fmodf # TAILCALL +; X64-LABEL: frem_v4f32: +; X64: # %bb.0: +; X64-NEXT: jmp fmodf # TAILCALL +; +; X86-LABEL: frem_v4f32: +; X86: # %bb.0: +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: calll fmodf +; X86-NEXT: addl $8, %esp +; X86-NEXT: retl %v = frem <4 x float> %x, %y %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @frem_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: frem_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: jmp fmod # TAILCALL +; X64-LABEL: frem_v4f64: +; X64: # %bb.0: +; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; X64-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1 +; X64-NEXT: vzeroupper +; X64-NEXT: jmp fmod # TAILCALL +; +; X86-LABEL: frem_v4f64: +; X86: # %bb.0: +; X86-NEXT: subl $16, %esp +; X86-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-NEXT: vmovups %xmm0, (%esp) +; X86-NEXT: vzeroupper +; X86-NEXT: calll fmod +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl %v = frem <4 x double> %x, %y %r = extractelement <4 x double> %v, i32 0 ret double %r @@ -134,7 +271,7 @@ define i1 @fcmp_v4f32(<4 x float> %x, <4 x float> %y) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vucomiss %xmm1, %xmm0 ; CHECK-NEXT: seta %al -; CHECK-NEXT: retq +; CHECK-NEXT: ret{{[l|q]}} %v = fcmp ogt <4 x float> %x, %y %r = extractelement <4 x i1> %v, i32 0 ret i1 %r @@ -146,7 +283,7 @@ define i1 @fcmp_v4f64(<4 x double> %x, <4 x double> %y) nounwind { ; CHECK-NEXT: vucomisd %xmm0, %xmm1 ; CHECK-NEXT: setb %al ; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; CHECK-NEXT: ret{{[l|q]}} %v = fcmp ugt <4 x double> %x, %y %r = extractelement <4 x i1> %v, i32 0 ret i1 %r @@ -156,12 +293,19 @@ define i1 @fcmp_v4f64(<4 x double> %x, <4 x double> %y) nounwind { ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=13700 define void @extsetcc(<4 x float> %x) { -; CHECK-LABEL: extsetcc: -; CHECK: # %bb.0: -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: setb (%rax) -; CHECK-NEXT: retq +; X64-LABEL: extsetcc: +; X64: # %bb.0: +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-NEXT: vucomiss %xmm1, %xmm0 +; X64-NEXT: setb (%rax) +; X64-NEXT: retq +; +; X86-LABEL: extsetcc: +; X86: # %bb.0: +; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-NEXT: vucomiss %xmm1, %xmm0 +; X86-NEXT: setb (%eax) +; X86-NEXT: retl %cmp = fcmp ult <4 x float> %x, zeroinitializer %sext = sext <4 x i1> %cmp to <4 x i32> %e = extractelement <4 x i1> %cmp, i1 0 @@ -170,11 +314,26 @@ define void @extsetcc(<4 x float> %x) { } define float @select_fcmp_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) nounwind { -; CHECK-LABEL: select_fcmp_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vcmpneq_oqss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: select_fcmp_v4f32: +; X64: # %bb.0: +; X64-NEXT: vcmpneq_oqss %xmm1, %xmm0, %xmm0 +; X64-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: select_fcmp_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: vmovaps 8(%ebp), %xmm3 +; X86-NEXT: vcmpneq_oqss %xmm1, %xmm0, %xmm0 +; X86-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 +; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl %c = fcmp one <4 x float> %x, %y %s = select <4 x i1> %c, <4 x float> %z, <4 x float> %w %r = extractelement <4 x float> %s, i32 0 @@ -182,12 +341,28 @@ define float @select_fcmp_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, } define double @select_fcmp_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z, <4 x double> %w) nounwind { -; CHECK-LABEL: select_fcmp_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: select_fcmp_v4f64: +; X64: # %bb.0: +; X64-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm0 +; X64-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: select_fcmp_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-32, %esp +; X86-NEXT: subl $32, %esp +; X86-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X86-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; X86-NEXT: vmovlpd %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %c = fcmp ule <4 x double> %x, %y %s = select <4 x i1> %c, <4 x double> %z, <4 x double> %w %r = extractelement <4 x double> %s, i32 0 @@ -195,135 +370,276 @@ define double @select_fcmp_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> } define float @fsqrt_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: fsqrt_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fsqrt_v4f32: +; X64: # %bb.0: +; X64-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fsqrt_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fsqrt_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: fsqrt_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fsqrt_v4f64: +; X64: # %bb.0: +; X64-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fsqrt_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fsin_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: fsin_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: jmp sinf # TAILCALL +; X64-LABEL: fsin_v4f32: +; X64: # %bb.0: +; X64-NEXT: jmp sinf # TAILCALL +; +; X86-LABEL: fsin_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: calll sinf +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.sin.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fsin_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: fsin_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: jmp sin # TAILCALL +; X64-LABEL: fsin_v4f64: +; X64: # %bb.0: +; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; X64-NEXT: vzeroupper +; X64-NEXT: jmp sin # TAILCALL +; +; X86-LABEL: fsin_v4f64: +; X86: # %bb.0: +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmovlps %xmm0, (%esp) +; X86-NEXT: vzeroupper +; X86-NEXT: calll sin +; X86-NEXT: addl $8, %esp +; X86-NEXT: retl %v = call <4 x double> @llvm.sin.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) nounwind { -; CHECK-LABEL: fma_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; CHECK-NEXT: retq +; X64-LABEL: fma_v4f32: +; X64: # %bb.0: +; X64-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; X64-NEXT: retq +; +; X86-LABEL: fma_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fma_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) nounwind { -; CHECK-LABEL: fma_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fma_v4f64: +; X64: # %bb.0: +; X64-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fma_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + xmm2 +; X86-NEXT: vmovsd %xmm1, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fabs_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: fabs_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] -; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fabs_v4f32: +; X64: # %bb.0: +; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; X64-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fabs_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; X86-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fabs_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: fabs_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fabs_v4f64: +; X64: # %bb.0: +; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fabs_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; X86-NEXT: vmovlps %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fmaxnum_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: fmaxnum_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; CHECK-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fmaxnum_v4f32: +; X64: # %bb.0: +; X64-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; X64-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; X64-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fmaxnum_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; X86-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fmaxnum_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: fmaxnum_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 -; CHECK-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fmaxnum_v4f64: +; X64: # %bb.0: +; X64-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; X64-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; X64-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fmaxnum_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; X86-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; X86-NEXT: vmovlpd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @fminnum_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: fminnum_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vminss %xmm0, %xmm1, %xmm2 -; CHECK-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: fminnum_v4f32: +; X64: # %bb.0: +; X64-NEXT: vminss %xmm0, %xmm1, %xmm2 +; X64-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; X64-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: fminnum_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vminss %xmm0, %xmm1, %xmm2 +; X86-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @fminnum_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: fminnum_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vminsd %xmm0, %xmm1, %xmm2 -; CHECK-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: fminnum_v4f64: +; X64: # %bb.0: +; X64-NEXT: vminsd %xmm0, %xmm1, %xmm2 +; X64-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; X64-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: fminnum_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vminsd %xmm0, %xmm1, %xmm2 +; X86-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; X86-NEXT: vmovlpd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) %r = extractelement <4 x double> %v, i32 0 ret double %r @@ -354,10 +670,19 @@ define double @fminnum_v4f64(<4 x double> %x, <4 x double> %y) nounwind { ;} define float @maxps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: maxps_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: maxps_v4f32: +; X64: # %bb.0: +; X64-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: maxps_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %cmp = fcmp ogt <4 x float> %x, %y %v = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y %r = extractelement <4 x float> %v, i32 0 @@ -365,11 +690,25 @@ define float @maxps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { } define double @maxpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: maxpd_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: maxpd_v4f64: +; X64: # %bb.0: +; X64-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: maxpd_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %cmp = fcmp ogt <4 x double> %x, %y %v = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y %r = extractelement <4 x double> %v, i32 0 @@ -377,10 +716,19 @@ define double @maxpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { } define float @minps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: minps_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: minps_v4f32: +; X64: # %bb.0: +; X64-NEXT: vminss %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: minps_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vminss %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %cmp = fcmp olt <4 x float> %x, %y %v = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y %r = extractelement <4 x float> %v, i32 0 @@ -388,11 +736,25 @@ define float @minps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { } define double @minpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: minpd_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: minpd_v4f64: +; X64: # %bb.0: +; X64-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: minpd_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %cmp = fcmp olt <4 x double> %x, %y %v = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y %r = extractelement <4 x double> %v, i32 0 @@ -400,202 +762,400 @@ define double @minpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { } define float @copysign_v4f32(<4 x float> %x, <4 x float> %y) nounwind { -; CHECK-LABEL: copysign_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; CHECK-NEXT: vandps %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] -; CHECK-NEXT: vandps %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: copysign_v4f32: +; X64: # %bb.0: +; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; X64-NEXT: vandps %xmm2, %xmm1, %xmm1 +; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] +; X64-NEXT: vandps %xmm2, %xmm0, %xmm0 +; X64-NEXT: vorps %xmm1, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: copysign_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; X86-NEXT: vandps %xmm2, %xmm1, %xmm1 +; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] +; X86-NEXT: vandps %xmm2, %xmm0, %xmm0 +; X86-NEXT: vorps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %y) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @copysign_v4f64(<4 x double> %x, <4 x double> %y) nounwind { -; CHECK-LABEL: copysign_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: copysign_v4f64: +; X64: # %bb.0: +; X64-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 +; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vorps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: copysign_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vandps {{\.LCPI.*}}, %xmm1, %xmm1 +; X86-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; X86-NEXT: vorps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovlps %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %y) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @floor_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: floor_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: floor_v4f32: +; X64: # %bb.0: +; X64-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: floor_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.floor.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @floor_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: floor_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: floor_v4f64: +; X64: # %bb.0: +; X64-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: floor_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @ceil_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: ceil_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: ceil_v4f32: +; X64: # %bb.0: +; X64-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: ceil_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @ceil_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: ceil_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: ceil_v4f64: +; X64: # %bb.0: +; X64-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: ceil_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @trunc_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: trunc_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: trunc_v4f32: +; X64: # %bb.0: +; X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: trunc_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @trunc_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: trunc_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: trunc_v4f64: +; X64: # %bb.0: +; X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: trunc_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @rint_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: rint_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: rint_v4f32: +; X64: # %bb.0: +; X64-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: rint_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.rint.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @rint_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: rint_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: rint_v4f64: +; X64: # %bb.0: +; X64-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: rint_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.rint.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @nearbyint_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: nearbyint_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: nearbyint_v4f32: +; X64: # %bb.0: +; X64-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: nearbyint_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @nearbyint_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: nearbyint_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: nearbyint_v4f64: +; X64: # %bb.0: +; X64-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: nearbyint_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @round_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: round_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: jmp roundf # TAILCALL +; X64-LABEL: round_v4f32: +; X64: # %bb.0: +; X64-NEXT: jmp roundf # TAILCALL +; +; X86-LABEL: round_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: calll roundf +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.round.v4f32(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define double @round_v4f64(<4 x double> %x) nounwind { -; CHECK-LABEL: round_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: jmp round # TAILCALL +; X64-LABEL: round_v4f64: +; X64: # %bb.0: +; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; X64-NEXT: vzeroupper +; X64-NEXT: jmp round # TAILCALL +; +; X86-LABEL: round_v4f64: +; X86: # %bb.0: +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmovlps %xmm0, (%esp) +; X86-NEXT: vzeroupper +; X86-NEXT: calll round +; X86-NEXT: addl $8, %esp +; X86-NEXT: retl %v = call <4 x double> @llvm.round.v4f64(<4 x double> %x) %r = extractelement <4 x double> %v, i32 0 ret double %r } define float @rcp_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: rcp_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: rcp_v4f32: +; X64: # %bb.0: +; X64-NEXT: vrcpss %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: rcp_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vrcpss %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define float @rcp_v8f32(<8 x float> %x) nounwind { -; CHECK-LABEL: rcp_v8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vrcpps %ymm0, %ymm0 -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: rcp_v8f32: +; X64: # %bb.0: +; X64-NEXT: vrcpps %ymm0, %ymm0 +; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: rcp_v8f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vrcpps %ymm0, %ymm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %x) %r = extractelement <8 x float> %v, i32 0 ret float %r } define float @rsqrt_v4f32(<4 x float> %x) nounwind { -; CHECK-LABEL: rsqrt_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq +; X64-LABEL: rsqrt_v4f32: +; X64: # %bb.0: +; X64-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: rsqrt_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl %v = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 ret float %r } define float @rsqrt_v8f32(<8 x float> %x) nounwind { -; CHECK-LABEL: rsqrt_v8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; X64-LABEL: rsqrt_v8f32: +; X64: # %bb.0: +; X64-NEXT: vrsqrtps %ymm0, %ymm0 +; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: rsqrt_v8f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: vrsqrtps %ymm0, %ymm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: vzeroupper +; X86-NEXT: retl %v = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %x) %r = extractelement <8 x float> %v, i32 0 ret float %r } - declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) declare <4 x float> @llvm.sin.v4f32(<4 x float>) -- 2.50.1