From: Simon Pilgrim Date: Tue, 22 Nov 2016 22:04:50 +0000 (+0000) Subject: [X86][AVX512DQ] Add fp <-> int tests for AVX512DQ/AVX512DQ+VL X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2d1bdd9d616e924d04b9e0dcd91b23833756acb8;p=llvm [X86][AVX512DQ] Add fp <-> int tests for AVX512DQ/AVX512DQ+VL git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287706 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll index 2e9fababef1..bd41fafe3bb 100644 --- a/test/CodeGen/X86/vec_fp_to_int.ll +++ b/test/CodeGen/X86/vec_fp_to_int.ll @@ -1,9 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VLDQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VLDQ ; ; 32-bit tests to make sure we're not doing anything stupid. ; RUN: llc < %s -mtriple=i686-unknown-unknown @@ -26,15 +28,15 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: fptosi_2f64_to_2i64: -; AVX: # BB#0: -; AVX-NEXT: vcvttsd2si %xmm0, %rax -; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vcvttsd2si %xmm0, %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX-NEXT: retq +; VEX-LABEL: fptosi_2f64_to_2i64: +; VEX: # BB#0: +; VEX-NEXT: vcvttsd2si %xmm0, %rax +; VEX-NEXT: vmovq %rax, %xmm1 +; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; VEX-NEXT: vcvttsd2si %xmm0, %rax +; VEX-NEXT: vmovq %rax, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_2f64_to_2i64: ; AVX512F: # BB#0: @@ -56,6 +58,16 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) { ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptosi_2f64_to_2i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0 @@ -74,11 +86,6 @@ define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) { ; AVX: # BB#0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_2f64_to_4i32: -; AVX512: # BB#0: -; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512-NEXT: retq %cvt = fptosi <2 x double> %a to <2 x i32> %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %ext @@ -96,12 +103,6 @@ define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) { ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_2f64_to_2i32: -; AVX512: # BB#0: -; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX512-NEXT: retq %cvt = fptosi <2 x double> %a to <2 x i32> ret <2 x i32> %cvt } @@ -114,12 +115,12 @@ define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) { ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; -; AVX-LABEL: fptosi_4f64_to_2i32: -; AVX: # BB#0: -; AVX-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; VEX-LABEL: fptosi_4f64_to_2i32: +; VEX: # BB#0: +; VEX-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VEX-NEXT: vzeroupper +; VEX-NEXT: retq ; ; AVX512-LABEL: fptosi_4f64_to_2i32: ; AVX512: # BB#0: @@ -222,6 +223,24 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) { ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptosi_4f64_to_4i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vcvttsd2si %xmm1, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512DQ-NEXT: vcvttsd2si %xmm1, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0 @@ -238,11 +257,11 @@ define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) { ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; -; AVX-LABEL: fptosi_4f64_to_4i32: -; AVX: # BB#0: -; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; VEX-LABEL: fptosi_4f64_to_4i32: +; VEX: # BB#0: +; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VEX-NEXT: vzeroupper +; VEX-NEXT: retq ; ; AVX512-LABEL: fptosi_4f64_to_4i32: ; AVX512: # BB#0: @@ -282,27 +301,27 @@ define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_2f64_to_2i64: -; AVX: # BB#0: -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvttsd2si %xmm2, %rax -; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rdx -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 -; AVX-NEXT: vcvttsd2si %xmm3, %rax -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rcx -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; AVX-NEXT: retq +; VEX-LABEL: fptoui_2f64_to_2i64: +; VEX: # BB#0: +; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcvttsd2si %xmm2, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rdx +; VEX-NEXT: vucomisd %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcvttsd2si %xmm3, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: vucomisd %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_2i64: ; AVX512F: # BB#0: @@ -324,6 +343,16 @@ define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) { ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_2f64_to_2i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 @@ -359,29 +388,29 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) { ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_2f64_to_4i32: -; AVX: # BB#0: -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvttsd2si %xmm2, %rax -; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rdx -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 -; AVX-NEXT: vcvttsd2si %xmm3, %rax -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rcx -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; AVX-NEXT: retq +; VEX-LABEL: fptoui_2f64_to_4i32: +; VEX: # BB#0: +; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcvttsd2si %xmm2, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rdx +; VEX-NEXT: vucomisd %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcvttsd2si %xmm3, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: vucomisd %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_4i32: ; AVX512F: # BB#0: @@ -395,6 +424,13 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) { ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_2f64_to_4i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 @@ -430,28 +466,28 @@ define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) { ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_2f64_to_2i32: -; AVX: # BB#0: -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvttsd2si %xmm2, %rax -; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rdx -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 -; AVX-NEXT: vcvttsd2si %xmm3, %rax -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rcx -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: retq +; VEX-LABEL: fptoui_2f64_to_2i32: +; VEX: # BB#0: +; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcvttsd2si %xmm2, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rdx +; VEX-NEXT: vucomisd %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcvttsd2si %xmm3, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: vucomisd %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_2i32: ; AVX512F: # BB#0: @@ -465,6 +501,13 @@ define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) { ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_2f64_to_2i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 @@ -508,17 +551,17 @@ define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) { ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_4f64_to_2i32: -; AVX: # BB#0: -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vcvttsd2si %xmm1, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rcx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttsd2si %xmm0, %rax -; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 -; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 -; AVX-NEXT: retq +; VEX-LABEL: fptoui_4f64_to_2i32: +; VEX: # BB#0: +; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; VEX-NEXT: vcvttsd2si %xmm1, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: vmovd %ecx, %xmm0 +; VEX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; VEX-NEXT: vcvttsd2si %xmm0, %rax +; VEX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; VEX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_4f64_to_2i32: ; AVX512F: # BB#0: @@ -533,6 +576,13 @@ define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) { ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_4f64_to_2i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 @@ -703,6 +753,24 @@ define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) { ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_4f64_to_4i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vcvttsd2usi %xmm1, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512DQ-NEXT: vcvttsd2usi %xmm1, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0 @@ -757,21 +825,21 @@ define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) { ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_4f64_to_4i32: -; AVX: # BB#0: -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vcvttsd2si %xmm1, %rax -; AVX-NEXT: vcvttsd2si %xmm0, %rcx -; AVX-NEXT: vmovd %ecx, %xmm1 -; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX-NEXT: vcvttsd2si %xmm0, %rax -; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vcvttsd2si %xmm0, %rax -; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; VEX-LABEL: fptoui_4f64_to_4i32: +; VEX: # BB#0: +; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; VEX-NEXT: vcvttsd2si %xmm1, %rax +; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: vmovd %ecx, %xmm1 +; VEX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; VEX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; VEX-NEXT: vcvttsd2si %xmm0, %rax +; VEX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; VEX-NEXT: vcvttsd2si %xmm0, %rax +; VEX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; VEX-NEXT: vzeroupper +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_4f64_to_4i32: ; AVX512F: # BB#0: @@ -785,6 +853,13 @@ define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) { ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_4f64_to_4i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0 @@ -809,12 +884,6 @@ define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) { ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_2f32_to_2i32: -; AVX512: # BB#0: -; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX512-NEXT: retq %cvt = fptosi <2 x float> %a to <2 x i32> ret <2 x i32> %cvt } @@ -829,11 +898,6 @@ define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) { ; AVX: # BB#0: ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_4f32_to_4i32: -; AVX512: # BB#0: -; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512-NEXT: retq %cvt = fptosi <4 x float> %a to <4 x i32> ret <4 x i32> %cvt } @@ -859,16 +923,6 @@ define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) { ; AVX-NEXT: vmovq %rax, %xmm0 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_2f32_to_2i64: -; AVX512: # BB#0: -; AVX512-NEXT: vcvttss2si %xmm0, %rax -; AVX512-NEXT: vmovq %rax, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX512-NEXT: vcvttss2si %xmm0, %rax -; AVX512-NEXT: vmovq %rax, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-NEXT: retq %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %cvt = fptosi <2 x float> %shuf to <2 x i64> ret <2 x i64> %cvt @@ -886,15 +940,15 @@ define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: fptosi_4f32_to_2i64: -; AVX: # BB#0: -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vcvttss2si %xmm1, %rax -; AVX-NEXT: vcvttss2si %xmm0, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX-NEXT: retq +; VEX-LABEL: fptosi_4f32_to_2i64: +; VEX: # BB#0: +; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; VEX-NEXT: vcvttss2si %xmm1, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vmovq %rax, %xmm1 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_4f32_to_2i64: ; AVX512F: # BB#0: @@ -916,6 +970,16 @@ define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) { ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptosi_4f32_to_2i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512DQ-NEXT: vcvttss2si %xmm1, %rax +; AVX512DQ-NEXT: vcvttss2si %xmm0, %rcx +; AVX512DQ-NEXT: vmovq %rcx, %xmm0 +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0 @@ -937,11 +1001,6 @@ define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) { ; AVX: # BB#0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_8f32_to_8i32: -; AVX512: # BB#0: -; AVX512-NEXT: vcvttps2dq %ymm0, %ymm0 -; AVX512-NEXT: retq %cvt = fptosi <8 x float> %a to <8 x i32> ret <8 x i32> %cvt } @@ -1039,6 +1098,24 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) { ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptosi_4f32_to_4i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] +; AVX512DQ-NEXT: vcvttss2si %xmm1, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttss2si %xmm2, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX512DQ-NEXT: vcvttss2si %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512DQ-NEXT: vcvttss2si %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0 @@ -1141,6 +1218,12 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) { ; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptosi_8f32_to_4i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0 @@ -1181,27 +1264,27 @@ define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_2f32_to_2i32: -; AVX: # BB#0: -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvttss2si %xmm2, %rax -; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttss2si %xmm0, %rdx -; AVX-NEXT: vucomiss %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm2 -; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3 -; AVX-NEXT: vcvttss2si %xmm3, %rax -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttss2si %xmm0, %rcx -; AVX-NEXT: vucomiss %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; AVX-NEXT: retq +; VEX-LABEL: fptoui_2f32_to_2i32: +; VEX: # BB#0: +; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcvttss2si %xmm2, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rdx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcvttss2si %xmm3, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f32_to_2i32: ; AVX512F: # BB#0: @@ -1216,6 +1299,13 @@ define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) { ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_2f32_to_2i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0 @@ -1247,20 +1337,20 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_4f32_to_4i32: -; AVX: # BB#0: -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vcvttss2si %xmm1, %rax -; AVX-NEXT: vcvttss2si %xmm0, %rcx -; AVX-NEXT: vmovd %ecx, %xmm1 -; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX-NEXT: vcvttss2si %xmm2, %rax -; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX-NEXT: vcvttss2si %xmm0, %rax -; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 -; AVX-NEXT: retq +; VEX-LABEL: fptoui_4f32_to_4i32: +; VEX: # BB#0: +; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; VEX-NEXT: vcvttss2si %xmm1, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: vmovd %ecx, %xmm1 +; VEX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; VEX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; VEX-NEXT: vcvttss2si %xmm2, %rax +; VEX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; VEX-NEXT: vcvttss2si %xmm0, %rax +; VEX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_4f32_to_4i32: ; AVX512F: # BB#0: @@ -1274,6 +1364,13 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) { ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_4f32_to_4i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0 @@ -1308,27 +1405,27 @@ define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_2f32_to_2i64: -; AVX: # BB#0: -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvttss2si %xmm2, %rax -; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttss2si %xmm0, %rdx -; AVX-NEXT: vucomiss %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm2 -; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3 -; AVX-NEXT: vcvttss2si %xmm3, %rax -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttss2si %xmm0, %rcx -; AVX-NEXT: vucomiss %xmm1, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; AVX-NEXT: retq +; VEX-LABEL: fptoui_2f32_to_2i64: +; VEX: # BB#0: +; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcvttss2si %xmm2, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rdx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcvttss2si %xmm3, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; VEX-NEXT: retq ; ; AVX512-LABEL: fptoui_2f32_to_2i64: ; AVX512: # BB#0: @@ -1370,27 +1467,27 @@ define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: fptoui_4f32_to_2i64: -; AVX: # BB#0: -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX-NEXT: vsubss %xmm2, %xmm1, %xmm3 -; AVX-NEXT: vcvttss2si %xmm3, %rax -; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttss2si %xmm1, %rdx -; AVX-NEXT: vucomiss %xmm2, %xmm1 -; AVX-NEXT: cmovaeq %rax, %rdx -; AVX-NEXT: vsubss %xmm2, %xmm0, %xmm1 -; AVX-NEXT: vcvttss2si %xmm1, %rax -; AVX-NEXT: xorq %rcx, %rax -; AVX-NEXT: vcvttss2si %xmm0, %rcx -; AVX-NEXT: vucomiss %xmm2, %xmm0 -; AVX-NEXT: cmovaeq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vmovq %rdx, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX-NEXT: retq +; VEX-LABEL: fptoui_4f32_to_2i64: +; VEX: # BB#0: +; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3 +; VEX-NEXT: vcvttss2si %xmm3, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm1, %rdx +; VEX-NEXT: vucomiss %xmm2, %xmm1 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1 +; VEX-NEXT: vcvttss2si %xmm1, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: vucomiss %xmm2, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vmovq %rdx, %xmm1 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_4f32_to_2i64: ; AVX512F: # BB#0: @@ -1412,6 +1509,16 @@ define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) { ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_4f32_to_2i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512DQ-NEXT: vcvttss2usi %xmm1, %rax +; AVX512DQ-NEXT: vcvttss2usi %xmm0, %rcx +; AVX512DQ-NEXT: vmovq %rcx, %xmm0 +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0 @@ -1528,6 +1635,13 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) { ; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_8f32_to_8i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -1699,6 +1813,24 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) { ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_4f32_to_4i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] +; AVX512DQ-NEXT: vcvttss2usi %xmm1, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttss2usi %xmm2, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX512DQ-NEXT: vcvttss2usi %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm2 +; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512DQ-NEXT: vcvttss2usi %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0 @@ -1871,6 +2003,12 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) { ; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptoui_8f32_to_4i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -1895,11 +2033,6 @@ define <2 x i64> @fptosi_2f64_to_2i64_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_2f64_to_2i64_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615] -; AVX512-NEXT: retq %cvt = fptosi <2 x double> to <2 x i64> ret <2 x i64> %cvt } @@ -1914,11 +2047,6 @@ define <4 x i32> @fptosi_2f64_to_2i32_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u> ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_2f64_to_2i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u> -; AVX512-NEXT: retq %cvt = fptosi <2 x double> to <2 x i32> %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> ret <4 x i32> %ext @@ -1935,11 +2063,6 @@ define <4 x i64> @fptosi_4f64_to_4i64_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_4f64_to_4i64_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613] -; AVX512-NEXT: retq %cvt = fptosi <4 x double> to <4 x i64> ret <4 x i64> %cvt } @@ -1954,11 +2077,6 @@ define <4 x i32> @fptosi_4f64_to_4i32_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_4f64_to_4i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3] -; AVX512-NEXT: retq %cvt = fptosi <4 x double> to <4 x i32> ret <4 x i32> %cvt } @@ -1973,11 +2091,6 @@ define <2 x i64> @fptoui_2f64_to_2i64_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptoui_2f64_to_2i64_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [2,4] -; AVX512-NEXT: retq %cvt = fptoui <2 x double> to <2 x i64> ret <2 x i64> %cvt } @@ -1992,11 +2105,6 @@ define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u> ; AVX-NEXT: retq -; -; AVX512-LABEL: fptoui_2f64_to_2i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u> -; AVX512-NEXT: retq %cvt = fptoui <2 x double> to <2 x i32> %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> ret <4 x i32> %ext @@ -2013,11 +2121,6 @@ define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptoui_4f64_to_4i64_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8] -; AVX512-NEXT: retq %cvt = fptoui <4 x double> to <4 x i64> ret <4 x i64> %cvt } @@ -2032,11 +2135,6 @@ define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptoui_4f64_to_4i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8] -; AVX512-NEXT: retq %cvt = fptoui <4 x double> to <4 x i32> ret <4 x i32> %cvt } @@ -2051,11 +2149,6 @@ define <4 x i32> @fptosi_4f32_to_4i32_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_4f32_to_4i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3] -; AVX512-NEXT: retq %cvt = fptosi <4 x float> to <4 x i32> ret <4 x i32> %cvt } @@ -2071,11 +2164,6 @@ define <4 x i64> @fptosi_4f32_to_4i64_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_4f32_to_4i64_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3] -; AVX512-NEXT: retq %cvt = fptosi <4 x float> to <4 x i64> ret <4 x i64> %cvt } @@ -2091,11 +2179,6 @@ define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_8f32_to_8i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295] -; AVX512-NEXT: retq %cvt = fptosi <8 x float> to <8 x i32> ret <8 x i32> %cvt } @@ -2110,11 +2193,6 @@ define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptoui_4f32_to_4i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6] -; AVX512-NEXT: retq %cvt = fptoui <4 x float> to <4 x i32> ret <4 x i32> %cvt } @@ -2130,11 +2208,6 @@ define <4 x i64> @fptoui_4f32_to_4i64_const() { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptoui_4f32_to_4i64_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8] -; AVX512-NEXT: retq %cvt = fptoui <4 x float> to <4 x i64> ret <4 x i64> %cvt } @@ -2150,11 +2223,6 @@ define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1] ; AVX-NEXT: retq -; -; AVX512-LABEL: fptoui_8f32_to_8i32_const: -; AVX512: # BB#0: -; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1] -; AVX512-NEXT: retq %cvt = fptoui <8 x float> to <8 x i32> ret <8 x i32> %cvt } @@ -2187,28 +2255,28 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { ; SSE-NEXT: popq %rax ; SSE-NEXT: retq ; -; AVX-LABEL: fptosi_2f16_to_4i32: -; AVX: # BB#0: -; AVX-NEXT: pushq %rax -; AVX-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill -; AVX-NEXT: callq __gnu_f2h_ieee -; AVX-NEXT: movzwl %ax, %edi -; AVX-NEXT: callq __gnu_h2f_ieee -; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX-NEXT: vmovss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload -; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: callq __gnu_f2h_ieee -; AVX-NEXT: movzwl %ax, %edi -; AVX-NEXT: callq __gnu_h2f_ieee -; AVX-NEXT: vcvttss2si %xmm0, %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vcvttss2si (%rsp), %rax # 4-byte Folded Reload -; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; AVX-NEXT: popq %rax -; AVX-NEXT: retq +; VEX-LABEL: fptosi_2f16_to_4i32: +; VEX: # BB#0: +; VEX-NEXT: pushq %rax +; VEX-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill +; VEX-NEXT: callq __gnu_f2h_ieee +; VEX-NEXT: movzwl %ax, %edi +; VEX-NEXT: callq __gnu_h2f_ieee +; VEX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; VEX-NEXT: vmovss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload +; VEX-NEXT: # xmm0 = mem[0],zero,zero,zero +; VEX-NEXT: callq __gnu_f2h_ieee +; VEX-NEXT: movzwl %ax, %edi +; VEX-NEXT: callq __gnu_h2f_ieee +; VEX-NEXT: vcvttss2si %xmm0, %rax +; VEX-NEXT: vmovq %rax, %xmm0 +; VEX-NEXT: vcvttss2si (%rsp), %rax # 4-byte Folded Reload +; VEX-NEXT: vmovq %rax, %xmm1 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; VEX-NEXT: popq %rax +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_2f16_to_4i32: ; AVX512F: # BB#0: @@ -2242,6 +2310,23 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptosi_2f16_to_4i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512DQ-NEXT: vcvtph2ps %ymm0, %zmm0 +; AVX512DQ-NEXT: vcvtps2ph $4, %zmm1, %ymm1 +; AVX512DQ-NEXT: vcvtph2ps %ymm1, %zmm1 +; AVX512DQ-NEXT: vcvttss2si %xmm1, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vcvttss2si %xmm0, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptosi_2f16_to_4i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -2299,19 +2384,6 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind { ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: retq -; -; AVX512-LABEL: fptosi_2f80_to_4i32: -; AVX512: # BB#0: -; AVX512-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; AVX512-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; AVX512-NEXT: retq %cvt = fptosi <2 x x86_fp80> %a to <2 x i32> %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %ext @@ -2343,30 +2415,30 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX-LABEL: fptosi_2f128_to_4i32: -; AVX: # BB#0: -; AVX-NEXT: pushq %r14 -; AVX-NEXT: pushq %rbx -; AVX-NEXT: subq $24, %rsp -; AVX-NEXT: movq %rsi, %r14 -; AVX-NEXT: movq %rdi, %rbx -; AVX-NEXT: movq %rdx, %rdi -; AVX-NEXT: movq %rcx, %rsi -; AVX-NEXT: callq __fixtfdi -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill -; AVX-NEXT: movq %rbx, %rdi -; AVX-NEXT: movq %r14, %rsi -; AVX-NEXT: callq __fixtfdi -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload -; AVX-NEXT: # xmm0 = xmm0[0],mem[0] -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; AVX-NEXT: addq $24, %rsp -; AVX-NEXT: popq %rbx -; AVX-NEXT: popq %r14 -; AVX-NEXT: retq +; VEX-LABEL: fptosi_2f128_to_4i32: +; VEX: # BB#0: +; VEX-NEXT: pushq %r14 +; VEX-NEXT: pushq %rbx +; VEX-NEXT: subq $24, %rsp +; VEX-NEXT: movq %rsi, %r14 +; VEX-NEXT: movq %rdi, %rbx +; VEX-NEXT: movq %rdx, %rdi +; VEX-NEXT: movq %rcx, %rsi +; VEX-NEXT: callq __fixtfdi +; VEX-NEXT: vmovq %rax, %xmm0 +; VEX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; VEX-NEXT: movq %rbx, %rdi +; VEX-NEXT: movq %r14, %rsi +; VEX-NEXT: callq __fixtfdi +; VEX-NEXT: vmovq %rax, %xmm0 +; VEX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; VEX-NEXT: # xmm0 = xmm0[0],mem[0] +; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; VEX-NEXT: addq $24, %rsp +; VEX-NEXT: popq %rbx +; VEX-NEXT: popq %r14 +; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_2f128_to_4i32: ; AVX512F: # BB#0: @@ -2418,6 +2490,31 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind { ; AVX512VL-NEXT: popq %r14 ; AVX512VL-NEXT: retq ; +; AVX512DQ-LABEL: fptosi_2f128_to_4i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: pushq %r14 +; AVX512DQ-NEXT: pushq %rbx +; AVX512DQ-NEXT: subq $24, %rsp +; AVX512DQ-NEXT: movq %rsi, %r14 +; AVX512DQ-NEXT: movq %rdi, %rbx +; AVX512DQ-NEXT: movq %rdx, %rdi +; AVX512DQ-NEXT: movq %rcx, %rsi +; AVX512DQ-NEXT: callq __fixtfdi +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512DQ-NEXT: movq %rbx, %rdi +; AVX512DQ-NEXT: movq %r14, %rsi +; AVX512DQ-NEXT: callq __fixtfdi +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ-NEXT: addq $24, %rsp +; AVX512DQ-NEXT: popq %rbx +; AVX512DQ-NEXT: popq %r14 +; AVX512DQ-NEXT: retq +; ; AVX512VLDQ-LABEL: fptosi_2f128_to_4i32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: pushq %r14 diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index 303971643c4..bbcd228f3d2 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -3,7 +3,9 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VLDQ ; ; 32-bit tests to make sure we're not doing anything stupid. ; RUN: llc < %s -mtriple=i686-unknown-unknown @@ -27,14 +29,46 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: sitofp_2i64_to_2f64: -; AVX: # BB#0: -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX-NEXT: retq +; VEX-LABEL: sitofp_2i64_to_2f64: +; VEX: # BB#0: +; VEX-NEXT: vpextrq $1, %xmm0, %rax +; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 +; VEX-NEXT: vmovq %xmm0, %rax +; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 +; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: retq +; +; AVX512F-LABEL: sitofp_2i64_to_2f64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: sitofp_2i64_to_2f64: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax +; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_2i64_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %cvt = sitofp <2 x i64> %a to <2 x double> ret <2 x double> %cvt } @@ -269,6 +303,27 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_4i64_to_4f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm1, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512DQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_4i64_to_4f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0 +; AVX512VLDQ-NEXT: retq %cvt = sitofp <4 x i64> %a to <4 x double> ret <4 x double> %cvt } @@ -431,14 +486,37 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0 ; VEX-NEXT: retq ; -; AVX512-LABEL: uitofp_2i64_to_2f64: -; AVX512: # BB#0: -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512-NEXT: retq +; AVX512F-LABEL: uitofp_2i64_to_2f64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: uitofp_2i64_to_2f64: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_2i64_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <2 x i64> %a to <2 x double> ret <2 x double> %cvt } @@ -486,6 +564,18 @@ define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_2i32_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %cvt = uitofp <2 x i32> %shuf to <2 x double> ret <2 x double> %cvt @@ -548,6 +638,19 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 ; AVX512VL-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_4i32_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 +; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i32> %a to <4 x double> %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> ret <2 x double> %shuf @@ -759,6 +862,27 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_4i64_to_4f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm1, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512DQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_4i64_to_4f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtuqq2pd %ymm0, %ymm0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i64> %a to <4 x double> ret <4 x double> %cvt } @@ -827,6 +951,18 @@ define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_4i32_to_4f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_4i32_to_4f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i32> %a to <4 x double> ret <4 x double> %cvt } @@ -960,17 +1096,59 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: sitofp_2i64_to_4f32: -; AVX: # BB#0: -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; AVX-NEXT: retq +; VEX-LABEL: sitofp_2i64_to_4f32: +; VEX: # BB#0: +; VEX-NEXT: vpextrq $1, %xmm0, %rax +; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; VEX-NEXT: vmovq %xmm0, %rax +; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; VEX-NEXT: retq +; +; AVX512F-LABEL: sitofp_2i64_to_4f32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: sitofp_2i64_to_4f32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax +; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_2i64_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: retq %cvt = sitofp <2 x i64> %a to <2 x float> %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> ret <4 x float> %ext @@ -992,17 +1170,59 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: sitofp_4i64_to_4f32_undef: -; AVX: # BB#0: -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; AVX-NEXT: retq +; VEX-LABEL: sitofp_4i64_to_4f32_undef: +; VEX: # BB#0: +; VEX-NEXT: vpextrq $1, %xmm0, %rax +; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; VEX-NEXT: vmovq %xmm0, %rax +; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; VEX-NEXT: retq +; +; AVX512F-LABEL: sitofp_4i64_to_4f32_undef: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax +; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: retq %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> %cvt = sitofp <4 x i64> %ext to <4 x float> ret <4 x float> %cvt @@ -1222,6 +1442,27 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_4i64_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: retq %cvt = sitofp <4 x i64> %a to <4 x float> ret <4 x float> %cvt } @@ -1443,17 +1684,47 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; VEX-NEXT: retq ; -; AVX512-LABEL: uitofp_2i64_to_4f32: -; AVX512: # BB#0: -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; AVX512-NEXT: retq +; AVX512F-LABEL: uitofp_2i64_to_4f32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: uitofp_2i64_to_4f32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_2i64_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <2 x i64> %a to <2 x float> %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> ret <4 x float> %ext @@ -1549,17 +1820,47 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; VEX-NEXT: retq ; -; AVX512-LABEL: uitofp_4i64_to_4f32_undef: -; AVX512: # BB#0: -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; AVX512-NEXT: retq +; AVX512F-LABEL: uitofp_4i64_to_4f32_undef: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: retq %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> %cvt = uitofp <4 x i64> %ext to <4 x float> ret <4 x float> %cvt @@ -1609,6 +1910,18 @@ define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_4i32_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i32> %a to <4 x float> ret <4 x float> %cvt } @@ -1961,6 +2274,27 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_4i64_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i64> %a to <4 x float> ret <4 x float> %cvt } @@ -2023,6 +2357,18 @@ define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_8i32_to_8f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_8i32_to_8f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2ps %ymm0, %ymm0 +; AVX512VLDQ-NEXT: retq %cvt = uitofp <8 x i32> %a to <8 x float> ret <8 x float> %cvt } @@ -2186,6 +2532,21 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <2 x i64>, <2 x i64> *%a %cvt = sitofp <2 x i64> %ld to <2 x double> ret <2 x double> %cvt @@ -2213,6 +2574,18 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) { ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_load_2i32_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcvtdq2pd (%rdi), %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_load_2i32_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <2 x i32>, <2 x i32> *%a %cvt = sitofp <2 x i32> %ld to <2 x double> ret <2 x double> %cvt @@ -2349,6 +2722,28 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm1, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512DQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %ymm0 +; AVX512VLDQ-NEXT: retq %ld = load <4 x i64>, <4 x i64> *%a %cvt = sitofp <4 x i64> %ld to <4 x double> ret <4 x double> %cvt @@ -2469,6 +2864,21 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <2 x i64>, <2 x i64> *%a %cvt = uitofp <2 x i64> %ld to <2 x double> ret <2 x double> %cvt @@ -2520,6 +2930,20 @@ define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) { ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <2 x i32>, <2 x i32> *%a %cvt = uitofp <2 x i32> %ld to <2 x double> ret <2 x double> %cvt @@ -2556,6 +2980,22 @@ define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) { ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7] ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_2i16_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_2i16_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VLDQ-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VLDQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7] +; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <2 x i16>, <2 x i16> *%a %cvt = uitofp <2 x i16> %ld to <2 x double> ret <2 x double> %cvt @@ -2594,6 +3034,21 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) { ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_2i8_to_2f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: movzwl (%rdi), %eax +; AVX512DQ-NEXT: vmovd %eax, %xmm0 +; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero +; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <2 x i8>, <2 x i8> *%a %cvt = uitofp <2 x i8> %ld to <2 x double> ret <2 x double> %cvt @@ -2703,6 +3158,28 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm1, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512DQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %ymm0 +; AVX512VLDQ-NEXT: retq %ld = load <4 x i64>, <4 x i64> *%a %cvt = uitofp <4 x i64> %ld to <4 x double> ret <4 x double> %cvt @@ -2775,6 +3252,18 @@ define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2pd (%rdi), %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_4i32_to_4f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 +; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f64: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %ymm0 +; AVX512VLDQ-NEXT: retq %ld = load <4 x i32>, <4 x i32> *%a %cvt = uitofp <4 x i32> %ld to <4 x double> ret <4 x double> %cvt @@ -2918,6 +3407,28 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <4 x i64>, <4 x i64> *%a %cvt = sitofp <4 x i64> %ld to <4 x float> ret <4 x float> %cvt @@ -3141,6 +3652,16 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcvtqq2ps (%rdi), %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: sitofp_load_8i64_to_8f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtqq2ps (%rdi), %ymm0 +; AVX512VLDQ-NEXT: retq %ld = load <8 x i64>, <8 x i64> *%a %cvt = sitofp <8 x i64> %ld to <8 x float> ret <8 x float> %cvt @@ -3483,6 +4004,28 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_4i64_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtuqq2psy (%rdi), %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <4 x i64>, <4 x i64> *%a %cvt = uitofp <4 x i64> %ld to <4 x float> ret <4 x float> %cvt @@ -3535,6 +4078,18 @@ define <4 x float> @uitofp_load_4i32_to_4f32(<4 x i32> *%a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %xmm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_4i32_to_4f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 +; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %xmm0 +; AVX512VLDQ-NEXT: retq %ld = load <4 x i32>, <4 x i32> *%a %cvt = uitofp <4 x i32> %ld to <4 x float> ret <4 x float> %cvt @@ -4033,6 +4588,16 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcvtuqq2ps (%rdi), %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_8i64_to_8f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtuqq2ps (%rdi), %ymm0 +; AVX512VLDQ-NEXT: retq %ld = load <8 x i64>, <8 x i64> *%a %cvt = uitofp <8 x i64> %ld to <8 x float> ret <8 x float> %cvt @@ -4100,6 +4665,18 @@ define <8 x float> @uitofp_load_8i32_to_8f32(<8 x i32> *%a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %ymm0 ; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_load_8i32_to_8f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 +; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_load_8i32_to_8f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %ymm0 +; AVX512VLDQ-NEXT: retq %ld = load <8 x i32>, <8 x i32> *%a %cvt = uitofp <8 x i32> %ld to <8 x float> ret <8 x float> %cvt