From a7c6d16bf9faa38efd88964dc772fe77f16bd958 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 25 Sep 2016 16:33:57 +0000
Subject: [PATCH] [AVX-512] Add the scalar unsigned integer to fp conversion
 instructions to hasUndefRegUpdate.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282356 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp   | 16 +++++++
 test/CodeGen/X86/avx512-cvt.ll    | 24 +++++-----
 test/CodeGen/X86/vec_int_to_fp.ll | 76 +++++++++++++++----------------
 3 files changed, 66 insertions(+), 50 deletions(-)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 14f4102658d..d55f00d3dbd 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -6397,6 +6397,22 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
   case X86::VCVTSI642SDZrm:
   case X86::VCVTSI642SDZrr_Int:
   case X86::VCVTSI642SDZrm_Int:
+  case X86::VCVTUSI2SSZrr:
+  case X86::VCVTUSI2SSZrm:
+  case X86::VCVTUSI2SSZrr_Int:
+  case X86::VCVTUSI2SSZrm_Int:
+  case X86::VCVTUSI642SSZrr:
+  case X86::VCVTUSI642SSZrm:
+  case X86::VCVTUSI642SSZrr_Int:
+  case X86::VCVTUSI642SSZrm_Int:
+  case X86::VCVTUSI2SDZrr:
+  case X86::VCVTUSI2SDZrm:
+  case X86::VCVTUSI2SDZrr_Int:
+  case X86::VCVTUSI2SDZrm_Int:
+  case X86::VCVTUSI642SDZrr:
+  case X86::VCVTUSI642SDZrm:
+  case X86::VCVTUSI642SDZrr_Int:
+  case X86::VCVTUSI642SDZrm_Int:
   case X86::VCVTSD2SSZrr:
   case X86::VCVTSD2SSZrm:
   case X86::VCVTSS2SDZrr:
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 501e6bcde48..ff4270a283d 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -207,16 +207,16 @@ define <4 x float> @ultof432(<4 x i64> %a) {
 ; KNL-LABEL: ultof432:
 ; KNL: ## BB#0:
 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
 ; KNL-NEXT: vmovq %xmm0, %rax
-; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
 ; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; KNL-NEXT: vmovq %xmm0, %rax
-; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
 ; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
 ; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; KNL-NEXT: retq
 ;
@@ -233,27 +233,27 @@ define <8 x double> @ultof64(<8 x i64> %a) {
 ; KNL: ## BB#0:
 ; KNL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
 ; KNL-NEXT: vpextrq $1, %xmm1, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
 ; KNL-NEXT: vmovq %xmm1, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
 ; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
 ; KNL-NEXT: vpextrq $1, %xmm2, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm3
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
 ; KNL-NEXT: vmovq %xmm2, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
 ; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
 ; KNL-NEXT: vpextrq $1, %xmm2, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm3
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
 ; KNL-NEXT: vmovq %xmm2, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
 ; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm3
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
 ; KNL-NEXT: vmovq %xmm0, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm0
 ; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index 338ca3bc3b1..bbc222a29b2 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -418,9 +418,9 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
 ; AVX512-LABEL: uitofp_2i64_to_2f64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512-NEXT: retq
 %cvt = uitofp <2 x i64> %a to <2 x double>
@@ -465,9 +465,9 @@ define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512-NEXT: retq
 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -717,14 +717,14 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vextracti32x4 $1, %ymm0, %xmm1
 ; AVX512-NEXT: vpextrq $1, %xmm1, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
 ; AVX512-NEXT: vmovq %xmm1, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
@@ -1392,11 +1392,11 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512-LABEL: uitofp_2i64_to_4f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; AVX512-NEXT: retq
@@ -1498,11 +1498,11 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512-LABEL: uitofp_4i64_to_4f32_undef:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; AVX512-NEXT: retq
@@ -1872,16 +1872,16 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
 ; AVX512-LABEL: uitofp_4i64_to_4f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
 ; AVX512-NEXT: vextracti32x4 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; AVX512-NEXT: retq
 %cvt = uitofp <4 x i64> %a to <4 x float>
@@ -2340,9 +2340,9 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vmovdqa64 (%rdi), %xmm0
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512-NEXT: retq
 %ld = load <2 x i64>, <2 x i64> *%a
@@ -2391,9 +2391,9 @@ define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) {
 ; AVX512-NEXT: vpxord %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512-NEXT: retq
 %ld = load <2 x i32>, <2 x i32> *%a
@@ -2544,14 +2544,14 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
 ; AVX512-NEXT: vmovdqa64 (%rdi), %ymm0
 ; AVX512-NEXT: vextracti32x4 $1, %ymm0, %xmm1
 ; AVX512-NEXT: vpextrq $1, %xmm1, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
 ; AVX512-NEXT: vmovq %xmm1, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
 ; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
@@ -3251,16 +3251,16 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vmovdqa64 (%rdi), %ymm0
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
 ; AVX512-NEXT: vextracti32x4 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; AVX512-NEXT: retq
 %ld = load <4 x i64>, <4 x i64> *%a
@@ -3750,28 +3750,28 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm1
 ; AVX512-NEXT: vpextrq $1, %xmm1, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
 ; AVX512-NEXT: vmovq %xmm1, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm1
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
 ; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm2
 ; AVX512-NEXT: vmovq %xmm2, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm3
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm3
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
 ; AVX512-NEXT: vpextrq $1, %xmm2, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm3
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
 ; AVX512-NEXT: vextracti32x4 $1, %zmm0, %xmm0
 ; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm3
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; AVX512-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0
 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
 ; AVX512-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
-- 
2.50.1
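
Note on the pattern the updated CHECK lines exercise: vcvtusi2ssq/vcvtusi2sdq
write only the low element of the destination XMM register, and the upper bits
come from the middle operand in the AT&T syntax above, which codegen leaves
undef for plain scalar conversions. Until these opcodes were listed in
hasUndefRegUpdate, that undef operand could stay assigned to %xmm0, making each
conversion falsely depend on the last real write to %xmm0. With the patch, the
execution-dependency-fix pass is told the read is undef and may rename it,
often to the destination itself, as in "vcvtusi2ssq %rax, %xmm1, %xmm1" above.
A minimal sketch that reproduces the pattern, assuming an llc built with this
patch; the RUN flags and the function name are illustrative, not taken from the
commit:

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s
; Each i64 element is extracted to a GPR and converted with vcvtusi2sdq;
; the undef pass-through operand of each conversion no longer has to read
; a live %xmm0.
define <2 x double> @repro_uitofp_2i64(<2 x i64> %a) {
  %cvt = uitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %cvt
}

Which concrete register the pass then picks (%xmm1 versus %xmm3 or %xmm4 in the
tests above) falls out of its clearance heuristics, which is why the CHECK
lines pin exact registers.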