From 483b6a88f82c7d74cd2f9f8b8a7672f93935b652 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 23 Feb 2019 00:34:58 +0000
Subject: [PATCH] [X86] Add a few test cases for a v8i64 sext/zext from an
 illegal type that needs to be promoted to 128 bits.

If v8i64 isn't a legal type but v4i64 is, these will be split, and then
each half will get its input promoted and become an
any_extend_vector_inreg/punpckhwd + any_extend + and/sign_extend_inreg.
If we instead recognize that the input will be promoted, we can emit the
and/sign_extend_inreg first in a 128-bit register. Then we can
sign_extend/zero_extend one half and pshufd+sign_extend/zero_extend the
other half.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354708 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/vector-sext-widen.ll | 266 ++++++++++++++++++++++++++
 test/CodeGen/X86/vector-sext.ll       | 266 ++++++++++++++++++++++++++
 test/CodeGen/X86/vector-zext-widen.ll | 113 +++++++++++
 test/CodeGen/X86/vector-zext.ll       | 113 +++++++++++
 4 files changed, 758 insertions(+)

diff --git a/test/CodeGen/X86/vector-sext-widen.ll b/test/CodeGen/X86/vector-sext-widen.ll
index f8c55bb26e4..b8a2293949a 100644
--- a/test/CodeGen/X86/vector-sext-widen.ll
+++ b/test/CodeGen/X86/vector-sext-widen.ll
@@ -6057,3 +6057,269 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
   %b = sext <4 x i17> %a to <4 x i32>
   ret <4 x i32> %b
 }
+
+define <8 x i64> @sext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_8i6_to_8i64:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movd %edi, %xmm0
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; SSE2-NEXT: psllq $58, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-NEXT: psrad $26, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
+; SSE2-NEXT: psllq $58, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-NEXT: psrad $26, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7]
+; SSE2-NEXT: psllq $58, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-NEXT: psrad $26, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7]
+; SSE2-NEXT: psllq $58, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-NEXT: psrad $26, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_8i6_to_8i64:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; 
SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: psrad $31, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: psrad $31, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm2 +; SSSE3-NEXT: movdqa %xmm2, %xmm4 +; SSSE3-NEXT: psrad $31, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm3 +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: psrad $31, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_8i6_to_8i64: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrad $31, %xmm1 +; SSE41-NEXT: psrad $26, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrad $31, %xmm2 +; SSE41-NEXT: psrad $26, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psrad $31, %xmm4 +; SSE41-NEXT: psrad $26, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psrad $31, %xmm4 +; SSE41-NEXT: psrad $26, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = 
xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: sext_8i6_to_8i64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vpslld $26, %xmm1, %xmm1 +; AVX1-NEXT: vpsrad $26, %xmm1, %xmm1 +; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpslld $26, %xmm0, %xmm0 +; AVX1-NEXT: vpsrad $26, %xmm0, %xmm0 +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sext_8i6_to_8i64: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vpslld $26, %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $26, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2-NEXT: vpslld $26, %xmm1, %xmm1 +; AVX2-NEXT: vpsrad $26, %xmm1, %xmm1 +; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1 +; AVX2-NEXT: retq +; +; AVX512-LABEL: sext_8i6_to_8i64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovd %edi, %xmm0 +; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512-NEXT: vpsllq $58, %zmm0, %zmm0 +; AVX512-NEXT: vpsraq $58, %zmm0, %zmm0 +; AVX512-NEXT: retq +; +; X32-SSE2-LABEL: sext_8i6_to_8i64: +; X32-SSE2: # %bb.0: # %entry +; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; X32-SSE2-NEXT: paddw {{\.LCPI.*}}, %xmm3 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrad $31, %xmm1 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm0 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm1 +; X32-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X32-SSE2-NEXT: psrad $31, %xmm2 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm1 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm2 +; X32-SSE2-NEXT: movdqa %xmm2, %xmm4 +; X32-SSE2-NEXT: 
psrad $31, %xmm4 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm2 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm3 +; X32-SSE2-NEXT: movdqa %xmm3, %xmm4 +; X32-SSE2-NEXT: psrad $31, %xmm4 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm3 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; X32-SSE2-NEXT: retl +; +; X32-SSE41-LABEL: sext_8i6_to_8i64: +; X32-SSE41: # %bb.0: # %entry +; X32-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; X32-SSE41-NEXT: paddw {{\.LCPI.*}}, %xmm3 +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm0 +; X32-SSE41-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE41-NEXT: psrad $31, %xmm1 +; X32-SSE41-NEXT: psrad $26, %xmm0 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm1 +; X32-SSE41-NEXT: movdqa %xmm1, %xmm2 +; X32-SSE41-NEXT: psrad $31, %xmm2 +; X32-SSE41-NEXT: psrad $26, %xmm1 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm2 +; X32-SSE41-NEXT: movdqa %xmm2, %xmm4 +; X32-SSE41-NEXT: psrad $31, %xmm4 +; X32-SSE41-NEXT: psrad $26, %xmm2 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm3 +; X32-SSE41-NEXT: movdqa %xmm3, %xmm4 +; X32-SSE41-NEXT: psrad $31, %xmm4 +; X32-SSE41-NEXT: psrad $26, %xmm3 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] +; X32-SSE41-NEXT: retl +entry: + %a = trunc i32 %x to i6 + %b = insertelement <8 x i6> undef, i6 %a, i32 0 + %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer + %d = add <8 x i6> %c, + %e = sext <8 x i6> %d to <8 x i64> + ret <8 x i64> %e +} diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll index 5449552d195..6466aadebb8 100644 --- a/test/CodeGen/X86/vector-sext.ll +++ b/test/CodeGen/X86/vector-sext.ll @@ -6075,3 +6075,269 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { %b = sext <4 x i17> %a to <4 x i32> ret <4 x i32> %b } + +define <8 x i64> @sext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_8i6_to_8i64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = 
xmm0[0,0,0,0] +; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSE2-NEXT: psllq $58, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; SSE2-NEXT: psrad $26, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; SSE2-NEXT: psllq $58, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; SSE2-NEXT: psrad $26, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; SSE2-NEXT: psllq $58, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: psrad $31, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; SSE2-NEXT: psrad $26, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; SSE2-NEXT: psllq $58, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: psrad $31, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; SSE2-NEXT: psrad $26, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_8i6_to_8i64: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movd %edi, %xmm0 +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: psrad $31, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: psrad $31, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm2 +; SSSE3-NEXT: movdqa %xmm2, %xmm4 +; SSSE3-NEXT: psrad $31, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: psllq $58, %xmm3 +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: psrad $31, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; SSSE3-NEXT: psrad $26, %xmm3 +; SSSE3-NEXT: pshufd 
{{.*#+}} xmm3 = xmm3[1,3,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_8i6_to_8i64: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrad $31, %xmm1 +; SSE41-NEXT: psrad $26, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrad $31, %xmm2 +; SSE41-NEXT: psrad $26, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psrad $31, %xmm4 +; SSE41-NEXT: psrad $26, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: psllq $58, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psrad $31, %xmm4 +; SSE41-NEXT: psrad $26, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: sext_8i6_to_8i64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vpslld $26, %xmm1, %xmm1 +; AVX1-NEXT: vpsrad $26, %xmm1, %xmm1 +; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpslld $26, %xmm0, %xmm0 +; AVX1-NEXT: vpsrad $26, %xmm0, %xmm0 +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sext_8i6_to_8i64: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vpslld $26, %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $26, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2-NEXT: vpslld $26, %xmm1, %xmm1 +; AVX2-NEXT: vpsrad $26, %xmm1, %xmm1 +; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1 +; AVX2-NEXT: retq +; +; AVX512-LABEL: sext_8i6_to_8i64: +; 
AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovd %edi, %xmm0 +; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512-NEXT: vpsllq $58, %zmm0, %zmm0 +; AVX512-NEXT: vpsraq $58, %zmm0, %zmm0 +; AVX512-NEXT: retq +; +; X32-SSE2-LABEL: sext_8i6_to_8i64: +; X32-SSE2: # %bb.0: # %entry +; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; X32-SSE2-NEXT: paddw {{\.LCPI.*}}, %xmm3 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrad $31, %xmm1 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm0 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm1 +; X32-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X32-SSE2-NEXT: psrad $31, %xmm2 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm1 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm2 +; X32-SSE2-NEXT: movdqa %xmm2, %xmm4 +; X32-SSE2-NEXT: psrad $31, %xmm4 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm2 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; X32-SSE2-NEXT: psllq $58, %xmm3 +; X32-SSE2-NEXT: movdqa %xmm3, %xmm4 +; X32-SSE2-NEXT: psrad $31, %xmm4 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] +; X32-SSE2-NEXT: psrad $26, %xmm3 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; X32-SSE2-NEXT: retl +; +; X32-SSE41-LABEL: sext_8i6_to_8i64: +; X32-SSE41: # %bb.0: # %entry +; X32-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; X32-SSE41-NEXT: paddw {{\.LCPI.*}}, %xmm3 +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm0 +; X32-SSE41-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE41-NEXT: psrad $31, %xmm1 +; X32-SSE41-NEXT: psrad $26, %xmm0 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm1 +; X32-SSE41-NEXT: movdqa %xmm1, %xmm2 +; X32-SSE41-NEXT: psrad $31, %xmm2 +; X32-SSE41-NEXT: psrad $26, 
%xmm1 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm2 +; X32-SSE41-NEXT: movdqa %xmm2, %xmm4 +; X32-SSE41-NEXT: psrad $31, %xmm4 +; X32-SSE41-NEXT: psrad $26, %xmm2 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] +; X32-SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; X32-SSE41-NEXT: psllq $58, %xmm3 +; X32-SSE41-NEXT: movdqa %xmm3, %xmm4 +; X32-SSE41-NEXT: psrad $31, %xmm4 +; X32-SSE41-NEXT: psrad $26, %xmm3 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; X32-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] +; X32-SSE41-NEXT: retl +entry: + %a = trunc i32 %x to i6 + %b = insertelement <8 x i6> undef, i6 %a, i32 0 + %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer + %d = add <8 x i6> %c, + %e = sext <8 x i6> %d to <8 x i64> + ret <8 x i64> %e +} diff --git a/test/CodeGen/X86/vector-zext-widen.ll b/test/CodeGen/X86/vector-zext-widen.ll index 4d7a4f36659..df9d84668d1 100644 --- a/test/CodeGen/X86/vector-zext-widen.ll +++ b/test/CodeGen/X86/vector-zext-widen.ll @@ -2402,3 +2402,116 @@ define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) { %b = zext <4 x i17> %a to <4 x i32> ret <4 x i32> %b } + +define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_8i6_to_8i64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,63] +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_8i6_to_8i64: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movd %edi, %xmm0 +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [63,63] +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand %xmm4, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand %xmm4, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand %xmm4, %xmm3 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_8i6_to_8i64: +; SSE41: # %bb.0: # %entry +; 
SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [63,63] +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: retq +; +; AVX1-LABEL: zext_8i6_to_8i64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [63,63,63,63] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: zext_8i6_to_8i64: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63] +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_8i6_to_8i64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovd %edi, %xmm0 +; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: retq +entry: + %a = trunc i32 %x to i6 + %b = insertelement <8 x i6> undef, i6 %a, i32 0 + %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer + %d = add <8 x i6> %c, + %e = zext <8 x i6> %d to <8 x i64> + ret <8 x i64> %e +} diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index d1983483412..edbfd1d8c18 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -2416,3 +2416,116 @@ define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) { %b = zext <4 x i17> %a to <4 x i32> ret <4 
x i32> %b } + +define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_8i6_to_8i64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,63] +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_8i6_to_8i64: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movd %edi, %xmm0 +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [63,63] +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand %xmm4, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand %xmm4, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand %xmm4, %xmm3 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_8i6_to_8i64: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] +; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [63,63] +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: retq +; +; AVX1-LABEL: zext_8i6_to_8i64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [63,63,63,63] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = 
xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_8i6_to_8i64:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vmovd %edi, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: zext_8i6_to_8i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vmovd %edi, %xmm0
+; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: retq
+entry:
+  %a = trunc i32 %x to i6
+  %b = insertelement <8 x i6> undef, i6 %a, i32 0
+  %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
+  %d = add <8 x i6> %c,
+  %e = zext <8 x i6> %d to <8 x i64>
+  ret <8 x i64> %e
+}
-- 
2.40.0
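
A note on the IR pattern above: in this copy of the patch the <8 x i6> constant
operand of each `add` did not survive, so the `%d = add <8 x i6> %c,` lines in
the hunks are incomplete. Below is a minimal sketch of the sext variant with an
assumed ascending-lane constant (the zext tests differ only in the final extend
being `zext`); the exact constant values are not recoverable from the CHECK
lines, which only show the paddw folding it from the constant pool:

define <8 x i64> @sext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
entry:
  ; Splat the low 6 bits of %x into all eight i6 lanes.
  %a = trunc i32 %x to i6
  %b = insertelement <8 x i6> undef, i6 %a, i32 0
  %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
  ; Assumed constant: any non-uniform vector serves here, since its only job
  ; is to keep the value from simplifying as a splat (this is the paddw from
  ; the constant pool in the CHECK lines).
  %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
  ; The i6 -> i64 sext is what lowers to the shift pairs in the checks:
  ; psllq $58 followed by psrad $26/$31 on SSE, or vpsllq $58 + vpsraq $58
  ; on AVX512 -- a sign_extend_inreg from bit 6 (64 - 6 = 58, 32 - 6 = 26).
  %e = sext <8 x i6> %d to <8 x i64>
  ret <8 x i64> %e
}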