From 95a2fb2bc5523f84079b02dce1894824a0c5386c Mon Sep 17 00:00:00 2001
From: Amaury Sechet
Date: Tue, 8 Oct 2019 13:08:51 +0000
Subject: [PATCH] Add test for rotating truncated vectors. NFC

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374043 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/rot16.ll             |  13 ++-
 test/CodeGen/X86/vector-rotate-128.ll | 143 ++++++++++++++++++++++++++
 2 files changed, 149 insertions(+), 7 deletions(-)

diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
index 81838f84e3a..5a1a8da3c67 100644
--- a/test/CodeGen/X86/rot16.ll
+++ b/test/CodeGen/X86/rot16.ll
@@ -186,22 +186,21 @@ define i32 @rot16_demandedbits(i32 %x, i32 %y) nounwind {
 ; X32-NEXT:    shrl $11, %ecx
 ; X32-NEXT:    shll $5, %eax
 ; X32-NEXT:    orl %ecx, %eax
-; X32-NEXT:    andl $65536, %eax # imm = 0x10000
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: rot16_demandedbits:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    shrl $11, %ecx
-; X64-NEXT:    shll $5, %eax
-; X64-NEXT:    orl %ecx, %eax
-; X64-NEXT:    andl $65536, %eax # imm = 0x10000
+; X64-NEXT:    shrl $11, %eax
+; X64-NEXT:    shll $5, %edi
+; X64-NEXT:    orl %eax, %edi
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    retq
   %t0 = lshr i32 %x, 11
   %t1 = shl i32 %x, 5
   %t2 = or i32 %t0, %t1
-  %t3 = and i32 %t2, 65536
+  %t3 = and i32 %t2, 65535
   ret i32 %t3
 }
 
diff --git a/test/CodeGen/X86/vector-rotate-128.ll b/test/CodeGen/X86/vector-rotate-128.ll
index 3acdca7cda5..666325b35f7 100644
--- a/test/CodeGen/X86/vector-rotate-128.ll
+++ b/test/CodeGen/X86/vector-rotate-128.ll
@@ -2087,3 +2087,146 @@ define <16 x i8> @splatconstant_rotate_mask_v16i8(<16 x i8> %a) nounwind {
   %or = or <16 x i8> %lmask, %rmask
   ret <16 x i8> %or
 }
+
+define <4 x i32> @rot16_demandedbits(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X32-LABEL: rot16_demandedbits:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrl $11, %ecx
+; X32-NEXT:    shll $5, %eax
+; X32-NEXT:    orl %ecx, %eax
+; X32-NEXT:    andl $65536, %eax # imm = 0x10000
+; X32-NEXT:    retl
+;
+; X64-LABEL: rot16_demandedbits:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    shrl $11, %ecx
+; X64-NEXT:    shll $5, %eax
+; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    andl $65536, %eax # imm = 0x10000
+; X64-NEXT:    retq
+; SSE2-LABEL: rot16_demandedbits:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrld $11, %xmm1
+; SSE2-NEXT:    pslld $11, %xmm0
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: rot16_demandedbits:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrld $11, %xmm1
+; SSE41-NEXT:    pslld $11, %xmm0
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: rot16_demandedbits:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsrld $11, %xmm0, %xmm1
+; AVX-NEXT:    vpslld $11, %xmm0, %xmm0
+; AVX-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: rot16_demandedbits:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsrld $11, %xmm0, %xmm1
+; AVX512-NEXT:    vpslld $11, %xmm0, %xmm0
+; AVX512-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX512-NEXT:    retq
+;
+; XOP-LABEL: rot16_demandedbits:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpsrld $11, %xmm0, %xmm1
+; XOP-NEXT:    vpslld $11, %xmm0, %xmm0
+; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; XOP-NEXT:    retq
+;
+; X32-SSE-LABEL: rot16_demandedbits:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE-NEXT:    psrld $11, %xmm1
+; X32-SSE-NEXT:    pslld $11, %xmm0
+; X32-SSE-NEXT:    por %xmm1, %xmm0
+; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT:    retl
+  %t0 = lshr <4 x i32> %x, <i32 11, i32 11, i32 11, i32 11>
+  %t1 = shl <4 x i32> %x, <i32 11, i32 11, i32 11, i32 11>
+  %t2 = or <4 x i32> %t0, %t1
+  %t3 = and <4 x i32> %t2, <i32 65535, i32 65535, i32 65535, i32 65535>
+  ret <4 x i32> %t3
+}
+
+define <4 x i16> @rot16_trunc(<4 x i32> %x, <4 x i32> %y) nounwind {
+; SSE2-LABEL: rot16_trunc:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrld $11, %xmm1
+; SSE2-NEXT:    pslld $5, %xmm0
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: rot16_trunc:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrld $11, %xmm1
+; SSE41-NEXT:    pslld $5, %xmm0
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: rot16_trunc:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsrld $11, %xmm0, %xmm1
+; AVX-NEXT:    vpslld $5, %xmm0, %xmm0
+; AVX-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: rot16_trunc:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsrld $11, %xmm0, %xmm1
+; AVX512-NEXT:    vpslld $5, %xmm0, %xmm0
+; AVX512-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512-NEXT:    retq
+;
+; XOP-LABEL: rot16_trunc:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpsrld $11, %xmm0, %xmm1
+; XOP-NEXT:    vpslld $5, %xmm0, %xmm0
+; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; XOP-NEXT:    retq
+;
+; X32-SSE-LABEL: rot16_trunc:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE-NEXT:    psrld $11, %xmm1
+; X32-SSE-NEXT:    pslld $5, %xmm0
+; X32-SSE-NEXT:    por %xmm1, %xmm0
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-SSE-NEXT:    retl
+  %t0 = lshr <4 x i32> %x, <i32 11, i32 11, i32 11, i32 11>
+  %t1 = shl <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
+  %t2 = or <4 x i32> %t0, %t1
+  %t3 = trunc <4 x i32> %t2 to <4 x i16>
+  ret <4 x i16> %t3
+}
-- 
2.49.0