From: Simon Pilgrim Date: Wed, 21 Jun 2017 12:58:19 +0000 (+0000) Subject: [X86][SSE] Add SSE2/SSE42 shuffle truncation tests X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1fa0c45640a15ab570ea329534770cd1dfcf0634;p=llvm [X86][SSE] Add SSE2/SSE42 shuffle truncation tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305904 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/shuffle-vs-trunc-128.ll b/test/CodeGen/X86/shuffle-vs-trunc-128.ll index f9fe97b21ee..12a8443c31b 100644 --- a/test/CodeGen/X86/shuffle-vs-trunc-128.ll +++ b/test/CodeGen/X86/shuffle-vs-trunc-128.ll @@ -1,4 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F @@ -11,6 +13,21 @@ ; Ideally, the shuffles should be lowered to code with the same quality as the truncates. define void @shuffle_v16i8_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind { +; SSE2-LABEL: shuffle_v16i8_to_v8i8: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movq %xmm0, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: shuffle_v16i8_to_v8i8: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE42-NEXT: movq %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: shuffle_v16i8_to_v8i8: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0 @@ -51,6 +68,21 @@ define void @shuffle_v16i8_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind { } define void @trunc_v8i16_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind { +; SSE2-LABEL: trunc_v8i16_to_v8i8: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movq %xmm0, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: trunc_v8i16_to_v8i8: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE42-NEXT: movq %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: trunc_v8i16_to_v8i8: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0 @@ -92,6 +124,21 @@ define void @trunc_v8i16_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind { } define void @shuffle_v8i16_to_v4i16(<8 x i16>* %L, <4 x i16>* %S) nounwind { +; SSE2-LABEL: shuffle_v8i16_to_v4i16: +; SSE2: # BB#0: +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: movq %xmm0, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: shuffle_v8i16_to_v4i16: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSE42-NEXT: movq %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: shuffle_v8i16_to_v4i16: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0 @@ -131,6 +178,21 @@ define void @shuffle_v8i16_to_v4i16(<8 x i16>* %L, <4 x i16>* %S) nounwind { } define void @trunc_v4i32_to_v4i16(<8 x i16>* %L, <4 x i16>* %S) nounwind { +; SSE2-LABEL: trunc_v4i32_to_v4i16: +; SSE2: # BB#0: +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: movq %xmm0, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: trunc_v4i32_to_v4i16: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSE42-NEXT: movq %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: trunc_v4i32_to_v4i16: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0 @@ -171,6 +233,12 @@ define void @trunc_v4i32_to_v4i16(<8 x i16>* %L, <4 x i16>* %S) nounwind { } define void @shuffle_v4i32_to_v2i32(<4 x i32>* %L, <2 x i32>* %S) nounwind { +; SSE-LABEL: shuffle_v4i32_to_v2i32: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,2,2,3] +; SSE-NEXT: movq %xmm0, (%rsi) +; SSE-NEXT: retq +; ; AVX-LABEL: shuffle_v4i32_to_v2i32: ; AVX: # BB#0: ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3] @@ -207,6 +275,12 @@ define void @shuffle_v4i32_to_v2i32(<4 x i32>* %L, <2 x i32>* %S) nounwind { } define void @trunc_v2i64_to_v2i32(<4 x i32>* %L, <2 x i32>* %S) nounwind { +; SSE-LABEL: trunc_v2i64_to_v2i32: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,2,2,3] +; SSE-NEXT: movq %xmm0, (%rsi) +; SSE-NEXT: retq +; ; AVX-LABEL: trunc_v2i64_to_v2i32: ; AVX: # BB#0: ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3] @@ -244,6 +318,22 @@ define void @trunc_v2i64_to_v2i32(<4 x i32>* %L, <2 x i32>* %S) nounwind { } define void @shuffle_v16i8_to_v4i8(<16 x i8>* %L, <4 x i8>* %S) nounwind { +; SSE2-LABEL: shuffle_v16i8_to_v4i8: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movd %xmm0, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: shuffle_v16i8_to_v4i8: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE42-NEXT: movd %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: shuffle_v16i8_to_v4i8: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0 @@ -283,6 +373,22 @@ define void @shuffle_v16i8_to_v4i8(<16 x i8>* %L, <4 x i8>* %S) nounwind { } define void @trunc_v4i32_to_v4i8(<16 x i8>* %L, <4 x i8>* %S) nounwind { +; SSE2-LABEL: trunc_v4i32_to_v4i8: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movd %xmm0, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: trunc_v4i32_to_v4i8: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE42-NEXT: movd %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: trunc_v4i32_to_v4i8: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0 @@ -323,6 +429,13 @@ define void @trunc_v4i32_to_v4i8(<16 x i8>* %L, <4 x i8>* %S) nounwind { } define void @shuffle_v8i16_to_v2i16(<8 x i16>* %L, <2 x i16>* %S) nounwind { +; SSE-LABEL: shuffle_v8i16_to_v2i16: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,2,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-NEXT: movd %xmm0, (%rsi) +; SSE-NEXT: retq +; ; AVX-LABEL: shuffle_v8i16_to_v2i16: ; AVX: # BB#0: ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3] @@ -362,6 +475,13 @@ define void @shuffle_v8i16_to_v2i16(<8 x i16>* %L, <2 x i16>* %S) nounwind { } define void @trunc_v2i64_to_v2i16(<8 x i16>* %L, <2 x i16>* %S) nounwind { +; SSE-LABEL: trunc_v2i64_to_v2i16: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,2,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-NEXT: movd %xmm0, (%rsi) +; SSE-NEXT: retq +; ; AVX-LABEL: trunc_v2i64_to_v2i16: ; AVX: # BB#0: ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3] @@ -402,6 +522,24 @@ define void @trunc_v2i64_to_v2i16(<8 x i16>* %L, <2 x i16>* %S) nounwind { } define void @shuffle_v16i8_to_v2i8(<16 x i8>* %L, <2 x i8>* %S) nounwind { +; SSE2-LABEL: shuffle_v16i8_to_v2i8: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movw %ax, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: shuffle_v16i8_to_v2i8: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE42-NEXT: pextrw $0, %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: shuffle_v16i8_to_v2i8: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0 @@ -441,6 +579,24 @@ define void @shuffle_v16i8_to_v2i8(<16 x i8>* %L, <2 x i8>* %S) nounwind { } define void @trunc_v2i64_to_v2i8(<16 x i8>* %L, <2 x i8>* %S) nounwind { +; SSE2-LABEL: trunc_v2i64_to_v2i8: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movw %ax, (%rsi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: trunc_v2i64_to_v2i8: +; SSE42: # BB#0: +; SSE42-NEXT: movdqa (%rdi), %xmm0 +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE42-NEXT: pextrw $0, %xmm0, (%rsi) +; SSE42-NEXT: retq +; ; AVX-LABEL: trunc_v2i64_to_v2i8: ; AVX: # BB#0: ; AVX-NEXT: vmovdqa (%rdi), %xmm0