From: Craig Topper <craig.topper@gmail.com>
Date: Thu, 19 Jan 2017 07:37:45 +0000 (+0000)
Subject: [AVX-512] Add test cases that show where we are using two subvector inserts to broadc... 
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9720a787a1c430ef1bf2ac2dc7d5564287612c3b;p=llvm

[AVX-512] Add test cases that show where we are using two subvector inserts to broadcast a 128-bit subvector into a 512-bit vector. We'd be better off using something like SHUFF32X4.

If the subvector comes from a load, we convert to SUBV_BROADCAST and use a broadcast instruction. But if there is no load we keep the inserts. I think we should create the SUBV_BROADCAST even without the load and let isel use the fallback patterns that are used if the load can't be folded. This will use the SHUFF32X4 or similar instruction for the 128-bit into 512-bit case and a single insert for 128 into 256 or 256 into 512.

This should be fixed so subvector broadcast intrinsics can be replaced with native IR since some of those currently lower directly to SHUFF32X4.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292475 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 983360184fb..482f07bb0bb 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -548,3 +548,25 @@ define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13
   %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
   ret <16 x i32> %res
 }
+
+define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x i32> %a) {
+; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
+; ALL:       # BB#0:
+; ALL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x i32> %res
+}
+
+define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x float> %a) {
+; ALL-LABEL: mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
+; ALL:       # BB#0:
+; ALL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x float> %res
+}
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 26e08d365e0..a85e74b363b 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2571,3 +2571,39 @@ define <8 x i64> @shuffle_v8i64_01234589(<8 x i64> %a, <8 x i64> %b) {
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
   ret <8 x i64> %shuffle
 }
+
+define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) {
+; AVX512F-LABEL: shuffle_v2i64_v8i64_01010101:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: shuffle_v2i64_v8i64_01010101:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-32-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-32-NEXT:    retl
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x double> @shuffle_v2f64_v8f64_01010101(<2 x double> %a) {
+; AVX512F-LABEL: shuffle_v2f64_v8f64_01010101:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: shuffle_v2f64_v8f64_01010101:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-32-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-32-NEXT:    retl
+  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <8 x double> %shuffle
+}