From: Sanjay Patel
Date: Sun, 31 Mar 2019 15:01:30 +0000 (+0000)
Subject: [InstCombine] canonicalize select shuffles by commuting
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dbc3b913b6334f75bcd8dd6331aaef0770334228;p=clang

[InstCombine] canonicalize select shuffles by commuting

In PR41304:
https://bugs.llvm.org/show_bug.cgi?id=41304
...we have a case where we want to fold a binop of select-shuffle (blended) values.

Rather than try to match commuted variants of the pattern, we can canonicalize the
shuffles and check for mask equality with commuted operands.

We don't produce arbitrary shuffle masks in instcombine, but select-shuffles are a
special case that the backend is required to handle because we already canonicalize
vector select to this shuffle form. So there should be no codegen difference from
this change. It's possible that this improves CSE in IR though.

Differential Revision: https://reviews.llvm.org/D60016

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@357366 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/avx-cmp-builtins.c b/test/CodeGen/avx-cmp-builtins.c
index 38d3ae24cd..609f5964f3 100644
--- a/test/CodeGen/avx-cmp-builtins.c
+++ b/test/CodeGen/avx-cmp-builtins.c
@@ -22,25 +22,25 @@ __m128d test_cmp_ss(__m128 a, __m128 b) {
 __m128 test_cmpgt_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpgt_ss(a, b);
 }
 
 __m128 test_cmpge_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpge_ss(a, b);
 }
 
 __m128 test_cmpngt_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpngt_ss(a, b);
 }
 
 __m128 test_cmpnge_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpnge_ss(a, b);
 }

diff --git a/test/CodeGen/avx-shuffle-builtins.c b/test/CodeGen/avx-shuffle-builtins.c
index fef2879abd..061cad76a5 100644
--- a/test/CodeGen/avx-shuffle-builtins.c
+++ b/test/CodeGen/avx-shuffle-builtins.c
@@ -91,19 +91,19 @@ test_mm256_broadcast_ss(float const *__a) {
 __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
   // CHECK-LABEL: @test_mm256_insertf128_ps_0
-  // CHECK: shufflevector{{.*}}
+  // CHECK: shufflevector{{.*}}
   return _mm256_insertf128_ps(a, b, 0);
 }
 
 __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
   // CHECK-LABEL: @test_mm256_insertf128_pd_0
-  // CHECK: shufflevector{{.*}}
+  // CHECK: shufflevector{{.*}}
   return _mm256_insertf128_pd(a, b, 0);
 }
 
 __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
   // CHECK-LABEL: @test_mm256_insertf128_si256_0
-  // CHECK: shufflevector{{.*}}
+  // CHECK: shufflevector{{.*}}
   return _mm256_insertf128_si256(a, b, 0);
 }
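
For illustration of the select-shuffle form the commit message describes (a sketch only;
the value names %x, %y, %b1, %b2 and the <4 x float> type are made up here, not taken
from the commit): a select shuffle keeps every lane in its original position and only
chooses which operand supplies it, so the same blend can be written two ways by
commuting the operands and adjusting the mask.

  ; blend: lanes 0 and 3 come from %x, lanes 1 and 2 come from %y
  ; (mask elements < 4 index the first operand, elements >= 4 index the second)
  %b1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ; the identical blend with the operands commuted and the mask adjusted
  %b2 = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 1, i32 2, i32 7>

Canonicalizing which operand comes first and then comparing masks of commuted operands,
as the message states, avoids having to match both orderings of the pattern explicitly.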