From f2c20cc531e80a0a3c16e742170e9086fa2e4dbc Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 11 Oct 2019 14:17:56 +0000
Subject: [PATCH] [DAGCombiner] fold vselect-of-constants to shift

The diffs suggest that we are missing some more basic
analysis/transforms, but this keeps the vector path in
sync with the scalar (rL374397). This is again a
preliminary step for introducing the reverse transform
in IR as proposed in D63382.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374555 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  9 +++++++
 test/CodeGen/X86/selectcc-to-shiftand.ll | 10 +++-----
 test/CodeGen/X86/vselect.ll              | 31 ++++++++----------------
 3 files changed, 22 insertions(+), 28 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 41303921d87..7fa95ce5cf9 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8614,6 +8614,15 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
   }
 
+  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
+  APInt Pow2C;
+  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+      isNullOrNullSplat(N2)) {
+    SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+  }
+
   // The general case for select-of-constants:
   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
diff --git a/test/CodeGen/X86/selectcc-to-shiftand.ll b/test/CodeGen/X86/selectcc-to-shiftand.ll
index 6cc41fd0cf8..8e8e1e806f4 100644
--- a/test/CodeGen/X86/selectcc-to-shiftand.ll
+++ b/test/CodeGen/X86/selectcc-to-shiftand.ll
@@ -213,9 +213,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
 define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
 ; ANY-LABEL: sel_shift_bool_v8i16:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    psllw $15, %xmm0
-; ANY-NEXT:    psraw $15, %xmm0
 ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    psllw $7, %xmm0
 ; ANY-NEXT:    retq
   %shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
   ret <8 x i16> %shl
@@ -224,9 +223,8 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
 define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
 ; ANY-LABEL: sel_shift_bool_v4i32:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    pslld $31, %xmm0
-; ANY-NEXT:    psrad $31, %xmm0
 ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    pslld $6, %xmm0
 ; ANY-NEXT:    retq
   %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
   ret <4 x i32> %shl
@@ -235,10 +233,8 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
 define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
 ; ANY-LABEL: sel_shift_bool_v2i64:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    psllq $63, %xmm0
-; ANY-NEXT:    psrad $31, %xmm0
-; ANY-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    psllq $16, %xmm0
 ; ANY-NEXT:    retq
   %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
   ret <2 x i64> %shl
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll
index d3e2b5477ac..22166021b4e 100644
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -647,33 +647,22 @@ define void @vselect_allzeros_LHS_multiple_use_setcc(<4 x i32> %x, <4 x i32> %y,
 ; This test case previously crashed after r363802, r363850, and r363856 due
 ; any_extend_vector_inreg not being handled by the X86 backend.
 define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
-; SSE2-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    psrad $24, %xmm0
-; SSE2-NEXT:    movq %xmm0, %rax
-; SSE2-NEXT:    andl $32768, %eax # imm = 0x8000
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    pmovsxbq %xmm0, %xmm0
-; SSE41-NEXT:    movq %xmm0, %rax
-; SSE41-NEXT:    andl $32768, %eax # imm = 0x8000
-; SSE41-NEXT:    retq
+; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    shlq $15, %rax
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    andl $32768, %eax # imm = 0x8000
+; AVX-NEXT:    andl $1, %eax
+; AVX-NEXT:    shlq $15, %rax
 ; AVX-NEXT:    retq
 0:
   %1 = load <8 x i8>, <8 x i8>* %x
-- 
2.40.0