From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 17 Aug 2017 13:03:34 +0000 (+0000)
Subject: [DAGCombiner] Add support for non-uniform constant vectors to (mul x, (1 << c)) ... 
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8f5ac0464c9c3003646c475e95e11e7d6475bd72;p=llvm

[DAGCombiner] Add support for non-uniform constant vectors to (mul x, (1 << c)) -> x << c

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311083 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bd11c5aa1b3..5a5a81e8f6e 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2631,12 +2631,16 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
                        DAG.getConstant(0, DL, VT), N0);
   }
   // fold (mul x, (1 << c)) -> x << c
-  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
-      IsFullSplat) {
+  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+      DAG.isKnownToBeAPowerOfTwo(N1)) {
     SDLoc DL(N);
-    return DAG.getNode(ISD::SHL, DL, VT, N0,
-                       DAG.getConstant(ConstValue1.logBase2(), DL,
-                                       getShiftAmountTy(N0.getValueType())));
+    SDValue LogBase2 = BuildLogBase2(N1, DL);
+    AddToWorklist(LogBase2.getNode());
+
+    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+    AddToWorklist(Trunc.getNode());
+    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
   }
   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
diff --git a/test/CodeGen/X86/combine-mul.ll b/test/CodeGen/X86/combine-mul.ll
index 3a805828024..1512a3dd2ca 100644
--- a/test/CodeGen/X86/combine-mul.ll
+++ b/test/CodeGen/X86/combine-mul.ll
@@ -97,7 +97,7 @@ define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) {
 ;
 ; AVX-LABEL: combine_vec_mul_pow2b:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = mul <4 x i32> %x, <i32 1, i32 2, i32 4, i32 16>
   ret <4 x i32> %1
@@ -106,30 +106,19 @@ define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) {
 define <4 x i64> @combine_vec_mul_pow2c(<4 x i64> %x) {
 ; SSE-LABEL: combine_vec_mul_pow2c:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2]
-; SSE-NEXT:    movdqa %xmm0, %xmm3
-; SSE-NEXT:    pmuludq %xmm2, %xmm3
-; SSE-NEXT:    psrlq $32, %xmm0
-; SSE-NEXT:    pmuludq %xmm2, %xmm0
-; SSE-NEXT:    psllq $32, %xmm0
-; SSE-NEXT:    paddq %xmm3, %xmm0
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [4,16]
-; SSE-NEXT:    movdqa %xmm1, %xmm3
-; SSE-NEXT:    pmuludq %xmm2, %xmm3
-; SSE-NEXT:    psrlq $32, %xmm1
-; SSE-NEXT:    pmuludq %xmm2, %xmm1
-; SSE-NEXT:    psllq $32, %xmm1
-; SSE-NEXT:    paddq %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    psllq $1, %xmm2
+; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psllq $4, %xmm0
+; SSE-NEXT:    psllq $2, %xmm1
+; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE-NEXT:    movdqa %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_mul_pow2c:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,16]
-; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2
-; AVX-NEXT:    vpsrlq $32, %ymm0, %ymm0
-; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpsllq $32, %ymm0, %ymm0
-; AVX-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %1 = mul <4 x i64> %x, <i64 1, i64 2, i64 4, i64 16>
   ret <4 x i64> %1