From: Simon Pilgrim Date: Thu, 17 Aug 2017 13:03:34 +0000 (+0000) Subject: [DAGCombiner] Add support for non-uniform constant vectors to (mul x, (1 << c)) ... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8f5ac0464c9c3003646c475e95e11e7d6475bd72;p=llvm [DAGCombiner] Add support for non-uniform constant vectors to (mul x, (1 << c)) -> x << c git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311083 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bd11c5aa1b3..5a5a81e8f6e 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2631,12 +2631,16 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(0, DL, VT), N0); } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && - IsFullSplat) { + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1)) { SDLoc DL(N); - return DAG.getNode(ISD::SHL, DL, VT, N0, - DAG.getConstant(ConstValue1.logBase2(), DL, - getShiftAmountTy(N0.getValueType()))); + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && diff --git a/test/CodeGen/X86/combine-mul.ll b/test/CodeGen/X86/combine-mul.ll index 3a805828024..1512a3dd2ca 100644 --- a/test/CodeGen/X86/combine-mul.ll +++ b/test/CodeGen/X86/combine-mul.ll @@ -97,7 +97,7 @@ define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) { ; ; AVX-LABEL: combine_vec_mul_pow2b: ; AVX: # BB#0: -; AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 
= mul <4 x i32> %x, ret <4 x i32> %1 @@ -106,30 +106,19 @@ define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) { define <4 x i64> @combine_vec_mul_pow2c(<4 x i64> %x) { ; SSE-LABEL: combine_vec_mul_pow2c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2] -; SSE-NEXT: movdqa %xmm0, %xmm3 -; SSE-NEXT: pmuludq %xmm2, %xmm3 -; SSE-NEXT: psrlq $32, %xmm0 -; SSE-NEXT: pmuludq %xmm2, %xmm0 -; SSE-NEXT: psllq $32, %xmm0 -; SSE-NEXT: paddq %xmm3, %xmm0 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [4,16] -; SSE-NEXT: movdqa %xmm1, %xmm3 -; SSE-NEXT: pmuludq %xmm2, %xmm3 -; SSE-NEXT: psrlq $32, %xmm1 -; SSE-NEXT: pmuludq %xmm2, %xmm1 -; SSE-NEXT: psllq $32, %xmm1 -; SSE-NEXT: paddq %xmm3, %xmm1 +; SSE-NEXT: movdqa %xmm0, %xmm2 +; SSE-NEXT: psllq $1, %xmm2 +; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psllq $4, %xmm0 +; SSE-NEXT: psllq $2, %xmm1 +; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE-NEXT: movdqa %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_pow2c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,16] -; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 -; AVX-NEXT: vpsrlq $32, %ymm0, %ymm0 -; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpsllq $32, %ymm0, %ymm0 -; AVX-NEXT: vpaddq %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: retq %1 = mul <4 x i64> %x, <i64 1, i64 2, i64 4, i64 16> ret <4 x i64> %1