From: Simon Pilgrim
Date: Sat, 14 Oct 2017 15:01:36 +0000 (+0000)
Subject: [X86][SSE] Support combining AND(EXTRACT(SHUF(X)), C) -> EXTRACT(SHUF(X))
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=809f654cc8f400a0f669278b0133a5ecabd9b47e;p=llvm

[X86][SSE] Support combining AND(EXTRACT(SHUF(X)), C) -> EXTRACT(SHUF(X))

If we are applying a byte mask to a value extracted from a shuffle, see if
we can combine the mask into the shuffle instead.

Fixes the last remaining issue with PR22415.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315807 91177308-0d34-0410-b5e6-96231b3b80d8
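For example (illustrative only, in the notation of the subject line), keeping
only the low two bytes of an extracted 32-bit element:

  AND(EXTRACT(SHUF(X), i), 0x0000FFFF) --> EXTRACT(SHUF'(X), i)

where SHUF' is SHUF with the byte lanes cleared by the constant replaced by
zero lanes. Because the constant only clears whole bytes, it can be expressed
exactly as extra zero entries in the shuffle mask, and the scalar AND (the
andl in the tests below) disappears.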
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6644baf8fa5..adb4986f117 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -32442,6 +32442,45 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Attempt to combine a scalar bitmask AND with an extracted shuffle.
+  if ((VT.getScalarSizeInBits() % 8) == 0 &&
+      N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      isa<ConstantSDNode>(N->getOperand(0).getOperand(1))) {
+    SDValue BitMask = N->getOperand(1);
+    SDValue SrcVec = N->getOperand(0).getOperand(0);
+    EVT SrcVecVT = SrcVec.getValueType();
+
+    // Check that the constant bitmask masks whole bytes.
+    APInt UndefElts;
+    SmallVector<APInt, 64> EltBits;
+    if (VT == SrcVecVT.getScalarType() &&
+        N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
+        getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
+        llvm::all_of(EltBits, [](APInt M) {
+          return M.isNullValue() || M.isAllOnesValue();
+        })) {
+      unsigned NumElts = SrcVecVT.getVectorNumElements();
+      unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
+      unsigned Idx = N->getOperand(0).getConstantOperandVal(1);
+
+      // Create a root shuffle mask from the byte mask and the extracted index.
+      SmallVector<int, 16> ShuffleMask(NumElts * Scale, SM_SentinelUndef);
+      for (unsigned i = 0; i != Scale; ++i) {
+        if (UndefElts[i])
+          continue;
+        int VecIdx = Scale * Idx + i;
+        ShuffleMask[VecIdx] =
+            EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx;
+      }
+
+      if (SDValue Shuffle = combineX86ShufflesRecursively(
+              {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
+              /*HasVarMask*/ false, DAG, DCI, Subtarget))
+        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
+                           N->getOperand(0).getOperand(1));
+    }
+  }
+
   return SDValue();
 }
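As a worked example of the shuffle mask construction above (a hypothetical,
self-contained sketch; the names mirror the variables in the patch but it is
not part of it), take a v4i32 source vector, extract index 1 and the bitmask
0x00FF00FF:

  // Sketch: build the root shuffle mask for NumElts=4, Scale=4, Idx=1 and a
  // byte mask that keeps (little-endian) bytes 0 and 2 of the element.
  #include <cstdio>
  #include <vector>

  int main() {
    const int SM_SentinelUndef = -1, SM_SentinelZero = -2; // LLVM sentinels
    const unsigned NumElts = 4, Scale = 4, Idx = 1; // v4i32: 4 bytes/element
    const bool KeepByte[4] = {true, false, true, false}; // 0x00FF00FF

    std::vector<int> ShuffleMask(NumElts * Scale, SM_SentinelUndef);
    for (unsigned i = 0; i != Scale; ++i) {
      int VecIdx = Scale * Idx + i; // byte position inside the source vector
      ShuffleMask[VecIdx] = KeepByte[i] ? VecIdx : SM_SentinelZero;
    }

    // Prints: -1 -1 -1 -1 4 -2 6 -2 -1 -1 -1 -1 -1 -1 -1 -1
    // Only the bytes of element 1 are demanded; masked bytes become zero.
    for (int M : ShuffleMask)
      std::printf("%d ", M);
    std::printf("\n");
  }

combineX86ShufflesRecursively can then fold this byte-level mask into
whatever shuffle produced SrcVec, and the original extract index is reused
on the combined shuffle, as the test updates below show.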
diff --git a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 3abf0570abf..c17d45f6fd4 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -750,23 +750,20 @@ define <16 x i8> @constant_fold_pshufb_2() {
 define i32 @mask_zzz3_v16i8(<16 x i8> %a0) {
 ; SSSE3-LABEL: mask_zzz3_v16i8:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,10,12,14,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[14,u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSSE3-NEXT:    movd %xmm0, %eax
-; SSSE3-NEXT:    andl $-16777216, %eax # imm = 0xFF000000
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: mask_zzz3_v16i8:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,xmm0[14]
 ; SSE41-NEXT:    pextrd $3, %xmm0, %eax
-; SSE41-NEXT:    andl $-16777216, %eax # imm = 0xFF000000
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: mask_zzz3_v16i8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,xmm0[14]
 ; AVX-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX-NEXT:    andl $-16777216, %eax # imm = 0xFF000000
 ; AVX-NEXT:    retq
   %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>)
   %2 = bitcast <16 x i8> %1 to <4 x i32>
@@ -778,23 +775,20 @@ define i32 @mask_zzz3_v16i8(<16 x i8> %a0) {
 define i32 @mask_z1z3_v16i8(<16 x i8> %a0) {
 ; SSSE3-LABEL: mask_z1z3_v16i8:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,10,12,14,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[10],zero,xmm0[14,u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSSE3-NEXT:    movd %xmm0, %eax
-; SSSE3-NEXT:    andl $-16711936, %eax # imm = 0xFF00FF00
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: mask_z1z3_v16i8:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,xmm0[10],zero,xmm0[14]
 ; SSE41-NEXT:    pextrd $3, %xmm0, %eax
-; SSE41-NEXT:    andl $-16711936, %eax # imm = 0xFF00FF00
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: mask_z1z3_v16i8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,xmm0[10],zero,xmm0[14]
 ; AVX-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX-NEXT:    andl $-16711936, %eax # imm = 0xFF00FF00
 ; AVX-NEXT:    retq
   %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>)
   %2 = bitcast <16 x i8> %1 to <4 x i32>
@@ -806,16 +800,14 @@ define i32 @mask_z1z3_v16i8(<16 x i8> %a0) {
 define i32 @PR22415(double %a0) {
 ; SSE-LABEL: PR22415:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    andl $16777215, %eax # imm = 0xFFFFFF
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR22415:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    andl $16777215, %eax # imm = 0xFFFFFF
 ; AVX-NEXT:    retq
   %1 = bitcast double %a0 to <8 x i8>
   %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>