From: Craig Topper Date: Wed, 29 Jun 2016 03:29:12 +0000 (+0000) Subject: [DAGCombine] Teach DAG combine to handle ORs of shuffles involving zero vectors where... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7268ea5189e6b212075d6cc7376ef409f367463b;p=llvm [DAGCombine] Teach DAG combine to handle ORs of shuffles involving zero vectors where the zero vector is the first operand to the shuffle instead of the second. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274097 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bdc0f62f967..51f58bd62b7 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3751,59 +3751,66 @@ SDValue DAGCombiner::visitOR(SDNode *N) { N1.getValueType().getScalarType().getSizeInBits()), SDLoc(N), N1.getValueType()); - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. if (isa<ShuffleVectorSDNode>(N0) && isa<ShuffleVectorSDNode>(N1) && // Avoid folding a node with illegal type. - TLI.isTypeLegal(VT) && - N0->getOperand(1) == N1->getOperand(1) && - ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { - bool CanFold = true; - unsigned NumElts = VT.getVectorNumElements(); - const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); - const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); - // We construct two shuffle masks: - // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand - // and N1 as the second operand. - // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand - // and N0 as the second operand. - // We do this because OR is commutable and therefore there might be - // two ways to fold this node into a shuffle. 
- SmallVector<int, 4> Mask1; - SmallVector<int, 4> Mask2; - - for (unsigned i = 0; i != NumElts && CanFold; ++i) { - int M0 = SV0->getMaskElt(i); - int M1 = SV1->getMaskElt(i); - - // Both shuffle indexes are undef. Propagate Undef. - if (M0 < 0 && M1 < 0) { - Mask1.push_back(M0); - Mask2.push_back(M0); - continue; - } + TLI.isTypeLegal(VT)) { + bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); + bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); + bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); + // Ensure both shuffles have a zero input. + if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) { + assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); + assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + bool CanFold = true; + int NumElts = VT.getVectorNumElements(); + SmallVector<int, 4> Mask(NumElts); + + for (int i = 0; i != NumElts; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Both shuffle indexes are undef. Propagate Undef. + if (M0 < 0 && M1 < 0) { + Mask[i] = -1; + continue; + } - if (M0 < 0 || M1 < 0 || - (M0 < (int)NumElts && M1 < (int)NumElts) || - (M0 >= (int)NumElts && M1 >= (int)NumElts)) { - CanFold = false; - break; + // Determine if either index is pointing to a zero vector. + bool M0Zero = M0 >= 0 && (ZeroN00 == (M0 < NumElts)); + bool M1Zero = M1 >= 0 && (ZeroN10 == (M1 < NumElts)); + if (M0Zero == M1Zero) { + CanFold = false; + break; + } + + // We have a zero and non-zero element. If the non-zero came from + // SV0 make the index a LHS index. If it came from SV1, make it + // a RHS index. We need to mod by NumElts because we don't care + // which operand it came from in the original shuffles. + Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts; } - Mask1.push_back(M0 < (int)NumElts ? 
M0 : M1 + NumElts); - Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); - } + if (CanFold) { + SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); + SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); - if (CanFold) { - // Fold this sequence only if the resulting shuffle is 'legal'. - if (TLI.isShuffleMaskLegal(Mask1, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), - N1->getOperand(0), &Mask1[0]); - if (TLI.isShuffleMaskLegal(Mask2, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), - N0->getOperand(0), &Mask2[0]); + bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(NewLHS, NewRHS); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, + NewRHS, &Mask[0]); + } } } } diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll index d9e241758d8..22f0dbbd1a9 100644 --- a/test/CodeGen/X86/combine-or.ll +++ b/test/CodeGen/X86/combine-or.ll @@ -361,10 +361,7 @@ define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) { define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2b: ; CHECK: # BB#0: -; CHECK-NEXT: pxor %xmm2, %xmm2 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] -; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero -; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> @@ -375,10 +372,7 @@ define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2c: ; CHECK: # BB#0: -; CHECK-NEXT: pxor %xmm2, %xmm2 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] -; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero -; CHECK-NEXT: 
por %xmm1, %xmm0 +; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> @@ -390,10 +384,7 @@ define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2d: ; CHECK: # BB#0: -; CHECK-NEXT: pxor %xmm2, %xmm2 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] -; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero -; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32>