From: Simon Pilgrim Date: Fri, 17 Feb 2017 15:14:48 +0000 (+0000) Subject: [DAGCombine] Recognise any_extend_vector_inreg and truncation style shuffle masks X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2566cbcc53d2888e3cf8230d176a011ec938b337;p=llvm [DAGCombine] Recognise any_extend_vector_inreg and truncation style shuffle masks During legalization we are often creating shuffles (via a build_vector scalarization stage) that are "any_extend_vector_inreg" style masks, and also other masks that are the equivalent of "truncate_vector_inreg" (if we had such a thing). This patch is an attempt to match these cases to help undo the effects of just leaving shuffle lowering to handle it - which typically means we lose track of the undefined elements of the shuffles resulting in an unnecessary extension+truncation stage for widened illegal types. The 2011-10-21-widen-cmp.ll regression will be fixed by making SIGN_EXTEND_VECTOR_IN_REG legal in SSE instead of lowering them to X86ISD::VSEXT (PR31712). Differential Revision: https://reviews.llvm.org/D29454 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295451 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7997a333862..9aa9b310d49 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7530,6 +7530,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } + // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x) + if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || + N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || + N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && + N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) { + if (!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) + return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT); + } + // fold (sext_in_reg (zext x)) -> (sext x) // iff we are extending the source sign bit. if (N0.getOpcode() == ISD::ZERO_EXTEND) { @@ -14194,6 +14204,113 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return DAG.getBuildVector(VT, SDLoc(SVN), Ops); } +// Match shuffles that can be converted to any_vector_extend_in_reg. +// This is often generated during legalization. +// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) +// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. +SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { + EVT VT = SVN->getValueType(0); + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + + // TODO Add support for big-endian when we have a test case. + if (!VT.isInteger() || IsBigEndian) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + ArrayRef Mask = SVN->getMask(); + SDValue N0 = SVN->getOperand(0); + + // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) + auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] < 0) + continue; + if ((i % Scale) == 0 && Mask[i] == (i / Scale)) + continue; + return false; + } + return true; + }; + + // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for + // power-of-2 extensions as they are the most likely. + for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { + if (!isAnyExtend(Scale)) + continue; + + EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) + return DAG.getBitcast(VT, + DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT)); + } + + return SDValue(); +} + +// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of +// each source element of a large type into the lowest elements of a smaller +// destination type. This is often generated during legalization. +// If the source node itself was a '*_extend_vector_inreg' node then we should +// then be able to remove it. +SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { + EVT VT = SVN->getValueType(0); + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + + // TODO Add support for big-endian when we have a test case. + if (!VT.isInteger() || IsBigEndian) + return SDValue(); + + SDValue N0 = SVN->getOperand(0); + while (N0.getOpcode() == ISD::BITCAST) + N0 = N0.getOperand(0); + + unsigned Opcode = N0.getOpcode(); + if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && + Opcode != ISD::SIGN_EXTEND_VECTOR_INREG && + Opcode != ISD::ZERO_EXTEND_VECTOR_INREG) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + ArrayRef Mask = SVN->getMask(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); + + // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> + // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> + // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1> + auto isTruncate = [&Mask, &NumElts](unsigned Scale) { + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] < 0) + continue; + if ((i * Scale) < NumElts && Mask[i] == (i * Scale)) + continue; + return false; + } + return true; + }; + + // At the moment we just handle the case where we've truncated back to the + // same size as before the extension. + // TODO: handle more extension/truncation cases as cases arise. + if (EltSizeInBits != ExtSrcSizeInBits) + return SDValue(); + + // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for + // power-of-2 truncations as they are the most likely. + for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) + if (isTruncate(Scale)) + return DAG.getBitcast(VT, N00); + + return SDValue(); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -14298,6 +14415,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) return S; + // Match shuffles that can be converted to any_vector_extend_in_reg. + if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) + return V; + + // Combine "truncate_vector_in_reg" style shuffles. + if (SDValue V = combineTruncationShuffle(SVN, DAG)) + return V; + if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && (N1.isUndef() || diff --git a/test/CodeGen/X86/2011-10-21-widen-cmp.ll b/test/CodeGen/X86/2011-10-21-widen-cmp.ll index 429960333ae..782490900b2 100644 --- a/test/CodeGen/X86/2011-10-21-widen-cmp.ll +++ b/test/CodeGen/X86/2011-10-21-widen-cmp.ll @@ -9,7 +9,8 @@ define void @cmp_2_floats(<2 x float> %a, <2 x float> %b) { ; CHECK: # BB#0: # %entry ; CHECK-NEXT: movaps %xmm0, %xmm2 ; CHECK-NEXT: cmpordps %xmm0, %xmm0 -; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1] +; CHECK-NEXT: pmovsxdq %xmm0, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-NEXT: pslld $31, %xmm0 ; CHECK-NEXT: blendvps %xmm0, %xmm2, %xmm1 ; CHECK-NEXT: movlps %xmm1, (%rax) diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 87deeb9e16c..40f342185fd 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1075,7 +1075,6 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) { ; NOVL: ## BB#0: ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1] ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 ; NOVL-NEXT: retq ; diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index b0e3e95afe2..c39509b788a 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1226,11 +1226,7 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 { ; KNL-LABEL: test46: ; KNL: ## BB#0: ; KNL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; KNL-NEXT: vpsrad $31, %xmm0, %xmm1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; KNL-NEXT: vpmovsxdq %xmm0, %xmm0 ; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; KNL-NEXT: retq ; diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 27008f26b81..eae0b710427 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1277,8 +1277,6 @@ define <2 x float> @test_maxps_illegal_v2f32(<2 x float> %x, <2 x float> %y) { ; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: cmpleps %xmm2, %xmm0 -; STRICT-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1] -; STRICT-NEXT: pslld $31, %xmm0 ; STRICT-NEXT: blendvps %xmm0, %xmm2, %xmm1 ; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq @@ -1297,8 +1295,6 @@ define <2 x float> @test_minps_illegal_v2f32(<2 x float> %x, <2 x float> %y) { ; STRICT: # BB#0: ; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmpleps %xmm1, %xmm0 -; STRICT-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1] -; STRICT-NEXT: pslld $31, %xmm0 ; STRICT-NEXT: blendvps %xmm0, %xmm2, %xmm1 ; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq