From: Simon Pilgrim
Date: Thu, 1 Aug 2019 14:46:03 +0000 (+0000)
Subject: [X86][SSE] SimplifyMultipleUseDemandedBits - Add PEXTR/PINSR B+W handling
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7d6377b68a98488031725834ffb732e520a26de5;p=llvm

[X86][SSE] SimplifyMultipleUseDemandedBits - Add PEXTR/PINSR B+W handling

This adds SimplifyMultipleUseDemandedBitsForTargetNode X86 support and uses
it to allow us to peek through vector insertions to avoid dependencies on
entire insertion chains.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367570 91177308-0d34-0410-b5e6-96231b3b80d8
---

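For reference, the promote-vec3.ll checks updated below are driven by IR of
roughly the following shape (a sketch; the exact test body and value names are
assumed, not shown in this patch):

  ; Assumed reconstruction of the zext_i8 test input: the <3 x i8> argument is
  ; promoted, each lane inserted with PINSRB/PINSRW, then re-extracted as i16.
  define <3 x i16> @zext_i8(<3 x i8> %x) {
    %z = zext <3 x i8> %x to <3 x i16>
    ret <3 x i16> %z
  }

In the AVX-32 checks, vpextrw $2 only demands byte lanes 4-5, so it can now
read the vector from before the final vpinsrb $8 (which is rewritten to write
%xmm1) instead of waiting for the whole insertion chain to complete.
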
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6fe906de8cf..c2be03f8d90 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -34577,6 +34577,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
                                KnownVec, TLO, Depth + 1))
         return true;
 
+      if (SDValue V = SimplifyMultipleUseDemandedBits(
+              Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1))
+        return TLO.CombineTo(
+            Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1)));
+
       Known = KnownVec.zext(BitWidth, true);
       return false;
     }
@@ -34678,6 +34683,28 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
       Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
 }
 
+SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+    SelectionDAG &DAG, unsigned Depth) const {
+  unsigned Opc = Op.getOpcode();
+  switch (Opc) {
+  case X86ISD::PINSRB:
+  case X86ISD::PINSRW: {
+    // If we don't demand the inserted element, return the base vector.
+    SDValue Vec = Op.getOperand(0);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    MVT VecVT = Vec.getSimpleValueType();
+    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+        !DemandedElts[CIdx->getZExtValue()])
+      return Vec;
+    break;
+  }
+  }
+
+  return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+      Op, DemandedBits, DemandedElts, DAG, Depth);
+}
+
 /// Check if a vector extract from a target-specific shuffle of a load can be
 /// folded into a single element load.
 /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index dd11cc4497a..625b42d3515 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -920,6 +920,10 @@ namespace llvm {
                                            TargetLoweringOpt &TLO,
                                            unsigned Depth) const override;
 
+    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
+        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+        SelectionDAG &DAG, unsigned Depth) const override;
+
     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 
     SDValue unwrapAddress(SDValue N) const override;
diff --git a/test/CodeGen/X86/promote-vec3.ll b/test/CodeGen/X86/promote-vec3.ll
index cb4e99b3b18..b66570e8aaa 100644
--- a/test/CodeGen/X86/promote-vec3.ll
+++ b/test/CodeGen/X86/promote-vec3.ll
@@ -8,14 +8,14 @@ define <3 x i16> @zext_i8(<3 x i8>) {
 ; SSE3-LABEL: zext_i8:
 ; SSE3:       # %bb.0:
-; SSE3-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; SSE3-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; SSE3-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; SSE3-NEXT:    movd %edx, %xmm0
-; SSE3-NEXT:    pinsrw $1, %ecx, %xmm0
-; SSE3-NEXT:    pinsrw $2, %eax, %xmm0
+; SSE3-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SSE3-NEXT:    movd %eax, %xmm0
 ; SSE3-NEXT:    pextrw $0, %xmm0, %eax
+; SSE3-NEXT:    pinsrw $1, %edx, %xmm0
 ; SSE3-NEXT:    pextrw $1, %xmm0, %edx
+; SSE3-NEXT:    pinsrw $2, %ecx, %xmm0
 ; SSE3-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE3-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE3-NEXT:    # kill: def $dx killed $dx killed $edx
 ; SSE3-NEXT:    # kill: def $cx killed $cx killed $ecx
@@ -27,9 +27,9 @@ define <3 x i16> @zext_i8(<3 x i8>) {
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
 ; SSE41-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
 ; SSE41-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
+; SSE41-NEXT:    pextrw $2, %xmm0, %edx
 ; SSE41-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
-; SSE41-NEXT:    pextrw $2, %xmm0, %edx
 ; SSE41-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE41-NEXT:    # kill: def $dx killed $dx killed $edx
@@ -41,10 +41,10 @@ define <3 x i16> @zext_i8(<3 x i8>) {
 ; AVX-32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
 ; AVX-32-NEXT:    vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; AVX-32-NEXT:    vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT:    vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT:    vmovd %xmm0, %eax
+; AVX-32-NEXT:    vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm1
 ; AVX-32-NEXT:    vpextrw $2, %xmm0, %edx
-; AVX-32-NEXT:    vpextrw $4, %xmm0, %ecx
+; AVX-32-NEXT:    vmovd %xmm1, %eax
+; AVX-32-NEXT:    vpextrw $4, %xmm1, %ecx
 ; AVX-32-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-32-NEXT:    # kill: def $dx killed $dx killed $edx
 ; AVX-32-NEXT:    # kill: def $cx killed $cx killed $ecx