From: Simon Pilgrim Date: Tue, 6 Aug 2019 13:10:42 +0000 (+0000) Subject: [X86][SSE] Call SimplifyMultipleUseDemandedBits on PACKSS/PACKUS arguments. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=19f5c7bbedc5ad10db9e6a42dcb443b37b155ecb;p=llvm [X86][SSE] Call SimplifyMultipleUseDemandedBits on PACKSS/PACKUS arguments. This mainly helps to replace unused arguments with UNDEF in the case where they have multiple users. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368026 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dc3ba714def..43bf7289c16 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -34129,16 +34129,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } case X86ISD::PACKSS: case X86ISD::PACKUS: { + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + APInt DemandedLHS, DemandedRHS; getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(N0, DemandedLHS, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(N1, DemandedRHS, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; + + // Aggressively peek through ops to get at the demanded elts. + // TODO - we should do this for all target/faux shuffles ops. + if (!DemandedElts.isAllOnesValue()) { + APInt DemandedSrcBits = + APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); + SDValue NewN0 = SimplifyMultipleUseDemandedBits( + N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue NewN1 = SimplifyMultipleUseDemandedBits( + N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1); + if (NewN0 || NewN1) { + NewN0 = NewN0 ? NewN0 : N0; + NewN1 = NewN1 ? NewN1 : N1; + return TLO.CombineTo(Op, + TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); + } + } break; } case X86ISD::HADD: diff --git a/test/CodeGen/X86/vector-trunc-packus-widen.ll b/test/CodeGen/X86/vector-trunc-packus-widen.ll index 434464b498c..eb0a32fee08 100644 --- a/test/CodeGen/X86/vector-trunc-packus-widen.ll +++ b/test/CodeGen/X86/vector-trunc-packus-widen.ll @@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1 ; SSE41-NEXT: packusdw %xmm3, %xmm1 ; SSE41-NEXT: packusdw %xmm1, %xmm4 -; SSE41-NEXT: packuswb %xmm4, %xmm4 +; SSE41-NEXT: packuswb %xmm0, %xmm4 ; SSE41-NEXT: movq %xmm4, (%rdi) ; SSE41-NEXT: retq ; diff --git a/test/CodeGen/X86/vector-trunc-packus.ll b/test/CodeGen/X86/vector-trunc-packus.ll index a5cc48ff4a7..db809666cd9 100644 --- a/test/CodeGen/X86/vector-trunc-packus.ll +++ b/test/CodeGen/X86/vector-trunc-packus.ll @@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1 ; SSE41-NEXT: packusdw %xmm3, %xmm1 ; SSE41-NEXT: packusdw %xmm1, %xmm4 -; SSE41-NEXT: packuswb %xmm4, %xmm4 +; SSE41-NEXT: packuswb %xmm0, %xmm4 ; SSE41-NEXT: movq %xmm4, (%rdi) ; SSE41-NEXT: retq ; diff --git a/test/CodeGen/X86/vector-trunc-ssat-widen.ll b/test/CodeGen/X86/vector-trunc-ssat-widen.ll index 8bd80c2f996..4d32267b61e 100644 --- a/test/CodeGen/X86/vector-trunc-ssat-widen.ll +++ b/test/CodeGen/X86/vector-trunc-ssat-widen.ll @@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE41-NEXT: andpd %xmm0, %xmm2 ; SSE41-NEXT: packusdw %xmm7, %xmm2 ; SSE41-NEXT: packusdw %xmm3, %xmm2 -; SSE41-NEXT: packuswb %xmm2, %xmm2 +; SSE41-NEXT: packuswb %xmm0, %xmm2 ; SSE41-NEXT: movq %xmm2, (%rdi) ; SSE41-NEXT: retq ; diff --git a/test/CodeGen/X86/vector-trunc-ssat.ll b/test/CodeGen/X86/vector-trunc-ssat.ll index 5e254ef53e2..7bdacc90336 100644 --- a/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/test/CodeGen/X86/vector-trunc-ssat.ll @@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE41-NEXT: andpd %xmm0, %xmm2 ; SSE41-NEXT: packusdw %xmm7, %xmm2 ; SSE41-NEXT: packusdw %xmm3, %xmm2 -; SSE41-NEXT: packuswb %xmm2, %xmm2 +; SSE41-NEXT: packuswb %xmm0, %xmm2 ; SSE41-NEXT: movq %xmm2, (%rdi) ; SSE41-NEXT: retq ; diff --git a/test/CodeGen/X86/vector-trunc-usat-widen.ll b/test/CodeGen/X86/vector-trunc-usat-widen.ll index e80e5d46e3b..fd76cb53c6e 100644 --- a/test/CodeGen/X86/vector-trunc-usat-widen.ll +++ b/test/CodeGen/X86/vector-trunc-usat-widen.ll @@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: packuswb %xmm4, %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: packuswb %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm1, (%rdi) ; SSE2-NEXT: retq ; @@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSSE3-NEXT: por %xmm2, %xmm0 ; SSSE3-NEXT: packuswb %xmm4, %xmm0 ; SSSE3-NEXT: packuswb %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm1, %xmm1 +; SSSE3-NEXT: packuswb %xmm0, %xmm1 ; SSSE3-NEXT: movq %xmm1, (%rdi) ; SSSE3-NEXT: retq ; @@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE41-NEXT: packusdw %xmm5, %xmm9 ; SSE41-NEXT: packusdw %xmm9, %xmm1 -; SSE41-NEXT: packuswb %xmm1, %xmm1 +; SSE41-NEXT: packuswb %xmm0, %xmm1 ; SSE41-NEXT: movq %xmm1, (%rdi) ; SSE41-NEXT: retq ; @@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; SSE2-NEXT: pandn %xmm2, %xmm5 ; SSE2-NEXT: por %xmm0, %xmm5 ; SSE2-NEXT: packuswb %xmm6, %xmm5 -; SSE2-NEXT: packuswb %xmm5, %xmm5 +; SSE2-NEXT: packuswb %xmm0, %xmm5 ; SSE2-NEXT: movq %xmm5, (%rdi) ; SSE2-NEXT: retq ; diff --git a/test/CodeGen/X86/vector-trunc-usat.ll b/test/CodeGen/X86/vector-trunc-usat.ll index 2168f10a924..2648a0c29fa 100644 --- a/test/CodeGen/X86/vector-trunc-usat.ll +++ b/test/CodeGen/X86/vector-trunc-usat.ll @@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: packuswb %xmm4, %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: packuswb %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm1, (%rdi) ; SSE2-NEXT: retq ; @@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSSE3-NEXT: por %xmm2, %xmm0 ; SSSE3-NEXT: packuswb %xmm4, %xmm0 ; SSSE3-NEXT: packuswb %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm1, %xmm1 +; SSSE3-NEXT: packuswb %xmm0, %xmm1 ; SSSE3-NEXT: movq %xmm1, (%rdi) ; SSSE3-NEXT: retq ; @@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE41-NEXT: packusdw %xmm5, %xmm9 ; SSE41-NEXT: packusdw %xmm9, %xmm1 -; SSE41-NEXT: packuswb %xmm1, %xmm1 +; SSE41-NEXT: packuswb %xmm0, %xmm1 ; SSE41-NEXT: movq %xmm1, (%rdi) ; SSE41-NEXT: retq ; @@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; SSE2-NEXT: pandn %xmm2, %xmm5 ; SSE2-NEXT: por %xmm0, %xmm5 ; SSE2-NEXT: packuswb %xmm6, %xmm5 -; SSE2-NEXT: packuswb %xmm5, %xmm5 +; SSE2-NEXT: packuswb %xmm0, %xmm5 ; SSE2-NEXT: movq %xmm5, (%rdi) ; SSE2-NEXT: retq ;