From 9c32ef6fdbc10207c1bf08d5e1f1fcd0b9647598 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 3 Jun 2017 11:12:57 +0000
Subject: [PATCH] [X86][SSE] Add SCALAR_TO_VECTOR(PEXTRW/PEXTRB) support to
 faux shuffle combining

Generalized existing SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT) code to support
AssertZext + PEXTRW/PEXTRB cases as well.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304659 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp               | 40 +++++++++++++----
 .../X86/clear_upper_vector_element_bits.ll       | 44 +++----------------
 2 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 63874767b71..5303d7a406a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5848,17 +5848,39 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     return true;
   }
   case ISD::SCALAR_TO_VECTOR: {
-    // Match against a scalar_to_vector of an extract from a similar vector.
+    // Match against a scalar_to_vector of an extract from a vector,
+    // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
     SDValue N0 = N.getOperand(0);
-    if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-        N0.getOperand(0).getValueType() != VT ||
-        !isa<ConstantSDNode>(N0.getOperand(1)) ||
-        NumElts <= N0.getConstantOperandVal(1) ||
-        !N->isOnlyUserOf(N0.getNode()))
+    SDValue SrcExtract;
+
+    if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        N0.getOperand(0).getValueType() == VT) {
+      SrcExtract = N0;
+    } else if (N0.getOpcode() == ISD::AssertZext &&
+               N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
+               cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
+      SrcExtract = N0.getOperand(0);
+      assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
+    } else if (N0.getOpcode() == ISD::AssertZext &&
+               N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
+               cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
+      SrcExtract = N0.getOperand(0);
+      assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
+    }
+
+    if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
+        NumElts <= SrcExtract.getConstantOperandVal(1))
       return false;
-    Ops.push_back(N0.getOperand(0));
-    Mask.push_back(N0.getConstantOperandVal(1));
-    Mask.append(NumElts - 1, SM_SentinelUndef);
+
+    SDValue SrcVec = SrcExtract.getOperand(0);
+    EVT SrcVT = SrcVec.getValueType();
+    unsigned NumSrcElts = SrcVT.getVectorNumElements();
+    unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
+
+    Ops.push_back(SrcVec);
+    Mask.push_back(SrcExtract.getConstantOperandVal(1));
+    Mask.append(NumZeros, SM_SentinelZero);
+    Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
     return true;
   }
   case X86ISD::PINSRB:
diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index ae0f4406ba0..1218b68b1be 100644
--- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -405,12 +405,7 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
 ;
 ; AVX-LABEL: _clearupper16xi8a:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX-NEXT:    vpextrb $1, %xmm0, %ecx
-; AVX-NEXT:    vmovd %eax, %xmm1
-; AVX-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
-; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %x0 = extractelement <16 x i8> %0, i32 0
   %x1 = extractelement <16 x i8> %0, i32 1
@@ -575,39 +570,10 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
 ; SSE-NEXT:    pand %xmm2, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: _clearupper32xi8a:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX1-NEXT:    vpextrb $1, %xmm0, %ecx
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrb $0, %xmm1, %edx
-; AVX1-NEXT:    vpextrb $1, %xmm1, %esi
-; AVX1-NEXT:    vmovd %edx, %xmm2
-; AVX1-NEXT:    vpinsrb $1, %esi, %xmm2, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7]
-; AVX1-NEXT:    vmovd %eax, %xmm2
-; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: _clearupper32xi8a:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX2-NEXT:    vpextrb $1, %xmm0, %ecx
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpextrb $0, %xmm1, %edx
-; AVX2-NEXT:    vpextrb $1, %xmm1, %esi
-; AVX2-NEXT:    vmovd %edx, %xmm2
-; AVX2-NEXT:    vpinsrb $1, %esi, %xmm2, %xmm2
-; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7]
-; AVX2-NEXT:    vmovd %eax, %xmm2
-; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
-; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    retq
+; AVX-LABEL: _clearupper32xi8a:
+; AVX:       # BB#0:
+; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    retq
 %x0 = extractelement <32 x i8> %0, i32 0
 %x1 = extractelement <32 x i8> %0, i32 1
 %x2 = extractelement <32 x i8> %0, i32 2
-- 
2.50.1
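
Note (not part of the patch): to make the new mask construction concrete, the standalone C++ sketch below walks through the arithmetic for one assumed case: a v4i32 SCALAR_TO_VECTOR fed by a PEXTRW that reads lane 3 of a v8i16 source. The concrete type widths, the extract index, and the sentinel values are illustrative assumptions; only the mask layout (extracted lane, then the zero lanes implied by PEXTRW's zero-extension, then undef for the rest) mirrors the code added above.

  // Standalone illustration only (not LLVM code). Assumes: destination type
  // v4i32 (32-bit elements), source type v8i16 (16-bit elements, 8 lanes),
  // PEXTRW immediate 3, and sentinel values undef = -1, zero = -2.
  #include <cstdio>
  #include <vector>

  int main() {
    const int SM_SentinelUndef = -1;
    const int SM_SentinelZero = -2;

    unsigned NumBitsPerElt = 32;  // destination scalar width (v4i32)
    unsigned SrcScalarBits = 16;  // source scalar width (v8i16)
    unsigned NumSrcElts = 8;      // source lane count
    unsigned ExtractIdx = 3;      // PEXTRW immediate

    // Lane 0 of the result comes from the extracted source lane.
    std::vector<int> Mask;
    Mask.push_back(ExtractIdx);
    // The implicit zero-extension of PEXTRW becomes explicit zero lanes.
    unsigned NumZeros = NumBitsPerElt / SrcScalarBits - 1;
    Mask.insert(Mask.end(), NumZeros, SM_SentinelZero);
    // Everything past the SCALAR_TO_VECTOR scalar is undefined.
    Mask.insert(Mask.end(), NumSrcElts - Mask.size(), SM_SentinelUndef);

    for (int M : Mask)
      printf("%d ", M);  // prints: 3 -2 -1 -1 -1 -1 -1 -1
    printf("\n");
    return 0;
  }

Once the node is expressed as such a mask, the existing shuffle combine can treat the scalar_to_vector like any other shuffle of the original vector, which is what lets the clearupper tests above collapse to a single vandps/vpand of a constant mask.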