From 56a9634f03c6af6c8c13694971b7ff237b9ef354 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 11 Jun 2016 13:38:28 +0000 Subject: [PATCH] [X86][SSE] Added PSLLDQ/PSRLDQ as a target shuffle type Ensure that PALIGNR/PSLLDQ/PSRLDQ are byte vectors so that they can be correctly decoded for target shuffle combining git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272471 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 17 +++++++++++++++++ .../X86/vector-shuffle-combining-avx2.ll | 6 ++---- .../X86/vector-shuffle-combining-ssse3.ll | 12 ++++-------- test/CodeGen/X86/vector-zext.ll | 5 +---- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1356d278d6c..95b74b18a92 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3806,6 +3806,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::SHUFP: case X86ISD::INSERTPS: case X86ISD::PALIGNR: + case X86ISD::VSHLDQ: + case X86ISD::VSRLDQ: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: case X86ISD::MOVHLPS: @@ -4878,9 +4880,22 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::PALIGNR: + assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); ImmN = N->getOperand(N->getNumOperands()-1); DecodePALIGNRMask(VT, cast(ImmN)->getZExtValue(), Mask); break; + case X86ISD::VSHLDQ: + assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); + ImmN = N->getOperand(N->getNumOperands() - 1); + DecodePSLLDQMask(VT, cast(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::VSRLDQ: + assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); + ImmN = N->getOperand(N->getNumOperands() - 1); + DecodePSRLDQMask(VT, cast(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; case X86ISD::PSHUFD: case X86ISD::VPERMILPI: ImmN = N->getOperand(N->getNumOperands()-1); @@ -30175,6 +30190,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: case X86ISD::PALIGNR: + case X86ISD::VSHLDQ: + case X86ISD::VSRLDQ: case X86ISD::BLENDI: case X86ISD::UNPCKH: case X86ISD::UNPCKL: diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 553a9ac1bf3..1f8531a91b4 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -8,8 +8,7 @@ declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) { ; CHECK-LABEL: combine_pshufb_pslldq: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0 ; CHECK-NEXT: retq %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> ) %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> @@ -19,8 +18,7 @@ define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) { define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) { ; CHECK-LABEL: combine_pshufb_psrldq: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0 ; CHECK-NEXT: retq %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> ) %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index 8d1e3301cf3..e6b46c42f79 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -110,14 +110,12 @@ define <16 x i8> @combine_pshufb_palignr(<16 x i8> %a0, <16 x i8> %a1) { define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) { ; SSE-LABEL: combine_pshufb_pslldq: ; SSE: # BB#0: -; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] -; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_pshufb_pslldq: ; AVX: # BB#0: -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] -; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> ) %2 = shufflevector <16 x i8> %1, <16 x i8> zeroinitializer, <16 x i32> @@ -127,14 +125,12 @@ define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) { define <16 x i8> @combine_pshufb_psrldq(<16 x i8> %a0) { ; SSE-LABEL: combine_pshufb_psrldq: ; SSE: # BB#0: -; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero -; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_pshufb_psrldq: ; AVX: # BB#0: -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero -; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> ) %2 = shufflevector <16 x i8> %1, <16 x i8> zeroinitializer, <16 x i32> diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index 90007aa9803..a71e3b7b712 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -1378,10 +1378,7 @@ define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable ; ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero -; SSSE3-NEXT: pxor %xmm1, %xmm1 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6: -- 2.50.1