From b5df2def3be3b06f83e08ce70ba36bdcba3b416c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Dec 2016 11:52:37 +0000 Subject: [PATCH] [X86][SSE] Add support for combining ISD::AND with shuffles. Attempts to convert an AND with a vector of 255 or 0 values into a shuffle (blend) mask. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288333 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++++++++++ .../X86/vector-shuffle-combining-avx2.ll | 2 -- .../X86/vector-shuffle-combining-ssse3.ll | 3 +-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3e2f5f1ab7c..63f191fa8dd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5558,6 +5558,25 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, unsigned Opcode = N.getOpcode(); switch (Opcode) { + case ISD::AND: { + // Attempt to decode as a per-byte mask. + SmallBitVector UndefElts; + SmallVector<APInt, 64> EltBits; + if (!getTargetConstantBitsFromNode(N.getOperand(1), 8, UndefElts, EltBits)) + return false; + for (int i = 0, e = (int)EltBits.size(); i != e; ++i) { + if (UndefElts[i]) { + Mask.push_back(SM_SentinelUndef); + continue; + } + uint64_t ByteBits = EltBits[i].getZExtValue(); + if (ByteBits != 0 && ByteBits != 255) + return false; + Mask.push_back(ByteBits == 0 ? 
SM_SentinelZero : i); + } + Ops.push_back(N.getOperand(0)); + return true; + } case X86ISD::VSHLI: case X86ISD::VSRLI: { uint64_t ShiftVal = N.getConstantOperandVal(1); diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 02a33a70b80..4fddbac7502 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -72,13 +72,11 @@ define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) { define <32 x i8> @combine_and_pshufb(<32 x i8> %a0) { ; X32-LABEL: combine_and_pshufb: ; X32: # BB#0: -; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1],zero,zero,zero,zero,zero,zero,ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17],zero,zero,zero,zero,zero,zero,ymm0[24,25],zero,zero,zero,zero,zero,zero ; X32-NEXT: retl ; ; X64-LABEL: combine_and_pshufb: ; X64: # BB#0: -; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1],zero,zero,zero,zero,zero,zero,ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17],zero,zero,zero,zero,zero,zero,ymm0[24,25],zero,zero,zero,zero,zero,zero ; X64-NEXT: retq %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> ret <32 x i8> %1 } diff --git a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index 95e86d2b899..b7aa81964c3 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -277,8 +277,7 @@ define <16 x i8> @combine_pshufb_psrldq(<16 x i8> %a0) { define <16 x i8> @combine_and_pshufb(<16 x i8> %a0) { ; SSSE3-LABEL: combine_and_pshufb: ; SSSE3: # BB#0: -; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero ; 
SSSE3-NEXT: retq ; ; SSE41-LABEL: combine_and_pshufb: -- 2.50.1