From: Simon Pilgrim Date: Sun, 19 Feb 2017 19:40:31 +0000 (+0000) Subject: [X86][SSE] Use getTargetConstantBitsFromNode to find zeroable shuffle elements. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f8d4b524ddde41e89a2bba1b655948f88b4dddf2;p=llvm [X86][SSE] Use getTargetConstantBitsFromNode to find zeroable shuffle elements. Replaces existing approach that could only search BUILD_VECTOR nodes. Requires getTargetConstantBitsFromNode to discriminate cases with all/partial UNDEF bits in each element - this should also be useful when we get around to supporting getTargetShuffleMaskIndices with UNDEF elements. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295613 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4d660acbc20..44e3f18c401 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5154,7 +5154,8 @@ static const Constant *getTargetConstantFromNode(SDValue Op) { static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, SmallBitVector &UndefElts, SmallVectorImpl &EltBits, - bool AllowUndefs = true) { + bool AllowWholeUndefs = true, + bool AllowPartialUndefs = true) { assert(UndefElts.empty() && "Expected an empty UndefElts vector"); assert(EltBits.empty() && "Expected an empty EltBits vector"); @@ -5175,6 +5176,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, // Split the undef/constant single bitset data into the target elements. auto SplitBitData = [&]() { // Don't split if we don't allow undef bits. + bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; if (UndefBits.getBoolValue() && !AllowUndefs) return false; @@ -5185,13 +5187,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt UndefEltBits = UndefBits.lshr(i * EltSizeInBits); UndefEltBits = UndefEltBits.zextOrTrunc(EltSizeInBits); - // Only treat an element as UNDEF if all bits are UNDEF, otherwise - // treat it as zero. + // Only treat an element as UNDEF if all bits are UNDEF. if (UndefEltBits.isAllOnesValue()) { + if (!AllowWholeUndefs) + return false; UndefElts[i] = true; continue; } + // If only some bits are UNDEF then treat them as zero (or bail if not + // supported). + if (UndefEltBits.getBoolValue() && !AllowPartialUndefs) + return false; + APInt Bits = MaskBits.lshr(i * EltSizeInBits); Bits = Bits.zextOrTrunc(EltSizeInBits); EltBits[i] = Bits.getZExtValue(); @@ -5293,7 +5301,8 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode, // Extract the raw target constant bits. // FIXME: We currently don't support UNDEF bits or mask entries. if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts, - EltBits, /* AllowUndefs */ false)) + EltBits, /* AllowWholeUndefs */ false, + /* AllowPartialUndefs */ false)) return false; // Insert the extracted elements into the mask. @@ -5600,6 +5609,19 @@ static bool setTargetShuffleZeroElements(SDValue N, V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); + assert((VT.getSizeInBits() % Mask.size()) == 0 && + "Illegal split of shuffle value type"); + unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size(); + + // Extract known constant input data. + SmallBitVector UndefSrcElts[2]; + SmallVector SrcEltBits[2]; + bool IsSrcConstant[2] = { + getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0], + SrcEltBits[0], true, false), + getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1], + SrcEltBits[1], true, false)}; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; @@ -5608,6 +5630,7 @@ static bool setTargetShuffleZeroElements(SDValue N, continue; // Determine shuffle input and normalize the mask. + unsigned SrcIdx = M / Size; SDValue V = M < Size ? V1 : V2; M %= Size; @@ -5632,39 +5655,12 @@ static bool setTargetShuffleZeroElements(SDValue N, continue; } - // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements. - if (V.getOpcode() != ISD::BUILD_VECTOR) - continue; - - // If the BUILD_VECTOR has fewer elements then the (larger) source - // element must be UNDEF/ZERO. - // TODO: Is it worth testing the individual bits of a constant? - if ((Size % V.getNumOperands()) == 0) { - int Scale = Size / V->getNumOperands(); - SDValue Op = V.getOperand(M / Scale); - if (Op.isUndef()) - Mask[i] = SM_SentinelUndef; - else if (X86::isZeroNode(Op)) - Mask[i] = SM_SentinelZero; - continue; - } - - // If the BUILD_VECTOR has more elements then all the (smaller) source - // elements must be all UNDEF or all ZERO. - if ((V.getNumOperands() % Size) == 0) { - int Scale = V->getNumOperands() / Size; - bool AllUndef = true; - bool AllZero = true; - for (int j = 0; j < Scale; ++j) { - SDValue Op = V.getOperand((M * Scale) + j); - AllUndef &= Op.isUndef(); - AllZero &= X86::isZeroNode(Op); - } - if (AllUndef) + // Attempt to extract from the source's constant bits. + if (IsSrcConstant[SrcIdx]) { + if (UndefSrcElts[SrcIdx][M]) Mask[i] = SM_SentinelUndef; - else if (AllZero) + else if (SrcEltBits[SrcIdx][M] == 0) Mask[i] = SM_SentinelZero; - continue; } } diff --git a/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 241c63c6acd..a9dff916431 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -441,16 +441,14 @@ define <4 x float> @PR31296(i8* %in) { ; X32: # BB#0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u> -; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1] +; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0] ; X32-NEXT: retl ; ; X64-LABEL: PR31296: ; X64: # BB#0: # %entry ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: vmovq %rax, %xmm0 -; X64-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u> -; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1] +; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0] ; X64-NEXT: retq entry: %0 = getelementptr i8, i8* %in, i32 0