llvm_unreachable("Unimplemented!");
}
-// Builds a blend bit mask from a constant select condition: bit i of MaskValue is set when condition element i is a null (zero) constant and clear when it is a non-null constant or not a constant at all.
-// Returns false (MaskValue is then not meaningful) when the vector needs more than two 8-element lanes, or when both lanes hold constants that disagree at the same position.
-// Assumes BuildVector is a BUILD_VECTOR of constants or undef SDNodes, i.e. ISD::isBuildVectorOfConstantSDNodes(BuildVector) is true.
-static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
- unsigned &MaskValue) {
- MaskValue = 0; // Output is rebuilt from scratch on every call.
- unsigned NumElems = BuildVector->getNumOperands();
-
- // One lane per group of up to 8 elements: 1 lane for NumElems <= 8, 2 lanes for 9..16.
- // Anything that would need more than 2 lanes is rejected below.
- unsigned NumLanes = (NumElems - 1) / 8 + 1;
- if (NumLanes > 2)
- return false;
-
- unsigned NumElemsInLane = NumElems / NumLanes;
-
- // With two lanes (e.g. v16i16) the same per-lane mask is used for both, so element i of each lane is examined together.
- for (unsigned i = 0; i < NumElemsInLane; ++i) {
- SDValue EltCond = BuildVector->getOperand(i);
- SDValue SndLaneEltCond =
- (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond; // Single lane: compare the element with itself.
-
- int Lane1Cond = -1, Lane2Cond = -1; // -1 = not a constant; otherwise 1 for non-null, 0 for null.
- if (isa<ConstantSDNode>(EltCond))
- Lane1Cond = !isNullConstant(EltCond);
- if (isa<ConstantSDNode>(SndLaneEltCond))
- Lane2Cond = !isNullConstant(SndLaneEltCond);
-
- unsigned LaneMask = 0;
- if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
- // The lanes agree, or the second lane is not a constant: lane 1 decides.
- // Lane1Cond != 0 means we want the first argument; Lane1Cond == 0 means
- // we want the second. The mask encodes 0 for the first argument and 1
- // for the second, so the condition is inverted.
- LaneMask = !Lane1Cond << i; // If Lane1Cond is -1 as well, !(-1) is 0 and the bit stays clear.
- else if (Lane1Cond < 0)
- LaneMask = !Lane2Cond << i; // Only the second lane is a constant: it decides.
- else
- return false; // Both lanes are constants and they differ: no single per-lane mask exists.
-
- MaskValue |= LaneMask;
- if (NumLanes == 2)
- MaskValue |= LaneMask << NumElemsInLane; // Replicate the bit into the second lane's half of the mask.
- }
- return true;
-}
-
/// \brief Try to lower a VSELECT instruction to a vector shuffle.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
const X86Subtarget *Subtarget,
return SDValue();
}
-static SDValue // Returns a vector shuffle of N's operands 1 and 2 chosen by its constant condition (operand 0), or an empty SDValue when the rewrite does not apply.
-transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, // N: node with operands (condition, LHS, RHS); DAG: used to build the shuffle.
- const X86Subtarget *Subtarget) { // Subtarget is not referenced anywhere in this body.
- SDLoc dl(N);
- SDValue Cond = N->getOperand(0);
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
-
- if (Cond.getOpcode() == ISD::SIGN_EXTEND) { // Look through sign_extend(sign_extend_inreg(X)) and use X as the condition.
- SDValue CondSrc = Cond->getOperand(0);
- if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
- Cond = CondSrc->getOperand(0);
- }
-
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) // Only a constant condition can become a fixed shuffle mask.
- return SDValue();
-
- // A vselect where all conditions and data are constants can be optimized into
- // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
- if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
- return SDValue();
-
- unsigned MaskValue = 0; // Bit i set means element i is taken from RHS (see the index computation in the loop).
- if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
- return SDValue();
-
- MVT VT = N->getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements(); // NOTE(review): the loop assumes Cond has at least NumElems operands, even after the sign_extend peel above - confirm.
- SmallVector<int, 8> ShuffleMask(NumElems, -1);
- for (unsigned i = 0; i < NumElems; ++i) {
- // Keep the shuffle index undefined (-1) wherever the condition element is undef.
- if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF)
- ShuffleMask[i] = -1; // Same value the vector was initialised with; kept explicit.
- else
- ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1); // Index i when the bit is clear, i + NumElems when it is set.
- }
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isShuffleMaskLegal(ShuffleMask, VT)) // Give up rather than emit a shuffle the target reports as illegal.
- return SDValue();
- return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]); // LHS and RHS are passed as the first and second shuffle inputs.
-}
-
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // We should generate an X86ISD::BLENDI from a vselect if its argument
- // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
- // constants. This specific pattern gets generated when we split a
- // selector for a 512 bit vector in a machine without AVX512 (but with
- // 256-bit vectors), during legalization:
- //
- // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
- //
- // Iff we find this pattern and the build_vectors are built from
- // constants, we translate the vselect into a shuffle_vector that we
- // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
- if ((N->getOpcode() == ISD::VSELECT ||
- N->getOpcode() == X86ISD::SHRUNKBLEND) &&
- !DCI.isBeforeLegalize() && !VT.is512BitVector()) {
- SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
- if (Shuffle.getNode())
- return Shuffle;
- }
-
// If this is a *dynamic* select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the
// condition so that the blends can use the high bit of each element and use
; SSE41-LABEL: vsel_i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; SSE41-NEXT: pblendvb %xmm1, %xmm2
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE41-NEXT: pblendvb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm4
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSE41-NEXT: pblendvb %xmm2, %xmm4
-; SSE41-NEXT: pblendvb %xmm3, %xmm1
-; SSE41-NEXT: movdqa %xmm4, %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE41-NEXT: pblendvb %xmm4, %xmm2
+; SSE41-NEXT: pblendvb %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm3, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_pblendvb_avx2: