if (!isa<ConstantSDNode>(Idx))
return SDValue();
+ // Inserting undef is a nop. We can just return the original vector.
+ if (SubVec.isUndef())
+ return Vec;
+
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
return Op;
MVT OpVT = Op.getSimpleValueType();
unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
+
+ // Extend to natively supported kshift.
+ MVT WideOpVT = OpVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
+ WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+
// Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
// if necessary.
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
- if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
- // Need to promote to v16i1, do the insert, then extract back.
- Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
- getZeroVector(MVT::v16i1, Subtarget, DAG, dl),
- SubVec, Idx);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op,
- DAG.getIntPtrConstant(0, dl));
- }
-
- return Op;
+ // May need to promote to a legal type.
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ SubVec, Idx);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
MVT SubVecVT = SubVec.getSimpleValueType();
IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR");
- // extend to natively supported kshift
- MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
- MVT WideOpVT = OpVT;
- if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
- WideOpVT = MinVT;
-
- SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Undef = DAG.getUNDEF(WideOpVT);
- SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- Undef, SubVec, ZeroIdx);
- // Extract sub-vector if require.
- auto ExtractSubVec = [&](SDValue V) {
- return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
- OpVT, V, ZeroIdx);
- };
+ if (IdxVal == 0) {
+ // Inserting at index 0 into a vector that is neither undef nor all-zeros
+ // (both of those cases returned earlier). Clear the low SubVecNumElems
+ // bits of Vec, then OR in the zero-extended subvector.
+ SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+ // Widen Vec to WideOpVT by inserting it into an undef vector.
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
+ ZeroIdx);
+ // Zero lower bits of the Vec: shift right, then left by the same amount.
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
+ // Merge them together, SubVec should be zero extended.
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ SubVec, ZeroIdx);
+ Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
+ // Reduce the merged value (not the original node) to the original width.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op,
+ ZeroIdx);
+ }
+
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ Undef, SubVec, ZeroIdx);
if (Vec.isUndef()) {
- if (IdxVal != 0) {
- SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
- WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
- ShiftBits);
- }
- return ExtractSubVec(WideSubVec);
+ assert(IdxVal != 0 && "Unexpected index");
+ Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
- DAG.getConstant(ShiftLeft, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
- DAG.getConstant(ShiftRight, dl, MVT::i8));
- return ExtractSubVec(Vec);
- }
-
- if (IdxVal == 0) {
- // Zero lower bits of the Vec
- SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
- // Merge them together, SubVec should be zero extended.
- WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- getZeroVector(WideOpVT, Subtarget, DAG, dl),
- SubVec, ZeroIdx);
- Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
- return ExtractSubVec(Vec);
+ // Shift the widened subvector up to the MSBs, then back down to its final
+ // position; both shifts are applied in sequence to the same value.
+ Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
+ DAG.getConstant(ShiftRight, dl, MVT::i8));
+ // Reduce to original width if needed.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
// Simple case when we put subvector in the upper part
if (IdxVal + SubVecNumElems == NumElems) {
- // Zero upper bits of the Vec
- WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- NumElems = WideOpVT.getVectorNumElements();
- SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
- return ExtractSubVec(Vec);
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ if (SubVecNumElems * 2 == NumElems) {
+ // Special case, use legal zero extending insert_subvector. This allows
+ // isel to optimize when bits are known zero.
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ Vec, ZeroIdx);
+ } else {
+ // Otherwise use explicit shifts to zero the bits.
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ Undef, Vec, ZeroIdx);
+ NumElems = WideOpVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
+ }
+ Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
- // Subvector should be inserted in the middle - use shuffle
- WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
- SubVec, ZeroIdx);
- SmallVector<int, 64> Mask;
- for (unsigned i = 0; i < NumElems; ++i)
- Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
- i : i + NumElems);
- return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
+
+ // Inserting into the middle is more complicated.
+
+ NumElems = WideOpVT.getVectorNumElements();
+
+ // Widen the vector if needed.
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
+ // Move the current value of the bits to be replaced to the lsbs.
+ Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ // Xor with the new bit.
+ Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
+ // Shift to MSB, filling bottom bits with 0.
+ unsigned ShiftLeft = NumElems - SubVecNumElems;
+ Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ // Shift to the final position, filling upper bits with 0.
+ unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
+ Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
+ DAG.getConstant(ShiftRight, dl, MVT::i8));
+ // Xor with original vector leaving the new value.
+ Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
+ // Reduce to original width if needed.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
+; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpslld $31, %xmm1, %xmm0
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $4, %k1, %k1
; CHECK-NEXT: kshiftlb $4, %k0, %k0
-; CHECK-NEXT: kshiftrb $4, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
+; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $2, %k1, %k1
-; CHECK-NEXT: kshiftlb $6, %k0, %k0
-; CHECK-NEXT: kshiftrb $6, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: kshiftlb $2, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: retq
define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
+; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $2, %k1, %k1
-; CHECK-NEXT: kshiftlb $6, %k0, %k0
-; CHECK-NEXT: kshiftrb $6, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: kshiftlb $2, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2b %k0, %xmm0
; CHECK-NEXT: retq
define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
+; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpslld $31, %xmm1, %xmm0
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $4, %k1, %k1
; CHECK-NEXT: kshiftlb $4, %k0, %k0
-; CHECK-NEXT: kshiftrb $4, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2b %k0, %ymm0
; CHECK-NEXT: retq