SelectionDAG & DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
- unsigned NumOfOperands = Op.getNumOperands();
+ unsigned NumOperands = Op.getNumOperands();
- assert(isPowerOf2_32(NumOfOperands) &&
+ assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
// If this node promotes - by concatenating zeroes - the type of the result
ZeroC);
}
- SDValue Undef = DAG.getUNDEF(ResVT);
- if (NumOfOperands > 2) {
- // Specialize the cases when all, or all but one, of the operands are undef.
- unsigned NumOfDefinedOps = 0;
- unsigned OpIdx = 0;
- for (unsigned i = 0; i < NumOfOperands; i++)
- if (!Op.getOperand(i).isUndef()) {
- NumOfDefinedOps++;
- OpIdx = i;
- }
- if (NumOfDefinedOps == 0)
- return Undef;
- if (NumOfDefinedOps == 1) {
- unsigned SubVecNumElts =
- Op.getOperand(OpIdx).getValueType().getVectorNumElements();
- SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef,
- Op.getOperand(OpIdx), IdxVal);
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ uint64_t NonZeros = 0;
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ SDValue SubVec = Op.getOperand(i);
+ if (SubVec.isUndef())
+ continue;
+ if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+ ++NumZero;
+ else {
+ assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
+ NonZeros |= (uint64_t)1 << i;
+ ++NumNonZero;
}
+ }
+
+
+ // If there are zero or one non-zeros we can handle this very simply.
+ if (NumNonZero <= 1) {
+ SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
+ : DAG.getUNDEF(ResVT);
+ if (!NumNonZero)
+ return Vec;
+ unsigned Idx = countTrailingZeros(NonZeros);
+ SDValue SubVec = Op.getOperand(Idx);
+ unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec,
+ DAG.getIntPtrConstant(Idx * SubVecNumElts, dl));
+ }
+ if (NumOperands > 2) {
MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
ResVT.getVectorNumElements()/2);
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0; i < NumOfOperands/2; i++)
- Ops.push_back(Op.getOperand(i));
- SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
- Ops.clear();
- for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++)
- Ops.push_back(Op.getOperand(i));
- SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
+ ArrayRef<SDUse> Ops = Op->ops();
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
+ Ops.slice(0, NumOperands/2));
+ SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
+ Ops.slice(NumOperands/2));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
- // 2 operands
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- unsigned NumElems = ResVT.getVectorNumElements();
- assert(V1.getValueType() == V2.getValueType() &&
- V1.getValueType().getVectorNumElements() == NumElems/2 &&
- "Unexpected operands in CONCAT_VECTORS");
-
- // If this can be done with a subreg insert do that first.
- SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
- if (V2.isUndef())
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
+ assert(NumNonZero == 2 && "Simple cases not handled?");
- if (ResVT.getSizeInBits() >= 16)
+ if (ResVT.getVectorNumElements() >= 16)
return Op; // The operation is legal with KUNPCK
- bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
- bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
- SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl);
- if (IsZeroV1 && IsZeroV2)
- return ZeroVec;
-
- if (IsZeroV2)
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx);
-
- SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl);
- if (V1.isUndef())
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
-
- if (IsZeroV1)
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal);
-
- V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal);
+ SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
+ DAG.getUNDEF(ResVT), Op.getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ unsigned NumElems = ResVT.getVectorNumElements();
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1),
+ DAG.getIntPtrConstant(NumElems/2, dl));
}
static SDValue LowerCONCAT_VECTORS(SDValue Op,
define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k5 ## encoding: [0xc5,0xf8,0x92,0xef]
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k5} ## encoding: [0x62,0xf2,0xfd,0x0d,0x29,0xc1]
-; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k2 {%k5} ## encoding: [0x62,0xf2,0xf5,0x0d,0x37,0xd0]
-; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1f,0xc9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k3 ## encoding: [0xc5,0xfc,0x47,0xd8]
-; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1f,0xe1,0x04]
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k6 {%k5} ## encoding: [0x62,0xf3,0xf5,0x0d,0x1f,0xf0,0x02]
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k7 {%k5} ## encoding: [0x62,0xf2,0xfd,0x0d,0x37,0xf9]
-; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
+; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd8]
+; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd1,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
+; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x1f,0xf0,0x02]
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf9]
+; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
+; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k5 ## encoding: [0xc5,0xf8,0x92,0xef]
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k5} ## encoding: [0x62,0xf2,0xfd,0x0d,0x29,0xc1]
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xd1,0x01]
-; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k1 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xc9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k3 ## encoding: [0xc5,0xfc,0x47,0xd8]
-; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1f,0xe1,0x04]
-; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xf1,0x05]
-; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xf9,0x06]
-; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x01]
+; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
+; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
+; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05]
+; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06]
+; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
+; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)