[X86] Add a special case to LowerCONCAT_VECTORSvXi1 to handle concatenating zero vectors followed by a non-zero vector followed by undef vectors

author Craig Topper <craig.topper@intel.com>

Sun, 18 Aug 2019 23:30:11 +0000 (23:30 +0000)

committer Craig Topper <craig.topper@intel.com>

Sun, 18 Aug 2019 23:30:11 +0000 (23:30 +0000)
author Craig Topper <craig.topper@intel.com>
Sun, 18 Aug 2019 23:30:11 +0000 (23:30 +0000)
committer Craig Topper <craig.topper@intel.com>
Sun, 18 Aug 2019 23:30:11 +0000 (23:30 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index f55446fc5f6d7794186a05d03fcf77d2c68699fe..82cee72d812ed96167ffea3c85c315e59b8caf9e 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9823,30 +9823,47 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
    assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
           "Unexpected number of operands in CONCAT_VECTORS");
  
-  unsigned NumZero = 0;
-  unsigned NumNonZero = 0;
+  uint64_t Zeros = 0;
    uint64_t NonZeros = 0;
    for (unsigned i = 0; i != NumOperands; ++i) {
      SDValue SubVec = Op.getOperand(i);
      if (SubVec.isUndef())
        continue;
+    assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
      if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
-      ++NumZero;
-    else {
-      assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
+      Zeros |= (uint64_t)1 << i;
+    else
        NonZeros |= (uint64_t)1 << i;
-      ++NumNonZero;
-    }
    }
  
+  unsigned NumElems = ResVT.getVectorNumElements();
+
+  // If we are inserting a single non-zero vector and there are zeros in the
+  // LSBs and undef in the MSBs, we only need to emit a KSHIFTL. The generic
+  // lowering to insert_subvector would give us two kshifts.
+  if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
+      Log2_64(NonZeros) != NumOperands - 1) {
+    MVT ShiftVT = ResVT;
+    if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
+      ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+    unsigned Idx = Log2_64(NonZeros);
+    SDValue SubVec = Op.getOperand(Idx);
+    unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
+    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT,
+                         DAG.getUNDEF(ShiftVT), SubVec,
+                         DAG.getIntPtrConstant(0, dl));
+    Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
+                     DAG.getConstant(Idx * SubVecNumElts, dl, MVT::i8));
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
+                       DAG.getIntPtrConstant(0, dl));
+  }
  
    // If there are zero or one non-zeros we can handle this very simply.
-  if (NumNonZero <= 1) {
-    SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
-                          : DAG.getUNDEF(ResVT);
-    if (!NumNonZero)
+  if (NonZeros == 0 || isPowerOf2_64(NonZeros)) {
+    SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT);
+    if (!NonZeros)
        return Vec;
-    unsigned Idx = countTrailingZeros(NonZeros);
+    unsigned Idx = Log2_64(NonZeros);
      SDValue SubVec = Op.getOperand(Idx);
      unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec,
@@ -9864,7 +9881,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
      return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
    }
  
-  assert(NumNonZero == 2 && "Simple cases not handled?");
+  assert(countPopulation(NonZeros) == 2 && "Simple cases not handled?");
  
    if (ResVT.getVectorNumElements() >= 16)
      return Op; // The operation is legal with KUNPCK
@@ -9872,7 +9889,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
    SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
                              DAG.getUNDEF(ResVT), Op.getOperand(0),
                              DAG.getIntPtrConstant(0, dl));
-  unsigned NumElems = ResVT.getVectorNumElements();
    return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1),
                       DAG.getIntPtrConstant(NumElems/2, dl));
  }
diff --git a/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/test/CodeGen/X86/avx512-skx-insert-subvec.ll

index bd0057e0d829e1fae8f6d23eb79246ced32fc9bc..5ed2ef5fa50d6339b69fcdf55a4b4d9822b178fb 100644 (file)
--- a/test/CodeGen/X86/avx512-skx-insert-subvec.ll
+++ b/test/CodeGen/X86/avx512-skx-insert-subvec.ll
@@ -169,8 +169,7 @@ define <16 x i1> @test12(<2 x i1> %a) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
  ; CHECK-NEXT:    vpmovq2m %xmm0, %k0
-; CHECK-NEXT:    kshiftlw $14, %k0, %k0
-; CHECK-NEXT:    kshiftrw $4, %k0, %k0
+; CHECK-NEXT:    kshiftlw $10, %k0, %k0
  ; CHECK-NEXT:    vpmovm2b %k0, %xmm0
  ; CHECK-NEXT:    retq
    %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -182,8 +181,7 @@ define <32 x i1> @test13(<2 x i1> %a) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
  ; CHECK-NEXT:    vpmovq2m %xmm0, %k0
-; CHECK-NEXT:    kshiftld $30, %k0, %k0
-; CHECK-NEXT:    kshiftrd $20, %k0, %k0
+; CHECK-NEXT:    kshiftld $10, %k0, %k0
  ; CHECK-NEXT:    vpmovm2b %k0, %ymm0
  ; CHECK-NEXT:    retq
    %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -195,8 +193,7 @@ define <64 x i1> @test14(<2 x i1> %a) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
  ; CHECK-NEXT:    vpmovq2m %xmm0, %k0
-; CHECK-NEXT:    kshiftlq $62, %k0, %k0
-; CHECK-NEXT:    kshiftrq $52, %k0, %k0
+; CHECK-NEXT:    kshiftlq $10, %k0, %k0
  ; CHECK-NEXT:    vpmovm2b %k0, %zmm0
  ; CHECK-NEXT:    retq
    %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
author	Craig Topper <craig.topper@intel.com>
	Sun, 18 Aug 2019 23:30:11 +0000 (23:30 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sun, 18 Aug 2019 23:30:11 +0000 (23:30 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/avx512-skx-insert-subvec.ll		patch | blob | history