Merging r366660 and r367306:

author Hans Wennborg <hans@hanshq.net>

Tue, 6 Aug 2019 08:08:11 +0000 (08:08 +0000)

committer Hans Wennborg <hans@hanshq.net>

Tue, 6 Aug 2019 08:08:11 +0000 (08:08 +0000)
author Hans Wennborg <hans@hanshq.net>
Tue, 6 Aug 2019 08:08:11 +0000 (08:08 +0000)
committer Hans Wennborg <hans@hanshq.net>
Tue, 6 Aug 2019 08:08:11 +0000 (08:08 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 0b4bf687e6cfd8d0bd52cd47988c9a53eb376de4..a03ce7988580446802c2d80ceca38e98ada5ef5a 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -34062,25 +34062,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
        return true;
      break;
    }
-  case X86ISD::SUBV_BROADCAST: {
-    // Reduce size of broadcast if we don't need the upper half.
-    unsigned HalfElts = NumElts / 2;
-    if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
-      SDValue Src = Op.getOperand(0);
-      MVT SrcVT = Src.getSimpleValueType();
-
-      SDValue Half = Src;
-      if (SrcVT.getVectorNumElements() != HalfElts) {
-        MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
-        Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
-      }
-
-      return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
-                                               TLO.DAG, SDLoc(Op),
-                                               Half.getValueSizeInBits()));
-    }
-    break;
-  }
    case X86ISD::VPERMV: {
      SDValue Mask = Op.getOperand(0);
      APInt MaskUndef, MaskZero;
@@ -34134,6 +34115,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
        SDValue Insert =
            insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
        return TLO.CombineTo(Op, Insert);
+    }
+      // Subvector broadcast: resize Src to ExtSizeInBits, rest is undef.
+    case X86ISD::SUBV_BROADCAST: {
+      SDLoc DL(Op);
+      SDValue Src = Op.getOperand(0);
+      if (Src.getValueSizeInBits() > ExtSizeInBits)
+        Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
+      else if (Src.getValueSizeInBits() < ExtSizeInBits) {
+        MVT SrcSVT = Src.getSimpleValueType().getScalarType();
+        MVT SrcVT =
+            MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
+        Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
+      }
+      return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
+                                               TLO.DAG, DL, ExtSizeInBits));
      }
        // Byte shifts by immediate.
      case X86ISD::VSHLDQ:
diff --git a/test/CodeGen/X86/oddsubvector.ll b/test/CodeGen/X86/oddsubvector.ll

index 9bc6c0f380a07674494cc37641020f2c70db95a6..bb384e118dca291de53d9af5a692f1ad2af943df 100644 (file)
--- a/test/CodeGen/X86/oddsubvector.ll
+++ b/test/CodeGen/X86/oddsubvector.ll
@@ -158,3 +158,35 @@ define void @PR40815(%struct.Mat4* nocapture readonly dereferenceable(64), %stru
    store <4 x float> %5, <4 x float>* %13, align 16
    ret void
  }
+
+define <16 x i32> @PR42819(<8 x i32>* %a0) {
+; SSE-LABEL: PR42819:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqu (%rdi), %xmm3
+; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    xorps %xmm2, %xmm2
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: PR42819:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,1,2]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm0[5,6,7]
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: PR42819:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovdqu (%rdi), %xmm0
+; AVX512-NEXT:    movw $-8192, %ax # imm = 0xE000
+; AVX512-NEXT:    kmovw %eax, %k1
+; AVX512-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    retq
+  %1 = load <8 x i32>, <8 x i32>* %a0, align 4
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %3 = shufflevector <16 x i32> zeroinitializer, <16 x i32> %2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+  ret <16 x i32> %3
+}
author	Hans Wennborg <hans@hanshq.net>
	Tue, 6 Aug 2019 08:08:11 +0000 (08:08 +0000)
committer	Hans Wennborg <hans@hanshq.net>
	Tue, 6 Aug 2019 08:08:11 +0000 (08:08 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/oddsubvector.ll		patch | blob | history