[X86] Use vmovq for v4i64/v4f64/v8i64/v8f64 vzmovl.

author Craig Topper <craig.topper@intel.com>

Fri, 21 Jun 2019 17:24:21 +0000 (17:24 +0000)

committer Craig Topper <craig.topper@intel.com>

Fri, 21 Jun 2019 17:24:21 +0000 (17:24 +0000)
author Craig Topper <craig.topper@intel.com>
Fri, 21 Jun 2019 17:24:21 +0000 (17:24 +0000)
committer Craig Topper <craig.topper@intel.com>
Fri, 21 Jun 2019 17:24:21 +0000 (17:24 +0000)
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td

index 9f4a75c668910bbefff60e95a918f30a2c126d17..8315b867316a95d501091ceffdb4c5f23b2c2964 100644 (file)
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -4286,15 +4286,6 @@ let Predicates = [HasAVX512, OptForSize] in {
               (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
                (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-
    def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
              (SUBREG_TO_REG (i32 0),
               (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
@@ -4303,17 +4294,6 @@ let Predicates = [HasAVX512, OptForSize] in {
              (SUBREG_TO_REG (i32 0),
               (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
                (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
  }
  
  // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
@@ -4329,17 +4309,6 @@ let Predicates = [HasAVX512, OptForSpeed] in {
               (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                            (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                            (i8 3))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
  }
  
  let Predicates = [HasAVX512] in {
@@ -4452,6 +4421,28 @@ let Predicates = [HasAVX512] in {
              (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
    def : Pat<(v8i64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
+
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+
+  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
  }
  
  //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index c96bac6828f018380fdb53e50b754ab5df72ffd4..e25d2dca4047ed040e0473b6f5afb91b5ed67261 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -312,17 +312,6 @@ let Predicates = [UseAVX, OptForSize] in {
              (SUBREG_TO_REG (i32 0),
               (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
                (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDrr (v2f64 (V_SET0)),
-                       (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDrr (v2i64 (V_SET0)),
-                       (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
  }
  
  let Predicates = [UseSSE1] in {
@@ -4307,6 +4296,19 @@ let Predicates = [UseSSE2] in {
              (MOVZPQILo2PQIrr VR128:$src)>;
  }
  
+let Predicates = [UseAVX] in {
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+}
+
  //===---------------------------------------------------------------------===//
  // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
  //===---------------------------------------------------------------------===//
@@ -6319,17 +6321,6 @@ let Predicates = [HasAVX, OptForSpeed] in {
               (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                            (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                            (i8 3))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
  }
  
  // Prefer a movss or movsd over a blendps when optimizing for size. these were
diff --git a/test/CodeGen/X86/vec_extract-avx.ll b/test/CodeGen/X86/vec_extract-avx.ll

index a15424a763e193d1700740b52d2c2a789bd58da7..5b3fcb1e0233865cf50a663f24502fdea4a94ef4 100644 (file)
--- a/test/CodeGen/X86/vec_extract-avx.ll
+++ b/test/CodeGen/X86/vec_extract-avx.ll
@@ -144,19 +144,17 @@ define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
  ; X32:       # %bb.0:
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovups (%ecx), %xmm0
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vmovdqu (%ecx), %xmm0
+; X32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-NEXT:    vmovdqa %ymm0, (%eax)
  ; X32-NEXT:    vzeroupper
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: legal_vzmovl_2i64_4i64:
  ; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X64-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-NEXT:    vmovdqa %ymm0, (%rsi)
  ; X64-NEXT:    vzeroupper
  ; X64-NEXT:    retq
    %ld = load <2 x i64>, <2 x i64>* %in, align 8
@@ -198,19 +196,17 @@ define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
  ; X32:       # %bb.0:
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovups (%ecx), %xmm0
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vmovdqu (%ecx), %xmm0
+; X32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-NEXT:    vmovdqa %ymm0, (%eax)
  ; X32-NEXT:    vzeroupper
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: legal_vzmovl_2f64_4f64:
  ; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X64-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-NEXT:    vmovdqa %ymm0, (%rsi)
  ; X64-NEXT:    vzeroupper
  ; X64-NEXT:    retq
    %ld = load <2 x double>, <2 x double>* %in, align 8
diff --git a/test/CodeGen/X86/vector-extend-inreg.ll b/test/CodeGen/X86/vector-extend-inreg.ll

index d790cb54b61f123483357a7dd23c9964f1674263..f60bf4b010954ff4574cb46517689b3983554bb5 100644 (file)
--- a/test/CodeGen/X86/vector-extend-inreg.ll
+++ b/test/CodeGen/X86/vector-extend-inreg.ll
@@ -71,18 +71,17 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
  ; X32-AVX-NEXT:    andl $-128, %esp
  ; X32-AVX-NEXT:    subl $384, %esp # imm = 0x180
  ; X32-AVX-NEXT:    movl 40(%ebp), %ecx
-; X32-AVX-NEXT:    vbroadcastsd 32(%ebp), %ymm0
-; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; X32-AVX-NEXT:    vpbroadcastq 32(%ebp), %ymm0
+; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
  ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
  ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
  ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
  ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
  ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
  ; X32-AVX-NEXT:    vmovaps %ymm1, (%esp)
-; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
  ; X32-AVX-NEXT:    leal (%ecx,%ecx), %eax
  ; X32-AVX-NEXT:    andl $31, %eax
  ; X32-AVX-NEXT:    movl 128(%esp,%eax,4), %eax
@@ -101,14 +100,13 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
  ; X64-AVX-NEXT:    andq $-128, %rsp
  ; X64-AVX-NEXT:    subq $256, %rsp # imm = 0x100
  ; X64-AVX-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm3[3,1,2,3]
-; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; X64-AVX-NEXT:    vpermq {{.*#+}} ymm0 = ymm3[3,1,2,3]
+; X64-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
  ; X64-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
  ; X64-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
  ; X64-AVX-NEXT:    vmovaps %ymm1, (%rsp)
-; X64-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
  ; X64-AVX-NEXT:    andl $15, %edi
  ; X64-AVX-NEXT:    movq (%rsp,%rdi,8), %rax
  ; X64-AVX-NEXT:    movq %rbp, %rsp
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll

index 4ae23a0437e8e5d24a97c6a685af4d74bce55ba8..099aad76ba756fe0e3e6578530523ae713bfb558 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1505,8 +1505,7 @@ define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
  ; ALL-LABEL: insert_reg_and_zero_v4f64:
  ; ALL:       # %bb.0:
  ; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; ALL-NEXT:    retq
    %v = insertelement <4 x double> undef, double %a, i32 0
    %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -1987,8 +1986,7 @@ entry:
  define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
  ; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
  ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; ALL-NEXT:    retq
    %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    ret <4 x double> %b
@@ -1997,8 +1995,7 @@ define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
  define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
  ; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
  ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; ALL-NEXT:    retq
    %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    ret <4 x i64> %b
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll

index ba0707a5bba22e84420543a699a123045f8f8e29..dea5457baeacdb3563d223ba7825ccc4bc760211 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1973,8 +1973,7 @@ define <8 x double> @shuffle_v8f64_uuu2301(<8 x double> %a0, <8 x double> %a1) {
  define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
  ; ALL-LABEL: shuffle_v8i64_0zzzzzzz:
  ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; ALL-NEXT:    ret{{[l|q]}}
    %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    ret <8 x i64> %shuffle
@@ -1983,8 +1982,7 @@ define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
  define <8 x double> @shuffle_v8f64_0zzzzzzz(<8 x double> %a) {
  ; ALL-LABEL: shuffle_v8f64_0zzzzzzz:
  ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; ALL-NEXT:    ret{{[l|q]}}
    %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    ret <8 x double> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

index 1d416edbfda4800591d99efaf6acafb61d0141b5..7a8a7d32623893e6d205a3150e9a76648c675de0 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -380,8 +380,7 @@ define <4 x i64> @combine_pshufb_as_zext128(<32 x i8> %a0) {
  define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
  ; CHECK-LABEL: combine_pshufb_as_vzmovl_64:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; CHECK-NEXT:    ret{{[l|q]}}
    %1 = bitcast <4 x double> %a0 to <32 x i8>
    %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
author	Craig Topper <craig.topper@intel.com>
	Fri, 21 Jun 2019 17:24:21 +0000 (17:24 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Fri, 21 Jun 2019 17:24:21 +0000 (17:24 +0000)
lib/Target/X86/X86InstrAVX512.td		patch | blob | history
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/vec_extract-avx.ll		patch | blob | history
test/CodeGen/X86/vector-extend-inreg.ll		patch | blob | history
test/CodeGen/X86/vector-shuffle-256-v4.ll		patch | blob | history
test/CodeGen/X86/vector-shuffle-512-v8.ll		patch | blob | history
test/CodeGen/X86/vector-shuffle-combining-avx2.ll		patch | blob | history