SDValue Vec;
if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
+ } else if (VT.isFloatingPoint()) {
+ Vec = DAG.getConstantFP(+0.0, dl, VT);
} else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type");
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
+let Predicates = [HasAVX512] in {
+def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
+}
+
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
[(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
+let Predicates = [HasAVX512] in {
+def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
+}
+
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
-let Predicates = [NoAVX512] in
+let Predicates = [NoAVX512] in {
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+}
// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
[(set VR256:$dst, (v8i32 immAllZerosV))]>;
}
+let Predicates = [NoAVX512] in {
+def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
+}
+
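// Why the FP immAllZerosV patterns above (and the AVX-512 ones earlier) are
// needed: a hedged caller-side sketch, assuming the getZeroVector helper from
// the C++ hunk at the top of this patch and the usual Subtarget/DAG/dl names
// in scope. With that change, the canonical zero for an FP vector type is an
// all-zeros build_vector of FP constants instead of a bitcast of an integer
// zero, so isel has to match the FP-typed all-zeros node directly to keep
// selecting the SET0 pseudos.
SDValue Zero = getZeroVector(MVT::v8f32, Subtarget, DAG, dl);
// Before: (v8f32 (bitcast (v8i32 all-zeros)))  -> matched through the v8i32 pattern.
// After:  (v8f32 all-zeros build_vector)       -> needs the v8f32 Pat above, which
//                                                 still selects AVX_SET0 (or
//                                                 AVX512_256_SET0 under AVX-512).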
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqu 0, %xmm0
; CHECK-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vmulps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vhaddps %ymm4, %ymm0, %ymm0
; CHECK-NEXT: vsubps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vhaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vhaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define void @zero256() nounwind ssp {
; CHECK-LABEL: zero256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movq _x@{{.*}}(%rip), %rax
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: movq _x@{{.*}}(%rip), %rax
; CHECK-NEXT: vmovaps %ymm0, (%rax)
; CHECK-NEXT: movq _y@{{.*}}(%rip), %rax
; CHECK-NEXT: vmovaps %ymm0, (%rax)
; X32-LABEL: test_mm_i32gather_pd:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X32-NEXT: vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1
; X32-NEXT: vmovapd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_i32gather_pd:
; X64: # %bb.0:
-; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovapd %xmm1, %xmm0
; X64-NEXT: retq
}
define void @test_zero_v4f32_align1(<4 x float>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v4f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v4f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v4f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v4f32_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: retq
;
; AVX-LABEL: test_zero_v4f32_align1:
; AVX: # %bb.0:
}
define void @test_zero_v2i64_align1(<2 x i64>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v2i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v2i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v2i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v2i64_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: retq
;
; AVX-LABEL: test_zero_v2i64_align1:
; AVX: # %bb.0:
}
define void @test_zero_v4i32_align1(<4 x i32>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v4i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v4i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v4i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v4i32_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: retq
;
; AVX-LABEL: test_zero_v4i32_align1:
; AVX: # %bb.0:
}
define void @test_zero_v8i16_align1(<8 x i16>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v8i16_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: retq
;
; AVX-LABEL: test_zero_v8i16_align1:
; AVX: # %bb.0:
}
define void @test_zero_v16i8_align1(<16 x i8>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v16i8_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: retq
;
; AVX-LABEL: test_zero_v16i8_align1:
; AVX: # %bb.0:
;
; SSE4A-LABEL: test_zero_v8f32_align1:
; SSE4A: # %bb.0:
+; SSE4A-NEXT: xorl %eax, %eax
+; SSE4A-NEXT: movntiq %rax, 8(%rdi)
+; SSE4A-NEXT: movntiq %rax, 24(%rdi)
; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
;
; SSE4A-LABEL: test_zero_v16f32_align1:
; SSE4A: # %bb.0:
+; SSE4A-NEXT: xorl %eax, %eax
+; SSE4A-NEXT: movntiq %rax, 24(%rdi)
+; SSE4A-NEXT: movntiq %rax, 8(%rdi)
+; SSE4A-NEXT: movntiq %rax, 56(%rdi)
+; SSE4A-NEXT: movntiq %rax, 40(%rdi)
; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT: retq
;
;
; AVX1-LABEL: packsswb_icmp_zero_trunc_256:
; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = zero,zero,ymm0[0,1]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
-; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = zero,zero,ymm0[0,1]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; X32_AVX512-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX512-NEXT: vminss LCPI0_2, %xmm0, %xmm0
+; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; X32_AVX512-NEXT: vcvttss2si %xmm0, %eax
; X32_AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64_AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX512-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; X64_AVX512-NEXT: vcvttss2si %xmm0, %eax
; X64_AVX512-NEXT: ## kill: def $ax killed $ax killed $eax