[X86] Add a combine to turn (insert_subvector zero, (insert_subvector zero, X, Idx1), Idx2) into a single insert_subvector of X directly into the larger zero vector.

author Craig Topper <craig.topper@intel.com>

Sun, 3 Sep 2017 22:25:52 +0000 (22:25 +0000)

committer Craig Topper <craig.topper@intel.com>

Sun, 3 Sep 2017 22:25:52 +0000 (22:25 +0000)
author Craig Topper <craig.topper@intel.com>
Sun, 3 Sep 2017 22:25:52 +0000 (22:25 +0000)
committer Craig Topper <craig.topper@intel.com>
Sun, 3 Sep 2017 22:25:52 +0000 (22:25 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 2cb6ec376c437719ba34ac797a5654dd55c63952..58ea9e0ffd929976618ee93d99443b633127f3a5 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -35656,10 +35656,21 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
    MVT SubVecVT = SubVec.getSimpleValueType();
  
-  // Inserting zeros into zeros is a nop.
-  if (ISD::isBuildVectorAllZeros(Vec.getNode()) &&
-      ISD::isBuildVectorAllZeros(SubVec.getNode()))
-    return Vec;
+  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
+    // Inserting zeros into zeros is a nop.
+    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+      return Vec;
+
+    // If we're inserting into a zero vector and then into a larger zero vector,
+    // insert into the larger one directly at the sum of the two insert indices.
+    if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
+      unsigned Idx2Val = SubVec.getConstantOperandVal(2); // inner insert index
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec,
+                         SubVec.getOperand(1),
+                         DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
+    }
+  }
  
    // If this is an insert of an extract, combine to a shuffle. Don't do this
    // if the insert or extract can be represented with a subregister operation.
diff --git a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

index e5487ae2b101951c4a759fab4e8265fd663e45e4..d9a0c4e02953ef24d5e58e3307e18fa408ccec62 100644 (file)
--- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -1134,13 +1134,11 @@ define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind {
  ; X32-LABEL: test_mm512_zextpd128_pd512:
  ; X32:       # BB#0:
  ; X32-NEXT:    vmovaps %xmm0, %xmm0
-; X32-NEXT:    vmovaps %ymm0, %ymm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm512_zextpd128_pd512:
  ; X64:       # BB#0:
  ; X64-NEXT:    vmovaps %xmm0, %xmm0
-; X64-NEXT:    vmovaps %ymm0, %ymm0
  ; X64-NEXT:    retq
    %res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
    ret <8 x double> %res
@@ -1196,13 +1194,11 @@ define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind {
  ; X32-LABEL: test_mm512_zextsi128_si512:
  ; X32:       # BB#0:
  ; X32-NEXT:    vmovaps %xmm0, %xmm0
-; X32-NEXT:    vmovaps %ymm0, %ymm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm512_zextsi128_si512:
  ; X64:       # BB#0:
  ; X64-NEXT:    vmovaps %xmm0, %xmm0
-; X64-NEXT:    vmovaps %ymm0, %ymm0
  ; X64-NEXT:    retq
    %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
    ret <8 x i64> %res
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll

index 7049a72518a9d13a52a9a00faa718770be76ee4e..716f7767935ee7a978f8e8d395f564fbad927dde 100644 (file)
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -107,14 +107,12 @@ define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noin
  ; ALL-LABEL: merge_8f64_f64_12zzuuzz:
  ; ALL:       # BB#0:
  ; ALL-NEXT:    vmovaps 8(%rdi), %xmm0
-; ALL-NEXT:    vmovaps %ymm0, %ymm0
  ; ALL-NEXT:    retq
  ;
  ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
  ; X32-AVX512F:       # BB#0:
  ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X32-AVX512F-NEXT:    vmovaps 8(%eax), %xmm0
-; X32-AVX512F-NEXT:    vmovaps %ymm0, %ymm0
  ; X32-AVX512F-NEXT:    retl
    %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
    %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
author	Craig Topper <craig.topper@intel.com>
	Sun, 3 Sep 2017 22:25:52 +0000 (22:25 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sun, 3 Sep 2017 22:25:52 +0000 (22:25 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/avx512-intrinsics-fast-isel.ll		patch \| blob \| history
test/CodeGen/X86/merge-consecutive-loads-512.ll		patch \| blob \| history