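Merging r354034 and r354117: set all elements of the lane mask in lowerVectorShuffleAsLanePermuteAndPermute to avoid undef propagation, and add the test/CodeGen/X86/pr40730.ll regression test.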

author Hans Wennborg <hans@hanshq.net>

Mon, 18 Feb 2019 11:21:42 +0000 (11:21 +0000)

committer Hans Wennborg <hans@hanshq.net>

Mon, 18 Feb 2019 11:21:42 +0000 (11:21 +0000)
author Hans Wennborg <hans@hanshq.net>
Mon, 18 Feb 2019 11:21:42 +0000 (11:21 +0000)
committer Hans Wennborg <hans@hanshq.net>
Mon, 18 Feb 2019 11:21:42 +0000 (11:21 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index f4f37a894620ed33dc546973aa5ba8fc8bc4fc94..e1a6d22f0616557d1bf6634e4f8b87cf6504f1e2 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13884,7 +13884,6 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
    int NumEltsPerLane = NumElts / NumLanes;
  
    SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
-  SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
    SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
  
    for (int i = 0; i != NumElts; ++i) {
@@ -13899,10 +13898,20 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
        return SDValue();
      SrcLaneMask[DstLane] = SrcLane;
  
-    LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
      PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
    }
  
+  // Make sure we set all elements of the lane mask, to avoid undef propagation.
+  SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+  for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
+    int SrcLane = SrcLaneMask[DstLane];
+    if (0 <= SrcLane)
+      for (int j = 0; j != NumEltsPerLane; ++j) {
+        LaneMask[(DstLane * NumEltsPerLane) + j] =
+            (SrcLane * NumEltsPerLane) + j;
+      }
+  }
+
    // If we're only shuffling a single lowest lane and the rest are identity
    // then don't bother.
    // TODO - isShuffleMaskInputInPlace could be extended to something like this.
diff --git a/test/CodeGen/X86/pr40730.ll b/test/CodeGen/X86/pr40730.ll

new file mode 100644 (file)

index 0000000..12b372d
--- /dev/null
+++ b/test/CodeGen/X86/pr40730.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shuffle_v8i32_0dcd3f14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; CHECK-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[3,1,1,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
+; CHECK-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
+; CHECK-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK:      .LCPI1_0:
+; CHECK-NEXT: .quad   60129542157
+; CHECK-NEXT: .quad   60129542157
+; CHECK-NEXT: .quad   68719476736
+; CHECK-NEXT: .quad   60129542157
+
+define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0)  {
+; CHECK-LABEL: shuffle_v8i32_0dcd3f14_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,1,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5],ymm0[6,7]
+; CHECK-NEXT:    retq
+  %res = shufflevector <8 x i32> %a0, <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
+  ret <8 x i32> %res
+}
author	Hans Wennborg <hans@hanshq.net>
	Mon, 18 Feb 2019 11:21:42 +0000 (11:21 +0000)
committer	Hans Wennborg <hans@hanshq.net>
	Mon, 18 Feb 2019 11:21:42 +0000 (11:21 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/pr40730.ll	[new file with mode: 0644]	patch | blob