[DAGCombine] Fix a miscompile when reducing BUILD_VECTORs to a shuffle

author Justin Bogner <mail@justinbogner.com>

Tue, 19 Mar 2019 16:52:00 +0000 (16:52 +0000)

committer Justin Bogner <mail@justinbogner.com>

Tue, 19 Mar 2019 16:52:00 +0000 (16:52 +0000)
author Justin Bogner <mail@justinbogner.com>
Tue, 19 Mar 2019 16:52:00 +0000 (16:52 +0000)
committer Justin Bogner <mail@justinbogner.com>
Tue, 19 Mar 2019 16:52:00 +0000 (16:52 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index cb19c0a4e0fa20d18bca055c46b895faf2498f3a..4d85532a81bebc59fcf9e1cc7540ba7f0d5a9a41 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -475,7 +475,8 @@ namespace {
      SDValue reduceBuildVecToShuffle(SDNode *N);
      SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                    ArrayRef<int> VectorMask, SDValue VecIn1,
-                                  SDValue VecIn2, unsigned LeftIdx);
+                                  SDValue VecIn2, unsigned LeftIdx,
+                                  bool DidSplitVec);
      SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
  
      /// Walk up chain skipping non-aliasing memory nodes,
@@ -16416,7 +16417,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                             ArrayRef<int> VectorMask,
                                             SDValue VecIn1, SDValue VecIn2,
-                                           unsigned LeftIdx) {
+                                           unsigned LeftIdx, bool DidSplitVec) {
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
  
@@ -16424,17 +16425,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
    EVT InVT1 = VecIn1.getValueType();
    EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
  
-  unsigned Vec2Offset = 0;
    unsigned NumElems = VT.getVectorNumElements();
    unsigned ShuffleNumElems = NumElems;
  
-  // In case both the input vectors are extracted from same base
-  // vector we do not need extra addend (Vec2Offset) while
-  // computing shuffle mask.
-  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
-    Vec2Offset = InVT1.getVectorNumElements();
+  // If we artificially split a vector in two already, then the offsets in the
+  // operands will all be based off of VecIn1, even those in VecIn2.
+  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
  
    // We can't generate a shuffle node with mismatched input and output types.
    // Try to make the types match the type of the output.
@@ -16693,6 +16689,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
    // vector, then split the vector efficiently based on the maximum
    // vector access index and adjust the VectorMask and
    // VecIn accordingly.
+  bool DidSplitVec = false;
    if (VecIn.size() == 2) {
      unsigned MaxIndex = 0;
      unsigned NearestPow2 = 0;
@@ -16723,6 +16720,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
          VecIn.pop_back();
          VecIn.push_back(VecIn1);
          VecIn.push_back(VecIn2);
+        DidSplitVec = true;
  
          for (unsigned i = 0; i < NumElems; i++) {
            if (VectorMask[i] <= 0)
@@ -16760,7 +16758,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
          (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
  
      if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
-                                                VecRight, LeftIdx))
+                                                VecRight, LeftIdx, DidSplitVec))
        Shuffles.push_back(Shuffle);
      else
        return SDValue();
diff --git a/test/CodeGen/X86/shuffle-extract-subvector.ll b/test/CodeGen/X86/shuffle-extract-subvector.ll

new file mode 100644 (file)

index 0000000..02ec475
--- /dev/null
+++ b/test/CodeGen/X86/shuffle-extract-subvector.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzwl (%rdi), %r8d
+; CHECK-NEXT:    movzwl 2(%rdi), %r9d
+; CHECK-NEXT:    movzwl 4(%rdi), %r11d
+; CHECK-NEXT:    movzwl 6(%rdi), %edi
+; CHECK-NEXT:    movzwl (%rsi), %r10d
+; CHECK-NEXT:    movzwl 2(%rsi), %ecx
+; CHECK-NEXT:    movzwl 4(%rsi), %eax
+; CHECK-NEXT:    movzwl 6(%rsi), %esi
+; CHECK-NEXT:    movw %si, 14(%rdx)
+; CHECK-NEXT:    movw %di, 12(%rdx)
+; CHECK-NEXT:    movw %ax, 10(%rdx)
+; CHECK-NEXT:    movw %r11w, 8(%rdx)
+; CHECK-NEXT:    movw %cx, 6(%rdx)
+; CHECK-NEXT:    movw %r9w, 4(%rdx)
+; CHECK-NEXT:    movw %r10w, 2(%rdx)
+; CHECK-NEXT:    movw %r8w, (%rdx)
+; CHECK-NEXT:    retq
+  %tmp4 = load <4 x half>, <4 x half>* %a
+  %tmp5 = load <4 x half>, <4 x half>* %b
+  %tmp7 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 0, i32 4>
+  %tmp8 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 1, i32 5>
+  %tmp9 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 2, i32 6>
+  %tmp10 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 3, i32 7>
+  %tmp11 = extractelement <2 x half> %tmp7, i32 0
+  %tmp12 = insertelement <8 x half> undef, half %tmp11, i32 0
+  %tmp13 = extractelement <2 x half> %tmp7, i32 1
+  %tmp14 = insertelement <8 x half> %tmp12, half %tmp13, i32 1
+  %tmp15 = extractelement <2 x half> %tmp8, i32 0
+  %tmp16 = insertelement <8 x half> %tmp14, half %tmp15, i32 2
+  %tmp17 = extractelement <2 x half> %tmp8, i32 1
+  %tmp18 = insertelement <8 x half> %tmp16, half %tmp17, i32 3
+  %tmp19 = extractelement <2 x half> %tmp9, i32 0
+  %tmp20 = insertelement <8 x half> %tmp18, half %tmp19, i32 4
+  %tmp21 = extractelement <2 x half> %tmp9, i32 1
+  %tmp22 = insertelement <8 x half> %tmp20, half %tmp21, i32 5
+  %tmp23 = extractelement <2 x half> %tmp10, i32 0
+  %tmp24 = insertelement <8 x half> %tmp22, half %tmp23, i32 6
+  %tmp25 = extractelement <2 x half> %tmp10, i32 1
+  %tmp26 = insertelement <8 x half> %tmp24, half %tmp25, i32 7
+  store <8 x half> %tmp26, <8 x half>* %c
+  ret void
+}
author	Justin Bogner <mail@justinbogner.com>
	Tue, 19 Mar 2019 16:52:00 +0000 (16:52 +0000)
committer	Justin Bogner <mail@justinbogner.com>
	Tue, 19 Mar 2019 16:52:00 +0000 (16:52 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/X86/shuffle-extract-subvector.ll	[new file with mode: 0644]	patch \| blob