[x86] Fix PR20355 (for real). There are many layers to this bug.

author Chandler Carruth <chandlerc@gmail.com>

Sat, 26 Jul 2014 03:46:57 +0000 (03:46 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Sat, 26 Jul 2014 03:46:57 +0000 (03:46 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Sat, 26 Jul 2014 03:46:57 +0000 (03:46 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Sat, 26 Jul 2014 03:46:57 +0000 (03:46 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index ce06eb9cdebbc05cf0e90a084599f0c95309810c..2555132edc36058980f5343a8ed95a287354ecda 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15444,29 +15444,16 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
                               DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
  
    // Shuffle it back into the right order.
-  // The internal representation is big endian.
-  // In other words, a i64 bitcasted to 2 x i32 has its high part at index 0
-  // and its low part at index 1.
-  // Moreover, we have: Mul1 = <ae|cg> ; Mul2 = <bf|dh>
-  // Vector index                0 1   ;          2 3
-  // We want      <ae|bf|cg|dh>
-  // Vector index   0  2  1  3
-  // Since each element is seen as 2 x i32, we get:
-  // high_mask[i] = 2 x vector_index[i]
-  // low_mask[i] = 2 x vector_index[i] + 1
-  // where vector_index = {0, Size/2, 1, Size/2 + 1, ...,
-  //                       Size/2 - 1, Size/2 + Size/2 - 1}
-  // where Size is the number of element of the final vector.
    SDValue Highs, Lows;
    if (VT == MVT::v8i32) {
-    const int HighMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+    const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
      Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
-    const int LowMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+    const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
      Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
    } else {
-    const int HighMask[] = {0, 4, 2, 6};
+    const int HighMask[] = {1, 5, 3, 7};
      Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
-    const int LowMask[] = {1, 5, 3, 7};
+    const int LowMask[] = {0, 4, 2, 6}; // Even lanes hold the low halves.
      Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
    }
  
@@ -15484,9 +15471,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
      Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
    }
  
-  // THe first result of MUL_LOHI is actually the high value, followed by the
-  // low value.
-  SDValue Ops[] = {Highs, Lows};
+  // The first result of MUL_LOHI is actually the low value, followed by the
+  // high value.
+  SDValue Ops[] = {Lows, Highs};
    return DAG.getMergeValues(Ops, dl);
  }
  
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll

index ec1ce3da5e14c92d5df26590d5b0a37ef0c77171..62162c3b9779f30dc12dda8d412f8eeb32b4b24c 100644 (file)
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -220,3 +220,42 @@ define <2 x i16> @test12() {
  ; AVX-LABEL: test12:
  ; AVX: xorps
  }
+
+define <4 x i32> @PR20355(<4 x i32> %a) {
+; SSE-LABEL: PR20355:
+; SSE:         movdqa {{.*}}, %[[X1:xmm[0-9]+]]
+; SSE-NEXT:    movdqa %[[X1]], %[[X2:xmm[0-9]+]]
+; SSE-NEXT:    psrad  $31, %[[X2]]
+; SSE-NEXT:    pand   %xmm0, %[[X2]]
+; SSE-NEXT:    movdqa %xmm0, %[[X3:xmm[0-9]+]]
+; SSE-NEXT:    psrad  $31, %[[X3]]
+; SSE-NEXT:    pand   %[[X1]], %[[X3]]
+; SSE-NEXT:    paddd  %[[X2]], %[[X3]]
+; SSE-NEXT:    pshufd {{.*}} # [[X4:xmm[0-9]+]] = xmm0[1,0,3,0]
+; SSE-NEXT:    pmuludq %[[X1]], %xmm0
+; SSE-NEXT:    pshufd {{.*}} # [[X1]] = [[X1]][1,0,3,0]
+; SSE-NEXT:    pmuludq %[[X4]], %[[X1]]
+; SSE-NEXT:    shufps {{.*}} # xmm0 = xmm0[1,3],[[X1]][1,3]
+; SSE-NEXT:    pshufd {{.*}} # [[X5:xmm[0-9]+]] = xmm0[0,2,1,3]
+; SSE-NEXT:    psubd  %[[X3]], %[[X5]]
+; SSE-NEXT:    movdqa %[[X5]], %xmm0
+; SSE-NEXT:    psrld  $31, %xmm0
+; SSE-NEXT:    paddd  %[[X5]], %xmm0
+; SSE-NEXT:    retq
+;
+; SSE41-LABEL: PR20355:
+; SSE41:         movdqa {{.*}}, %[[X1:xmm[0-9]+]]
+; SSE41-NEXT:    pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,0,3,0]
+; SSE41-NEXT:    pmuldq %[[X1]], %xmm0
+; SSE41-NEXT:    pshufd {{.*}} # [[X1]] = [[X1]][1,0,3,0]
+; SSE41-NEXT:    pmuldq %[[X2]], %[[X1]]
+; SSE41-NEXT:    shufps {{.*}} # xmm0 = xmm0[1,3],[[X1]][1,3]
+; SSE41-NEXT:    pshufd {{.*}} # [[X3:xmm[0-9]+]] = xmm0[0,2,1,3]
+; SSE41-NEXT:    movdqa %[[X3]], %xmm0
+; SSE41-NEXT:    psrld  $31, %xmm0
+; SSE41-NEXT:    paddd  %[[X3]], %xmm0
+; SSE41-NEXT:    retq
+entry:
+  %sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %sdiv
+}
author	Chandler Carruth <chandlerc@gmail.com>
	Sat, 26 Jul 2014 03:46:57 +0000 (03:46 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Sat, 26 Jul 2014 03:46:57 +0000 (03:46 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vector-idiv.ll		patch \| blob \| history