DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
- // The internal representation is big endian.
- // In other words, a i64 bitcasted to 2 x i32 has its high part at index 0
- // and its low part at index 1.
- // Moreover, we have: Mul1 = <ae|cg> ; Mul2 = <bf|dh>
- // Vector index 0 1 ; 2 3
- // We want <ae|bf|cg|dh>
- // Vector index 0 2 1 3
- // Since each element is seen as 2 x i32, we get:
- // high_mask[i] = 2 x vector_index[i]
- // low_mask[i] = 2 x vector_index[i] + 1
- // where vector_index = {0, Size/2, 1, Size/2 + 1, ...,
- // Size/2 - 1, Size/2 + Size/2 - 1}
- // where Size is the number of element of the final vector.
SDValue Highs, Lows;
+ // Each 64-bit product, viewed as 2 x i32, has its low half at the even
+ // index and its high half at the odd index (x86 is little endian).
+ // Mul1 = <ae|cg> and Mul2 = <bf|dh>, and we want <ae|bf|cg|dh>, so the
+ // masks interleave the two inputs: mask indices below the element count
+ // select from Mul1, the remaining ones select from Mul2.
if (VT == MVT::v8i32) {
- const int HighMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
} else {
- const int HighMask[] = {0, 4, 2, 6};
+ const int HighMask[] = {1, 5, 3, 7};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {1, 5, 3, 7};
+ // Low halves live at the even indices: 0 and 2 in Mul1, 4 and 6 in Mul2.
+ const int LowMask[] = {0, 4, 2, 6};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
}
Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
}
- // THe first result of MUL_LOHI is actually the high value, followed by the
- // low value.
- SDValue Ops[] = {Highs, Lows};
+ // The first result of MUL_LOHI is actually the low value, followed by the
+ // high value.
+ SDValue Ops[] = {Lows, Highs};
return DAG.getMergeValues(Ops, dl);
}
; AVX-LABEL: test12:
; AVX: xorps
}
+
+; Regression test for PR20355: signed division of a <4 x i32> vector by the
+; splat constant 3. The SSE and SSE41 check prefixes pin the expected
+; instruction sequences; in both, the shufps/pshufd pair selects elements
+; [1,3] of each pmuludq/pmuldq result, i.e. the odd i32 lanes of the 64-bit
+; products (presumably the high halves of the multiply -- confirm against
+; the MUL_LOHI lowering).
+define <4 x i32> @PR20355(<4 x i32> %a) {
+; SSE-LABEL: PR20355:
+; SSE: movdqa {{.*}}, %[[X1:xmm[0-9]+]]
+; SSE-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
+; SSE-NEXT: psrad $31, %[[X2]]
+; SSE-NEXT: pand %xmm0, %[[X2]]
+; SSE-NEXT: movdqa %xmm0, %[[X3:xmm[0-9]+]]
+; SSE-NEXT: psrad $31, %[[X3]]
+; SSE-NEXT: pand %[[X1]], %[[X3]]
+; SSE-NEXT: paddd %[[X2]], %[[X3]]
+; SSE-NEXT: pshufd {{.*}} # [[X4:xmm[0-9]+]] = xmm0[1,0,3,0]
+; SSE-NEXT: pmuludq %[[X1]], %xmm0
+; SSE-NEXT: pshufd {{.*}} # [[X1]] = [[X1]][1,0,3,0]
+; SSE-NEXT: pmuludq %[[X4]], %[[X1]]
+; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[1,3],[[X1]][1,3]
+; SSE-NEXT: pshufd {{.*}} # [[X5:xmm[0-9]+]] = xmm0[0,2,1,3]
+; SSE-NEXT: psubd %[[X3]], %[[X5]]
+; SSE-NEXT: movdqa %[[X5]], %xmm0
+; SSE-NEXT: psrld $31, %xmm0
+; SSE-NEXT: paddd %[[X5]], %xmm0
+; SSE-NEXT: retq
+;
+; SSE41-LABEL: PR20355:
+; SSE41: movdqa {{.*}}, %[[X1:xmm[0-9]+]]
+; SSE41-NEXT: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,0,3,0]
+; SSE41-NEXT: pmuldq %[[X1]], %xmm0
+; SSE41-NEXT: pshufd {{.*}} # [[X1]] = [[X1]][1,0,3,0]
+; SSE41-NEXT: pmuldq %[[X2]], %[[X1]]
+; SSE41-NEXT: shufps {{.*}} # xmm0 = xmm0[1,3],[[X1]][1,3]
+; SSE41-NEXT: pshufd {{.*}} # [[X3:xmm[0-9]+]] = xmm0[0,2,1,3]
+; SSE41-NEXT: movdqa %[[X3]], %xmm0
+; SSE41-NEXT: psrld $31, %xmm0
+; SSE41-NEXT: paddd %[[X3]], %xmm0
+; SSE41-NEXT: retq
+entry:
+ %sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %sdiv
+}