return true;
}
-bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
- unsigned &InsertAtByte, bool &Swap, bool IsLE) {
// Check that the mask is shuffling words
+static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
unsigned B1 = N->getMaskElt(i*4+1);
return false;
}
+ return true;
+}
+
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ unsigned &InsertAtByte, bool &Swap, bool IsLE) {
+ if (!isWordShuffleMask(N))
+ return false;
+
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
return false;
}
+bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+ // Ensure each byte index of the word is consecutive.
+ if (!isWordShuffleMask(N))
+ return false;
+
+ // Now we look at mask elements 0,4,8,12, which are the beginning of words.
+ unsigned M0 = N->getMaskElt(0) / 4;
+ unsigned M1 = N->getMaskElt(4) / 4;
+ unsigned M2 = N->getMaskElt(8) / 4;
+ unsigned M3 = N->getMaskElt(12) / 4;
+
+ // If both vector operands for the shuffle are the same vector, the mask will
+ // contain only elements from the first one and the second one will be undef.
+ if (N->getOperand(1).isUndef()) {
+ assert(M0 < 4 && "Indexing into an undef vector?");
+ if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
+ return false;
+
+ ShiftElts = IsLE ? (4 - M0) % 4 : M0;
+ Swap = false;
+ return true;
+ }
+
+ // Ensure each word index of the ShuffleVector Mask is consecutive.
+ if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
+ return false;
+
+ if (IsLE) {
+ if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
+ // Input vectors don't need to be swapped if the leading element
+ // of the result is one of the 3 left elements of the second vector
+ // (or if there is no shift to be done at all).
+ Swap = false;
+ ShiftElts = (8 - M0) % 8;
+ } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
+ // Input vectors need to be swapped if the leading element
+ // of the result is one of the 3 left elements of the first vector
+ // (or if we're shifting by 4 - thereby simply swapping the vectors).
+ Swap = true;
+ ShiftElts = (4 - M0) % 4;
+ }
+
+ return true;
+ } else { // BE
+ if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
+ // Input vectors don't need to be swapped if the leading element
+ // of the result is one of the 4 elements of the first vector.
+ Swap = false;
+ ShiftElts = M0;
+ } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
+ // Input vectors need to be swapped if the leading element
+ // of the result is one of the 4 elements of the right vector.
+ Swap = true;
+ ShiftElts = M0 - 4;
+ }
+
+ return true;
+ }
+}
+
+
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+
+ if (Subtarget.hasVSX() &&
+ PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+ if (Swap)
+ std::swap(V1, V2);
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ SDValue Conv2 =
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
+
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
+ }
+
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
/// a VMRGEW or VMRGOW instruction
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG);
-
+ /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXSLDWI instruction.
+ bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE);
+
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
]>;
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
- SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>;
def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable.
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
+
// Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
(SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
ret <16 x i8> %strided.vec
; CHECK-LABEL: @test2
-; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: xxsldwi 34, 34, 34, 3
; CHECK: blr
}
define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
ret <4 x float> %vecins
define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
ret <4 x float> %vecins
define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
ret <4 x float> %vecins
define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x float> %vecins
define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
ret <4 x i32> %vecins
define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
ret <4 x i32> %vecins
define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
ret <4 x i32> %vecins
define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i32> %vecins
define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
ret <4 x float> %vecins
define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
ret <4 x float> %vecins
define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
ret <4 x float> %vecins
define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
ret <4 x float> %vecins
define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
ret <4 x i32> %vecins
define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
ret <4 x i32> %vecins
define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
ret <4 x i32> %vecins
define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
-; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
-; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
ret <4 x i32> %vecins
%6 = shufflevector <12 x float> %5, <12 x float> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
ret <4 x float> %6
-; CHECK: vsldoi
+; CHECK: xxsldwi
; CHECK-NEXT: vmrghw
; CHECK-NEXT: vmrglw
-; CHECK-NEXT: vsldoi
-; CHECK-NEXT: vsldoi
-; CHECK-NEXT: vsldoi
+; CHECK-NEXT: xxsldwi
+; CHECK-NEXT: xxsldwi
+; CHECK-NEXT: xxsldwi
; CHECK-NEXT: blr
}
--- /dev/null
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-BE
+
+; Possible LE ShuffleVector masks (Case 1):
+; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3)
+; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2)
+; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1)
+; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0)
+; which targets at:
+; xxsldwi a, b, 0
+; xxsldwi a, b, 1
+; xxsldwi a, b, 2
+; xxsldwi a, b, 3
+; Possible LE Swap ShuffleVector masks (Case 2):
+; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7)
+; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6)
+; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5)
+; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4)
+; which targets at:
+; xxsldwi b, a, 0
+; xxsldwi b, a, 1
+; xxsldwi b, a, 2
+; xxsldwi b, a, 3
+; Possible LE ShuffleVector masks when a == b, b is undef (Case 3):
+; ShuffleVector((vector int)a, vector(int)a, 0, 1, 2, 3)
+; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2)
+; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1)
+; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0)
+; which targets at:
+; xxsldwi a, a, 0
+; xxsldwi a, a, 1
+; xxsldwi a, a, 2
+; xxsldwi a, a, 3
+
+; Possible BE ShuffleVector masks (Case 4):
+; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3)
+; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4)
+; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5)
+; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6)
+; which targets at:
+; xxsldwi b, a, 0
+; xxsldwi b, a, 1
+; xxsldwi a, a, 2
+; xxsldwi a, a, 3
+; Possible BE Swap ShuffleVector masks (Case 5):
+; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7)
+; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0)
+; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1)
+; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2)
+; which targets at:
+; xxsldwi b, a, 0
+; xxsldwi b, a, 1
+; xxsldwi b, a, 2
+; xxsldwi b, a, 3
+; Possible BE ShuffleVector masks when a == b, b is undef (Case 6):
+; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3)
+; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0)
+; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1)
+; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2)
+; which targets at:
+; xxsldwi a, a, 0
+; xxsldwi a, a, 1
+; xxsldwi a, a, 2
+; xxsldwi a, a, 3
+
+define <4 x i32> @check_le_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_0
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_1
+; CHECK-LE: xxsldwi 34, 34, 35, 1
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_2
+; CHECK-LE: xxsldwi 34, 34, 35, 2
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_3
+; CHECK-LE: xxsldwi 34, 34, 35, 3
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_0
+; CHECK-LE; vmr 2, 3
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_1
+; CHECK-LE: xxsldwi 34, 35, 34, 1
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_2
+; CHECK-LE: xxsldwi 34, 35, 34, 2
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_3
+; CHECK-LE: xxsldwi 34, 35, 34, 3
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_vec_sldwi_va_undef_0(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_0
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_vec_sldwi_va_undef_1(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_le_vec_sldwi_va_undef_1
+; CHECK-LE: xxsldwi 34, 34, 34, 1
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_vec_sldwi_va_undef_2(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_2
+; CHECK-LE: xxswapd 34, 34
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_le_vec_sldwi_va_undef_3(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_3
+; CHECK-LE: xxsldwi 34, 34, 34, 3
+; CHECK-LE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_0
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_1
+; CHECK-BE: xxsldwi 34, 34, 35, 1
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_2
+; CHECK-BE: xxsldwi 34, 34, 35, 2
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_3
+; CHECK-BE: xxsldwi 34, 34, 35, 3
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_0
+; CHECK-LE; vmr 2, 3
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_1
+; CHECK-BE: xxsldwi 34, 35, 34, 1
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_2
+; CHECK-BE: xxsldwi 34, 35, 34, 2
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_3
+; CHECK-BE: xxsldwi 34, 35, 34, 3
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_undef_0(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %0
+; CHECK-LE-LABEL: @check_be_vec_sldwi_va_undef_0
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_undef_1(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_1
+; CHECK-BE: xxsldwi 34, 34, 34, 1
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_undef_2(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_2
+; CHECK-BE: xxswapd 34, 34
+; CHECK-BE: blr
+}
+
+define <4 x i32> @check_be_vec_sldwi_va_undef_3(<4 x i32> %VA) {
+entry:
+ %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+ ret <4 x i32> %0
+; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_3
+; CHECK-BE: xxsldwi 34, 34, 34, 3
+; CHECK-BE: blr
+}
+
+; More test cases to test different types of vector inputs
+define <16 x i8> @test_le_vec_sldwi_v16i8_v16i8(<16 x i8> %VA, <16 x i8> %VB) {
+ entry:
+ %0 = shufflevector <16 x i8> %VA, <16 x i8> %VB,<16 x i32> <i32 28, i32 29, i32 30, i32 31,i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ ret <16 x i8> %0
+; CHECK-LE-LABEL: @test_le_vec_sldwi_v16i8_v16i8
+; CHECK-LE: xxsldwi 34, 34, 35, 1
+; CHECK-LE: blr
+}
+
+define <8 x i16> @test_le_vec_sldwi_v8i16_v8i16(<8 x i16> %VA, <8 x i16> %VB) {
+ entry:
+ %0 = shufflevector <8 x i16> %VA, <8 x i16> %VB,<8 x i32> <i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+ ret <8 x i16> %0
+; CHECK-LE-LABEL: @test_le_vec_sldwi_v8i16_v8i16
+; CHECK-LE: xxsldwi 34, 34, 35, 1
+; CHECK-LE: blr
+}
+
+; Note here xxpermdi 34, 34, 35, 2 <=> xxsldwi 34, 34, 35, 2
+define <2 x i64> @test_be_vec_sldwi_v2i64_v2i64(<2 x i64> %VA, <2 x i64> %VB) {
+ entry:
+ %0 = shufflevector <2 x i64> %VA, <2 x i64> %VB,<2 x i32> <i32 3, i32 0>
+ ret <2 x i64> %0
+; CHECK-LE-LABEL: @test_be_vec_sldwi_v2i64_v2i64
+; CHECK-LE: xxpermdi 34, 34, 35, 2
+; CHECK-LE: blr
+}