case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
+ case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
return SDValue();
}
+// Emit the vector_shuffle that places the extracted elements of Input at the
+// positions given by the CorrectElems encoding, then feed the shuffled vector
+// into a PPCISD::SExtVElems node producing N's result type.
+//
+// Elems and CorrectElems are consumed one byte per operand of N, starting
+// with the least significant byte. The low nibble of each byte is used on
+// little-endian targets and the high nibble on big-endian targets.
+static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
+                                      SDValue Input, uint64_t Elems,
+                                      uint64_t CorrectElems) {
+  SDLoc dl(N);
+  EVT InputVT = Input.getValueType();
+  const bool IsLE = DAG.getDataLayout().isLittleEndian();
+
+  // Start with every lane undefined; only the lanes named by CorrectElems
+  // are filled in below.
+  SmallVector<int, 16> ShuffleMask(InputVT.getVectorNumElements(), -1);
+
+  for (unsigned OpIdx = 0, NumOps = N->getNumOperands(); OpIdx != NumOps;
+       ++OpIdx) {
+    // The destination lane comes from CorrectElems, the source lane from
+    // Elems; both are read from the nibble matching the target endianness.
+    uint64_t DstLane = IsLE ? (CorrectElems & 0xF) : ((CorrectElems >> 4) & 0xF);
+    uint64_t SrcLane = IsLE ? (Elems & 0xF) : ((Elems >> 4) & 0xF);
+    ShuffleMask[DstLane] = SrcLane;
+
+    // Advance both encodings to the next operand's byte.
+    CorrectElems >>= 8;
+    Elems >>= 8;
+  }
+
+  SDValue Shuffle = DAG.getVectorShuffle(InputVT, dl, Input,
+                                         DAG.getUNDEF(InputVT), ShuffleMask);
+
+  return DAG.getNode(PPCISD::SExtVElems, dl, N->getValueType(0), Shuffle);
+}
+
+// Look for build_vector patterns whose operands are all sign-extended
+// vector_extract elements of one input vector. If the extracted indices are
+// not the ones the vector sign extend instructions read, add a vector shuffle
+// to fix up the indices and create a new PPCISD::SExtVElems node, which
+// selects the vector sign extend instructions during instruction selection.
+//
+// Returns an empty SDValue when the pattern does not apply, or when the
+// indices are already correct (regular lowering handles that case).
+static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
+  // This array encodes the indices that the vector sign extend instructions
+  // extract from when extending from one type to another for both BE and LE.
+  // The right nibble of each byte corresponds to the LE indices,
+  // and the left nibble of each byte corresponds to the BE indices.
+  // For example: 0x3074B8FC byte->word
+  //   For LE: the allowed indices are: 0x0,0x4,0x8,0xC
+  //   For BE: the allowed indices are: 0x3,0x7,0xB,0xF
+  // For example: 0x000070F8 byte->double word
+  //   For LE: the allowed indices are: 0x0,0x8
+  //   For BE: the allowed indices are: 0x7,0xF
+  const uint64_t TargetElems[] = {
+      0x3074B8FC, // b->w
+      0x000070F8, // b->d
+      0x10325476, // h->w
+      0x00003074, // h->d
+      0x00001032, // w->d
+  };
+
+  const bool IsLE = DAG.getDataLayout().isLittleEndian();
+
+  // Indices actually extracted, one byte per operand, with the first operand
+  // ending up in the most significant used byte.
+  uint64_t Elems = 0;
+  SDValue Input;
+
+  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
+    if (!Op)
+      return false;
+    if (Op.getOpcode() != ISD::SIGN_EXTEND)
+      return false;
+
+    SDValue Extract = Op.getOperand(0);
+    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return false;
+
+    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+    if (!ExtOp)
+      return false;
+
+    // Each index is packed into a single nibble of Elems; a larger value
+    // cannot be encoded and would spill into neighbouring entries.
+    uint64_t Index = ExtOp->getZExtValue();
+    if (Index > 0xF)
+      return false;
+
+    // Every operand must extract from the same input vector.
+    if (Input && Input != Extract.getOperand(0))
+      return false;
+
+    if (!Input)
+      Input = Extract.getOperand(0);
+
+    Elems = Elems << 8;
+    Elems |= IsLE ? Index : Index << 4;
+
+    return true;
+  };
+
+  // If the build vector operands aren't sign extended vector extracts,
+  // of the same input vector, then return.
+  for (unsigned i = 0; i < N->getNumOperands(); i++) {
+    if (!isSExtOfVecExtract(N->getOperand(i))) {
+      return SDValue();
+    }
+  }
+
+  // The index table above and the PPCISD::SExtVElems selection patterns only
+  // cover full 128-bit vectors. Nothing else in this function restricts the
+  // vector widths, so bail out on anything else: with a shorter input (for
+  // example v8i8) the target indices would run past the end of the shuffle
+  // mask built in addShuffleForVecExtend.
+  EVT InputVT = Input.getValueType();
+  EVT OutputVT = N->getValueType(0);
+  if (!InputVT.is128BitVector() || !OutputVT.is128BitVector())
+    return SDValue();
+
+  // Pick the table entry from the exact (input, output) element widths so
+  // that only the five supported extensions are accepted.
+  unsigned InputSize = InputVT.getScalarSizeInBits();
+  unsigned OutputSize = OutputVT.getScalarSizeInBits();
+  int TgtElemArrayIdx;
+  if (InputSize == 8 && OutputSize == 32)
+    TgtElemArrayIdx = 0;
+  else if (InputSize == 8 && OutputSize == 64)
+    TgtElemArrayIdx = 1;
+  else if (InputSize == 16 && OutputSize == 32)
+    TgtElemArrayIdx = 2;
+  else if (InputSize == 16 && OutputSize == 64)
+    TgtElemArrayIdx = 3;
+  else if (InputSize == 32 && OutputSize == 64)
+    TgtElemArrayIdx = 4;
+  else
+    return SDValue();
+
+  // Keep only the nibbles that apply to the target endianness so the value
+  // can be compared directly against Elems.
+  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
+  CorrectElems = IsLE ? CorrectElems & 0x0F0F0F0F0F0F0F0F
+                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
+
+  // If the vector extract indices are not correct, add the appropriate
+  // vector_shuffle.
+  if (Elems != CorrectElems) {
+    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
+  }
+
+  // Regular lowering will catch cases where a shuffle is not needed.
+  return SDValue();
+}
+
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
if (Reduced)
return Reduced;
+ // If we're building a vector out of extended elements from another vector
+ // we have P9 vector integer extend instructions.
+ if (Subtarget.hasP9Altivec()) {
+ Reduced = combineBVOfVecSExt(N, DAG);
+ if (Reduced)
+ return Reduced;
+ }
+
+
if (N->getValueType(0) != MVT::v2f64)
return SDValue();
/// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
VEXTS,
+ /// SExtVElems, takes an input vector of a smaller type and sign
+ /// extends to an output vector of a larger type.
+ SExtVElems,
+
/// Reciprocal estimate instructions (unary FP ops).
FRE, FRSQRTE,
def SDT_PPCVexts : SDTypeProfile<1, 2, [
SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
]>;
+// Type profile for the PPCISD::SExtVElems node: one result and one operand,
+// both constrained to be vectors.
+def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisVec<1>
+]>;
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
[SDNPHasChain, SDNPMayStore]>;
def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
+// DAG node for PPCISD::SExtVElems, typed by SDT_PPCSExtVElems and declared
+// with an empty node-property list.
+def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
}
def ByteToWord {
- dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
- dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
- dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
- dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+ dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+ dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+ dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+ dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+ dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8));
+ dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8));
+ dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8));
+ dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8));
}
def ByteToDWord {
- dag A0 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
- dag A1 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+ dag LE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+ dag LE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+ dag BE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8));
+ dag BE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8));
}
def HWordToWord {
- dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
- dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
- dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
- dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+ dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+ dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+ dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+ dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+ dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16));
+ dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16));
+ dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16));
+ dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16));
}
def HWordToDWord {
- dag A0 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
- dag A1 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+ dag LE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+ dag LE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+ dag BE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16));
+ dag BE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16));
}
def WordToDWord {
- dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
- dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+ dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+ dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+ dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1))));
+ dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3))));
}
def FltToIntLoad {
// P9 Altivec instructions that can be used to build vectors.
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
// with complexities of existing build vector patterns in this file.
- let Predicates = [HasP9Altivec] in {
- def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
+ // Little-endian only: a build_vector of sign-extended extracts that already
+ // uses the LE_* element indices selects directly to the matching vexts*
+ // instruction on the source vector $A.
+ let Predicates = [HasP9Altivec, IsLittleEndian] in {
+ def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+ def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+ def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+ HWordToWord.LE_A2, HWordToWord.LE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+ def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+ ByteToWord.LE_A2, ByteToWord.LE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+ def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+ }
+
+ let Predicates = [HasP9Altivec, IsBigEndian] in {
+ def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
(v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
+ def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
(v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1,
- HWordToWord.A2, HWordToWord.A3)),
+ def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+ HWordToWord.BE_A2, HWordToWord.BE_A3)),
(v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1,
- ByteToWord.A2, ByteToWord.A3)),
+ def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+ ByteToWord.BE_A2, ByteToWord.BE_A3)),
(v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
+ def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
(v2i64 (VEXTSB2D $A))>;
}
+
+ // Select PPCISD::SExtVElems to the vexts* instruction that matches the
+ // input vector type and the result vector type. These patterns carry no
+ // endianness predicate.
+ let Predicates = [HasP9Altivec] in {
+ def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
+ (v2i64 (VEXTSB2D $A))>;
+ def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
+ (v2i64 (VEXTSH2D $A))>;
+ def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
+ (v2i64 (VEXTSW2D $A))>;
+ def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
+ (v4i32 (VEXTSB2W $A))>;
+ def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
+ (v4i32 (VEXTSH2W $A))>;
+ }
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
-target triple = "powerpc64le-unknown-linux-gnu"
-
-define <4 x i32> @vextsb2w(<16 x i8> %a) {
-; PWR9-LABEL: vextsb2w:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsb2w 2, 2
-; PWR9-NEXT: blr
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE
+
+define <4 x i32> @vextsb2wLE(<16 x i8> %a) {
+; CHECK-LE-LABEL: vextsb2wLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsb2w 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsb2wLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i32
ret <4 x i32> %vecinit9
}
-define <2 x i64> @vextsb2d(<16 x i8> %a) {
-; PWR9-LABEL: vextsb2d:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsb2d 2, 2
-; PWR9-NEXT: blr
+define <2 x i64> @vextsb2dLE(<16 x i8> %a) {
+; CHECK-LE-LABEL: vextsb2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsb2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsb2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i64
ret <2 x i64> %vecinit3
}
-define <4 x i32> @vextsh2w(<8 x i16> %a) {
-; PWR9-LABEL: vextsh2w:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsh2w 2, 2
-; PWR9-NEXT: blr
+define <4 x i32> @vextsh2wLE(<8 x i16> %a) {
+; CHECK-LE-LABEL: vextsh2wLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsh2w 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsh2wLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <8 x i16> %a, i32 0
%conv = sext i16 %vecext to i32
ret <4 x i32> %vecinit9
}
-define <2 x i64> @vextsh2d(<8 x i16> %a) {
-; PWR9-LABEL: vextsh2d:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsh2d 2, 2
-; PWR9-NEXT: blr
+define <2 x i64> @vextsh2dLE(<8 x i16> %a) {
+; CHECK-LE-LABEL: vextsh2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsh2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsh2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <8 x i16> %a, i32 0
%conv = sext i16 %vecext to i64
ret <2 x i64> %vecinit3
}
-define <2 x i64> @vextsw2d(<4 x i32> %a) {
-; PWR9-LABEL: vextsw2d:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsw2d 2, 2
-; PWR9-NEXT: blr
+define <2 x i64> @vextsw2dLE(<4 x i32> %a) {
+; CHECK-LE-LABEL: vextsw2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsw2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsw2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vmrgew
+; CHECK-BE-NEXT: vextsw2d 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <4 x i32> %a, i32 0
%conv = sext i32 %vecext to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
+
+; Byte -> word extension of elements 3, 7, 11 and 15. The big-endian checks
+; expect vextsb2w alone; the little-endian checks expect a vsldoi before it.
+define <4 x i32> @vextsb2wBE(<16 x i8> %a) {
+; CHECK-BE-LABEL: vextsb2wBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsb2wBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 13
+; CHECK-LE-NEXT: vextsb2w 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+ %vecext = extractelement <16 x i8> %a, i32 3
+ %conv = sext i8 %vecext to i32
+ %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 7
+ %conv2 = sext i8 %vecext1 to i32
+ %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+ %vecext4 = extractelement <16 x i8> %a, i32 11
+ %conv5 = sext i8 %vecext4 to i32
+ %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+ %vecext7 = extractelement <16 x i8> %a, i32 15
+ %conv8 = sext i8 %vecext7 to i32
+ %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+ ret <4 x i32> %vecinit9
+}
+
+; Byte -> doubleword extension of elements 7 and 15. The big-endian checks
+; expect vextsb2d alone; the little-endian checks expect a vsldoi before it.
+define <2 x i64> @vextsb2dBE(<16 x i8> %a) {
+; CHECK-BE-LABEL: vextsb2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsb2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 9
+; CHECK-LE-NEXT: vextsb2d 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+ %vecext = extractelement <16 x i8> %a, i32 7
+ %conv = sext i8 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 15
+ %conv2 = sext i8 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+; Halfword -> word extension of elements 1, 3, 5 and 7. The big-endian checks
+; expect vextsh2w alone; the little-endian checks expect a vsldoi before it.
+define <4 x i32> @vextsh2wBE(<8 x i16> %a) {
+; CHECK-BE-LABEL: vextsh2wBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsh2wBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 14
+; CHECK-LE-NEXT: vextsh2w 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+ %vecext = extractelement <8 x i16> %a, i32 1
+ %conv = sext i16 %vecext to i32
+ %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+ %vecext1 = extractelement <8 x i16> %a, i32 3
+ %conv2 = sext i16 %vecext1 to i32
+ %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+ %vecext4 = extractelement <8 x i16> %a, i32 5
+ %conv5 = sext i16 %vecext4 to i32
+ %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+ %vecext7 = extractelement <8 x i16> %a, i32 7
+ %conv8 = sext i16 %vecext7 to i32
+ %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+ ret <4 x i32> %vecinit9
+}
+
+; Halfword -> doubleword extension of elements 3 and 7. The big-endian checks
+; expect vextsh2d alone; the little-endian checks expect a vsldoi before it.
+define <2 x i64> @vextsh2dBE(<8 x i16> %a) {
+; CHECK-BE-LABEL: vextsh2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsh2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 10
+; CHECK-LE-NEXT: vextsh2d 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+ %vecext = extractelement <8 x i16> %a, i32 3
+ %conv = sext i16 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <8 x i16> %a, i32 7
+ %conv2 = sext i16 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+; Word -> doubleword extension of elements 1 and 3. The big-endian checks
+; expect vextsw2d alone; the little-endian checks expect a vsldoi before it.
+define <2 x i64> @vextsw2dBE(<4 x i32> %a) {
+; CHECK-BE-LABEL: vextsw2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsw2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsw2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 12
+; CHECK-LE-NEXT: vextsw2d 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 1
+ %conv = sext i32 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %a, i32 3
+ %conv2 = sext i32 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+; Negative test: the two sign-extended elements are extracted from different
+; vectors (%a and %b), so the checks require that no vextsw2d is emitted on
+; either endianness.
+define <2 x i64> @vextDiffVectors(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LE-LABEL: vextDiffVectors:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NOT: vextsw2d
+
+; CHECK-BE-LABEL: vextDiffVectors:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NOT: vextsw2d
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 0
+ %conv = sext i32 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %b, i32 2
+ %conv2 = sext i32 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+; Negative test: byte -> halfword extension into <8 x i16>. The checks require
+; that no vexts* instruction is emitted on either endianness.
+define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
+entry:
+; CHECK-LE-LABEL: testInvalidExtend:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NOT: vexts
+
+; CHECK-BE-LABEL: testInvalidExtend:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NOT: vexts
+
+ %vecext = extractelement <16 x i8> %a, i32 0
+ %conv = sext i8 %vecext to i16
+ %vecinit = insertelement <8 x i16> undef, i16 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 2
+ %conv2 = sext i8 %vecext1 to i16
+ %vecinit3 = insertelement <8 x i16> %vecinit, i16 %conv2, i32 1
+ %vecext4 = extractelement <16 x i8> %a, i32 4
+ %conv5 = sext i8 %vecext4 to i16
+ %vecinit6 = insertelement <8 x i16> %vecinit3, i16 %conv5, i32 2
+ %vecext7 = extractelement <16 x i8> %a, i32 6
+ %conv8 = sext i8 %vecext7 to i16
+ %vecinit9 = insertelement <8 x i16> %vecinit6, i16 %conv8, i32 3
+ %vecext10 = extractelement <16 x i8> %a, i32 8
+ %conv11 = sext i8 %vecext10 to i16
+ %vecinit12 = insertelement <8 x i16> %vecinit9, i16 %conv11, i32 4
+ %vecext13 = extractelement <16 x i8> %a, i32 10
+ %conv14 = sext i8 %vecext13 to i16
+ %vecinit15 = insertelement <8 x i16> %vecinit12, i16 %conv14, i32 5
+ %vecext16 = extractelement <16 x i8> %a, i32 12
+ %conv17 = sext i8 %vecext16 to i16
+ %vecinit18 = insertelement <8 x i16> %vecinit15, i16 %conv17, i32 6
+ %vecext19 = extractelement <16 x i8> %a, i32 14
+ %conv20 = sext i8 %vecext19 to i16
+ %vecinit21 = insertelement <8 x i16> %vecinit18, i16 %conv20, i32 7
+ ret <8 x i16> %vecinit21
+}