From: Nemanja Ivanovic Date: Wed, 4 May 2016 16:04:02 +0000 (+0000) Subject: [PowerPC] Generate VSX version of splat word X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=990ef3411fc39ac61eae0bcfaf25f824209a76a7;p=llvm [PowerPC] Generate VSX version of splat word This patch corresponds to review: http://reviews.llvm.org/D18592 It allows the PPC back end to generate the xxspltw instruction where we previously only emitted vspltw. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268516 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d0f9e8dcc98..e5cd3e30afb 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1014,6 +1014,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::VPERM: return "PPCISD::VPERM"; + case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; @@ -7419,6 +7420,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); + if (Subtarget.hasVSX()) { + if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { + int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, + DAG.getConstant(SplatIdx, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); + } + } + if (Subtarget.hasQPX()) { if (VT.getVectorNumElements() != 4) return SDValue(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index c097a62712f..b60fa538f3c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ 
b/lib/Target/PowerPC/PPCISelLowering.h @@ -61,6 +61,10 @@ namespace llvm { /// VPERM, + /// XXSPLT - The PPC VSX splat instructions + /// + XXSPLT, + /// The CMPB instruction (takes two operands of i32 or i64). CMPB, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 838889660a3..ad382341ed5 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -31,6 +31,10 @@ def SDT_PPCvperm : SDTypeProfile<1, 3, [ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> ]>; +def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisInt<2> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -141,6 +145,7 @@ def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index d7f64c97d6c..bc91fb6874b 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -775,7 +775,9 @@ let Uses = [RM] in { "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>; def XXSPLTW : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), - "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, + [(set v4i32:$XT, + (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; } // hasSideEffects // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. 
Expanded after diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 10636b86c3b..a1aa841eccc 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -407,9 +407,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VSPLTB: case PPC::VSPLTH: case PPC::VSPLTW: + case PPC::XXSPLTW: // Splats are lane-sensitive, but we can use special handling - // to adjust the source lane for the splat. This is not yet - // implemented. When it is, we need to uncomment the following: + // to adjust the source lane for the splat. SwapVector[VecIdx].IsSwappable = 1; SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT; break; @@ -515,7 +515,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // permute control vectors (for shift values 1, 2, 3). However, // VPERM has a more restrictive register class. case PPC::XXSLDWI: - case PPC::XXSPLTW: break; } } @@ -806,12 +805,21 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { llvm_unreachable("Unexpected splat opcode"); case PPC::VSPLTB: NElts = 16; break; case PPC::VSPLTH: NElts = 8; break; - case PPC::VSPLTW: NElts = 4; break; + case PPC::VSPLTW: + case PPC::XXSPLTW: NElts = 4; break; } - unsigned EltNo = MI->getOperand(1).getImm(); + unsigned EltNo; + if (MI->getOpcode() == PPC::XXSPLTW) + EltNo = MI->getOperand(2).getImm(); + else + EltNo = MI->getOperand(1).getImm(); + EltNo = (EltNo + NElts / 2) % NElts; - MI->getOperand(1).setImm(EltNo); + if (MI->getOpcode() == PPC::XXSPLTW) + MI->getOperand(2).setImm(EltNo); + else + MI->getOperand(1).setImm(EltNo); DEBUG(dbgs() << " Into: "); DEBUG(MI->dump()); diff --git a/test/CodeGen/PowerPC/pr27078.ll b/test/CodeGen/PowerPC/pr27078.ll index 324462cf601..b1fdbbde692 100644 --- a/test/CodeGen/PowerPC/pr27078.ll +++ b/test/CodeGen/PowerPC/pr27078.ll @@ -9,7 +9,7 @@ define <4 x float> @bar(float* %p, float* %q) { %6 = shufflevector <12 x float> %5, <12 x float> 
undef, <4 x i32> ret <4 x float> %6 -; CHECK: vspltw +; CHECK: xxspltw ; CHECK: vmrghw ; CHECK: vsldoi } diff --git a/test/CodeGen/PowerPC/swaps-le-2.ll b/test/CodeGen/PowerPC/swaps-le-2.ll index 08096ed20dd..9d1eb412cba 100644 --- a/test/CodeGen/PowerPC/swaps-le-2.ll +++ b/test/CodeGen/PowerPC/swaps-le-2.ll @@ -87,5 +87,5 @@ entry: ; CHECK-LABEL: @ifoo ; CHECK: lxvd2x -; CHECK: vspltw {{[0-9]+}}, {{[0-9]+}}, 0 +; CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 ; CHECK: stxvd2x diff --git a/test/CodeGen/PowerPC/vsx-word-splats.ll b/test/CodeGen/PowerPC/vsx-word-splats.ll new file mode 100644 index 00000000000..5632011da35 --- /dev/null +++ b/test/CodeGen/PowerPC/vsx-word-splats.ll @@ -0,0 +1,147 @@ +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE + +define <4 x float> @test0f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test0f +; CHECK: xxspltw 34, 34, 3 +; CHECK-BE-LABEL: test0f +; CHECK-BE: xxspltw 34, 34, 0 +} + +define <4 x float> @test1f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test1f +; CHECK: xxspltw 34, 34, 2 +; CHECK-BE-LABEL: test1f +; CHECK-BE: xxspltw 34, 34, 1 +} + +define <4 x float> @test2f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test2f +; CHECK: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: test2f +; CHECK-BE: xxspltw 34, 34, 2 +} + +define <4 x float> @test3f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x 
i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test3f +; CHECK: xxspltw 34, 34, 0 +; CHECK-BE-LABEL: test3f +; CHECK-BE: xxspltw 34, 34, 3 +} + +define <4 x i32> @test0si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test0si +; CHECK: xxspltw 34, 34, 3 +; CHECK-BE-LABEL: test0si +; CHECK-BE: xxspltw 34, 34, 0 +} + +define <4 x i32> @test1si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test1si +; CHECK: xxspltw 34, 34, 2 +; CHECK-BE-LABEL: test1si +; CHECK-BE: xxspltw 34, 34, 1 +} + +define <4 x i32> @test2si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test2si +; CHECK: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: test2si +; CHECK-BE: xxspltw 34, 34, 2 +} + +define <4 x i32> @test3si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test3si +; CHECK: xxspltw 34, 34, 0 +; CHECK-BE-LABEL: test3si +; CHECK-BE: xxspltw 34, 34, 3 +} + +define <4 x i32> @test0ui(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test0ui +; CHECK: xxspltw 34, 34, 3 +; CHECK-BE-LABEL: test0ui +; CHECK-BE: xxspltw 34, 34, 0 +} + +define <4 x i32> @test1ui(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = 
shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test1ui +; CHECK: xxspltw 34, 34, 2 +; CHECK-BE-LABEL: test1ui +; CHECK-BE: xxspltw 34, 34, 1 +} + +define <4 x i32> @test2ui(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test2ui +; CHECK: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: test2ui +; CHECK-BE: xxspltw 34, 34, 2 +} + +define <4 x i32> @test3ui(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test3ui +; CHECK: xxspltw 34, 34, 0 +; CHECK-BE-LABEL: test3ui +; CHECK-BE: xxspltw 34, 34, 3 +}