From a0d05ab6f4f738baf32ef9d25d5bb745b331f733 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 2 Feb 2017 08:03:20 +0000
Subject: [PATCH] [X86] Move some INSERT_SUBVECTOR optimizations from legalize
 to DAG combine.

This moves creation of SUBV_BROADCAST and merging of adjacent loads
that are being inserted together.

This is a step towards removing custom legalization of INSERT_SUBVECTOR
except for the vXi1 cases.
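
For example, folds like the following are preserved by the move
(illustrative C++ snippet only, not taken from the regression tests;
the function names are made up, and whether the two adjacent loads are
actually merged is gated by the allowsMemoryAccess/Fast check):

  #include <immintrin.h>  // build with AVX enabled, e.g. -mavx

  // Two adjacent 16-byte loads inserted into the two halves of a YMM
  // value should become a single 32-byte load.
  __m256 merge128(const float *p) {
    __m128 lo = _mm_loadu_ps(p);
    __m128 hi = _mm_loadu_ps(p + 4);
    return _mm256_set_m128(hi, lo);
  }

  // The same 16-byte load inserted into both halves should become a
  // single vbroadcastf128.
  __m256 splat128(const float *p) {
    __m128 v = _mm_loadu_ps(p);
    return _mm256_set_m128(v, v);
  }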

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293872 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 127 +++++++++++++++++------------
 1 file changed, 74 insertions(+), 53 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 71523cc1088..5bfe3aae764 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1672,6 +1672,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+  setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
   setTargetDAGCombine(ISD::BITCAST);
   setTargetDAGCombine(ISD::VSELECT);
   setTargetDAGCombine(ISD::SELECT);
@@ -14065,59 +14066,6 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
   assert((OpVT.is256BitVector() || OpVT.is512BitVector()) &&
          "Can only insert into 256-bit or 512-bit vectors");
 
-  // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
-  // load:
-  // (insert_subvector (insert_subvector undef, (load16 addr), 0),
-  //                   (load16 addr + 16), Elts/2)
-  // --> load32 addr
-  // or:
-  // (insert_subvector (insert_subvector undef, (load32 addr), 0),
-  //                   (load32 addr + 32), Elts/2)
-  // --> load64 addr
-  // or a 16-byte or 32-byte broadcast:
-  // (insert_subvector (insert_subvector undef, (load16 addr), 0),
-  //                   (load16 addr), Elts/2)
-  // --> X86SubVBroadcast(load16 addr)
-  // or:
-  // (insert_subvector (insert_subvector undef, (load32 addr), 0),
-  //                   (load32 addr), Elts/2)
-  // --> X86SubVBroadcast(load32 addr)
-  if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
-      Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
-      OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
-    auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
-    if (Idx2 && Idx2->getZExtValue() == 0) {
-      SDValue SubVec2 = Vec.getOperand(1);
-      // If needed, look through bitcasts to get to the load.
-      if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
-        bool Fast;
-        unsigned Alignment = FirstLd->getAlignment();
-        unsigned AS = FirstLd->getAddressSpace();
-        const X86TargetLowering *TLI = Subtarget.getTargetLowering();
-        if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                    OpVT, AS, Alignment, &Fast) && Fast) {
-          SDValue Ops[] = {SubVec2, SubVec};
-          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
-            return Ld;
-        }
-      }
-      // If lower/upper loads are the same and the only users of the load, then
-      // lower to a VBROADCASTF128/VBROADCASTI128/etc.
-      if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
-        if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
-            areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
-          return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
-        }
-      }
-      // If this is subv_broadcast insert into both halves, use a larger
-      // subv_broadcast.
-      if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
-        return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
-                           SubVec.getOperand(0));
-      }
-    }
-  }
-
   if (SubVecVT.is128BitVector())
     return insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
 
@@ -34122,6 +34070,77 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const X86Subtarget &Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDLoc dl(N);
+  SDValue Vec = N->getOperand(0);
+  SDValue SubVec = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+
+  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  MVT OpVT = N->getSimpleValueType(0);
+  MVT SubVecVT = SubVec.getSimpleValueType();
+
+  // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
+  // load:
+  // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+  //                   (load16 addr + 16), Elts/2)
+  // --> load32 addr
+  // or:
+  // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+  //                   (load32 addr + 32), Elts/2)
+  // --> load64 addr
+  // or a 16-byte or 32-byte broadcast:
+  // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+  //                   (load16 addr), Elts/2)
+  // --> X86SubVBroadcast(load16 addr)
+  // or:
+  // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+  //                   (load32 addr), Elts/2)
+  // --> X86SubVBroadcast(load32 addr)
+  if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
+      Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+      OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
+    auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+    if (Idx2 && Idx2->getZExtValue() == 0) {
+      SDValue SubVec2 = Vec.getOperand(1);
+      // If needed, look through bitcasts to get to the load.
+      if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
+        bool Fast;
+        unsigned Alignment = FirstLd->getAlignment();
+        unsigned AS = FirstLd->getAddressSpace();
+        const X86TargetLowering *TLI = Subtarget.getTargetLowering();
+        if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                    OpVT, AS, Alignment, &Fast) && Fast) {
+          SDValue Ops[] = {SubVec2, SubVec};
+          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+            return Ld;
+        }
+      }
+      // If lower/upper loads are the same and the only users of the load, then
+      // lower to a VBROADCASTF128/VBROADCASTI128/etc.
+      if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
+        if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
+            areOnlyUsersOf(SubVec2.getNode(), {SDValue(N, 0), Vec})) {
+          return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
+        }
+      }
+      // If this is subv_broadcast insert into both halves, use a larger
+      // subv_broadcast.
+      if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
+        return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
+                           SubVec.getOperand(0));
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
@@ -34130,6 +34149,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::EXTRACT_VECTOR_ELT:
     return combineExtractVectorElt(N, DAG, DCI, Subtarget);
+  case ISD::INSERT_SUBVECTOR:
+    return combineInsertSubvector(N, DAG, DCI, Subtarget);
   case ISD::VSELECT:
   case ISD::SELECT:
   case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
-- 
2.50.1