// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
assert((OpVT.is256BitVector() || OpVT.is512BitVector()) &&
"Can only insert into 256-bit or 512-bit vectors");
- // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
- // load:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr + 16), Elts/2)
- // --> load32 addr
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr + 32), Elts/2)
- // --> load64 addr
- // or a 16-byte or 32-byte broadcast:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr), Elts/2)
- // --> X86SubVBroadcast(load16 addr)
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr), Elts/2)
- // --> X86SubVBroadcast(load32 addr)
- if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
- Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
- auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
- if (Idx2 && Idx2->getZExtValue() == 0) {
- SDValue SubVec2 = Vec.getOperand(1);
- // If needed, look through bitcasts to get to the load.
- if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
- bool Fast;
- unsigned Alignment = FirstLd->getAlignment();
- unsigned AS = FirstLd->getAddressSpace();
- const X86TargetLowering *TLI = Subtarget.getTargetLowering();
- if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- OpVT, AS, Alignment, &Fast) && Fast) {
- SDValue Ops[] = {SubVec2, SubVec};
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
- }
- }
- // If lower/upper loads are the same and the only users of the load, then
- // lower to a VBROADCASTF128/VBROADCASTI128/etc.
- if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
- if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
- areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
- }
- }
- // If this is subv_broadcast insert into both halves, use a larger
- // subv_broadcast.
- if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
- SubVec.getOperand(0));
- }
- }
- }
-
if (SubVecVT.is128BitVector())
return insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
return SDValue();
}
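+
+/// Combine an insert_subvector node, folding two half-width subvector loads
+/// into one full-width load or an X86ISD::SUBV_BROADCAST where possible.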
+static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
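+ // These folds are only attempted after operation legalization.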
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDLoc dl(N);
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ MVT OpVT = N->getSimpleValueType(0);
+ MVT SubVecVT = SubVec.getSimpleValueType();
+
+ // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
+ // load:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr + 16), Elts/2)
+ // --> load32 addr
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr + 32), Elts/2)
+ // --> load64 addr
+ // or a 16-byte or 32-byte broadcast:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr), Elts/2)
+ // --> X86SubVBroadcast(load16 addr)
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr), Elts/2)
+ // --> X86SubVBroadcast(load32 addr)
+ if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
+ Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through bitcasts to get to the load.
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
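+ // Only merge the two loads if the target says an OpVT-wide access at this
+ // alignment and address space is both legal and fast.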
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget.getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
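+ // EltsFromConsecutiveLoads builds the single wide load if the two halves
+ // are consecutive in memory.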
+ SDValue Ops[] = {SubVec2, SubVec};
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
+ }
+ // If the lower/upper loads are the same and these two inserts are the only
+ // users of the load, then lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
+ if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
+ areOnlyUsersOf(SubVec2.getNode(), {SDValue(N, 0), Vec})) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
+ }
+ }
+ // If this is a subv_broadcast inserted into both halves, use a larger
+ // subv_broadcast.
+ if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
+ SubVec.getOperand(0));
+ }
+ }
+ }
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
+ case ISD::INSERT_SUBVECTOR:
+ return combineInsertSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);