From: Simon Pilgrim
Date: Fri, 20 Jul 2018 13:26:51 +0000 (+0000)
Subject: [X86][AVX] Convert X86ISD::VBROADCAST demanded elts combine to use SimplifyDemandedVe...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ca4d424e2e6d8944123e410ca56e89e2cda3ce2d;p=llvm

[X86][AVX] Convert X86ISD::VBROADCAST demanded elts combine to use SimplifyDemandedVectorElts

This is an early step towards using SimplifyDemandedVectorElts for target shuffle combining - this merely moves the existing X86ISD::VBROADCAST simplification code to use the SimplifyDemandedVectorElts mechanism.

Adds X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode to handle X86ISD::VBROADCAST - in time we can support all target shuffles (and other ops) here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337547 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b1b6b7c6aad..d9b42f69ad9 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -30635,24 +30635,13 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
 
   switch (Opcode) {
   case X86ISD::VBROADCAST: {
-    // If broadcasting from another shuffle, attempt to simplify it.
     // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
-    SDValue Src = N.getOperand(0);
-    SDValue BC = peekThroughBitcasts(Src);
-    EVT SrcVT = Src.getValueType();
-    EVT BCVT = BC.getValueType();
-    if (isTargetShuffle(BC.getOpcode()) &&
-        VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
-      unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
-      SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(),
-                                        SM_SentinelUndef);
-      for (unsigned i = 0; i != Scale; ++i)
-        DemandedMask[i] = i;
-      if (SDValue Res = combineX86ShufflesRecursively(
-              {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
-              /*HasVarMask*/ false, DAG, Subtarget))
-        return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
-                           DAG.getBitcast(SrcVT, Res));
+    APInt KnownUndef, KnownZero;
+    APInt DemandedMask(APInt::getAllOnesValue(VT.getVectorNumElements()));
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    if (TLI.SimplifyDemandedVectorElts(N, DemandedMask, KnownUndef, KnownZero,
+                                       DCI)) {
+      return SDValue(N.getNode(), 0);
     }
     return SDValue();
   }
@@ -31298,6 +31287,41 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+    TargetLoweringOpt &TLO, unsigned Depth) const {
+
+  if (X86ISD::VBROADCAST != Op.getOpcode())
+    return false;
+
+  EVT VT = Op.getValueType();
+  SDValue Src = Op.getOperand(0);
+  SDValue BC = peekThroughBitcasts(Src);
+  EVT SrcVT = Src.getValueType();
+  EVT BCVT = BC.getValueType();
+
+  if (!isTargetShuffle(BC.getOpcode()) ||
+      (VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits()) != 0)
+    return false;
+
+  unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
+  SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(),
+                                    SM_SentinelUndef);
+  for (unsigned i = 0; i != Scale; ++i)
+    DemandedMask[i] = i;
+
+  if (SDValue Res = combineX86ShufflesRecursively(
+          {BC}, 0, BC, DemandedMask, {}, Depth + 1, /*HasVarMask*/ false,
+          TLO.DAG, Subtarget)) {
+    SDLoc DL(Op);
+    Res = TLO.DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+                          TLO.DAG.getBitcast(SrcVT, Res));
+    return TLO.CombineTo(Op, Res);
+  }
+
+  return false;
+}
+
 /// Check if a vector extract from a target-specific shuffle of a load can be
 /// folded into a single element load.
 /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 32215b170a8..623b95b3705 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -866,6 +866,13 @@ namespace llvm {
                                              const SelectionDAG &DAG,
                                              unsigned Depth) const override;
 
+    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
+                                                 const APInt &DemandedElts,
+                                                 APInt &KnownUndef,
+                                                 APInt &KnownZero,
+                                                 TargetLoweringOpt &TLO,
+                                                 unsigned Depth) const override;
+
     SDValue unwrapAddress(SDValue N) const override;
 
     bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
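
For context, a simplified sketch of how the generic TargetLowering::SimplifyDemandedVectorElts walk reaches the new override; this is assumed from the SimplifyDemandedBits-style infrastructure and is not code contained in this commit, so the exact upstream wording may differ. Opcodes past ISD::BUILTIN_OP_END are opaque to the generic combiner, so its default switch case defers to the target hook:

    // Sketch of the dispatch inside TargetLowering::SimplifyDemandedVectorElts.
    default: {
      // Target-specific nodes (such as X86ISD::VBROADCAST) are unknown to the
      // generic code, so hand them to the target override added above.
      if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
        if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts,
                                                    KnownUndef, KnownZero,
                                                    TLO, Depth))
          return true;
      break;
    }

This is why the X86ISD::VBROADCAST case in combineTargetShuffle can now build an all-ones demanded-elts mask and call TLI.SimplifyDemandedVectorElts on the node itself: the walk dispatches back into X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode, which performs the old shuffle simplification and reports any replacement through TLO.CombineTo.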