STATISTIC(NumTailCalls, "Number of tail calls");
-cl::opt<bool> ExperimentalVectorWideningLegalization(
- "x86-experimental-vector-widening-legalization", cl::init(true),
- cl::desc("Enable an experimental vector type legalization through widening "
- "rather than promotion."),
- cl::Hidden);
-
static cl::opt<int> ExperimentalPrefLoopAlignment(
"x86-experimental-pref-loop-alignment", cl::init(4),
cl::desc(
setOperationAction(ISD::UREM, VT, Custom);
}
- if (!ExperimentalVectorWideningLegalization) {
- setOperationAction(ISD::MUL, MVT::v2i16, Custom);
- setOperationAction(ISD::MUL, MVT::v2i32, Custom);
- setOperationAction(ISD::MUL, MVT::v4i16, Custom);
- }
-
setOperationAction(ISD::MUL, MVT::v2i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i8, Custom);
setOperationAction(ISD::MUL, MVT::v8i8, Custom);
setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
- if (!ExperimentalVectorWideningLegalization) {
- // Use widening instead of promotion.
- for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
- MVT::v4i16, MVT::v2i16 }) {
- setOperationAction(ISD::UADDSAT, VT, Custom);
- setOperationAction(ISD::SADDSAT, VT, Custom);
- setOperationAction(ISD::USUBSAT, VT, Custom);
- setOperationAction(ISD::SSUBSAT, VT, Custom);
- }
- }
-
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- // Provide custom widening for v2f32 setcc. This is really for VLX when
- // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
- // type legalization changing the result type to v4i1 during widening.
- // It works fine for SSE2 and is probably faster so no need to qualify with
- // VLX support.
- if (!ExperimentalVectorWideningLegalization)
- setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
-
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- // We support custom legalizing of sext and anyext loads for specific
- // memory vector types which we can load as a scalar (or sequence of
- // scalars) and extend in-register to a legal 128-bit vector type. For sext
- // loads these must work with a single scalar load.
- if (!ExperimentalVectorWideningLegalization) {
- for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
- }
- }
-
for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
- if (ExperimentalVectorWideningLegalization) {
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
- } else {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
- }
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
- if (!ExperimentalVectorWideningLegalization) {
- // Avoid narrow result types when widening. The legal types are listed
- // in the next loop.
- for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
- }
- }
-
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- if (!ExperimentalVectorWideningLegalization)
- setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- if (ExperimentalVectorWideningLegalization) {
- // Need to custom widen this if we don't have AVX512BW.
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
- }
+ // Need to custom widen this if we don't have AVX512BW.
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return TypeSplitVector;
- if (ExperimentalVectorWideningLegalization &&
- VT.getVectorNumElements() != 1 &&
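+ // With widening legalization always enabled, prefer widening for any
+ // illegal vector type other than single-element and vXi1 vectors.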
+ if (VT.getVectorNumElements() != 1 &&
VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
// Custom legalize v8i8->v8i64 on CPUs without avx512bw.
if (InVT == MVT::v8i8) {
- if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ if (VT != MVT::v8i64)
return SDValue();
In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
- if (!ExperimentalVectorWideningLegalization) {
- // Without vector widening we need to manually construct X86 specific
- // nodes and an unpcklqdq.
- Lo = DAG.getNode(X86ISD::VTRUNC, DL, VT, Lo);
- Hi = DAG.getNode(X86ISD::VTRUNC, DL, VT, Hi);
-
- // Manually concat the truncates using a shuffle.
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<int, 16> ShufMask(NumElts);
- for (unsigned i = 0; i != NumElts / 2; ++i)
- ShufMask[i] = i;
- for (unsigned i = NumElts / 2; i != NumElts; ++i)
- ShufMask[i] = i + (NumElts / 2);
- return DAG.getVectorShuffle(VT, DL, Lo, Hi, ShufMask);
- }
-
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
"Invalid number of packed elements for source and destination!");
- // This is being called by type legalization because v2i32 is marked custom
- // for result type legalization for v2f32.
- if (VTOp0 == MVT::v2i32) {
- assert(!ExperimentalVectorWideningLegalization &&
- "Should only get here with promote legalization!");
- return SDValue();
- }
-
// The non-AVX512 code below works under the assumption that source and
// destination types are the same.
assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
// Custom legalize v8i8->v8i64 on CPUs without avx512bw.
if (InVT == MVT::v8i8) {
- if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ if (VT != MVT::v8i64)
return SDValue();
In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
return SDValue();
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
"Unexpected VT");
- if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
- TargetLowering::TypeWidenVector)
- return SDValue();
+ assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
+ TargetLowering::TypeWidenVector && "Unexpected type action!");
- MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
- StoreVT.getVectorNumElements() * 2);
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
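+ // Ask the type legalizer for the widened type instead of computing it by
+ // hand, then pad the stored value with undef up to that width.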
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
DAG.getUNDEF(StoreVT));
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
- EVT MemVT = Ld->getMemoryVT();
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
if (RegVT.getVectorElementType() == MVT::i1) {
- assert(EVT(RegVT) == MemVT && "Expected non-extending load");
+ assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");
assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
}
- if (ExperimentalVectorWideningLegalization)
- return SDValue();
-
- // Nothing useful we can do without SSE2 shuffles.
- assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2.");
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned RegSz = RegVT.getSizeInBits();
-
- ISD::LoadExtType Ext = Ld->getExtensionType();
-
- assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
- && "Only anyext and sext are currently implemented.");
- assert(MemVT != RegVT && "Cannot extend to the same type");
- assert(MemVT.isVector() && "Must load a vector from memory");
-
- unsigned NumElems = RegVT.getVectorNumElements();
- unsigned MemSz = MemVT.getSizeInBits();
- assert(RegSz > MemSz && "Register size must be greater than the mem size");
-
- if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) {
- // The only way in which we have a legal 256-bit vector result but not the
- // integer 256-bit operations needed to directly lower a sextload is if we
- // have AVX1 but not AVX2. In that case, we can always emit a sextload to
- // a 128-bit vector and a normal sign_extend to 256-bits that should get
- // correctly legalized. We do this late to allow the canonical form of
- // sextload to persist throughout the rest of the DAG combiner -- it wants
- // to fold together any extensions it can, and so will fuse a sign_extend
- // of an sextload into a sextload targeting a wider value.
- SDValue Load;
- if (MemSz == 128) {
- // Just switch this to a normal load.
- assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
- "it must be a legal 128-bit vector "
- "type!");
- Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
- } else {
- assert(MemSz < 128 &&
- "Can't extend a type wider than 128 bits to a 256 bit vector!");
- // Do an sext load to a 128-bit vector type. We want to use the same
- // number of elements, but elements half as wide. This will end up being
- // recursively lowered by this routine, but will succeed as we definitely
- // have all the necessary features if we're using AVX1.
- EVT HalfEltVT =
- EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
- EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
- Load =
- DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
- }
-
- // Replace chain users with the new chain.
- assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
-
- // Finally, do a normal sign-extend to the desired register.
- SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT);
- return DAG.getMergeValues({SExt, Load.getValue(1)}, dl);
- }
-
- // All sizes must be a power of two.
- assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
- "Non-power-of-two elements are not custom lowered!");
-
- // Attempt to load the original value using scalar loads.
- // Find the largest scalar type that divides the total loaded size.
- MVT SclrLoadTy = MVT::i8;
- for (MVT Tp : MVT::integer_valuetypes()) {
- if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
- SclrLoadTy = Tp;
- }
- }
-
- // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
- if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
- (64 <= MemSz))
- SclrLoadTy = MVT::f64;
-
- // Calculate the number of scalar loads that we need to perform
- // in order to load our vector from memory.
- unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
-
- assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
- "Can only lower sext loads with a single scalar load!");
-
- unsigned loadRegSize = RegSz;
- if (Ext == ISD::SEXTLOAD && RegSz >= 256)
- loadRegSize = 128;
-
- // If we don't have BWI we won't be able to create the shuffle needed for
- // v8i8->v8i64.
- if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
- MemVT == MVT::v8i8)
- loadRegSize = 128;
-
- // Represent our vector as a sequence of elements which are the
- // largest scalar that we can load.
- EVT LoadUnitVecVT = EVT::getVectorVT(
- *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
-
- // Represent the data using the same element type that is stored in
- // memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegSize / MemVT.getScalarSizeInBits());
-
- assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
- "Invalid vector type");
-
- // We can't shuffle using an illegal type.
- assert(TLI.isTypeLegal(WideVecVT) &&
- "We only lower types that form legal widened vector types");
-
- SmallVector<SDValue, 8> Chains;
- SDValue Ptr = Ld->getBasePtr();
- unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8;
- SDValue Increment = DAG.getConstant(OffsetInc, dl,
- TLI.getPointerTy(DAG.getDataLayout()));
- SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
-
- unsigned Offset = 0;
- for (unsigned i = 0; i < NumLoads; ++i) {
- unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset);
-
- // Perform a single load.
- SDValue ScalarLoad =
- DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr,
- Ld->getPointerInfo().getWithOffset(Offset),
- NewAlign, Ld->getMemOperand()->getFlags());
- Chains.push_back(ScalarLoad.getValue(1));
- // Create the first element type using SCALAR_TO_VECTOR in order to avoid
- // another round of DAGCombining.
- if (i == 0)
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
- else
- Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
- ScalarLoad, DAG.getIntPtrConstant(i, dl));
-
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- Offset += OffsetInc;
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
-
- // Bitcast the loaded value to a vector of the original element type, in
- // the size of the target vector type.
- SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res);
- unsigned SizeRatio = RegSz / MemSz;
-
- if (Ext == ISD::SEXTLOAD) {
- SDValue Sext = getExtendInVec(ISD::SIGN_EXTEND, dl, RegVT, SlicedVec, DAG);
- return DAG.getMergeValues({Sext, TF}, dl);
- }
-
- if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
- MemVT == MVT::v8i8) {
- SDValue Sext = getExtendInVec(ISD::ZERO_EXTEND, dl, RegVT, SlicedVec, DAG);
- return DAG.getMergeValues({Sext, TF}, dl);
- }
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i * SizeRatio] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT), ShuffleVec);
-
- // Bitcast to the requested type.
- Shuff = DAG.getBitcast(RegVT, Shuff);
- return DAG.getMergeValues({Shuff, TF}, dl);
+ return SDValue();
}
/// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
SDValue Chain = N->getChain();
SDValue BasePtr = N->getBasePtr();
- if (VT == MVT::v2f32) {
+ if (VT == MVT::v2f32 || VT == MVT::v2i32) {
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
// If the index is v2i64 and we have VLX we can use xmm for data and index.
if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
- DAG.getUNDEF(MVT::v2f32));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));
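+ // Pad the v2f32/v2i32 source to its widened type with undef so a single
+ // code path builds the target scatter node for both element types.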
SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
return SDValue();
}
- if (VT == MVT::v2i32) {
- assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
- DAG.getUNDEF(MVT::v2i32));
- // If the index is v2i64 and we have VLX we can use xmm for data and index.
- if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
- SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
- SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
- SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
- VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
- return SDValue(NewScatter.getNode(), 1);
- }
- // Custom widen all the operands to avoid promotion.
- EVT NewIndexVT = EVT::getVectorVT(
- *DAG.getContext(), Index.getValueType().getVectorElementType(), 4);
- Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
- DAG.getUNDEF(Index.getValueType()));
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getConstant(0, dl, MVT::v2i1));
- SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(), dl,
- Ops, N->getMemOperand(), N->getIndexType());
- }
-
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
}
case ISD::MUL: {
EVT VT = N->getValueType(0);
- assert(VT.isVector() && "Unexpected VT");
- if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger &&
- VT.getVectorNumElements() == 2) {
- // Promote to a pattern that will be turned into PMULUDQ.
- SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
- N->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
- N->getOperand(1));
- SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
- } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
- VT.getVectorElementType() == MVT::i8) {
- // Pre-promote these to vXi16 to avoid op legalization thinking all 16
- // elements are needed.
- MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
- SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
- SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
- Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
- unsigned NumConcats = 16 / VT.getVectorNumElements();
- SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
- ConcatOps[0] = Res;
- Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
- Results.push_back(Res);
- }
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ VT.getVectorElementType() == MVT::i8 && "Unexpected VT!");
+ // Pre-promote these to vXi16 to avoid op legalization thinking all 16
+ // elements are needed.
+ MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
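+ // Widen the narrow vXi8 result back to v16i8 by concatenating with undef,
+ // since that is the type the legalizer expects for the widened node.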
+ unsigned NumConcats = 16 / VT.getVectorNumElements();
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
+ Results.push_back(Res);
return;
}
- case ISD::UADDSAT:
- case ISD::SADDSAT:
- case ISD::USUBSAT:
- case ISD::SSUBSAT:
case X86ISD::VPMADDWD:
case X86ISD::AVG: {
// Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
EVT InVT = N->getOperand(0).getValueType();
assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
"Expected a VT that divides into 128 bits.");
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
unsigned NumConcat = 128 / InVT.getSizeInBits();
EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
- DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
Results.push_back(Hi);
return;
}
- case ISD::SETCC: {
- // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
- // setCC result type is v2i1 because type legalzation will end up with
- // a v4i1 setcc plus an extend.
- assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
- if (N->getOperand(0).getValueType() != MVT::v2f32 ||
- getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector)
- return;
- SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
- SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
- N->getOperand(0), UNDEF);
- SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
- N->getOperand(1), UNDEF);
- SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
- N->getOperand(2));
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- return;
- }
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
case ISD::SREM:
case ISD::UREM: {
EVT VT = N->getValueType(0);
- if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) {
+ if (VT.isVector()) {
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
// If this RHS is a constant splat vector we can widen this and let
// division/remainder by constant optimize it.
// TODO: Can we do something for non-splat?
return;
}
- if (VT == MVT::v2i32) {
- // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the
- // v2i64 and unroll later. But then we create i64 scalar ops which
- // might be slow in 64-bit mode or require a libcall in 32-bit mode.
- Results.push_back(DAG.UnrollVectorOp(N));
- return;
- }
-
- if (VT.isVector())
- return;
-
LLVM_FALLTHROUGH;
}
case ISD::SDIVREM:
}
return;
}
- case ISD::SIGN_EXTEND_VECTOR_INREG: {
- if (ExperimentalVectorWideningLegalization)
- return;
-
- EVT VT = N->getValueType(0);
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
- (InVT == MVT::v16i16 || InVT == MVT::v32i8)) {
- // Custom split this so we can extend i8/i16->i32 invec. This is better
- // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
- // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting
- // we allow the sra from the extend to i32 to be shared by the split.
- EVT ExtractVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
- MVT ExtendVT = MVT::getVectorVT(MVT::i32,
- VT.getVectorNumElements());
- In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT,
- In, DAG.getIntPtrConstant(0, dl));
- In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In);
-
- // Fill a vector with sign bits for each element.
- SDValue Zero = DAG.getConstant(0, dl, ExtendVT);
- SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Create an unpackl and unpackh to interleave the sign bits then bitcast
- // to vXi64.
- SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits);
- Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
- SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits);
- Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
-
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
- Results.push_back(Res);
- return;
- }
- return;
- }
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND: {
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
- (InVT == MVT::v4i16 || InVT == MVT::v4i8) &&
- getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector) {
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+ assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector &&
+ "Unexpected type action!");
assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode");
// Custom split this so we can extend i8/i16->i32 invec. This is better
// since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
- // Promote these manually to avoid over promotion to v2i64. Type
- // legalization will revisit the v2i32 operation for more cleanup.
- if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
- getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
- // AVX512DQ provides instructions that produce a v2i64 result.
- if (Subtarget.hasDQI())
- return;
-
- SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
- Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
- : ISD::AssertSext,
- dl, MVT::v2i32, Res,
- DAG.getValueType(VT.getVectorElementType()));
- Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
- Results.push_back(Res);
- return;
- }
-
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- return;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
// Try to create a 128 bit vector, but don't exceed a 32 bit element.
unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- bool Widenv2i32 =
- getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
if (Src.getValueType() == MVT::v2f64) {
- unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
- // If v2i32 is widened, we can defer to the generic legalizer.
- if (Widenv2i32)
- return;
- // Custom widen by doubling to a legal vector with. Isel will
- // further widen to v8f64.
- Opc = ISD::FP_TO_UINT;
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64,
- Src, DAG.getUNDEF(MVT::v2f64));
+ // Without VLX we cannot emit a target specific FP_TO_UINT node, so
+ // defer to the generic legalizer, which will widen the input as well.
+ // This will be further widened during op legalization to v8i32<-v8f64.
+ return;
}
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
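+ // Signed conversions (and unsigned ones with VLX) can use the X86-specific
+ // nodes, which produce a v4i32 result directly from the v2f64 source.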
SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
- if (!Widenv2i32)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- return;
- }
- if (SrcVT == MVT::v2f32 &&
- getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
- SDValue Idx = DAG.getIntPtrConstant(0, dl);
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
- DAG.getUNDEF(MVT::v2f32));
- Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
- : ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
return;
}
+ assert(!VT.isVector() && "Vectors should have been handled above!");
+
if (Subtarget.hasDQI() && VT == MVT::i64 &&
(SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
return;
}
- if (DstVT.isVector() && SrcVT == MVT::x86mmx &&
- getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector) {
+ if (DstVT.isVector() && SrcVT == MVT::x86mmx) {
+ assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector &&
+ "Unexpected type action!");
EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT);
SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, WideVT, N->getOperand(0));
Results.push_back(Res);
return;
}
- if (SrcVT != MVT::f64 ||
- (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
- getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
- return;
-
- unsigned NumElts = DstVT.getVectorNumElements();
- EVT SVT = DstVT.getVectorElementType();
- EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue Res;
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
- Res = DAG.getBitcast(WiderVT, Res);
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
return;
}
case ISD::MGATHER: {
EVT VT = N->getValueType(0);
- if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
+ if ((VT == MVT::v2f32 || VT == MVT::v2i32) &&
+ (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
auto *Gather = cast<MaskedGatherSDNode>(N);
SDValue Index = Gather->getIndex();
if (Index.getValueType() != MVT::v2i64)
return;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
+ EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
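+ // Both v2f32 and v2i32 gathers widen to their v4 counterparts, so build the
+ // passthru and result types from WideVT rather than hardcoding v4f32.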
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT,
Gather->getPassThru(),
- DAG.getUNDEF(MVT::v2f32));
+ DAG.getUNDEF(VT));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
+ DAG.getVTList(WideVT, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
Results.push_back(Res);
Results.push_back(Res.getValue(2));
return;
}
- if (VT == MVT::v2i32) {
- auto *Gather = cast<MaskedGatherSDNode>(N);
- SDValue Index = Gather->getIndex();
- SDValue Mask = Gather->getMask();
- assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
- Gather->getPassThru(),
- DAG.getUNDEF(MVT::v2i32));
- // If the index is v2i64 we can use it directly.
- if (Index.getValueType() == MVT::v2i64 &&
- (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
- if (!Subtarget.hasVLX()) {
- // We need to widen the mask, but the instruction will only use 2
- // of its elements. So we can use undef.
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getUNDEF(MVT::v2i1));
- Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
- }
- SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
- Gather->getBasePtr(), Index, Gather->getScale() };
- SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl,
- Gather->getMemoryVT(), Gather->getMemOperand());
- SDValue Chain = Res.getValue(2);
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Chain);
- return;
- }
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
- EVT IndexVT = Index.getValueType();
- EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
- IndexVT.getScalarType(), 4);
- // Otherwise we need to custom widen everything to avoid promotion.
- Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
- DAG.getUNDEF(IndexVT));
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getConstant(0, dl, MVT::v2i1));
- SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
- Gather->getBasePtr(), Index, Gather->getScale() };
- SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
- Gather->getMemoryVT(), dl, Ops,
- Gather->getMemOperand(),
- Gather->getIndexType());
- SDValue Chain = Res.getValue(1);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Chain);
- return;
- }
- }
return;
}
case ISD::LOAD: {
// cast since type legalization will try to use an i64 load.
MVT VT = N->getSimpleValueType(0);
assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- return;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
if (!ISD::isNON_EXTLoad(N))
return;
auto *Ld = cast<LoadSDNode>(N);
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
SDValue Chain = Res.getValue(1);
- MVT WideVT = MVT::getVectorVT(LdVT, 2);
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
- MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() * 2);
- Res = DAG.getBitcast(CastVT, Res);
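+ // Place the scalar load into a two-element vector of the memory type, then
+ // bitcast to the widened result type the type legalizer expects.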
+ MVT VecVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res);
+ EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ Res = DAG.getBitcast(WideVT, Res);
Results.push_back(Res);
Results.push_back(Chain);
return;
return HAddSub;
}
- // During Type Legalization, when promoting illegal vector types,
- // the backend might introduce new shuffle dag nodes and bitcasts.
- //
- // This code performs the following transformation:
- // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
- // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
- //
- // We do this only if both the bitcast and the BINOP dag nodes have
- // one use. Also, perform this transformation only if the new binary
- // operation is legal. This is to avoid introducing dag nodes that
- // potentially need to be further expanded (or custom lowered) into a
- // less optimal sequence of dag nodes.
- if (!ExperimentalVectorWideningLegalization &&
- !DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
- N->getOpcode() == ISD::VECTOR_SHUFFLE &&
- N->getOperand(0).getOpcode() == ISD::BITCAST &&
- N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- SDValue BC0 = N0.getOperand(0);
- EVT SVT = BC0.getValueType();
- unsigned Opcode = BC0.getOpcode();
- unsigned NumElts = VT.getVectorNumElements();
-
- if (BC0.hasOneUse() && SVT.isVector() &&
- SVT.getVectorNumElements() * 2 == NumElts &&
- TLI.isOperationLegal(Opcode, VT)) {
- bool CanFold = false;
- switch (Opcode) {
- default : break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL:
- // isOperationLegal lies for integer ops on floating point types.
- CanFold = VT.isInteger();
- break;
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- // isOperationLegal lies for floating point ops on integer types.
- CanFold = VT.isFloatingPoint();
- break;
- }
-
- unsigned SVTNumElts = SVT.getVectorNumElements();
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
- CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
- for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
- CanFold = SVOp->getMaskElt(i) < 0;
-
- if (CanFold) {
- SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0));
- SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
- return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask());
- }
- }
- }
-
// Attempt to combine into a vector load/broadcast.
if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true))
return LD;
}
}
-
- // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
- // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
- // FIXME: This can probably go away once we default to widening legalization.
- if (!ExperimentalVectorWideningLegalization &&
- Subtarget.hasSSE41() && VT == MVT::v4i32 &&
- N->getOpcode() == ISD::VECTOR_SHUFFLE &&
- N->getOperand(0).getOpcode() == ISD::BITCAST &&
- N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) {
- SDValue BC = N->getOperand(0);
- SDValue MULUDQ = BC.getOperand(0);
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- ArrayRef<int> Mask = SVOp->getMask();
- if (BC.hasOneUse() && MULUDQ.hasOneUse() &&
- Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) {
- SDValue Op0 = MULUDQ.getOperand(0);
- SDValue Op1 = MULUDQ.getOperand(1);
- if (Op0.getOpcode() == ISD::BITCAST &&
- Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
- Op0.getOperand(0).getValueType() == MVT::v4i32) {
- ShuffleVectorSDNode *SVOp0 =
- cast<ShuffleVectorSDNode>(Op0.getOperand(0));
- ArrayRef<int> Mask2 = SVOp0->getMask();
- if (Mask2[0] == 0 && Mask2[1] == -1 &&
- Mask2[2] == 1 && Mask2[3] == -1) {
- Op0 = SVOp0->getOperand(0);
- Op1 = DAG.getBitcast(MVT::v4i32, Op1);
- Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask);
- return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
- }
- }
- if (Op1.getOpcode() == ISD::BITCAST &&
- Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
- Op1.getOperand(0).getValueType() == MVT::v4i32) {
- ShuffleVectorSDNode *SVOp1 =
- cast<ShuffleVectorSDNode>(Op1.getOperand(0));
- ArrayRef<int> Mask2 = SVOp1->getMask();
- if (Mask2[0] == 0 && Mask2[1] == -1 &&
- Mask2[2] == 1 && Mask2[3] == -1) {
- Op0 = DAG.getBitcast(MVT::v4i32, Op0);
- Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask);
- Op1 = SVOp1->getOperand(0);
- return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
- }
- }
- }
- }
-
return SDValue();
}
SDLoc DL(ExtElt);
- if (ExperimentalVectorWideningLegalization && VecVT == MVT::v8i8) {
+ if (VecVT == MVT::v8i8) {
// Pad with undef.
Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
DAG.getUNDEF(VecVT));
// Since SKX these selects have a proper lowering.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
- (ExperimentalVectorWideningLegalization ||
- VT.getVectorNumElements() > 4) &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
if ((NumElts % 2) != 0)
return SDValue();
- unsigned RegSize = 128;
- MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
// Shrink the operands of mul.
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
- if (ExperimentalVectorWideningLegalization ||
- NumElts >= OpsVT.getVectorNumElements()) {
- // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
- // lower part is needed.
- SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
- if (Mode == MULU8 || Mode == MULS8)
- return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
- DL, VT, MulLo);
-
- MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
- // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
- // the higher part is also needed.
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- ReducedVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result.
- // Generate shuffle functioning as punpcklwd.
- SmallVector<int, 16> ShuffleMask(NumElts);
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i;
- ShuffleMask[2 * i + 1] = i + NumElts;
- }
- SDValue ResLo =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResLo = DAG.getBitcast(ResVT, ResLo);
- // Generate shuffle functioning as punpckhwd.
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i + NumElts / 2;
- ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
- }
- SDValue ResHi =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResHi = DAG.getBitcast(ResVT, ResHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
- }
-
- // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
- // to legalize the mul explicitly because implicit legalization for type
- // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
- // instructions which will not exist when we explicitly legalize it by
- // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
- // <4 x i16> undef).
- //
- // Legalize the operands of mul.
- // FIXME: We may be able to handle non-concatenated vectors by insertion.
- unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
- if ((RegSize % ReducedSizeInBits) != 0)
- return SDValue();
-
- SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
- DAG.getUNDEF(ReducedVT));
- Ops[0] = NewN0;
- NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
- Ops[0] = NewN1;
- NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
-
- if (Mode == MULU8 || Mode == MULS8) {
- // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
- // part is needed.
- SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
-
- // convert the type of mul result to VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG,
- DL, ResVT, Mul);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- }
+ // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
+ // lower part is needed.
+ SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
+ if (Mode == MULU8 || Mode == MULS8)
+ return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
+ DL, VT, MulLo);
- // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
- // MULU16/MULS16, both parts are needed.
- SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+ MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
+ // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
+ // the higher part is also needed.
SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- OpsVT, NewN0, NewN1);
+ ReducedVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
- // result. Make sure the type of mul result is VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
- Res = DAG.getBitcast(ResVT, Res);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
+ // result.
+ // Generate shuffle functioning as punpcklwd.
+ SmallVector<int, 16> ShuffleMask(NumElts);
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i;
+ ShuffleMask[2 * i + 1] = i + NumElts;
+ }
+ SDValue ResLo =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResLo = DAG.getBitcast(ResVT, ResLo);
+ // Generate shuffle functioning as punpckhwd.
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i + NumElts / 2;
+ ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
+ }
+ SDValue ResHi =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResHi = DAG.getBitcast(ResVT, ResHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
// Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
// Also allow v2i32 if it will be widened.
MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
- if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) ||
- DAG.getTargetLoweringInfo().isTypeLegal(WVT)))
+ if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(WVT))
return SDValue();
SDValue N0 = N->getOperand(0);
return Blend;
}
- if (ExperimentalVectorWideningLegalization)
- return SDValue();
-
- if (Mld->getExtensionType() != ISD::EXTLOAD)
- return SDValue();
-
- // Resolve extending loads.
- EVT VT = Mld->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- EVT LdVT = Mld->getMemoryVT();
- SDLoc dl(Mld);
-
- assert(LdVT != VT && "Cannot extend to the same type");
- unsigned ToSz = VT.getScalarSizeInBits();
- unsigned FromSz = LdVT.getScalarSizeInBits();
- // From/To sizes and ElemCount must be pow of two.
- assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
- "Unexpected size for extending masked load");
-
- unsigned SizeRatio = ToSz / FromSz;
- assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
- LdVT.getScalarType(), NumElems*SizeRatio);
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- // Convert PassThru value.
- SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru());
- if (!Mld->getPassThru().isUndef()) {
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
- "WideVecVT should be legal");
- WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru,
- DAG.getUNDEF(WideVecVT), ShuffleVec);
- }
-
- // Prepare the new mask.
- SDValue NewMask;
- SDValue Mask = Mld->getMask();
- if (Mask.getValueType() == VT) {
- // Mask and original value have the same type.
- NewMask = DAG.getBitcast(WideVecVT, Mask);
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
- for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i)
- ShuffleVec[i] = NumElems * SizeRatio;
- NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
- DAG.getConstant(0, dl, WideVecVT),
- ShuffleVec);
- } else {
- assert(Mask.getValueType().getVectorElementType() == MVT::i1);
- unsigned WidenNumElts = NumElems*SizeRatio;
- unsigned MaskNumElts = VT.getVectorNumElements();
- EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- WidenNumElts);
-
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
- SmallVector<SDValue, 16> Ops(NumConcat, ZeroVal);
- Ops[0] = Mask;
- NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
- }
-
- SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
- Mld->getBasePtr(), NewMask, WidePassThru,
- Mld->getMemoryVT(), Mld->getMemOperand(),
- ISD::NON_EXTLOAD);
-
- SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd);
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i * SizeRatio] = i;
-
- // Can't shuffle using an illegal type.
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
- "WideVecVT should be legal");
- SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT), ShuffleVec);
- SlicedVec = DAG.getBitcast(VT, SlicedVec);
-
- return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true);
+ return SDValue();
}
/// If exactly one element of the mask is set for a non-truncating masked store,
return SDValue();
EVT VT = Mst->getValue().getValueType();
- EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!Mst->isTruncatingStore()) {
- if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
- return ScalarStore;
-
- // If the mask value has been legalized to a non-boolean vector, try to
- // simplify ops leading up to it. We only demand the MSB of each lane.
- SDValue Mask = Mst->getMask();
- if (Mask.getScalarValueSizeInBits() != 1) {
- APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
- return SDValue(N, 0);
- }
-
- SDValue Value = Mst->getValue();
- if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
- TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
- Mst->getMemoryVT())) {
- return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
- Mst->getBasePtr(), Mask,
- Mst->getMemoryVT(), Mst->getMemOperand(), true);
- }
-
- return SDValue();
- }
-
- if (ExperimentalVectorWideningLegalization)
+ if (Mst->isTruncatingStore())
return SDValue();
- // Resolve truncating stores.
- unsigned NumElems = VT.getVectorNumElements();
-
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromSz = VT.getScalarSizeInBits();
- unsigned ToSz = StVT.getScalarSizeInBits();
-
- // The truncating store is legal in some cases. For example
- // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
- // are designated for truncate store.
- // In this case we don't need any further transformations.
- if (TLI.isTruncStoreLegal(VT, StVT))
- return SDValue();
+ if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
+ return ScalarStore;
- // From/To sizes and ElemCount must be pow of two.
- assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
- "Unexpected size for truncating masked store");
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- assert (((NumElems * FromSz) % ToSz) == 0 &&
- "Unexpected ratio for truncating masked store");
-
- unsigned SizeRatio = FromSz / ToSz;
- assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
- StVT.getScalarType(), NumElems*SizeRatio);
-
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue());
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
- "WideVecVT should be legal");
-
- SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
- DAG.getUNDEF(WideVecVT),
- ShuffleVec);
-
- SDValue NewMask;
+ // If the mask value has been legalized to a non-boolean vector, try to
+ // simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
- if (Mask.getValueType() == VT) {
- // Mask and original value have the same type.
- NewMask = DAG.getBitcast(WideVecVT, Mask);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
- for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
- ShuffleVec[i] = NumElems*SizeRatio;
- NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
- DAG.getConstant(0, dl, WideVecVT),
- ShuffleVec);
- } else {
- assert(Mask.getValueType().getVectorElementType() == MVT::i1);
- unsigned WidenNumElts = NumElems*SizeRatio;
- unsigned MaskNumElts = VT.getVectorNumElements();
- EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- WidenNumElts);
+ if (Mask.getScalarValueSizeInBits() != 1) {
+ APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ return SDValue(N, 0);
+ }
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
- SmallVector<SDValue, 16> Ops(NumConcat, ZeroVal);
- Ops[0] = Mask;
- NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
+ SDValue Value = Mst->getValue();
+ if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ Mst->getMemoryVT())) {
+ return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
+ Mst->getBasePtr(), Mask,
+ Mst->getMemoryVT(), Mst->getMemOperand(), true);
}
- return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal,
- Mst->getBasePtr(), NewMask, StVT,
- Mst->getMemOperand(), false);
+ return SDValue();
}
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
MVT::v16i8, St->getMemOperand());
}
- // Look for a truncating store to a less than 128 bit vector that has been
- // truncated from an any_extend_inreg from a 128 bit vector with the same
- // element size. We can use a 64/32/16-bit extractelement and store that.
- // Disabling this when widening legalization is in effect since the trunc
- // store would have been unlikely to be created in that case. Only doing this
- // when truncstore is legal since it would otherwise be decomposed below and
- // then combined away.
- if (St->isTruncatingStore() && TLI.isTruncStoreLegal(VT, StVT) &&
- StoredVal.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
- StoredVal.getValueType().is128BitVector() &&
- !ExperimentalVectorWideningLegalization) {
- EVT OrigVT = StoredVal.getOperand(0).getValueType();
- if (OrigVT.is128BitVector() &&
- OrigVT.getVectorElementType() == StVT.getVectorElementType()) {
- unsigned StoreSize = StVT.getSizeInBits();
- assert((128 % StoreSize == 0) && "Unexpected store size!");
- MVT IntVT = MVT::getIntegerVT(StoreSize);
- MVT CastVT = MVT::getVectorVT(IntVT, 128 / StoreSize);
- StoredVal = DAG.getBitcast(CastVT, StoredVal.getOperand(0));
- // Use extract_store for the 64-bit case to support 32-bit targets.
- if (IntVT == MVT::i64) {
- SDVTList Tys = DAG.getVTList(MVT::Other);
- SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
- return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
- IntVT, St->getMemOperand());
- }
-
- // Otherwise just use an extract and store.
- StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IntVT, StoredVal,
- DAG.getIntPtrConstant(0, dl));
- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getMemOperand());
- }
- }
-
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
St->getMemoryVT(), St->getMemOperand(), DAG);
}
- if (ExperimentalVectorWideningLegalization)
- return SDValue();
-
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromSz = VT.getScalarSizeInBits();
- unsigned ToSz = StVT.getScalarSizeInBits();
-
- // The truncating store is legal in some cases. For example
- // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
- // are designated for truncate store.
- // In this case we don't need any further transformations.
- if (TLI.isTruncStoreLegalOrCustom(VT, StVT))
- return SDValue();
-
- // From, To sizes and ElemCount must be pow of two
- if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- if (0 != (NumElems * FromSz) % ToSz) return SDValue();
-
- unsigned SizeRatio = FromSz / ToSz;
-
- assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
- StVT.getScalarType(), NumElems*SizeRatio);
-
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue());
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT))
- return SDValue();
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
- DAG.getUNDEF(WideVecVT),
- ShuffleVec);
- // At this point all of the data is stored at the bottom of the
- // register. We now need to save it to mem.
-
- // Find the largest store unit
- MVT StoreType = MVT::i8;
- for (MVT Tp : MVT::integer_valuetypes()) {
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
- StoreType = Tp;
- }
-
- // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
- if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
- (64 <= NumElems * ToSz))
- StoreType = MVT::f64;
-
- // Bitcast the original vector into a vector of store-size units
- EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
- assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
- SmallVector<SDValue, 8> Chains;
- SDValue Ptr = St->getBasePtr();
-
- // Perform one or more big stores into memory.
- for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- StoreType, ShuffWide,
- DAG.getIntPtrConstant(i, dl));
- SDValue Ch =
- DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags());
- Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl);
- Chains.push_back(Ch);
- }
-
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
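+    // No further combining of truncating vector stores here; any remaining
+    // cases are left for (widening) type legalization to handle.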
+ return SDValue();
}
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
- if (((VT.isVector() && !VT.isFloatingPoint() &&
- !ExperimentalVectorWideningLegalization) ||
- (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
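+  // Only the scalar i64 load/store on 32-bit targets still needs this
+  // transform; the narrow integer-vector case was only required when vector
+  // widening legalization was disabled.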
+  if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
isa<LoadSDNode>(St->getValue()) &&
cast<LoadSDNode>(St->getValue())->isSimple() &&
St->getChain().hasOneUse() && St->isSimple()) {
// Only handle vXi16 types that are at least 128-bits unless they will be
// widened.
- if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 ||
- (!ExperimentalVectorWideningLegalization &&
- VT.getVectorNumElements() < 8))
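+    // Narrower vXi16 vectors are widened during type legalization, so no
+    // minimum-size check is needed.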
+ if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)
return SDValue();
// Input type should be vXi32.
DAG.getConstant(EltSizeInBits - 1, DL, VT));
}
-/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
-/// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating
-/// with UNDEFs) of the input to vectors of the same size as the target type
-/// which then extends the lowest elements.
-static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- if (ExperimentalVectorWideningLegalization)
- return SDValue();
-
- unsigned Opcode = N->getOpcode();
- // TODO - add ANY_EXTEND support.
- if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
- return SDValue();
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
- if (!Subtarget.hasSSE2())
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
- EVT InVT = N0.getValueType();
- EVT InSVT = InVT.getScalarType();
-
- // FIXME: Generic DAGCombiner previously had a bug that would cause a
- // sign_extend of setcc to sometimes return the original node and tricked it
- // into thinking CombineTo was used which prevented the target combines from
- // running.
- // Earlying out here to avoid regressions like this
- // (v4i32 (sext (v4i1 (setcc (v4i16)))))
- // Becomes
- // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef))))
- // Type legalized to
- // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32)))))))
- // Leading to a packssdw+pmovsxwd
- // We could write a DAG combine to fix this, but really we shouldn't be
- // creating sext_invec that's forcing v8i16 into the DAG.
- if (N0.getOpcode() == ISD::SETCC)
- return SDValue();
-
- // Input type must be a vector and we must be extending legal integer types.
- if (!VT.isVector() || VT.getVectorNumElements() < 2)
- return SDValue();
- if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
- return SDValue();
- if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
- return SDValue();
-
- // If the input/output types are both legal then we have at least AVX1 and
- // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly.
- if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
- DAG.getTargetLoweringInfo().isTypeLegal(InVT))
- return SDValue();
-
- SDLoc DL(N);
-
- auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) {
- EVT SrcVT = N.getValueType();
- EVT DstVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
- Size / SrcVT.getScalarSizeInBits());
- SmallVector<SDValue, 8> Opnds(Size / SrcVT.getSizeInBits(),
- DAG.getUNDEF(SrcVT));
- Opnds[0] = N;
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Opnds);
- };
-
- // If target-size is less than 128-bits, extend to a type that would extend
- // to 128 bits, extend that and extract the original target vector.
- if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) {
- unsigned Scale = 128 / VT.getSizeInBits();
- EVT ExVT =
- EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
- SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
- SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
- DAG.getIntPtrConstant(0, DL));
- }
-
- // If target-size is 128-bits (or 256-bits on AVX target), then convert to
- // ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
- // Also use this if we don't have SSE41 to allow the legalizer do its job.
- if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasAVX()) ||
- (VT.is512BitVector() && Subtarget.useAVX512Regs())) {
- SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
- Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);
- return DAG.getNode(Opcode, DL, VT, ExOp);
- }
-
- auto SplitAndExtendInReg = [&](unsigned SplitSize) {
- unsigned NumVecs = VT.getSizeInBits() / SplitSize;
- unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
- EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
- EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
-
- unsigned IROpc = getOpcode_EXTEND_VECTOR_INREG(Opcode);
- SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
- SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
- DAG.getIntPtrConstant(Offset, DL));
- SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
- SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec);
- Opnds.push_back(SrcVec);
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
- };
-
- // On pre-AVX targets, split into 128-bit nodes of
- // ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128))
- return SplitAndExtendInReg(128);
-
- // On pre-AVX512 targets, split into 256-bit nodes of
- // ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.useAVX512Regs() && !(VT.getSizeInBits() % 256))
- return SplitAndExtendInReg(256);
-
- return SDValue();
-}
-
// Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm
// result type.
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
}
- if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
- return V;
-
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
- if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
- return V;
-
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
- (ExperimentalVectorWideningLegalization ||
- VT.getVectorNumElements() > 4) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
  // result to v2i32 which will be removed by type legalization. If we widen
// narrow vectors then we bitcast to v4i32 and extract v2i32.
MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
- if (ExperimentalVectorWideningLegalization ||
- VT.getSizeInBits() >= ResVT.getSizeInBits())
- Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
- else
- Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
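+  // ResVT has the same bit width as the PSADBW result, so a plain bitcast is
+  // always valid; a narrower original type is reduced with the extract below.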
+ Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
// Fill the upper elements with zero to match the add width.
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, DL, ResVT));
Ops[0] = Sad;
Sad = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
- } else if (ExperimentalVectorWideningLegalization &&
- VT.getSizeInBits() < ResVT.getSizeInBits()) {
+ } else if (VT.getSizeInBits() < ResVT.getSizeInBits()) {
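+    // The requested type is narrower than the PSADBW result; keep only the
+    // low VT-sized subvector.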
Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad,
DAG.getIntPtrConstant(0, DL));
}
}
}
- // Combine (ext_invec (ext_invec X)) -> (ext_invec X)
- // Disabling for widening legalization for now. We can enable if we find a
- // case that needs it. Otherwise it can be deleted when we switch to
- // widening legalization.
- if (!ExperimentalVectorWideningLegalization &&
- In.getOpcode() == N->getOpcode() &&
- TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType()))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
-
// Attempt to combine as a shuffle.
// TODO: SSE41 support
if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {