/// than the vector element type, and is implicitly truncated to it.
SCALAR_TO_VECTOR,
+ /// SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL
+ /// duplicated in all lanes. The type of the operand must match the vector
+ /// element type, except when they are integer types. In this case the
+ /// operand is allowed to be wider than the vector element type, and is
+ /// implicitly truncated to it.
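+  /// For example, a SPLAT_VECTOR of an i64 operand with an nxv4i32 result
+  /// type splats the low 32 bits of the operand into every lane.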
+ SPLAT_VECTOR,
+
/// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
/// producing an unsigned/signed value of type i[2*N], then return the top
/// part.
}
}
+ // A splat of a single element is a SPLAT_VECTOR if supported on the target.
+ if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ assert(!V.isUndef() && "Splat of undef should have been handled earlier");
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
+ }
+
// Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl, SDValue ChainIn);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSPLAT_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
return ExpandVectorBuildThroughStack(Node);
}
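+// Expand SPLAT_VECTOR by rebuilding the splat as an equivalent BUILD_VECTOR.
+// This fallback is only meaningful for fixed-length vectors; a scalable
+// vector has no known element count to build from.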
+SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue SplatVal = Node->getOperand(0);
+
+ return DAG.getSplatBuildVector(VT, DL, SplatVal);
+}
+
// Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
// and leave the Hi part unset.
case ISD::BUILD_VECTOR:
Results.push_back(ExpandBUILD_VECTOR(Node));
break;
+ case ISD::SPLAT_VECTOR:
+ Results.push_back(ExpandSPLAT_VECTOR(Node));
+ break;
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: {
Res = PromoteIntRes_BUILD_VECTOR(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntRes_SPLAT_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntOp_SPLAT_VECTOR(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
GetPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
+ // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
+ // operand in place.
+ return SDValue(
+ DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue SplatVal = N->getOperand(0);
+
+ assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ EVT NOutElemVT = NOutVT.getVectorElementType();
+
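+  // ANY_EXTEND is sufficient here: SPLAT_VECTOR implicitly truncates a wider
+  // integer operand to the element type, so the extended high bits are never
+  // observed.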
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
+
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
+ Constant *MaskV = cast<Constant>(I.getOperand(2));
SDLoc DL = getCurSDLoc();
-
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
- unsigned MaskNumElts = Mask.size();
-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
+  if (MaskV->isNullValue() && VT.isScalableVector()) {
+    // An all-zeros mask selects element 0 of the first input vector for every
+    // lane, i.e. the canonical splat form, so emit a SPLAT_VECTOR directly.
+ SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src1,
+ DAG.getConstant(0, DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
+ return;
+ }
+
+ // For now, we only handle splats for scalable vectors.
+ // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
+ // for targets that support a SPLAT_VECTOR for non-scalable vector types.
+ assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(MaskV, Mask);
+ unsigned MaskNumElts = Mask.size();
+
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::SPLAT_VECTOR: return "splat_vector";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Expand);
}
// Constrained floating-point operations default to expand.
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
bool IsLegalWiderType = false;
+ bool IsScalable = VT.isScalableVector();
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
switch (PreferredAction) {
- case TypePromoteInteger:
+ case TypePromoteInteger: {
+ MVT::SimpleValueType EndVT = IsScalable ?
+ MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE :
+ MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE;
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widen-vector method.
for (unsigned nVT = i + 1;
- nVT <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; ++nVT) {
+ (MVT::SimpleValueType)nVT <= EndVT; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
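+        // Only consider candidates whose scalability matches the original
+        // type; promoting a fixed-length vector to a scalable one (or vice
+        // versa) would change its runtime element count.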
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
- SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
+ SVT.getVectorNumElements() == NElts &&
+ SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
if (IsLegalWiderType)
break;
LLVM_FALLTHROUGH;
+ }
case TypeWidenVector:
if (isPowerOf2_32(NElts)) {
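+      // As with promotion above, require the wider candidate to have the
+      // same scalability as the original type.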
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
if (SVT.getVectorElementType() == EltVT
- && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
+ && SVT.getVectorNumElements() > NElts
+ && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
}
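+  // With SVE, splats of legal integer scalable vectors can be selected to a
+  // single DUP, so handle SPLAT_VECTOR in LowerSPLAT_VECTOR. i1 predicate
+  // splats are excluded for now since no selection patterns exist yet.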
+ if (Subtarget->hasSVE()) {
+ for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
+ if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1)
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ }
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SPLAT_VECTOR:
+ return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::SRA:
return GenerateTBL(Op, ShuffleMask, DAG);
}
+SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT ElemVT = VT.getScalarType();
+
+ SDValue SplatVal = Op.getOperand(0);
+
+  // Extend the input splat value where needed so it fits into a GPR (only
+  // 32-bit and 64-bit forms exist); FPRs don't have this restriction.
+ switch (ElemVT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ case MVT::i16:
+ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
+ break;
+ case MVT::i64:
+ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
+ break;
+ case MVT::i32:
+ // Fine as is
+ break;
+ // TODO: we can support splats of i1s and float types, but haven't added
+ // patterns yet.
+ case MVT::i1:
+ case MVT::f16:
+ case MVT::f32:
+ case MVT::f64:
+ default:
+ llvm_unreachable("Unsupported SPLAT_VECTOR input operand type");
+ }
+
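+  // AArch64ISD::DUP broadcasts the scalar to all lanes; for scalable types
+  // this selects to the SVE DUP (scalar register) instruction via the
+  // pattern on sve_int_perm_dup_r below.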
+ return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
+}
+
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;
// Splat scalar register (unpredicated, GPR or vector + element index)
- defm DUP_ZR : sve_int_perm_dup_r<"dup">;
+ defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>;
defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
// Splat scalar register (predicated)
//===----------------------------------------------------------------------===//
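+// Each DUP_ZR variant carries a selection pattern mapping an AArch64dup of a
+// GPR scalar onto the corresponding unpredicated DUP encoding.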
class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
- RegisterClass srcRegType>
+ ValueType vt, RegisterClass srcRegType,
+ SDPatternOperator op>
: I<(outs zprty:$Zd), (ins srcRegType:$Rn),
asm, "\t$Zd, $Rn",
"",
- []>, Sched<[]> {
+ [(set (vt zprty:$Zd), (op srcRegType:$Rn))]>, Sched<[]> {
bits<5> Rn;
bits<5> Zd;
let Inst{31-24} = 0b00000101;
let Inst{4-0} = Zd;
}
-multiclass sve_int_perm_dup_r<string asm> {
- def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>;
- def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, GPR32sp>;
- def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>;
- def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>;
+multiclass sve_int_perm_dup_r<string asm, SDPatternOperator op> {
+ def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, nxv16i8, GPR32sp, op>;
+ def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, nxv8i16, GPR32sp, op>;
+ def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, nxv4i32, GPR32sp, op>;
+ def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, nxv2i64, GPR64sp, op>;
def : InstAlias<"mov $Zd, $Rn",
(!cast<Instruction>(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>;
--- /dev/null
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;; Splats of legal integer vector types
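+;; Each splat below uses the canonical insertelement + shufflevector (zero
+;; mask) idiom, which SelectionDAGBuilder lowers to ISD::SPLAT_VECTOR for
+;; scalable vectors.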
+
+define <vscale x 16 x i8> @sve_splat_16xi8(i8 %val) {
+; CHECK-LABEL: @sve_splat_16xi8
+; CHECK: mov z0.b, w0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
+ %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ ret <vscale x 16 x i8> %splat
+}
+
+define <vscale x 8 x i16> @sve_splat_8xi16(i16 %val) {
+; CHECK-LABEL: @sve_splat_8xi16
+; CHECK: mov z0.h, w0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i16> %splat
+}
+
+define <vscale x 4 x i32> @sve_splat_4xi32(i32 %val) {
+; CHECK-LABEL: @sve_splat_4xi32
+; CHECK: mov z0.s, w0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %splat
+}
+
+define <vscale x 2 x i64> @sve_splat_2xi64(i64 %val) {
+; CHECK-LABEL: @sve_splat_2xi64
+; CHECK: mov z0.d, x0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
+ %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i64> %splat
+}
+
+;; Promote splats of smaller illegal integer vector types
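+;; Vectors with these element types are illegal, so the result type is
+;; promoted to a wider element (e.g. nxv2i8 -> nxv2i64) and the splatted
+;; scalar is any-extended to match.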
+
+define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
+; CHECK-LABEL: @sve_splat_2xi8
+; CHECK: mov z0.d, x0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 2 x i8> undef, i8 %val, i32 0
+ %splat = shufflevector <vscale x 2 x i8> %ins, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i8> %splat
+}
+
+define <vscale x 4 x i8> @sve_splat_4xi8(i8 %val) {
+; CHECK-LABEL: @sve_splat_4xi8
+; CHECK: mov z0.s, w0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 4 x i8> undef, i8 %val, i32 0
+ %splat = shufflevector <vscale x 4 x i8> %ins, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i8> %splat
+}
+
+define <vscale x 8 x i8> @sve_splat_8xi8(i8 %val) {
+; CHECK-LABEL: @sve_splat_8xi8
+; CHECK: mov z0.h, w0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 8 x i8> undef, i8 %val, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %ins, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i8> %splat
+}
+
+define <vscale x 2 x i16> @sve_splat_2xi16(i16 %val) {
+; CHECK-LABEL: @sve_splat_2xi16
+; CHECK: mov z0.d, x0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 2 x i16> undef, i16 %val, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i16> %splat
+}
+
+define <vscale x 4 x i16> @sve_splat_4xi16(i16 %val) {
+; CHECK-LABEL: @sve_splat_4xi16
+; CHECK: mov z0.s, w0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 4 x i16> undef, i16 %val, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %ins, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i16> %splat
+}
+
+define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
+; CHECK-LABEL: @sve_splat_2xi32
+; CHECK: mov z0.d, x0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 2 x i32> undef, i32 %val, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i32> %splat
+}