Summary:
This patch adds support for scalable vectors in intrinsics, enabling
intrinsics such as the following to be defined:
declare <vscale x 4 x i32> @llvm.something.nxv4i32(<vscale x 4 x i32>)
Support for this is implemented by defining a new type descriptor for
scalable vectors and by adding scalable vector types to the name-mangling
scheme used by 'any' types in intrinsic signatures.
Tests have been added for IRBuilder to check that scalable vectors work as
expected when creating intrinsics through this interface. This required an
intrinsic that is explicitly defined with scalable vectors, e.g.
LLVMType<nxv4i32>; an SVE floating-point convert intrinsic is used for
this. The behaviour of the overloaded type LLVMScalarOrSameVectorWidth with
scalable vectors is tested using the existing masked load intrinsic. An .ll
test has also been added to check that the Verifier catches a bad intrinsic
argument when a fixed-width predicate (mask) is passed to the masked.load
intrinsic where a scalable one is expected.
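
For illustration only (a sketch, not part of the patch): the new mangling
can be observed through the existing Intrinsic::getName API, assuming a
build of LLVM with this change applied. The expected string below matches
the declaration in the new Verifier test.

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/LLVMContext.h"
  #include <string>
  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    // Overload masked.load on <vscale x 4 x i32>; the third argument to
    // VectorType::get requests a scalable vector.
    Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4, /*Scalable=*/true);
    Type *PtrTy = VecTy->getPointerTo();
    // With this patch the overload suffix keeps the scalable flag, giving
    // "llvm.masked.load.nxv4i32.p0nxv4i32" rather than "...v4i32.p0v4i32".
    std::string Name = Intrinsic::getName(Intrinsic::masked_load, {VecTy, PtrTy});
    return Name == "llvm.masked.load.nxv4i32.p0nxv4i32" ? 0 : 1;
  }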
Patch by Paul Walker
Reviewed By: sdesmalen
Differential Revision: https://reviews.llvm.org/D65930
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370053 91177308-0d34-0410-b5e6-96231b3b80d8
Integer, Vector, Pointer, Struct,
Argument, ExtendArgument, TruncArgument, HalfVecArgument,
SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt,
- VecElementArgument
+ VecElementArgument, ScalableVecArgument
} Kind;
union {
Intrinsic<[llvm_i64_ty], [],
[IntrNoMem, IntrHasSideEffects]>;
}
+
+//===----------------------------------------------------------------------===//
+// SVE
+
+def llvm_nxv2i1_ty : LLVMType<nxv2i1>;
+def llvm_nxv4i1_ty : LLVMType<nxv4i1>;
+def llvm_nxv8i1_ty : LLVMType<nxv8i1>;
+def llvm_nxv16i1_ty : LLVMType<nxv16i1>;
+def llvm_nxv16i8_ty : LLVMType<nxv16i8>;
+def llvm_nxv4i32_ty : LLVMType<nxv4i32>;
+def llvm_nxv2i64_ty : LLVMType<nxv2i64>;
+def llvm_nxv8f16_ty : LLVMType<nxv8f16>;
+def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
+def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
+
+let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+ // This class of intrinsics is not intended to be useful within LLVM IR but
+ // is instead here to support some of the more rigid parts of the ACLE.
+ class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
+ : GCCBuiltin<"__builtin_sve_" # name>,
+ Intrinsic<[OUT], [OUT, llvm_nxv16i1_ty, IN], [IntrNoMem]>;
+}
+
+//
+// Floating-point conversions
+//
+
+def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
bool operator==(const ElementCount& RHS) const {
return Min == RHS.Min && Scalable == RHS.Scalable;
}
+ bool operator!=(const ElementCount& RHS) const {
+ return !(*this == RHS);
+ }
};
} // end namespace llvm
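
A minimal sketch (not from the patch) of the comparison semantics this
operator provides: a fixed and a scalable vector with the same minimum
element count must compare unequal, which is exactly what the
matchIntrinsicType change below relies on.

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    auto *Fixed = VectorType::get(Type::getInt32Ty(Ctx), 4, /*Scalable=*/false);
    auto *Scalable = VectorType::get(Type::getInt32Ty(Ctx), 4, /*Scalable=*/true);
    // Min matches (4 in both cases) but the Scalable flags differ, so the
    // new operator!= reports inequality.
    assert(Fixed->getElementCount() != Scalable->getElementCount());
    return 0;
  }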
Result += "vararg";
// Ensure nested function types are distinguishable.
Result += "f";
- } else if (isa<VectorType>(Ty)) {
- Result += "v" + utostr(Ty->getVectorNumElements()) +
- getMangledTypeStr(Ty->getVectorElementType());
+ } else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->isScalable())
+ Result += "nx";
+ Result += "v" + utostr(VTy->getVectorNumElements()) +
+ getMangledTypeStr(VTy->getVectorElementType());
} else if (Ty) {
switch (Ty->getTypeID()) {
default: llvm_unreachable("Unhandled type");
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
IIT_F128 = 41,
- IIT_VEC_ELEMENT = 42
+ IIT_VEC_ELEMENT = 42,
+ IIT_SCALABLE_VEC = 43
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
ArgInfo));
return;
}
+ case IIT_SCALABLE_VEC: {
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::ScalableVecArgument,
+ 0));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ }
}
llvm_unreachable("unhandled");
}
Type *EltTy = DecodeFixedType(Infos, Tys, Context);
Type *Ty = Tys[D.getArgumentNumber()];
if (auto *VTy = dyn_cast<VectorType>(Ty))
- return VectorType::get(EltTy, VTy->getNumElements());
+ return VectorType::get(EltTy, VTy->getElementCount());
return EltTy;
}
case IITDescriptor::PtrToArgument: {
case IITDescriptor::VecOfAnyPtrsToElt:
// Return the overloaded type (which determines the pointers' address space)
return Tys[D.getOverloadArgNumber()];
+ case IITDescriptor::ScalableVecArgument: {
+ Type *Ty = DecodeFixedType(Infos, Tys, Context);
+ return VectorType::get(Ty->getVectorElementType(),
+ { Ty->getVectorNumElements(), true });
+ }
}
llvm_unreachable("unhandled");
}
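
Another small sketch (illustrative, not from the patch) of why
DecodeFixedType now builds the "same width" vector from an ElementCount:
the scalable flag travels with the count, whereas the old
getNumElements() path silently produced a fixed-width type.

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    // Overloaded data type, e.g. for masked.load: <vscale x 4 x i32>.
    auto *Data = VectorType::get(Type::getInt32Ty(Ctx), 4, /*Scalable=*/true);
    // Deriving the i1 mask type from the ElementCount yields
    // <vscale x 4 x i1> rather than the fixed <4 x i1>.
    auto *Mask = VectorType::get(Type::getInt1Ty(Ctx), Data->getElementCount());
    assert(Mask->isScalable() && Mask->getNumElements() == 4);
    return 0;
  }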
return true;
Type *EltTy = Ty;
if (ThisArgType) {
- if (ReferenceType->getVectorNumElements() !=
- ThisArgType->getVectorNumElements())
+ if (ReferenceType->getElementCount() !=
+ ThisArgType->getElementCount())
return true;
EltTy = ThisArgType->getVectorElementType();
}
auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
return !ReferenceType || Ty != ReferenceType->getElementType();
}
+ case IITDescriptor::ScalableVecArgument: {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy || !VTy->isScalable())
+ return true;
+ return matchIntrinsicType(VTy, Infos, ArgTys, DeferredChecks,
+ IsDeferredCheck);
+ }
}
llvm_unreachable("unhandled");
}
--- /dev/null
+; RUN: not opt -S -verify 2>&1 < %s | FileCheck %s
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: <vscale x 4 x i32> (<vscale x 4 x i32>*, i32, <4 x i1>, <vscale x 4 x i32>)* @llvm.masked.load.nxv4i32.p0nxv4i32
+
+define <vscale x 4 x i32> @masked_load(<vscale x 4 x i32>* %addr, <4 x i1> %mask, <vscale x 4 x i32> %dst) {
+ %res = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %addr, i32 4, <4 x i1> %mask, <vscale x 4 x i32> %dst)
+ ret <vscale x 4 x i32> %res
+}
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <4 x i1>, <vscale x 4 x i32>)
EXPECT_FALSE(II->hasNoNaNs());
}
+TEST_F(IRBuilderTest, IntrinsicsWithScalableVectors) {
+ IRBuilder<> Builder(BB);
+ CallInst *Call;
+ FunctionType *FTy;
+
+ // Test scalable flag isn't dropped for intrinsic that is explicitly defined
+ // with scalable vectors, e.g. LLVMType<nxv4i32>.
+ Type *SrcVecTy = VectorType::get(Builder.getHalfTy(), 8, true);
+ Type *DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, true);
+ Type *PredTy = VectorType::get(Builder.getInt1Ty(), 16, true);
+
+ SmallVector<Value*, 3> ArgTys;
+ ArgTys.push_back(UndefValue::get(DstVecTy));
+ ArgTys.push_back(UndefValue::get(PredTy));
+ ArgTys.push_back(UndefValue::get(SrcVecTy));
+
+ Call = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fcvtzs_i32f16, {},
+ ArgTys, nullptr, "aarch64.sve.fcvtzs.i32f16");
+ FTy = Call->getFunctionType();
+ EXPECT_EQ(FTy->getReturnType(), DstVecTy);
+ for (unsigned i = 0; i != ArgTys.size(); ++i)
+ EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType());
+
+ // Test scalable flag isn't dropped for intrinsic defined with
+ // LLVMScalarOrSameVectorWidth.
+
+ Type *VecTy = VectorType::get(Builder.getInt32Ty(), 4, true);
+ Type *PtrToVecTy = VecTy->getPointerTo();
+ PredTy = VectorType::get(Builder.getInt1Ty(), 4, true);
+
+ ArgTys.clear();
+ ArgTys.push_back(UndefValue::get(PtrToVecTy));
+ ArgTys.push_back(UndefValue::get(Builder.getInt32Ty()));
+ ArgTys.push_back(UndefValue::get(PredTy));
+ ArgTys.push_back(UndefValue::get(VecTy));
+
+ Call = Builder.CreateIntrinsic(Intrinsic::masked_load,
+ {VecTy, PtrToVecTy}, ArgTys,
+ nullptr, "masked.load");
+ FTy = Call->getFunctionType();
+ EXPECT_EQ(FTy->getReturnType(), VecTy);
+ for (unsigned i = 0; i != ArgTys.size(); ++i)
+ EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType());
+}
+
TEST_F(IRBuilderTest, ConstrainedFP) {
IRBuilder<> Builder(BB);
Value *V;
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
IIT_F128 = 41,
- IIT_VEC_ELEMENT = 42
+ IIT_VEC_ELEMENT = 42,
+ IIT_SCALABLE_VEC = 43
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
if (MVT(VT).isVector()) {
MVT VVT = VT;
+ if (VVT.isScalableVector())
+ Sig.push_back(IIT_SCALABLE_VEC);
switch (VVT.getVectorNumElements()) {
default: PrintFatalError("unhandled vector type width in intrinsic!");
case 1: Sig.push_back(IIT_V1); break;