From 2e22f29b92768ea65ac5c26d354226ecc7509311 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Thu, 14 Nov 2013 01:57:55 +0000 Subject: [PATCH] Implement AArch64 NEON instruction set AdvSIMD (table). git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194649 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 18 +- lib/CodeGen/CGBuiltin.cpp | 231 +++++++++++++++- test/CodeGen/aarch64-neon-tbl.c | 463 ++++++++++++++++++++++++++++++++ utils/TableGen/NeonEmitter.cpp | 39 ++- 4 files changed, 737 insertions(+), 14 deletions(-) create mode 100644 test/CodeGen/aarch64-neon-tbl.c diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index cfde22717f..91cb3eb308 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -183,7 +183,8 @@ class NoTestOpInst : Inst {} // y: scalar of float // o: scalar of double // k: default elt width, double num elts -// #: array of default vectors +// 2,3,4: array of default vectors +// B,C,D: array of default elts, force 'Q' size modifier. // p: pointer type // c: const pointer type @@ -814,6 +815,21 @@ def VZIP2 : SOpInst<"vzip2", "ddd", def VUZP2 : SOpInst<"vuzp2", "ddd", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>; +//////////////////////////////////////////////////////////////////////////////// +// Table lookup +let InstName = "vtbl" in { +def VQTBL1_A64 : WInst<"vqtbl1", "djt", "UccPcQUcQcQPc">; +def VQTBL2_A64 : WInst<"vqtbl2", "dBt", "UccPcQUcQcQPc">; +def VQTBL3_A64 : WInst<"vqtbl3", "dCt", "UccPcQUcQcQPc">; +def VQTBL4_A64 : WInst<"vqtbl4", "dDt", "UccPcQUcQcQPc">; +} +let InstName = "vtbx" in { +def VQTBX1_A64 : WInst<"vqtbx1", "ddjt", "UccPcQUcQcQPc">; +def VQTBX2_A64 : WInst<"vqtbx2", "ddBt", "UccPcQUcQcQPc">; +def VQTBX3_A64 : WInst<"vqtbx3", "ddCt", "UccPcQUcQcQPc">; +def VQTBX4_A64 : WInst<"vqtbx4", "ddDt", "UccPcQUcQcQPc">; +} + //////////////////////////////////////////////////////////////////////////////// // Scalar Arithmetic diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index d2f8662b54..d9ec259ca9 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2463,13 +2463,240 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, return CGF.Builder.CreateBitCast(Result, ResultType, s); } -Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef Ops, + Value *ExtOp, Value *IndexOp, + llvm::Type *ResTy, unsigned IntID, + const char *Name) { + SmallVector TblOps; + if (ExtOp) + TblOps.push_back(ExtOp); + + // Build a vector containing sequential number like (0, 1, 2, ..., 15) + SmallVector Indices; + llvm::VectorType *TblTy = cast(Ops[0]->getType()); + for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { + Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); + Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); + } + Value *SV = llvm::ConstantVector::get(Indices); + + int PairPos = 0, End = Ops.size() - 1; + while (PairPos < End) { + TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], + Ops[PairPos+1], SV, Name)); + PairPos += 2; + } + + // If there's an odd number of 64-bit lookup table, fill the high 64-bit + // of the 128-bit lookup table with zero. + if (PairPos == End) { + Value *ZeroTbl = ConstantAggregateZero::get(TblTy); + TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], + ZeroTbl, SV, Name)); + } + + TblTy = llvm::VectorType::get(TblTy->getElementType(), + 2*TblTy->getNumElements()); + llvm::Type *Tys[2] = { ResTy, TblTy }; + + Function *TblF; + TblOps.push_back(IndexOp); + TblF = CGF.CGM.getIntrinsic(IntID, Tys); + + return CGF.EmitNeonCall(TblF, TblOps, Name); +} + +static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, + unsigned BuiltinID, + const CallExpr *E) { + unsigned int Int = 0; + const char *s = NULL; + + unsigned TblPos; + switch (BuiltinID) { + default: + return 0; + case AArch64::BI__builtin_neon_vtbl1_v: + case AArch64::BI__builtin_neon_vqtbl1_v: + case AArch64::BI__builtin_neon_vqtbl1q_v: + case AArch64::BI__builtin_neon_vtbl2_v: + case AArch64::BI__builtin_neon_vqtbl2_v: + case AArch64::BI__builtin_neon_vqtbl2q_v: + case AArch64::BI__builtin_neon_vtbl3_v: + case AArch64::BI__builtin_neon_vqtbl3_v: + case AArch64::BI__builtin_neon_vqtbl3q_v: + case AArch64::BI__builtin_neon_vtbl4_v: + case AArch64::BI__builtin_neon_vqtbl4_v: + case AArch64::BI__builtin_neon_vqtbl4q_v: + TblPos = 0; + break; + case AArch64::BI__builtin_neon_vtbx1_v: + case AArch64::BI__builtin_neon_vqtbx1_v: + case AArch64::BI__builtin_neon_vqtbx1q_v: + case AArch64::BI__builtin_neon_vtbx2_v: + case AArch64::BI__builtin_neon_vqtbx2_v: + case AArch64::BI__builtin_neon_vqtbx2q_v: + case AArch64::BI__builtin_neon_vtbx3_v: + case AArch64::BI__builtin_neon_vqtbx3_v: + case AArch64::BI__builtin_neon_vqtbx3q_v: + case AArch64::BI__builtin_neon_vtbx4_v: + case AArch64::BI__builtin_neon_vqtbx4_v: + case AArch64::BI__builtin_neon_vqtbx4q_v: + TblPos = 1; + break; + } + + assert(E->getNumArgs() >= 3); + + // Get the last argument, which specifies the vector type. + llvm::APSInt Result; + const Expr *Arg = E->getArg(E->getNumArgs() - 1); + if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) + return 0; + + // Determine the type of this overloaded NEON intrinsic. + NeonTypeFlags Type(Result.getZExtValue()); + llvm::VectorType *VTy = GetNeonType(&CGF, Type); + llvm::Type *Ty = VTy; + if (!Ty) + return 0; + + SmallVector Ops; + for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { + Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); + } + + Arg = E->getArg(TblPos); + llvm::Type *TblTy = CGF.ConvertType(Arg->getType()); + llvm::VectorType *VTblTy = cast(TblTy); + llvm::Type *Tys[2] = { Ty, VTblTy }; + unsigned nElts = VTy->getNumElements(); + // AArch64 scalar builtins are not overloaded, they do not have an extra + // argument that specifies the vector type, need to handle each case. + SmallVector TblOps; + switch (BuiltinID) { + case AArch64::BI__builtin_neon_vtbl1_v: { + TblOps.push_back(Ops[0]); + return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty, + Intrinsic::aarch64_neon_vtbl1, "vtbl1"); + } + case AArch64::BI__builtin_neon_vtbl2_v: { + TblOps.push_back(Ops[0]); + TblOps.push_back(Ops[1]); + return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty, + Intrinsic::aarch64_neon_vtbl1, "vtbl1"); + } + case AArch64::BI__builtin_neon_vtbl3_v: { + TblOps.push_back(Ops[0]); + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty, + Intrinsic::aarch64_neon_vtbl2, "vtbl2"); + } + case AArch64::BI__builtin_neon_vtbl4_v: { + TblOps.push_back(Ops[0]); + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + TblOps.push_back(Ops[3]); + return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty, + Intrinsic::aarch64_neon_vtbl2, "vtbl2"); + } + case AArch64::BI__builtin_neon_vtbx1_v: { + TblOps.push_back(Ops[1]); + Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty, + Intrinsic::aarch64_neon_vtbl1, "vtbl1"); + + llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); + Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); + Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); + CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); + + SmallVector BslOps; + BslOps.push_back(CmpRes); + BslOps.push_back(Ops[0]); + BslOps.push_back(TblRes); + Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); + return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); + } + case AArch64::BI__builtin_neon_vtbx2_v: { + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, + Intrinsic::aarch64_neon_vtbx1, "vtbx1"); + } + case AArch64::BI__builtin_neon_vtbx3_v: { + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + TblOps.push_back(Ops[3]); + Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty, + Intrinsic::aarch64_neon_vtbl2, "vtbl2"); + + llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); + Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); + Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], + TwentyFourV); + CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); + + SmallVector BslOps; + BslOps.push_back(CmpRes); + BslOps.push_back(Ops[0]); + BslOps.push_back(TblRes); + Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); + return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); + } + case AArch64::BI__builtin_neon_vtbx4_v: { + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + TblOps.push_back(Ops[3]); + TblOps.push_back(Ops[4]); + return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, + Intrinsic::aarch64_neon_vtbx2, "vtbx2"); + } + case AArch64::BI__builtin_neon_vqtbl1_v: + case AArch64::BI__builtin_neon_vqtbl1q_v: + Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break; + case AArch64::BI__builtin_neon_vqtbl2_v: + case AArch64::BI__builtin_neon_vqtbl2q_v: { + Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break; + case AArch64::BI__builtin_neon_vqtbl3_v: + case AArch64::BI__builtin_neon_vqtbl3q_v: + Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break; + case AArch64::BI__builtin_neon_vqtbl4_v: + case AArch64::BI__builtin_neon_vqtbl4q_v: + Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break; + case AArch64::BI__builtin_neon_vqtbx1_v: + case AArch64::BI__builtin_neon_vqtbx1q_v: + Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break; + case AArch64::BI__builtin_neon_vqtbx2_v: + case AArch64::BI__builtin_neon_vqtbx2q_v: + Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break; + case AArch64::BI__builtin_neon_vqtbx3_v: + case AArch64::BI__builtin_neon_vqtbx3q_v: + Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break; + case AArch64::BI__builtin_neon_vqtbx4_v: + case AArch64::BI__builtin_neon_vqtbx4q_v: + Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break; + } + } + + if (!Int) + return 0; + + Function *F = CGF.CGM.getIntrinsic(Int, Tys); + return CGF.EmitNeonCall(F, Ops, s); +} + +Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { // Process AArch64 scalar builtins if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E)) return Result; + // Process AArch64 table lookup builtins + if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E)) + return Result; + if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "Variadic __clear_cache slipped through on AArch64"); diff --git a/test/CodeGen/aarch64-neon-tbl.c b/test/CodeGen/aarch64-neon-tbl.c new file mode 100644 index 0000000000..db78a7aa70 --- /dev/null +++ b/test/CodeGen/aarch64-neon-tbl.c @@ -0,0 +1,463 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include + +int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtbl1_s8 + return vtbl1_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) { + // CHECK: test_vqtbl1_s8 + return vqtbl1_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { + // CHECK: test_vtbl2_s8 + return vtbl2_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) { + // CHECK: test_vqtbl2_s8 + return vqtbl2_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { + // CHECK: test_vtbl3_s8 + return vtbl3_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) { + // CHECK: test_vqtbl3_s8 + return vqtbl3_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { + // CHECK: test_vtbl4_s8 + return vtbl4_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) { + // CHECK: test_vqtbl4_s8 + return vqtbl4_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vqtbl1q_s8 + return vqtbl1q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) { + // CHECK: test_vqtbl2q_s8 + return vqtbl2q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) { + // CHECK: test_vqtbl3q_s8 + return vqtbl3q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) { + // CHECK: test_vqtbl4q_s8 + return vqtbl4q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) { + // CHECK: test_vtbx1_s8 + return vtbx1_s8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { + // CHECK: test_vtbx2_s8 + return vtbx2_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { + // CHECK: test_vtbx3_s8 + return vtbx3_s8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { + // CHECK: test_vtbx4_s8 + return vtbx4_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) { + // CHECK: test_vqtbx1_s8 + return vqtbx1_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) { + // CHECK: test_vqtbx2_s8 + return vqtbx2_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) { + // CHECK: test_vqtbx3_s8 + return vqtbx3_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) { + // CHECK: test_vqtbx4_s8 + return vqtbx4_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vqtbx1q_s8 + return vqtbx1q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) { + // CHECK: test_vqtbx2q_s8 + return vqtbx2q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) { + // CHECK: test_vqtbx3q_s8 + return vqtbx3q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) { + // CHECK: test_vqtbx4q_s8 + return vqtbx4q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtbl1_u8 + return vtbl1_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) { + // CHECK: test_vqtbl1_u8 + return vqtbl1_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { + // CHECK: test_vtbl2_u8 + return vtbl2_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) { + // CHECK: test_vqtbl2_u8 + return vqtbl2_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { + // CHECK: test_vtbl3_u8 + return vtbl3_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) { + // CHECK: test_vqtbl3_u8 + return vqtbl3_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { + // CHECK: test_vtbl4_u8 + return vtbl4_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) { + // CHECK: test_vqtbl4_u8 + return vqtbl4_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vqtbl1q_u8 + return vqtbl1q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) { + // CHECK: test_vqtbl2q_u8 + return vqtbl2q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) { + // CHECK: test_vqtbl3q_u8 + return vqtbl3q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { + // CHECK: test_vqtbl4q_u8 + return vqtbl4q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vtbx1_u8 + return vtbx1_u8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { + // CHECK: test_vtbx2_u8 + return vtbx2_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { + // CHECK: test_vtbx3_u8 + return vtbx3_u8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { + // CHECK: test_vtbx4_u8 + return vtbx4_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) { + // CHECK: test_vqtbx1_u8 + return vqtbx1_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) { + // CHECK: test_vqtbx2_u8 + return vqtbx2_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) { + // CHECK: test_vqtbx3_u8 + return vqtbx3_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) { + // CHECK: test_vqtbx4_u8 + return vqtbx4_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vqtbx1q_u8 + return vqtbx1q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) { + // CHECK: test_vqtbx2q_u8 + return vqtbx2q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) { + // CHECK: test_vqtbx3q_u8 + return vqtbx3q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) { + // CHECK: test_vqtbx4q_u8 + return vqtbx4q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { + // CHECK: test_vtbl1_p8 + return vtbl1_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) { + // CHECK: test_vqtbl1_p8 + return vqtbl1_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { + // CHECK: test_vtbl2_p8 + return vtbl2_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) { + // CHECK: test_vqtbl2_p8 + return vqtbl2_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { + // CHECK: test_vtbl3_p8 + return vtbl3_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) { + // CHECK: test_vqtbl3_p8 + return vqtbl3_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { + // CHECK: test_vtbl4_p8 + return vtbl4_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) { + // CHECK: test_vqtbl4_p8 + return vqtbl4_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) { + // CHECK: test_vqtbl1q_p8 + return vqtbl1q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) { + // CHECK: test_vqtbl2q_p8 + return vqtbl2q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) { + // CHECK: test_vqtbl3q_p8 + return vqtbl3q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) { + // CHECK: test_vqtbl4q_p8 + return vqtbl4q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { + // CHECK: test_vtbx1_p8 + return vtbx1_p8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { + // CHECK: test_vtbx2_p8 + return vtbx2_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { + // CHECK: test_vtbx3_p8 + return vtbx3_p8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { + // CHECK: test_vtbx4_p8 + return vtbx4_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) { + // CHECK: test_vqtbx1_p8 + return vqtbx1_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) { + // CHECK: test_vqtbx2_p8 + return vqtbx2_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) { + // CHECK: test_vqtbx3_p8 + return vqtbx3_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) { + // CHECK: test_vqtbx4_p8 + return vqtbx4_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vqtbx1q_p8 + return vqtbx1q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) { + // CHECK: test_vqtbx2q_p8 + return vqtbx2q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) { + // CHECK: test_vqtbx3q_p8 + return vqtbx3q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) { + // CHECK: test_vqtbx4q_p8 + return vqtbx4q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 4fd94a982c..891cdc66f9 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -494,6 +494,9 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, case 'g': quad = false; break; + case 'B': + case 'C': + case 'D': case 'j': quad = true; break; @@ -557,6 +560,10 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, return type; } +static bool IsMultiVecProto(const char p) { + return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D')); +} + /// TypeString - for a modifier and type, generate the name of the typedef for /// that type. QUc -> uint8x8_t. static std::string TypeString(const char mod, StringRef typestr) { @@ -631,11 +638,11 @@ static std::string TypeString(const char mod, StringRef typestr) { PrintFatalError("unhandled type!"); } - if (mod == '2') + if (mod == '2' || mod == 'B') s += "x2"; - if (mod == '3') + if (mod == '3' || mod == 'C') s += "x3"; - if (mod == '4') + if (mod == '4' || mod == 'D') s += "x4"; // Append _t, finishing the type string typedef type. @@ -712,7 +719,7 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr, // returning structs of 2, 3, or 4 vectors which are returned in a sret-like // fashion, storing them to a pointer arg. if (ret) { - if (mod >= '2' && mod <= '4') + if (IsMultiVecProto(mod)) return "vv*"; // void result with void* first argument if (mod == 'f' || (ck != ClassB && type == 'f')) return quad ? "V4f" : "V2f"; @@ -729,11 +736,11 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr, } // Non-return array types are passed as individual vectors. - if (mod == '2') + if (mod == '2' || mod == 'B') return quad ? "V16ScV16Sc" : "V8ScV8Sc"; - if (mod == '3') + if (mod == '3' || mod == 'C') return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; - if (mod == '4') + if (mod == '4' || mod == 'D') return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; if (mod == 'f' || (ck != ClassB && type == 'f')) @@ -1996,7 +2003,7 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto, // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit // sret-like argument. - bool sret = (proto[0] >= '2' && proto[0] <= '4'); + bool sret = IsMultiVecProto(proto[0]); bool define = UseMacro(proto); @@ -2056,12 +2063,19 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto, // Handle multiple-vector values specially, emitting each subvector as an // argument to the __builtin. + unsigned NumOfVec = 0; if (proto[i] >= '2' && proto[i] <= '4') { + NumOfVec = proto[i] - '0'; + } else if (proto[i] >= 'B' && proto[i] <= 'D') { + NumOfVec = proto[i] - 'A' + 1; + } + + if (NumOfVec > 0) { // Check if an explicit cast is needed. if (argType != 'c' || argPoly || argUsgn) args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; - for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) { + for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) { s += args + ".val[" + utostr(vi) + "]"; if ((vi + 1) < ve) s += ", "; @@ -2586,7 +2600,7 @@ NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, // Builtins that return a struct of multiple vectors have an extra // leading arg for the struct return. - if (Proto[0] >= '2' && Proto[0] <= '4') + if (IsMultiVecProto(Proto[0])) ++immidx; // Add one to the index for each argument until we reach the immediate @@ -2597,12 +2611,15 @@ NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, immidx += 1; break; case '2': + case 'B': immidx += 2; break; case '3': + case 'C': immidx += 3; break; case '4': + case 'D': immidx += 4; break; case 'i': @@ -2710,7 +2727,7 @@ NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, } } // For sret builtins, adjust the pointer argument index. - if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4')) + if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0])) PtrArgNum += 1; // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, -- 2.40.0