From: Kevin Qin Date: Thu, 14 Nov 2013 03:29:16 +0000 (+0000) Subject: [AArch64 neon] support poly64 and relevant intrinsic functions. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=624bb5e59dbcc24efeee7dff12c9b48d2b5077e9;p=clang [AArch64 neon] support poly64 and relevant intrinsic functions. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194660 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/TargetBuiltins.h b/include/clang/Basic/TargetBuiltins.h index d07f8f7cd3..ed3cc49fed 100644 --- a/include/clang/Basic/TargetBuiltins.h +++ b/include/clang/Basic/TargetBuiltins.h @@ -90,6 +90,7 @@ namespace clang { Int64, Poly8, Poly16, + Poly64, Float16, Float32, Float64 diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 83ed8289fb..f62ff870d3 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -519,23 +519,23 @@ let isA64 = 1 in { //////////////////////////////////////////////////////////////////////////////// // Load/Store -// With additional QUl, Ql, Qd type. +// With additional QUl, Ql, Qd, Pl, QPl type. def LD1 : WInst<"vld1", "dc", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def LD2 : WInst<"vld2", "2c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def LD3 : WInst<"vld3", "3c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def LD4 : WInst<"vld4", "4c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def ST1 : WInst<"vst1", "vpd", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def ST2 : WInst<"vst2", "vp2", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def ST3 : WInst<"vst3", "vp3", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def ST4 : WInst<"vst4", "vp4", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Addition @@ -570,8 +570,9 @@ def FMLS : SInst<"vfms", "dddd", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Logical operations -// With additional Qd type. -def BSL : SInst<"vbsl", "dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQd">; +// With additional Qd, Ql, QPl type. +def BSL : SInst<"vbsl", "dudd", + "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQdPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Absolute Difference @@ -658,13 +659,16 @@ def FSQRT : SInst<"vsqrt", "dd", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Comparison -// With additional Qd type. +// With additional Qd, Ql, QPl type. +def VVCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPcPlQPl", + OP_EQ>; def FCAGE : IInst<"vcage", "udd", "fQfQd">; def FCAGT : IInst<"vcagt", "udd", "fQfQd">; def FCALE : IInst<"vcale", "udd", "fQfQd">; def FCALT : IInst<"vcalt", "udd", "fQfQd">; // With additional Ql, QUl, Qd types. -def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; +def CMTST : WInst<"vtst", "udd", + "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">; def CFMEQ : SOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>; def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>; @@ -713,6 +717,13 @@ let isShift = 1 in { def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi", OP_LONG_HI>; +//////////////////////////////////////////////////////////////////////////////// +// Shifts with insert, with additional Ql, QPl type. +def SRI_N : WInst<"vsri_n", "dddi", + "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">; +def SLI_N : WInst<"vsli_n", "dddi", + "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">; + // Right shift narrow high def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi", "HsHiHlHUsHUiHUl", OP_NARROW_HI>; @@ -764,36 +775,40 @@ def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; //////////////////////////////////////////////////////////////////////////////// // Extract or insert element from vector def GET_LANE : IInst<"vget_lane", "sdi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">; + "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">; def SET_LANE : IInst<"vset_lane", "dsdi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">; + "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">; def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", - "csiPcPsUcUsUiPcPsf", OP_COPY_LN>; + "csiPcPsUcUsUiPcPsfPl", OP_COPY_LN>; def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", - "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQd", OP_COPYQ_LN>; + "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>; def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", "csiPcPsUcUsUif", OP_COPY_LNQ>; def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", - "QcQsQiQlQUcQUsQUiQUlQPcQPsQfd", OP_COPY_LN>; - + "QcQsQiQlQUcQUsQUiQUlQPcQPsQfdQPl", OP_COPY_LN>; //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", - "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd", + "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; def VDUP_LANE2: WOpInst<"vdup_laneq", "dki", - "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd", + "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; def DUP_N : WOpInst<"vdup_n", "ds", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd", + "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQdPlQPl", OP_DUP>; def MOV_N : WOpInst<"vmov_n", "ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd", OP_DUP>; + +//////////////////////////////////////////////////////////////////////////////// +// Combining vectors, with additional Pl +def COMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPsPl", OP_CONC>; + //////////////////////////////////////////////////////////////////////////////// -//Initialize a vector from bit pattern -def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPsl", OP_CAST>; +//Initialize a vector from bit pattern, with additional Pl +def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPslPl", OP_CAST>; //////////////////////////////////////////////////////////////////////////////// @@ -864,7 +879,7 @@ def FMINNMV : SInst<"vminnmv", "sd", "Qf">; //////////////////////////////////////////////////////////////////////////////// // Newly added Vector Extract for f64 def VEXT_A64 : WInst<"vext", "dddi", - "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQd">; + "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQdPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Crypto @@ -888,17 +903,17 @@ def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">; //////////////////////////////////////////////////////////////////////////////// // Permutation def VTRN1 : SOpInst<"vtrn1", "ddd", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN1>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>; def VZIP1 : SOpInst<"vzip1", "ddd", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP1>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>; def VUZP1 : SOpInst<"vuzp1", "ddd", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP1>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>; def VTRN2 : SOpInst<"vtrn2", "ddd", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN2>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>; def VZIP2 : SOpInst<"vzip2", "ddd", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP2>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>; def VUZP2 : SOpInst<"vuzp2", "ddd", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>; //////////////////////////////////////////////////////////////////////////////// // Table lookup diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index b9c6b4df1d..a2669fe8fe 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1622,6 +1622,7 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, case NeonTypeFlags::Int32: return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); case NeonTypeFlags::Int64: + case NeonTypeFlags::Poly64: return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); case NeonTypeFlags::Float32: return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index d059001f9b..0b95c48d4f 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -339,6 +339,7 @@ static unsigned RFT(unsigned t, bool shift = false) { case NeonTypeFlags::Int32: return shift ? 31 : (2 << IsQuad) - 1; case NeonTypeFlags::Int64: + case NeonTypeFlags::Poly64: return shift ? 63 : (1 << IsQuad) - 1; case NeonTypeFlags::Float16: assert(!shift && "cannot shift float types!"); @@ -356,7 +357,8 @@ static unsigned RFT(unsigned t, bool shift = false) { /// getNeonEltType - Return the QualType corresponding to the elements of /// the vector type specified by the NeonTypeFlags. This is used to check /// the pointer arguments for Neon load/store intrinsics. -static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) { +static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, + bool IsAArch64) { switch (Flags.getEltType()) { case NeonTypeFlags::Int8: return Flags.isUnsigned() ? Context.UnsignedCharTy : Context.SignedCharTy; @@ -367,11 +369,13 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) { case NeonTypeFlags::Int64: return Flags.isUnsigned() ? Context.UnsignedLongLongTy : Context.LongLongTy; case NeonTypeFlags::Poly8: - return Context.SignedCharTy; + return IsAArch64 ? Context.UnsignedCharTy : Context.SignedCharTy; case NeonTypeFlags::Poly16: - return Context.ShortTy; + return IsAArch64 ? Context.UnsignedShortTy : Context.ShortTy; + case NeonTypeFlags::Poly64: + return Context.UnsignedLongLongTy; case NeonTypeFlags::Float16: - return Context.UnsignedShortTy; + return Context.HalfTy; case NeonTypeFlags::Float32: return Context.FloatTy; case NeonTypeFlags::Float64: @@ -415,7 +419,7 @@ bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, Arg = ICE->getSubExpr(); ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg); QualType RHSTy = RHS.get()->getType(); - QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context); + QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context, true); if (HasConstPtr) EltTy = EltTy.withConst(); QualType LHSTy = Context.getPointerType(EltTy); @@ -602,7 +606,7 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { Arg = ICE->getSubExpr(); ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg); QualType RHSTy = RHS.get()->getType(); - QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context); + QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context, false); if (HasConstPtr) EltTy = EltTy.withConst(); QualType LHSTy = Context.getPointerType(EltTy); diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp index b4915c01c1..8e81b296cc 100644 --- a/lib/Sema/SemaType.cpp +++ b/lib/Sema/SemaType.cpp @@ -4766,9 +4766,10 @@ static bool isPermittedNeonBaseType(QualType &Ty, if (VecKind == VectorType::NeonPolyVector) { if (IsAArch64) { - // AArch64 polynomial vectors are unsigned + // AArch64 polynomial vectors are unsigned and support poly64. return BTy->getKind() == BuiltinType::UChar || - BTy->getKind() == BuiltinType::UShort; + BTy->getKind() == BuiltinType::UShort || + BTy->getKind() == BuiltinType::ULongLong; } else { // AArch32 polynomial vector are signed. return BTy->getKind() == BuiltinType::SChar || diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 2b1de5e43c..48bb73f245 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -170,6 +170,7 @@ public: Int64, Poly8, Poly16, + Poly64, Float16, Float32, Float64 @@ -626,7 +627,7 @@ static std::string TypeString(const char mod, StringRef typestr) { s += quad ? "x4" : "x2"; break; case 'l': - s += "int64"; + s += (poly && !usgn)? "poly64" : "int64"; if (scal) break; s += quad ? "x2" : "x1"; @@ -810,7 +811,7 @@ static void InstructionTypeCode(const StringRef &typeStr, break; case 'l': switch (ck) { - case ClassS: typeCode = usgn ? "u64" : "s64"; break; + case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break; case ClassI: typeCode = "i64"; break; case ClassW: typeCode = "64"; break; default: break; @@ -2040,7 +2041,7 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { ET = NeonTypeFlags::Int32; break; case 'l': - ET = NeonTypeFlags::Int64; + ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64; break; case 'h': ET = NeonTypeFlags::Float16; @@ -2325,6 +2326,7 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#ifdef __aarch64__\n"; OS << "typedef uint8_t poly8_t;\n"; OS << "typedef uint16_t poly16_t;\n"; + OS << "typedef uint64_t poly64_t;\n"; OS << "#else\n"; OS << "typedef int8_t poly8_t;\n"; OS << "typedef int16_t poly16_t;\n"; @@ -2332,19 +2334,21 @@ void NeonEmitter::run(raw_ostream &OS) { // Emit Neon vector typedefs. std::string TypedefTypes( - "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs"); + "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); SmallVector TDTypeVec; ParseTypes(0, TypedefTypes, TDTypeVec); // Emit vector typedefs. bool isA64 = false; + bool preinsert; + bool postinsert; for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { bool dummy, quad = false, poly = false; char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); - bool preinsert = false; - bool postinsert = false; + preinsert = false; + postinsert = false; - if (type == 'd') { + if (type == 'd' || (type == 'l' && poly)) { preinsert = isA64? false: true; isA64 = true; } else { @@ -2370,6 +2374,9 @@ void NeonEmitter::run(raw_ostream &OS) { OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; } + postinsert = isA64? true: false; + if (postinsert) + OS << "#endif\n"; OS << "\n"; // Emit struct typedefs. @@ -2378,10 +2385,10 @@ void NeonEmitter::run(raw_ostream &OS) { for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { bool dummy, quad = false, poly = false; char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); - bool preinsert = false; - bool postinsert = false; + preinsert = false; + postinsert = false; - if (type == 'd') { + if (type == 'd' || (type == 'l' && poly)) { preinsert = isA64? false: true; isA64 = true; } else { @@ -2403,6 +2410,10 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "\n"; } } + postinsert = isA64? true: false; + if (postinsert) + OS << "#endif\n"; + OS << "\n"; OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";