From: Hao Liu Date: Wed, 4 Sep 2013 09:29:13 +0000 (+0000) Subject: Implement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=912502b4996b14db31b498cb1eef2b17d7d66d57;p=clang Implement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions: sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$ and 4 convert instructions: scvtf,ucvtf,fcvtzs,fcvtzu git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@189926 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 6918f0aaeb..1ac5f9b716 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -70,6 +70,9 @@ def OP_ABDL : Op; def OP_ABA : Op; def OP_ABAL : Op; def OP_DIV : Op; +def OP_LONG_HI : Op; +def OP_NARROW_HI : Op; +def OP_MOVL_HI : Op; class Inst { string Name = n; @@ -131,6 +134,7 @@ class NoTestOpInst : Inst {} // n: double width elements, half num elts // h: half width elements, double num elts // e: half width elements, double num elts, unsigned +// m: half width elements, same num elts // i: constant int // l: constant uint64 // s: scalar of element type @@ -557,13 +561,33 @@ def ADDP : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; // Shifts by constant let isShift = 1 in { // Left shift long high -def SHLL_HIGH_N : SInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi">; +def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi", + OP_LONG_HI>; + +// Right shift narrow high +def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi", + "HsHiHlHUsHUiHUl", OP_NARROW_HI>; +def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "hmdi", + "HsHiHl", OP_NARROW_HI>; +def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "hmdi", + "HsHiHlHUsHUiHUl", OP_NARROW_HI>; +def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "hmdi", + "HsHiHl", OP_NARROW_HI>; +def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", 
"hmdi", + "HsHiHlHUsHUiHUl", OP_NARROW_HI>; +def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "hmdi", + "HsHiHlHUsHUiHUl", OP_NARROW_HI>; } //////////////////////////////////////////////////////////////////////////////// // Converting vectors -def VMOVL_HIGH : SInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi">; +def VMOVL_HIGH : SOpInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi", OP_MOVL_HI>; +let isVCVT_N = 1 in { +def CVTF_N_F64 : SInst<"vcvt_n_f64", "fdi", "QlQUl">; +def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "Qd">; +def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "Qd">; +} //////////////////////////////////////////////////////////////////////////////// // Scalar Arithmetic diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index febde9a322..6bf5d6f54a 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1620,37 +1620,6 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, llvm_unreachable("Invalid NeonTypeFlags element type!"); } -static Value *EmitExtendedSHL(CodeGenFunction &CGF, - SmallVectorImpl &Ops, - llvm::VectorType *VTy, bool usgn, bool isHigh) { - CGBuilderTy Builder = CGF.Builder; - if (isHigh){ - unsigned NumElts = VTy->getNumElements(); - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - llvm::Type *EltTy = - llvm::IntegerType::get(VTy->getContext(), EltBits / 2); - // The source operand type has twice as many elements of half the size. 
- llvm::Type *SrcTy = llvm::VectorType::get(EltTy, NumElts * 2); - SmallVector Indices; - for (unsigned i = 0; i != NumElts; i++) - Indices.push_back(Builder.getInt32(i + NumElts)); - Value *SV = llvm::ConstantVector::get(Indices); - Value *Undef = llvm::UndefValue::get(SrcTy); - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - Ops[0] = Builder.CreateShuffleVector(Ops[0], Undef, SV); - } else { - llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - } - - if (usgn) - Ops[0] = Builder.CreateZExt(Ops[0], VTy); - else - Ops[0] = Builder.CreateSExt(Ops[0], VTy); - Ops[1] = CGF.EmitNeonShiftVector(Ops[1], VTy, false); - return Builder.CreateShl(Ops[0], Ops[1], "vshl_n"); -} - Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { unsigned nElts = cast(V->getType())->getNumElements(); Value* SV = llvm::ConstantVector::getSplat(nElts, C); @@ -1893,18 +1862,122 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E); case AArch64::BI__builtin_neon_vqrdmulhq_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E); + + // Shift by immediate + case AArch64::BI__builtin_neon_vshr_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E); + case AArch64::BI__builtin_neon_vshrq_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E); + case AArch64::BI__builtin_neon_vrshr_n_v: + case AArch64::BI__builtin_neon_vrshrq_n_v: + Int = usgn ? 
Intrinsic::aarch64_neon_vurshr + : Intrinsic::aarch64_neon_vsrshr; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); + case AArch64::BI__builtin_neon_vsra_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E); + case AArch64::BI__builtin_neon_vsraq_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E); + case AArch64::BI__builtin_neon_vrsra_n_v: + case AArch64::BI__builtin_neon_vrsraq_n_v: { + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Int = usgn ? Intrinsic::aarch64_neon_vurshr + : Intrinsic::aarch64_neon_vsrshr; + Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); + return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); + } case AArch64::BI__builtin_neon_vshl_n_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E); case AArch64::BI__builtin_neon_vshlq_n_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E); + case AArch64::BI__builtin_neon_vqshl_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E); + case AArch64::BI__builtin_neon_vqshlq_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E); + case AArch64::BI__builtin_neon_vqshlu_n_v: + case AArch64::BI__builtin_neon_vqshluq_n_v: + Int = Intrinsic::aarch64_neon_vsqshlu; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); + case AArch64::BI__builtin_neon_vsri_n_v: + case AArch64::BI__builtin_neon_vsriq_n_v: + Int = Intrinsic::aarch64_neon_vsri; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); + case AArch64::BI__builtin_neon_vsli_n_v: + case AArch64::BI__builtin_neon_vsliq_n_v: + Int = Intrinsic::aarch64_neon_vsli; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); + case AArch64::BI__builtin_neon_vshll_n_v: { + llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + if (usgn) + Ops[0] = Builder.CreateZExt(Ops[0], VTy); + else + Ops[0] = 
Builder.CreateSExt(Ops[0], VTy); + Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); + return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); + } + case AArch64::BI__builtin_neon_vshrn_n_v: { + llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); + if (usgn) + Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); + return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); + } + case AArch64::BI__builtin_neon_vqshrun_n_v: + Int = Intrinsic::aarch64_neon_vsqshrun; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); + case AArch64::BI__builtin_neon_vrshrn_n_v: + Int = Intrinsic::aarch64_neon_vrshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); + case AArch64::BI__builtin_neon_vqrshrun_n_v: + Int = Intrinsic::aarch64_neon_vsqrshrun; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); + case AArch64::BI__builtin_neon_vqshrn_n_v: + Int = usgn ? Intrinsic::aarch64_neon_vuqshrn + : Intrinsic::aarch64_neon_vsqshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); + case AArch64::BI__builtin_neon_vqrshrn_n_v: + Int = usgn ? 
Intrinsic::aarch64_neon_vuqrshrn + : Intrinsic::aarch64_neon_vsqrshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); + + // Convert case AArch64::BI__builtin_neon_vmovl_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E); - case AArch64::BI__builtin_neon_vshll_n_v: - return EmitExtendedSHL(*this, Ops, VTy, usgn, false); - case AArch64::BI__builtin_neon_vmovl_high_v: - Ops.push_back(ConstantInt::get(Int32Ty, 0)); - case AArch64::BI__builtin_neon_vshll_high_n_v: - return EmitExtendedSHL(*this, Ops, VTy, usgn, true); + case AArch64::BI__builtin_neon_vcvt_n_f32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_f32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_f64_v: { + llvm::Type *FloatTy = + GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); + llvm::Type *Tys[2] = { FloatTy, Ty }; + Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp + : Intrinsic::arm_neon_vcvtfxs2fp; + Function *F = CGM.getIntrinsic(Int, Tys); + return EmitNeonCall(F, Ops, "vcvt_n"); + } + case AArch64::BI__builtin_neon_vcvt_n_s32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_s32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E); + case AArch64::BI__builtin_neon_vcvt_n_u32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_u32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_s64_v: + case AArch64::BI__builtin_neon_vcvtq_n_u64_v: { + llvm::Type *FloatTy = + GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); + llvm::Type *Tys[2] = { Ty, FloatTy }; + Int = usgn ? 
Intrinsic::arm_neon_vcvtfp2fxu + : Intrinsic::arm_neon_vcvtfp2fxs; + Function *F = CGM.getIntrinsic(Int, Tys); + return EmitNeonCall(F, Ops, "vcvt_n"); + } // AArch64-only builtins case AArch64::BI__builtin_neon_vfms_v: diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index 5d5bdafe50..9dce70d50a 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -3105,6 +3105,960 @@ int64x2_t test_vshlq_n_u64(int64x2_t a) { // CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 } +int8x8_t test_vshr_n_s8(int8x8_t a) { + // CHECK: test_vshr_n_s8 + return vshr_n_s8(a, 3); + // CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vshr_n_s16(int16x4_t a) { + // CHECK: test_vshr_n_s16 + return vshr_n_s16(a, 3); + // CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vshr_n_s32(int32x2_t a) { + // CHECK: test_vshr_n_s32 + return vshr_n_s32(a, 3); + // CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vshrq_n_s8(int8x16_t a) { + // CHECK: test_vshrq_n_s8 + return vshrq_n_s8(a, 3); + // CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vshrq_n_s16(int16x8_t a) { + // CHECK: test_vshrq_n_s16 + return vshrq_n_s16(a, 3); + // CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vshrq_n_s32(int32x4_t a) { + // CHECK: test_vshrq_n_s32 + return vshrq_n_s32(a, 3); + // CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vshrq_n_s64(int64x2_t a) { + // CHECK: test_vshrq_n_s64 + return vshrq_n_s64(a, 3); + // CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vshr_n_u8(int8x8_t a) { + // CHECK: test_vshr_n_u8 + return vshr_n_u8(a, 3); + // CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vshr_n_u16(int16x4_t a) { + // CHECK: test_vshr_n_u16 + return vshr_n_u16(a, 3); + // CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vshr_n_u32(int32x2_t a) { + // CHECK: 
test_vshr_n_u32 + return vshr_n_u32(a, 3); + // CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vshrq_n_u8(int8x16_t a) { + // CHECK: test_vshrq_n_u8 + return vshrq_n_u8(a, 3); + // CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vshrq_n_u16(int16x8_t a) { + // CHECK: test_vshrq_n_u16 + return vshrq_n_u16(a, 3); + // CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vshrq_n_u32(int32x4_t a) { + // CHECK: test_vshrq_n_u32 + return vshrq_n_u32(a, 3); + // CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vshrq_n_u64(int64x2_t a) { + // CHECK: test_vshrq_n_u64 + return vshrq_n_u64(a, 3); + // CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsra_n_s8 + return vsra_n_s8(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsra_n_s16 + return vsra_n_s16(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsra_n_s32 + return vsra_n_s32(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsraq_n_s8 + return vsraq_n_s8(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsraq_n_s16 + return vsraq_n_s16(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsraq_n_s32 + return vsraq_n_s32(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsraq_n_s64 + return vsraq_n_s64(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsra_n_u8 
+ return vsra_n_u8(a, b, 3); + // CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsra_n_u16 + return vsra_n_u16(a, b, 3); + // CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsra_n_u32 + return vsra_n_u32(a, b, 3); + // CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsraq_n_u8 + return vsraq_n_u8(a, b, 3); + // CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsraq_n_u16 + return vsraq_n_u16(a, b, 3); + // CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsraq_n_u32 + return vsraq_n_u32(a, b, 3); + // CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsraq_n_u64 + return vsraq_n_u64(a, b, 3); + // CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrshr_n_s8(int8x8_t a) { + // CHECK: test_vrshr_n_s8 + return vrshr_n_s8(a, 3); + // CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrshr_n_s16(int16x4_t a) { + // CHECK: test_vrshr_n_s16 + return vrshr_n_s16(a, 3); + // CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrshr_n_s32(int32x2_t a) { + // CHECK: test_vrshr_n_s32 + return vrshr_n_s32(a, 3); + // CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrshrq_n_s8(int8x16_t a) { + // CHECK: test_vrshrq_n_s8 + return vrshrq_n_s8(a, 3); + // CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrshrq_n_s16(int16x8_t a) { + // CHECK: test_vrshrq_n_s16 + return vrshrq_n_s16(a, 3); + // CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrshrq_n_s32(int32x4_t a) { + // CHECK: test_vrshrq_n_s32 + return 
vrshrq_n_s32(a, 3); + // CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vrshrq_n_s64(int64x2_t a) { + // CHECK: test_vrshrq_n_s64 + return vrshrq_n_s64(a, 3); + // CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrshr_n_u8(int8x8_t a) { + // CHECK: test_vrshr_n_u8 + return vrshr_n_u8(a, 3); + // CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrshr_n_u16(int16x4_t a) { + // CHECK: test_vrshr_n_u16 + return vrshr_n_u16(a, 3); + // CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrshr_n_u32(int32x2_t a) { + // CHECK: test_vrshr_n_u32 + return vrshr_n_u32(a, 3); + // CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrshrq_n_u8(int8x16_t a) { + // CHECK: test_vrshrq_n_u8 + return vrshrq_n_u8(a, 3); + // CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrshrq_n_u16(int16x8_t a) { + // CHECK: test_vrshrq_n_u16 + return vrshrq_n_u16(a, 3); + // CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrshrq_n_u32(int32x4_t a) { + // CHECK: test_vrshrq_n_u32 + return vrshrq_n_u32(a, 3); + // CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vrshrq_n_u64(int64x2_t a) { + // CHECK: test_vrshrq_n_u64 + return vrshrq_n_u64(a, 3); + // CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vrsra_n_s8 + return vrsra_n_s8(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vrsra_n_s16 + return vrsra_n_s16(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vrsra_n_s32 + return vrsra_n_s32(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vrsraq_n_s8 + return vrsraq_n_s8(a, b, 3); + // CHECK: 
srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vrsraq_n_s16 + return vrsraq_n_s16(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vrsraq_n_s32 + return vrsraq_n_s32(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vrsraq_n_s64 + return vrsraq_n_s64(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) { + // CHECK: test_vrsra_n_u8 + return vrsra_n_u8(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) { + // CHECK: test_vrsra_n_u16 + return vrsra_n_u16(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) { + // CHECK: test_vrsra_n_u32 + return vrsra_n_u32(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) { + // CHECK: test_vrsraq_n_u8 + return vrsraq_n_u8(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) { + // CHECK: test_vrsraq_n_u16 + return vrsraq_n_u16(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) { + // CHECK: test_vrsraq_n_u32 + return vrsraq_n_u32(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) { + // CHECK: test_vrsraq_n_u64 + return vrsraq_n_u64(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsri_n_s8 + return vsri_n_s8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t 
test_vsri_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsri_n_s16 + return vsri_n_s16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsri_n_s32 + return vsri_n_s32(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsriq_n_s8 + return vsriq_n_s8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsriq_n_s16 + return vsriq_n_s16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsriq_n_s32 + return vsriq_n_s32(a, b, 3); + // CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsriq_n_s64 + return vsriq_n_s64(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsri_n_u8 + return vsri_n_u8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsri_n_u16 + return vsri_n_u16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsri_n_u32 + return vsri_n_u32(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsriq_n_u8 + return vsriq_n_u8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsriq_n_u16 + return vsriq_n_u16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsriq_n_u32 + return vsriq_n_u32(a, b, 3); + // CHECK: sri 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsriq_n_u64 + return vsriq_n_u64(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vsri_n_p8 + return vsri_n_p8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vsri_n_p16 + return vsri_n_p16(a, b, 15); + // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 +} + +poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vsriq_n_p8 + return vsriq_n_p8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vsriq_n_p16 + return vsriq_n_p16(a, b, 15); + // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 +} + +int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsli_n_s8 + return vsli_n_s8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsli_n_s16 + return vsli_n_s16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsli_n_s32 + return vsli_n_s32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsliq_n_s8 + return vsliq_n_s8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsliq_n_s16 + return vsliq_n_s16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsliq_n_s32 + return vsliq_n_s32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: 
test_vsliq_n_s64 + return vsliq_n_s64(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vsli_n_u8 + return vsli_n_u8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vsli_n_u16 + return vsli_n_u16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vsli_n_u32 + return vsli_n_u32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vsliq_n_u8 + return vsliq_n_u8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsliq_n_u16 + return vsliq_n_u16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vsliq_n_u32 + return vsliq_n_u32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vsliq_n_u64 + return vsliq_n_u64(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vsli_n_p8 + return vsli_n_p8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vsli_n_p16 + return vsli_n_p16(a, b, 15); + // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 +} + +poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vsliq_n_p8 + return vsliq_n_p8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vsliq_n_p16 + return vsliq_n_p16(a, b, 15); + // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 
+} + +int8x8_t test_vqshlu_n_s8(int8x8_t a) { + // CHECK: test_vqshlu_n_s8 + return vqshlu_n_s8(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vqshlu_n_s16(int16x4_t a) { + // CHECK: test_vqshlu_n_s16 + return vqshlu_n_s16(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vqshlu_n_s32(int32x2_t a) { + // CHECK: test_vqshlu_n_s32 + return vqshlu_n_s32(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vqshluq_n_s8(int8x16_t a) { + // CHECK: test_vqshluq_n_s8 + return vqshluq_n_s8(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vqshluq_n_s16(int16x8_t a) { + // CHECK: test_vqshluq_n_s16 + return vqshluq_n_s16(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vqshluq_n_s32(int32x4_t a) { + // CHECK: test_vqshluq_n_s32 + return vqshluq_n_s32(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vqshluq_n_s64(int64x2_t a) { + // CHECK: test_vqshluq_n_s64 + return vqshluq_n_s64(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vshrn_n_s16(int16x8_t a) { + // CHECK: test_vshrn_n_s16 + return vshrn_n_s16(a, 3); + // CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vshrn_n_s32(int32x4_t a) { + // CHECK: test_vshrn_n_s32 + return vshrn_n_s32(a, 9); + // CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vshrn_n_s64(int64x2_t a) { + // CHECK: test_vshrn_n_s64 + return vshrn_n_s64(a, 19); + // CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vshrn_n_u16(uint16x8_t a) { + // CHECK: test_vshrn_n_u16 + return vshrn_n_u16(a, 3); + // CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vshrn_n_u32(uint32x4_t a) { + // CHECK: test_vshrn_n_u32 + return vshrn_n_u32(a, 9); + // CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vshrn_n_u64(uint64x2_t a) { + // CHECK: 
test_vshrn_n_u64 + return vshrn_n_u64(a, 19); + // CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vshrn_high_n_s16 + return vshrn_high_n_s16(a, b, 3); + // CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vshrn_high_n_s32 + return vshrn_high_n_s32(a, b, 9); + // CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vshrn_high_n_s64 + return vshrn_high_n_s64(a, b, 19); + // CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vshrn_high_n_u16 + return vshrn_high_n_u16(a, b, 3); + // CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vshrn_high_n_u32 + return vshrn_high_n_u32(a, b, 9); + // CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vshrn_high_n_u64 + return vshrn_high_n_u64(a, b, 19); + // CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqshrun_n_s16(int16x8_t a) { + // CHECK: test_vqshrun_n_s16 + return vqshrun_n_s16(a, 3); + // CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqshrun_n_s32(int32x4_t a) { + // CHECK: test_vqshrun_n_s32 + return vqshrun_n_s32(a, 9); + // CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqshrun_n_s64(int64x2_t a) { + // CHECK: test_vqshrun_n_s64 + return vqshrun_n_s64(a, 19); + // CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqshrun_high_n_s16 + return vqshrun_high_n_s16(a, b, 3); + // CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqshrun_high_n_s32(int16x4_t a, 
int32x4_t b) { + // CHECK: test_vqshrun_high_n_s32 + return vqshrun_high_n_s32(a, b, 9); + // CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqshrun_high_n_s64 + return vqshrun_high_n_s64(a, b, 19); + // CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vrshrn_n_s16(int16x8_t a) { + // CHECK: test_vrshrn_n_s16 + return vrshrn_n_s16(a, 3); + // CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vrshrn_n_s32(int32x4_t a) { + // CHECK: test_vrshrn_n_s32 + return vrshrn_n_s32(a, 9); + // CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vrshrn_n_s64(int64x2_t a) { + // CHECK: test_vrshrn_n_s64 + return vrshrn_n_s64(a, 19); + // CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { + // CHECK: test_vrshrn_n_u16 + return vrshrn_n_u16(a, 3); + // CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { + // CHECK: test_vrshrn_n_u32 + return vrshrn_n_u32(a, 9); + // CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { + // CHECK: test_vrshrn_n_u64 + return vrshrn_n_u64(a, 19); + // CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vrshrn_high_n_s16 + return vrshrn_high_n_s16(a, b, 3); + // CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vrshrn_high_n_s32 + return vrshrn_high_n_s32(a, b, 9); + // CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vrshrn_high_n_s64 + return vrshrn_high_n_s64(a, b, 19); + // CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: 
test_vrshrn_high_n_u16 + return vrshrn_high_n_u16(a, b, 3); + // CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vrshrn_high_n_u32 + return vrshrn_high_n_u32(a, b, 9); + // CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vrshrn_high_n_u64 + return vrshrn_high_n_u64(a, b, 19); + // CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqrshrun_n_s16(int16x8_t a) { + // CHECK: test_vqrshrun_n_s16 + return vqrshrun_n_s16(a, 3); + // CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqrshrun_n_s32(int32x4_t a) { + // CHECK: test_vqrshrun_n_s32 + return vqrshrun_n_s32(a, 9); + // CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqrshrun_n_s64(int64x2_t a) { + // CHECK: test_vqrshrun_n_s64 + return vqrshrun_n_s64(a, 19); + // CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqrshrun_high_n_s16 + return vqrshrun_high_n_s16(a, b, 3); + // CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqrshrun_high_n_s32 + return vqrshrun_high_n_s32(a, b, 9); + // CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqrshrun_high_n_s64 + return vqrshrun_high_n_s64(a, b, 19); + // CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqshrn_n_s16(int16x8_t a) { + // CHECK: test_vqshrn_n_s16 + return vqshrn_n_s16(a, 3); + // CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqshrn_n_s32(int32x4_t a) { + // CHECK: test_vqshrn_n_s32 + return vqshrn_n_s32(a, 9); + // CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqshrn_n_s64(int64x2_t a) { 
+ // CHECK: test_vqshrn_n_s64 + return vqshrn_n_s64(a, 19); + // CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { + // CHECK: test_vqshrn_n_u16 + return vqshrn_n_u16(a, 3); + // CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { + // CHECK: test_vqshrn_n_u32 + return vqshrn_n_u32(a, 9); + // CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { + // CHECK: test_vqshrn_n_u64 + return vqshrn_n_u64(a, 19); + // CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqshrn_high_n_s16 + return vqshrn_high_n_s16(a, b, 3); + // CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqshrn_high_n_s32 + return vqshrn_high_n_s32(a, b, 9); + // CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqshrn_high_n_s64 + return vqshrn_high_n_s64(a, b, 19); + // CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vqshrn_high_n_u16 + return vqshrn_high_n_u16(a, b, 3); + // CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vqshrn_high_n_u32 + return vqshrn_high_n_u32(a, b, 9); + // CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vqshrn_high_n_u64 + return vqshrn_high_n_u64(a, b, 19); + // CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqrshrn_n_s16(int16x8_t a) { + // CHECK: test_vqrshrn_n_s16 + return vqrshrn_n_s16(a, 3); + // CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqrshrn_n_s32(int32x4_t a) { 
+ // CHECK: test_vqrshrn_n_s32 + return vqrshrn_n_s32(a, 9); + // CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqrshrn_n_s64(int64x2_t a) { + // CHECK: test_vqrshrn_n_s64 + return vqrshrn_n_s64(a, 19); + // CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { + // CHECK: test_vqrshrn_n_u16 + return vqrshrn_n_u16(a, 3); + // CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { + // CHECK: test_vqrshrn_n_u32 + return vqrshrn_n_u32(a, 9); + // CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { + // CHECK: test_vqrshrn_n_u64 + return vqrshrn_n_u64(a, 19); + // CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqrshrn_high_n_s16 + return vqrshrn_high_n_s16(a, b, 3); + // CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqrshrn_high_n_s32 + return vqrshrn_high_n_s32(a, b, 9); + // CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqrshrn_high_n_s64 + return vqrshrn_high_n_s64(a, b, 19); + // CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vqrshrn_high_n_u16 + return vqrshrn_high_n_u16(a, b, 3); + // CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vqrshrn_high_n_u32 + return vqrshrn_high_n_u32(a, b, 9); + // CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vqrshrn_high_n_u64 + return vqrshrn_high_n_u64(a, b, 19); + // CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + 
int16x8_t test_vshll_n_s8(int8x8_t a) { // CHECK: test_vshll_n_s8 return vshll_n_s8(a, 3); @@ -3249,3 +4203,74 @@ uint64x2_t test_vmovl_high_u32(uint32x4_t a) { // CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0 } +float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { + // CHECK: test_vcvt_n_f32_s32 + return vcvt_n_f32_s32(a, 31); + // CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { + // CHECK: test_vcvtq_n_f32_s32 + return vcvtq_n_f32_s32(a, 31); + // CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) { + // CHECK: test_vcvtq_n_f64_s64 + return vcvtq_n_f64_s64(a, 50); + // CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} + +float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { + // CHECK: test_vcvt_n_f32_u32 + return vcvt_n_f32_u32(a, 31); + // CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { + // CHECK: test_vcvtq_n_f32_u32 + return vcvtq_n_f32_u32(a, 31); + // CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { + // CHECK: test_vcvtq_n_f64_u64 + return vcvtq_n_f64_u64(a, 50); + // CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} + +int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { + // CHECK: test_vcvt_n_s32_f32 + return vcvt_n_s32_f32(a, 31); + // CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { + // CHECK: test_vcvtq_n_s32_f32 + return vcvtq_n_s32_f32(a, 31); + // CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) { + // CHECK: test_vcvtq_n_s64_f64 + return vcvtq_n_s64_f64(a, 50); + // CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} + +uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { + // CHECK: test_vcvt_n_u32_f32 + return vcvt_n_u32_f32(a, 31); + // CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) 
{ + // CHECK: test_vcvt_n_u32_f32 + return vcvtq_n_u32_f32(a, 31); + // CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) { + // CHECK: test_vcvtq_n_u64_f64 + return vcvtq_n_u64_f64(a, 50); + // CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 1e43032da5..d8f203d3df 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -91,7 +91,10 @@ enum OpKind { OpAbdl, OpAba, OpAbal, - OpDiv + OpDiv, + OpLongHi, + OpNarrowHi, + OpMovlHi }; enum ClassKind { @@ -208,6 +211,9 @@ public: OpMap["OP_ABA"] = OpAba; OpMap["OP_ABAL"] = OpAbal; OpMap["OP_DIV"] = OpDiv; + OpMap["OP_LONG_HI"] = OpLongHi; + OpMap["OP_NARROW_HI"] = OpNarrowHi; + OpMap["OP_MOVL_HI"] = OpMovlHi; Record *SI = R.getClass("SInst"); Record *II = R.getClass("IInst"); @@ -374,6 +380,8 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, poly = false; if (type == 'f') type = 'i'; + if (type == 'd') + type = 'l'; break; case 'f': if (type == 'h') @@ -422,6 +430,10 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, type = Narrow(type); usgn = true; break; + case 'm': + type = Narrow(type); + quad = false; + break; default: break; } @@ -1305,6 +1317,15 @@ static std::string SplatLane(unsigned nElts, const std::string &vec, return s; } +static std::string RemoveHigh(const std::string &name) { + std::string s = name; + std::size_t found = s.find("_high_"); + if (found == std::string::npos) + PrintFatalError("name should contain \"_high_\" for high intrinsics"); + s.replace(found, 5, ""); + return s; +} + static unsigned GetNumElements(StringRef typestr, bool &quad) { quad = false; bool dummy = false; @@ -1328,8 +1349,8 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) { } // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 
-static std::string GenOpString(OpKind op, const std::string &proto, - StringRef typestr) { +static std::string GenOpString(const std::string &name, OpKind op, + const std::string &proto, StringRef typestr) { bool quad; unsigned nElts = GetNumElements(typestr, quad); bool define = UseMacro(proto); @@ -1559,6 +1580,27 @@ static std::string GenOpString(OpKind op, const std::string &proto, case OpDiv: s += "__a / __b;"; break; + case OpMovlHi: { + s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + + MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; + s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS); + s += "(__a1, 0);"; + break; + } + case OpLongHi: { + // Another local variable __a1 is needed for calling a Macro, + // or using __a will have naming conflict when Macro expanding. + s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + + MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; + s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + + "(__a1, __b);"; + break; + } + case OpNarrowHi: { + s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + + MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; + break; + } default: PrintFatalError("unknown OpKind!"); } @@ -1796,7 +1838,7 @@ static std::string GenIntrinsic(const std::string &name, s += " {\n "; if (kind != OpNone) - s += GenOpString(kind, proto, outTypeStr); + s += GenOpString(name, kind, proto, outTypeStr); else s += GenBuiltin(name, proto, outTypeStr, classKind); if (define) @@ -2124,9 +2166,15 @@ NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, if (R->getValueAsBit("isVCVT_N")) { // VCVT between floating- and fixed-point values takes an immediate - // in the range 1 to 32. + // in the range [1, 32] for f32, or [1, 64] for f64. 
ck = ClassB; - rangestr = "l = 1; u = 31"; // upper bound = l + u + if (name.find("32") != std::string::npos) + rangestr = "l = 1; u = 31"; // upper bound = l + u + else if (name.find("64") != std::string::npos) + rangestr = "l = 1; u = 63"; + else + PrintFatalError(R->getLoc(), + "Fixed point convert name should contains \"32\" or \"64\""); } else if (Proto.find('s') == std::string::npos) { // Builtins which are overloaded by type will need to have their upper // bound computed at Sema time based on the type constant.