From dd12780e86575795fa912529a911b01e2abc4677 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Mon, 18 Nov 2013 06:33:43 +0000 Subject: [PATCH] Implement the newly added AArch64 ACLE functions for ld1/st1 with 2/3/4 vectors. The functions are like: vst1_s8_x2 ... git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194991 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 14 + lib/CodeGen/CGBuiltin.cpp | 86 ++ test/CodeGen/aarch64-neon-intrinsics.c | 1120 ++++++++++++++++++++++++ test/CodeGenCXX/aarch64-neon.cpp | 13 + utils/TableGen/NeonEmitter.cpp | 16 +- 5 files changed, 1248 insertions(+), 1 deletion(-) create mode 100644 test/CodeGenCXX/aarch64-neon.cpp diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index e2e3d290eb..c917940288 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -543,6 +543,20 @@ def ST3 : WInst<"vst3", "vp3", def ST4 : WInst<"vst4", "vp4", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; +def LD1_X2 : WInst<"vld1_x2", "2c", + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; +def LD3_x3 : WInst<"vld1_x3", "3c", + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; +def LD4_x4 : WInst<"vld1_x4", "4c", + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + +def ST1_X2 : WInst<"vst1_x2", "vp2", + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; +def ST1_X3 : WInst<"vst1_x3", "vp3", + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; +def ST1_X4 : WInst<"vst1_x4", "vp4", + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + //////////////////////////////////////////////////////////////////////////////// // Addition // With additional Qd type. diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index a292ffcee1..206c429a25 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2750,7 +2750,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } SmallVector Ops; + llvm::Value *Align = 0; // Alignment for load/store for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { + if (i == 0) { + switch (BuiltinID) { + case AArch64::BI__builtin_neon_vst1_x2_v: + case AArch64::BI__builtin_neon_vst1q_x2_v: + case AArch64::BI__builtin_neon_vst1_x3_v: + case AArch64::BI__builtin_neon_vst1q_x3_v: + case AArch64::BI__builtin_neon_vst1_x4_v: + case AArch64::BI__builtin_neon_vst1q_x4_v: + // Get the alignment for the argument in addition to the value; + // we'll use it later. + std::pair Src = + EmitPointerWithAlignment(E->getArg(0)); + Ops.push_back(Src.first); + Align = Builder.getInt32(Src.second); + continue; + } + } + if (i == 1) { + switch (BuiltinID) { + case AArch64::BI__builtin_neon_vld1_x2_v: + case AArch64::BI__builtin_neon_vld1q_x2_v: + case AArch64::BI__builtin_neon_vld1_x3_v: + case AArch64::BI__builtin_neon_vld1q_x3_v: + case AArch64::BI__builtin_neon_vld1_x4_v: + case AArch64::BI__builtin_neon_vld1q_x4_v: + // Get the alignment for the argument in addition to the value; + // we'll use it later. + std::pair Src = + EmitPointerWithAlignment(E->getArg(1)); + Ops.push_back(Src.first); + Align = Builder.getInt32(Src.second); + continue; + } + } Ops.push_back(EmitScalarExpr(E->getArg(i))); } @@ -3084,6 +3119,57 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E); case AArch64::BI__builtin_neon_vst4q_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E); + case AArch64::BI__builtin_neon_vld1_x2_v: + case AArch64::BI__builtin_neon_vld1q_x2_v: + case AArch64::BI__builtin_neon_vld1_x3_v: + case AArch64::BI__builtin_neon_vld1q_x3_v: + case AArch64::BI__builtin_neon_vld1_x4_v: + case AArch64::BI__builtin_neon_vld1q_x4_v: { + unsigned Int; + switch (BuiltinID) { + case AArch64::BI__builtin_neon_vld1_x2_v: + case AArch64::BI__builtin_neon_vld1q_x2_v: + Int = Intrinsic::aarch64_neon_vld1x2; + break; + case AArch64::BI__builtin_neon_vld1_x3_v: + case AArch64::BI__builtin_neon_vld1q_x3_v: + Int = Intrinsic::aarch64_neon_vld1x3; + break; + case AArch64::BI__builtin_neon_vld1_x4_v: + case AArch64::BI__builtin_neon_vld1q_x4_v: + Int = Intrinsic::aarch64_neon_vld1x4; + break; + } + Function *F = CGM.getIntrinsic(Int, Ty); + Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN"); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case AArch64::BI__builtin_neon_vst1_x2_v: + case AArch64::BI__builtin_neon_vst1q_x2_v: + case AArch64::BI__builtin_neon_vst1_x3_v: + case AArch64::BI__builtin_neon_vst1q_x3_v: + case AArch64::BI__builtin_neon_vst1_x4_v: + case AArch64::BI__builtin_neon_vst1q_x4_v: { + Ops.push_back(Align); + unsigned Int; + switch (BuiltinID) { + case AArch64::BI__builtin_neon_vst1_x2_v: + case AArch64::BI__builtin_neon_vst1q_x2_v: + Int = Intrinsic::aarch64_neon_vst1x2; + break; + case AArch64::BI__builtin_neon_vst1_x3_v: + case AArch64::BI__builtin_neon_vst1q_x3_v: + Int = Intrinsic::aarch64_neon_vst1x3; + break; + case AArch64::BI__builtin_neon_vst1_x4_v: + case AArch64::BI__builtin_neon_vst1q_x4_v: + Int = Intrinsic::aarch64_neon_vst1x4; + break; + } + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); + } // Crypto case AArch64::BI__builtin_neon_vaeseq_v: diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index dd96f45714..f0533cd9d4 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -7010,6 +7010,1126 @@ void test_vst4_p16(poly16_t *a, poly16x4x4_t b) { // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] } +uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) { + // CHECK-LABEL: test_vld1q_u8_x2 + return vld1q_u8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) { + // CHECK-LABEL: test_vld1q_u16_x2 + return vld1q_u16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) { + // CHECK-LABEL: test_vld1q_u32_x2 + return vld1q_u32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) { + // CHECK-LABEL: test_vld1q_u64_x2 + return vld1q_u64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16x2_t test_vld1q_s8_x2(int8_t const *a) { + // CHECK-LABEL: test_vld1q_s8_x2 + return vld1q_s8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8x2_t test_vld1q_s16_x2(int16_t const *a) { + // CHECK-LABEL: test_vld1q_s16_x2 + return vld1q_s16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4x2_t test_vld1q_s32_x2(int32_t const *a) { + // CHECK-LABEL: test_vld1q_s32_x2 + return vld1q_s32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2x2_t test_vld1q_s64_x2(int64_t const *a) { + // CHECK-LABEL: test_vld1q_s64_x2 + return vld1q_s64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8x2_t test_vld1q_f16_x2(float16_t const *a) { + // CHECK-LABEL: test_vld1q_f16_x2 + return vld1q_f16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4x2_t test_vld1q_f32_x2(float32_t const *a) { + // CHECK-LABEL: test_vld1q_f32_x2 + return vld1q_f32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +float64x2x2_t test_vld1q_f64_x2(float64_t const *a) { + // CHECK-LABEL: test_vld1q_f64_x2 + return vld1q_f64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) { + // CHECK-LABEL: test_vld1q_p8_x2 + return vld1q_p8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) { + // CHECK-LABEL: test_vld1q_p16_x2 + return vld1q_p16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) { + // CHECK-LABEL: test_vld1q_p64_x2 + return vld1q_p64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) { + // CHECK-LABEL: test_vld1_u8_x2 + return vld1_u8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) { + // CHECK-LABEL: test_vld1_u16_x2 + return vld1_u16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) { + // CHECK-LABEL: test_vld1_u32_x2 + return vld1_u32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) { + // CHECK-LABEL: test_vld1_u64_x2 + return vld1_u64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8x2_t test_vld1_s8_x2(int8_t const *a) { + // CHECK-LABEL: test_vld1_s8_x2 + return vld1_s8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4x2_t test_vld1_s16_x2(int16_t const *a) { + // CHECK-LABEL: test_vld1_s16_x2 + return vld1_s16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2x2_t test_vld1_s32_x2(int32_t const *a) { + // CHECK-LABEL: test_vld1_s32_x2 + return vld1_s32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1x2_t test_vld1_s64_x2(int64_t const *a) { + // CHECK-LABEL: test_vld1_s64_x2 + return vld1_s64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x2_t test_vld1_f16_x2(float16_t const *a) { + // CHECK-LABEL: test_vld1_f16_x2 + return vld1_f16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x2_t test_vld1_f32_x2(float32_t const *a) { + // CHECK-LABEL: test_vld1_f32_x2 + return vld1_f32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x2_t test_vld1_f64_x2(float64_t const *a) { + // CHECK-LABEL: test_vld1_f64_x2 + return vld1_f64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) { + // CHECK-LABEL: test_vld1_p8_x2 + return vld1_p8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) { + // CHECK-LABEL: test_vld1_p16_x2 + return vld1_p16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) { + // CHECK-LABEL: test_vld1_p64_x2 + return vld1_p64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) { + // CHECK-LABEL: test_vld1q_u8_x3 + return vld1q_u8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) { + // CHECK-LABEL: test_vld1q_u16_x3 + return vld1q_u16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) { + // CHECK-LABEL: test_vld1q_u32_x3 + return vld1q_u32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) { + // CHECK-LABEL: test_vld1q_u64_x3 + return vld1q_u64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +int8x16x3_t test_vld1q_s8_x3(int8_t const *a) { + // CHECK-LABEL: test_vld1q_s8_x3 + return vld1q_s8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +int16x8x3_t test_vld1q_s16_x3(int16_t const *a) { + // CHECK-LABEL: test_vld1q_s16_x3 + return vld1q_s16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +int32x4x3_t test_vld1q_s32_x3(int32_t const *a) { + // CHECK-LABEL: test_vld1q_s32_x3 + return vld1q_s32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +int64x2x3_t test_vld1q_s64_x3(int64_t const *a) { + // CHECK-LABEL: test_vld1q_s64_x3 + return vld1q_s64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +float16x8x3_t test_vld1q_f16_x3(float16_t const *a) { + // CHECK-LABEL: test_vld1q_f16_x3 + return vld1q_f16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +float32x4x3_t test_vld1q_f32_x3(float32_t const *a) { + // CHECK-LABEL: test_vld1q_f32_x3 + return vld1q_f32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +float64x2x3_t test_vld1q_f64_x3(float64_t const *a) { + // CHECK-LABEL: test_vld1q_f64_x3 + return vld1q_f64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) { + // CHECK-LABEL: test_vld1q_p8_x3 + return vld1q_p8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) { + // CHECK-LABEL: test_vld1q_p16_x3 + return vld1q_p16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) { + // CHECK-LABEL: test_vld1q_p64_x3 + return vld1q_p64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) { + // CHECK-LABEL: test_vld1_u8_x3 + return vld1_u8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) { + // CHECK-LABEL: test_vld1_u16_x3 + return vld1_u16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) { + // CHECK-LABEL: test_vld1_u32_x3 + return vld1_u32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) { + // CHECK-LABEL: test_vld1_u64_x3 + return vld1_u64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +int8x8x3_t test_vld1_s8_x3(int8_t const *a) { + // CHECK-LABEL: test_vld1_s8_x3 + return vld1_s8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +int16x4x3_t test_vld1_s16_x3(int16_t const *a) { + // CHECK-LABEL: test_vld1_s16_x3 + return vld1_s16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +int32x2x3_t test_vld1_s32_x3(int32_t const *a) { + // CHECK-LABEL: test_vld1_s32_x3 + return vld1_s32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +int64x1x3_t test_vld1_s64_x3(int64_t const *a) { + // CHECK-LABEL: test_vld1_s64_x3 + return vld1_s64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +float16x4x3_t test_vld1_f16_x3(float16_t const *a) { + // CHECK-LABEL: test_vld1_f16_x3 + return vld1_f16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +float32x2x3_t test_vld1_f32_x3(float32_t const *a) { + // CHECK-LABEL: test_vld1_f32_x3 + return vld1_f32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +float64x1x3_t test_vld1_f64_x3(float64_t const *a) { + // CHECK-LABEL: test_vld1_f64_x3 + return vld1_f64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) { + // CHECK-LABEL: test_vld1_p8_x3 + return vld1_p8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) { + // CHECK-LABEL: test_vld1_p16_x3 + return vld1_p16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) { + // CHECK-LABEL: test_vld1_p64_x3 + return vld1_p64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) { + // CHECK-LABEL: test_vld1q_u8_x4 + return vld1q_u8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) { + // CHECK-LABEL: test_vld1q_u16_x4 + return vld1q_u16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) { + // CHECK-LABEL: test_vld1q_u32_x4 + return vld1q_u32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) { + // CHECK-LABEL: test_vld1q_u64_x4 + return vld1q_u64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16x4_t test_vld1q_s8_x4(int8_t const *a) { + // CHECK-LABEL: test_vld1q_s8_x4 + return vld1q_s8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8x4_t test_vld1q_s16_x4(int16_t const *a) { + // CHECK-LABEL: test_vld1q_s16_x4 + return vld1q_s16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4x4_t test_vld1q_s32_x4(int32_t const *a) { + // CHECK-LABEL: test_vld1q_s32_x4 + return vld1q_s32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2x4_t test_vld1q_s64_x4(int64_t const *a) { + // CHECK-LABEL: test_vld1q_s64_x4 + return vld1q_s64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8x4_t test_vld1q_f16_x4(float16_t const *a) { + // CHECK-LABEL: test_vld1q_f16_x4 + return vld1q_f16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4x4_t test_vld1q_f32_x4(float32_t const *a) { + // CHECK-LABEL: test_vld1q_f32_x4 + return vld1q_f32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +float64x2x4_t test_vld1q_f64_x4(float64_t const *a) { + // CHECK-LABEL: test_vld1q_f64_x4 + return vld1q_f64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) { + // CHECK-LABEL: test_vld1q_p8_x4 + return vld1q_p8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) { + // CHECK-LABEL: test_vld1q_p16_x4 + return vld1q_p16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) { + // CHECK-LABEL: test_vld1q_p64_x4 + return vld1q_p64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) { + // CHECK-LABEL: test_vld1_u8_x4 + return vld1_u8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) { + // CHECK-LABEL: test_vld1_u16_x4 + return vld1_u16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) { + // CHECK-LABEL: test_vld1_u32_x4 + return vld1_u32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) { + // CHECK-LABEL: test_vld1_u64_x4 + return vld1_u64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8x4_t test_vld1_s8_x4(int8_t const *a) { + // CHECK-LABEL: test_vld1_s8_x4 + return vld1_s8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4x4_t test_vld1_s16_x4(int16_t const *a) { + // CHECK-LABEL: test_vld1_s16_x4 + return vld1_s16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2x4_t test_vld1_s32_x4(int32_t const *a) { + // CHECK-LABEL: test_vld1_s32_x4 + return vld1_s32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1x4_t test_vld1_s64_x4(int64_t const *a) { + // CHECK-LABEL: test_vld1_s64_x4 + return vld1_s64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x4_t test_vld1_f16_x4(float16_t const *a) { + // CHECK-LABEL: test_vld1_f16_x4 + return vld1_f16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x4_t test_vld1_f32_x4(float32_t const *a) { + // CHECK-LABEL: test_vld1_f32_x4 + return vld1_f32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x4_t test_vld1_f64_x4(float64_t const *a) { + // CHECK-LABEL: test_vld1_f64_x4 + return vld1_f64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) { + // CHECK-LABEL: test_vld1_p8_x4 + return vld1_p8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) { + // CHECK-LABEL: test_vld1_p16_x4 + return vld1_p16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) { + // CHECK-LABEL: test_vld1_p64_x4 + return vld1_p64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) { + // CHECK: test_vst1q_u8_x2 + vst1q_u8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) { + // CHECK: test_vst1q_u16_x2 + vst1q_u16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) { + // CHECK: test_vst1q_u32_x2 + vst1q_u32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) { + // CHECK: test_vst1q_u64_x2 + vst1q_u64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) { + // CHECK: test_vst1q_s8_x2 + vst1q_s8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) { + // CHECK: test_vst1q_s16_x2 + vst1q_s16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) { + // CHECK: test_vst1q_s32_x2 + vst1q_s32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) { + // CHECK: test_vst1q_s64_x2 + vst1q_s64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) { + // CHECK: test_vst1q_f16_x2 + vst1q_f16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) { + // CHECK: test_vst1q_f32_x2 + vst1q_f32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) { + // CHECK: test_vst1q_f64_x2 + vst1q_f64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) { + // CHECK: test_vst1q_p8_x2 + vst1q_p8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) { + // CHECK: test_vst1q_p16_x2 + vst1q_p16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) { + // CHECK: test_vst1q_p64_x2 + vst1q_p64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) { + // CHECK: test_vst1_u8_x2 + vst1_u8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) { + // CHECK: test_vst1_u16_x2 + vst1_u16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) { + // CHECK: test_vst1_u32_x2 + vst1_u32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) { + // CHECK: test_vst1_u64_x2 + vst1_u64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) { + // CHECK: test_vst1_s8_x2 + vst1_s8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) { + // CHECK: test_vst1_s16_x2 + vst1_s16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) { + // CHECK: test_vst1_s32_x2 + vst1_s32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) { + // CHECK: test_vst1_s64_x2 + vst1_s64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) { + // CHECK: test_vst1_f16_x2 + vst1_f16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) { + // CHECK: test_vst1_f32_x2 + vst1_f32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) { + // CHECK: test_vst1_f64_x2 + vst1_f64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) { + // CHECK: test_vst1_p8_x2 + vst1_p8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) { + // CHECK: test_vst1_p16_x2 + vst1_p16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) { + // CHECK: test_vst1_p64_x2 + vst1_p64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) { + // CHECK: test_vst1q_u8_x3 + vst1q_u8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) { + // CHECK: test_vst1q_u16_x3 + vst1q_u16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) { + // CHECK: test_vst1q_u32_x3 + vst1q_u32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) { + // CHECK: test_vst1q_u64_x3 + vst1q_u64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) { + // CHECK: test_vst1q_s8_x3 + vst1q_s8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) { + // CHECK: test_vst1q_s16_x3 + vst1q_s16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) { + // CHECK: test_vst1q_s32_x3 + vst1q_s32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) { + // CHECK: test_vst1q_s64_x3 + vst1q_s64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) { + // CHECK: test_vst1q_f16_x3 + vst1q_f16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) { + // CHECK: test_vst1q_f32_x3 + vst1q_f32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) { + // CHECK: test_vst1q_f64_x3 + vst1q_f64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) { + // CHECK: test_vst1q_p8_x3 + vst1q_p8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) { + // CHECK: test_vst1q_p16_x3 + vst1q_p16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { + // CHECK: test_vst1q_p64_x3 + vst1q_p64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) { + // CHECK: test_vst1_u8_x3 + vst1_u8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) { + // CHECK: test_vst1_u16_x3 + vst1_u16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) { + // CHECK: test_vst1_u32_x3 + vst1_u32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) { + // CHECK: test_vst1_u64_x3 + vst1_u64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) { + // CHECK: test_vst1_s8_x3 + vst1_s8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) { + // CHECK: test_vst1_s16_x3 + vst1_s16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) { + // CHECK: test_vst1_s32_x3 + vst1_s32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) { + // CHECK: test_vst1_s64_x3 + vst1_s64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) { + // CHECK: test_vst1_f16_x3 + vst1_f16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) { + // CHECK: test_vst1_f32_x3 + vst1_f32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) { + // CHECK: test_vst1_f64_x3 + vst1_f64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) { + // CHECK: test_vst1_p8_x3 + vst1_p8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) { + // CHECK: test_vst1_p16_x3 + vst1_p16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { + // CHECK: test_vst1_p64_x3 + vst1_p64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) { + // CHECK: test_vst1q_u8_x4 + vst1q_u8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) { + // CHECK: test_vst1q_u16_x4 + vst1q_u16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) { + // CHECK: test_vst1q_u32_x4 + vst1q_u32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) { + // CHECK: test_vst1q_u64_x4 + vst1q_u64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) { + // CHECK: test_vst1q_s8_x4 + vst1q_s8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) { + // CHECK: test_vst1q_s16_x4 + vst1q_s16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) { + // CHECK: test_vst1q_s32_x4 + vst1q_s32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) { + // CHECK: test_vst1q_s64_x4 + vst1q_s64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) { + // CHECK: test_vst1q_f16_x4 + vst1q_f16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) { + // CHECK: test_vst1q_f32_x4 + vst1q_f32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { + // CHECK: test_vst1q_f64_x4 + vst1q_f64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) { + // CHECK: test_vst1q_p8_x4 + vst1q_p8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) { + // CHECK: test_vst1q_p16_x4 + vst1q_p16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { + // CHECK: test_vst1q_p64_x4 + vst1q_p64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) { + // CHECK: test_vst1_u8_x4 + vst1_u8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) { + // CHECK: test_vst1_u16_x4 + vst1_u16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) { + // CHECK: test_vst1_u32_x4 + vst1_u32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) { + // CHECK: test_vst1_u64_x4 + vst1_u64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) { + // CHECK: test_vst1_s8_x4 + vst1_s8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) { + // CHECK: test_vst1_s16_x4 + vst1_s16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) { + // CHECK: test_vst1_s32_x4 + vst1_s32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) { + // CHECK: test_vst1_s64_x4 + vst1_s64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { + // CHECK: test_vst1_f16_x4 + vst1_f16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { + // CHECK: test_vst1_f32_x4 + vst1_f32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { + // CHECK: test_vst1_f64_x4 + vst1_f64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { + // CHECK: test_vst1_p8_x4 + vst1_p8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { + // CHECK: test_vst1_p16_x4 + vst1_p16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) { + // CHECK: test_vst1_p64_x4 + vst1_p64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + int64_t test_vceqd_s64(int64_t a, int64_t b) { // CHECK: test_vceqd_s64 // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} diff --git a/test/CodeGenCXX/aarch64-neon.cpp b/test/CodeGenCXX/aarch64-neon.cpp new file mode 100644 index 0000000000..5d2a00bebe --- /dev/null +++ b/test/CodeGenCXX/aarch64-neon.cpp @@ -0,0 +1,13 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test whether arm_neon.h can be used in .cpp file. + +#include "arm_neon.h" + +poly64x1_t test_vld1_p64(poly64_t const * ptr) { + // CHECK: test_vld1_p64 + return vld1_p64(ptr); + // CHECK: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 99d6ac255f..874a661c5d 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -881,6 +881,16 @@ static char Insert_BHSD_Suffix(StringRef typestr){ return 0; } +static bool endsWith_xN(std::string const &name) { + if (name.length() > 3) { + if (name.compare(name.length() - 3, 3, "_x2") == 0 || + name.compare(name.length() - 3, 3, "_x3") == 0 || + name.compare(name.length() - 3, 3, "_x4") == 0) + return true; + } + return false; +} + /// MangleName - Append a type or width suffix to a base neon function name, /// and insert a 'q' in the appropriate location if type string starts with 'Q'. /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. @@ -898,7 +908,11 @@ static std::string MangleName(const std::string &name, StringRef typestr, std::string s = name; if (typeCode.size() > 0) { - s += "_" + typeCode; + // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN. + if (endsWith_xN(s)) + s.insert(s.length() - 3, "_" + typeCode); + else + s += "_" + typeCode; } if (ck == ClassB) -- 2.40.0