From: Jiangning Liu Date: Wed, 6 Nov 2013 03:35:53 +0000 (+0000) Subject: Implement AArch64 Neon instruction set Perm. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bf3bc5d37eff88b578d5990f507a87db33f30a6e;p=clang Implement AArch64 Neon instruction set Perm. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194124 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index f640442a93..fa97e82766 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -59,6 +59,12 @@ def OP_QDMULH_LN : Op; def OP_QRDMULH_LN : Op; def OP_FMS_LN : Op; def OP_FMS_LNQ : Op; +def OP_TRN1 : Op; +def OP_ZIP1 : Op; +def OP_UZP1 : Op; +def OP_TRN2 : Op; +def OP_ZIP2 : Op; +def OP_UZP2 : Op; def OP_EQ : Op; def OP_GE : Op; def OP_LE : Op; @@ -792,6 +798,21 @@ def SHA256H : SInst<"vsha256h", "dddd", "QUi">; def SHA256H2 : SInst<"vsha256h2", "dddd", "QUi">; def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">; +//////////////////////////////////////////////////////////////////////////////// +// Permutation +def VTRN1 : SOpInst<"vtrn1", "ddd", + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN1>; +def VZIP1 : SOpInst<"vzip1", "ddd", + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP1>; +def VUZP1 : SOpInst<"vuzp1", "ddd", + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP1>; +def VTRN2 : SOpInst<"vtrn2", "ddd", + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN2>; +def VZIP2 : SOpInst<"vzip2", "ddd", + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP2>; +def VUZP2 : SOpInst<"vuzp2", "ddd", + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>; + //////////////////////////////////////////////////////////////////////////////// // Scalar Arithmetic diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 8ca667df3b..f892d12372 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2497,6 +2497,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // AArch64 builtins mapping to legacy ARM v7 builtins. // FIXME: the mapped builtins listed correspond to what has been tested // in aarch64-neon-intrinsics.c so far. + case AArch64::BI__builtin_neon_vuzp_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzp_v, E); + case AArch64::BI__builtin_neon_vuzpq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzpq_v, E); + case AArch64::BI__builtin_neon_vzip_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzip_v, E); + case AArch64::BI__builtin_neon_vzipq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzipq_v, E); + case AArch64::BI__builtin_neon_vtrn_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrn_v, E); + case AArch64::BI__builtin_neon_vtrnq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrnq_v, E); case AArch64::BI__builtin_neon_vext_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E); case AArch64::BI__builtin_neon_vextq_v: diff --git a/test/CodeGen/aarch64-neon-perm.c b/test/CodeGen/aarch64-neon-perm.c new file mode 100644 index 0000000000..903570b995 --- /dev/null +++ b/test/CodeGen/aarch64-neon-perm.c @@ -0,0 +1,1093 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include + +int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vuzp1_s8 + return vuzp1_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vuzp1q_s8 + return vuzp1q_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vuzp1_s16 + return vuzp1_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vuzp1q_s16 + return vuzp1q_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vuzp1_s32 + return vuzp1_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vuzp1q_s32 + return vuzp1q_s32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vuzp1q_s64 + return vuzp1q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vuzp1_u8 + return vuzp1_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vuzp1q_u8 + return vuzp1q_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vuzp1_u16 + return vuzp1_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vuzp1q_u16 + return vuzp1q_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vuzp1_u32 + return vuzp1_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vuzp1q_u32 + return vuzp1q_u32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vuzp1q_u64 + return vuzp1q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vuzp1_f32 + return vuzp1_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vuzp1q_f32 + return vuzp1q_f32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vuzp1q_f64 + return vuzp1q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vuzp1_p8 + return vuzp1_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vuzp1q_p8 + return vuzp1q_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vuzp1_p16 + return vuzp1_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vuzp1q_p16 + return vuzp1q_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vuzp2_s8 + return vuzp2_s8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vuzp2q_s8 + return vuzp2q_s8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vuzp2_s16 + return vuzp2_s16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vuzp2q_s16 + return vuzp2q_s16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vuzp2_s32 + return vuzp2_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vuzp2q_s32 + return vuzp2q_s32(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vuzp2q_s64 + return vuzp2q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vuzp2_u8 + return vuzp2_u8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vuzp2q_u8 + return vuzp2q_u8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vuzp2_u16 + return vuzp2_u16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vuzp2q_u16 + return vuzp2q_u16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vuzp2_u32 + return vuzp2_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vuzp2q_u32 + return vuzp2q_u32(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vuzp2q_u64 + return vuzp2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vuzp2_f32 + return vuzp2_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vuzp2q_f32 + return vuzp2q_f32(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vuzp2q_f64 + return vuzp2q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vuzp2_p8 + return vuzp2_p8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vuzp2q_p8 + return vuzp2q_p8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vuzp2_p16 + return vuzp2_p16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vuzp2q_p16 + return vuzp2q_p16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vzip1_s8 + return vzip1_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vzip1q_s8 + return vzip1q_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vzip1_s16 + return vzip1_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vzip1q_s16 + return vzip1q_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vzip1_s32 + return vzip1_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vzip1q_s32 + return vzip1q_s32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vzip1q_s64 + return vzip1q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vzip1_u8 + return vzip1_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vzip1q_u8 + return vzip1q_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vzip1_u16 + return vzip1_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vzip1q_u16 + return vzip1q_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vzip1_u32 + return vzip1_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vzip1q_u32 + return vzip1q_u32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vzip1q_u64 + return vzip1q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vzip1_f32 + return vzip1_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vzip1q_f32 + return vzip1q_f32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vzip1q_f64 + return vzip1q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vzip1_p8 + return vzip1_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vzip1q_p8 + return vzip1q_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vzip1_p16 + return vzip1_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vzip1q_p16 + return vzip1q_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vzip2_s8 + return vzip2_s8(a, b); + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vzip2q_s8 + return vzip2q_s8(a, b); + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vzip2_s16 + return vzip2_s16(a, b); + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vzip2q_s16 + return vzip2q_s16(a, b); + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vzip2_s32 + return vzip2_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vzip2q_s32 + return vzip2q_s32(a, b); + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vzip2q_s64 + return vzip2q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vzip2_u8 + return vzip2_u8(a, b); + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vzip2q_u8 + return vzip2q_u8(a, b); + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vzip2_u16 + return vzip2_u16(a, b); + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vzip2q_u16 + return vzip2q_u16(a, b); + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vzip2_u32 + return vzip2_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vzip2q_u32 + return vzip2q_u32(a, b); + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vzip2q_u64 + return vzip2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vzip2_f32 + return vzip2_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vzip2q_f32 + return vzip2q_f32(a, b); + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vzip2q_f64 + return vzip2q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vzip2_p8 + return vzip2_p8(a, b); + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vzip2q_p8 + return vzip2q_p8(a, b); + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vzip2_p16 + return vzip2_p16(a, b); + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vzip2q_p16 + return vzip2q_p16(a, b); + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtrn1_s8 + return vtrn1_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vtrn1q_s8 + return vtrn1q_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vtrn1_s16 + return vtrn1_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vtrn1q_s16 + return vtrn1q_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vtrn1_s32 + return vtrn1_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vtrn1q_s32 + return vtrn1q_s32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vtrn1q_s64 + return vtrn1q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtrn1_u8 + return vtrn1_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vtrn1q_u8 + return vtrn1q_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vtrn1_u16 + return vtrn1_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vtrn1q_u16 + return vtrn1q_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vtrn1_u32 + return vtrn1_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vtrn1q_u32 + return vtrn1q_u32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vtrn1q_u64 + return vtrn1q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vtrn1_f32 + return vtrn1_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vtrn1q_f32 + return vtrn1q_f32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vtrn1q_f64 + return vtrn1q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vtrn1_p8 + return vtrn1_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vtrn1q_p8 + return vtrn1q_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vtrn1_p16 + return vtrn1_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vtrn1q_p16 + return vtrn1q_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtrn2_s8 + return vtrn2_s8(a, b); + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vtrn2q_s8 + return vtrn2q_s8(a, b); + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vtrn2_s16 + return vtrn2_s16(a, b); + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vtrn2q_s16 + return vtrn2q_s16(a, b); + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vtrn2_s32 + return vtrn2_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vtrn2q_s32 + return vtrn2q_s32(a, b); + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vtrn2q_s64 + return vtrn2q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtrn2_u8 + return vtrn2_u8(a, b); + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vtrn2q_u8 + return vtrn2q_u8(a, b); + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vtrn2_u16 + return vtrn2_u16(a, b); + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vtrn2q_u16 + return vtrn2q_u16(a, b); + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vtrn2_u32 + return vtrn2_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vtrn2q_u32 + return vtrn2q_u32(a, b); + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vtrn2q_u64 + return vtrn2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vtrn2_f32 + return vtrn2_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vtrn2q_f32 + return vtrn2q_f32(a, b); + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vtrn2q_f64 + return vtrn2q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vtrn2_p8 + return vtrn2_p8(a, b); + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vtrn2q_p8 + return vtrn2q_p8(a, b); + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vtrn2_p16 + return vtrn2_p16(a, b); + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vtrn2q_p16 + return vtrn2q_p16(a, b); + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vuzp_s8 + return vuzp_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vuzp_s16 + return vuzp_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vuzp_s32 + return vuzp_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vuzp_u8 + return vuzp_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vuzp_u16 + return vuzp_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vuzp_u32 + return vuzp_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vuzp_f32 + return vuzp_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vuzp_p8 + return vuzp_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vuzp_p16 + return vuzp_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vuzpq_s8 + return vuzpq_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vuzpq_s16 + return vuzpq_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vuzpq_s32 + return vuzpq_s32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vuzpq_u8 + return vuzpq_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vuzpq_u16 + return vuzpq_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vuzpq_u32 + return vuzpq_u32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vuzpq_f32 + return vuzpq_f32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vuzpq_p8 + return vuzpq_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vuzpq_p16 + return vuzpq_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vzip_s8 + return vzip_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vzip_s16 + return vzip_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vzip_s32 + return vzip_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vzip_u8 + return vzip_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vzip_u16 + return vzip_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vzip_u32 + return vzip_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vzip_f32 + return vzip_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vzip_p8 + return vzip_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vzip_p16 + return vzip_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vzipq_s8 + return vzipq_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vzipq_s16 + return vzipq_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vzipq_s32 + return vzipq_s32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vzipq_u8 + return vzipq_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vzipq_u16 + return vzipq_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vzipq_u32 + return vzipq_u32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vzipq_f32 + return vzipq_f32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vzipq_p8 + return vzipq_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vzipq_p16 + return vzipq_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtrn_s8 + return vtrn_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vtrn_s16 + return vtrn_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vtrn_s32 + return vtrn_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtrn_u8 + return vtrn_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vtrn_u16 + return vtrn_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vtrn_u32 + return vtrn_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vtrn_f32 + return vtrn_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vtrn_p8 + return vtrn_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vtrn_p16 + return vtrn_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vtrnq_s8 + return vtrnq_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vtrnq_s16 + return vtrnq_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vtrnq_s32 + return vtrnq_s32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vtrnq_u8 + return vtrnq_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vtrnq_u16 + return vtrnq_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vtrnq_u32 + return vtrnq_u32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vtrnq_f32 + return vtrnq_f32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vtrnq_p8 + return vtrnq_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vtrnq_p16 + return vtrnq_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 5e8590f241..95daa39cce 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -81,6 +81,12 @@ enum OpKind { OpQRDMulhLane, OpFMSLane, OpFMSLaneQ, + OpTrn1, + OpZip1, + OpUzp1, + OpTrn2, + OpZip2, + OpUzp2, OpEq, OpGe, OpLe, @@ -228,6 +234,12 @@ public: OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; OpMap["OP_FMS_LN"] = OpFMSLane; OpMap["OP_FMS_LNQ"] = OpFMSLaneQ; + OpMap["OP_TRN1"] = OpTrn1; + OpMap["OP_ZIP1"] = OpZip1; + OpMap["OP_UZP1"] = OpUzp1; + OpMap["OP_TRN2"] = OpTrn2; + OpMap["OP_ZIP2"] = OpZip2; + OpMap["OP_UZP2"] = OpUzp2; OpMap["OP_EQ"] = OpEq; OpMap["OP_GE"] = OpGe; OpMap["OP_LE"] = OpLe; @@ -1776,6 +1788,42 @@ static std::string GenOpString(const std::string &name, OpKind op, s += ");"; break; } + case OpUzp1: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < nElts; i++) + s += ", " + utostr(2*i); + s += ");"; + break; + case OpUzp2: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < nElts; i++) + s += ", " + utostr(2*i+1); + s += ");"; + break; + case OpZip1: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < (nElts/2); i++) + s += ", " + utostr(i) + ", " + utostr(i+nElts); + s += ");"; + break; + case OpZip2: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = nElts/2; i < nElts; i++) + s += ", " + utostr(i) + ", " + utostr(i+nElts); + s += ");"; + break; + case OpTrn1: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < (nElts/2); i++) + s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts); + s += ");"; + break; + case OpTrn2: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < (nElts/2); i++) + s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts); + s += ");"; + break; case OpAbdl: { std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; if (typestr[0] != 'U') {