From 2102a1dba460c99f97a0fdf1e5aee8c685e29ff8 Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Fri, 11 Oct 2013 02:34:30 +0000 Subject: [PATCH] Implemented aarch64 SIMD copy related ACLE intrinsic : vget_lane, vset_lane, vcopy_lane, vcreate, vdup_n, vdup_lane, vmov_n. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192411 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 30 + lib/CodeGen/CGBuiltin.cpp | 28 + test/CodeGen/aarch64-neon-copy.c | 972 +++++++++++++++++++++++++++++++ utils/TableGen/NeonEmitter.cpp | 30 +- 4 files changed, 1055 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/aarch64-neon-copy.c diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 56a828d741..71627eea19 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -98,6 +98,7 @@ def OP_DIV : Op; def OP_LONG_HI : Op; def OP_NARROW_HI : Op; def OP_MOVL_HI : Op; +def OP_COPY : Op; class Inst { string Name = n; @@ -662,6 +663,35 @@ def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>; def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>; def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; +//////////////////////////////////////////////////////////////////////////////// +// Extract or insert element from vector +def GET_LANE : IInst<"vget_lane", "sdi", + "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs">; +def SET_LANE : IInst<"vset_lane", "dsdi", + "csilPcPsUiUlUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPshfdQhQfQd">; +def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", + "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs", OP_COPY>; +def COPY_LANEQ : IOpInst<"vcopy_laneq", "ggidi", + "QcQsQiQlQUcQUsQUiQUlQPcQPs", OP_COPY>; + +//////////////////////////////////////////////////////////////////////////////// +// Set all lanes to same value +def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", + "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd", + OP_DUP_LN>; +def VDUP_LANE2: WOpInst<"vdup_laneq", "dki", + "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd", + OP_DUP_LN>; +def DUP_N : WOpInst<"vdup_n", "ds", + "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd", + OP_DUP>; +def MOV_N : WOpInst<"vmov_n", "ds", + "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd", + OP_DUP>; +//////////////////////////////////////////////////////////////////////////////// +//Initialize a vector from bit pattern +def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPsl", OP_CAST>; + //////////////////////////////////////////////////////////////////////////////// def VMLA_LANEQ : IOpInst<"vmla_laneq", "dddji", diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 97a3173ee9..a1320bb0eb 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2074,6 +2074,34 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { Ops.push_back(EmitScalarExpr(E->getArg(i))); } +// Some intrinsic isn't overloaded. + switch (BuiltinID) { + default: break; + case AArch64::BI__builtin_neon_vget_lane_i8: + case AArch64::BI__builtin_neon_vget_lane_i16: + case AArch64::BI__builtin_neon_vget_lane_i32: + case AArch64::BI__builtin_neon_vget_lane_i64: + case AArch64::BI__builtin_neon_vgetq_lane_i8: + case AArch64::BI__builtin_neon_vgetq_lane_i16: + case AArch64::BI__builtin_neon_vgetq_lane_i32: + case AArch64::BI__builtin_neon_vgetq_lane_i64: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E); + case AArch64::BI__builtin_neon_vset_lane_i8: + case AArch64::BI__builtin_neon_vset_lane_i16: + case AArch64::BI__builtin_neon_vset_lane_i32: + case AArch64::BI__builtin_neon_vset_lane_i64: + case AArch64::BI__builtin_neon_vset_lane_f16: + case AArch64::BI__builtin_neon_vset_lane_f32: + case AArch64::BI__builtin_neon_vset_lane_f64: + case AArch64::BI__builtin_neon_vsetq_lane_i8: + case AArch64::BI__builtin_neon_vsetq_lane_i16: + case AArch64::BI__builtin_neon_vsetq_lane_i32: + case AArch64::BI__builtin_neon_vsetq_lane_i64: + case AArch64::BI__builtin_neon_vsetq_lane_f16: + case AArch64::BI__builtin_neon_vsetq_lane_f32: + case AArch64::BI__builtin_neon_vsetq_lane_f64: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E); + } // Get the last argument, which specifies the vector type. llvm::APSInt Result; diff --git a/test/CodeGen/aarch64-neon-copy.c b/test/CodeGen/aarch64-neon-copy.c new file mode 100644 index 0000000000..752acb6071 --- /dev/null +++ b/test/CodeGen/aarch64-neon-copy.c @@ -0,0 +1,972 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include + +uint8x8_t test_vset_lane_u8(uint8_t v1, uint8x8_t v2) { + // CHECK: test_vset_lane_u8 + return vset_lane_u8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +uint16x4_t test_vset_lane_u16(uint16_t v1, uint16x4_t v2) { + // CHECK: test_vset_lane_u16 + return vset_lane_u16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +uint32x2_t test_vset_lane_u32(uint32_t v1, uint32x2_t v2) { + // CHECK: test_vset_lane_u32 + return vset_lane_u32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} +uint64x1_t test_vset_lane_u64(uint64_t v1, uint64x1_t v2) { + // CHECK: test_vset_lane_u64 + return vset_lane_u64(v1, v2, 0); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int8x8_t test_vset_lane_s8(int8_t v1, int8x8_t v2) { + // CHECK: test_vset_lane_s8 + return vset_lane_s8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +int16x4_t test_vset_lane_s16(int16_t v1, int16x4_t v2) { + // CHECK: test_vset_lane_s16 + return vset_lane_s16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +int32x2_t test_vset_lane_s32(int32_t v1, int32x2_t v2) { + // CHECK: test_vset_lane_s32 + return vset_lane_s32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} + + int64x1_t test_vset_lane_s64(int64_t v1, int64x1_t v2) { + // CHECK: test_vset_lane_s64 + return vset_lane_s64(v1, v2, 0); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x16_t test_vsetq_lane_u8(uint8_t v1, uint8x16_t v2) { + // CHECK: test_vsetq_lane_u8 + return vsetq_lane_u8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +uint16x8_t test_vsetq_lane_u16(uint16_t v1, uint16x8_t v2) { + // CHECK: test_vsetq_lane_u16 + return vsetq_lane_u16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +uint32x4_t test_vsetq_lane_u32(uint32_t v1, uint32x4_t v2) { + // CHECK: test_vsetq_lane_u32 + return vsetq_lane_u32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} + + uint64x2_t test_vsetq_lane_u64(uint64_t v1, uint64x2_t v2) { + // CHECK: test_vsetq_lane_u64 + return vsetq_lane_u64(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} +} + +int8x16_t test_vsetq_lane_s8(int8_t v1, int8x16_t v2) { + // CHECK: test_vsetq_lane_s8 + return vsetq_lane_s8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +int16x8_t test_vsetq_lane_s16(int16_t v1, int16x8_t v2) { + // CHECK: test_vsetq_lane_s16 + return vsetq_lane_s16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +int32x4_t test_vsetq_lane_s32(int32_t v1, int32x4_t v2) { + // CHECK: test_vsetq_lane_s32 + return vsetq_lane_s32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} + +int64x2_t test_vsetq_lane_s64(int64_t v1, int64x2_t v2) { + // CHECK: test_vsetq_lane_s64 + return vsetq_lane_s64(v1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}} +} + +poly8x8_t test_vset_lane_p8(poly8_t v1, poly8x8_t v2) { + // CHECK: test_vset_lane_p8 + return vset_lane_p8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +poly16x4_t test_vset_lane_p16(poly16_t v1, poly16x4_t v2) { + // CHECK: test_vset_lane_p16 + return vset_lane_p16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +poly8x16_t test_vsetq_lane_p8(poly8_t v1, poly8x16_t v2) { + // CHECK: test_vsetq_lane_p8 + return vsetq_lane_p8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +poly16x8_t test_vsetq_lane_p16(poly16_t v1, poly16x8_t v2) { + // CHECK: test_vsetq_lane_p16 + return vsetq_lane_p16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +float32x2_t test_vset_lane_f32(float32_t v1, float32x2_t v2) { + // CHECK: test_vset_lane_f32 + return vset_lane_f32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vsetq_lane_f32(float32_t v1, float32x4_t v2) { + // CHECK: test_vsetq_lane_f32 + return vsetq_lane_f32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float64x1_t test_vset_lane_f64(float64_t v1, float64x1_t v2) { + // CHECK: test_vset_lane_f64 + return vset_lane_f64(v1, v2, 0); + // CHECK: ret +} + +float64x2_t test_vsetq_lane_f64(float64_t v1, float64x2_t v2) { + // CHECK: test_vsetq_lane_f64 + return vsetq_lane_f64(v1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] +} + +uint8_t test_vget_lane_u8(uint8x8_t v1) { + // CHECK: test_vget_lane_u8 + return vget_lane_u8(v1, 7); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + +uint16_t test_vget_lane_u16(uint16x4_t v1) { + // CHECK: test_vget_lane_u16 + return vget_lane_u16(v1, 3); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + +uint32_t test_vget_lane_u32(uint32x2_t v1) { + // CHECK: test_vget_lane_u32 + return vget_lane_u32(v1, 1); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] +} + +uint64_t test_vget_lane_u64(uint64x1_t v1) { + // CHECK: test_vget_lane_u64 + return vget_lane_u64(v1, 0); + // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} +} + +uint8_t test_vgetq_lane_u8(uint8x16_t v1) { + // CHECK: test_vgetq_lane_u8 + return vgetq_lane_u8(v1, 15); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[15] +} + +uint16_t test_vgetq_lane_u16(uint16x8_t v1) { + // CHECK: test_vgetq_lane_u16 + return vgetq_lane_u16(v1, 6); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6] +} + +uint32_t test_vgetq_lane_u32(uint32x4_t v1) { + // CHECK: test_vgetq_lane_u32 + return vgetq_lane_u32(v1, 2); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2] +} + +uint64_t test_vgetq_lane_u64(uint64x2_t v1) { + // CHECK: test_vgetq_lane_u64 + return vgetq_lane_u64(v1, 1); + // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] +} + +poly8_t test_vget_lane_p8(poly8x8_t v1) { + // CHECK: test_vget_lane_p8 + return vget_lane_p8(v1, 7); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + +poly16_t test_vget_lane_p16(poly16x4_t v1) { + // CHECK: test_vget_lane_p16 + return vget_lane_p16(v1, 3); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + +poly8_t test_vgetq_lane_p8(poly8x16_t v1) { + // CHECK: test_vgetq_lane_p8 + return vgetq_lane_p8(v1, 14); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[14] +} + +poly16_t test_vgetq_lane_p16(poly16x8_t v1) { + // CHECK: test_vgetq_lane_p16 + return vgetq_lane_p16(v1, 6); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6] +} + +int32_t test_vget_lane_s8(int8x8_t v1) { + // CHECK: test_vget_lane_s8 + return vget_lane_s8(v1, 7)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + +int32_t test_vget_lane_s16(int16x4_t v1) { + // CHECK: test_vget_lane_s16 + return vget_lane_s16(v1, 3)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + +int64_t test_vget_lane_s32(int32x2_t v1) { + // CHECK: test_vget_lane_s32 + return vget_lane_s32(v1, 1); + // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1] +} + +int64_t test_vget_lane_s64(int64x1_t v1) { + // CHECK: test_vget_lane_s64 + return vget_lane_s64(v1, 0); + // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} +} + +int32_t test_vgetq_lane_s8(int8x16_t v1) { + // CHECK: test_vgetq_lane_s8 + return vgetq_lane_s8(v1, 15)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[15] +} + +int32_t test_vgetq_lane_s16(int16x8_t v1) { + // CHECK: test_vgetq_lane_s16 + return vgetq_lane_s16(v1, 6)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[6] +} + +int64_t test_vgetq_lane_s32(int32x4_t v1) { + // CHECK: test_vgetq_lane_s32 + return vgetq_lane_s32(v1, 2); + // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2] +} + +int64_t test_vgetq_lane_s64(int64x2_t v1) { + // CHECK: test_vgetq_lane_s64 + return vgetq_lane_s64(v1, 1); + // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] +} + +int8x8_t test_vcopy_lane_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vcopy_lane_s8 + return vcopy_lane_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x4_t test_vcopy_lane_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vcopy_lane_s16 + return vcopy_lane_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +int32x2_t test_vcopy_lane_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vcopy_lane_s32 + return vcopy_lane_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int64x1_t test_vcopy_lane_s64(int64x1_t v1, int64x1_t v2) { + // CHECK: test_vcopy_lane_s64 + return vcopy_lane_s64(v1, 0, v2, 0); + // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x8_t test_vcreate_s8(uint64_t v1) { + // CHECK: test_vcreate_s8 + return vcreate_s8(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int16x4_t test_vcreate_s16(uint64_t v1) { + // CHECK: test_vcreate_s16 + return vcreate_s16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int32x2_t test_vcreate_s32(uint64_t v1) { + // CHECK: test_vcreate_s32 + return vcreate_s32(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int64x1_t test_vcreate_s64(uint64_t v1) { + // CHECK: test_vcreate_s64 + return vcreate_s64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x8_t test_vcreate_u8(uint64_t v1) { + // CHECK: test_vcreate_u8 + return vcreate_u8(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint16x4_t test_vcreate_u16(uint64_t v1) { + // CHECK: test_vcreate_u16 + return vcreate_u16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint32x2_t test_vcreate_u32(uint64_t v1) { + // CHECK: test_vcreate_u32 + return vcreate_u32(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint64x1_t test_vcreate_u64(uint64_t v1) { + // CHECK: test_vcreate_u64 + return vcreate_u64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +poly8x8_t test_vcreate_p8(uint64_t v1) { + // CHECK: test_vcreate_p8 + return vcreate_p8(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +poly16x4_t test_vcreate_p16(uint64_t v1) { + // CHECK: test_vcreate_p16 + return vcreate_p16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +float16x4_t test_vcreate_f16(uint64_t v1) { + // CHECK: test_vcreate_f16 + return vcreate_f16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +float32x2_t test_vcreate_f32(uint64_t v1) { + // CHECK: test_vcreate_f32 + return vcreate_f32(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +float64x1_t test_vcreate_f64(uint64_t v1) { + // CHECK: test_vcreate_f64 + return vcreate_f64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x8_t test_vdup_n_u8(uint8_t v1) { + // CHECK: test_vdup_n_u8 + return vdup_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +uint16x4_t test_vdup_n_u16(uint16_t v1) { + // CHECK: test_vdup_n_u16 + return vdup_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +uint32x2_t test_vdup_n_u32(uint32_t v1) { + // CHECK: test_vdup_n_u32 + return vdup_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +uint64x1_t test_vdup_n_u64(uint64_t v1) { + // CHECK: test_vdup_n_u64 + return vdup_n_u64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x16_t test_vdupq_n_u8(uint8_t v1) { + // CHECK: test_vdupq_n_u8 + return vdupq_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +uint16x8_t test_vdupq_n_u16(uint16_t v1) { + // CHECK: test_vdupq_n_u16 + return vdupq_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +uint32x4_t test_vdupq_n_u32(uint32_t v1) { + // CHECK: test_vdupq_n_u32 + return vdupq_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +uint64x2_t test_vdupq_n_u64(uint64_t v1) { + // CHECK: test_vdupq_n_u64 + return vdupq_n_u64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +int8x8_t test_vdup_n_s8(int8_t v1) { + // CHECK: test_vdup_n_s8 + return vdup_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +int16x4_t test_vdup_n_s16(int16_t v1) { + // CHECK: test_vdup_n_s16 + return vdup_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +int32x2_t test_vdup_n_s32(int32_t v1) { + // CHECK: test_vdup_n_s32 + return vdup_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +int64x1_t test_vdup_n_s64(int64_t v1) { + // CHECK: test_vdup_n_s64 + return vdup_n_s64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int8x16_t test_vdupq_n_s8(int8_t v1) { + // CHECK: test_vdupq_n_s8 + return vdupq_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +int16x8_t test_vdupq_n_s16(int16_t v1) { + // CHECK: test_vdupq_n_s16 + return vdupq_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +int32x4_t test_vdupq_n_s32(int32_t v1) { + // CHECK: test_vdupq_n_s32 + return vdupq_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +int64x2_t test_vdupq_n_s64(int64_t v1) { + // CHECK: test_vdupq_n_s64 + return vdupq_n_s64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +poly8x8_t test_vdup_n_p8(poly8_t v1) { + // CHECK: test_vdup_n_p8 + return vdup_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +poly16x4_t test_vdup_n_p16(poly16_t v1) { + // CHECK: test_vdup_n_p16 + return vdup_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +poly8x16_t test_vdupq_n_p8(poly8_t v1) { + // CHECK: test_vdupq_n_p8 + return vdupq_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +poly16x8_t test_vdupq_n_p16(poly16_t v1) { + // CHECK: test_vdupq_n_p16 + return vdupq_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +float32x2_t test_vdup_n_f32(float32_t v1) { + // CHECK: test_vdup_n_f32 + return vdup_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float64x1_t test_vdup_n_f64(float64_t v1) { + // CHECK: test_vdup_n_f64 + return vdup_n_f64(v1); + // CHECK: ret +} + +float32x4_t test_vdupq_n_f32(float32_t v1) { + // CHECK: test_vdupq_n_f32 + return vdupq_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vdupq_n_f64(float64_t v1) { + // CHECK: test_vdupq_n_f64 + return vdupq_n_f64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int8x8_t test_vdup_lane_s8(int8x8_t v1) { + // CHECK: test_vdup_lane_s8 + return vdup_lane_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +int16x4_t test_vdup_lane_s16(int16x4_t v1) { + // CHECK: test_vdup_lane_s16 + return vdup_lane_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +int32x2_t test_vdup_lane_s32(int32x2_t v1) { + // CHECK: test_vdup_lane_s32 + return vdup_lane_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int64x1_t test_vdup_lane_s64(int64x1_t v1) { + // CHECK: test_vdup_lane_s64 + return vdup_lane_s64(v1, 0); + // CHECK: ret +} + +int8x16_t test_vdupq_lane_s8(int8x8_t v1) { + // CHECK: test_vdupq_lane_s8 + return vdupq_lane_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +int16x8_t test_vdupq_lane_s16(int16x4_t v1) { + // CHECK: test_vdupq_lane_s16 + return vdupq_lane_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +int32x4_t test_vdupq_lane_s32(int32x2_t v1) { + // CHECK: test_vdupq_lane_s32 + return vdupq_lane_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int64x2_t test_vdupq_lane_s64(int64x1_t v1) { + // CHECK: test_vdupq_lane_s64 + return vdupq_lane_s64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vdup_lane_u8(uint8x8_t v1) { + // CHECK: test_vdup_lane_u8 + return vdup_lane_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +uint16x4_t test_vdup_lane_u16(uint16x4_t v1) { + // CHECK: test_vdup_lane_u16 + return vdup_lane_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +uint32x2_t test_vdup_lane_u32(uint32x2_t v1) { + // CHECK: test_vdup_lane_u32 + return vdup_lane_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint64x1_t test_vdup_lane_u64(uint64x1_t v1) { + // CHECK: test_vdup_lane_u64 + return vdup_lane_u64(v1, 0); + // CHECK: ret +} + +uint8x16_t test_vdupq_lane_u8(uint8x8_t v1) { + // CHECK: test_vdupq_lane_u8 + return vdupq_lane_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +uint16x8_t test_vdupq_lane_u16(uint16x4_t v1) { + // CHECK: test_vdupq_lane_u16 + return vdupq_lane_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +uint32x4_t test_vdupq_lane_u32(uint32x2_t v1) { + // CHECK: test_vdupq_lane_u32 + return vdupq_lane_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vdupq_lane_u64(uint64x1_t v1) { + // CHECK: test_vdupq_lane_u64 + return vdupq_lane_u64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int8x8_t test_vdup_laneq_s8(int8x16_t v1) { + // CHECK: test_vdup_laneq_s8 + return vdup_laneq_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +int16x4_t test_vdup_laneq_s16(int16x8_t v1) { + // CHECK: test_vdup_laneq_s16 + return vdup_laneq_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +int32x2_t test_vdup_laneq_s32(int32x4_t v1) { + // CHECK: test_vdup_laneq_s32 + return vdup_laneq_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int64x1_t test_vdup_laneq_s64(int64x2_t v1) { + // CHECK: test_vdup_laneq_s64 + return vdup_laneq_s64(v1, 0); + // CHECK: ret +} + +int8x16_t test_vdupq_laneq_s8(int8x16_t v1) { + // CHECK: test_vdupq_laneq_s8 + return vdupq_laneq_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +int16x8_t test_vdupq_laneq_s16(int16x8_t v1) { + // CHECK: test_vdupq_laneq_s16 + return vdupq_laneq_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +int32x4_t test_vdupq_laneq_s32(int32x4_t v1) { + // CHECK: test_vdupq_laneq_s32 + return vdupq_laneq_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int64x2_t test_vdupq_laneq_s64(int64x2_t v1) { + // CHECK: test_vdupq_laneq_s64 + return vdupq_laneq_s64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vdup_laneq_u8(uint8x16_t v1) { + // CHECK: test_vdup_laneq_u8 + return vdup_laneq_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +uint16x4_t test_vdup_laneq_u16(uint16x8_t v1) { + // CHECK: test_vdup_laneq_u16 + return vdup_laneq_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +uint32x2_t test_vdup_laneq_u32(uint32x4_t v1) { + // CHECK: test_vdup_laneq_u32 + return vdup_laneq_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint64x1_t test_vdup_laneq_u64(uint64x2_t v1) { + // CHECK: test_vdup_laneq_u64 + return vdup_laneq_u64(v1, 0); + // CHECK: ret +} + +uint8x16_t test_vdupq_laneq_u8(uint8x16_t v1) { + // CHECK: test_vdupq_laneq_u8 + return vdupq_laneq_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +uint16x8_t test_vdupq_laneq_u16(uint16x8_t v1) { + // CHECK: test_vdupq_laneq_u16 + return vdupq_laneq_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +uint32x4_t test_vdupq_laneq_u32(uint32x4_t v1) { + // CHECK: test_vdupq_laneq_u32 + return vdupq_laneq_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vdupq_laneq_u64(uint64x2_t v1) { + // CHECK: test_vdupq_laneq_u64 + return vdupq_laneq_u64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vdup_lane_p8(poly8x8_t v1) { + // CHECK: test_vdup_lane_p8 + return vdup_lane_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +poly16x4_t test_vdup_lane_p16(poly16x4_t v1) { + // CHECK: test_vdup_lane_p16 + return vdup_lane_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +poly8x16_t test_vdupq_lane_p8(poly8x8_t v1) { + // CHECK: test_vdupq_lane_p8 + return vdupq_lane_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +poly16x8_t test_vdupq_lane_p16(poly16x4_t v1) { + // CHECK: test_vdupq_lane_p16 + return vdupq_lane_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +poly8x8_t test_vdup_laneq_p8(poly8x16_t v1) { + // CHECK: test_vdup_laneq_p8 + return vdup_laneq_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +poly16x4_t test_vdup_laneq_p16(poly16x8_t v1) { + // CHECK: test_vdup_laneq_p16 + return vdup_laneq_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +poly8x16_t test_vdupq_laneq_p8(poly8x16_t v1) { + // CHECK: test_vdupq_laneq_p8 + return vdupq_laneq_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +poly16x8_t test_vdupq_laneq_p16(poly16x8_t v1) { + // CHECK: test_vdupq_laneq_p16 + return vdupq_laneq_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +float16x4_t test_vdup_lane_f16(float16x4_t v1) { + // CHECK: test_vdup_lane_f16 + return vdup_lane_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +float32x2_t test_vdup_lane_f32(float32x2_t v1) { + // CHECK: test_vdup_lane_f32 + return vdup_lane_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float64x1_t test_vdup_lane_f64(float64x1_t v1) { + // CHECK: test_vdup_lane_f64 + return vdup_lane_f64(v1, 0); + // CHECK: ret +} + +float16x4_t test_vdup_laneq_f16(float16x8_t v1) { + // CHECK: test_vdup_laneq_f16 + return vdup_laneq_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +float32x2_t test_vdup_laneq_f32(float32x4_t v1) { + // CHECK: test_vdup_laneq_f32 + return vdup_laneq_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float64x1_t test_vdup_laneq_f64(float64x2_t v1) { + // CHECK: test_vdup_laneq_f64 + return vdup_laneq_f64(v1, 0); + // CHECK: ret +} + +float16x8_t test_vdupq_lane_f16(float16x4_t v1) { + // CHECK: test_vdupq_lane_f16 + return vdupq_lane_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +float32x4_t test_vdupq_lane_f32(float32x2_t v1) { + // CHECK: test_vdupq_lane_f32 + return vdupq_lane_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float64x2_t test_vdupq_lane_f64(float64x1_t v1) { + // CHECK: test_vdupq_lane_f64 + return vdupq_lane_f64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float16x8_t test_vdupq_laneq_f16(float16x8_t v1) { + // CHECK: test_vdupq_laneq_f16 + return vdupq_laneq_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +float32x4_t test_vdupq_laneq_f32(float32x4_t v1) { + // CHECK: test_vdupq_laneq_f32 + return vdupq_laneq_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float64x2_t test_vdupq_laneq_f64(float64x2_t v1) { + // CHECK: test_vdupq_laneq_f64 + return vdupq_laneq_f64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int8x8_t test_vmov_n_s8(int8_t v1) { + // CHECK: test_vmov_n_s8 + return vmov_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +int16x4_t test_vmov_n_s16(int16_t v1) { + // CHECK: test_vmov_n_s16 + return vmov_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +int32x2_t test_vmov_n_s32(int32_t v1) { + // CHECK: test_vmov_n_s32 + return vmov_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +int64x1_t test_vmov_n_s64(int64_t v1) { + // CHECK: test_vmov_n_s64 + return vmov_n_s64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int8x16_t test_vmovq_n_s8(int8_t v1) { + // CHECK: test_vmovq_n_s8 + return vmovq_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +int16x8_t test_vmovq_n_s16(int16_t v1) { + // CHECK: test_vmovq_n_s16 + return vmovq_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +int32x4_t test_vmovq_n_s32(int32_t v1) { + // CHECK: test_vmovq_n_s32 + return vmovq_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +int64x2_t test_vmovq_n_s64(int64_t v1) { + // CHECK: test_vmovq_n_s64 + return vmovq_n_s64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +uint8x8_t test_vmov_n_u8(uint8_t v1) { + // CHECK: test_vmov_n_u8 + return vmov_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +uint16x4_t test_vmov_n_u16(uint16_t v1) { + // CHECK: test_vmov_n_u16 + return vmov_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +uint32x2_t test_vmov_n_u32(uint32_t v1) { + // CHECK: test_vmov_n_u32 + return vmov_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +uint64x1_t test_vmov_n_u64(uint64_t v1) { + // CHECK: test_vmov_n_u64 + return vmov_n_u64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x16_t test_vmovq_n_u8(uint8_t v1) { + // CHECK: test_vmovq_n_u8 + return vmovq_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +uint16x8_t test_vmovq_n_u16(uint16_t v1) { + // CHECK: test_vmovq_n_u16 + return vmovq_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +uint32x4_t test_vmovq_n_u32(uint32_t v1) { + // CHECK: test_vmovq_n_u32 + return vmovq_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +uint64x2_t test_vmovq_n_u64(uint64_t v1) { + // CHECK: test_vmovq_n_u64 + return vmovq_n_u64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +poly8x8_t test_vmov_n_p8(poly8_t v1) { + // CHECK: test_vmov_n_p8 + return vmov_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +poly16x4_t test_vmov_n_p16(poly16_t v1) { + // CHECK: test_vmov_n_p16 + return vmov_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +poly8x16_t test_vmovq_n_p8(poly8_t v1) { + // CHECK: test_vmovq_n_p8 + return vmovq_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +poly16x8_t test_vmovq_n_p16(poly16_t v1) { + // CHECK: test_vmovq_n_p16 + return vmovq_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +float32x2_t test_vmov_n_f32(float32_t v1) { + // CHECK: test_vmov_n_f32 + return vmov_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float64x1_t test_vmov_n_f64(float64_t v1) { + // CHECK: test_vmov_n_f64 + return vmov_n_f64(v1); + // CHECK: ret +} + +float32x4_t test_vmovq_n_f32(float32_t v1) { + // CHECK: test_vmovq_n_f32 + return vmovq_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmovq_n_f64(float64_t v1) { + // CHECK: test_vmovq_n_f64 + return vmovq_n_f64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 21827da2a3..8d4380a1a6 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -119,7 +119,8 @@ enum OpKind { OpDiv, OpLongHi, OpNarrowHi, - OpMovlHi + OpMovlHi, + OpCopy }; enum ClassKind { @@ -264,6 +265,7 @@ public: OpMap["OP_LONG_HI"] = OpLongHi; OpMap["OP_NARROW_HI"] = OpNarrowHi; OpMap["OP_MOVL_HI"] = OpMovlHi; + OpMap["OP_COPY"] = OpCopy; Record *SI = R.getClass("SInst"); Record *II = R.getClass("IInst"); @@ -1328,7 +1330,8 @@ static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { } // Generate the string "(argtype a, argtype b, ...)" -static std::string GenArgs(const std::string &proto, StringRef typestr) { +static std::string GenArgs(const std::string &proto, StringRef typestr, + const std::string &name) { bool define = UseMacro(proto); char arg = 'a'; @@ -1346,6 +1349,9 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) { s += TypeString(proto[i], typestr) + " __"; } s.push_back(arg); + //To avoid argument being multiple defined, add extra number for renaming. + if (name == "vcopy_lane") + s.push_back('1'); if ((i + 1) < e) s += ", "; } @@ -1356,7 +1362,8 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) { // Macro arguments are not type-checked like inline function arguments, so // assign them to local temporaries to get the right type checking. -static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { +static std::string GenMacroLocals(const std::string &proto, StringRef typestr, + const std::string &name ) { char arg = 'a'; std::string s; bool generatedLocal = false; @@ -1367,11 +1374,18 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { if (MacroArgUsedDirectly(proto, i)) continue; generatedLocal = true; + bool extranumber = false; + if(name == "vcopy_lane") + extranumber = true; s += TypeString(proto[i], typestr) + " __"; s.push_back(arg); + if(extranumber) + s.push_back('1'); s += " = ("; s.push_back(arg); + if(extranumber) + s.push_back('1'); s += "); "; } @@ -1832,6 +1846,12 @@ static std::string GenOpString(const std::string &name, OpKind op, MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; break; } + case OpCopy: { + s += TypeString('s', typestr) + " __c2 = " + + MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " + + MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);"; + break; + } default: PrintFatalError("unknown OpKind!"); } @@ -2062,12 +2082,12 @@ static std::string GenIntrinsic(const std::string &name, s += mangledName; // Function arguments - s += GenArgs(proto, inTypeStr); + s += GenArgs(proto, inTypeStr, name); // Definition. if (define) { s += " __extension__ ({ \\\n "; - s += GenMacroLocals(proto, inTypeStr); + s += GenMacroLocals(proto, inTypeStr, name); } else if (kind == OpUnavailable) { s += " __attribute__((unavailable));\n"; return s; -- 2.40.0