From 22229d6822324a42913d25f256045dbf348a53e9 Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Tue, 5 Nov 2013 02:05:44 +0000 Subject: [PATCH] Implemented aarch64 neon intrinsic vcopy_lane with float type. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194042 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 19 ++- lib/CodeGen/CGBuiltin.cpp | 6 +- test/CodeGen/aarch64-neon-copy.c | 230 ++++++++++++++++++++++++++++++- utils/TableGen/NeonEmitter.cpp | 28 +++- 4 files changed, 266 insertions(+), 17 deletions(-) diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 96f9aa40ef..5b507a0d86 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -98,7 +98,9 @@ def OP_DIV : Op; def OP_LONG_HI : Op; def OP_NARROW_HI : Op; def OP_MOVL_HI : Op; -def OP_COPY : Op; +def OP_COPY_LN : Op; +def OP_COPYQ_LN : Op; +def OP_COPY_LNQ : Op; class Inst { string Name = n; @@ -669,13 +671,18 @@ def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; //////////////////////////////////////////////////////////////////////////////// // Extract or insert element from vector def GET_LANE : IInst<"vget_lane", "sdi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs">; + "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">; def SET_LANE : IInst<"vset_lane", "dsdi", - "csilPcPsUiUlUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPshfdQhQfQd">; + "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">; def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs", OP_COPY>; -def COPY_LANEQ : IOpInst<"vcopy_laneq", "ggidi", - "QcQsQiQlQUcQUsQUiQUlQPcQPs", OP_COPY>; + "csiPcPsUcUsUiPcPsf", OP_COPY_LN>; +def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", + "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQd", OP_COPYQ_LN>; +def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", + "csiPcPsUcUsUif", OP_COPY_LNQ>; +def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", + "QcQsQiQlQUcQUsQUiQUlQPcQPsQfd", OP_COPY_LN>; + //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 8f724501d2..5f72fa0a93 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2441,23 +2441,25 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case AArch64::BI__builtin_neon_vget_lane_i16: case AArch64::BI__builtin_neon_vget_lane_i32: case AArch64::BI__builtin_neon_vget_lane_i64: + case AArch64::BI__builtin_neon_vget_lane_f32: + case AArch64::BI__builtin_neon_vget_lane_f64: case AArch64::BI__builtin_neon_vgetq_lane_i8: case AArch64::BI__builtin_neon_vgetq_lane_i16: case AArch64::BI__builtin_neon_vgetq_lane_i32: case AArch64::BI__builtin_neon_vgetq_lane_i64: + case AArch64::BI__builtin_neon_vgetq_lane_f32: + case AArch64::BI__builtin_neon_vgetq_lane_f64: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E); case AArch64::BI__builtin_neon_vset_lane_i8: case AArch64::BI__builtin_neon_vset_lane_i16: case AArch64::BI__builtin_neon_vset_lane_i32: case AArch64::BI__builtin_neon_vset_lane_i64: - case AArch64::BI__builtin_neon_vset_lane_f16: case AArch64::BI__builtin_neon_vset_lane_f32: case AArch64::BI__builtin_neon_vset_lane_f64: case AArch64::BI__builtin_neon_vsetq_lane_i8: case AArch64::BI__builtin_neon_vsetq_lane_i16: case AArch64::BI__builtin_neon_vsetq_lane_i32: case AArch64::BI__builtin_neon_vsetq_lane_i64: - case AArch64::BI__builtin_neon_vsetq_lane_f16: case AArch64::BI__builtin_neon_vsetq_lane_f32: case AArch64::BI__builtin_neon_vsetq_lane_f64: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E); diff --git a/test/CodeGen/aarch64-neon-copy.c b/test/CodeGen/aarch64-neon-copy.c index 752acb6071..7c77b177af 100644 --- a/test/CodeGen/aarch64-neon-copy.c +++ b/test/CodeGen/aarch64-neon-copy.c @@ -281,16 +281,238 @@ int16x4_t test_vcopy_lane_s16(int16x4_t v1, int16x4_t v2) { // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] } +poly8x8_t test_vcopy_lane_p8(poly8x8_t v1, poly8x8_t v2) { + // CHECK: test_vcopy_lane_p8 + return vcopy_lane_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x4_t test_vcopy_lane_p16(poly16x4_t v1, poly16x4_t v2) { + // CHECK: test_vcopy_lane_p16 + return vcopy_lane_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + int32x2_t test_vcopy_lane_s32(int32x2_t v1, int32x2_t v2) { // CHECK: test_vcopy_lane_s32 return vcopy_lane_s32(v1, 0, v2, 1); // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] } -int64x1_t test_vcopy_lane_s64(int64x1_t v1, int64x1_t v2) { - // CHECK: test_vcopy_lane_s64 - return vcopy_lane_s64(v1, 0, v2, 0); - // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}} +float32x2_t test_vcopy_lane_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcopy_lane_f32 + return vcopy_lane_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint8x8_t test_vcopy_lane_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vcopy_lane_u8 + return vcopy_lane_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x4_t test_vcopy_lane_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vcopy_lane_u16 + return vcopy_lane_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x2_t test_vcopy_lane_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vcopy_lane_u32 + return vcopy_lane_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int8x8_t test_vcopy_laneq_s8(int8x8_t v1, int8x16_t v2) { + // CHECK: test_vcopy_laneq_s8 + return vcopy_laneq_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x4_t test_vcopy_laneq_s16(int16x4_t v1, int16x8_t v2) { + // CHECK: test_vcopy_laneq_s16 + return vcopy_laneq_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +poly8x8_t test_vcopy_laneq_p8(poly8x8_t v1, poly8x16_t v2) { + // CHECK: test_vcopy_laneq_p8 + return vcopy_laneq_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x4_t test_vcopy_laneq_p16(poly16x4_t v1, poly16x8_t v2) { + // CHECK: test_vcopy_laneq_p16 + return vcopy_laneq_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +int32x2_t test_vcopy_laneq_s32(int32x2_t v1, int32x4_t v2) { + // CHECK: test_vcopy_laneq_s32 + return vcopy_laneq_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x2_t test_vcopy_laneq_f32(float32x2_t v1, float32x4_t v2) { + // CHECK: test_vcopy_laneq_f32 + return vcopy_laneq_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint8x8_t test_vcopy_laneq_u8(uint8x8_t v1, uint8x16_t v2) { + // CHECK: test_vcopy_laneq_u8 + return vcopy_laneq_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x4_t test_vcopy_laneq_u16(uint16x4_t v1, uint16x8_t v2) { + // CHECK: test_vcopy_laneq_u16 + return vcopy_laneq_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x2_t test_vcopy_laneq_u32(uint32x2_t v1, uint32x4_t v2) { + // CHECK: test_vcopy_laneq_u32 + return vcopy_laneq_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int8x16_t test_vcopyq_lane_s8(int8x16_t v1, int8x8_t v2) { + // CHECK: test_vcopyq_lane_s8 + return vcopyq_lane_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x8_t test_vcopyq_lane_s16(int16x8_t v1, int16x4_t v2) { + // CHECK: test_vcopyq_lane_s16 + return vcopyq_lane_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +poly8x16_t test_vcopyq_lane_p8(poly8x16_t v1, poly8x8_t v2) { + // CHECK: test_vcopyq_lane_p8 + return vcopyq_lane_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x8_t test_vcopyq_lane_p16(poly16x8_t v1, poly16x4_t v2) { + // CHECK: test_vcopyq_lane_p16 + return vcopyq_lane_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +int32x4_t test_vcopyq_lane_s32(int32x4_t v1, int32x2_t v2) { + // CHECK: test_vcopyq_lane_s32 + return vcopyq_lane_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int64x2_t test_vcopyq_lane_s64(int64x2_t v1, int64x1_t v2) { + // CHECK: test_vcopyq_lane_s64 + return vcopyq_lane_s64(v1, 1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x4_t test_vcopyq_lane_f32(float32x4_t v1, float32x2_t v2) { + // CHECK: test_vcopyq_lane_f32 + return vcopyq_lane_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float64x2_t test_vcopyq_lane_f64(float64x2_t v1, float64x1_t v2) { + // CHECK: test_vcopyq_lane_f64 + return vcopyq_lane_f64(v1, 1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x16_t test_vcopyq_lane_u8(uint8x16_t v1, uint8x8_t v2) { + // CHECK: test_vcopyq_lane_u8 + return vcopyq_lane_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x8_t test_vcopyq_lane_u16(uint16x8_t v1, uint16x4_t v2) { + // CHECK: test_vcopyq_lane_u16 + return vcopyq_lane_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x4_t test_vcopyq_lane_u32(uint32x4_t v1, uint32x2_t v2) { + // CHECK: test_vcopyq_lane_u32 + return vcopyq_lane_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vcopyq_lane_u64(uint64x2_t v1, uint64x1_t v2) { + // CHECK: test_vcopyq_lane_u64 + return vcopyq_lane_u64(v1, 1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +int8x16_t test_vcopyq_laneq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vcopyq_laneq_s8 + return vcopyq_laneq_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x8_t test_vcopyq_laneq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vcopyq_laneq_s16 + return vcopyq_laneq_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +poly8x16_t test_vcopyq_laneq_p8(poly8x16_t v1, poly8x16_t v2) { + // CHECK: test_vcopyq_laneq_p8 + return vcopyq_laneq_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x8_t test_vcopyq_laneq_p16(poly16x8_t v1, poly16x8_t v2) { + // CHECK: test_vcopyq_laneq_p16 + return vcopyq_laneq_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +int32x4_t test_vcopyq_laneq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vcopyq_laneq_s32 + return vcopyq_laneq_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vcopyq_laneq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcopyq_laneq_f32 + return vcopyq_laneq_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int64x2_t test_vcopyq_laneq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vcopyq_laneq_s64 + return vcopyq_laneq_s64(v1, 1, v2, 1); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1] +} + +uint8x16_t test_vcopyq_laneq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vcopyq_laneq_u8 + return vcopyq_laneq_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x8_t test_vcopyq_laneq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vcopyq_laneq_u16 + return vcopyq_laneq_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x4_t test_vcopyq_laneq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vcopyq_laneq_u32 + return vcopyq_laneq_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vcopyq_laneq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vcopyq_laneq_u64 + return vcopyq_laneq_u64(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] } int8x8_t test_vcreate_s8(uint64_t v1) { diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 1649633d3e..5e8590f241 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -120,7 +120,9 @@ enum OpKind { OpLongHi, OpNarrowHi, OpMovlHi, - OpCopy + OpCopyLane, + OpCopyQLane, + OpCopyLaneQ }; enum ClassKind { @@ -265,7 +267,9 @@ public: OpMap["OP_LONG_HI"] = OpLongHi; OpMap["OP_NARROW_HI"] = OpNarrowHi; OpMap["OP_MOVL_HI"] = OpMovlHi; - OpMap["OP_COPY"] = OpCopy; + OpMap["OP_COPY_LN"] = OpCopyLane; + OpMap["OP_COPYQ_LN"] = OpCopyQLane; + OpMap["OP_COPY_LNQ"] = OpCopyLaneQ; Record *SI = R.getClass("SInst"); Record *II = R.getClass("IInst"); @@ -1358,7 +1362,7 @@ static std::string GenArgs(const std::string &proto, StringRef typestr, } s.push_back(arg); //To avoid argument being multiple defined, add extra number for renaming. - if (name == "vcopy_lane") + if (name == "vcopy_lane" || name == "vcopy_laneq") s.push_back('1'); if ((i + 1) < e) s += ", "; @@ -1383,7 +1387,7 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr, continue; generatedLocal = true; bool extranumber = false; - if(name == "vcopy_lane") + if (name == "vcopy_lane" || name == "vcopy_laneq") extranumber = true; s += TypeString(proto[i], typestr) + " __"; @@ -1854,12 +1858,26 @@ static std::string GenOpString(const std::string &name, OpKind op, MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; break; } - case OpCopy: { + case OpCopyLane: { s += TypeString('s', typestr) + " __c2 = " + MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " + MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);"; break; } + case OpCopyQLane: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode + + "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);"; + break; + } + case OpCopyLaneQ: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode + + "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);"; + break; + } default: PrintFatalError("unknown OpKind!"); } -- 2.40.0