From: Bill Wendling Date: Sun, 8 Dec 2013 00:02:49 +0000 (+0000) Subject: Merging r196359: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=612f5bfeac5c4d923bb448b2f06e3aeab318130f;p=clang Merging r196359: ------------------------------------------------------------------------ r196359 | kevinqin | 2013-12-03 23:53:09 -0800 (Tue, 03 Dec 2013) | 1 line [AArch64 NEON] Add missing compare intrinsics. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_34@196680 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index e7f95a21c1..9097edc4e6 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -732,28 +732,31 @@ def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Comparison // With additional Qd, Ql, QPl type. -def VVCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPcPlQPl", - OP_EQ>; -def FCAGE : IInst<"vcage", "udd", "fQfQd">; -def FCAGT : IInst<"vcagt", "udd", "fQfQd">; -def FCALE : IInst<"vcale", "udd", "fQfQd">; -def FCALT : IInst<"vcalt", "udd", "fQfQd">; +def FCAGE : IInst<"vcage", "udd", "fdQfQd">; +def FCAGT : IInst<"vcagt", "udd", "fdQfQd">; +def FCALE : IInst<"vcale", "udd", "fdQfQd">; +def FCALT : IInst<"vcalt", "udd", "fdQfQd">; // With additional Ql, QUl, Qd types. def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">; +// With additional l, Ul, d, Qd, Ql, QUl types.
def CFMEQ : SOpInst<"vceq", "udd", - "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>; -def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>; -def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>; -def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>; -def CFMLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>; + "csilfUcUsUiUlPcQcdQdQsQiQfQUcQUsQUiQUlQlQPcPlQPl", OP_EQ>; +def CFMGE : SOpInst<"vcge", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GE>; +def CFMLE : SOpInst<"vcle", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LE>; +def CFMGT : SOpInst<"vcgt", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GT>; +def CFMLT : SOpInst<"vclt", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LT>; def CMEQ : SInst<"vceqz", "ud", - "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsQd">; -def CMGE : SInst<"vcgez", "ud", "csifdQcQsQiQlQfQd">; -def CMLE : SInst<"vclez", "ud", "csifdQcQsQiQlQfQd">; -def CMGT : SInst<"vcgtz", "ud", "csifdQcQsQiQlQfQd">; -def CMLT : SInst<"vcltz", "ud", "csifdQcQsQiQlQfQd">; + "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; +def CMGE : SInst<"vcgez", "ud", "csilfdQcQsQiQlQfQd">; +def CMLE : SInst<"vclez", "ud", "csilfdQcQsQiQlQfQd">; +def CMGT : SInst<"vcgtz", "ud", "csilfdQcQsQiQlQfQd">; +def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Max/Min Integer diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 97252cbfdb..7ca68f13de 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -3007,10 +3007,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E); case AArch64::BI__builtin_neon_vrecpsq_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E); + case AArch64::BI__builtin_neon_vcale_v: + if 
(VTy->getVectorNumElements() == 1) { + std::swap(Ops[0], Ops[1]); + } else { + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E); + } case AArch64::BI__builtin_neon_vcage_v: + if (VTy->getVectorNumElements() == 1) { + // Determine the types of this overloaded AArch64 intrinsic + SmallVector Tys; + Tys.push_back(VTy); + VTy = llvm::VectorType::get(DoubleTy, 1); + Tys.push_back(VTy); + Tys.push_back(VTy); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcage, Tys); + return EmitNeonCall(F, Ops, "vcage"); + } return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E); - case AArch64::BI__builtin_neon_vcale_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E); case AArch64::BI__builtin_neon_vcaleq_v: std::swap(Ops[0], Ops[1]); case AArch64::BI__builtin_neon_vcageq_v: { @@ -3022,8 +3036,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(F, Ops, "vcage"); } case AArch64::BI__builtin_neon_vcalt_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E); + if (VTy->getVectorNumElements() == 1) { + std::swap(Ops[0], Ops[1]); + } else { + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E); + } case AArch64::BI__builtin_neon_vcagt_v: + if (VTy->getVectorNumElements() == 1) { + // Determine the types of this overloaded AArch64 intrinsic + SmallVector Tys; + Tys.push_back(VTy); + VTy = llvm::VectorType::get(DoubleTy, 1); + Tys.push_back(VTy); + Tys.push_back(VTy); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcagt, Tys); + return EmitNeonCall(F, Ops, "vcagt"); + } return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E); case AArch64::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index 567d894676..7708572a9c 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -915,6 +915,12 @@ uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t 
v2) { // CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcage_f64 + return vcage_f64(a, b); + // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcageq_f32 return vcageq_f32(v1, v2); @@ -933,6 +939,12 @@ uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { // CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcagt_f64 + return vcagt_f64(a, b); + // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcagtq_f32 return vcagtq_f32(v1, v2); @@ -948,10 +960,16 @@ uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vcale_f32 return vcale_f32(v1, v2); - // Using registers other than v0, v1 are possible, but would be odd. + // Using registers other than v0, v1 are possible, but would be odd. 
// CHECK: facge {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcale_f64 + return vcale_f64(a, b); + // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcaleq_f32 return vcaleq_f32(v1, v2); @@ -973,6 +991,12 @@ uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { // CHECK: facgt {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcalt_f64 + return vcalt_f64(a, b); + // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcaltq_f32 return vcaltq_f32(v1, v2); @@ -1114,12 +1138,30 @@ uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vceq_s64 + return vceq_s64(a, b); + // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vceq_u64 + return vceq_u64(a, b); + // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vceq_f32 return vceq_f32(v1, v2); // CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vceq_f64 + return vceq_f64(a, b); + // CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vceq_u8 return vceq_u8(v1, v2); @@ -1228,12 +1270,30 @@ uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcge_s64 + return vcge_s64(a, b); + // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t 
test_vcge_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcge_u64 + return vcge_u64(a, b); + // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vcge_f32 return vcge_f32(v1, v2); // CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcge_f64 + return vcge_f64(a, b); + // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vcge_u8 return vcge_u8(v1, v2); @@ -1333,12 +1393,30 @@ uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcle_s64 + return vcle_s64(a, b); + // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcle_u64 + return vcle_u64(a, b); + // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vcle_f32 return vcle_f32(v1, v2); // CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcle_f64 + return vcle_f64(a, b); + // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vcle_u8 return vcle_u8(v1, v2); @@ -1436,12 +1514,30 @@ uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcgt_s64 + return vcgt_s64(a, b); + // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcgt_u64 + return vcgt_u64(a, b); + // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vcgt_f32(float32x2_t v1, 
float32x2_t v2) { // CHECK: test_vcgt_f32 return vcgt_f32(v1, v2); // CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcgt_f64 + return vcgt_f64(a, b); + // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vcgt_u8 return vcgt_u8(v1, v2); @@ -1543,12 +1639,30 @@ uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vclt_s64 + return vclt_s64(a, b); + // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vclt_u64 + return vclt_u64(a, b); + // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vclt_f32 return vclt_f32(v1, v2); // CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vclt_f64 + return vclt_f64(a, b); + // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vclt_u8 return vclt_u8(v1, v2); diff --git a/test/CodeGen/aarch64-neon-misc.c b/test/CodeGen/aarch64-neon-misc.c index 8c2476b693..f56bf760aa 100644 --- a/test/CodeGen/aarch64-neon-misc.c +++ b/test/CodeGen/aarch64-neon-misc.c @@ -24,6 +24,24 @@ uint32x2_t test_vceqz_s32(int32x2_t a) { return vceqz_s32(a); } +// CHECK: test_vceqz_s64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_s64(int64x1_t a) { + return vceqz_s64(a); +} + +// CHECK: test_vceqz_u64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_u64(uint64x1_t a) { + return vceqz_u64(a); +} + +// CHECK: test_vceqz_p64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_p64(poly64x1_t a) { + return vceqz_p64(a); +} + // 
CHECK: test_vceqzq_s8 // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vceqzq_s8(int8x16_t a) { @@ -120,6 +138,12 @@ uint64x2_t test_vceqzq_f64(float64x2_t a) { return vceqzq_f64(a); } +// CHECK: test_vceqzq_p64 +// CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vceqzq_p64(poly64x2_t a) { + return vceqzq_p64(a); +} + // CHECK: test_vcgez_s8 // CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 uint8x8_t test_vcgez_s8(int8x8_t a) { @@ -138,6 +162,12 @@ uint32x2_t test_vcgez_s32(int32x2_t a) { return vcgez_s32(a); } +// CHECK: test_vcgez_s64 +// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vcgez_s64(int64x1_t a) { + return vcgez_s64(a); +} + // CHECK: test_vcgezq_s8 // CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vcgezq_s8(int8x16_t a) { @@ -198,6 +228,12 @@ uint32x2_t test_vclez_s32(int32x2_t a) { return vclez_s32(a); } +// CHECK: test_vclez_s64 +// CHECK: cmle {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vclez_s64(int64x1_t a) { + return vclez_s64(a); +} + // CHECK: test_vclezq_s8 // CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vclezq_s8(int8x16_t a) { @@ -258,6 +294,12 @@ uint32x2_t test_vcgtz_s32(int32x2_t a) { return vcgtz_s32(a); } +// CHECK: test_vcgtz_s64 +// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vcgtz_s64(int64x1_t a) { + return vcgtz_s64(a); +} + // CHECK: test_vcgtzq_s8 // CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vcgtzq_s8(int8x16_t a) { @@ -318,6 +360,12 @@ uint32x2_t test_vcltz_s32(int32x2_t a) { return vcltz_s32(a); } +// CHECK: test_vcltz_s64 +// CHECK: cmlt {{d[0-9]+}}, {{d[0-9]+}}, #0 +uint64x1_t test_vcltz_s64(int64x1_t a) { + return vcltz_s64(a); +} + // CHECK: test_vcltzq_s8 // CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 uint8x16_t test_vcltzq_s8(int8x16_t a) {