From: Tim Northover
Date: Tue, 1 Apr 2014 12:23:08 +0000 (+0000)
Subject: ARM64: add a few bits of polynomial intrinsic codegen.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fcd13765fb9bc542ff4f470a16faa70f49d332a5;p=clang

ARM64: add a few bits of polynomial intrinsic codegen.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@205303 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 9b321b9bf5..5a86bdd44c 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2608,6 +2608,7 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
   NEONMAP1(vminvq_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
   NEONMAP1(vminvq_s32, arm64_neon_sminv, AddRetType | Add1ArgType),
   NEONMAP1(vminvq_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
+  NEONMAP1(vmull_p64, arm64_neon_pmull64, 0),
   NEONMAP1(vmulxd_f64, arm64_neon_fmulx, Add1ArgType),
   NEONMAP1(vmulxs_f32, arm64_neon_fmulx, Add1ArgType),
   NEONMAP1(vpaddd_s64, arm64_neon_uaddv, AddRetType | Add1ArgType),
@@ -5190,6 +5191,16 @@ Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
   // Handle non-overloaded intrinsics first.
   switch (BuiltinID) {
   default: break;
+  case NEON::BI__builtin_neon_vldrq_p128: {
+    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
+    Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
+    return Builder.CreateLoad(Ptr);
+  }
+  case NEON::BI__builtin_neon_vstrq_p128: {
+    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
+    Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
+    return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr);
+  }
   case NEON::BI__builtin_neon_vcvts_u32_f32:
   case NEON::BI__builtin_neon_vcvtd_u64_f64:
     usgn = true;
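
Note on the two new builtin cases above: vldrq_p128 and vstrq_p128 are not routed
through a target intrinsic at all; the pointer operand is bitcast to i128* and an
ordinary 128-bit integer load or store is emitted, leaving instruction selection
free to pick a pair of x-register accesses or a single q-register access. A minimal
usage sketch from the C side (illustrative only, not part of this patch; the
swap128 helper is hypothetical and assumes a target where poly128_t is available):

    #include <arm_neon.h>

    /* Round-trip two poly128_t values through memory. With this change each
       vldrq_p128/vstrq_p128 should lower to one 128-bit access (e.g. ldp/stp
       or ldr/str of a q register) rather than an element-by-element expansion. */
    void swap128(poly128_t *a, poly128_t *b) {
      poly128_t ta = vldrq_p128(a);
      poly128_t tb = vldrq_p128(b);
      vstrq_p128(a, tb);
      vstrq_p128(b, ta);
    }
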
diff --git a/test/CodeGen/aarch64-poly128.c b/test/CodeGen/aarch64-poly128.c
index ddcb4420ba..609e5962ce 100644
--- a/test/CodeGen/aarch64-poly128.c
+++ b/test/CodeGen/aarch64-poly128.c
@@ -1,6 +1,11 @@
 // REQUIRES: aarch64-registered-target
+// REQUIRES: arm64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN: --check-prefix=CHECK-AARCH64
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN: --check-prefix=CHECK-ARM64
 
 // Test new aarch64 intrinsics with poly128
 // FIXME: Currently, poly128_t equals to uint128, which will be spilt into
@@ -14,22 +19,29 @@
 void test_vstrq_p128(poly128_t * ptr, poly128_t val) {
   // CHECK-LABEL: test_vstrq_p128
   vstrq_p128(ptr, val);
-  // CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
-  // CHECK-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}]
+// CHECK-AARCH64: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+// CHECK-AARCH64-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}]
+
+  // CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 }
 
 poly128_t test_vldrq_p128(poly128_t * ptr) {
   // CHECK-LABEL: test_vldrq_p128
   return vldrq_p128(ptr);
-  // CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
-  // CHECK-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+  // CHECK-AARCH64: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
+  // CHECK-AARCH64-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+
+  // CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 }
 
 void test_ld_st_p128(poly128_t * ptr) {
   // CHECK-LABEL: test_ld_st_p128
   vstrq_p128(ptr+1, vldrq_p128(ptr));
-  // CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
-  // CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
+  // CHECK-AARCH64: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
+  // CHECK-AARCH64-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
+
+  // CHECK-ARM64: ldp [[PLO:x[0-9]+]], [[PHI:x[0-9]+]], [{{x[0-9]+}}]
+  // CHECK-ARM64-NEXT: stp [[PLO]], [[PHI]], [{{x[0-9]+}}, #16]
 }
 
 poly128_t test_vmull_p64(poly64_t a, poly64_t b) {
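
Note on the NEONMAP1(vmull_p64, arm64_neon_pmull64, 0) entry added in
CGBuiltin.cpp: it maps the ACLE vmull_p64 intrinsic (a 64x64 -> 128-bit
carry-less multiply) onto the arm64_neon_pmull64 intrinsic, which is expected to
select a single PMULL instruction on the 1q/1d arrangement; the test_vmull_p64
function referenced above exercises exactly this path. A small usage sketch
(illustrative only, not part of this patch; ghash_mul is a hypothetical helper
and assumes a crypto-capable ARM64 target):

    #include <arm_neon.h>

    /* One GHASH-style polynomial multiply step: the whole function should
       compile down to a single pmull of the form
       pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d */
    poly128_t ghash_mul(poly64_t a, poly64_t b) {
      return vmull_p64(a, b);
    }
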
diff --git a/test/CodeGen/aarch64-poly64.c b/test/CodeGen/aarch64-poly64.c
index 61cb5e588e..c071147af2 100644
--- a/test/CodeGen/aarch64-poly64.c
+++ b/test/CodeGen/aarch64-poly64.c
@@ -1,6 +1,11 @@
 // REQUIRES: aarch64-registered-target
+// REQUIRES: arm64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN: --check-prefix=CHECK-AARCH64
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN: --check-prefix=CHECK-ARM64
 
 // Test new aarch64 intrinsics with poly64
 
@@ -69,7 +74,9 @@ poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
 poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
   // CHECK-LABEL: test_vcopy_lane_p64
   return vcopy_lane_p64(a, 0, b, 0);
-  // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+  // CHECK-AARCH64: fmov {{d[0-9]+}}, {{d[0-9]+}}
+
+  // CHECK-ARM64: orr v0.16b, v1.16b, v1.16b
 }
 
 poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
@@ -81,7 +88,7 @@ poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
 poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vcopyq_laneq_p64
   return vcopyq_laneq_p64(a, 1, b, 1);
-  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
 }
 
 poly64x1_t test_vcreate_p64(uint64_t a) {
@@ -128,97 +135,101 @@ poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
 poly64x1_t test_vld1_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld1_p64
   return vld1_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: ldr {{d[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld1q_p64
   return vld1q_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64: ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: ldr {{q[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
   // CHECK-LABEL: test_vst1_p64
   return vst1_p64(ptr, val);
-  // CHECK: st1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64: st1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: str {{d[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
   // CHECK-LABEL: test_vst1q_p64
   return vst1q_p64(ptr, val);
-  // CHECK: st1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64: st1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: str {{q[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld2_p64
   return vld2_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld2q_p64
   return vld2q_p64(ptr);
-  // CHECK: ld2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld2 {{{ *v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld3_p64
   return vld3_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld3q_p64
   return vld3q_p64(ptr);
-  // CHECK: ld3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld3 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld4_p64
   return vld4_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld4q_p64
   return vld4q_p64(ptr);
-  // CHECK: ld4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld4 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
   // CHECK-LABEL: test_vst2_p64
   return vst2_p64(ptr, val);
-  // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: st1 {{{ *v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
   // CHECK-LABEL: test_vst2q_p64
   return vst2q_p64(ptr, val);
-  // CHECK: st2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: st2 {{{ *v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
   // CHECK-LABEL: test_vst3_p64
   return vst3_p64(ptr, val);
-  // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: st1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
   // CHECK-LABEL: test_vst3q_p64
   return vst3q_p64(ptr, val);
-  // CHECK: st3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: st3 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
   // CHECK-LABEL: test_vst4_p64
   return vst4_p64(ptr, val);
-  // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: st1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
   // CHECK-LABEL: test_vst4q_p64
   return vst4q_p64(ptr, val);
-  // CHECK: st4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: st4 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
@@ -230,43 +241,49 @@ poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
 poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vextq_p64
   return vextq_p64(a, b, 1);
-  // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+  // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{#0x8|#8}}
 }
 
 poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vzip1q_p64
   return vzip1q_p64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vzip2q_p64
   return vzip2q_u64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vuzp1q_p64
   return vuzp1q_p64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vuzp2q_p64
   return vuzp2q_u64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vtrn1q_p64
   return vtrn1q_p64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vtrn2q_p64
   return vtrn2q_u64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {