From 9f2b0f8bb6fa976c5da9a62d8e69f35ccac7a42e Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Wed, 6 Nov 2013 02:26:12 +0000 Subject: [PATCH] Implement AArch64 Neon instruction set Bitwise Extract. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194119 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 5 + lib/CodeGen/CGBuiltin.cpp | 4 + test/CodeGen/aarch64-neon-extract.c | 148 ++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 test/CodeGen/aarch64-neon-extract.c diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 81904c8a1e..f640442a93 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -767,6 +767,11 @@ def VMINV : SInst<"vminv", "sd", "csiUcUsUiQcQsQiQUcQUsQUiQf">; def VADDV : SInst<"vaddv", "sd", "csiUcUsUiQcQsQiQUcQUsQUi">; def FMAXNMV : SInst<"vmaxnmv", "sd", "Qf">; def FMINNMV : SInst<"vminnmv", "sd", "Qf">; + +//////////////////////////////////////////////////////////////////////////////// +// Newly added Vector Extract for f64 +def VEXT_A64 : WInst<"vext", "dddi", + "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Crypto diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 40284b03c2..8ca667df3b 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2497,6 +2497,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // AArch64 builtins mapping to legacy ARM v7 builtins. // FIXME: the mapped builtins listed correspond to what has been tested // in aarch64-neon-intrinsics.c so far. 
+ case AArch64::BI__builtin_neon_vext_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E); + case AArch64::BI__builtin_neon_vextq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vextq_v, E); case AArch64::BI__builtin_neon_vmul_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E); case AArch64::BI__builtin_neon_vmulq_v: diff --git a/test/CodeGen/aarch64-neon-extract.c b/test/CodeGen/aarch64-neon-extract.c new file mode 100644 index 0000000000..faf35afad6 --- /dev/null +++ b/test/CodeGen/aarch64-neon-extract.c @@ -0,0 +1,148 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vext_s8 + return vext_s8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +} + +int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vext_s16 + return vext_s16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +} + +int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vext_s32 + return vext_s32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +} + +int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vext_s64 + return vext_s64(a, b, 0); +} + +int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vextq_s8 + return vextq_s8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +} + +int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vextq_s16 + return vextq_s16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +} + +int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vextq_s32 + return vextq_s32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +} + 
+int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vextq_s64 + return vextq_s64(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +} + +uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vext_u8 + return vext_u8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +} + +uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vext_u16 + return vext_u16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +} + +uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vext_u32 + return vext_u32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +} + +uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vext_u64 + return vext_u64(a, b, 0); +} + +uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vextq_u8 + return vextq_u8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +} + +uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vextq_u16 + return vextq_u16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +} + +uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vextq_u32 + return vextq_u32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +} + +uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vextq_u64 + return vextq_u64(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +} + +float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vext_f32 + return vext_f32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +} + +float64x1_t test_vext_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vext_f64 + return vext_f64(a, b, 0); +} + +float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vextq_f32 + 
return vextq_f32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +} + +float64x2_t test_vextq_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vextq_f64 + return vextq_f64(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +} + +poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vext_p8 + return vext_p8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +} + +poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vext_p16 + return vext_p16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +} + +poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vextq_p8 + return vextq_p8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +} + +poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vextq_p16 + return vextq_p16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +} -- 2.40.0