From 25009d468a0221f4e2a4d9c789720dde6167d4b7 Mon Sep 17 00:00:00 2001
From: Jiangning Liu
Date: Tue, 5 Nov 2013 17:42:24 +0000
Subject: [PATCH] Implement AArch64 Neon Crypto instruction classes AES, SHA, and 3 SHA.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194086 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/arm_neon.td    |  19 ++++
 lib/CodeGen/CGBuiltin.cpp          | 101 ++++++++++++++++++++---
 test/CodeGen/aarch64-neon-crypto.c |  91 ++++++++++++++++++++++++++
 3 files changed, 181 insertions(+), 30 deletions(-)
 create mode 100644 test/CodeGen/aarch64-neon-crypto.c

diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index 5b507a0d86..81904c8a1e 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -768,6 +768,25 @@ def VADDV : SInst<"vaddv", "sd", "csiUcUsUiQcQsQiQUcQUsQUi">;
 def FMAXNMV : SInst<"vmaxnmv", "sd", "Qf">;
 def FMINNMV : SInst<"vminnmv", "sd", "Qf">;
 
+////////////////////////////////////////////////////////////////////////////////
+// Crypto
+def AESE : SInst<"vaese", "ddd", "QUc">;
+def AESD : SInst<"vaesd", "ddd", "QUc">;
+def AESMC : SInst<"vaesmc", "dd", "QUc">;
+def AESIMC : SInst<"vaesimc", "dd", "QUc">;
+
+def SHA1H : SInst<"vsha1h", "ss", "Ui">;
+def SHA1SU1 : SInst<"vsha1su1", "ddd", "QUi">;
+def SHA256SU0 : SInst<"vsha256su0", "ddd", "QUi">;
+
+def SHA1C : SInst<"vsha1c", "ddsd", "QUi">;
+def SHA1P : SInst<"vsha1p", "ddsd", "QUi">;
+def SHA1M : SInst<"vsha1m", "ddsd", "QUi">;
+def SHA1SU0 : SInst<"vsha1su0", "dddd", "QUi">;
+def SHA256H : SInst<"vsha256h", "dddd", "QUi">;
+def SHA256H2 : SInst<"vsha256h2", "dddd", "QUi">;
+def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Arithmetic
 
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 5f72fa0a93..40284b03c2 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1770,6 +1770,45 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   // argument that specifies the vector type, need to handle each case.
   switch (BuiltinID) {
   default: break;
+  case AArch64::BI__builtin_neon_vget_lane_i8:
+  case AArch64::BI__builtin_neon_vget_lane_i16:
+  case AArch64::BI__builtin_neon_vget_lane_i32:
+  case AArch64::BI__builtin_neon_vget_lane_i64:
+  case AArch64::BI__builtin_neon_vget_lane_f32:
+  case AArch64::BI__builtin_neon_vget_lane_f64:
+  case AArch64::BI__builtin_neon_vgetq_lane_i8:
+  case AArch64::BI__builtin_neon_vgetq_lane_i16:
+  case AArch64::BI__builtin_neon_vgetq_lane_i32:
+  case AArch64::BI__builtin_neon_vgetq_lane_i64:
+  case AArch64::BI__builtin_neon_vgetq_lane_f32:
+  case AArch64::BI__builtin_neon_vgetq_lane_f64:
+    return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
+  case AArch64::BI__builtin_neon_vset_lane_i8:
+  case AArch64::BI__builtin_neon_vset_lane_i16:
+  case AArch64::BI__builtin_neon_vset_lane_i32:
+  case AArch64::BI__builtin_neon_vset_lane_i64:
+  case AArch64::BI__builtin_neon_vset_lane_f32:
+  case AArch64::BI__builtin_neon_vset_lane_f64:
+  case AArch64::BI__builtin_neon_vsetq_lane_i8:
+  case AArch64::BI__builtin_neon_vsetq_lane_i16:
+  case AArch64::BI__builtin_neon_vsetq_lane_i32:
+  case AArch64::BI__builtin_neon_vsetq_lane_i64:
+  case AArch64::BI__builtin_neon_vsetq_lane_f32:
+  case AArch64::BI__builtin_neon_vsetq_lane_f64:
+    return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
+  // Crypto
+  case AArch64::BI__builtin_neon_vsha1h_u32:
+    Int = Intrinsic::arm_neon_sha1h;
+    s = "sha1h"; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vsha1cq_u32:
+    Int = Intrinsic::aarch64_neon_sha1c;
+    s = "sha1c"; break;
+  case AArch64::BI__builtin_neon_vsha1pq_u32:
+    Int = Intrinsic::aarch64_neon_sha1p;
+    s = "sha1p"; break;
+  case AArch64::BI__builtin_neon_vsha1mq_u32:
+    Int = Intrinsic::aarch64_neon_sha1m;
+    s = "sha1m"; break;
   // Scalar Add
   case AArch64::BI__builtin_neon_vaddd_s64:
     Int = Intrinsic::aarch64_neon_vaddds;
@@ -2434,36 +2473,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
     Ops.push_back(EmitScalarExpr(E->getArg(i)));
   }
-  // Some intrinsic isn't overloaded.
-  switch (BuiltinID) {
-  default: break;
-  case AArch64::BI__builtin_neon_vget_lane_i8:
-  case AArch64::BI__builtin_neon_vget_lane_i16:
-  case AArch64::BI__builtin_neon_vget_lane_i32:
-  case AArch64::BI__builtin_neon_vget_lane_i64:
-  case AArch64::BI__builtin_neon_vget_lane_f32:
-  case AArch64::BI__builtin_neon_vget_lane_f64:
-  case AArch64::BI__builtin_neon_vgetq_lane_i8:
-  case AArch64::BI__builtin_neon_vgetq_lane_i16:
-  case AArch64::BI__builtin_neon_vgetq_lane_i32:
-  case AArch64::BI__builtin_neon_vgetq_lane_i64:
-  case AArch64::BI__builtin_neon_vgetq_lane_f32:
-  case AArch64::BI__builtin_neon_vgetq_lane_f64:
-    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
-  case AArch64::BI__builtin_neon_vset_lane_i8:
-  case AArch64::BI__builtin_neon_vset_lane_i16:
-  case AArch64::BI__builtin_neon_vset_lane_i32:
-  case AArch64::BI__builtin_neon_vset_lane_i64:
-  case AArch64::BI__builtin_neon_vset_lane_f32:
-  case AArch64::BI__builtin_neon_vset_lane_f64:
-  case AArch64::BI__builtin_neon_vsetq_lane_i8:
-  case AArch64::BI__builtin_neon_vsetq_lane_i16:
-  case AArch64::BI__builtin_neon_vsetq_lane_i32:
-  case AArch64::BI__builtin_neon_vsetq_lane_i64:
-  case AArch64::BI__builtin_neon_vsetq_lane_f32:
-  case AArch64::BI__builtin_neon_vsetq_lane_f64:
-    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
-  }
 
   // Get the last argument, which specifies the vector type.
   llvm::APSInt Result;
@@ -2769,6 +2778,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   case AArch64::BI__builtin_neon_vst4q_v:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
 
+  // Crypto
+  case AArch64::BI__builtin_neon_vaeseq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese, Ty),
+                        Ops, "aese");
+  case AArch64::BI__builtin_neon_vaesdq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd, Ty),
+                        Ops, "aesd");
+  case AArch64::BI__builtin_neon_vaesmcq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc, Ty),
+                        Ops, "aesmc");
+  case AArch64::BI__builtin_neon_vaesimcq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc, Ty),
+                        Ops, "aesimc");
+  case AArch64::BI__builtin_neon_vsha1su1q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1, Ty),
+                        Ops, "sha1su1");
+  case AArch64::BI__builtin_neon_vsha256su0q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0, Ty),
+                        Ops, "sha256su0");
+  case AArch64::BI__builtin_neon_vsha1su0q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0, Ty),
+                        Ops, "sha1su0");
+  case AArch64::BI__builtin_neon_vsha256hq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h, Ty),
+                        Ops, "sha256h");
+  case AArch64::BI__builtin_neon_vsha256h2q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2, Ty),
+                        Ops, "sha256h2");
+  case AArch64::BI__builtin_neon_vsha256su1q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1, Ty),
+                        Ops, "sha256su1");
+
   // AArch64-only builtins
   case AArch64::BI__builtin_neon_vfma_lane_v:
   case AArch64::BI__builtin_neon_vfmaq_laneq_v: {
diff --git a/test/CodeGen/aarch64-neon-crypto.c b/test/CodeGen/aarch64-neon-crypto.c
new file mode 100644
index 0000000000..968ef2ed60
--- /dev/null
+++ b/test/CodeGen/aarch64-neon-crypto.c
@@ -0,0 +1,91 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+uint8x16_t test_vaeseq_u8(uint8x16_t data, uint8x16_t key) {
+  // CHECK: test_vaeseq_u8
+  return vaeseq_u8(data, key);
+  // CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vaesdq_u8(uint8x16_t data, uint8x16_t key) {
+  // CHECK: test_vaesdq_u8
+  return vaesdq_u8(data, key);
+  // CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vaesmcq_u8(uint8x16_t data) {
+  // CHECK: test_vaesmcq_u8
+  return vaesmcq_u8(data);
+  // CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vaesimcq_u8(uint8x16_t data) {
+  // CHECK: test_vaesimcq_u8
+  return vaesimcq_u8(data);
+  // CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint32_t test_vsha1h_u32(uint32_t hash_e) {
+  // CHECK: test_vsha1h_u32
+  return vsha1h_u32(hash_e);
+  // CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+uint32x4_t test_vsha1su1q_u32(uint32x4_t tw0_3, uint32x4_t w12_15) {
+  // CHECK: test_vsha1su1q_u32
+  return vsha1su1q_u32(tw0_3, w12_15);
+  // CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha256su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7) {
+  // CHECK: test_vsha256su0q_u32
+  return vsha256su0q_u32(w0_3, w4_7);
+  // CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha1cq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
+  // CHECK: test_vsha1cq_u32
+  return vsha1cq_u32(hash_abcd, hash_e, wk);
+  // CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha1pq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
+  // CHECK: test_vsha1pq_u32
+  return vsha1pq_u32(hash_abcd, hash_e, wk);
+  // CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha1mq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
+  // CHECK: test_vsha1mq_u32
+  return vsha1mq_u32(hash_abcd, hash_e, wk);
+  // CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha1su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) {
+  // CHECK: test_vsha1su0q_u32
+  return vsha1su0q_u32(w0_3, w4_7, w8_11);
+  // CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha256hq_u32(uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) {
+  // CHECK: test_vsha256hq_u32
+  return vsha256hq_u32(hash_abcd, hash_efgh, wk);
+  // CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha256h2q_u32(uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) {
+  // CHECK: test_vsha256h2q_u32
+  return vsha256h2q_u32(hash_efgh, hash_abcd, wk);
+  // CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vsha256su1q_u32(uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) {
+  // CHECK: test_vsha256su1q_u32
+  return vsha256su1q_u32(tw0_3, w8_11, w12_15);
+  // CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
-- 
2.40.0
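
For reference, a minimal usage sketch (not part of the patch above): it shows how the new intrinsics are typically chained in real code, assuming an <arm_neon.h> that declares them exactly as exercised by the test file; the helper names aes_round and sha256_step are illustrative only.

/* Illustrative sketch only; assumes the crypto intrinsics added by this
   patch are available in <arm_neon.h>. Helper names are hypothetical. */
#include <arm_neon.h>

/* One AES round: AESE (AddRoundKey + SubBytes + ShiftRows) followed by
   AESMC (MixColumns), the pairing the hardware expects. */
static inline uint8x16_t aes_round(uint8x16_t state, uint8x16_t round_key) {
  return vaesmcq_u8(vaeseq_u8(state, round_key));
}

/* One SHA-256 state update: SHA256H and SHA256H2 consume the same
   schedule-plus-constant word vector wk and update the two state halves,
   so the pre-update ABCD value must be kept for the second instruction. */
static inline void sha256_step(uint32x4_t *abcd, uint32x4_t *efgh,
                               uint32x4_t wk) {
  uint32x4_t abcd_prev = *abcd;
  *abcd = vsha256hq_u32(*abcd, *efgh, wk);
  *efgh = vsha256h2q_u32(*efgh, abcd_prev, wk);
}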