From 0a3d47628a2656a89cf8cd3ad5f30a8f7ab82cf9 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Mon, 3 Feb 2014 17:28:04 +0000
Subject: [PATCH] ARM: implement support for crypto intrinsics in arm_neon.h

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@200708 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Basic/Targets.cpp              |  8 +++
 lib/CodeGen/CGBuiltin.cpp          | 83 +++++++++++++++-----------
 test/CodeGen/aarch64-neon-crypto.c | 94 -----------------------------
 test/CodeGen/neon-crypto.c         | 95 ++++++++++++++++++++++++++++++
 utils/TableGen/NeonEmitter.cpp     |  7 ++-
 5 files changed, 157 insertions(+), 130 deletions(-)
 delete mode 100644 test/CodeGen/aarch64-neon-crypto.c
 create mode 100644 test/CodeGen/neon-crypto.c

diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 9d8c8c73c3..8c67f54d62 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -3641,6 +3641,7 @@ class ARMTargetInfo : public TargetInfo {
   unsigned SoftFloatABI : 1;
 
   unsigned CRC : 1;
+  unsigned Crypto : 1;
 
   static const Builtin::Info BuiltinInfo[];
 
@@ -3850,6 +3851,7 @@ public:
       Features["hwdiv"] = true;
       Features["hwdiv-arm"] = true;
       Features["crc"] = true;
+      Features["crypto"] = true;
     } else if (CPU == "cortex-r5" ||
                // Enable the hwdiv extension for all v8a AArch32 cores by
                // default.
@@ -3866,6 +3868,7 @@ public:
                                  DiagnosticsEngine &Diags) {
     FPU = 0;
     CRC = 0;
+    Crypto = 0;
     SoftFloat = SoftFloatABI = false;
     HWDiv = 0;
     for (unsigned i = 0, e = Features.size(); i != e; ++i) {
@@ -3889,6 +3892,8 @@ public:
         HWDiv |= HWDivARM;
       else if (Features[i] == "+crc")
         CRC = 1;
+      else if (Features[i] == "+crypto")
+        Crypto = 1;
     }
 
     if (!(FPU & NeonFPU) && FPMath == FP_Neon) {
@@ -4054,6 +4059,9 @@ public:
     if (CRC)
       Builder.defineMacro("__ARM_FEATURE_CRC32");
 
+    if (Crypto)
+      Builder.defineMacro("__ARM_FEATURE_CRYPTO");
+
     if (CPUArchVer >= 6 && CPUArch != "6M") {
       Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
       Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 29187c4261..f5c76607f1 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1777,6 +1777,18 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
   unsigned Int;
   switch (BuiltinID) {
   default: break;
+  case NEON::BI__builtin_neon_vaeseq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese),
+                        Ops, "aese");
+  case NEON::BI__builtin_neon_vaesdq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd),
+                        Ops, "aesd");
+  case NEON::BI__builtin_neon_vaesmcq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc),
+                        Ops, "aesmc");
+  case NEON::BI__builtin_neon_vaesimcq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc),
+                        Ops, "aesimc");
   case NEON::BI__builtin_neon_vabd_v:
   case NEON::BI__builtin_neon_vabdq_v:
     Int = Usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
@@ -2142,6 +2154,24 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vrsubhn_v:
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
                         Ops, "vrsubhn");
+  case NEON::BI__builtin_neon_vsha1su1q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1),
+                        Ops, "sha1su1");
+  case NEON::BI__builtin_neon_vsha256su0q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0),
+                        Ops, "sha256su0");
+  case NEON::BI__builtin_neon_vsha1su0q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0),
+                        Ops, "sha1su0");
+  case NEON::BI__builtin_neon_vsha256hq_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h),
+                        Ops, "sha256h");
+  case NEON::BI__builtin_neon_vsha256h2q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2),
+                        Ops, "sha256h2");
+  case NEON::BI__builtin_neon_vsha256su1q_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1),
+                        Ops, "sha256su1");
   case NEON::BI__builtin_neon_vshl_n_v:
   case NEON::BI__builtin_neon_vshlq_n_v:
     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
@@ -3847,37 +3877,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateStore(Ops[1], Ops[0]);
   }
 
-  // Crypto
-  case NEON::BI__builtin_neon_vaeseq_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese),
-                        Ops, "aese");
-  case NEON::BI__builtin_neon_vaesdq_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd),
-                        Ops, "aesd");
-  case NEON::BI__builtin_neon_vaesmcq_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc),
-                        Ops, "aesmc");
-  case NEON::BI__builtin_neon_vaesimcq_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc),
-                        Ops, "aesimc");
-  case NEON::BI__builtin_neon_vsha1su1q_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1),
-                        Ops, "sha1su1");
-  case NEON::BI__builtin_neon_vsha256su0q_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0),
-                        Ops, "sha256su0");
-  case NEON::BI__builtin_neon_vsha1su0q_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0),
-                        Ops, "sha1su0");
-  case NEON::BI__builtin_neon_vsha256hq_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h),
-                        Ops, "sha256h");
-  case NEON::BI__builtin_neon_vsha256h2q_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2),
-                        Ops, "sha256h2");
-  case NEON::BI__builtin_neon_vsha256su1q_v:
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1),
-                        Ops, "sha256su1");
   case NEON::BI__builtin_neon_vmul_lane_v:
   case NEON::BI__builtin_neon_vmul_laneq_v: {
     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
@@ -4458,10 +4457,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
       Ops.push_back(EmitScalarExpr(E->getArg(i)));
   }
 
-  // vget_lane and vset_lane are not overloaded and do not have an extra
-  // argument that specifies the vector type.
   switch (BuiltinID) {
   default: break;
+  // vget_lane and vset_lane are not overloaded and do not have an extra
+  // argument that specifies the vector type.
   case NEON::BI__builtin_neon_vget_lane_i8:
   case NEON::BI__builtin_neon_vget_lane_i16:
   case NEON::BI__builtin_neon_vget_lane_i32:
@@ -4486,6 +4485,24 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vsetq_lane_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(2)));
     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
+
+  // Non-polymorphic crypto instructions also not overloaded
+  case NEON::BI__builtin_neon_vsha1h_u32:
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
+                        "vsha1h");
+  case NEON::BI__builtin_neon_vsha1cq_u32:
+    Ops.push_back(EmitScalarExpr(E->getArg(2)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
+                        "vsha1h");
+  case NEON::BI__builtin_neon_vsha1pq_u32:
+    Ops.push_back(EmitScalarExpr(E->getArg(2)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
+                        "vsha1h");
+  case NEON::BI__builtin_neon_vsha1mq_u32:
+    Ops.push_back(EmitScalarExpr(E->getArg(2)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
+                        "vsha1h");
   }
 
   // Get the last argument, which specifies the vector type.
diff --git a/test/CodeGen/aarch64-neon-crypto.c b/test/CodeGen/aarch64-neon-crypto.c
deleted file mode 100644
index 240f3794b9..0000000000
--- a/test/CodeGen/aarch64-neon-crypto.c
+++ /dev/null
@@ -1,94 +0,0 @@
-// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -target-feature +crypto -S -O3 -o - %s | FileCheck %s
-// RUN: not %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -S -O3 -o - %s 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
-
-// Test new aarch64 intrinsics and types
-
-#include <arm_neon.h>
-
-uint8x16_t test_vaeseq_u8(uint8x16_t data, uint8x16_t key) {
-  // CHECK: test_vaeseq_u8
-  // CHECK-NO-CRYPTO: warning: implicit declaration of function 'vaeseq_u8' is invalid in C99
-  return vaeseq_u8(data, key);
-  // CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-}
-
-uint8x16_t test_vaesdq_u8(uint8x16_t data, uint8x16_t key) {
-  // CHECK: test_vaesdq_u8
-  return vaesdq_u8(data, key);
-  // CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-}
-
-uint8x16_t test_vaesmcq_u8(uint8x16_t data) {
-  // CHECK: test_vaesmcq_u8
-  return vaesmcq_u8(data);
-  // CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-}
-
-uint8x16_t test_vaesimcq_u8(uint8x16_t data) {
-  // CHECK: test_vaesimcq_u8
-  return vaesimcq_u8(data);
-  // CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-}
-
-uint32_t test_vsha1h_u32(uint32_t hash_e) {
-  // CHECK: test_vsha1h_u32
-  return vsha1h_u32(hash_e);
-  // CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
-}
-
-uint32x4_t test_vsha1su1q_u32(uint32x4_t tw0_3, uint32x4_t w12_15) {
-  // CHECK: test_vsha1su1q_u32
-  return vsha1su1q_u32(tw0_3, w12_15);
-  // CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha256su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7) {
-  // CHECK: test_vsha256su0q_u32
-  return vsha256su0q_u32(w0_3, w4_7);
-  // CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha1cq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
-  // CHECK: test_vsha1cq_u32
-  return vsha1cq_u32(hash_abcd, hash_e, wk);
-  // CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha1pq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
-  // CHECK: test_vsha1pq_u32
-  return vsha1pq_u32(hash_abcd, hash_e, wk);
-  // CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha1mq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
-  // CHECK: test_vsha1mq_u32
-  return vsha1mq_u32(hash_abcd, hash_e, wk);
-  // CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha1su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) {
-  // CHECK: test_vsha1su0q_u32
-  return vsha1su0q_u32(w0_3, w4_7, w8_11);
-  // CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha256hq_u32(uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) {
-  // CHECK: test_vsha256hq_u32
-  return vsha256hq_u32(hash_abcd, hash_efgh, wk);
-  // CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha256h2q_u32(uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) {
-  // CHECK: test_vsha256h2q_u32
-  return vsha256h2q_u32(hash_efgh, hash_abcd, wk);
-  // CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
-}
-
-uint32x4_t test_vsha256su1q_u32(uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) {
-  // CHECK: test_vsha256su1q_u32
-  return vsha256su1q_u32(tw0_3, w8_11, w12_15);
-  // CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-}
diff --git a/test/CodeGen/neon-crypto.c b/test/CodeGen/neon-crypto.c
new file mode 100644
index 0000000000..5dcef4b579
--- /dev/null
+++ b/test/CodeGen/neon-crypto.c
@@ -0,0 +1,95 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -target-feature +crypto -emit-llvm -O1 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm-none-linux-gnueabi -target-feature +neon \
+// RUN:   -target-feature +crypto -target-cpu cortex-a57 -emit-llvm -O1 -o - %s | FileCheck %s
+// RUN: not %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -S -O3 -o - %s 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+uint8x16_t test_vaeseq_u8(uint8x16_t data, uint8x16_t key) {
+  // CHECK-LABEL: @test_vaeseq_u8
+  // CHECK-NO-CRYPTO: warning: implicit declaration of function 'vaeseq_u8' is invalid in C99
+  return vaeseq_u8(data, key);
+  // CHECK: call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
+}
+
+uint8x16_t test_vaesdq_u8(uint8x16_t data, uint8x16_t key) {
+  // CHECK-LABEL: @test_vaesdq_u8
+  return vaesdq_u8(data, key);
+  // CHECK: call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
+}
+
+uint8x16_t test_vaesmcq_u8(uint8x16_t data) {
+  // CHECK-LABEL: @test_vaesmcq_u8
+  return vaesmcq_u8(data);
+  // CHECK: call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data)
+}
+
+uint8x16_t test_vaesimcq_u8(uint8x16_t data) {
+  // CHECK-LABEL: @test_vaesimcq_u8
+  return vaesimcq_u8(data);
+  // CHECK: call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data)
+}
+
+uint32_t test_vsha1h_u32(uint32_t hash_e) {
+  // CHECK-LABEL: @test_vsha1h_u32
+  return vsha1h_u32(hash_e);
+  // CHECK: call i32 @llvm.arm.neon.sha1h(i32 %hash_e)
+}
+
+uint32x4_t test_vsha1su1q_u32(uint32x4_t w0_3, uint32x4_t w12_15) {
+  // CHECK-LABEL: @test_vsha1su1q_u32
+  return vsha1su1q_u32(w0_3, w12_15);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %w0_3, <4 x i32> %w12_15)
+}
+
+uint32x4_t test_vsha256su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7) {
+  // CHECK-LABEL: @test_vsha256su0q_u32
+  return vsha256su0q_u32(w0_3, w4_7);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
+}
+
+uint32x4_t test_vsha1cq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
+  // CHECK-LABEL: @test_vsha1cq_u32
+  return vsha1cq_u32(hash_abcd, hash_e, wk);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+}
+
+uint32x4_t test_vsha1pq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
+  // CHECK-LABEL: @test_vsha1pq_u32
+  return vsha1pq_u32(hash_abcd, hash_e, wk);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+}
+
+uint32x4_t test_vsha1mq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
+  // CHECK-LABEL: @test_vsha1mq_u32
+  return vsha1mq_u32(hash_abcd, hash_e, wk);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+}
+
+uint32x4_t test_vsha1su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) {
+  // CHECK-LABEL: @test_vsha1su0q_u32
+  return vsha1su0q_u32(w0_3, w4_7, w8_11);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
+}
+
+uint32x4_t test_vsha256hq_u32(uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) {
+  // CHECK-LABEL: @test_vsha256hq_u32
+  return vsha256hq_u32(hash_abcd, hash_efgh, wk);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
+}
+
+uint32x4_t test_vsha256h2q_u32(uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) {
+  // CHECK-LABEL: @test_vsha256h2q_u32
+  return vsha256h2q_u32(hash_efgh, hash_abcd, wk);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
+}
+
+uint32x4_t test_vsha256su1q_u32(uint32x4_t w0_3, uint32x4_t w8_11, uint32x4_t w12_15) {
+  // CHECK-LABEL: @test_vsha256su1q_u32
+  return vsha256su1q_u32(w0_3, w8_11, w12_15);
+  // CHECK: call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
+}
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index b24d06916f..003722ef3f 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -2776,20 +2776,21 @@ void NeonEmitter::run(raw_ostream &OS) {
     emitIntrinsic(OS, R, EmittedMap);
   }
 
+  OS << "#endif\n\n";
+
+  // Now emit all the crypto intrinsics together
   OS << "#ifdef __ARM_FEATURE_CRYPTO\n";
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
 
-    // Skip crypto temporarily, and will emit them all together at the end.
     bool isCrypto = R->getValueAsBit("isCrypto");
     if (!isCrypto)
      continue;
 
     emitIntrinsic(OS, R, EmittedMap);
   }
-
-  OS << "#endif\n\n";
+  OS << "#endif\n\n";
-- 
2.40.0
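
For context, a minimal usage sketch of what this patch enables for user code: arm_neon.h now declares the crypto intrinsics only under the __ARM_FEATURE_CRYPTO macro that Targets.cpp defines when the "crypto" target feature is enabled. The sketch below is illustrative, not part of the patch; the helper name aes_round and the way the two intrinsics are composed are assumptions for the example.

    #include <arm_neon.h>

    #ifdef __ARM_FEATURE_CRYPTO
    /* One AES encryption round on a 128-bit state:
     * vaeseq_u8 performs AddRoundKey, SubBytes and ShiftRows;
     * vaesmcq_u8 performs MixColumns. */
    static uint8x16_t aes_round(uint8x16_t state, uint8x16_t round_key) {
      return vaesmcq_u8(vaeseq_u8(state, round_key));
    }
    #endif

Building with the crypto feature enabled (for example via -target-feature +crypto, as in the new test's RUN lines) defines the macro, so the guarded code compiles to the AESE/AESMC instructions; without it the block is simply skipped.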