ARM NEON: add _f16 support to a couple of vector-shuffling intrinsics.

author Tim Northover <tnorthover@apple.com>

Tue, 25 Feb 2014 11:13:42 +0000 (11:13 +0000)

committer Tim Northover <tnorthover@apple.com>

Tue, 25 Feb 2014 11:13:42 +0000 (11:13 +0000)
author Tim Northover <tnorthover@apple.com>
Tue, 25 Feb 2014 11:13:42 +0000 (11:13 +0000)
committer Tim Northover <tnorthover@apple.com>
Tue, 25 Feb 2014 11:13:42 +0000 (11:13 +0000)
diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td

index eab3b50e384420469ad50ea6b80e0d179104c25c..b1ae1db0f6edff6e21d5b1a1b9f0d380d296546c 100644 (file)
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -410,9 +410,11 @@ def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST>;
  // E.3.19 Set all lanes to same value
  let InstName = "vmov" in {
  def VDUP_N   : WOpInst<"vdup_n", "ds",
-                       "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>;
+                       "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
+                       OP_DUP>;
  def VMOV_N   : WOpInst<"vmov_n", "ds",
-                       "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>;
+                       "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
+                       OP_DUP>;
  }
  let InstName = "" in
  def VDUP_LANE: WOpInst<"vdup_lane", "dgi",
diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c

index 1d76e8a57546c86d23e1d45c63c038f55cbc9658..d7473ed2bcfa60cdfd13410f6dd98fa48695678b 100644 (file)
--- a/test/CodeGen/arm_neon_intrinsics.c
+++ b/test/CodeGen/arm_neon_intrinsics.c
@@ -1887,6 +1887,12 @@ poly16x4_t test_vdup_n_p16(poly16_t a) {
    return vdup_n_p16(a);
  }
  
+// CHECK: test_vdup_n_f16
+// CHECK: vdup.16
+float16x4_t test_vdup_n_f16(float16_t *a) {
+  return vdup_n_f16(*a);
+}
+
  // CHECK: test_vdup_n_f32
  // CHECK: vmov 
  float32x2_t test_vdup_n_f32(float32_t a) {
@@ -1941,6 +1947,12 @@ poly16x8_t test_vdupq_n_p16(poly16_t a) {
    return vdupq_n_p16(a);
  }
  
+// CHECK: test_vdupq_n_f16
+// CHECK: vdup.16
+float16x8_t test_vdupq_n_f16(float16_t *a) {
+  return vdupq_n_f16(*a);
+}
+
  // CHECK: test_vdupq_n_f32
  // CHECK: vmov 
  float32x4_t test_vdupq_n_f32(float32_t a) {
@@ -4868,6 +4880,12 @@ poly16x4_t test_vmov_n_p16(poly16_t a) {
    return vmov_n_p16(a);
  }
  
+// CHECK: test_vmov_n_f16
+// CHECK: vdup.16
+float16x4_t test_vmov_n_f16(float16_t *a) {
+  return vmov_n_f16(*a);
+}
+
  // CHECK: test_vmov_n_f32
  // CHECK: vmov 
  float32x2_t test_vmov_n_f32(float32_t a) {
@@ -4922,6 +4940,12 @@ poly16x8_t test_vmovq_n_p16(poly16_t a) {
    return vmovq_n_p16(a);
  }
  
+// CHECK: test_vmovq_n_f16
+// CHECK: vdup.16
+float16x8_t test_vmovq_n_f16(float16_t *a) {
+  return vmovq_n_f16(*a);
+}
+
  // CHECK: test_vmovq_n_f32
  // CHECK: vmov 
  float32x4_t test_vmovq_n_f32(float32_t a) {
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp

index 932af9a8e88b62da553ba9cce28bd86c3bb809e2..165e749b3347b5b0d900edad7fa748ec2fa5f309 100644 (file)
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -1436,7 +1436,7 @@ static void GenerateChecksForIntrinsic(const std::string &Name,
  
  /// UseMacro - Examine the prototype string to determine if the intrinsic
  /// should be defined as a preprocessor macro instead of an inline function.
-static bool UseMacro(const std::string &proto) {
+static bool UseMacro(const std::string &proto, StringRef typestr) {
    // If this builtin takes an immediate argument, we need to #define it rather
    // than use a standard declaration, so that SemaChecking can range check
    // the immediate passed by the user.
@@ -1449,6 +1449,12 @@ static bool UseMacro(const std::string &proto) {
        proto.find('c') != std::string::npos)
      return true;
  
+  // It is not permitted to pass or return an __fp16 by value, so intrinsics
+  // taking a scalar float16_t must be implemented as macros.
+  if (typestr.find('h') != std::string::npos &&
+      proto.find('s') != std::string::npos)
+    return true;
+
    return false;
  }
  
@@ -1463,7 +1469,7 @@ static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  // Generate the string "(argtype a, argtype b, ...)"
  static std::string GenArgs(const std::string &proto, StringRef typestr,
                             const std::string &name) {
-  bool define = UseMacro(proto);
+  bool define = UseMacro(proto, typestr);
    char arg = 'a';
  
    std::string s;
@@ -1642,7 +1648,7 @@ static std::string GenOpString(const std::string &name, OpKind op,
                                 const std::string &proto, StringRef typestr) {
    bool quad;
    unsigned nElts = GetNumElements(typestr, quad);
-  bool define = UseMacro(proto);
+  bool define = UseMacro(proto, typestr);
  
    std::string ts = TypeString(proto[0], typestr);
    std::string s;
@@ -2386,7 +2392,7 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto,
    // sret-like argument.
    bool sret = IsMultiVecProto(proto[0]);
  
-  bool define = UseMacro(proto);
+  bool define = UseMacro(proto, typestr);
  
    // Check if the prototype has a scalar operand with the type of the vector
    // elements.  If not, bitcasting the args will take care of arg checking.
@@ -2531,7 +2537,7 @@ static std::string GenIntrinsic(const std::string &name,
                                  StringRef outTypeStr, StringRef inTypeStr,
                                  OpKind kind, ClassKind classKind) {
    assert(!proto.empty() && "");
-  bool define = UseMacro(proto) && kind != OpUnavailable;
+  bool define = UseMacro(proto, outTypeStr) && kind != OpUnavailable;
    std::string s;
  
    // static always inline + return type
author	Tim Northover <tnorthover@apple.com>
	Tue, 25 Feb 2014 11:13:42 +0000 (11:13 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Tue, 25 Feb 2014 11:13:42 +0000 (11:13 +0000)
include/clang/Basic/arm_neon.td		patch | blob | history
test/CodeGen/arm_neon_intrinsics.c		patch | blob | history
utils/TableGen/NeonEmitter.cpp		patch | blob | history