]> granicus.if.org Git - clang/commitdiff
[AArch64] Add support for NEON scalar arithmetic instructions:
authorChad Rosier <mcrosier@codeaurora.org>
Mon, 7 Oct 2013 17:07:17 +0000 (17:07 +0000)
committerChad Rosier <mcrosier@codeaurora.org>
Mon, 7 Oct 2013 17:07:17 +0000 (17:07 +0000)
SQDMULH, SQRDMULH, FMULX, FRECPS, and FRSQRTS.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192112 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/arm_neon.td
lib/CodeGen/CGBuiltin.cpp
test/CodeGen/aarch64-neon-intrinsics.c

index 10150249b41e3eb2464012f32d438d5a4fe9e896..3482b8f77f5616eb352aa2af747fe02267fc56c5 100644 (file)
@@ -745,4 +745,24 @@ def SCALAR_FMINP : SInst<"vpmin", "sd", "SfSQd">;
 def SCALAR_FMAXNMP : SInst<"vpmaxnm", "sd", "SfSQd">;
 def SCALAR_FMINNMP : SInst<"vpminnm", "sd", "SfSQd">;
 
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Integer Saturating Doubling Multiply Half High
+def SCALAR_SQDMULH : SInst<"vqdmulh", "sss", "SsSi">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Floating-point Multiply Extended
+def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Floating-point Reciprocal Step
+def SCALAR_FRECPS : IInst<"vrecps", "sss", "SfSd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Floating-point Reciprocal Square Root Step
+def SCALAR_FRSQRTS : IInst<"vrsqrts", "sss", "SfSd">;
+
 }
index 52e40db540235c54c02aefd9c500c0785d6b0707..9c526107edfcc547f4bf85065f771c59e94bea0f 100644 (file)
@@ -1945,6 +1945,31 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   case AArch64::BI__builtin_neon_vminnmvq_f32:
     Int = Intrinsic::aarch64_neon_vminnmv;
     AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
+  // Scalar Integer Saturating Doubling Multiply Half High
+  case AArch64::BI__builtin_neon_vqdmulhh_s16:
+  case AArch64::BI__builtin_neon_vqdmulhs_s32:
+    Int = Intrinsic::arm_neon_vqdmulh;
+    s = "vqdmulh"; OverloadInt = true; break;
+  // Scalar Integer Saturating Rounding Doubling Multiply Half High
+  case AArch64::BI__builtin_neon_vqrdmulhh_s16:
+  case AArch64::BI__builtin_neon_vqrdmulhs_s32:
+    Int = Intrinsic::arm_neon_vqrdmulh;
+    s = "vqrdmulh"; OverloadInt = true; break;
+  // Scalar Floating-point Multiply Extended
+  case AArch64::BI__builtin_neon_vmulxs_f32:
+  case AArch64::BI__builtin_neon_vmulxd_f64:
+    Int = Intrinsic::aarch64_neon_vmulx;
+    s = "vmulx"; OverloadInt = true; break;
+  // Scalar Floating-point Reciprocal Step and
+  case AArch64::BI__builtin_neon_vrecpss_f32:
+  case AArch64::BI__builtin_neon_vrecpsd_f64:
+    Int = Intrinsic::arm_neon_vrecps;
+    s = "vrecps"; OverloadInt = true; break;
+  // Scalar Floating-point Reciprocal Square Root Step
+  case AArch64::BI__builtin_neon_vrsqrtss_f32:
+  case AArch64::BI__builtin_neon_vrsqrtsd_f64:
+    Int = Intrinsic::arm_neon_vrsqrts;
+    s = "vrsqrts"; OverloadInt = true; break;
   }
 
   if (!Int)
index b71256a665051811e9d2d3a3dc65ab7e5ffbdbaa..07b15b55f6ea29742f53fcf6ea07ae2799b6d641 100644 (file)
@@ -5538,3 +5538,62 @@ float64_t test_vpminqd_f64(float64x2_t a) {
 // CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
 }
 
+int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
+// CHECK: test_vqdmulhh_s16
+  return vqdmulhh_s16(a, b);
+// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
+// CHECK: test_vqdmulhs_s32
+  return vqdmulhs_s32(a, b);
+// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
+// CHECK: test_vqrdmulhh_s16
+  return vqrdmulhh_s16(a, b);
+// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
+// CHECK: test_vqrdmulhs_s32
+  return vqrdmulhs_s32(a, b);
+// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+float32_t test_vmulxs_f32(float32_t a, float32_t b) {
+// CHECK: test_vmulxs_f32
+  return vmulxs_f32(a, b);
+// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+float64_t test_vmulxd_f64(float64_t a, float64_t b) {
+// CHECK: test_vmulxd_f64
+  return vmulxd_f64(a, b);
+// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+float32_t test_vrecpss_f32(float32_t a, float32_t b) {
+// CHECK: test_vrecpss_f32
+  return vrecpss_f32(a, b);
+// CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
+// CHECK: test_vrecpsd_f64
+  return vrecpsd_f64(a, b);
+// CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
+// CHECK: test_vrsqrtss_f32
+  return vrsqrtss_f32(a, b);
+// CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
+// CHECK: test_vrsqrtsd_f64
+  return vrsqrtsd_f64(a, b);
+// CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}