[AArch64] Split the neon.addp intrinsic into integer and fp variants.

author Amara Emerson <aemerson@apple.com>

Thu, 21 Mar 2019 22:31:37 +0000 (22:31 +0000)

committer Amara Emerson <aemerson@apple.com>

Thu, 21 Mar 2019 22:31:37 +0000 (22:31 +0000)
author Amara Emerson <aemerson@apple.com>
Thu, 21 Mar 2019 22:31:37 +0000 (22:31 +0000)
committer Amara Emerson <aemerson@apple.com>
Thu, 21 Mar 2019 22:31:37 +0000 (22:31 +0000)
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp

index 4d9c76ae7fc4b91cc392536cc063e07dac8bdc36..5042d09227bf2d817e74955337e6e7e53a398140 100644 (file)
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -5095,6 +5095,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
  
    switch (BuiltinID) {
    default: break;
+  case NEON::BI__builtin_neon_vpadd_v:
+  case NEON::BI__builtin_neon_vpaddq_v:
+    // We don't allow fp/int overloading of intrinsics.
+    if (VTy->getElementType()->isFloatingPointTy() &&
+        Int == Intrinsic::aarch64_neon_addp)
+      Int = Intrinsic::aarch64_neon_faddp;
+    break;
    case NEON::BI__builtin_neon_vabs_v:
    case NEON::BI__builtin_neon_vabsq_v:
      if (VTy->getElementType()->isFloatingPointTy())
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c

index 40e39912be9a5c0b4e53f6d02d868b7d37d93270..9a5b3a9f18b4b304de975e7afbccb9740885f6bf 100644 (file)
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -4411,7 +4411,7 @@ uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  // CHECK-LABEL: @test_vpadd_f32(
  // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
  // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b)
+// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
  // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
  // CHECK:   ret <2 x float> [[VPADD_V2_I]]
  float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
@@ -4475,7 +4475,7 @@ uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK-LABEL: @test_vpaddq_f32(
  // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
  // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
+// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
  // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
  // CHECK:   ret <4 x float> [[VPADDQ_V2_I]]
  float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
@@ -4485,7 +4485,7 @@ float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  // CHECK-LABEL: @test_vpaddq_f64(
  // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
  // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b)
+// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
  // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
  // CHECK:   ret <2 x double> [[VPADDQ_V2_I]]
  float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c

index e1a2e3fb92dd1ea5ae0601c5b10ad0c3dcb49228..a4bf8753363064db58b217a6435c286c7de96295 100644 (file)
--- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -736,14 +736,14 @@ float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
  }
  
  // CHECK-LABEL: test_vpadd_f16
-// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %a, <4 x half> %b)
  // CHECK:  ret <4 x half> [[ADD]]
  float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
    return vpadd_f16(a, b);
  }
  
  // CHECK-LABEL: test_vpaddq_f16
-// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> %a, <8 x half> %b)
  // CHECK:  ret <8 x half> [[ADD]]
  float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
    return vpaddq_f16(a, b);
author	Amara Emerson <aemerson@apple.com>
	Thu, 21 Mar 2019 22:31:37 +0000 (22:31 +0000)
committer	Amara Emerson <aemerson@apple.com>
	Thu, 21 Mar 2019 22:31:37 +0000 (22:31 +0000)
lib/CodeGen/CGBuiltin.cpp		patch | blob | history
test/CodeGen/aarch64-neon-intrinsics.c		patch | blob | history
test/CodeGen/aarch64-v8.2a-neon-intrinsics.c		patch | blob | history