From: Chad Rosier Date: Mon, 9 Dec 2013 22:47:59 +0000 (+0000) Subject: [AArch64] Refactor the NEON scalar reduce pairwise intrinsics so that they use X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4f8ca137603e716034bbb2b951e96e1dd8c3807c;p=clang [AArch64] Refactor the NEON scalar reduce pairwise intrinsics so that they use float/double rather than the vector equivalents when appropriate. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@196836 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index ea6176dd9f..108c0f0887 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1763,6 +1763,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, bool OverloadCvtInt = false; bool OverloadWideInt = false; bool OverloadNarrowInt = false; + bool OverloadScalarRetInt = false; const char *s = NULL; SmallVector<Value *, 4> Ops; @@ -1995,35 +1996,35 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, case AArch64::BI__builtin_neon_vpadds_f32: case AArch64::BI__builtin_neon_vpaddd_f64: Int = Intrinsic::aarch64_neon_vpfadd; - s = "vpfadd"; AcrossVec = true; break; + s = "vpfadd"; OverloadScalarRetInt = true; break; // Scalar Reduce Pairwise Floating Point Max case AArch64::BI__builtin_neon_vmaxv_f32: case AArch64::BI__builtin_neon_vpmaxs_f32: case AArch64::BI__builtin_neon_vmaxvq_f64: case AArch64::BI__builtin_neon_vpmaxqd_f64: Int = Intrinsic::aarch64_neon_vpmax; - s = "vpmax"; AcrossVec = true; break; + s = "vpmax"; OverloadScalarRetInt = true; break; // Scalar Reduce Pairwise Floating Point Min case AArch64::BI__builtin_neon_vminv_f32: case AArch64::BI__builtin_neon_vpmins_f32: case AArch64::BI__builtin_neon_vminvq_f64: case AArch64::BI__builtin_neon_vpminqd_f64: Int = Intrinsic::aarch64_neon_vpmin; - s = "vpmin"; AcrossVec = true; break; + s = "vpmin"; OverloadScalarRetInt = true; break; // Scalar Reduce Pairwise Floating Point Maxnm case 
AArch64::BI__builtin_neon_vmaxnmv_f32: case AArch64::BI__builtin_neon_vpmaxnms_f32: case AArch64::BI__builtin_neon_vmaxnmvq_f64: case AArch64::BI__builtin_neon_vpmaxnmqd_f64: Int = Intrinsic::aarch64_neon_vpfmaxnm; - s = "vpfmaxnm"; AcrossVec = true; break; + s = "vpfmaxnm"; OverloadScalarRetInt = true; break; // Scalar Reduce Pairwise Floating Point Minnm case AArch64::BI__builtin_neon_vminnmv_f32: case AArch64::BI__builtin_neon_vpminnms_f32: case AArch64::BI__builtin_neon_vminnmvq_f64: case AArch64::BI__builtin_neon_vpminnmqd_f64: Int = Intrinsic::aarch64_neon_vpfminnm; - s = "vpfminnm"; AcrossVec = true; break; + s = "vpfminnm"; OverloadScalarRetInt = true; break; // The followings are intrinsics with scalar results generated AcrossVec vectors case AArch64::BI__builtin_neon_vaddlv_s8: case AArch64::BI__builtin_neon_vaddlv_s16: @@ -2580,12 +2581,22 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, F = CGF.CGM.getIntrinsic(Int, Tys); assert(E->getNumArgs() == 1); } else if (OverloadInt) { - // Determine the type of this overloaded AArch64 intrinsic + // Determine the type of this overloaded AArch64 intrinsic and convert the + // scalar types to one-vector element types. llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); assert(VTy); F = CGF.CGM.getIntrinsic(Int, VTy); + } else if (OverloadScalarRetInt) { + // Determine the type of this overloaded AArch64 intrinsic + const Expr *Arg = E->getArg(E->getNumArgs()-1); + llvm::Type *Ty = CGF.ConvertType(Arg->getType()); + llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); + llvm::Type *RTy = VTy->getElementType(); + + llvm::Type *Tys[2] = {RTy, VTy}; + F = CGF.CGM.getIntrinsic(Int, Tys); } else if (OverloadWideInt || OverloadNarrowInt) { // Determine the type of this overloaded AArch64 intrinsic const Expr *Arg = E->getArg(E->getNumArgs()-1);