Allow __fp16 as a function arg or return type for AArch64

author Oliver Stannard <oliver.stannard@arm.com>
Wed, 27 Aug 2014 16:31:57 +0000 (16:31 +0000)

committer Oliver Stannard <oliver.stannard@arm.com>
Wed, 27 Aug 2014 16:31:57 +0000 (16:31 +0000)
diff --git a/include/clang/Basic/LangOptions.def b/include/clang/Basic/LangOptions.def

index fe3a46f30d2b4c2003627af3a06569e5037c4a6e..b84b9ac49b2f82b12edb3b15ca1bc961cf584d14 100644 (file)
--- a/include/clang/Basic/LangOptions.def
+++ b/include/clang/Basic/LangOptions.def
@@ -128,6 +128,7 @@ LANGOPT(ShortEnums        , 1, 0, "short enum types")
  LANGOPT(OpenCL            , 1, 0, "OpenCL")
  LANGOPT(OpenCLVersion     , 32, 0, "OpenCL version")
  LANGOPT(NativeHalfType    , 1, 0, "Native half type support")
+LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
  LANGOPT(CUDA              , 1, 0, "CUDA")
  LANGOPT(OpenMP            , 1, 0, "OpenMP support")
  
diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td

index 2c699a2bbe50f964d58f47380ca1c755cf5ef437..7e6a3395221f7214ceffde39cab576dadd542a75 100644 (file)
--- a/include/clang/Driver/CC1Options.td
+++ b/include/clang/Driver/CC1Options.td
@@ -511,6 +511,8 @@ def vtordisp_mode_EQ : Joined<["-"], "vtordisp-mode=">,
    HelpText<"Control vtordisp placement on win32 targets">;
  def fno_rtti_data : Flag<["-"], "fno-rtti-data">,
    HelpText<"Control emission of RTTI data">;
+def fallow_half_arguments_and_returns : Flag<["-"], "fallow-half-arguments-and-returns">,
+  HelpText<"Allow function arguments and returns of type half">;
  
  //===----------------------------------------------------------------------===//
  // Header Search Options
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp

index b508dcb446fb7809ca40e9fbe678005ab515f45a..3fdab85d5f062be104720babe32f3b6cfbe49a5d 100644 (file)
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -1132,7 +1132,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
    case APValue::Float: {
      const llvm::APFloat &Init = Value.getFloat();
      if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf &&
-         !Context.getLangOpts().NativeHalfType)
+        !Context.getLangOpts().NativeHalfType &&
+        !Context.getLangOpts().HalfArgsAndReturns)
        return llvm::ConstantInt::get(VMContext, Init.bitcastToAPInt());
      else
        return llvm::ConstantFP::get(VMContext, Init);
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp

index 9e0fbcfd184429155e359946c7fc658096766f03..5abe80f19002a893be7b8a58bbfc7b895565c76b 100644 (file)
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -701,7 +701,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
    llvm::Type *SrcTy = Src->getType();
  
    // If casting to/from storage-only half FP, use special intrinsics.
-  if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
+  if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
+      !CGF.getContext().getLangOpts().HalfArgsAndReturns) {
      Src = Builder.CreateCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
                               CGF.CGM.FloatTy),
@@ -773,7 +774,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
                               DstTy);
  
    // Cast to half via float
-  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType)
+  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
+      !CGF.getContext().getLangOpts().HalfArgsAndReturns)
      DstTy = CGF.FloatTy;
  
    if (isa<llvm::IntegerType>(SrcTy)) {
@@ -1691,7 +1693,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
      // Add the inc/dec to the real part.
      llvm::Value *amt;
  
-    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
+    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
+        !CGF.getContext().getLangOpts().HalfArgsAndReturns) {
        // Another special case: half FP increment should be done via float
        value = Builder.CreateCall(
            CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
@@ -1714,7 +1717,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
      }
      value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
  
-    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType)
+    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
+        !CGF.getContext().getLangOpts().HalfArgsAndReturns)
        value = Builder.CreateCall(
            CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
                                 CGF.CGM.FloatTy),
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp

index 6b0e4ad8b3137c8ea2961a85bf87d225adcd0246..1f39eb88c01f88b1359f27002570a59ef5e37e71 100644 (file)
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -358,9 +358,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
  
      case BuiltinType::Half:
        // Half FP can either be storage-only (lowered to i16) or native.
-      ResultType = getTypeForFormat(getLLVMContext(),
-          Context.getFloatTypeSemantics(T),
-          Context.getLangOpts().NativeHalfType);
+      ResultType =
+          getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T),
+                           Context.getLangOpts().NativeHalfType ||
+                               Context.getLangOpts().HalfArgsAndReturns);
        break;
      case BuiltinType::Float:
      case BuiltinType::Double:
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index c27e2cf42fe67b18e0819bed9531b2e46a7da2f9..fb3b931ce95ad9ffa95851dad49fb9c3ee35cbdc 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3544,8 +3544,9 @@ public:
  };
  }
  
-static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
                                     ASTContext &Context,
+                                   bool isAArch64,
                                     uint64_t *HAMembers = nullptr);
  
  ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
@@ -3627,7 +3628,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
    // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
    const Type *Base = nullptr;
    uint64_t Members = 0;
-  if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
+  if (isARMHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) {
      IsHA = true;
      if (!IsNamedArg && isDarwinPCS()) {
        // With the Darwin ABI, variadic arguments are always passed on the stack
@@ -3685,7 +3686,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
      return ABIArgInfo::getIgnore();
  
    const Type *Base = nullptr;
-  if (isHomogeneousAggregate(RetTy, Base, getContext()))
+  if (isARMHomogeneousAggregate(RetTy, Base, getContext(), true))
      // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
      return ABIArgInfo::getDirect();
  
@@ -3822,7 +3823,7 @@ static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty,
  
    const Type *Base = nullptr;
    uint64_t NumMembers;
-  bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers);
+  bool IsHFA = isARMHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers);
    if (IsHFA && NumMembers > 1) {
      // Homogeneous aggregates passed in registers will have their elements split
      // and stored 16-bytes apart regardless of size (they're notionally in qN,
@@ -3965,7 +3966,7 @@ llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType T
    uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
  
    const Type *Base = nullptr;
-  bool isHA = isHomogeneousAggregate(Ty, Base, getContext());
+  bool isHA = isARMHomogeneousAggregate(Ty, Base, getContext(), true);
  
    bool isIndirect = false;
    // Arguments bigger than 16 bytes which aren't homogeneous aggregates should
@@ -4251,15 +4252,16 @@ void ARMABIInfo::setRuntimeCC() {
      RuntimeCC = abiCC;
  }
  
-/// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous
+/// isARMHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous
  /// aggregate.  If HAMembers is non-null, the number of base elements
  /// contained in the type is returned through it; this is used for the
  /// recursive calls that check aggregate component types.
-static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                   ASTContext &Context, uint64_t *HAMembers) {
+static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                   ASTContext &Context, bool isAArch64,
+                                   uint64_t *HAMembers) {
    uint64_t Members = 0;
    if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
-    if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members))
+    if (!isARMHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members))
        return false;
      Members *= AT->getSize().getZExtValue();
    } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
@@ -4270,7 +4272,7 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
      Members = 0;
      for (const auto *FD : RD->fields()) {
        uint64_t FldMembers;
-      if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers))
+      if (!isARMHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers))
          return false;
  
        Members = (RD->isUnion() ?
@@ -4284,12 +4286,22 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
      }
  
      // Homogeneous aggregates for AAPCS-VFP must have base types of float,
-    // double, or 64-bit or 128-bit vectors.
+    // double, or 64-bit or 128-bit vectors. "long double" has the same machine
+    // type as double, so it is also allowed as a base type.
+    // Homogeneous aggregates for AAPCS64 must have base types of a floating
+    // point type or a short-vector type. This is the same as the 32-bit ABI,
+    // but with the difference that any floating-point type is allowed,
+    // including __fp16.
      if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-      if (BT->getKind() != BuiltinType::Float && 
-          BT->getKind() != BuiltinType::Double &&
-          BT->getKind() != BuiltinType::LongDouble)
-        return false;
+      if (isAArch64) {
+        if (!BT->isFloatingPoint())
+          return false;
+      } else {
+        if (BT->getKind() != BuiltinType::Float &&
+            BT->getKind() != BuiltinType::Double &&
+            BT->getKind() != BuiltinType::LongDouble)
+          return false;
+      }
      } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
        unsigned VecSize = Context.getTypeSize(VT);
        if (VecSize != 64 && VecSize != 128)
@@ -4491,7 +4503,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
      // into VFP registers.
      const Type *Base = nullptr;
      uint64_t Members = 0;
-    if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
+    if (isARMHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) {
        assert(Base && "Base class should be set for homogeneous aggregate");
        // Base can be a floating-point or a vector.
        if (Base->isVectorType()) {
@@ -4696,7 +4708,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
    // Check for homogeneous aggregates with AAPCS-VFP.
    if (getABIKind() == AAPCS_VFP && !isVariadic) {
      const Type *Base = nullptr;
-    if (isHomogeneousAggregate(RetTy, Base, getContext())) {
+    if (isARMHomogeneousAggregate(RetTy, Base, getContext(), false)) {
        assert(Base && "Base class should be set for homogeneous aggregate");
        // Homogeneous Aggregates are returned directly.
        return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP);
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp

index 1f857e3debeb07834acfbff9a216c18a3c0e260d..3af94ca32351a13e2d3b133d8ef2c5a49feabbbf 100644 (file)
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -3714,6 +3714,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
      CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment));
    }
  
+  if (getToolChain().getTriple().getArch() == llvm::Triple::aarch64 ||
+      getToolChain().getTriple().getArch() == llvm::Triple::aarch64_be)
+    CmdArgs.push_back("-fallow-half-arguments-and-returns");
+
    if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
                                 options::OPT_mno_restrict_it)) {
      if (A->getOption().matches(options::OPT_mrestrict_it)) {
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp

index 3eb6df84a0d5ff0ff32267a2006174e160cc104a..6c176e8c1a00226c92378a263af883fe1f21231f 100644 (file)
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -1500,6 +1500,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
    Opts.CurrentModule = Args.getLastArgValue(OPT_fmodule_name);
    Opts.ImplementationOfModule =
        Args.getLastArgValue(OPT_fmodule_implementation_of);
+  Opts.NativeHalfType = Opts.NativeHalfType;
+  Opts.HalfArgsAndReturns = Args.hasArg(OPT_fallow_half_arguments_and_returns);
  
    if (!Opts.CurrentModule.empty() && !Opts.ImplementationOfModule.empty() &&
        Opts.CurrentModule != Opts.ImplementationOfModule) {
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp

index c7915d3291a98d7cdb30ef84e7e0b362f4fae98b..51f36feaa46ab877e7cceca251cb46e469b5c1bf 100644 (file)
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -1746,7 +1746,7 @@ bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
    }
  
    // Functions cannot return half FP.
-  if (T->isHalfType()) {
+  if (T->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
      Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 1 <<
        FixItHint::CreateInsertion(Loc, "*");
      return true;
@@ -1776,7 +1776,7 @@ QualType Sema::BuildFunctionType(QualType T,
      if (ParamType->isVoidType()) {
        Diag(Loc, diag::err_param_with_void_type);
        Invalid = true;
-    } else if (ParamType->isHalfType()) {
+    } else if (ParamType->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
        // Disallow half FP arguments.
        Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 0 <<
          FixItHint::CreateInsertion(Loc, "*");
@@ -2751,7 +2751,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
              S.Diag(D.getIdentifierLoc(), diag::err_opencl_half_return) << T;
              D.setInvalidType(true);
            } 
-        } else {
+        } else if (!S.getLangOpts().HalfArgsAndReturns) {
            S.Diag(D.getIdentifierLoc(),
              diag::err_parameters_retval_cannot_have_fp16_type) << 1;
            D.setInvalidType(true);
@@ -2941,7 +2941,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
                  D.setInvalidType();
                  Param->setInvalidDecl();
                }
-            } else {
+            } else if (!S.getLangOpts().HalfArgsAndReturns) {
                S.Diag(Param->getLocation(),
                  diag::err_parameters_retval_cannot_have_fp16_type) << 0;
                D.setInvalidType();
diff --git a/test/CodeGen/arm64-aapcs-arguments.c b/test/CodeGen/arm64-aapcs-arguments.c

index b430630b0711be45038808efd8a0151736feeb69..38ac522de5d77ea9a2ee905b6c1990fb4d4d544a 100644 (file)
--- a/test/CodeGen/arm64-aapcs-arguments.c
+++ b/test/CodeGen/arm64-aapcs-arguments.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -fallow-half-arguments-and-returns -emit-llvm -w -o - %s | FileCheck %s
  
  // AAPCS clause C.8 says: If the argument has an alignment of 16 then the NGRN
  // is rounded up to the next even number.
@@ -40,3 +40,12 @@ void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
  // CHECK: define i8 @test5(i8 %a, i16 %b)
  unsigned char test5(unsigned char a, signed short b) {
  }
+
+// __fp16 can be used as a function argument or return type (ACLE 2.0)
+// CHECK: define half @test_half(half %{{.*}})
+__fp16 test_half(__fp16 A) { }
+
+// __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
+// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+struct HFA_half { __fp16 a[4]; };
+struct HFA_half test_half_hfa(struct HFA_half A) { }
author	Oliver Stannard <oliver.stannard@arm.com>
	Wed, 27 Aug 2014 16:31:57 +0000 (16:31 +0000)
committer	Oliver Stannard <oliver.stannard@arm.com>
	Wed, 27 Aug 2014 16:31:57 +0000 (16:31 +0000)
include/clang/Basic/LangOptions.def		patch | blob | history
include/clang/Driver/CC1Options.td		patch | blob | history
lib/CodeGen/CGExprConstant.cpp		patch | blob | history
lib/CodeGen/CGExprScalar.cpp		patch | blob | history
lib/CodeGen/CodeGenTypes.cpp		patch | blob | history
lib/CodeGen/TargetInfo.cpp		patch | blob | history
lib/Driver/Tools.cpp		patch | blob | history
lib/Frontend/CompilerInvocation.cpp		patch | blob | history
lib/Sema/SemaType.cpp		patch | blob | history
test/CodeGen/arm64-aapcs-arguments.c		patch | blob | history