bool isDarwinPCS() const { return Kind == DarwinPCS; }
ABIArgInfo classifyReturnType(QualType RetTy) const;
- ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
- bool &IsHA, unsigned &AllocatedGPR,
- bool &IsSmallAggr, bool IsNamedArg) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t Members) const override;
bool isIllegalVectorType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override {
- // To correctly handle Homogeneous Aggregate, we need to keep track of the
- // number of SIMD and Floating-point registers allocated so far.
- // If the argument is an HFA or an HVA and there are sufficient unallocated
- // SIMD and Floating-point registers, then the argument is allocated to SIMD
- // and Floating-point Registers (with one register per member of the HFA or
- // HVA). Otherwise, the NSRN is set to 8.
- unsigned AllocatedVFP = 0;
-
- // To correctly handle small aggregates, we need to keep track of the number
- // of GPRs allocated so far. If the small aggregate can't all fit into
- // registers, it will be on stack. We don't allow the aggregate to be
- // partially in registers.
- unsigned AllocatedGPR = 0;
-
- // Find the number of named arguments. Variadic arguments get special
- // treatment with the Darwin ABI.
- unsigned NumRequiredArgs = FI.getNumRequiredArgs();
-
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- unsigned ArgNo = 0;
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it, ++ArgNo) {
- unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR;
- bool IsHA = false, IsSmallAggr = false;
- const unsigned NumVFPs = 8;
- const unsigned NumGPRs = 8;
- bool IsNamedArg = ArgNo < NumRequiredArgs;
- it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA,
- AllocatedGPR, IsSmallAggr, IsNamedArg);
-
- // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
- // as sequences of floats since they'll get "holes" inserted as
- // padding by the back end.
- if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() &&
- getContext().getTypeAlign(it->type) < 64) {
- uint32_t NumStackSlots = getContext().getTypeSize(it->type);
- NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
-
- llvm::Type *CoerceTy = llvm::ArrayType::get(
- llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
- it->info = ABIArgInfo::getDirect(CoerceTy);
- }
- // If we do not have enough VFP registers for the HA, any VFP registers
- // that are unallocated are marked as unavailable. To achieve this, we add
- // padding of (NumVFPs - PreAllocation) floats.
- if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
- llvm::Type *PaddingTy = llvm::ArrayType::get(
- llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
- it->info.setPaddingType(PaddingTy);
- }
-
- // If we do not have enough GPRs for the small aggregate, any GPR regs
- // that are unallocated are marked as unavailable.
- if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) {
- llvm::Type *PaddingTy = llvm::ArrayType::get(
- llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR);
- it->info =
- ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy);
- }
- }
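+    // Each argument is now classified independently; tracking how many GPRs
+    // and SIMD/FP registers have been allocated is left to the backend.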
+ for (auto &it : FI.arguments())
+ it.info = classifyArgumentType(it.type);
}
  llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
                               CodeGenFunction &CGF) const;
};
}
-ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
- unsigned &AllocatedVFP,
- bool &IsHA,
- unsigned &AllocatedGPR,
- bool &IsSmallAggr,
- bool IsNamedArg) const {
+ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
  // Handle illegal vector types here.
  if (isIllegalVectorType(Ty)) {
    uint64_t Size = getContext().getTypeSize(Ty);
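    // e.g. a <2 x i8> vector (16 bits) is illegal on AArch64 and is passed
    // directly as an i32 below.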
if (Size <= 32) {
llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
- AllocatedGPR++;
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64) {
llvm::Type *ResType =
llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
- AllocatedVFP++;
return ABIArgInfo::getDirect(ResType);
}
if (Size == 128) {
llvm::Type *ResType =
llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
- AllocatedVFP++;
return ABIArgInfo::getDirect(ResType);
}
- AllocatedGPR++;
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
}
- if (Ty->isVectorType())
- // Size of a legal vector should be either 64 or 128.
- AllocatedVFP++;
- if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
- if (BT->getKind() == BuiltinType::Half ||
- BT->getKind() == BuiltinType::Float ||
- BT->getKind() == BuiltinType::Double ||
- BT->getKind() == BuiltinType::LongDouble)
- AllocatedVFP++;
- }
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- if (!Ty->isFloatingType() && !Ty->isVectorType()) {
- unsigned Alignment = getContext().getTypeAlign(Ty);
- if (!isDarwinPCS() && Alignment > 64)
- AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
- int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
- AllocatedGPR += RegsNeeded;
- }
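    // e.g. a 'short' argument is extended to i32 under DarwinPCS, but passed
    // direct under AAPCS, where the callee cannot assume it was extended.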
    return (Ty->isPromotableIntegerType() && isDarwinPCS()
                ? ABIArgInfo::getExtend()
                : ABIArgInfo::getDirect());
  }
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always indirect.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
- AllocatedGPR++;
return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
- CGCXXABI::RAA_DirectInMemory);
+ CGCXXABI::RAA_DirectInMemory);
}
  // Empty records are always ignored on Darwin, but actually passed in C++
  // mode elsewhere for GNU compatibility.
  if (isEmptyRecord(getContext(), Ty, true)) {
if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
return ABIArgInfo::getIgnore();
- ++AllocatedGPR;
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
}
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(Ty, Base, Members)) {
- IsHA = true;
- if (!IsNamedArg && isDarwinPCS()) {
- // With the Darwin ABI, variadic arguments are always passed on the stack
- // and should not be expanded. Treat variadic HFAs as arrays of doubles.
- uint64_t Size = getContext().getTypeSize(Ty);
- llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext());
- return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
- }
- AllocatedVFP += Members;
- return ABIArgInfo::getExpand();
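+    // e.g. 'struct { float f[4]; }' is now coerced to [4 x float] rather
+    // than being expanded into four scalar float arguments.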
+ return ABIArgInfo::getDirect(
+ llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
}
// Aggregates <= 16 bytes are passed directly in registers or on the stack.
uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 128) {
unsigned Alignment = getContext().getTypeAlign(Ty);
- if (!isDarwinPCS() && Alignment > 64)
- AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
- AllocatedGPR += Size / 64;
- IsSmallAggr = true;
+
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
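    // e.g. struct { long a, b; }  (size 128, align 64)  -> [2 x i64]
    //      struct { __int128 a; } (size 128, align 128) -> i128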
    if (Alignment < 128 && Size == 128) {
      llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
    }
    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
- AllocatedGPR++;
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
}
return Members <= 4;
}
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const {
- unsigned AllocatedGPR = 0, AllocatedVFP = 0;
- bool IsHA = false, IsSmallAggr = false;
- ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
- IsSmallAggr, false /*IsNamedArg*/);
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr,
+ QualType Ty,
+ CodeGenFunction &CGF) const {
+ ABIArgInfo AI = classifyArgumentType(Ty);
bool IsIndirect = AI.isIndirect();
+ llvm::Type *BaseTy = CGF.ConvertType(Ty);
+ if (IsIndirect)
+ BaseTy = llvm::PointerType::getUnqual(BaseTy);
+ else if (AI.getCoerceToType())
+ BaseTy = AI.getCoerceToType();
+
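+  // An argument coerced to [N x Ty] (e.g. an HFA) occupies N consecutive
+  // registers of the element type.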
+ unsigned NumRegs = 1;
+ if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
+ BaseTy = ArrTy->getElementType();
+ NumRegs = ArrTy->getNumElements();
+ }
+ bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+
// The AArch64 va_list type and handling is specified in the Procedure Call
// Standard, section B.4:
//
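  //   struct {
  //     void *__stack;
  //     void *__gr_top;
  //     void *__vr_top;
  //     int __gr_offs;
  //     int __vr_offs;
  //   };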
llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
int reg_top_index;
- int RegSize;
- if (AllocatedGPR) {
- assert(!AllocatedVFP && "Arguments never split between int & VFP regs");
+ int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
+ if (!IsFPR) {
// 3 is the field number of __gr_offs
reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
reg_top_index = 1; // field number for __gr_top
- RegSize = 8 * AllocatedGPR;
+ RegSize = llvm::RoundUpToAlignment(RegSize, 8);
} else {
- assert(!AllocatedGPR && "Argument must go in VFP or int regs");
// 4 is the field number of __vr_offs.
reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
reg_top_index = 2; // field number for __vr_top
- RegSize = 16 * AllocatedVFP;
+ RegSize = 16 * NumRegs;
}
//=======================================
// Integer arguments may need to correct register alignment (for example a
// "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
// align __gr_offs to calculate the potential address.
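  // e.g. for a 16-byte-aligned type with __gr_offs == -40, the offset is
  // rounded up to -32, skipping one register so the value starts on an
  // even/odd pair.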
- if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
+ if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
int Align = Ctx.getTypeAlign(Ty) / 8;
reg_offs = CGF.Builder.CreateAdd(
float f4;
};
// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}})
-// CHECK64: define %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK64: define %struct.homogeneous_struct @test_struct([4 x float] %{{.*}})
extern struct homogeneous_struct struct_callee(struct homogeneous_struct);
struct homogeneous_struct test_struct(struct homogeneous_struct arg) {
return struct_callee(arg);
double d[4];
};
// CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}})
-// CHECK64: define void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_array([4 x double] %{{.*}})
extern void array_callee(struct nested_array);
void test_array(struct nested_array arg) {
array_callee(arg);
extern void complex_callee(__complex__ double);
// CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}})
-// CHECK64: define void @test_complex(double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_complex([2 x double] %cd.coerce)
void test_complex(__complex__ double cd) {
complex_callee(cd);
}
// Neon multi-vector types are homogeneous aggregates.
// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}})
-// CHECK64: define <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+// CHECK64: define <16 x i8> @f0([4 x <16 x i8>] %{{.*}})
int8x16_t f0(int8x16x4_t v4) {
return vaddq_s8(v4.val[0], v4.val[3]);
}
int16x4_t v4;
};
// CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}})
-// CHECK64: define void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}})
+// CHECK64: define void @test_neon([4 x <8 x i8>] %{{.*}})
extern void neon_callee(struct neon_struct);
void test_neon(struct neon_struct arg) {
neon_callee(arg);
// RUN: -ffreestanding -emit-llvm -w -o - %s | FileCheck -check-prefix=CHECK64 %s
// RUN: %clang_cc1 -triple arm64-linux-gnu -ffreestanding -emit-llvm -w -o - %s \
-// RUN: | FileCheck --check-prefix=CHECK64-AAPCS %s
+// RUN: | FileCheck --check-prefix=CHECK64 %s
typedef long long int64_t;
typedef unsigned int uint32_t;
// CHECK: test_struct_of_four_doubles
// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
// CHECK64: test_struct_of_four_doubles
-// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_four_doubles
-// CHECK64-AAPCS: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, [4 x double] {{.*}}, [4 x double] {{.*}}, double {{.*}})
takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
}
// CHECK: test_struct_of_vecs
// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}})
// CHECK64: test_struct_of_vecs
-// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_vecs
-// CHECK64-AAPCS: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, [4 x <8 x i8>] {{.*}}, [4 x <8 x i8>] {{.*}}, double {{.*}})
takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
}
// stack in order to avoid holes. Make sure we get all of them, and not just the
// first:
-// CHECK: void @test3(float %s0_s3.0, float %s0_s3.1, float %s0_s3.2, float %s0_s3.3, float %s4, [3 x float], [2 x double] %sp.coerce, [2 x double] %sp16.coerce)
+// CHECK: void @test3([4 x float] %s0_s3.coerce, float %s4, [4 x float] %sp.coerce, [4 x float] %sp16.coerce)
typedef struct { float arr[4]; } HFA;
void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
}
// fp128] or something, but leaving them as-is retains more information for
// users to debug.
-// CHECK: void @test4(<16 x i8> %v0_v2.0, <16 x i8> %v0_v2.1, <16 x i8> %v0_v2.2, <16 x i8> %v3_v5.0, <16 x i8> %v3_v5.1, <16 x i8> %v3_v5.2, [2 x float], <16 x i8> %sp.0, <16 x i8> %sp.1, <16 x i8> %sp.2, double %sp48, <16 x i8> %sp64.0, <16 x i8> %sp64.1, <16 x i8> %sp64.2)
+// CHECK: void @test4([3 x <16 x i8>] %v0_v2.coerce, [3 x <16 x i8>] %v3_v5.coerce, [3 x <16 x i8>] %sp.coerce, double %sp48, [3 x <16 x i8>] %sp64.coerce)
typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
typedef struct { int8x16_t arr[3]; } BigHFA;
void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
__fp16 test_half(__fp16 A) { }
// __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
-// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+// CHECK: define %struct.HFA_half @test_half_hfa([4 x half] %{{.*}})
struct HFA_half { __fp16 a[4]; };
struct HFA_half test_half_hfa(struct HFA_half A) { }
struct s32 { double x; };
void f32(struct s32 s) { }
-// Expand Homogeneous Aggregate.
-// CHECK: @f32(double %{{.*}})
+// CHECK: @f32([1 x double] %{{.*}})
// A composite type larger than 16 bytes should be passed indirectly.
struct s33 { char buf[32*32]; };
typedef __attribute__((neon_vector_type(4))) float float32x4_t;
float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
-// CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3)
+// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
// CHECK: %s1 = alloca %struct.s35, align 16
// CHECK: %s2 = alloca %struct.s35, align 16
// CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
__attribute__ ((noinline))
int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
s40_no_align s1, s40_no_align s2) {
-// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
+// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
}
int caller40_split() {
// CHECK: define i32 @caller40_split()
-// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
+// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
}
__attribute__ ((noinline))
int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
s41_with_align s1, s41_with_align s2) {
-// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce)
return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
}
int caller41_split() {
// CHECK: define i32 @caller41_split()
-// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}})
+// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}})
return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
}
float test_hfa_call(struct HFA *a) {
// CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
-// CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}})
+// CHECK: call float (i32, ...)* @test_hfa(i32 1, [4 x float] {{.*}})
test_hfa(1, *a);
}
// PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
// ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
-// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2)
+// ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
D2 CC func_D2(D2 x) { return x; }
// PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
// ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
-// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3)
+// ARM64: define %struct.D4 @_Z7func_D42D4([4 x double] %x.coerce)
D4 CC func_D4(D4 x) { return x; }
D5 CC func_D5(D5 x) { return x; }
// The C++ multiple inheritance expansion case is a little more complicated, so
// do some extra checking.
//
-// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5(double %x.0, double %x.1, double %x.2)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.0, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.1, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.2, double*
+// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5([3 x double] %x.coerce)
+// ARM64: bitcast %struct.D5* %{{.*}} to [3 x double]*
+// ARM64: store [3 x double] %x.coerce, [3 x double]*
void call_D5(D5 *p) {
func_D5(*p);
// Check the call site.
//
// ARM64-LABEL: define void @_Z7call_D5P2D5(%struct.D5* %p)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: bitcast i8* %{{.*}} to %struct.I2*
-// ARM64: bitcast %struct.I2* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: bitcast i8* %{{.*}} to %struct.I3*
-// ARM64: bitcast %struct.I3* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}})
+// ARM64: load [3 x double]*
+// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] %{{.*}})
struct Empty { };
struct Float1 { float x; };
struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; };
// PPC: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
-// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
// ARM32: define arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce)
void CC with_empty_base(HVAWithEmptyBase a) {}
};
// PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
-// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
// ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
void CC with_empty_bitfield(HVAWithEmptyBitField a) {}