From: Tim Northover
Date: Thu, 27 Nov 2014 21:02:49 +0000 (+0000)
Subject: AArch64: simplify PCS mapping.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5609bba46f1f7da78b097298970cd2c0378aad6b;p=clang

AArch64: simplify PCS mapping.

Now that LLVM can count the registers needed to implement AAPCS rules, we
don't need to duplicate that logic here. This means we can drop the explicit
padding and also use more natural types in many cases (e.g.
"struct { float arr[3]; }" used to end up as "[2 x double]" to avoid holes on
the stack).

The one wrinkle is that AAPCS va_arg was also using the register counting
machinery. But the local replacement isn't too bad.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@222904 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 6e2c83e1fc..cf7050757e 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3813,9 +3813,7 @@ private:
   bool isDarwinPCS() const { return Kind == DarwinPCS; }
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
-                                  bool &IsHA, unsigned &AllocatedGPR,
-                                  bool &IsSmallAggr, bool IsNamedArg) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                          uint64_t Members) const override;
@@ -3823,68 +3821,11 @@ private:
   bool isIllegalVectorType(QualType Ty) const;
 
   void computeInfo(CGFunctionInfo &FI) const override {
-    // To correctly handle Homogeneous Aggregate, we need to keep track of the
-    // number of SIMD and Floating-point registers allocated so far.
-    // If the argument is an HFA or an HVA and there are sufficient unallocated
-    // SIMD and Floating-point registers, then the argument is allocated to SIMD
-    // and Floating-point Registers (with one register per member of the HFA or
-    // HVA). Otherwise, the NSRN is set to 8.
-    unsigned AllocatedVFP = 0;
-
-    // To correctly handle small aggregates, we need to keep track of the number
-    // of GPRs allocated so far. If the small aggregate can't all fit into
-    // registers, it will be on stack. We don't allow the aggregate to be
-    // partially in registers.
-    unsigned AllocatedGPR = 0;
-
-    // Find the number of named arguments. Variadic arguments get special
-    // treatment with the Darwin ABI.
-    unsigned NumRequiredArgs = FI.getNumRequiredArgs();
-
     if (!getCXXABI().classifyReturnType(FI))
       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
 
-    unsigned ArgNo = 0;
-    for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
-         it != ie; ++it, ++ArgNo) {
-      unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR;
-      bool IsHA = false, IsSmallAggr = false;
-      const unsigned NumVFPs = 8;
-      const unsigned NumGPRs = 8;
-      bool IsNamedArg = ArgNo < NumRequiredArgs;
-      it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA,
-                                      AllocatedGPR, IsSmallAggr, IsNamedArg);
-
-      // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
-      // as sequences of floats since they'll get "holes" inserted as
-      // padding by the back end.
-      if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() &&
-          getContext().getTypeAlign(it->type) < 64) {
-        uint32_t NumStackSlots = getContext().getTypeSize(it->type);
-        NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
-
-        llvm::Type *CoerceTy = llvm::ArrayType::get(
-            llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
-        it->info = ABIArgInfo::getDirect(CoerceTy);
-      }
-      // If we do not have enough VFP registers for the HA, any VFP registers
-      // that are unallocated are marked as unavailable. To achieve this, we add
-      // padding of (NumVFPs - PreAllocation) floats.
-      if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
-        it->info.setPaddingType(PaddingTy);
-      }
-
-      // If we do not have enough GPRs for the small aggregate, any GPR regs
-      // that are unallocated are marked as unavailable.
-      if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR);
-        it->info =
-            ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy);
-      }
-    }
+    for (auto &it : FI.arguments())
+      it.info = classifyArgumentType(it.type);
   }
 
   llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -3915,12 +3856,7 @@ public:
 };
 }
 
-ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
-                                                unsigned &AllocatedVFP,
-                                                bool &IsHA,
-                                                unsigned &AllocatedGPR,
-                                                bool &IsSmallAggr,
-                                                bool IsNamedArg) const {
+ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
   // Handle illegal vector types here.
@@ -3928,48 +3864,26 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
     uint64_t Size = getContext().getTypeSize(Ty);
     if (Size <= 32) {
       llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
-      AllocatedGPR++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 64) {
       llvm::Type *ResType =
          llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 128) {
       llvm::Type *ResType =
          llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
   }
 
-  if (Ty->isVectorType())
-    // Size of a legal vector should be either 64 or 128.
-    AllocatedVFP++;
-  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->getKind() == BuiltinType::Half ||
-        BT->getKind() == BuiltinType::Float ||
-        BT->getKind() == BuiltinType::Double ||
-        BT->getKind() == BuiltinType::LongDouble)
-      AllocatedVFP++;
-  }
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
      Ty = EnumTy->getDecl()->getIntegerType();
 
-    if (!Ty->isFloatingType() && !Ty->isVectorType()) {
-      unsigned Alignment = getContext().getTypeAlign(Ty);
-      if (!isDarwinPCS() && Alignment > 64)
-        AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
-      int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
-      AllocatedGPR += RegsNeeded;
-    }
     return (Ty->isPromotableIntegerType() && isDarwinPCS()
                ? ABIArgInfo::getExtend()
                : ABIArgInfo::getDirect());
@@ -3978,9 +3892,8 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
   // Structures with either a non-trivial destructor or a non-trivial
   // copy constructor are always indirect.
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
-                                          CGCXXABI::RAA_DirectInMemory);
+                                   CGCXXABI::RAA_DirectInMemory);
   }
 
   // Empty records are always ignored on Darwin, but actually passed in C++ mode
@@ -3989,7 +3902,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
       return ABIArgInfo::getIgnore();
 
-    ++AllocatedGPR;
     return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
   }
 
@@ -3997,28 +3909,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
   const Type *Base = nullptr;
   uint64_t Members = 0;
   if (isHomogeneousAggregate(Ty, Base, Members)) {
-    IsHA = true;
-    if (!IsNamedArg && isDarwinPCS()) {
-      // With the Darwin ABI, variadic arguments are always passed on the stack
-      // and should not be expanded. Treat variadic HFAs as arrays of doubles.
-      uint64_t Size = getContext().getTypeSize(Ty);
-      llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
-    }
-    AllocatedVFP += Members;
-    return ABIArgInfo::getExpand();
+    return ABIArgInfo::getDirect(
+        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
   uint64_t Size = getContext().getTypeSize(Ty);
   if (Size <= 128) {
     unsigned Alignment = getContext().getTypeAlign(Ty);
-    if (!isDarwinPCS() && Alignment > 64)
-      AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
     Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
-    AllocatedGPR += Size / 64;
-    IsSmallAggr = true;
+
     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
     // For aggregates with 16-byte alignment, we use i128.
     if (Alignment < 128 && Size == 128) {
@@ -4028,7 +3928,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
   }
 
-  AllocatedGPR++;
   return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 }
 
@@ -4104,14 +4003,25 @@ bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
   return Members <= 4;
 }
 
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                            CodeGenFunction &CGF) const {
-  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
-  bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
-                                       IsSmallAggr, false /*IsNamedArg*/);
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr,
+                                            QualType Ty,
+                                            CodeGenFunction &CGF) const {
+  ABIArgInfo AI = classifyArgumentType(Ty);
   bool IsIndirect = AI.isIndirect();
 
+  llvm::Type *BaseTy = CGF.ConvertType(Ty);
+  if (IsIndirect)
+    BaseTy = llvm::PointerType::getUnqual(BaseTy);
+  else if (AI.getCoerceToType())
+    BaseTy = AI.getCoerceToType();
+
+  unsigned NumRegs = 1;
+  if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
+    BaseTy = ArrTy->getElementType();
+    NumRegs = ArrTy->getNumElements();
+  }
+  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
   //
@@ -4131,21 +4041,19 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty
   llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
   int reg_top_index;
-  int RegSize;
-  if (AllocatedGPR) {
-    assert(!AllocatedVFP && "Arguments never split between int & VFP regs");
+  int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
+  if (!IsFPR) {
     // 3 is the field number of __gr_offs
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
     reg_top_index = 1; // field number for __gr_top
-    RegSize = 8 * AllocatedGPR;
+    RegSize = llvm::RoundUpToAlignment(RegSize, 8);
   } else {
-    assert(!AllocatedGPR && "Argument must go in VFP or int regs");
     // 4 is the field number of __vr_offs.
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
     reg_top_index = 2; // field number for __vr_top
-    RegSize = 16 * AllocatedVFP;
+    RegSize = 16 * NumRegs;
   }
 
   //=======================================
@@ -4169,7 +4077,7 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty
   // Integer arguments may need to correct register alignment (for example a
   // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
   // align __gr_offs to calculate the potential address.
-  if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
+  if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
     int Align = Ctx.getTypeAlign(Ty) / 8;
 
     reg_offs = CGF.Builder.CreateAdd(
diff --git a/test/CodeGen/arm-aapcs-vfp.c b/test/CodeGen/arm-aapcs-vfp.c
index da1e675aba..7ef7c4e52e 100644
--- a/test/CodeGen/arm-aapcs-vfp.c
+++ b/test/CodeGen/arm-aapcs-vfp.c
@@ -29,7 +29,7 @@ struct homogeneous_struct {
   float f4;
 };
 // CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}})
-// CHECK64: define %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK64: define %struct.homogeneous_struct @test_struct([4 x float] %{{.*}})
 extern struct homogeneous_struct struct_callee(struct homogeneous_struct);
 struct homogeneous_struct test_struct(struct homogeneous_struct arg) {
   return struct_callee(arg);
@@ -44,7 +44,7 @@ struct nested_array {
   double d[4];
 };
 // CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}})
-// CHECK64: define void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_array([4 x double] %{{.*}})
 extern void array_callee(struct nested_array);
 void test_array(struct nested_array arg) {
   array_callee(arg);
@@ -52,7 +52,7 @@ void test_array(struct nested_array arg) {
 
 extern void complex_callee(__complex__ double);
 // CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}})
-// CHECK64: define void @test_complex(double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_complex([2 x double] %cd.coerce)
 void test_complex(__complex__ double cd) {
   complex_callee(cd);
 }
@@ -98,7 +98,7 @@ void test_hetero(struct heterogeneous_struct arg) {
 
 // Neon multi-vector types are homogeneous aggregates.
 // CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}})
-// CHECK64: define <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+// CHECK64: define <16 x i8> @f0([4 x <16 x i8>] %{{.*}})
 int8x16_t f0(int8x16x4_t v4) {
   return vaddq_s8(v4.val[0], v4.val[3]);
 }
@@ -112,7 +112,7 @@ struct neon_struct {
   int16x4_t v4;
 };
 // CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}})
-// CHECK64: define void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}})
+// CHECK64: define void @test_neon([4 x <8 x i8>] %{{.*}})
 extern void neon_callee(struct neon_struct);
 void test_neon(struct neon_struct arg) {
   neon_callee(arg);
diff --git a/test/CodeGen/arm-homogenous.c b/test/CodeGen/arm-homogenous.c
index 2ab6c105a5..3426d995ca 100644
--- a/test/CodeGen/arm-homogenous.c
+++ b/test/CodeGen/arm-homogenous.c
@@ -5,7 +5,7 @@
 // RUN:   -ffreestanding -emit-llvm -w -o - %s | FileCheck -check-prefix=CHECK64 %s
 
 // RUN: %clang_cc1 -triple arm64-linux-gnu -ffreestanding -emit-llvm -w -o - %s \
-// RUN:   | FileCheck --check-prefix=CHECK64-AAPCS %s
+// RUN:   | FileCheck --check-prefix=CHECK64 %s
 
 typedef long long int64_t;
 typedef unsigned int uint32_t;
@@ -176,9 +176,7 @@ void test_struct_of_four_doubles(void) {
 // CHECK: test_struct_of_four_doubles
 // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
 // CHECK64: test_struct_of_four_doubles
-// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_four_doubles
-// CHECK64-AAPCS: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, [4 x double] {{.*}}, [4 x double] {{.*}}, double {{.*}})
   takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
 }
 
@@ -212,9 +210,7 @@ void test_struct_of_vecs(void) {
 // CHECK: test_struct_of_vecs
 // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}})
 // CHECK64: test_struct_of_vecs
-// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_vecs
-// CHECK64-AAPCS: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, [4 x <8 x i8>] {{.*}}, [4 x <8 x i8>] {{.*}}, double {{.*}})
   takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
 }
 
diff --git a/test/CodeGen/arm64-aapcs-arguments.c b/test/CodeGen/arm64-aapcs-arguments.c
index 38ac522de5..ab302d4070 100644
--- a/test/CodeGen/arm64-aapcs-arguments.c
+++ b/test/CodeGen/arm64-aapcs-arguments.c
@@ -17,7 +17,7 @@ void test2(int x0, Small x2_x3, int x4, Small x6_x7, int sp, Small sp16) {
 
 // stack in order to avoid holes.
 // Make sure we get all of them, and not just the first:
-// CHECK: void @test3(float %s0_s3.0, float %s0_s3.1, float %s0_s3.2, float %s0_s3.3, float %s4, [3 x float], [2 x double] %sp.coerce, [2 x double] %sp16.coerce)
+// CHECK: void @test3([4 x float] %s0_s3.coerce, float %s4, [4 x float] %sp.coerce, [4 x float] %sp16.coerce)
 typedef struct { float arr[4]; } HFA;
 void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
 }
@@ -28,7 +28,7 @@ void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
 // fp128] or something, but leaving them as-is retains more information for
 // users to debug.
-// CHECK: void @test4(<16 x i8> %v0_v2.0, <16 x i8> %v0_v2.1, <16 x i8> %v0_v2.2, <16 x i8> %v3_v5.0, <16 x i8> %v3_v5.1, <16 x i8> %v3_v5.2, [2 x float], <16 x i8> %sp.0, <16 x i8> %sp.1, <16 x i8> %sp.2, double %sp48, <16 x i8> %sp64.0, <16 x i8> %sp64.1, <16 x i8> %sp64.2)
+// CHECK: void @test4([3 x <16 x i8>] %v0_v2.coerce, [3 x <16 x i8>] %v3_v5.coerce, [3 x <16 x i8>] %sp.coerce, double %sp48, [3 x <16 x i8>] %sp64.coerce)
 typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
 typedef struct { int8x16_t arr[3]; } BigHFA;
 void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
@@ -46,6 +46,6 @@ unsigned char test5(unsigned char a, signed short b) {
 __fp16 test_half(__fp16 A) { }
 
 // __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
-// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+// CHECK: define %struct.HFA_half @test_half_hfa([4 x half] %{{.*}})
 struct HFA_half { __fp16 a[4]; };
 struct HFA_half test_half_hfa(struct HFA_half A) { }
diff --git a/test/CodeGen/arm64-arguments.c b/test/CodeGen/arm64-arguments.c
index b2de08dbe6..ae1ff98800 100644
--- a/test/CodeGen/arm64-arguments.c
+++ b/test/CodeGen/arm64-arguments.c
@@ -123,8 +123,7 @@ void f31(struct s31 s) { }
 struct s32 { double x; };
 void f32(struct s32 s) { }
-// Expand Homogeneous Aggregate.
-// CHECK: @f32(double %{{.*}})
+// CHECK: @f32([1 x double] %{{.*}})
 
 // A composite type larger than 16 bytes should be passed indirectly.
 struct s33 { char buf[32*32]; };
@@ -197,7 +196,7 @@ typedef struct s35 s35_with_align;
 
 typedef __attribute__((neon_vector_type(4))) float float32x4_t;
 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
-// CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3)
+// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s35, align 16
 // CHECK: %s2 = alloca %struct.s35, align 16
 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
@@ -598,24 +597,24 @@ int caller43_stack() {
 __attribute__ ((noinline))
 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
               s40_no_align s1, s40_no_align s2) {
-// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
+// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 }
 int caller40_split() {
 // CHECK: define i32 @caller40_split()
-// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
+// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
  return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
 }
 
 __attribute__ ((noinline))
 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
               s41_with_align s1, s41_with_align s2) {
-// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce)
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 }
 int caller41_split() {
 // CHECK: define i32 @caller41_split()
-// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}})
+// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}})
  return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
 }
 
@@ -642,7 +641,7 @@ float test_hfa(int n, ...)
 {
 
 float test_hfa_call(struct HFA *a) {
// CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
-// CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}})
+// CHECK: call float (i32, ...)* @test_hfa(i32 1, [4 x float] {{.*}})
   test_hfa(1, *a);
 }
diff --git a/test/CodeGenCXX/homogeneous-aggregates.cpp b/test/CodeGenCXX/homogeneous-aggregates.cpp
index 4800aacbfe..77c6b3a527 100644
--- a/test/CodeGenCXX/homogeneous-aggregates.cpp
+++ b/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -46,7 +46,7 @@ D1 CC func_D1(D1 x) { return x; }
 
 // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
-// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2)
+// ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
 // X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
 D2 CC func_D2(D2 x) { return x; }
 
@@ -57,7 +57,7 @@ D3 CC func_D3(D3 x) { return x; }
 
 // PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
-// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3)
+// ARM64: define %struct.D4 @_Z7func_D42D4([4 x double] %x.coerce)
 D4 CC func_D4(D4 x) { return x; }
 
 D5 CC func_D5(D5 x) { return x; }
@@ -67,17 +67,9 @@ D5 CC func_D5(D5 x) { return x; }
 // The C++ multiple inheritance expansion case is a little more complicated, so
 // do some extra checking.
 //
-// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5(double %x.0, double %x.1, double %x.2)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.0, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.1, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.2, double*
+// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5([3 x double] %x.coerce)
+// ARM64: bitcast %struct.D5* %{{.*}} to [3 x double]*
+// ARM64: store [3 x double] %x.coerce, [3 x double]*
 
 void call_D5(D5 *p) {
   func_D5(*p);
@@ -86,21 +78,8 @@ void call_D5(D5 *p) {
 
 // Check the call site.
 //
 // ARM64-LABEL: define void @_Z7call_D5P2D5(%struct.D5* %p)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: bitcast i8* %{{.*}} to %struct.I2*
-// ARM64: bitcast %struct.I2* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: bitcast i8* %{{.*}} to %struct.I3*
-// ARM64: bitcast %struct.I3* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}})
+// ARM64: load [3 x double]*
+// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] %{{.*}})
 
 struct Empty { };
 struct Float1 { float x; };
@@ -108,7 +87,7 @@ struct Float2 { float y; };
 struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; };
 
 // PPC: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
-// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce)
 void CC with_empty_base(HVAWithEmptyBase a) {}
 
@@ -121,7 +100,7 @@ struct HVAWithEmptyBitField : Float1, Float2 {
 };
 
 // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
-// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
 // X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
 void CC with_empty_bitfield(HVAWithEmptyBitField a) {}
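
As a quick illustration of the change described in the commit message: the new classifyArgumentType coerces a homogeneous aggregate directly to an array of its base type instead of padding it out to a sequence of doubles. The sketch below follows the style of the CHECK-based tests in the patch; the struct and function names are hypothetical and the expected IR line is illustrative rather than copied from a test.

// An odd-sized HFA: under AAPCS it used to be coerced to [2 x double] to
// avoid stack holes, and with this change it is passed as [3 x float].
typedef struct { float arr[3]; } ThreeFloats;

float use_hfa(ThreeFloats t) { return t.arr[0] + t.arr[2]; }
// Expected AArch64 AAPCS lowering after this change (illustrative):
//   define float @use_hfa([3 x float] %t.coerce)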