// Support byval for ARM.
// The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at most 8-byte.
- // Byval can't handle the case where type alignment is bigger than ABI alignment.
- // We also increase the threshold for byval due to its overhead.
+ // We realign the indirect argument if type alignment is bigger than ABI alignment.
uint64_t ABIAlign = 4;
uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
getABIKind() == ARMABIInfo::AAPCS)
ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
- if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64*8) &&
- TyAlign <= ABIAlign) {
- return ABIArgInfo::getIndirect(0, /*ByVal=*/true);
+ if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
+ return ABIArgInfo::getIndirect(0, /*ByVal=*/true,
+ /*Realign=*/TyAlign <= ABIAlign ? false : true);
}
// Otherwise, pass by coercing to a structure of the appropriate size.
// AAPCS: %[[a:.*]] = alloca { [1 x i32] }
// AAPCS: %[[gep:.*]] = getelementptr { [1 x i32] }* %[[a]], i32 0, i32 0
// AAPCS: load [1 x i32]* %[[gep]]
+
+// rdar://12596507: a struct aligned to 16 bytes that is passed byval must be copied to a 16-byte-aligned temporary.
+struct s35
+{
+ float v[18]; // 18 floats put the struct over the 64-byte byval threshold, so byval is on.
+} __attribute__((aligned(16)));
+typedef struct s35 s35_with_align;
+
+typedef __attribute__((neon_vector_type(4))) float float32x4_t; // vector of 4 floats
+static __attribute__((__always_inline__, __nodebug__)) float32x4_t vaddq_f32(
+ float32x4_t __a, float32x4_t __b) {
+ return __a + __b; // both operands are vectors, so this is a vector add
+}
+float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
+ float32x4_t v = vaddq_f32(*(float32x4_t *)&s1, // read the start of each struct as a 4-float vector
+ *(float32x4_t *)&s2); // the checks below expect such a vector load with align 16
+ return v;
+}
+// APCS-GNU: define <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
+// APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16
+// APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
+// APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
+// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
+// APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
+// APCS-GNU: load <4 x float>* %[[d]], align 16
+// AAPCS: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
+// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
+// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
+// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
+// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
+// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
+// AAPCS: load <4 x float>* %[[d]], align 16