ARM byval: when type alignment is bigger than ABI alignment, instead of disabling byval, we set realign to true.

author Manman Ren <mren@apple.com>

Tue, 6 Nov 2012 04:58:01 +0000 (04:58 +0000)

committer Manman Ren <mren@apple.com>

Tue, 6 Nov 2012 04:58:01 +0000 (04:58 +0000)
author Manman Ren <mren@apple.com>
Tue, 6 Nov 2012 04:58:01 +0000 (04:58 +0000)
committer Manman Ren <mren@apple.com>
Tue, 6 Nov 2012 04:58:01 +0000 (04:58 +0000)
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index 90603371805d80c3f90ffb6db3af9ec76aa0a2b6..22292603f691957dc71dbb1324057798c5898536 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3221,16 +3221,15 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs,
  
    // Support byval for ARM.
    // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at most 8-byte.
-  // Byval can't handle the case where type alignment is bigger than ABI alignment.
-  // We also increase the threshold for byval due to its overhead.
+  // We realign the indirect argument if type alignment is bigger than ABI alignment.
    uint64_t ABIAlign = 4;
    uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
    if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
        getABIKind() == ARMABIInfo::AAPCS)
      ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
-  if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64*8) &&
-      TyAlign <= ABIAlign) {
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/true);
+  if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
+    return ABIArgInfo::getIndirect(0, /*ByVal=*/true,
+           /*Realign=*/TyAlign <= ABIAlign ? false : true);
    }
  
    // Otherwise, pass by coercing to a structure of the appropriate size.
diff --git a/test/CodeGen/arm-arguments.c b/test/CodeGen/arm-arguments.c

index 8aa33f696d72678df2b7fa64053ce3fa2526903b..63ecd4c5990b7eb0552a6892b11b732a24837898 100644 (file)
--- a/test/CodeGen/arm-arguments.c
+++ b/test/CodeGen/arm-arguments.c
@@ -191,3 +191,35 @@ void g34(struct s34 *s) { f34(*s); }
  // AAPCS: %[[a:.*]] = alloca { [1 x i32] }
  // AAPCS: %[[gep:.*]] = getelementptr { [1 x i32] }* %[[a]], i32 0, i32 0
  // AAPCS: load [1 x i32]* %[[gep]]
+
+// rdar://12596507
+struct s35
+{
+   float v[18]; //make sure byval is on.
+} __attribute__((aligned(16)));
+typedef struct s35 s35_with_align;
+
+typedef __attribute__((neon_vector_type(4))) float float32x4_t;
+static __attribute__((__always_inline__, __nodebug__)) float32x4_t vaddq_f32(
+       float32x4_t __a, float32x4_t __b) {
+ return __a + __b;
+}
+float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
+  float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
+                            *(float32x4_t *)&s2);
+  return v;
+}
+// APCS-GNU: define <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
+// APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16
+// APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
+// APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
+// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
+// APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
+// APCS-GNU: load <4 x float>* %[[d]], align 16
+// AAPCS: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
+// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
+// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
+// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
+// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
+// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
+// AAPCS: load <4 x float>* %[[d]], align 16
author	Manman Ren <mren@apple.com>
	Tue, 6 Nov 2012 04:58:01 +0000 (04:58 +0000)
committer	Manman Ren <mren@apple.com>
	Tue, 6 Nov 2012 04:58:01 +0000 (04:58 +0000)
lib/CodeGen/TargetInfo.cpp		patch \| blob \| history
test/CodeGen/arm-arguments.c		patch \| blob \| history