ARM64: make sure HFAs on the stack get properly aligned.

author Tim Northover <tnorthover@apple.com>

Fri, 18 Apr 2014 10:47:44 +0000 (10:47 +0000)

committer Tim Northover <tnorthover@apple.com>

Fri, 18 Apr 2014 10:47:44 +0000 (10:47 +0000)
author Tim Northover <tnorthover@apple.com>
Fri, 18 Apr 2014 10:47:44 +0000 (10:47 +0000)
committer Tim Northover <tnorthover@apple.com>
Fri, 18 Apr 2014 10:47:44 +0000 (10:47 +0000)
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index e9bb3cdb08b9f211331be9586eed8f97410e3ce7..47c5269ec5a54b956e15f914c918cb591f1f24a1 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3187,13 +3187,14 @@ private:
        // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
        // as sequences of floats since they'll get "holes" inserted as
        // padding by the back end.
-      if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS()) {
-          uint32_t NumStackSlots = getContext().getTypeSize(it->type);
-          NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
-
-          llvm::Type *CoerceTy = llvm::ArrayType::get(
-              llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
-          it->info = ABIArgInfo::getDirect(CoerceTy);
+      if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() &&
+          getContext().getTypeAlign(it->type) < 64) {
+        uint32_t NumStackSlots = getContext().getTypeSize(it->type);
+        NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
+
+        llvm::Type *CoerceTy = llvm::ArrayType::get(
+            llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
+        it->info = ABIArgInfo::getDirect(CoerceTy);
        }
  
        // If we do not have enough VFP registers for the HA, any VFP registers
diff --git a/test/CodeGen/arm-homogenous.c b/test/CodeGen/arm-homogenous.c

index ad21444aabb2a2111aeb3537222230ed43d52c87..854ccaf15e61a7021a1c7022f2cbf1face039bdc 100644 (file)
--- a/test/CodeGen/arm-homogenous.c
+++ b/test/CodeGen/arm-homogenous.c
@@ -178,7 +178,7 @@ void test_struct_of_four_doubles(void) {
  // CHECK64: test_struct_of_four_doubles
  // CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
  // CHECK64-AAPCS: test_struct_of_four_doubles
-// CHECK64-AAPCS: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, [4 x double] {{.*}}, double {{.*}})
+// CHECK64-AAPCS: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
    takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
  }
  
@@ -214,7 +214,7 @@ void test_struct_of_vecs(void) {
  // CHECK64: test_struct_of_vecs
  // CHECK64: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
  // CHECK64-AAPCS: test_struct_of_vecs
-// CHECK64-AAPCS: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, [4 x double] {{.*}})
+// CHECK64-AAPCS: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
    takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
  }
  
diff --git a/test/CodeGen/arm64-aapcs-arguments.c b/test/CodeGen/arm64-aapcs-arguments.c

index 1deeb2530f4bf403fb6d32b9ac54da2e63a0f6be..72527b5acfb9589ec9ff13cfe701716041edcd90 100644 (file)
--- a/test/CodeGen/arm64-aapcs-arguments.c
+++ b/test/CodeGen/arm64-aapcs-arguments.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-abi aapcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
  
  // AAPCS clause C.8 says: If the argument has an alignment of 16 then the NGRN
  // is rounded up to the next even number.
@@ -21,3 +21,15 @@ void test2(int x0, Small x2_x3, int x4, Small x6_x7, int sp, Small sp16) {
  typedef struct { float arr[4]; } HFA;
  void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
  }
+
+
+// However, we shouldn't perform the [N x double] coercion on types which have
+// sufficient alignment to avoid holes on their own. We could coerce to [N x
+// fp128] or something, but leaving them as-is retains more information for
+// users to debug.
+
+//  CHECK: void @test4(<16 x i8> %v0_v2.0, <16 x i8> %v0_v2.1, <16 x i8> %v0_v2.2, <16 x i8> %v3_v5.0, <16 x i8> %v3_v5.1, <16 x i8> %v3_v5.2, [2 x float], <16 x i8> %sp.0, <16 x i8> %sp.1, <16 x i8> %sp.2, double %sp48, <16 x i8> %sp64.0, <16 x i8> %sp64.1, <16 x i8> %sp64.2)
+typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
+typedef struct { int8x16_t arr[3]; } BigHFA;
+void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
+}
author	Tim Northover <tnorthover@apple.com>
	Fri, 18 Apr 2014 10:47:44 +0000 (10:47 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Fri, 18 Apr 2014 10:47:44 +0000 (10:47 +0000)
lib/CodeGen/TargetInfo.cpp		patch | blob | history
test/CodeGen/arm-homogenous.c		patch | blob | history
test/CodeGen/arm64-aapcs-arguments.c		patch | blob | history