[PowerPC] Optimize passing certain aggregates by value

author Ulrich Weigand <ulrich.weigand@de.ibm.com>

Mon, 21 Jul 2014 00:56:36 +0000 (00:56 +0000)

committer Ulrich Weigand <ulrich.weigand@de.ibm.com>

Mon, 21 Jul 2014 00:56:36 +0000 (00:56 +0000)
author Ulrich Weigand <ulrich.weigand@de.ibm.com>
Mon, 21 Jul 2014 00:56:36 +0000 (00:56 +0000)
committer Ulrich Weigand <ulrich.weigand@de.ibm.com>
Mon, 21 Jul 2014 00:56:36 +0000 (00:56 +0000)
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index 2ed33b00bca90cfe11c0d69bcde7629c5d3097d7..5da22c3e6cbbf3340879d32ef7b0d8216f39b05a 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3178,6 +3178,31 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
        return ABIArgInfo::getDirect(CoerceTy);
      }
  
+    // If an aggregate may end up fully in registers, we do not
+    // use the ByVal method, but pass the aggregate as array.
+    // This is usually beneficial since we avoid forcing the
+    // back-end to store the argument to memory.
+    uint64_t Bits = getContext().getTypeSize(Ty);
+    if (Bits > 0 && Bits <= 8 * GPRBits) {
+      llvm::Type *CoerceTy;
+
+      // Types up to 8 bytes are passed as integer type (which will be
+      // properly aligned in the argument save area doubleword).
+      if (Bits <= GPRBits)
+        CoerceTy = llvm::IntegerType::get(getVMContext(),
+                                          llvm::RoundUpToAlignment(Bits, 8));
+      // Larger types are passed as arrays, with the base type selected
+      // according to the required alignment in the save area.
+      else {
+        uint64_t RegBits = ABIAlign * 8;
+        uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits;
+        llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
+        CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
+      }
+
+      return ABIArgInfo::getDirect(CoerceTy);
+    }
+
      // All other aggregates are passed ByVal.
      return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
                                     /*Realign=*/TyAlign > ABIAlign);
diff --git a/test/CodeGen/ppc64-align-struct.c b/test/CodeGen/ppc64-align-struct.c

index f820d2fbba63e0fef475a04801f05e69f6b1e71f..a50c849b4568dcd8d3a49013f4df259ce0e49887 100644 (file)
--- a/test/CodeGen/ppc64-align-struct.c
+++ b/test/CodeGen/ppc64-align-struct.c
@@ -6,30 +6,48 @@ struct test1 { int x; int y; };
  struct test2 { int x; int y; } __attribute__((aligned (16)));
  struct test3 { int x; int y; } __attribute__((aligned (32)));
  struct test4 { int x; int y; int z; };
+struct test5 { int x[17]; };
+struct test6 { int x[17]; } __attribute__((aligned (16)));
+struct test7 { int x[17]; } __attribute__((aligned (32)));
  
-// CHECK: define void @test1(i32 signext %x, %struct.test1* byval align 8 %y)
+// CHECK: define void @test1(i32 signext %x, i64 %y.coerce)
  void test1 (int x, struct test1 y)
  {
  }
  
-// CHECK: define void @test2(i32 signext %x, %struct.test2* byval align 16 %y)
+// CHECK: define void @test2(i32 signext %x, [1 x i128] %y.coerce)
  void test2 (int x, struct test2 y)
  {
  }
  
-// This case requires run-time realignment of the incoming struct
-// CHECK: define void @test3(i32 signext %x, %struct.test3* byval align 16)
-// CHECK: %y = alloca %struct.test3, align 32
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: define void @test3(i32 signext %x, [2 x i128] %y.coerce)
  void test3 (int x, struct test3 y)
  {
  }
  
-// CHECK: define void @test4(i32 signext %x, %struct.test4* byval align 8 %y)
+// CHECK: define void @test4(i32 signext %x, [2 x i64] %y.coerce)
  void test4 (int x, struct test4 y)
  {
  }
  
+// CHECK: define void @test5(i32 signext %x, %struct.test5* byval align 8 %y)
+void test5 (int x, struct test5 y)
+{
+}
+
+// CHECK: define void @test6(i32 signext %x, %struct.test6* byval align 16 %y)
+void test6 (int x, struct test6 y)
+{
+}
+
+// This case requires run-time realignment of the incoming struct
+// CHECK: define void @test7(i32 signext %x, %struct.test7* byval align 16)
+// CHECK: %y = alloca %struct.test7, align 32
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+void test7 (int x, struct test7 y)
+{
+}
+
  // CHECK: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...)
  // CHECK: %[[CUR:[^ ]+]] = load i8** %ap
  // CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[CUR]], i64 8
diff --git a/test/CodeGen/ppc64-vector.c b/test/CodeGen/ppc64-vector.c

index 3ff07a4d4153e4308ff452c55ab3be9dd8361e14..f0211f0ec197bd5b1423a1a3e527d184169698ed 100644 (file)
--- a/test/CodeGen/ppc64-vector.c
+++ b/test/CodeGen/ppc64-vector.c
@@ -45,7 +45,7 @@ v16i16 test_v16i16(v16i16 x)
    return x;
  }
  
-// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, %struct.v16i16* byval align 16)
+// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, [2 x i128] %x.coerce)
  struct v16i16 test_struct_v16i16(struct v16i16 x)
  {
    return x;
diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c

index cb19dd31f2abf698ffaa5e60f59ec8cf12e19649..acf34a8a80123df2cb88a5176764a4ddf4163497 100644 (file)
--- a/test/CodeGen/ppc64le-aggregates.c
+++ b/test/CodeGen/ppc64le-aggregates.c
@@ -40,7 +40,7 @@ struct f7 func_f7(struct f7 x) { return x; }
  // CHECK: define [8 x float] @func_f8([8 x float] %x.coerce)
  struct f8 func_f8(struct f8 x) { return x; }
  
-// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, %struct.f9* byval align 8 %x)
+// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, [5 x i64] %x.coerce)
  struct f9 func_f9(struct f9 x) { return x; }
  
  // CHECK: define [2 x float] @func_fab([2 x float] %x.coerce)
@@ -98,10 +98,11 @@ struct f8 global_f8;
  void call_f8(void) { global_f8 = func_f8(global_f8); }
  
  // CHECK-LABEL: @call_f9
-// CHECK: %[[TMP1:[^ ]+]] = alloca %struct.f9, align 8
-// CHECK: %[[TMP2:[^ ]+]] = bitcast %struct.f9* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
-// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, %struct.f9* byval align 8 %[[TMP1]])
+// CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
+// CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 1, i1 false)
+// CHECK: %[[TMP3:[^ ]+]] = load [5 x i64]* %[[TMP1]]
+// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
  struct f9 global_f9;
  void call_f9(void) { global_f9 = func_f9(global_f9); }
author	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Mon, 21 Jul 2014 00:56:36 +0000 (00:56 +0000)
committer	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Mon, 21 Jul 2014 00:56:36 +0000 (00:56 +0000)
lib/CodeGen/TargetInfo.cpp		patch | blob | history
test/CodeGen/ppc64-align-struct.c		patch | blob | history
test/CodeGen/ppc64-vector.c		patch | blob | history
test/CodeGen/ppc64le-aggregates.c		patch | blob | history