[ARM] Fix over-alignment in arguments that are HA of 128-bit vectors

author Petr Pavlu <petr.pavlu@arm.com>

Mon, 30 Jul 2018 08:49:30 +0000 (08:49 +0000)

committer Petr Pavlu <petr.pavlu@arm.com>

Mon, 30 Jul 2018 08:49:30 +0000 (08:49 +0000)
author Petr Pavlu <petr.pavlu@arm.com>
Mon, 30 Jul 2018 08:49:30 +0000 (08:49 +0000)
committer Petr Pavlu <petr.pavlu@arm.com>
Mon, 30 Jul 2018 08:49:30 +0000 (08:49 +0000)
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h

index 63bf48abb7ace604fbb714e7ba07fa48b2636579..543165de38d0406c34762dff1607dbb2731dae99 100644 (file)
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -269,14 +269,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
    for (auto Reg : RegList)
      State.AllocateReg(Reg);
  
+  // After the first item has been allocated, the rest are packed as tightly as
+  // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+  // be allocating a bunch of i32 slots).
+  unsigned RestAlign = std::min(Align, Size);
+
    for (auto &It : PendingMembers) {
      It.convertToMem(State.AllocateStack(Size, Align));
      State.addLoc(It);
-
-    // After the first item has been allocated, the rest are packed as tightly
-    // as possible. (E.g. an incoming i64 would have starting Align of 8, but
-    // we'll be allocating a bunch of i32 slots).
-    Align = Size;
+    Align = RestAlign;
    }
  
    // All pending members have now been allocated
diff --git a/test/CodeGen/ARM/aggregate-padding.ll b/test/CodeGen/ARM/aggregate-padding.ll

index bc46a9cdf91366ba3a95e59853e4d19c4015eba9..ae7ab90fcd2f89516a45e7377603964eb1d0d3e7 100644 (file)
--- a/test/CodeGen/ARM/aggregate-padding.ll
+++ b/test/CodeGen/ARM/aggregate-padding.ll
@@ -99,3 +99,19 @@ define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg
    %sum = add i16 %val0, %val2
    ret i16 %sum
  }
+
+; [2 x <4 x i32>] should be aligned only on a 64-bit boundary and contiguous.
+; Neither of the two <4 x i32> elements should introduce any padding to 128 bits.
+define i32 @test_4xi32_64bit_aligned_and_contiguous([8 x double], float, [2 x <4 x i32>] %arg) nounwind {
+; CHECK-LABEL: test_4xi32_64bit_aligned_and_contiguous:
+; CHECK-DAG: ldr [[VAL0_0:r[0-9]+]], [sp, #8]
+; CHECK-DAG: ldr [[VAL1_0:r[0-9]+]], [sp, #24]
+; CHECK: add r0, [[VAL0_0]], [[VAL1_0]]
+
+  %val0 = extractvalue [2 x <4 x i32>] %arg, 0
+  %val0_0 = extractelement <4 x i32> %val0, i32 0
+  %val1 = extractvalue [2 x <4 x i32>] %arg, 1
+  %val1_0 = extractelement <4 x i32> %val1, i32 0
+  %sum = add i32 %val0_0, %val1_0
+  ret i32 %sum
+}
author	Petr Pavlu <petr.pavlu@arm.com>
	Mon, 30 Jul 2018 08:49:30 +0000 (08:49 +0000)
committer	Petr Pavlu <petr.pavlu@arm.com>
	Mon, 30 Jul 2018 08:49:30 +0000 (08:49 +0000)
lib/Target/ARM/ARMCallingConv.h		patch | blob | history
test/CodeGen/ARM/aggregate-padding.ll		patch | blob | history