From: Momchil Velikov
Date: Mon, 30 Jul 2018 17:48:23 +0000 (+0000)
Subject: [ARM, AArch64]: Use unadjusted alignment when passing composites as arguments
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=82b074f75871a4b42c11e4d3dc11af8e62563730;p=clang

[ARM, AArch64]: Use unadjusted alignment when passing composites as arguments

The "Procedure Call Standard for the ARM® Architecture"
(https://static.docs.arm.com/ihi0042/f/IHI0042F_aapcs.pdf) specifies that
composite types are passed according to their "natural alignment", i.e. the
alignment before the alignment adjustment on the entire composite is applied.
The same applies for the AArch64 ABI.

Clang, however, used the adjusted alignment. GCC already implements the ABI
correctly. With this patch Clang becomes compatible with GCC and passes such
arguments in accordance with AAPCS.

Differential Revision: https://reviews.llvm.org/D46013

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@338279 91177308-0d34-0410-b5e6-96231b3b80d8
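To illustrate the distinction the message relies on, here is a minimal sketch.
The struct mirrors the S8 case in the new aapcs-align.cc test below; the
function names are illustrative only.

    // On 32-bit ARM (AAPCS) the members of S8 align to 4 bytes, so the
    // "natural" (unadjusted) alignment of the composite is 4, while the
    // attribute adjusts the alignment of the composite as a whole to 8.
    struct __attribute__((aligned(8))) S8 {
      int x, y;
    };

    // AAPCS classifies the argument by the natural alignment (4), so s is
    // still passed as two 32-bit units -- [2 x i32] in the generated IR --
    // exactly as GCC passes it.
    void callee(int, S8);
    void caller() {
      S8 s = {6, 7};
      callee(1, s); // IR: call void @callee(i32 1, [2 x i32] [i32 6, i32 7])
    }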
---
diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h
index c6f8e2973e..7a50ce8d52 100644
--- a/include/clang/AST/ASTContext.h
+++ b/include/clang/AST/ASTContext.h
@@ -226,6 +226,12 @@ class ASTContext : public RefCountedBase<ASTContext> {
   using TypeInfoMap = llvm::DenseMap<const Type *, TypeInfo>;
   mutable TypeInfoMap MemoizedTypeInfo;
 
+  /// A cache from types to unadjusted alignment information. Only ARM and
+  /// AArch64 targets need this information; keeping it separate prevents
+  /// imposing overhead on TypeInfo size.
+  using UnadjustedAlignMap = llvm::DenseMap<const Type *, unsigned>;
+  mutable UnadjustedAlignMap MemoizedUnadjustedAlign;
+
   /// A cache mapping from CXXRecordDecls to key functions.
   llvm::DenseMap<const CXXRecordDecl *, LazyDeclPtr> KeyFunctions;
 
@@ -2067,6 +2073,16 @@ public:
   unsigned getTypeAlign(QualType T) const { return getTypeInfo(T).Align; }
   unsigned getTypeAlign(const Type *T) const { return getTypeInfo(T).Align; }
 
+  /// Return the ABI-specified natural alignment of a (complete) type \p T,
+  /// before alignment adjustments, in bits.
+  ///
+  /// This alignment is currently used only by ARM and AArch64 when passing
+  /// arguments of a composite type.
+  unsigned getTypeUnadjustedAlign(QualType T) const {
+    return getTypeUnadjustedAlign(T.getTypePtr());
+  }
+  unsigned getTypeUnadjustedAlign(const Type *T) const;
+
   /// Return the ABI-specified alignment of a type, in bits, or 0 if
   /// the type is incomplete and we cannot determine the alignment (for
   /// example, from alignment attributes).
@@ -2077,6 +2093,12 @@ public:
   CharUnits getTypeAlignInChars(QualType T) const;
   CharUnits getTypeAlignInChars(const Type *T) const;
 
+  /// getTypeUnadjustedAlignInChars - Return the ABI-specified alignment of a
+  /// type, in characters, before alignment adjustments. This method does not
+  /// work on incomplete types.
+  CharUnits getTypeUnadjustedAlignInChars(QualType T) const;
+  CharUnits getTypeUnadjustedAlignInChars(const Type *T) const;
+
   // getTypeInfoDataSizeInChars - Return the size of a type, in chars. If the
   // type is a record, its data size is returned.
   std::pair<CharUnits, CharUnits> getTypeInfoDataSizeInChars(QualType T) const;
diff --git a/include/clang/AST/RecordLayout.h b/include/clang/AST/RecordLayout.h
index dc60ef892b..ba60008589 100644
--- a/include/clang/AST/RecordLayout.h
+++ b/include/clang/AST/RecordLayout.h
@@ -71,6 +71,10 @@ private:
   // Alignment - Alignment of record in characters.
   CharUnits Alignment;
 
+  // UnadjustedAlignment - Maximum of the alignments of the record members in
+  // characters.
+  CharUnits UnadjustedAlignment;
+
   /// RequiredAlignment - The required alignment of the object. In the MS-ABI
   /// the __declspec(align()) trumps #pragma pack and must always be obeyed.
   CharUnits RequiredAlignment;
@@ -136,6 +140,7 @@ private:
   CXXRecordLayoutInfo *CXXInfo = nullptr;
 
   ASTRecordLayout(const ASTContext &Ctx, CharUnits size, CharUnits alignment,
+                  CharUnits unadjustedAlignment,
                   CharUnits requiredAlignment, CharUnits datasize,
                   ArrayRef<uint64_t> fieldoffsets);
 
@@ -144,6 +149,7 @@ private:
   // Constructor for C++ records.
   ASTRecordLayout(const ASTContext &Ctx, CharUnits size, CharUnits alignment,
+                  CharUnits unadjustedAlignment,
                   CharUnits requiredAlignment, bool hasOwnVFPtr,
                   bool hasExtendableVFPtr, CharUnits vbptroffset,
@@ -170,6 +176,10 @@ public:
   /// getAlignment - Get the record alignment in characters.
   CharUnits getAlignment() const { return Alignment; }
 
+  /// getUnadjustedAlignment - Get the record alignment in characters, before
+  /// alignment adjustment.
+  CharUnits getUnadjustedAlignment() const { return UnadjustedAlignment; }
+
   /// getSize - Get the record size in characters.
   CharUnits getSize() const { return Size; }
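The two alignment queries the header now exposes can be compared directly. A
minimal sketch, not part of the patch; the helper name is hypothetical, and
`Ctx`/`Ty` are assumed to come from the surrounding CodeGen context:

    // Returns true when an alignment adjustment on the composite raised
    // its alignment above the natural (maximum-of-members) one.
    static bool hasAdjustedAlignment(const clang::ASTContext &Ctx,
                                     clang::QualType Ty) {
      unsigned Natural  = Ctx.getTypeUnadjustedAlign(Ty); // bits, pre-adjustment
      unsigned Adjusted = Ctx.getTypeAlign(Ty);           // bits, post-adjustment
      return Adjusted != Natural;
    }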
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 25dc4441aa..ab428e3fc5 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -2056,6 +2056,27 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   return TypeInfo(Width, Align, AlignIsRequired);
 }
 
+unsigned ASTContext::getTypeUnadjustedAlign(const Type *T) const {
+  UnadjustedAlignMap::iterator I = MemoizedUnadjustedAlign.find(T);
+  if (I != MemoizedUnadjustedAlign.end())
+    return I->second;
+
+  unsigned UnadjustedAlign;
+  if (const auto *RT = T->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    const ASTRecordLayout &Layout = getASTRecordLayout(RD);
+    UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
+  } else if (const auto *ObjCI = T->getAs<ObjCInterfaceType>()) {
+    const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
+    UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
+  } else {
+    UnadjustedAlign = getTypeAlign(T);
+  }
+
+  MemoizedUnadjustedAlign[T] = UnadjustedAlign;
+  return UnadjustedAlign;
+}
+
 unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
   unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
   // Target ppc64 with QPX: simd default alignment for pointer to double is 32.
@@ -2095,6 +2116,16 @@ CharUnits ASTContext::getTypeAlignInChars(const Type *T) const {
   return toCharUnitsFromBits(getTypeAlign(T));
 }
 
+/// getTypeUnadjustedAlignInChars - Return the ABI-specified alignment of a
+/// type, in characters, before alignment adjustments. This method does
+/// not work on incomplete types.
+CharUnits ASTContext::getTypeUnadjustedAlignInChars(QualType T) const {
+  return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
+}
+CharUnits ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const {
+  return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
+}
+
 /// getPreferredTypeAlign - Return the "preferred" alignment of the specified
 /// type for the current target in bits. This can be different than the ABI
 /// alignment in cases where it is beneficial for performance to overalign
diff --git a/lib/AST/RecordLayout.cpp b/lib/AST/RecordLayout.cpp
index 37fe029f53..af3d0af8bb 100644
--- a/lib/AST/RecordLayout.cpp
+++ b/lib/AST/RecordLayout.cpp
@@ -30,10 +30,12 @@ void ASTRecordLayout::Destroy(ASTContext &Ctx) {
 ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size,
                                  CharUnits alignment,
+                                 CharUnits unadjustedAlignment,
                                  CharUnits requiredAlignment,
                                  CharUnits datasize,
                                  ArrayRef<uint64_t> fieldoffsets)
     : Size(size), DataSize(datasize), Alignment(alignment),
+      UnadjustedAlignment(unadjustedAlignment),
       RequiredAlignment(requiredAlignment) {
   FieldOffsets.append(Ctx, fieldoffsets.begin(), fieldoffsets.end());
 }
@@ -41,6 +43,7 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size,
 // Constructor for C++ records.
 ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size,
                                  CharUnits alignment,
+                                 CharUnits unadjustedAlignment,
                                  CharUnits requiredAlignment,
                                  bool hasOwnVFPtr, bool hasExtendableVFPtr,
                                  CharUnits vbptroffset,
@@ -57,6 +60,7 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx,
                                  const BaseOffsetsMapTy& BaseOffsets,
                                  const VBaseOffsetsMapTy& VBaseOffsets)
     : Size(size), DataSize(datasize), Alignment(alignment),
+      UnadjustedAlignment(unadjustedAlignment),
       RequiredAlignment(requiredAlignment),
       CXXInfo(new (Ctx) CXXRecordLayoutInfo) {
   FieldOffsets.append(Ctx, fieldoffsets.begin(), fieldoffsets.end());
diff --git a/lib/AST/RecordLayoutBuilder.cpp b/lib/AST/RecordLayoutBuilder.cpp
index b4b09c7cec..c92ac20f1f 100644
--- a/lib/AST/RecordLayoutBuilder.cpp
+++ b/lib/AST/RecordLayoutBuilder.cpp
@@ -582,6 +582,9 @@ protected:
   /// The alignment if attribute packed is not used.
   CharUnits UnpackedAlignment;
 
+  /// \brief The maximum of the alignments of top-level members.
+  CharUnits UnadjustedAlignment;
+
   SmallVector<uint64_t, 16> FieldOffsets;
 
   /// Whether the external AST source has provided a layout for this
@@ -662,6 +665,7 @@ protected:
                              EmptySubobjectMap *EmptySubobjects)
       : Context(Context), EmptySubobjects(EmptySubobjects), Size(0),
         Alignment(CharUnits::One()), UnpackedAlignment(CharUnits::One()),
+        UnadjustedAlignment(CharUnits::One()),
         UseExternalLayout(false), InferAlignment(false), Packed(false),
         IsUnion(false), IsMac68kAlign(false), IsMsStruct(false),
         UnfilledBitsInLastUnit(0), LastBitfieldTypeSize(0),
@@ -1707,7 +1711,9 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
   setSize(std::max(getSizeInBits(), getDataSizeInBits()));
 
   // Remember max struct/class alignment.
-  UpdateAlignment(Context.toCharUnitsFromBits(FieldAlign),
+  UnadjustedAlignment =
+      std::max(UnadjustedAlignment, Context.toCharUnitsFromBits(FieldAlign));
+  UpdateAlignment(Context.toCharUnitsFromBits(FieldAlign),
                   Context.toCharUnitsFromBits(UnpackedFieldAlign));
 }
 
@@ -1862,6 +1868,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
   setSize(std::max(getSizeInBits(), getDataSizeInBits()));
 
   // Remember max struct/class alignment.
+  UnadjustedAlignment = std::max(UnadjustedAlignment, FieldAlign);
   UpdateAlignment(FieldAlign, UnpackedFieldAlign);
 }
 
@@ -2988,7 +2995,8 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
     if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
       Builder.cxxLayout(RD);
       NewEntry = new (*this) ASTRecordLayout(
-          *this, Builder.Size, Builder.Alignment, Builder.RequiredAlignment,
+          *this, Builder.Size, Builder.Alignment, Builder.Alignment,
+          Builder.RequiredAlignment,
           Builder.HasOwnVFPtr, Builder.HasOwnVFPtr || Builder.PrimaryBase,
           Builder.VBPtrOffset, Builder.DataSize, Builder.FieldOffsets,
           Builder.NonVirtualSize, Builder.Alignment, CharUnits::Zero(),
@@ -2998,7 +3006,8 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
     } else {
       Builder.layout(D);
       NewEntry = new (*this) ASTRecordLayout(
-          *this, Builder.Size, Builder.Alignment, Builder.RequiredAlignment,
+          *this, Builder.Size, Builder.Alignment, Builder.Alignment,
+          Builder.RequiredAlignment,
           Builder.Size, Builder.FieldOffsets);
     }
   } else {
@@ -3019,7 +3028,7 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
     CharUnits NonVirtualSize =
         skipTailPadding ? DataSize : Builder.NonVirtualSize;
     NewEntry = new (*this) ASTRecordLayout(
-        *this, Builder.getSize(), Builder.Alignment,
+        *this, Builder.getSize(), Builder.Alignment, Builder.UnadjustedAlignment,
         /*RequiredAlignment : used by MS-ABI*/ Builder.Alignment,
         Builder.HasOwnVFPtr, RD->isDynamicClass(), CharUnits::fromQuantity(-1),
         DataSize, Builder.FieldOffsets,
@@ -3032,7 +3041,7 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
     Builder.Layout(D);
 
     NewEntry = new (*this) ASTRecordLayout(
-        *this, Builder.getSize(), Builder.Alignment,
+        *this, Builder.getSize(), Builder.Alignment, Builder.UnadjustedAlignment,
         /*RequiredAlignment : used by MS-ABI*/ Builder.Alignment,
         Builder.getSize(), Builder.FieldOffsets);
   }
@@ -3186,6 +3195,7 @@ ASTContext::getObjCLayout(const ObjCInterfaceDecl *D,
   const ASTRecordLayout *NewEntry =
     new (*this) ASTRecordLayout(*this, Builder.getSize(),
                                 Builder.Alignment,
+                                Builder.UnadjustedAlignment,
                                 /*RequiredAlignment : used by MS-ABI*/
                                 Builder.Alignment,
                                 Builder.getDataSize(),
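The builder keeps UnadjustedAlignment as the running maximum of the members'
field alignments, so where an alignment attribute sits determines which of the
two record alignments it affects. A sketch of the resulting values (matching
the S8 and SF8 cases in the tests added below):

    struct __attribute__((aligned(8))) S8 { // adjustment on the whole record:
      int x, y;                             //   member alignments are 4 and 4, so
    };                                      //   UnadjustedAlignment = 4, Alignment = 8

    struct SF8 {                            // alignment raised on a member:
      int x __attribute__((aligned(8)));    //   member alignments are 8 and 4, so
      int y;                                //   UnadjustedAlignment = 8, Alignment = 8
    };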
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 7652cb142c..838e52f883 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -5063,7 +5063,13 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
     if (getTarget().isRenderScriptTarget()) {
       return coerceToIntArray(Ty, getContext(), getVMContext());
     }
-    unsigned Alignment = getContext().getTypeAlign(Ty);
+    unsigned Alignment;
+    if (Kind == AArch64ABIInfo::AAPCS) {
+      Alignment = getContext().getTypeUnadjustedAlign(Ty);
+      Alignment = Alignment < 128 ? 64 : 128;
+    } else {
+      Alignment = getContext().getTypeAlign(Ty);
+    }
     Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
 
     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
@@ -5801,11 +5807,14 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   // most 8-byte. We realign the indirect argument if type alignment is bigger
   // than ABI alignment.
   uint64_t ABIAlign = 4;
-  uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
+  uint64_t TyAlign;
   if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
-      getABIKind() == ARMABIInfo::AAPCS)
+      getABIKind() == ARMABIInfo::AAPCS) {
+    TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
     ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
-
+  } else {
+    TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
+  }
   if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
     assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval");
     return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
@@ -5824,7 +5833,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   unsigned SizeRegs;
   // FIXME: Try to match the types of the arguments more accurately where
   // we can.
-  if (getContext().getTypeAlign(Ty) <= 32) {
+  if (TyAlign <= 4) {
     ElemTy = llvm::Type::getInt32Ty(getVMContext());
     SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
   } else {
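On 32-bit ARM the effect of the change is easiest to see with a packed,
overaligned composite; this mirrors the f6/g6 case in the aapcs-align.cc test
below:

    // packed drops every member's alignment to 1 byte; aligned(8) then
    // adjusts the composite as a whole up to 8. So the natural alignment
    // is 1 and the adjusted alignment is 8.
    struct __attribute__((packed, aligned(8))) P8 {
      int x;
      long long u;
    };

    // Classifying by the adjusted alignment would select 64-bit units;
    // classifying by the natural alignment (this patch, matching GCC)
    // keeps TyAlign <= 4 and selects 32-bit units, so P8 is passed as
    // [4 x i32].
    void f6(int, P8);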
diff --git a/test/CodeGen/aapcs-align.cc b/test/CodeGen/aapcs-align.cc
new file mode 100644
index 0000000000..40fba78235
--- /dev/null
+++ b/test/CodeGen/aapcs-align.cc
@@ -0,0 +1,141 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple arm-none-none-eabi \
+// RUN:   -O2 \
+// RUN:   -target-cpu cortex-a8 \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+
+extern "C" {
+
+// Base case, nothing interesting.
+struct S {
+  int x, y;
+};
+
+void f0(int, S);
+void f0m(int, int, int, int, int, S);
+void g0() {
+  S s = {6, 7};
+  f0(1, s);
+  f0m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g0
+// CHECK: call void @f0(i32 1, [2 x i32] [i32 6, i32 7]
+// CHECK: call void @f0m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i32] [i32 6, i32 7]
+// CHECK: declare void @f0(i32, [2 x i32])
+// CHECK: declare void @f0m(i32, i32, i32, i32, i32, [2 x i32])
+
+// Aligned struct, passed according to its natural alignment.
+struct __attribute__((aligned(8))) S8 {
+  int x, y;
+} s8;
+
+void f1(int, S8);
+void f1m(int, int, int, int, int, S8);
+void g1() {
+  S8 s = {6, 7};
+  f1(1, s);
+  f1m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g1
+// CHECK: call void @f1(i32 1, [2 x i32] [i32 6, i32 7]
+// CHECK: call void @f1m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i32] [i32 6, i32 7]
+// CHECK: declare void @f1(i32, [2 x i32])
+// CHECK: declare void @f1m(i32, i32, i32, i32, i32, [2 x i32])
+
+// Aligned struct, passed according to its natural alignment.
+struct alignas(16) S16 {
+  int x, y;
+};
+
+extern "C" void f2(int, S16);
+extern "C" void f2m(int, int, int, int, int, S16);
+
+void g2() {
+  S16 s = {6, 7};
+  f2(1, s);
+  f2m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g2
+// CHECK: call void @f2(i32 1, [4 x i32] [i32 6, i32 7
+// CHECK: call void @f2m(i32 1, i32 2, i32 3, i32 4, i32 5, [4 x i32] [i32 6, i32 7
+// CHECK: declare void @f2(i32, [4 x i32])
+// CHECK: declare void @f2m(i32, i32, i32, i32, i32, [4 x i32])
+
+// Increased natural alignment.
+struct SF8 {
+  int x __attribute__((aligned(8)));
+  int y;
+};
+
+void f3(int, SF8);
+void f3m(int, int, int, int, int, SF8);
+void g3() {
+  SF8 s = {6, 7};
+  f3(1, s);
+  f3m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g3
+// CHECK: call void @f3(i32 1, [1 x i64] [i64 30064771078]
+// CHECK: call void @f3m(i32 1, i32 2, i32 3, i32 4, i32 5, [1 x i64] [i64 30064771078]
+// CHECK: declare void @f3(i32, [1 x i64])
+// CHECK: declare void @f3m(i32, i32, i32, i32, i32, [1 x i64])
+
+// Increased natural alignment, capped to 8 though.
+struct SF16 {
+  int x;
+  int y alignas(16);
+  int z, a, b, c, d, e, f, g, h, i, j, k;
+};
+
+void f4(int, SF16);
+void f4m(int, int, int, int, int, SF16);
+void g4() {
+  SF16 s = {6, 7};
+  f4(1, s);
+  f4m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g4
+// CHECK: call void @f4(i32 1, %struct.SF16* byval nonnull align 8
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* byval nonnull align 8
+// CHECK: declare void @f4(i32, %struct.SF16* byval align 8)
+// CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval align 8)
+
+// Packed structure.
+struct __attribute__((packed)) P {
+  int x;
+  long long u;
+};
+
+void f5(int, P);
+void f5m(int, int, int, int, int, P);
+void g5() {
+  P s = {6, 7};
+  f5(1, s);
+  f5m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g5
+// CHECK: call void @f5(i32 1, [3 x i32] [i32 6, i32 7, i32 0])
+// CHECK: call void @f5m(i32 1, i32 2, i32 3, i32 4, i32 5, [3 x i32] [i32 6, i32 7, i32 0])
+// CHECK: declare void @f5(i32, [3 x i32])
+// CHECK: declare void @f5m(i32, i32, i32, i32, i32, [3 x i32])
+
+
+// Packed and aligned, alignment causes padding at the end.
+struct __attribute__((packed, aligned(8))) P8 {
+  int x;
+  long long u;
+};
+
+void f6(int, P8);
+void f6m(int, int, int, int, int, P8);
+void g6() {
+  P8 s = {6, 7};
+  f6(1, s);
+  f6m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g6
+// CHECK: call void @f6(i32 1, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: call void @f6m(i32 1, i32 2, i32 3, i32 4, i32 5, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: declare void @f6(i32, [4 x i32])
+// CHECK: declare void @f6m(i32, i32, i32, i32, i32, [4 x i32])
+}
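On AArch64 the AAPCS path snaps the unadjusted alignment to the only two
values relevant for classification, 64 or 128 bits. An alignment attribute on
a member raises the natural alignment, so a single 128-bit unit is selected,
as the f3/g3 case in aapcs64-align.cc below checks:

    struct SF16 {
      long x __attribute__((aligned(16)));  // natural alignment: 16 bytes
      long y;
    };
    // With a 128-bit natural alignment, SF16 is passed as an i128 rather
    // than as [2 x i64].
    void f3(long, SF16);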
diff --git a/test/CodeGen/aapcs64-align.cc b/test/CodeGen/aapcs64-align.cc
new file mode 100644
index 0000000000..1b7c99ea87
--- /dev/null
+++ b/test/CodeGen/aapcs64-align.cc
@@ -0,0 +1,103 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-none-eabi \
+// RUN:   -O2 \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+
+extern "C" {
+
+// Base case, nothing interesting.
+struct S {
+  long x, y;
+};
+
+void f0(long, S);
+void f0m(long, long, long, long, long, S);
+void g0() {
+  S s = {6, 7};
+  f0(1, s);
+  f0m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g0
+// CHECK: call void @f0(i64 1, [2 x i64] [i64 6, i64 7]
+// CHECK: call void @f0m{{.*}}[2 x i64] [i64 6, i64 7]
+// CHECK: declare void @f0(i64, [2 x i64])
+// CHECK: declare void @f0m(i64, i64, i64, i64, i64, [2 x i64])
+
+// Aligned struct, passed according to its natural alignment.
+struct __attribute__((aligned(16))) S16 {
+  long x, y;
+} s16;
+
+void f1(long, S16);
+void f1m(long, long, long, long, long, S16);
+void g1() {
+  S16 s = {6, 7};
+  f1(1, s);
+  f1m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g1
+// CHECK: call void @f1{{.*}}[2 x i64] [i64 6, i64 7]
+// CHECK: call void @f1m{{.*}}[2 x i64] [i64 6, i64 7]
+// CHECK: declare void @f1(i64, [2 x i64])
+// CHECK: declare void @f1m(i64, i64, i64, i64, i64, [2 x i64])
+
+// Increased natural alignment.
+struct SF16 {
+  long x __attribute__((aligned(16)));
+  long y;
+};
+
+void f3(long, SF16);
+void f3m(long, long, long, long, long, SF16);
+void g3() {
+  SF16 s = {6, 7};
+  f3(1, s);
+  f3m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g3
+// CHECK: call void @f3(i64 1, i128 129127208515966861318)
+// CHECK: call void @f3m(i64 1, i64 2, i64 3, i64 4, i64 5, i128 129127208515966861318)
+// CHECK: declare void @f3(i64, i128)
+// CHECK: declare void @f3m(i64, i64, i64, i64, i64, i128)
+
+
+// Packed structure.
+struct __attribute__((packed)) P {
+  int x;
+  long u;
+};
+
+void f4(int, P);
+void f4m(int, int, int, int, int, P);
+void g4() {
+  P s = {6, 7};
+  f4(1, s);
+  f4m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g4()
+// CHECK: call void @f4(i32 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: declare void @f4(i32, [2 x i64])
+// CHECK: declare void @f4m(i32, i32, i32, i32, i32, [2 x i64])
+
+
+// Packed structure, overaligned, same as above.
+struct __attribute__((packed, aligned(16))) P16 {
+  int x;
+  long y;
+};
+
+void f5(int, P16);
+void f5m(int, int, int, int, int, P16);
+void g5() {
+  P16 s = {6, 7};
+  f5(1, s);
+  f5m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g5()
+// CHECK: call void @f5(i32 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f5m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: declare void @f5(i32, [2 x i64])
+// CHECK: declare void @f5m(i32, i32, i32, i32, i32, [2 x i64])
+
+}
diff --git a/test/CodeGen/arm-arguments.c b/test/CodeGen/arm-arguments.c
index ef4e76054f..ca6b70b446 100644
--- a/test/CodeGen/arm-arguments.c
+++ b/test/CodeGen/arm-arguments.c
@@ -211,10 +211,13 @@ float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
 // APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align {{[0-9]+}} %[[b]], i8* align {{[0-9]+}} %[[c]]
 // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
 // APCS-GNU: load <4 x float>, <4 x float>* %[[d]], align 16
-// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 8, %struct.s35* byval align 8)
-// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
-// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
-// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
-// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %[[b]], i8* align 8 %[[c]]
-// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// AAPCS: load <4 x float>, <4 x float>* %[[d]], align 16
+
+// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 4 %s1, %struct.s35* byval align 4 %s2)
+// AAPCS: %[[a_addr:.*]] = alloca <4 x float>, align 16
+// AAPCS: %[[b_addr:.*]] = alloca <4 x float>, align 16
+// AAPCS: %[[p1:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
+// AAPCS: %[[a:.*]] = load <4 x float>, <4 x float>* %[[p1]], align 4
+// AAPCS: %[[p2:.*]] = bitcast %struct.s35* %s2 to <4 x float>*
+// AAPCS: %[[b:.*]] = load <4 x float>, <4 x float>* %[[p2]], align 4
+// AAPCS: store <4 x float> %[[a]], <4 x float>* %[[a_addr]], align 16
+// AAPCS: store <4 x float> %[[b]], <4 x float>* %[[b_addr]], align 16