From: Manman Ren <mren@apple.com>
Date: Tue, 16 Oct 2012 19:01:37 +0000 (+0000)
Subject: ARM ABI: fix ABI alignment issues when passing legal vector types as varargs.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d105e73368e677e65af724947be85ec87a0fa45e;p=clang

ARM ABI: fix ABI alignment issues when passing legal vector types as varargs.

We create an aligned temporary space and copy the content over from ap.cur to
the temporary space. This is necessary if the natural alignment of the type is
greater than the ABI alignment.

rdar://12439123


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@166040 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 75cc5a37a5..b43d33198f 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3169,27 +3169,59 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
   CGBuilderTy &Builder = CGF.Builder;
   llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
-  // Handle address alignment for type alignment > 32 bits
+
+  uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
   uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+
+  // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
+  // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
+  if (Ty->getAs<VectorType>()) {
+    if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
+        getABIKind() == ARMABIInfo::AAPCS)
+      TyAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
+    else
+      TyAlign = 4;
+  }
+
+  // Handle address alignment for ABI alignment > 4 bytes.
   if (TyAlign > 4) {
     assert((TyAlign & (TyAlign - 1)) == 0 &&
            "Alignment is not power of 2!");
     llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
     AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
     AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
-    Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
+    Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
   }
-  llvm::Type *PTy =
-    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
-  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
 
   uint64_t Offset =
-    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, 4);
+    llvm::RoundUpToAlignment(Size, 4);
   llvm::Value *NextAddr =
     Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
                       "ap.next");
   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
 
+  if (Ty->getAs<VectorType>() &&
+      (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) {
+    // We can't directly cast ap.cur to pointer to a vector type, since ap.cur
+    // may not be correctly aligned for the vector type. We create an aligned
+    // temporary space and copy the content over from ap.cur to the temporary
+    // space. This is necessary if the natural alignment of the type is greater
+    // than the ABI alignment.
+    llvm::Type *I8PtrTy = Builder.getInt8PtrTy();
+    CharUnits CharSize = getContext().getTypeSizeInChars(Ty);
+    llvm::Value *AlignedTemp = CGF.CreateTempAlloca(CGF.ConvertType(Ty),
+                                                    "var.align");
+    llvm::Value *Dst = Builder.CreateBitCast(AlignedTemp, I8PtrTy);
+    llvm::Value *Src = Builder.CreateBitCast(Addr, I8PtrTy);
+    Builder.CreateMemCpy(Dst, Src,
+        llvm::ConstantInt::get(CGF.IntPtrTy, CharSize.getQuantity()),
+        TyAlign, false);
+    Addr = AlignedTemp; //The content is in aligned location.
+  }
+  llvm::Type *PTy =
+    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
+
   return AddrTyped;
 }
 
diff --git a/test/CodeGen/arm-abi-vector.c b/test/CodeGen/arm-abi-vector.c
new file mode 100644
index 0000000000..fee41b8514
--- /dev/null
+++ b/test/CodeGen/arm-abi-vector.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi aapcs -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi apcs-gnu -emit-llvm -o - %s | FileCheck -check-prefix=APCS-GNU %s
+
+#include <stdarg.h>
+
+typedef __attribute__(( ext_vector_type(2) ))  int __int2;
+
+// Passing legal vector types as varargs.
+double varargs_vec_2i(int fixed, ...) {
+// CHECK: varargs_vec_2i
+// CHECK: %c3 = alloca <2 x i32>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <2 x i32>*
+// APCS-GNU: varargs_vec_2i
+// APCS-GNU: %c3 = alloca <2 x i32>, align 8
+// APCS-GNU: %var.align = alloca <2 x i32>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <2 x i32>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <2 x i32>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __int2 c3 = va_arg(ap, __int2);
+  sum = sum + c3.x + c3.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_2i(__int2 *in) {
+// CHECK: test_2i
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+// APCS-GNU: test_2i
+// APCS-GNU: call double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+  return varargs_vec_2i(3, *in);
+}