CGBuilderTy &Builder = CGF.Builder;
llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
- // Handle address alignment for type alignment > 32 bits
+
+ uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+
+ // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
+ // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
+ if (Ty->getAs<VectorType>()) {
+ if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
+ getABIKind() == ARMABIInfo::AAPCS)
+ TyAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
+ else
+ TyAlign = 4;
+ }
+
+ // Handle address alignment for ABI alignment > 4 bytes.
if (TyAlign > 4) {
assert((TyAlign & (TyAlign - 1)) == 0 &&
"Alignment is not power of 2!");
llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
- Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
+ Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
}
- llvm::Type *PTy =
- llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
- llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
uint64_t Offset =
- llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, 4);
+ llvm::RoundUpToAlignment(Size, 4);
llvm::Value *NextAddr =
Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
"ap.next");
Builder.CreateStore(NextAddr, VAListAddrAsBPP);
+ if (Ty->getAs<VectorType>() &&
+ (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) {
+ // We can't directly cast ap.cur to a pointer to the vector type, since
+ // ap.cur may not be correctly aligned for the vector type. We create an
+ // aligned temporary and copy the content over from ap.cur. This is
+ // necessary when the natural alignment of the type is greater than its
+ // ABI alignment.
+ llvm::Type *I8PtrTy = Builder.getInt8PtrTy();
+ CharUnits CharSize = getContext().getTypeSizeInChars(Ty);
+ llvm::Value *AlignedTemp = CGF.CreateTempAlloca(CGF.ConvertType(Ty),
+ "var.align");
+ llvm::Value *Dst = Builder.CreateBitCast(AlignedTemp, I8PtrTy);
+ llvm::Value *Src = Builder.CreateBitCast(Addr, I8PtrTy);
+ Builder.CreateMemCpy(Dst, Src,
+ llvm::ConstantInt::get(CGF.IntPtrTy, CharSize.getQuantity()),
+ TyAlign, false);
+ Addr = AlignedTemp; // The content is now in the aligned location.
+ }
+ llvm::Type *PTy =
+ llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+ llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
+
return AddrTyped;
}
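
The new alignment handling is easier to follow outside of the IRBuilder calls. Below is a minimal standalone sketch, in plain C, of the same arithmetic the patch emits: the AAPCS/APCS clamp applied to a vector's natural alignment, and the (addr + align - 1) & ~(align - 1) round-up of ap.cur. The function names and the is_aapcs flag are illustrative only, not Clang API.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch only: mirrors the patch's alignment logic with plain integers. */
static uint64_t vector_abi_align(uint64_t natural_align, int is_aapcs) {
  if (is_aapcs) {
    /* AAPCS: at least 4-byte, at most 8-byte alignment for vectors. */
    uint64_t a = natural_align < 4 ? 4 : natural_align;
    return a > 8 ? 8 : a;
  }
  return 4; /* APCS: always 4-byte. */
}

static uint64_t align_va_list_pointer(uint64_t ap_cur, uint64_t ty_align) {
  assert((ty_align & (ty_align - 1)) == 0 && "alignment must be a power of 2");
  /* Same round-up the patch emits with CreateAdd/CreateAnd. */
  return (ap_cur + ty_align - 1) & ~(ty_align - 1);
}

int main(void) {
  /* A <2 x i32> vector has 8-byte natural alignment. */
  uint64_t aapcs = vector_abi_align(8, 1); /* 8 */
  uint64_t apcs  = vector_abi_align(8, 0); /* 4 */
  /* ap.cur at 0x1004 rounds up to 0x1008 under AAPCS, stays at 0x1004 under APCS. */
  printf("aapcs: %#llx  apcs: %#llx\n",
         (unsigned long long)align_va_list_pointer(0x1004, aapcs),
         (unsigned long long)align_va_list_pointer(0x1004, apcs));
  return 0;
}

For a <2 x i32> argument this matches what the test below checks: under AAPCS ap.cur is rounded up to an 8-byte boundary, while under APCS it stays 4-byte aligned and the value is instead copied into an aligned temporary.
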
--- /dev/null
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi aapcs -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi apcs-gnu -emit-llvm -o - %s | FileCheck -check-prefix=APCS-GNU %s
+
+#include <stdarg.h>
+
+typedef __attribute__(( ext_vector_type(2) )) int __int2;
+
+// Passing legal vector types as varargs.
+double varargs_vec_2i(int fixed, ...) {
+// CHECK: varargs_vec_2i
+// CHECK: %c3 = alloca <2 x i32>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <2 x i32>*
+// APCS-GNU: varargs_vec_2i
+// APCS-GNU: %c3 = alloca <2 x i32>, align 8
+// APCS-GNU: %var.align = alloca <2 x i32>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <2 x i32>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <2 x i32>* %var.align
+ va_list ap;
+ double sum = fixed;
+ va_start(ap, fixed);
+ __int2 c3 = va_arg(ap, __int2);
+ sum = sum + c3.x + c3.y;
+ va_end(ap);
+ return sum;
+}
+
+double test_2i(__int2 *in) {
+// CHECK: test_2i
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+// APCS-GNU: test_2i
+// APCS-GNU: call double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+ return varargs_vec_2i(3, *in);
+}
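
The comment in the patch also covers 128-bit vectors, whose 16-byte natural alignment exceeds the 8-byte AAPCS cap, so the aligned-temporary memcpy path should be taken under both ABIs. A possible follow-on case is sketched below as a standalone snippet; the __int4 typedef and function name are hypothetical, and CHECK lines are omitted because the exact IR would have to be taken from a real build.

#include <stdarg.h>

typedef __attribute__(( ext_vector_type(4) )) int __int4;

// 16-byte natural alignment > 8-byte AAPCS cap (and > 4-byte APCS alignment),
// so va_arg should copy the value into an aligned temporary on both paths.
double varargs_vec_4i(int fixed, ...) {
  va_list ap;
  double sum = fixed;
  va_start(ap, fixed);
  __int4 c4 = va_arg(ap, __int4);
  sum = sum + c4.x + c4.w;
  va_end(ap);
  return sum;
}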