return NumMembers <= 4;
}
+/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
+static ABIArgInfo getDirectX86Hva(llvm::Type *T = nullptr) {
+ auto AI = ABIArgInfo::getDirect(T);
+ AI.setInReg(true);
+ AI.setCanBeFlattened(false);
+ return AI;
+}
+
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
unsigned FreeSSERegs;
};
+enum {
+ // Vectorcall only allows the first 6 parameters to be passed in registers.
+ VectorcallMaxParamNumAsReg = 6
+};
+
/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public SwiftABIInfo {
enum Class {
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
+ ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State,
+ const ABIArgInfo &current) const;
/// \brief Updates the number of available free registers, returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const;
+ void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
+ bool &UsedInAlloca) const;
public:
return true;
}
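+/// For vectorcall, try to upgrade an HVA argument that was deferred on the
+/// first pass: if enough SSE registers remain it is now passed directly in
+/// registers, otherwise it is passed indirectly. Non-HVA arguments are
+/// returned unchanged.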
+ABIArgInfo
+X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
+ const ABIArgInfo &current) const {
+ // Assumes the vectorcall calling convention.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+
+ if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ // HVA types get passed directly in registers if there is room.
+ State.FreeSSERegs -= NumElts;
+ return getDirectX86Hva();
+ }
+ // If there's no room, the HVA gets passed as a normal indirect
+ // structure.
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+ return current;
+}
+
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
}
// vectorcall adds the concept of a homogenous vector aggregate, similar
- // to other targets.
+ // to other targets; regcall also uses some of the HVA rules.
const Type *Base = nullptr;
uint64_t NumElts = 0;
if ((State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- if (Ty->isBuiltinType() || Ty->isVectorType())
+
+ if (State.CC == llvm::CallingConv::X86_RegCall) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+ }
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ if (State.FreeSSERegs >= NumElts &&
+ (Ty->isBuiltinType() || Ty->isVectorType())) {
+ // Actual floating-point and vector types get registers the first time
+ // through if there are registers available.
+ State.FreeSSERegs -= NumElts;
return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
+ } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
+ // HVA types only get registers after everything else has been
+ // assigned, so classify them as indirect for now.
+ return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
+ }
}
- return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
return ABIArgInfo::getDirect();
}
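+/// Classify vectorcall arguments in two passes. The first pass classifies
+/// every argument, letting only the first 6 parameters use registers and
+/// deferring HVAs as indirect. The second pass hands any remaining SSE
+/// registers to the deferred HVAs.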
+void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
+ bool &UsedInAlloca) const {
+ // Vectorcall only allows the first 6 parameters to be passed in registers,
+ // and homogeneous vector aggregates are only put into registers as a second
+ // priority.
+ unsigned Count = 0;
+ CCState ZeroState = State;
+ ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
+ // HVAs only get registers as a second priority, so the deferred
+ // items are handled by going through the arguments a second time.
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = classifyArgumentType(I.type, State);
+ else
+ // Parameters after the 6th cannot be passed in registers,
+ // so pretend there are no registers left for them.
+ I.info = classifyArgumentType(I.type, ZeroState);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ ++Count;
+ }
+ Count = 0;
+ // Go through the arguments a second time to give HVAs registers if any
+ // are still available.
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = reclassifyHvaArgType(I.type, State, I.info);
+ ++Count;
+ }
+}
+
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
CCState State(FI.getCallingConvention());
if (IsMCUABI)
++State.FreeRegs;
bool UsedInAlloca = false;
- for (auto &I : FI.arguments()) {
- I.info = classifyArgumentType(I.type, State);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ computeVectorCallArgs(FI, State, UsedInAlloca);
+ } else {
+ // If not vectorcall, revert to normal behavior.
+ for (auto &I : FI.arguments()) {
+ I.info = classifyArgumentType(I.type, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
// If we needed to use inalloca for any argument, do a second pass and rewrite
}
private:
- ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
- bool IsReturnType) const;
-
- bool IsMingw64;
+ ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
+ bool IsVectorCall, bool IsRegCall) const;
+ ABIArgInfo reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs,
+ const ABIArgInfo &current) const;
+ void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs,
+ bool IsVectorCall, bool IsRegCall) const;
+
+ bool IsMingw64;
};
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
/*allowHigherAlign*/ false);
}
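+/// Second-pass reclassification for vectorcall: an HVA argument that still
+/// fits in the remaining SSE registers is upgraded to a direct, in-register
+/// pass; anything else keeps its current classification.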
+ABIArgInfo
+WinX86_64ABIInfo::reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs,
+ const ABIArgInfo &current) const {
+ // Assumes the vectorcall calling convention.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+
+ if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
+ isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
+ FreeSSERegs -= NumElts;
+ return getDirectX86Hva();
+ }
+ return current;
+}
+
ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
- bool IsReturnType) const {
+ bool IsReturnType, bool IsVectorCall,
+ bool IsRegCall) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
}
- // vectorcall adds the concept of a homogenous vector aggregate, similar to
- // other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (FreeSSERegs >= NumElts) {
- FreeSSERegs -= NumElts;
- if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+ // vectorcall adds the concept of a homogeneous vector aggregate, similar to
+ // other targets.
+ if ((IsVectorCall || IsRegCall) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (IsRegCall) {
+ if (FreeSSERegs >= NumElts) {
+ FreeSSERegs -= NumElts;
+ if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+ }
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ } else if (IsVectorCall) {
+ if (FreeSSERegs >= NumElts &&
+ (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
+ FreeSSERegs -= NumElts;
return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
+ } else if (IsReturnType) {
+ return ABIArgInfo::getExpand();
+ } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
+ // HVAs are deferred and reclassified in the second pass.
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ }
}
- return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
-
if (Ty->isMemberPointerType()) {
// If the member pointer is represented by an LLVM int or ptr, pass it
// directly.
return ABIArgInfo::getDirect();
}
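+/// Win64 vectorcall argument classification, done in two passes: the first
+/// pass classifies all arguments (parameters past the 6th see no free SSE
+/// registers), and the second pass gives leftover SSE registers to HVAs
+/// among the first 6 parameters.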
+void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
+ unsigned FreeSSERegs,
+ bool IsVectorCall,
+ bool IsRegCall) const {
+ unsigned Count = 0;
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
+ else {
+ // Since these cannot be passed in registers, pretend no registers
+ // are left.
+ unsigned ZeroSSERegsAvail = 0;
+ I.info = classify(I.type, /*FreeSSERegs=*/ZeroSSERegsAvail, false,
+ IsVectorCall, IsRegCall);
+ }
+ ++Count;
+ }
+
+ Count = 0;
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
+ ++Count;
+ }
+}
+
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
bool IsVectorCall =
FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall;
}
if (!getCXXABI().classifyReturnType(FI))
- FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true);
+ FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
+ IsVectorCall, IsRegCall);
if (IsVectorCall) {
// We can use up to 6 SSE register parameters with vectorcall.
FreeSSERegs = 6;
} else if (IsRegCall) {
+ // RegCall gives us 16 SSE registers; we can reuse the return registers.
FreeSSERegs = 16;
}
- for (auto &I : FI.arguments())
- I.info = classify(I.type, FreeSSERegs, false);
+ if (IsVectorCall) {
+ computeVectorCallArgs(FI, FreeSSERegs, IsVectorCall, IsRegCall);
+ } else {
+ for (auto &I : FI.arguments())
+ I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
+ }
}
Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=X32
+// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
void __vectorcall v1(int a, int b) {}
-// CHECK: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
+// X32: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
// X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
void __vectorcall v2(char a, char b) {}
-// CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
+// X32: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
// X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
struct Small { int x; };
void __vectorcall v3(int a, struct Small b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
+// X32: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
// X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
struct Large { int a[5]; };
void __vectorcall v4(int a, struct Large b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// X32: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
// X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
struct HFA2 { double x, y; };
struct HFA5 { double v, w, x, y, z; };
void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c)
-// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c)
+// X32: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c)
// HFAs that would require more than six total SSE registers are passed
// indirectly. Additional vector arguments can consume the rest of the SSE
// registers.
void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* inreg %b, double %c)
-// X64: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* %b, double %c)
+// X32: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c)
+// X64: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c)
// Ensure that we pass builtin types directly while counting them against the
// SSE register usage.
void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
+// X32: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
// X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
// Aggregates with more than four elements are not HFAs and are passed byval.
// Because they are not classified as homogeneous, they don't get special
// handling to ensure alignment.
void __vectorcall hfa4(struct HFA5 a) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
+// X32: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
// X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
// Return HFAs of 4 or fewer elements in registers.
static struct HFA2 g_hfa2;
struct HFA2 __vectorcall hfa5(void) { return g_hfa2; }
-// CHECK: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+// X32: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
// X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
typedef float __attribute__((vector_size(16))) v4f32;
struct HVA2 { v4f32 x, y; };
+struct HVA3 { v4f32 w, x, y; };
struct HVA4 { v4f32 w, x, y, z; };
+struct HVA5 { v4f32 w, x, y, z, p; };
-void __vectorcall hva1(int a, struct HVA4 b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01hva1@@72"(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c)
-// X64: define x86_vectorcallcc void @"\01hva1@@80"(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c)
+v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {return b.w;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c)
-void __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
-// CHECK: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* inreg %b, <4 x float> %c)
-// X64: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* %b, <4 x float> %c)
+v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {return c;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c)
-void __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
-// CHECK: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
-// X64: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
+v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {return f.x;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
+
+// Vector types have a higher priority than HVA structures, so vector types are
+// allocated registers first, and HVAs get registers only if enough remain.
+v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {return b.y;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c)
+
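+// 'c' takes one SSE register on the first pass; on the second pass 'a' takes
+// three of the remaining five, 'b' no longer fits and is passed indirectly,
+// and 'd' takes the last two.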
+v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c, struct HVA2 d) {return d.y;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
+
+struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;}
+// X32: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b)
+// X64: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b)
+
+struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;}
+// X32: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result)
+// X64: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
+
+v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f)
typedef float __attribute__((ext_vector_type(3))) v3f32;
struct OddSizeHVA { v3f32 x, y; };
void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
-// CHECK: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
-// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
+// X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
+// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
+
+// The vectorcall ABI only allows passing the first 6 parameters in registers,
+// so 'p7' should not get a register. Instead, 'p5' gets a register on the
+// second pass.
+struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7){ return p1;}
+// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@80"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
+// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@96"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
// PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
// ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
// ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
-// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
+// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(%struct.D2 inreg %x.coerce)
D2 CC func_D2(D2 x) { return x; }
// PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
void CC with_empty_base(HVAWithEmptyBase a) {}
// FIXME: MSVC doesn't consider this an HVA because of the empty base.
-// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(float %a.0, float %a.1, float %a.2)
+// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(%struct.HVAWithEmptyBase inreg %a.coerce)
struct HVAWithEmptyBitField : Float1, Float2 {
int : 0; // Takes no space.
// PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
// ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
-// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
+// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(%struct.HVAWithEmptyBitField inreg %a.coerce)
void CC with_empty_bitfield(HVAWithEmptyBitField a) {}