Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
- ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State,
- const ABIArgInfo& current) const;
+
/// \brief Updates the number of available free registers, returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
return true;
}
-ABIArgInfo
-X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
- const ABIArgInfo ¤t) const {
- // Assumes vectorCall calling convention.
- const Type *Base = nullptr;
- uint64_t NumElts = 0;
-
- if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
- isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- // HVA types get passed directly in registers if there is room.
- State.FreeSSERegs -= NumElts;
- return getDirectX86Hva();
- }
- // If there's no room, the HVA gets passed as normal indirect
- // structure.
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- }
- return current;
-}
-
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
}
}
- // vectorcall adds the concept of a homogenous vector aggregate, similar
- // to other targets, regcall uses some of the HVA rules.
+ // Regcall uses the concept of a homogenous vector aggregate, similar
+ // to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if ((State.CC == llvm::CallingConv::X86_VectorCall ||
- State.CC == llvm::CallingConv::X86_RegCall) &&
+ if (State.CC == llvm::CallingConv::X86_RegCall &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.CC == llvm::CallingConv::X86_RegCall) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- if (Ty->isBuiltinType() || Ty->isVectorType())
- return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
-
- }
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
- if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) {
- // Actual floating-point types get registers first time through if
- // there is registers available
- State.FreeSSERegs -= NumElts;
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
- } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
- // HVA Types only get registers after everything else has been
- // set, so it gets set as indirect for now.
- return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
- }
+ return ABIArgInfo::getExpand();
}
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
bool &UsedInAlloca) const {
- // Vectorcall only allows the first 6 parameters to be passed in registers,
- // and homogeneous vector aggregates are only put into registers as a second
- // priority.
- unsigned Count = 0;
- CCState ZeroState = State;
- ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
- // HVAs must be done as a second priority for registers, so the deferred
- // items are dealt with by going through the pattern a second time.
+ // Vectorcall x86 works subtly different than in x64, so the format is
+ // a bit different than the x64 version. First, all vector types (not HVAs)
+ // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers.
+ // This differs from the x64 implementation, where the first 6 by INDEX get
+ // registers.
+ // After that, integers AND HVAs are assigned Left to Right in the same pass.
+ // Integers are passed as ECX/EDX if one is available (in order). HVAs will
+ // first take up the remaining YMM/XMM registers. If insufficient registers
+ // remain but an integer register (ECX/EDX) is available, it will be passed
+ // in that, else, on the stack.
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = classifyArgumentType(I.type, State);
- else
- // Parameters after the 6th cannot be passed in registers,
- // so pretend there are no registers left for them.
- I.info = classifyArgumentType(I.type, ZeroState);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- ++Count;
+ // First pass do all the vector types.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = ABIArgInfo::getDirect();
+ } else {
+ I.info = classifyArgumentType(Ty, State);
+ }
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
- Count = 0;
- // Go through the arguments a second time to get HVAs registers if there
- // are still some available.
+
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, State, I.info);
- ++Count;
+ // Second pass, do the rest!
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
+
+ if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
+ // Assign true HVAs (non vector/native FP types).
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = getDirectX86Hva();
+ } else {
+ I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+ } else if (!IsHva) {
+ // Assign all Non-HVAs, so this will exclude Vector/FP args.
+ I.info = classifyArgumentType(Ty, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
}
bool IsRegCall) const {
unsigned Count = 0;
for (auto &I : FI.arguments()) {
+ // Vectorcall in x64 only permits the first 6 arguments to be passed
+ // as XMM/YMM registers.
if (Count < VectorcallMaxParamNumAsReg)
I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
else {
++Count;
}
- Count = 0;
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
- ++Count;
+ I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
}
}
// X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
-// The Vectorcall ABI only allows passing the first 6 items in registers, so this shouldn't
+// The Vectorcall ABI only allows passing the first 6 items in registers in x64, so this shouldn't
// consider 'p7' as a register. Instead p5 gets put into the register on the second pass.
-struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7){ return p1;}
-// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@80"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
-// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@96"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
+// x86 should pass p2, p6 and p7 in registers, then p1 in the second pass.
+struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7, int p8){ return p1;}
+// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float %p6, float %p7, i32 %p8)
+// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7, i32 %p8)
+
+// Vectorcall in both architectures allows passing of an HVA as long as there is room,
+// even if it is not one of the first 6 arguments. First pass puts p4 into a
+// register on both. p9 ends up in a register in x86 only. Second pass puts p1
+// in a register, does NOT put p7 in a register (since theres no room), then puts
+// p8 in a register.
+void __vectorcall HVAAnywhere(struct HFA2 p1, int p2, int p3, float p4, int p5, int p6, struct HFA4 p7, struct HFA2 p8, float p9){}
+// X32: define x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
+// X64: define x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
+