///
/// Used to check if arguments are suitable for tail call lowering.
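+ /// \p AssignFnFixed is the CCAssignFn used for fixed arguments.
+ /// \p AssignFnVarArg is the CCAssignFn used for varargs.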
bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
- CCAssignFn &Fn) const;
+ CCAssignFn &AssignFnFixed,
+ CCAssignFn &AssignFnVarArg) const;
/// \returns True if the calling convention for a callee and its caller pass
/// results in the same way. Typically used for tail call eligibility checks.
/// \p Info is the CallLoweringInfo for the call.
/// \p MF is the MachineFunction for the caller.
/// \p InArgs contains the results of the call.
- /// \p CalleeAssignFn is the CCAssignFn to be used for the callee.
- /// \p CallerAssignFn is the CCAssignFn to be used for the caller.
+ /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for
+ /// fixed arguments.
+ /// \p CalleeAssignFnVarArg is similar, but for varargs.
+ /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for
+ /// fixed arguments.
+ /// \p CallerAssignFnVarArg is similar, but for varargs.
bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs,
- CCAssignFn &CalleeAssignFn,
- CCAssignFn &CallerAssignFn) const;
+ CCAssignFn &CalleeAssignFnFixed,
+ CCAssignFn &CalleeAssignFnVarArg,
+ CCAssignFn &CallerAssignFnFixed,
+ CCAssignFn &CallerAssignFnVarArg) const;
public:
CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
};
bool CallLowering::analyzeArgInfo(CCState &CCState,
- SmallVectorImpl<ArgInfo> &Args,
- CCAssignFn &Fn) const {
+ SmallVectorImpl<ArgInfo> &Args,
+ CCAssignFn &AssignFnFixed,
+ CCAssignFn &AssignFnVarArg) const {
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
MVT VT = MVT::getVT(Args[i].Ty);
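+ // Fixed arguments and varargs may use different assignment rules (e.g.
+ // Darwin passes varargs on the stack), so pick the CCAssignFn per argument.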
+ CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg;
if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) {
// Bail out on anything we can't handle.
LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString()
                  << " in call.\n");
return false;
bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs,
- CCAssignFn &CalleeAssignFn,
- CCAssignFn &CallerAssignFn) const {
+ CCAssignFn &CalleeAssignFnFixed,
+ CCAssignFn &CalleeAssignFnVarArg,
+ CCAssignFn &CallerAssignFnFixed,
+ CCAssignFn &CallerAssignFnVarArg) const {
const Function &F = MF.getFunction();
CallingConv::ID CalleeCC = Info.CallConv;
CallingConv::ID CallerCC = F.getCallingConv();
SmallVector<CCValAssign, 16> ArgLocs1;
CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
- if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFn))
+ if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed,
+ CalleeAssignFnVarArg))
return false;
SmallVector<CCValAssign, 16> ArgLocs2;
CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
- if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFn))
+ if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed,
+ CallerAssignFnVarArg))
return false;
// We need the argument locations to match up exactly. If there's more in
// one than the other, then they aren't compatible.
}
}
+/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
+/// CC.
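+/// For AArch64, TLI.CCAssignFnForCall(CC, /*IsVarArg=*/true) returns the
+/// convention for the variadic portion of a call (e.g.
+/// CC_AArch64_DarwinPCS_VarArg on Darwin).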
+static std::pair<CCAssignFn *, CCAssignFn *>
+getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
+ return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
+}
+
bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
CallLoweringInfo &Info, MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs) const {
// Check if the caller and callee will handle arguments in the same way.
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- CCAssignFn *CalleeAssignFn = TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg);
- CCAssignFn *CallerAssignFn =
- TLI.CCAssignFnForCall(CallerCC, CallerF.isVarArg());
-
- if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFn, *CallerAssignFn))
+ CCAssignFn *CalleeAssignFnFixed;
+ CCAssignFn *CalleeAssignFnVarArg;
+ std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
+ getAssignFnsForCC(CalleeCC, TLI);
+
+ CCAssignFn *CallerAssignFnFixed;
+ CCAssignFn *CallerAssignFnVarArg;
+ std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
+ getAssignFnsForCC(CallerCC, TLI);
+
+ if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
+ *CalleeAssignFnVarArg, *CallerAssignFnFixed,
+ *CallerAssignFnVarArg))
return false;
// Make sure that the caller and callee preserve all of the same registers.
CallingConv::ID CallerCC = CallerF.getCallingConv();
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ CCAssignFn *AssignFnFixed;
+ CCAssignFn *AssignFnVarArg;
+ std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
+
// We have outgoing arguments. Make sure that we can tail call with them.
SmallVector<CCValAssign, 16> OutLocs;
CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
- if (!analyzeArgInfo(OutInfo, OutArgs,
- *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) {
+ if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
return false;
}
// Find out which ABI gets to decide where things go.
CallingConv::ID CalleeCC = Info.CallConv;
- CCAssignFn *AssignFnFixed =
- TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
- CCAssignFn *AssignFnVarArg =
- TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true);
+ CCAssignFn *AssignFnFixed;
+ CCAssignFn *AssignFnVarArg;
+ std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
MachineInstrBuilder CallSeqStart;
if (!IsSibCall)
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
SmallVector<CCValAssign, 16> OutLocs;
CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
- analyzeArgInfo(OutInfo, OutArgs,
- *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg));
+ analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);
// The callee will pop the argument stack as a tail call. Thus, we must
// keep it 16-byte aligned.
return lowerTailCall(MIRBuilder, Info, OutArgs);
// Find out which ABI gets to decide where things go.
- CCAssignFn *AssignFnFixed =
- TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
- CCAssignFn *AssignFnVarArg =
- TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
+ CCAssignFn *AssignFnFixed;
+ CCAssignFn *AssignFnVarArg;
+ std::tie(AssignFnFixed, AssignFnVarArg) =
+ getAssignFnsForCC(Info.CallConv, TLI);
MachineInstrBuilder CallSeqStart;
CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
declare void @varargs(i32, double, i64, ...)
define void @test_varargs() {
- ; On Darwin, everything is passed on the stack. Since the caller has no stack,
- ; we don't tail call.
- ; DARWIN-LABEL: name: test_varargs
- ; DARWIN: bb.1 (%ir-block.0):
- ; DARWIN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
- ; DARWIN: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
- ; DARWIN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
- ; DARWIN: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
- ; DARWIN: $w0 = COPY [[C]](s32)
- ; DARWIN: $d0 = COPY [[C1]](s64)
- ; DARWIN: $x1 = COPY [[C2]](s64)
+ ; COMMON-LABEL: name: test_varargs
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+ ; COMMON: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; COMMON: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; COMMON: $w0 = COPY [[C]](s32)
+ ; COMMON: $d0 = COPY [[C1]](s64)
+ ; COMMON: $x1 = COPY [[C2]](s64)
+ ; COMMON: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+ tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+ ret void
+}
+
+; Darwin should not tail call here, because the last parameter to @varargs is
+; not fixed, so it's passed on the stack, and the outgoing arguments won't fit
+; in the caller's stack argument area. On Windows, it's passed in a register,
+; so it's safe to tail call.
+define void @test_varargs_2() {
+ ; DARWIN-LABEL: name: test_varargs_2
+ ; DARWIN-NOT: TCRETURNdi @varargs
; DARWIN: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
- ; DARWIN: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
; DARWIN: RET_ReallyLR
- ; Windows uses registers, so we don't need to worry about using the stack.
- ; WINDOWS-LABEL: name: test_varargs
+ ; WINDOWS-LABEL: name: test_varargs_2
; WINDOWS: bb.1 (%ir-block.0):
; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
; WINDOWS: $w0 = COPY [[C]](s32)
; WINDOWS: $d0 = COPY [[C1]](s64)
; WINDOWS: $x1 = COPY [[C2]](s64)
- ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
- tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+ ; WINDOWS: $x2 = COPY [[C3]](s64)
+ ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2
+ tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
+ ret void
+}
+
+; The same applies here, even though the caller has enough stack space for the
+; outgoing arguments: on Darwin, the last argument to @varargs is passed on the
+; stack, and we don't allow tail calls whose varargs are passed on the stack.
+define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
+ ; DARWIN-LABEL: name: test_varargs_3
+ ; DARWIN-NOT: TCRETURNdi @varargs
+ ; DARWIN: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+ ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
+ ; DARWIN: RET_ReallyLR
+
+ ; WINDOWS-LABEL: name: test_varargs_3
+ ; WINDOWS: bb.1 (%ir-block.1):
+ ; WINDOWS: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+ ; WINDOWS: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; WINDOWS: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; WINDOWS: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+ ; WINDOWS: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+ ; WINDOWS: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
+ ; WINDOWS: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
+ ; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
+ ; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
+ ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+ ; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+ ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
+ ; WINDOWS: $w0 = COPY [[C]](s32)
+ ; WINDOWS: $d0 = COPY [[C1]](s64)
+ ; WINDOWS: $x1 = COPY [[C2]](s64)
+ ; WINDOWS: $x2 = COPY [[C3]](s64)
+ ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2
+ tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
ret void
}