return Success;
}
+/// Helper function for lowering musttail calls in vararg functions. Computes
+/// the set of forwarded registers, marks them live-in to the block, and emits
+/// COPY instructions so their values can be saved and restored later.
+static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
+ CCAssignFn *AssignFn) {
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
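+ // Nothing to do unless this vararg function contains a musttail call that
+ // needs its arguments forwarded.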
+ if (!MFI.hasMustTailInVarArgFunc())
+ return;
+
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ const Function &F = MF.getFunction();
+ assert(F.isVarArg() && "Expected F to be vararg?");
+
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
+ F.getContext());
+ SmallVector<MVT, 2> RegParmTypes;
+ RegParmTypes.push_back(MVT::i64);
+ RegParmTypes.push_back(MVT::f128);
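+ // i64 and f128 between them cover every register that can carry a variadic
+ // argument on AArch64: the GPRs X0-X7 and the FP/SIMD registers Q0-Q7. Any
+ // of these not already claimed by a fixed argument will be forwarded.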
+
+ // Later on, we can use this vector to restore the registers if necessary.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
+
+ // Conservatively forward X8, since it might be used for an aggregate
+ // return.
+ if (!CCInfo.isAllocated(AArch64::X8)) {
+ unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+ Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
+ }
+
+ // Add the forwards to the MachineBasicBlock and MachineFunction.
+ for (const auto &F : Forwards) {
+ MBB.addLiveIn(F.PReg);
+ MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
+ }
+}
+
bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
if (Subtarget.hasCustomCallingConv())
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
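+ // Copy the forwarded registers for any musttail call into vregs so their
+ // values survive until the call site.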
+ handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
+
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
- // Before we can musttail varargs, we need to forward parameters like in
- // r345641. Make sure that we don't enable musttail with varargs without
- // addressing that!
- if (Info.IsVarArg && Info.IsMustTailCall) {
- LLVM_DEBUG(
- dbgs()
- << "... Cannot handle vararg musttail functions yet.\n");
- return false;
- }
-
// Verify that the incoming and outgoing arguments from the callee are
// safe to tail call.
if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// True when we're tail calling, but without -tailcallopt.
bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
// We aren't sibcalling, so we need to compute FPDiff. We need to do this
// before handling assignments, because FPDiff must be known for memory
// arguments.
- AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
SmallVector<CCValAssign, 16> OutLocs;
CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
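+ // The forwarded registers (and the vregs holding their values) were recorded
+ // by handleMustTailForwardedRegisters when this function's formal arguments
+ // were lowered.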
+ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
+
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
if (!handleAssignments(MIRBuilder, OutArgs, Handler))
return false;
+ if (Info.IsVarArg && Info.IsMustTailCall) {
+ // Now we know what's being passed to the function. Add uses to the call for
+ // the forwarded registers that we *aren't* passing as parameters. This
+ // preserves the copies we built earlier.
+ for (const auto &F : Forwards) {
+ Register ForwardedReg = F.PReg;
+ // If the register is already passed, or aliases a register which is
+ // already being passed, then skip it.
+ if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
+ if (!Use.isReg())
+ return false;
+ return TRI->regsOverlap(Use.getReg(), ForwardedReg);
+ }))
+ continue;
+
+ // We aren't passing it already, so we should add it to the call.
+ MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
+ MIB.addReg(ForwardedReg, RegState::Implicit);
+ }
+ }
+
// If we have -tailcallopt, we need to adjust the stack. We'll do the call
// sequence start and end here.
if (!IsSibCall) {
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s
+
+; There are two things we want to test here:
+; (1) We can lower musttail calls as actual tail calls.
+; (2) We spill and reload all of the arguments around a normal call.
+
+declare i32 @musttail_variadic_callee(i32, ...)
+define i32 @test_musttail_variadic(i32 %arg0, ...) {
+; CHECK-LABEL: test_musttail_variadic:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: b _musttail_variadic_callee
+ %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...)
+ ret i32 %r
+}
+
+declare [2 x i64] @musttail_variadic_aggret_callee(i32 %arg0, ...)
+define [2 x i64] @test_musttail_variadic_aggret(i32 %arg0, ...) {
+; CHECK-LABEL: test_musttail_variadic_aggret:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: b _musttail_variadic_aggret_callee
+ %r = musttail call [2 x i64] (i32, ...) @musttail_variadic_aggret_callee(i32 %arg0, ...)
+ ret [2 x i64] %r
+}
+
+; Test musttailing with a normal call in the block. Test that we spill and
+; restore the forwarded argument registers, as a normal call will clobber them.
+@asdf = internal constant [4 x i8] c"asdf"
+declare void @puts(i8*)
+define i32 @test_musttail_variadic_spill(i32 %arg0, ...) {
+; CHECK-LABEL: test_musttail_variadic_spill:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #224 ; =224
+; CHECK-NEXT: stp x28, x27, [sp, #128] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #144] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #160] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #176] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #192] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #208] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 224
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -24
+; CHECK-NEXT: .cfi_offset w20, -32
+; CHECK-NEXT: .cfi_offset w21, -40
+; CHECK-NEXT: .cfi_offset w22, -48
+; CHECK-NEXT: .cfi_offset w23, -56
+; CHECK-NEXT: .cfi_offset w24, -64
+; CHECK-NEXT: .cfi_offset w25, -72
+; CHECK-NEXT: .cfi_offset w26, -80
+; CHECK-NEXT: .cfi_offset w27, -88
+; CHECK-NEXT: .cfi_offset w28, -96
+; CHECK-NEXT: mov w19, w0
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x0, _asdf@PAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: add x0, x0, _asdf@PAGEOFF
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov x21, x2
+; CHECK-NEXT: mov x22, x3
+; CHECK-NEXT: mov x23, x4
+; CHECK-NEXT: mov x24, x5
+; CHECK-NEXT: mov x25, x6
+; CHECK-NEXT: mov x26, x7
+; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
+; CHECK-NEXT: mov x27, x8
+; CHECK-NEXT: bl _puts
+; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
+; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
+; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
+; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload
+; CHECK-NEXT: mov w0, w19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
+; CHECK-NEXT: mov x6, x25
+; CHECK-NEXT: mov x7, x26
+; CHECK-NEXT: mov x8, x27
+; CHECK-NEXT: ldp x29, x30, [sp, #208] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #192] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #176] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #160] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #144] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #128] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #224 ; =224
+; CHECK-NEXT: b _musttail_variadic_callee
+; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
+ call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
+ %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...)
+ ret i32 %r
+}
+
+; Test musttailing with a varargs call in the block. Test that we spill and
+; reload all arguments in the variadic argument pack.
+declare void @llvm.va_start(i8*) nounwind
+declare void(i8*, ...)* @get_f(i8* %this)
+define void @f_thunk(i8* %this, ...) {
+; CHECK-LABEL: f_thunk:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #256 ; =256
+; CHECK-NEXT: stp x28, x27, [sp, #160] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #176] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #192] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #208] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #224] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #240] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 256
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -24
+; CHECK-NEXT: .cfi_offset w20, -32
+; CHECK-NEXT: .cfi_offset w21, -40
+; CHECK-NEXT: .cfi_offset w22, -48
+; CHECK-NEXT: .cfi_offset w23, -56
+; CHECK-NEXT: .cfi_offset w24, -64
+; CHECK-NEXT: .cfi_offset w25, -72
+; CHECK-NEXT: .cfi_offset w26, -80
+; CHECK-NEXT: .cfi_offset w27, -88
+; CHECK-NEXT: .cfi_offset w28, -96
+; CHECK-NEXT: mov x27, x8
+; CHECK-NEXT: add x8, sp, #128 ; =128
+; CHECK-NEXT: add x9, sp, #256 ; =256
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov x21, x2
+; CHECK-NEXT: mov x22, x3
+; CHECK-NEXT: mov x23, x4
+; CHECK-NEXT: mov x24, x5
+; CHECK-NEXT: mov x25, x6
+; CHECK-NEXT: mov x26, x7
+; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
+; CHECK-NEXT: str x9, [x8]
+; CHECK-NEXT: bl _get_f
+; CHECK-NEXT: mov x9, x0
+; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
+; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
+; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
+; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
+; CHECK-NEXT: mov x6, x25
+; CHECK-NEXT: mov x7, x26
+; CHECK-NEXT: mov x8, x27
+; CHECK-NEXT: ldp x29, x30, [sp, #240] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #224] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #208] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #192] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #176] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #160] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #256 ; =256
+; CHECK-NEXT: br x9
+ %ap = alloca [4 x i8*], align 16
+ %ap_i8 = bitcast [4 x i8*]* %ap to i8*
+ call void @llvm.va_start(i8* %ap_i8)
+ %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this)
+ musttail call void (i8*, ...) %fptr(i8* %this, ...)
+ ret void
+}
+
+; We don't need any spills and reloads here, but we should still emit the
+; copies in call lowering.
+define void @g_thunk(i8* %fptr_i8, ...) {
+; CHECK-LABEL: g_thunk:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: br x0
+ %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
+ musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
+ ret void
+}
+
+; Test that this works with multiple exits and basic blocks.
+%struct.Foo = type { i1, i8*, i8* }
+@g = external global i32
+define void @h_thunk(%struct.Foo* %this, ...) {
+; CHECK-LABEL: h_thunk:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldrb w9, [x0]
+; CHECK-NEXT: tbz w9, #0, LBB5_2
+; CHECK-NEXT: ; %bb.1: ; %then
+; CHECK-NEXT: ldr x9, [x0, #8]
+; CHECK-NEXT: br x9
+; CHECK-NEXT: LBB5_2: ; %else
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: adrp x10, _g@GOTPAGE
+; CHECK-NEXT: ldr x9, [x0, #16]
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: ldr x10, [x10, _g@GOTPAGEOFF]
+; CHECK-NEXT: mov w11, #42
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: str w11, [x10]
+; CHECK-NEXT: br x9
+; CHECK-NEXT: .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4
+ %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
+ %cond = load i1, i1* %cond_p
+ br i1 %cond, label %then, label %else
+
+then:
+ %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
+ %a_i8 = load i8*, i8** %a_p
+ %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
+ musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...)
+ ret void
+
+else:
+ %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2
+ %b_i8 = load i8*, i8** %b_p
+ %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
+ store i32 42, i32* @g
+ musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...)
+ ret void
+}