const Value *Val,
ArrayRef<Register> VRegs,
Register SwiftErrorVReg) const {
+
+ // Check if a tail call was lowered in this block. If so, we already handled
+ // the terminator.
+ MachineFunction &MF = MIRBuilder.getMF();
+ if (MF.getFrameInfo().hasTailCall()) {
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ auto FirstTerm = MBB.getFirstTerminator();
+ if (FirstTerm != MBB.end() && FirstTerm->isCall())
+ return true;
+ }
+
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
"Return value without a vreg");
return true;
}
+/// Return true if the calling convention is one that we can guarantee TCO for.
+/// Currently that is only CallingConv::Fast.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+ return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+/// C, PreserveMost and Swift may be lowered as sibling calls; any other
+/// convention is only tail-callable when TCO can be guaranteed for it.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+ switch (CC) {
+ // Conventions that may be lowered as sibling calls.
+ case CallingConv::C:
+ case CallingConv::PreserveMost:
+ case CallingConv::Swift:
+ return true;
+ default:
+ return canGuaranteeTCO(CC);
+ }
+}
+
+/// Decide whether the call described by \p Info can be lowered as a tail
+/// call from the function currently being built in \p MIRBuilder.
+///
+/// Only sibling calls are produced right now (no stack adjustment, byte
+/// offset 0), so this is deliberately conservative; each early-out below
+/// logs the reason the call was rejected under LLVM_DEBUG.
+bool AArch64CallLowering::isEligibleForTailCallOptimization(
+ MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const {
+ CallingConv::ID CalleeCC = Info.CallConv;
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF.getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
+
+ if (Info.SwiftErrorVReg) {
+ // TODO: We should handle this.
+ // Note that this is also handled by the check for no outgoing arguments.
+ // Proactively disabling this though, because the swifterror handling in
+ // lowerCall inserts a COPY *after* the location of the call.
+ LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
+ return false;
+ }
+
+ if (!Info.OrigRet.Ty->isVoidTy()) {
+ // TODO: lowerCall will insert COPYs to handle the call's return value.
+ // This needs some refactoring to avoid this with tail call returns. For
+ // now, just don't handle that case.
+ LLVM_DEBUG(dbgs() << "... Cannot handle non-void return types yet.\n");
+ return false;
+ }
+
+ if (!mayTailCallThisCC(CalleeCC)) {
+ LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
+ return false;
+ }
+
+ if (Info.IsVarArg) {
+ LLVM_DEBUG(dbgs() << "... Tail calling varargs not supported yet.\n");
+ return false;
+ }
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86).
+ //
+ // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
+ // it?
+ //
+ // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+ // In this case, it is necessary to save/restore X0 in the callee. Tail
+ // call opt interferes with this. So we disable tail call opt when the
+ // caller has an argument with "inreg" attribute.
+ //
+ // FIXME: Check whether the callee also has an "inreg" argument.
+ if (any_of(CallerF.args(), [](const Argument &A) {
+ return A.hasByValAttr() || A.hasInRegAttr();
+ })) {
+ LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval or "
+ "inreg arguments.\n");
+ return false;
+ }
+
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on AArch64 when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (Info.Callee.isGlobal()) {
+ const GlobalValue *GV = Info.Callee.getGlobal();
+ const Triple &TT = MF.getTarget().getTargetTriple();
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+ TT.isOSBinFormatMachO())) {
+ LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
+ "with weak linkage for this OS.\n");
+ return false;
+ }
+ }
+
+ // If we have -tailcallopt and matching CCs, at this point, we could return
+ // true. However, we don't have full tail call support yet. So, continue
+ // checking. We want to emit a sibling call.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ // (Note: the IsVarArg early-out above already returned false, so this holds
+ // trivially today; it guards future relaxations of that check.)
+ assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
+ "Unexpected variadic calling convention");
+
+ // For now, only support the case where the calling conventions match.
+ if (!CCMatch) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Cannot tail call with mismatched calling conventions yet.\n");
+ return false;
+ }
+
+ // For now, only handle callees that take no arguments.
+ if (!Info.OrigArgs.empty()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Cannot tail call callees with outgoing arguments yet.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "... Call is eligible for tail call optimization.\n");
+ return true;
+}
+
+/// Select the machine opcode for a call.
+///
+/// Non-tail calls use BL (direct) or BLR (indirect). Tail calls use the
+/// TCRETURN* pseudos; when \p CallerF has the "branch-target-enforcement"
+/// attribute (BTI), indirect tail calls must use TCRETURNriBTI so the
+/// target register is constrained to x16/x17.
+static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+ bool IsTailCall) {
+ if (!IsTailCall)
+ return IsIndirect ? AArch64::BLR : AArch64::BL;
+
+ if (!IsIndirect)
+ return AArch64::TCRETURNdi;
+
+ // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
+ // x16 or x17.
+ if (CallerF.hasFnAttribute("branch-target-enforcement"))
+ return AArch64::TCRETURNriBTI;
+
+ return AArch64::TCRETURNri;
+}
+
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
auto &DL = F.getParent()->getDataLayout();
if (Info.IsMustTailCall) {
+ // TODO: Until we lower all tail calls, we should fall back on this.
LLVM_DEBUG(dbgs() << "Cannot lower musttail calls yet.\n");
return false;
}
SplitArgs.back().Flags[0].setZExt();
}
+ // Decide now whether this will be lowered as a sibling call; record it in
+ // the frame info so that lowerReturn (which checks hasTailCall()) knows the
+ // terminator has already been emitted here.
+ bool IsSibCall =
+ Info.IsTailCall && isEligibleForTailCallOptimization(MIRBuilder, Info);
+ if (IsSibCall)
+ MF.getFrameInfo().setHasTailCall();
+
// Find out which ABI gets to decide where things go.
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
CCAssignFn *AssignFnFixed =
CCAssignFn *AssignFnVarArg =
TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
- auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+ // If we have a sibling call, then we don't have to adjust the stack.
+ // Otherwise, we need to adjust it.
+ MachineInstrBuilder CallSeqStart;
+ if (!IsSibCall)
+ CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- auto MIB = MIRBuilder.buildInstrNoInsert(Info.Callee.isReg() ? AArch64::BLR
- : AArch64::BL);
+ unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), IsSibCall);
+
+ // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
+ // register class. Until we can do that, we should fall back here.
+ if (Opc == AArch64::TCRETURNriBTI) {
+ LLVM_DEBUG(
+ dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
+ return false;
+ }
+
+ auto MIB = MIRBuilder.buildInstrNoInsert(Opc)
MIB.add(Info.Callee);
+ // Add the byte offset for the tail call. We only have sibling calls, so this
+ // is always 0.
+ // TODO: Handle tail calls where we will have a different value here.
+ if (IsSibCall)
+ MIB.addImm(0);
+
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
}
- CallSeqStart.addImm(Handler.StackSize).addImm(0);
- MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
- .addImm(Handler.StackSize)
- .addImm(0);
+ if (!IsSibCall) {
+ // If we aren't sibcalling, we need to move the stack.
+ CallSeqStart.addImm(Handler.StackSize).addImm(0);
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+ .addImm(Handler.StackSize)
+ .addImm(0);
+ }
return true;
}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=DARWIN,COMMON
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=WINDOWS,COMMON
+
+; Sibling-call lowering in GlobalISel: eligible tail calls become TCRETURN*
+; with no ADJCALLSTACKDOWN/ADJCALLSTACKUP pair; ineligible ones fall back to
+; an ordinary BL followed by RET_ReallyLR.
+
+declare void @simple_fn()
+define void @tail_call() {
+  ; COMMON-LABEL: name: tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
+  tail call void @simple_fn()
+  ret void
+}
+
+; We should get a TCRETURNri here.
+; FIXME: We don't need the COPY.
+define void @indirect_tail_call(void()* %func) {
+  ; COMMON-LABEL: name: indirect_tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $x0
+  ; COMMON: [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; COMMON: TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
+  tail call void %func()
+  ret void
+}
+
+declare void @outgoing_args_fn(i32)
+; Right now, callees with outgoing arguments should not be tail called.
+; TODO: Support this.
+define void @test_outgoing_args(i32 %a) {
+  ; COMMON-LABEL: name: test_outgoing_args
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $w0
+  ; COMMON: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $w0 = COPY [[COPY]](s32)
+  ; COMMON: BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void @outgoing_args_fn(i32 %a)
+  ret void
+}
+
+; Right now, we don't want to tail call callees with nonvoid return types, since
+; call lowering will insert COPYs after the call.
+; TODO: Support this.
+declare i32 @nonvoid_ret()
+define i32 @test_nonvoid_ret() {
+  ; COMMON-LABEL: name: test_nonvoid_ret
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @nonvoid_ret, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
+  ; COMMON: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $w0 = COPY [[COPY]](s32)
+  ; COMMON: RET_ReallyLR implicit $w0
+  %call = tail call i32 @nonvoid_ret()
+  ret i32 %call
+}
+
+; Don't want to handle swifterror at all right now, since lowerCall will
+; insert a COPY after the call right now.
+; TODO: Support this.
+%swift_error = type {i64, i8}
+define float @swifterror(%swift_error** swifterror %ptr) {
+  ; COMMON-LABEL: name: swifterror
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $x21
+  ; COMMON: [[COPY:%[0-9]+]]:_(p0) = COPY $x21
+  ; COMMON: [[COPY1:%[0-9]+]]:gpr64all = COPY [[COPY]](p0)
+  ; COMMON: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY1]]
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $x21 = COPY [[COPY2]](p0)
+  ; COMMON: BL @swifterror, csr_aarch64_aapcs_swifterror, implicit-def $lr, implicit $sp, implicit $x21, implicit-def $s0, implicit-def $x21
+  ; COMMON: [[COPY3:%[0-9]+]]:_(s32) = COPY $s0
+  ; COMMON: [[COPY4:%[0-9]+]]:gpr64all = COPY $x21
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $s0 = COPY [[COPY3]](s32)
+  ; COMMON: $x21 = COPY [[COPY4]]
+  ; COMMON: RET_ReallyLR implicit $s0, implicit $x21
+  %call = tail call float @swifterror(%swift_error** swifterror %ptr)
+  ret float %call
+}
+
+define swiftcc float @swifterror_swiftcc(%swift_error** swifterror %ptr) {
+  ; COMMON-LABEL: name: swifterror_swiftcc
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $x21
+  ; COMMON: [[COPY:%[0-9]+]]:_(p0) = COPY $x21
+  ; COMMON: [[COPY1:%[0-9]+]]:gpr64all = COPY [[COPY]](p0)
+  ; COMMON: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY1]]
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $x21 = COPY [[COPY2]](p0)
+  ; COMMON: BL @swifterror_swiftcc, csr_aarch64_aapcs_swifterror, implicit-def $lr, implicit $sp, implicit $x21, implicit-def $s0, implicit-def $x21
+  ; COMMON: [[COPY3:%[0-9]+]]:_(s32) = COPY $s0
+  ; COMMON: [[COPY4:%[0-9]+]]:gpr64all = COPY $x21
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $s0 = COPY [[COPY3]](s32)
+  ; COMMON: $x21 = COPY [[COPY4]]
+  ; COMMON: RET_ReallyLR implicit $s0, implicit $x21
+  %call = tail call swiftcc float @swifterror_swiftcc(%swift_error** swifterror %ptr)
+  ret float %call
+}
+
+; Right now, this should not be tail called.
+; TODO: Support this.
+declare void @varargs(i32, double, i64, ...)
+define void @test_varargs() {
+  ; COMMON-LABEL: name: test_varargs
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; COMMON: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; COMMON: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $w0 = COPY [[C]](s32)
+  ; COMMON: $d0 = COPY [[C1]](s64)
+  ; COMMON: $x1 = COPY [[C2]](s64)
+  ; COMMON: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+  ret void
+}
+
+; Unsupported calling convention for tail calls. Make sure we never tail call
+; it.
+declare ghccc void @bad_call_conv_fn()
+define void @test_bad_call_conv() {
+  ; COMMON-LABEL: name: test_bad_call_conv
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @bad_call_conv_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call ghccc void @bad_call_conv_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has byval arguments.
+; Note: the callee takes no arguments; it is the *caller's* byval argument
+; that blocks the tail call.
+define void @test_byval(i8* byval %ptr) {
+  ; COMMON-LABEL: name: test_byval
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; COMMON: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has inreg arguments.
+define void @test_inreg(i8* inreg %ptr) {
+  ; COMMON-LABEL: name: test_inreg
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $x0
+  ; COMMON: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the OS doesn't support it. Windows supports this,
+; so we should be able to tail call there.
+declare extern_weak void @extern_weak_fn()
+define void @test_extern_weak() {
+  ; DARWIN-LABEL: name: test_extern_weak
+  ; DARWIN: bb.1 (%ir-block.0):
+  ; DARWIN: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN: BL @extern_weak_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; DARWIN: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN: RET_ReallyLR
+  ; WINDOWS-LABEL: name: test_extern_weak
+  ; WINDOWS: bb.1 (%ir-block.0):
+  ; WINDOWS: TCRETURNdi @extern_weak_fn, 0, csr_aarch64_aapcs, implicit $sp
+  tail call void @extern_weak_fn()
+  ret void
+}
+
+; Right now, mismatched calling conventions should not be tail called.
+; TODO: Support this.
+declare fastcc void @fast_fn()
+define void @test_mismatched_caller() {
+  ; COMMON-LABEL: name: test_mismatched_caller
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @fast_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call fastcc void @fast_fn()
+  ret void
+}