SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const;
- /// Analyze the return values of a call, incorporating info about the passed
- /// values into \p CCState.
- bool analyzeCallResult(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
- CCAssignFn &Fn) const;
+ /// Analyze passed or returned values from a call, supplied in \p Args,
+ /// incorporating info about the passed values into \p CCState.
+ ///
+ /// Used to check if arguments are suitable for tail call lowering.
+ bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
+ CCAssignFn &Fn) const;
/// \returns True if the calling convention for a callee and its caller pass
/// results in the same way. Typically used for tail call eligibility checks.
return true;
}
-bool CallLowering::analyzeCallResult(CCState &CCState,
+bool CallLowering::analyzeArgInfo(CCState &CCState,
SmallVectorImpl<ArgInfo> &Args,
CCAssignFn &Fn) const {
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
SmallVector<CCValAssign, 16> ArgLocs1;
CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
- if (!analyzeCallResult(CCInfo1, InArgs, CalleeAssignFn))
+ if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFn))
return false;
SmallVector<CCValAssign, 16> ArgLocs2;
CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
- if (!analyzeCallResult(CCInfo2, InArgs, CallerAssignFn))
+ if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFn))
return false;
// We need the argument locations to match up exactly. If there's more in
struct OutgoingArgHandler : public CallLowering::ValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
- CCAssignFn *AssignFnVarArg)
+ CCAssignFn *AssignFnVarArg, bool IsTailCall = false)
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
- AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
+ AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), StackSize(0) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
+ MachineFunction &MF = MIRBuilder.getMF();
LLT p0 = LLT::pointer(0, 64);
LLT s64 = LLT::scalar(64);
+
+ if (IsTailCall) {
+ // TODO: For -tailcallopt tail calls, Offset will need FPDiff like in
+ // ISelLowering.
+ int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
+ Register FIReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildFrameIndex(FIReg, FI);
+ MPO = MachinePointerInfo::getFixedStack(MF, FI);
+ return FIReg;
+ }
+
Register SPReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
Register AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
- MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ MPO = MachinePointerInfo::getStack(MF, Offset);
return AddrReg;
}
MachineInstrBuilder MIB;
CCAssignFn *AssignFnVarArg;
+ bool IsTailCall;
uint64_t StackSize;
};
} // namespace
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ uint64_t StackOffset = Handler.StackUsed;
if (F.isVarArg()) {
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (!Subtarget.isTargetDarwin()) {
}
// We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
- uint64_t StackOffset =
- alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
+ StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
auto &MFI = MIRBuilder.getMF().getFrameInfo();
- AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
+ // TODO: Port checks for stack to restore for -tailcallopt from ISelLowering.
+ // We need to keep track of the size of function stacks for tail call
+ // optimization. When we tail call, we need to check if the callee's arguments
+ // will fit on the caller's stack. So, whenever we lower formal arguments,
+ // we should keep track of this information, since we might lower a tail call
+ // in this function later.
+ FuncInfo->setBytesInStackArgArea(StackOffset);
+
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (Subtarget.hasCustomCallingConv())
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}
+/// Check whether the outgoing arguments described in \p OutArgs allow the
+/// callee in \p Info to be tail called from this function. The arguments must
+/// be analyzable under the callee's calling convention, must fit within the
+/// caller's own incoming stack argument area, and must not be passed in
+/// callee-saved registers (not supported yet).
+bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
+    CallLoweringInfo &Info, MachineFunction &MF,
+    SmallVectorImpl<ArgInfo> &OutArgs) const {
+  // If there are no outgoing arguments, then we are done.
+  if (OutArgs.empty())
+    return true;
+
+  const Function &CallerF = MF.getFunction();
+  CallingConv::ID CalleeCC = Info.CallConv;
+  CallingConv::ID CallerCC = CallerF.getCallingConv();
+  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+
+  // We have outgoing arguments. Make sure that we can tail call with them.
+  SmallVector<CCValAssign, 16> OutLocs;
+  CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
+
+  // Assign locations using the callee's calling convention; if this fails we
+  // cannot reason about where the arguments would be placed.
+  if (!analyzeArgInfo(OutInfo, OutArgs,
+                      *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) {
+    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
+    return false;
+  }
+
+  // Make sure that they can fit on the caller's stack.
+  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
+    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
+    return false;
+  }
+
+  // Verify that the parameters in callee-saved registers match.
+  // TODO: Port this over to CallLowering as general code once swiftself is
+  // supported.
+  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
+
+  for (auto &ArgLoc : OutLocs) {
+    // If it's not a register, it's fine.
+    if (!ArgLoc.isRegLoc())
+      continue;
+
+    Register Reg = ArgLoc.getLocReg();
+
+    // Only look at callee-saved registers. A set bit in the preserved mask
+    // means the call clobbers Reg, i.e. Reg is not callee-saved from the
+    // caller's point of view, so it cannot conflict here.
+    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
+      continue;
+
+    // TODO: Port the remainder of this check from TargetLowering to support
+    // tail calling swiftself.
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot handle callee-saved registers in outgoing args yet.\n");
+    return false;
+  }
+
+  return true;
+}
+
bool AArch64CallLowering::isEligibleForTailCallOptimization(
MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
- SmallVectorImpl<ArgInfo> &InArgs) const {
+ SmallVectorImpl<ArgInfo> &InArgs,
+ SmallVectorImpl<ArgInfo> &OutArgs) const {
CallingConv::ID CalleeCC = Info.CallConv;
MachineFunction &MF = MIRBuilder.getMF();
const Function &CallerF = MF.getFunction();
assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
- // Look at the incoming values.
+ // Verify that the incoming and outgoing arguments from the callee are
+ // safe to tail call.
if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
LLVM_DEBUG(
dbgs()
return false;
}
- // For now, only handle callees that take no arguments.
- if (!Info.OrigArgs.empty()) {
- LLVM_DEBUG(
- dbgs()
- << "... Cannot tail call callees with outgoing arguments yet.\n");
+ if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
return false;
- }
LLVM_DEBUG(
dbgs() << "... Call is eligible for tail call optimization.\n");
return false;
}
- SmallVector<ArgInfo, 8> SplitArgs;
+ SmallVector<ArgInfo, 8> OutArgs;
for (auto &OrigArg : Info.OrigArgs) {
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, Info.CallConv);
+ splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
// AAPCS requires that we zero-extend i1 to 8 bits by the caller.
if (OrigArg.Ty->isIntegerTy(1))
- SplitArgs.back().Flags[0].setZExt();
+ OutArgs.back().Flags[0].setZExt();
}
SmallVector<ArgInfo, 8> InArgs;
if (!Info.OrigRet.Ty->isVoidTy())
splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
- bool IsSibCall = Info.IsTailCall &&
- isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs);
+ bool IsSibCall = Info.IsTailCall && isEligibleForTailCallOptimization(
+ MIRBuilder, Info, InArgs, OutArgs);
if (IsSibCall)
MF.getFrameInfo().setHasTailCall();
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
- AssignFnVarArg);
- if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ AssignFnVarArg, IsSibCall);
+ if (!handleAssignments(MIRBuilder, OutArgs, Handler))
return false;
// Now we can add the actual call instruction to the correct basic block.
bool
isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info,
- SmallVectorImpl<ArgInfo> &InArgs) const;
+ SmallVectorImpl<ArgInfo> &InArgs,
+ SmallVectorImpl<ArgInfo> &OutArgs) const;
bool supportSwiftError() const override { return true; }
doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info,
MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs) const;
+
+ bool
+ areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &OutArgs) const;
};
} // end namespace llvm
}
declare void @outgoing_args_fn(i32)
-; Right now, callees with outgoing arguments should not be tail called.
-; TODO: Support this.
define void @test_outgoing_args(i32 %a) {
; COMMON-LABEL: name: test_outgoing_args
; COMMON: bb.1 (%ir-block.0):
; COMMON: liveins: $w0
; COMMON: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; COMMON: $w0 = COPY [[COPY]](s32)
- ; COMMON: BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
- ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
- ; COMMON: RET_ReallyLR
+ ; COMMON: TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
tail call void @outgoing_args_fn(i32 %a)
ret void
}
+; Verify that we create frame indices for memory arguments in tail calls.
+; We get a bunch of copies here which are unused and thus eliminated. So, let's
+; just focus on what matters, which is that we get a G_FRAME_INDEX.
+declare void @outgoing_stack_args_fn(<4 x half>)
+define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
+ ; COMMON-LABEL: name: test_outgoing_stack_args
+ ; COMMON: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; COMMON: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+ ; COMMON: $d0 = COPY [[LOAD]](<4 x s16>)
+ ; COMMON: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
+ tail call void @outgoing_stack_args_fn(<4 x half> %arg)
+ ret void
+}
+
+; Verify that we don't tail call when we cannot fit arguments on the caller's
+; stack.
+declare i32 @too_big_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s)
+define i32 @test_too_big_stack() {
+ ; COMMON-LABEL: name: test_too_big_stack
+ ; COMMON-NOT: TCRETURNdi
+ ; COMMON-NOT: TCRETURNri
+ ; COMMON: BL @too_big_stack
+ ; COMMON-DAG: RET_ReallyLR
+entry:
+ %call = tail call i32 @too_big_stack(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
+ ret i32 %call
+}
+
; Right now, we don't want to tail call callees with nonvoid return types, since
; call lowering will insert COPYs after the call.
; TODO: Support this.
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+; RUN: llc -global-isel < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
@t = weak global i32 ()* null
@x = external global i32, align 4
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure
-; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
; CHECK-LABEL: f
; Make sure that for a tail call, taint doesn't get put into SP twice.
define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
; CHECK-LABEL: tail_caller:
-; NOGISELSLH: mov [[TMPREG:x[0-9]+]], sp
-; NOGISELSLH: and [[TMPREG]], [[TMPREG]], x16
-; NOGISELSLH: mov sp, [[TMPREG]]
-; NOGISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
-; NOGISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
-; NOGISELNOSLH-NOT: mov sp, [[TMPREG]]
-; GISELSLH: mov [[TMPREG:x[0-9]+]], sp
-; GISELSLH: and [[TMPREG]], [[TMPREG]], x16
-; GISELSLH: mov sp, [[TMPREG]]
-; GISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
-; GISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
-; GISELNOSLH-NOT: mov sp, [[TMPREG]]
-; GlobalISel doesn't optimize tail calls (yet?), so only check that
-; cross-call taint register setup code is missing if a tail call was
-; actually produced.
-; NOGISELSLH: b tail_callee
-; GISELSLH: bl tail_callee
-; GISELSLH: cmp sp, #0
+; SLH: mov [[TMPREG:x[0-9]+]], sp
+; SLH: and [[TMPREG]], [[TMPREG]], x16
+; SLH: mov sp, [[TMPREG]]
+; NOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT: mov sp, [[TMPREG]]
+; SLH: b tail_callee
; SLH-NOT: cmp sp, #0
%call = tail call i32 @tail_callee(i32 %a)
ret i32 %call
; RUN: llc -relocation-model=static -verify-machineinstrs -O2 < %s | FileCheck %s
+; RUN: llc -relocation-model=static -verify-machineinstrs -global-isel -O2 < %s | FileCheck %s
; The call to function TestBar should be a tail call, when in C++ the string
; `ret` is RVO returned.
-; RUN: llc -mcpu=cyclone -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s
+; RUN: llc -mcpu=cyclone -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s --check-prefixes=COMMON,SDAG
+; RUN: llc -mcpu=cyclone -global-isel -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GISEL
; REQUIRES: asserts
; PR23459 has a test case that we where miscompiling because of this at the
; time.
-; CHECK: Frame Objects
-; CHECK: fi#-4: {{.*}} fixed, at location [SP+8]
-; CHECK: fi#-3: {{.*}} fixed, at location [SP]
-; CHECK: fi#-2: {{.*}} fixed, at location [SP+8]
-; CHECK: fi#-1: {{.*}} fixed, at location [SP]
-
-; CHECK: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3
-; CHECK: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2
-; CHECK: STRXui %{{.*}}, %fixed-stack.0
-; CHECK: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
+; COMMON: Frame Objects
+; COMMON: fi#-4: {{.*}} fixed, at location [SP+8]
+; COMMON: fi#-3: {{.*}} fixed, at location [SP]
+; COMMON: fi#-2: {{.*}} fixed, at location [SP+8]
+; COMMON: fi#-1: {{.*}} fixed, at location [SP]
+
+; The order that these appear in differs between GISel and SDAG, but the
+; dependency relationship still holds.
+; COMMON: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3
+; COMMON: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2
+; SDAG: STRXui %{{.*}}, %fixed-stack.0
+; SDAG: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
+; GISEL: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
+; GISEL: STRXui %{{.*}}, %fixed-stack.0
; Make sure that there is an dependence edge between fi#-2 and fi#-4.
; Without this edge the scheduler would be free to move the store accross the load.
-; CHECK: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
-; CHECK-NOT: SU
-; CHECK: Successors:
-; CHECK: SU([[DEPSTOREB:.*]]): Ord Latency=0
-; CHECK: SU([[DEPSTOREA:.*]]): Ord Latency=0
+; COMMON: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
+; COMMON-NOT: SU
+; COMMON: Successors:
+; COMMON: SU([[DEPSTOREB:.*]]): Ord Latency=0
+; COMMON: SU([[DEPSTOREA:.*]]): Ord Latency=0
+
+; GlobalISel outputs DEPSTOREB before DEPSTOREA, but the dependency relationship
+; still holds.
+; SDAG: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.0
+; SDAG: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.1
-; CHECK: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.0
-; CHECK: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.1
+; GISEL: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.0
+; GISEL: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.1