From 126b679a29fed151b0900636ff3e58ee37226a4f Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Wed, 25 Sep 2019 16:45:35 +0000 Subject: [PATCH] [AArch64][GlobalISel] Choose CCAssignFns per-argument for tail call lowering When checking for tail call eligibility, we should use the correct CCAssignFn for each argument, rather than just checking if the caller/callee is varargs or not. This is important for tail call lowering with varargs. If we don't check it, then basically any varargs callee with parameters cannot be tail called on Darwin, for one thing. If the parameters are all guaranteed to be in registers, this should be entirely safe. On top of that, not checking for this could potentially make it so that we have the wrong stack offsets when checking for tail call eligibility. Also refactor some of the stuff for CCAssignFnForCall and pull it out into a helper function. Update call-translator-tail-call.ll to show that we can now correctly tail call on Darwin. Also add two extra tail call checks. The first verifies that we still respect the caller's stack size, and the second verifies that we still don't tail call when a varargs function has a memory argument. Differential Revision: https://reviews.llvm.org/D67939 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372897 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/CodeGen/GlobalISel/CallLowering.h | 17 ++-- lib/CodeGen/GlobalISel/CallLowering.cpp | 18 +++-- lib/Target/AArch64/AArch64CallLowering.cpp | 50 ++++++++---- .../GlobalISel/call-translator-tail-call.ll | 77 +++++++++++++++---- 4 files changed, 118 insertions(+), 44 deletions(-) diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index 65c527b96ca..dddb267f0c6 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -220,7 +220,8 @@ protected: /// /// Used to check if arguments are suitable for tail call lowering. 
bool analyzeArgInfo(CCState &CCState, SmallVectorImpl &Args, - CCAssignFn &Fn) const; + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const; /// \returns True if the calling convention for a callee and its caller pass /// results in the same way. Typically used for tail call eligibility checks. @@ -228,12 +229,18 @@ protected: /// \p Info is the CallLoweringInfo for the call. /// \p MF is the MachineFunction for the caller. /// \p InArgs contains the results of the call. - /// \p CalleeAssignFn is the CCAssignFn to be used for the callee. - /// \p CallerAssignFn is the CCAssignFn to be used for the caller. + /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for + /// fixed arguments. + /// \p CalleeAssignFnVarArg is similar, but for varargs. + /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for + /// fixed arguments. + /// \p CallerAssignFnVarArg is similar, but for varargs. bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl &InArgs, - CCAssignFn &CalleeAssignFn, - CCAssignFn &CallerAssignFn) const; + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const; public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index 9280c9efc93..d05e2091c71 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -379,10 +379,12 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } bool CallLowering::analyzeArgInfo(CCState &CCState, - SmallVectorImpl &Args, - CCAssignFn &Fn) const { + SmallVectorImpl &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const { for (unsigned i = 0, e = Args.size(); i < e; ++i) { MVT VT = MVT::getVT(Args[i].Ty); + CCAssignFn &Fn = Args[i].IsFixed ? 
AssignFnFixed : AssignFnVarArg; if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) { // Bail out on anything we can't handle. LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString() @@ -396,8 +398,10 @@ bool CallLowering::analyzeArgInfo(CCState &CCState, bool CallLowering::resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl &InArgs, - CCAssignFn &CalleeAssignFn, - CCAssignFn &CallerAssignFn) const { + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const { const Function &F = MF.getFunction(); CallingConv::ID CalleeCC = Info.CallConv; CallingConv::ID CallerCC = F.getCallingConv(); @@ -407,12 +411,14 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info, SmallVector ArgLocs1; CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext()); - if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFn)) + if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed, + CalleeAssignFnVarArg)) return false; SmallVector ArgLocs2; CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext()); - if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFn)) + if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed, + CallerAssignFnVarArg)) return false; // We need the argument locations to match up exactly. If there's more in diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index a4f2bac8d60..9f1945cbc31 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -464,6 +464,13 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { } } +/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for +/// CC. 
+static std::pair +getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { + return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)}; +} + bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl &InArgs) const { @@ -477,11 +484,19 @@ bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( // Check if the caller and callee will handle arguments in the same way. const AArch64TargetLowering &TLI = *getTLI(); - CCAssignFn *CalleeAssignFn = TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg); - CCAssignFn *CallerAssignFn = - TLI.CCAssignFnForCall(CallerCC, CallerF.isVarArg()); - - if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFn, *CallerAssignFn)) + CCAssignFn *CalleeAssignFnFixed; + CCAssignFn *CalleeAssignFnVarArg; + std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) = + getAssignFnsForCC(CalleeCC, TLI); + + CCAssignFn *CallerAssignFnFixed; + CCAssignFn *CallerAssignFnVarArg; + std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) = + getAssignFnsForCC(CallerCC, TLI); + + if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed, + *CalleeAssignFnVarArg, *CallerAssignFnFixed, + *CallerAssignFnVarArg)) return false; // Make sure that the caller and callee preserve all of the same registers. @@ -508,12 +523,15 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( CallingConv::ID CallerCC = CallerF.getCallingConv(); const AArch64TargetLowering &TLI = *getTLI(); + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + // We have outgoing arguments. Make sure that we can tail call with them. 
SmallVector OutLocs; CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext()); - if (!analyzeArgInfo(OutInfo, OutArgs, - *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) { + if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) { LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n"); return false; } @@ -741,10 +759,9 @@ bool AArch64CallLowering::lowerTailCall( // Find out which ABI gets to decide where things go. CallingConv::ID CalleeCC = Info.CallConv; - CCAssignFn *AssignFnFixed = - TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false); - CCAssignFn *AssignFnVarArg = - TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true); + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); MachineInstrBuilder CallSeqStart; if (!IsSibCall) @@ -787,8 +804,7 @@ bool AArch64CallLowering::lowerTailCall( unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); SmallVector OutLocs; CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); - analyzeArgInfo(OutInfo, OutArgs, - *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg)); + analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg); // The callee will pop the argument stack as a tail call. Thus, we must // keep it 16-byte aligned. @@ -879,10 +895,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return lowerTailCall(MIRBuilder, Info, OutArgs); // Find out which ABI gets to decide where things go. 
- CCAssignFn *AssignFnFixed = - TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false); - CCAssignFn *AssignFnVarArg = - TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true); + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = + getAssignFnsForCC(Info.CallConv, TLI); MachineInstrBuilder CallSeqStart; CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll index 78ab27e1039..115fa4fb2a3 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll @@ -77,32 +77,77 @@ define i32 @test_nonvoid_ret() { declare void @varargs(i32, double, i64, ...) define void @test_varargs() { - ; On Darwin, everything is passed on the stack. Since the caller has no stack, - ; we don't tail call. - ; DARWIN-LABEL: name: test_varargs - ; DARWIN: bb.1 (%ir-block.0): - ; DARWIN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; DARWIN: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; DARWIN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; DARWIN: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; DARWIN: $w0 = COPY [[C]](s32) - ; DARWIN: $d0 = COPY [[C1]](s64) - ; DARWIN: $x1 = COPY [[C2]](s64) + ; COMMON-LABEL: name: test_varargs + ; COMMON: bb.1 (%ir-block.0): + ; COMMON: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; COMMON: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; COMMON: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; COMMON: $w0 = COPY [[C]](s32) + ; COMMON: $d0 = COPY [[C1]](s64) + ; COMMON: $x1 = COPY [[C2]](s64) + ; COMMON: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1 + tail call void(i32, double, i64, ...) 
@varargs(i32 42, double 1.0, i64 12) + ret void +} + +; Darwin should not tail call here, because the last parameter to @varargs is +; not fixed. So, it's passed on the stack, which will make us not fit. On +; Windows, it's passed in a register, so it's safe to tail call. +define void @test_varargs_2() { + ; DARWIN-LABEL: name: test_varargs_2 + ; DARWIN-NOT: TCRETURNdi @varargs ; DARWIN: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 - ; DARWIN: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp ; DARWIN: RET_ReallyLR - ; Windows uses registers, so we don't need to worry about using the stack. - ; WINDOWS-LABEL: name: test_varargs + ; WINDOWS-LABEL: name: test_varargs_2 ; WINDOWS: bb.1 (%ir-block.0): ; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314 ; WINDOWS: $w0 = COPY [[C]](s32) ; WINDOWS: $d0 = COPY [[C1]](s64) ; WINDOWS: $x1 = COPY [[C2]](s64) - ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1 - tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12) + ; WINDOWS: $x2 = COPY [[C3]](s64) + ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2 + tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314) + ret void +} + +; Same deal here, even though we have enough room to fit. On Darwin, we'll pass +; the last argument to @varargs on the stack. We don't allow tail calling +; varargs arguments that are on the stack. 
+define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) { + ; DARWIN-LABEL: name: test_varargs_3 + ; DARWIN-NOT: TCRETURNdi @varargs + ; DARWIN: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 + ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp + ; DARWIN: RET_ReallyLR + + ; WINDOWS-LABEL: name: test_varargs_3 + ; WINDOWS: bb.1 (%ir-block.1): + ; WINDOWS: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 + ; WINDOWS: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; WINDOWS: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; WINDOWS: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; WINDOWS: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 + ; WINDOWS: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4 + ; WINDOWS: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5 + ; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 + ; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 + ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1) + ; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314 + ; WINDOWS: $w0 = COPY [[C]](s32) + ; WINDOWS: $d0 = COPY [[C1]](s64) + ; WINDOWS: $x1 = COPY [[C2]](s64) + ; WINDOWS: $x2 = COPY [[C3]](s64) + ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2 + tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314) ret void } -- 2.50.1