From fec0c652bf5ecc3af308d743b06b48da2906cf5b Mon Sep 17 00:00:00 2001 From: Martin Storsjo Date: Thu, 13 Jul 2017 17:03:12 +0000 Subject: [PATCH] [AArch64] Implement support for windows style vararg functions Pass parameters properly in calls to such functions (pass all floats in integer registers), and handle va_start properly (allocate stack immediately below the arguments on the stack, to save the register arguments into a single continuous array). Differential Revision: https://reviews.llvm.org/D35006 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307928 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/AArch64CallingConvention.td | 7 ++ lib/Target/AArch64/AArch64FastISel.cpp | 1 + lib/Target/AArch64/AArch64FrameLowering.cpp | 11 ++- lib/Target/AArch64/AArch64ISelLowering.cpp | 45 +++++++-- lib/Target/AArch64/AArch64ISelLowering.h | 1 + test/CodeGen/AArch64/win64_vararg.ll | 95 +++++++++++++++++++ 6 files changed, 152 insertions(+), 8 deletions(-) create mode 100644 test/CodeGen/AArch64/win64_vararg.ll diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 938779d2369..291bc5ea858 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -118,6 +118,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> ]>; +// Vararg functions on windows pass floats in integer registers +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, f32], CCPromoteToType>, + CCIfType<[f64], CCBitConvertToType>, + CCDelegateTo +]>; + // Darwin uses a calling convention which differs in only two ways // from the standard one at this level: diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 3682b62d2b8..97396057dce 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -5138,6 +5138,7 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { return selectOperator(I, I->getOpcode()); // Silence warnings. (void)&CC_AArch64_DarwinPCS_VarArg; + (void)&CC_AArch64_Win64_VarArg; } namespace llvm { diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index e96ee7d29b3..2436c4eb76f 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -41,6 +41,10 @@ // | | // |-----------------------------------| // | | +// | (Win64 only) varargs from reg | +// | | +// |-----------------------------------| +// | | // | prev_fp, prev_lr | // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) @@ -950,7 +954,12 @@ static void computeCalleeSaveRegisterPairs( CC == CallingConv::PreserveMost || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); - unsigned Offset = AFI->getCalleeSavedStackSize(); + int Offset = AFI->getCalleeSavedStackSize(); + + unsigned GPRSaveSize = AFI->getVarArgsGPRSize(); + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + if (Subtarget.isTargetWindows()) + Offset -= alignTo(GPRSaveSize, 16); for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index bce87c0744b..640c4b0df90 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2650,6 +2650,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::PreserveMost: case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: + if (Subtarget->isTargetWindows() && IsVarArg) + return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; @@ -2828,6 +2830,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should // save them for future reference. + // Win64 variadic functions also pass arguments in registers, but all float + // arguments are passed in integer registers. saveVarArgRegisters(CCInfo, DAG, DL, Chain); } @@ -2881,7 +2885,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); int GPRIdx = 0; if (GPRSaveSize != 0) { - GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); + if (Subtarget->isTargetWindows()) + GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); + else + GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); @@ -2890,7 +2897,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); SDValue Store = DAG.getStore( Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); + Subtarget->isTargetWindows() + ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + GPRIdx, + (i - FirstVariadicGPR) * 8) + : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); @@ -2899,7 +2910,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, FuncInfo->setVarArgsGPRIndex(GPRIdx); FuncInfo->setVarArgsGPRSize(GPRSaveSize); - if (Subtarget->hasFPARMv8()) { + if (Subtarget->hasFPARMv8() && !Subtarget->isTargetWindows()) { static const MCPhysReg FPRArgRegs[] = { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; @@ -4491,6 +4502,21 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, MachinePointerInfo(SV)); } +SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op, + SelectionDAG &DAG) const { + AArch64FunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo(); + + SDLoc DL(Op); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0 + ? FuncInfo->getVarArgsGPRIndex() + : FuncInfo->getVarArgsStackIndex(), + getPointerTy(DAG.getDataLayout())); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV)); +} + SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call @@ -4562,8 +4588,12 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) - : LowerAAPCS_VASTART(Op, DAG); + if (Subtarget->isTargetWindows()) + return LowerWin64_VASTART(Op, DAG); + else if (Subtarget->isTargetDarwin()) + return LowerDarwin_VASTART(Op, DAG); + else + return LowerAAPCS_VASTART(Op, DAG); } SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, @@ -4571,7 +4601,8 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); - unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32; + unsigned VaListSize = + Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); @@ -10780,7 +10811,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { unsigned AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const { - if (Subtarget->isTargetDarwin()) + if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) return getPointerTy(DL).getSizeInBits(); return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32; diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index ecc2517fb28..69a1ac2a5e9 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -541,6 +541,7 @@ private: SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/AArch64/win64_vararg.ll b/test/CodeGen/AArch64/win64_vararg.ll new file mode 100644 index 00000000000..b760e4acd16 --- /dev/null +++ b/test/CodeGen/AArch64/win64_vararg.ll @@ -0,0 +1,95 @@ +; RUN: llc < %s -mtriple=aarch64-pc-win32 | FileCheck %s + +define void @pass_va(i32 %count, ...) nounwind { +entry: +; CHECK: sub sp, sp, #80 +; CHECK: add x8, sp, #24 +; CHECK: add x0, sp, #24 +; CHECK: stp x6, x7, [sp, #64] +; CHECK: stp x4, x5, [sp, #48] +; CHECK: stp x2, x3, [sp, #32] +; CHECK: str x1, [sp, #24] +; CHECK: stp x30, x8, [sp] +; CHECK: bl other_func +; CHECK: ldr x30, [sp], #80 +; CHECK: ret + %ap = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %ap2 = load i8*, i8** %ap, align 8 + call void @other_func(i8* %ap2) + ret void +} + +declare void @other_func(i8*) local_unnamed_addr + +declare void @llvm.va_start(i8*) nounwind +declare void @llvm.va_copy(i8*, i8*) nounwind + +; CHECK-LABEL: f9: +; CHECK: sub sp, sp, #16 +; CHECK: add x8, sp, #24 +; CHECK: add x0, sp, #24 +; CHECK: str x8, [sp, #8] +; CHECK: add sp, sp, #16 +; CHECK: ret +define i8* @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %ap2 = load i8*, i8** %ap, align 8 + ret i8* %ap2 +} + +; CHECK-LABEL: f8: +; CHECK: sub sp, sp, #16 +; CHECK: add x8, sp, #16 +; CHECK: add x0, sp, #16 +; CHECK: str x8, [sp, #8] +; CHECK: add sp, sp, #16 +; CHECK: ret +define i8* @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %ap2 = load i8*, i8** %ap, align 8 + ret i8* %ap2 +} + +; CHECK-LABEL: f7: +; CHECK: sub sp, sp, #16 +; CHECK: add x8, sp, #8 +; CHECK: add x0, sp, #8 +; CHECK: stp x8, x7, [sp], #16 +; CHECK: ret +define i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %ap2 = load i8*, i8** %ap, align 8 + ret i8* %ap2 +} + +; CHECK-LABEL: copy1: +; CHECK: sub sp, sp, #80 +; CHECK: add x8, sp, #24 +; CHECK: stp x6, x7, [sp, #64] +; CHECK: stp x4, x5, [sp, #48] +; CHECK: stp x2, x3, [sp, #32] +; CHECK: stp x8, x1, [sp, #16] +; CHECK: str x8, [sp, #8] +; CHECK: add sp, sp, #80 +; CHECK: ret +define void @copy1(i64 %a0, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %cp = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + %cp1 = bitcast i8** %cp to i8* + call void @llvm.va_start(i8* %ap1) + call void @llvm.va_copy(i8* %cp1, i8* %ap1) + ret void +} -- 2.40.0