[AArch64] Implement support for windows style vararg functions

author Martin Storsjo <martin@martin.st>

Thu, 13 Jul 2017 17:03:12 +0000 (17:03 +0000)

committer Martin Storsjo <martin@martin.st>

Thu, 13 Jul 2017 17:03:12 +0000 (17:03 +0000)
author Martin Storsjo <martin@martin.st>
Thu, 13 Jul 2017 17:03:12 +0000 (17:03 +0000)
committer Martin Storsjo <martin@martin.st>
Thu, 13 Jul 2017 17:03:12 +0000 (17:03 +0000)
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td

index 938779d23690dbaedadc61fe2b8533ea8f49bdef..291bc5ea858e363ae711b51cf6fb624c1f3d6b1d 100644 (file)
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -118,6 +118,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[
        CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
  ]>;
  
+// Calling convention for vararg functions on Windows, which pass floats in
+// integer registers. The rules below promote f16/f32 values to f64,
+// bit-convert f64 values to i64, and delegate everything else (including the
+// converted values) to the standard AAPCS convention.
+def CC_AArch64_Win64_VarArg : CallingConv<[
+  CCIfType<[f16, f32],    CCPromoteToType<f64>>,
+  CCIfType<[f64], CCBitConvertToType<i64>>,
+  CCDelegateTo<CC_AArch64_AAPCS>
+]>;
+
  
  // Darwin uses a calling convention which differs in only two ways
  // from the standard one at this level:
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp

index 3682b62d2b84d3fbed41a97533dfe561f6739ab0..97396057dce078a27a04bd98da184e8bdf4e142b 100644 (file)
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -5138,6 +5138,7 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
    return selectOperator(I, I->getOpcode());
    // Silence warnings.
    (void)&CC_AArch64_DarwinPCS_VarArg;
+  (void)&CC_AArch64_Win64_VarArg;
  }
  
  namespace llvm {
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp

index e96ee7d29b3e85e79eee286c15847aa54f55b274..2436c4eb76fb3e14474f9bfae631730d568020a2 100644 (file)
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -41,6 +41,10 @@
  // |                                   |
  // |-----------------------------------|
  // |                                   |
+// | (Win64 only) varargs from reg     |
+// |                                   |
+// |-----------------------------------|
+// |                                   |
  // | prev_fp, prev_lr                  |
  // | (a.k.a. "frame record")           |
  // |-----------------------------------| <- fp(=x29)
@@ -950,7 +954,12 @@ static void computeCalleeSaveRegisterPairs(
            CC == CallingConv::PreserveMost ||
            (Count & 1) == 0) &&
           "Odd number of callee-saved regs to spill!");
-  unsigned Offset = AFI->getCalleeSavedStackSize();
+  int Offset = AFI->getCalleeSavedStackSize();
+
+  unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  if (Subtarget.isTargetWindows())
+    Offset -= alignTo(GPRSaveSize, 16);
  
    for (unsigned i = 0; i < Count; ++i) {
      RegPairInfo RPI;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index bce87c0744b9a53c1b4b839d8cf5c2661ad0f47b..640c4b0df901bca7072500aefff942d2e6077052 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2650,6 +2650,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
    case CallingConv::PreserveMost:
    case CallingConv::CXX_FAST_TLS:
    case CallingConv::Swift:
+    if (Subtarget->isTargetWindows() && IsVarArg)
+      return CC_AArch64_Win64_VarArg;
      if (!Subtarget->isTargetDarwin())
        return CC_AArch64_AAPCS;
      return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
@@ -2828,6 +2830,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
        // The AAPCS variadic function ABI is identical to the non-variadic
        // one. As a result there may be more arguments in registers and we should
        // save them for future reference.
+      // Win64 variadic functions also pass arguments in registers, but all float
+      // arguments are passed in integer registers.
        saveVarArgRegisters(CCInfo, DAG, DL, Chain);
      }
  
@@ -2881,7 +2885,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
    unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
    int GPRIdx = 0;
    if (GPRSaveSize != 0) {
-    GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
+    if (Subtarget->isTargetWindows())
+      GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
+    else
+      GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
  
      SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
  
@@ -2890,7 +2897,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
        SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
        SDValue Store = DAG.getStore(
            Val.getValue(1), DL, Val, FIN,
-          MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
+          Subtarget->isTargetWindows()
+              ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+                                                  GPRIdx,
+                                                  (i - FirstVariadicGPR) * 8)
+              : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
        MemOps.push_back(Store);
        FIN =
            DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
@@ -2899,7 +2910,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
    FuncInfo->setVarArgsGPRIndex(GPRIdx);
    FuncInfo->setVarArgsGPRSize(GPRSaveSize);
  
-  if (Subtarget->hasFPARMv8()) {
+  if (Subtarget->hasFPARMv8() && !Subtarget->isTargetWindows()) {
      static const MCPhysReg FPRArgRegs[] = {
          AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
          AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
@@ -4491,6 +4502,21 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
                        MachinePointerInfo(SV));
  }
  
+/// Lower a VASTART node for Windows targets: the va_list is a single pointer,
+/// so this emits one store of a frame address into it.
+///
+/// The stored address is the frame index of the GPR vararg save area when
+/// that area is non-empty (VarArgsGPRSize > 0); otherwise it is the frame
+/// index recorded as VarArgsStackIndex.
+///
+/// Operand 0 of \p Op is used as the chain of the store, operand 1 as the
+/// address stored to, and operand 2 supplies the IR Value for the store's
+/// MachinePointerInfo.
+SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  AArch64FunctionInfo *FuncInfo =
+      DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+
+  SDLoc DL(Op);
+  // Point at the register save area if any argument registers were saved.
+  // NOTE(review): presumably the save area is laid out directly below the
+  // stack-passed arguments so one pointer can walk both - confirm against
+  // saveVarArgRegisters and the frame lowering.
+  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
+                                     ? FuncInfo->getVarArgsGPRIndex()
+                                     : FuncInfo->getVarArgsStackIndex(),
+                                 getPointerTy(DAG.getDataLayout()));
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+                      MachinePointerInfo(SV));
+}
+
  SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
                                                  SelectionDAG &DAG) const {
    // The layout of the va_list struct is specified in the AArch64 Procedure Call
@@ -4562,8 +4588,12 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
  
  SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
-  return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG)
-                                     : LowerAAPCS_VASTART(Op, DAG);
+  if (Subtarget->isTargetWindows())
+    return LowerWin64_VASTART(Op, DAG);
+  else if (Subtarget->isTargetDarwin())
+    return LowerDarwin_VASTART(Op, DAG);
+  else
+    return LowerAAPCS_VASTART(Op, DAG);
  }
  
  SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
@@ -4571,7 +4601,8 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
    // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
    // pointer.
    SDLoc DL(Op);
-  unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
+  unsigned VaListSize =
+      Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
    const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
    const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  
@@ -10780,7 +10811,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  
  unsigned
  AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
-  if (Subtarget->isTargetDarwin())
+  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
      return getPointerTy(DL).getSizeInBits();
  
    return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h

index ecc2517fb288dfbe410aa2aef2ef0b937dc265c0..69a1ac2a5e989c7ac2d5a06a176b1f585716078c 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -541,6 +541,7 @@ private:
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/AArch64/win64_vararg.ll b/test/CodeGen/AArch64/win64_vararg.ll

new file mode 100644 (file)

index 0000000..b760e4a
--- /dev/null
+++ b/test/CodeGen/AArch64/win64_vararg.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=aarch64-pc-win32 | FileCheck %s
+
+; Variadic function with one fixed i32 argument. It calls va_start and passes
+; the resulting va_list pointer to other_func. The expected output saves x1-x7
+; at [sp, #24] through [sp, #72] and sets x0 to sp+24 before the call.
+define void @pass_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: sub     sp, sp, #80
+; CHECK: add     x8, sp, #24
+; CHECK: add     x0, sp, #24
+; CHECK: stp     x6, x7, [sp, #64]
+; CHECK: stp     x4, x5, [sp, #48]
+; CHECK: stp     x2, x3, [sp, #32]
+; CHECK: str     x1, [sp, #24]
+; CHECK: stp     x30, x8, [sp]
+; CHECK: bl      other_func
+; CHECK: ldr     x30, [sp], #80
+; CHECK: ret
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  call void @other_func(i8* %ap2)
+  ret void
+}
+
+declare void @other_func(i8*) local_unnamed_addr
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_copy(i8*, i8*) nounwind
+
+; Nine fixed i64 arguments before the ellipsis. The expected output lists no
+; saves of argument registers; after allocating a 16-byte frame the va_list
+; pointer is sp+24, which is stored to [sp, #8] and also placed in x0.
+; CHECK-LABEL: f9:
+; CHECK: sub     sp, sp, #16
+; CHECK: add     x8, sp, #24
+; CHECK: add     x0, sp, #24
+; CHECK: str     x8, [sp, #8]
+; CHECK: add     sp, sp, #16
+; CHECK: ret
+define i8* @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  ret i8* %ap2
+}
+
+; Eight fixed i64 arguments before the ellipsis. The expected output lists no
+; saves of argument registers; after allocating a 16-byte frame the va_list
+; pointer is sp+16, which is stored to [sp, #8] and also placed in x0.
+; CHECK-LABEL: f8:
+; CHECK: sub     sp, sp, #16
+; CHECK: add     x8, sp, #16
+; CHECK: add     x0, sp, #16
+; CHECK: str     x8, [sp, #8]
+; CHECK: add     sp, sp, #16
+; CHECK: ret
+define i8* @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  ret i8* %ap2
+}
+
+; Seven fixed i64 arguments before the ellipsis. The expected output stores x7
+; at [sp, #8], paired with the va_list slot at [sp], and uses sp+8 (the
+; address of the saved x7) as the va_list pointer placed in x8 and x0.
+; CHECK-LABEL: f7:
+; CHECK: sub     sp, sp, #16
+; CHECK: add     x8, sp, #8
+; CHECK: add     x0, sp, #8
+; CHECK: stp     x8, x7, [sp], #16
+; CHECK: ret
+define i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  ret i8* %ap2
+}
+
+; One fixed i64 argument; calls va_start on %ap and then va_copy from %ap to
+; %cp. The expected output saves x1-x7 at [sp, #24] through [sp, #72] and
+; writes the same va_list pointer (x8 = sp+24) to two slots: [sp, #16] and
+; [sp, #8].
+; CHECK-LABEL: copy1:
+; CHECK: sub     sp, sp, #80
+; CHECK: add     x8, sp, #24
+; CHECK: stp     x6, x7, [sp, #64]
+; CHECK: stp     x4, x5, [sp, #48]
+; CHECK: stp     x2, x3, [sp, #32]
+; CHECK: stp     x8, x1, [sp, #16]
+; CHECK: str     x8, [sp, #8]
+; CHECK: add     sp, sp, #80
+; CHECK: ret
+define void @copy1(i64 %a0, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %cp = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  %cp1 = bitcast i8** %cp to i8*
+  call void @llvm.va_start(i8* %ap1)
+  call void @llvm.va_copy(i8* %cp1, i8* %ap1)
+  ret void
+}
author	Martin Storsjo <martin@martin.st>
	Thu, 13 Jul 2017 17:03:12 +0000 (17:03 +0000)
committer	Martin Storsjo <martin@martin.st>
	Thu, 13 Jul 2017 17:03:12 +0000 (17:03 +0000)
lib/Target/AArch64/AArch64CallingConvention.td		patch \| blob \| history
lib/Target/AArch64/AArch64FastISel.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64FrameLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.h		patch \| blob \| history
test/CodeGen/AArch64/win64_vararg.ll	[new file with mode: 0644]	patch \| blob