void MangleContext::anchor() { }
-enum StdOrFastCC {
- SOF_OTHER,
- SOF_FAST,
- SOF_STD
+enum CCMangling {
+ CCM_Other,
+ CCM_Fast,
+ CCM_Vector,
+ CCM_Std
};
static bool isExternC(const NamedDecl *ND) {
  if (const VarDecl *VD = dyn_cast<VarDecl>(ND))
    return VD->isExternC();
  return cast<FunctionDecl>(ND)->isExternC();
}
-static StdOrFastCC getStdOrFastCallMangling(const ASTContext &Context,
- const NamedDecl *ND) {
+static CCMangling getCallingConvMangling(const ASTContext &Context,
+ const NamedDecl *ND) {
const TargetInfo &TI = Context.getTargetInfo();
const llvm::Triple &Triple = TI.getTriple();
- if (!Triple.isOSWindows() || Triple.getArch() != llvm::Triple::x86)
- return SOF_OTHER;
+ if (!Triple.isOSWindows() ||
+ !(Triple.getArch() == llvm::Triple::x86 ||
+ Triple.getArch() == llvm::Triple::x86_64))
+ return CCM_Other;
if (Context.getLangOpts().CPlusPlus && !isExternC(ND) &&
TI.getCXXABI() == TargetCXXABI::Microsoft)
- return SOF_OTHER;
+ return CCM_Other;
const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND);
if (!FD)
- return SOF_OTHER;
+ return CCM_Other;
QualType T = FD->getType();
const FunctionType *FT = T->castAs<FunctionType>();
CallingConv CC = FT->getCallConv();
switch (CC) {
default:
- return SOF_OTHER;
+ return CCM_Other;
case CC_X86FastCall:
- return SOF_FAST;
+ return CCM_Fast;
case CC_X86StdCall:
- return SOF_STD;
+ return CCM_Std;
+ case CC_X86VectorCall:
+ return CCM_Vector;
}
}
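// For illustration: on a Windows x86 or x86-64 target, a __fastcall function
// maps to CCM_Fast, __stdcall to CCM_Std, and __vectorcall to CCM_Vector;
// everything else (including any function on a non-Windows, non-x86 target)
// is CCM_Other and receives no calling-convention name decoration.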
bool MangleContext::shouldMangleDeclName(const NamedDecl *D) {
const ASTContext &ASTContext = getASTContext();
- StdOrFastCC CC = getStdOrFastCallMangling(ASTContext, D);
- if (CC != SOF_OTHER)
+ CCMangling CC = getCallingConvMangling(ASTContext, D);
+ if (CC != CCM_Other)
return true;
  // In C, functions with no attributes never need to be mangled. Fastpath them.
  if (!getASTContext().getLangOpts().CPlusPlus && !D->hasAttrs())
    return false;
  return shouldMangleCXXName(D);
}
void MangleContext::mangleName(const NamedDecl *D, raw_ostream &Out) {
  const ASTContext &ASTContext = getASTContext();
- StdOrFastCC CC = getStdOrFastCallMangling(ASTContext, D);
+ CCMangling CC = getCallingConvMangling(ASTContext, D);
bool MCXX = shouldMangleCXXName(D);
const TargetInfo &TI = Context.getTargetInfo();
- if (CC == SOF_OTHER || (MCXX && TI.getCXXABI() == TargetCXXABI::Microsoft)) {
+ if (CC == CCM_Other || (MCXX && TI.getCXXABI() == TargetCXXABI::Microsoft)) {
if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D))
mangleObjCMethodName(OMD, Out);
    else
      mangleCXXName(D, Out);
    return;
  }
Out << '\01';
- if (CC == SOF_STD)
+ if (CC == CCM_Std)
Out << '_';
- else
+ else if (CC == CCM_Fast)
Out << '@';
  if (!MCXX)
    Out << D->getIdentifier()->getName();
  else
    mangleCXXName(D, Out);
const FunctionDecl *FD = cast<FunctionDecl>(D);
const FunctionType *FT = FD->getType()->castAs<FunctionType>();
const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FT);
+ if (CC == CCM_Vector)
+ Out << '@';
Out << '@';
  if (!Proto) {
    Out << '0';
    return;
  }
  unsigned ArgWords = 0;
  if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
    if (!MD->isStatic())
      ++ArgWords;
for (const auto &AT : Proto->param_types())
- // Size should be aligned to DWORD boundary
- ArgWords += llvm::RoundUpToAlignment(ASTContext.getTypeSize(AT), 32) / 32;
- Out << 4 * ArgWords;
+ // Size should be aligned to pointer size.
+ ArgWords += llvm::RoundUpToAlignment(ASTContext.getTypeSize(AT),
+ TI.getPointerWidth(0)) /
+ TI.getPointerWidth(0);
+ Out << ((TI.getPointerWidth(0) / 8) * ArgWords);
}
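// For illustration (matching the tests below): 'void __vectorcall v4(int)'
// now mangles to "\01v4@@4" on x86-32 and to "\01v4@@8" on x86-64, since the
// trailing byte count rounds each argument up to the target pointer width
// rather than to a hard-coded DWORD.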
void MangleContext::mangleGlobalBlock(const BlockDecl *BD,
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
return (CC == CC_C ||
+ CC == CC_X86VectorCall ||
CC == CC_IntelOclBicc ||
CC == CC_X86_64Win64) ? CCCR_OK : CCCR_Warning;
}
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
return (CC == CC_C ||
+ CC == CC_X86VectorCall ||
CC == CC_IntelOclBicc ||
CC == CC_X86_64SysV) ? CCCR_OK : CCCR_Warning;
}
// TODO: Add support for __pascal to LLVM.
case CC_X86Pascal: return llvm::CallingConv::C;
- // TODO: Add support for __vectorcall to LLVM.
- case CC_X86VectorCall: return llvm::CallingConv::C;
+ case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
}
}
CharUnits UnionSize = CharUnits::Zero();
for (const auto *FD : RD->fields()) {
+ // Skip zero length bitfields.
+ if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
CharUnits FieldSize = Context.getTypeSizeInChars(FD->getType());
}
for (const auto *FD : RD->fields()) {
+ // Skip zero length bitfields.
+ if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
Fields.push_back(FD);
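// A zero-width bit-field contributes no storage, only alignment, so skipping
// it lets an otherwise-expandable aggregate such as the (illustrative)
// 'struct { float x, y; int : 0; float z; }' pass through the bit-field
// assert; the HVAWithEmptyBitField test at the end of this patch exercises
// the same shape.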
if (RD->hasNonTrivialCopyConstructor())
return RAA_Indirect;
- // Win64 passes objects larger than 8 bytes indirectly.
- if (getContext().getTypeSize(RD->getTypeForDecl()) > 64)
+ // If an object has a destructor, we'd really like to pass it indirectly
+ // because it allows us to elide copies. Unfortunately, MSVC makes that
+ // impossible for small types, which it will pass in a single register or
+ // stack slot. Most objects with dtors are large-ish, so handle that early.
+ // We can't call out all large objects as being indirect because there are
+ // multiple x64 calling conventions and the C++ ABI code shouldn't dictate
+ // how we pass large POD types.
+ if (RD->hasNonTrivialDestructor() &&
+ getContext().getTypeSize(RD->getTypeForDecl()) > 64)
return RAA_Indirect;
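// As a rough illustration (hypothetical types): 'struct S { ~S(); char buf[16]; }'
// has a non-trivial destructor and is wider than 64 bits, so it takes the
// RAA_Indirect path here, while 'struct T { ~T(); int x; }' fits MSVC's
// small-type register/stack-slot rule and falls through to the checks below.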
// We have a trivial copy constructor or no copy constructors, but we have
return Ty;
}
+/// Returns true if this type can be passed in SSE registers with the
+/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
+static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half)
+ return true;
+ } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
+ // registers specially.
+ unsigned VecSize = Context.getTypeSize(VT);
+ if (VecSize == 128 || VecSize == 256 || VecSize == 512)
+ return true;
+ }
+ return false;
+}
+
+/// Returns true if this aggregate is small enough to be passed in SSE registers
+/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
+static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
+ return NumMembers <= 4;
+}
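// Taken together, these two predicates capture the vectorcall notion of a
// homogeneous vector/float aggregate: e.g. 'struct HFA4 { double w, x, y, z; }'
// (four identical float members, as in the tests below) qualifies, a
// five-member variant does not, and neither does anything built from 'half'
// or a 64-bit MMX vector.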
+
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
/// \brief Similar to llvm::CCState, but for Clang.
struct CCState {
- CCState(unsigned CC) : CC(CC), FreeRegs(0) {}
+ CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
unsigned CC;
unsigned FreeRegs;
- unsigned StackOffset;
- bool UseInAlloca;
+ unsigned FreeSSERegs;
};
/// X86_32ABIInfo - The X86-32 ABI information.
return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
}
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorTypeForVectorCall(getContext(), Ty);
+ }
+
+ bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+ uint64_t NumMembers) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorCallAggregateSmallEnough(NumMembers);
+ }
+
bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
/// getIndirectResult - Give a source type \arg Ty, return a suitable result
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ if (State.CC == llvm::CallingConv::X86_VectorCall &&
+ isHomogeneousAggregate(RetTy, Base, NumElts)) {
+ // The LLVM struct type for such an aggregate should lower properly.
+ return ABIArgInfo::getDirect();
+ }
+
if (const VectorType *VT = RetTy->getAs<VectorType>()) {
// On Darwin, some vectors are returned in registers.
if (IsDarwinVectorABI) {
State.FreeRegs -= SizeInRegs;
- if (State.CC == llvm::CallingConv::X86_FastCall) {
+ if (State.CC == llvm::CallingConv::X86_FastCall ||
+ State.CC == llvm::CallingConv::X86_VectorCall) {
if (Size > 32)
return false;
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
- if (isAggregateTypeForABI(Ty)) {
- if (const RecordType *RT = Ty->getAs<RecordType>()) {
- // Check with the C++ ABI first.
- CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
- if (RAA == CGCXXABI::RAA_Indirect) {
- return getIndirectResult(Ty, false, State);
- } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
- // The field index doesn't matter, we'll fix it up later.
- return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
- }
+ // Check with the C++ ABI first.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (RT) {
+ CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+ if (RAA == CGCXXABI::RAA_Indirect) {
+ return getIndirectResult(Ty, false, State);
+ } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
+ // The field index doesn't matter, we'll fix it up later.
+ return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
+ }
+ }
+
+ // vectorcall adds the concept of a homogeneous vector aggregate, similar
+ // to other targets.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ if (State.CC == llvm::CallingConv::X86_VectorCall &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+ }
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
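// Note the accounting above: each HVA member consumes one of vectorcall's six
// SSE argument registers, and once the pool is exhausted the whole aggregate
// is passed indirectly by address rather than byval (the hfa2/hva2 tests in
// this patch exercise that spill).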
+
+ if (isAggregateTypeForABI(Ty)) {
+ if (RT) {
// Structs are always byval on win32, regardless of what they contain.
if (IsWin32StructABI)
return getIndirectResult(Ty, true, State);
if (getContext().getTypeSize(Ty) <= 4*32 &&
canExpandIndirectArgument(Ty, getContext()))
return ABIArgInfo::getExpandWithPadding(
- State.CC == llvm::CallingConv::X86_FastCall, PaddingType);
+ State.CC == llvm::CallingConv::X86_FastCall ||
+ State.CC == llvm::CallingConv::X86_VectorCall,
+ PaddingType);
return getIndirectResult(Ty, true, State);
}
CCState State(FI.getCallingConvention());
if (State.CC == llvm::CallingConv::X86_FastCall)
State.FreeRegs = 2;
- else if (FI.getHasRegParm())
+ else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ State.FreeRegs = 2;
+ State.FreeSSERegs = 6;
+ } else if (FI.getHasRegParm())
State.FreeRegs = FI.getRegParm();
else
State.FreeRegs = DefaultNumRegisterParameters;
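// So on x86-32, vectorcall gets the same two integer registers as fastcall
// (ECX and EDX) plus six SSE registers (XMM0-XMM5) for vector and HVA
// arguments.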
/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
- ABIArgInfo classify(QualType Ty, bool IsReturnType) const;
+ ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
+ bool IsReturnType) const;
public:
WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
CodeGenFunction &CGF) const override;
+
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorTypeForVectorCall(getContext(), Ty);
+ }
+
+ bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+ uint64_t NumMembers) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorCallAggregateSmallEnough(NumMembers);
+ }
};
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
return ResAddr;
}
-ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const {
+ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
+ bool IsReturnType) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- uint64_t Size = getContext().getTypeSize(Ty);
+ TypeInfo Info = getContext().getTypeInfo(Ty);
+ uint64_t Width = Info.Width;
+ unsigned Align = getContext().toCharUnitsFromBits(Info.Align).getQuantity();
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
// FIXME: mingw-w64-gcc emits 128-bit struct as i128
- if (Size == 128 && getTarget().getTriple().isWindowsGNUEnvironment())
+ if (Width == 128 && getTarget().getTriple().isWindowsGNUEnvironment())
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
- Size));
+ Width));
+ }
+
+ // vectorcall adds the concept of a homogeneous vector aggregate, similar to
+ // other targets.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (FreeSSERegs >= NumElts) {
+ FreeSSERegs -= NumElts;
+ if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+ }
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
+
if (Ty->isMemberPointerType()) {
// If the member pointer is represented by an LLVM int or ptr, pass it
// directly.
if (RT || Ty->isMemberPointerType()) {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
- if (Size > 64 || !llvm::isPowerOf2_64(Size))
+ if (Width > 64 || !llvm::isPowerOf2_64(Width))
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
// Otherwise, coerce it to a small integer.
- return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
}
// Bool type is always extended to the ABI, other builtin types are not
}
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ bool IsVectorCall =
+ FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall;
+
+ // We can use up to 4 SSE return registers with vectorcall.
+ unsigned FreeSSERegs = IsVectorCall ? 4 : 0;
if (!getCXXABI().classifyReturnType(FI))
- FI.getReturnInfo() = classify(FI.getReturnType(), true);
+ FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true);
+ // We can use up to 6 SSE register parameters with vectorcall.
+ FreeSSERegs = IsVectorCall ? 6 : 0;
for (auto &I : FI.arguments())
- I.info = classify(I.type, false);
+ I.info = classify(I.type, FreeSSERegs, false);
}
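// For illustration: a returned HFA2 fits within the four SSE return registers
// and comes back directly (see the hfa5 test below), while the six-register
// parameter budget is shared across all vector and HVA arguments in
// declaration order, as the hfa2/hfa3 tests demonstrate.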
llvm::Value *WinX86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s
// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-mingw32 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-mingw32 | FileCheck %s --check-prefix=X64
void __stdcall f1(void) {}
// CHECK: define x86_stdcallcc void @"\01_f1@0"
+// X64: define void @f1(
void __fastcall f2(void) {}
// CHECK: define x86_fastcallcc void @"\01@f2@0"
+// X64: define void @f2(
void __stdcall f3() {}
// CHECK: define x86_stdcallcc void @"\01_f3@0"
+// X64: define void @f3(
void __fastcall f4(char a) {}
// CHECK: define x86_fastcallcc void @"\01@f4@4"
+// X64: define void @f4(
void __fastcall f5(short a) {}
// CHECK: define x86_fastcallcc void @"\01@f5@4"
+// X64: define void @f5(
void __fastcall f6(int a) {}
// CHECK: define x86_fastcallcc void @"\01@f6@4"
+// X64: define void @f6(
void __fastcall f7(long a) {}
// CHECK: define x86_fastcallcc void @"\01@f7@4"
+// X64: define void @f7(
void __fastcall f8(long long a) {}
// CHECK: define x86_fastcallcc void @"\01@f8@8"
+// X64: define void @f8(
void __fastcall f9(long long a, char b, char c, short d) {}
-// CHECK: define x86_fastcallcc void @"\01@f9@20"(i64 %a, i8 signext %b, i8
-// signext %c, i16 signext %d)
+// CHECK: define x86_fastcallcc void @"\01@f9@20"(i64 %a, i8 signext %b, i8 signext %c, i16 signext %d)
+// X64: define void @f9(
void f12(void) {}
// CHECK: define void @f12(
+// X64: define void @f12(
+
+void __vectorcall v1(void) {}
+// CHECK: define x86_vectorcallcc void @"\01v1@@0"(
+// X64: define x86_vectorcallcc void @"\01v1@@0"(
+
+void __vectorcall v2(char a) {}
+// CHECK: define x86_vectorcallcc void @"\01v2@@4"(
+// X64: define x86_vectorcallcc void @"\01v2@@8"(
+
+void __vectorcall v3(short a) {}
+// CHECK: define x86_vectorcallcc void @"\01v3@@4"(
+// X64: define x86_vectorcallcc void @"\01v3@@8"(
+
+void __vectorcall v4(int a) {}
+// CHECK: define x86_vectorcallcc void @"\01v4@@4"(
+// X64: define x86_vectorcallcc void @"\01v4@@8"(
+
+void __vectorcall v5(long long a) {}
+// CHECK: define x86_vectorcallcc void @"\01v5@@8"(
+// X64: define x86_vectorcallcc void @"\01v5@@8"(
+
+void __vectorcall v6(char a, char b) {}
+// CHECK: define x86_vectorcallcc void @"\01v6@@8"(
+// X64: define x86_vectorcallcc void @"\01v6@@16"(
f3();
// CHECK: call x86_thiscallcc void @f3()
}
-// FIXME: Add this to LLVM.
void __vectorcall f61(void) {
-// CHECK-LABEL: define void @f61()
+// CHECK-LABEL: define x86_vectorcallcc void @f61()
f3();
// CHECK: call x86_thiscallcc void @f3()
}
// CHECK: call x86_fastcallcc void @f4()
// CHECK: call x86_stdcallcc void @f5()
// CHECK: call x86_thiscallcc void @f6()
- // CHECK: call void @f61()
+ // CHECK: call x86_vectorcallcc void @f61()
pf1(); pf2(); pf3(); pf4(); pf5(); pf6(); pf7();
// CHECK: call x86_fastcallcc void %{{.*}}()
// CHECK: call x86_stdcallcc void %{{.*}}()
// CHECK: call x86_fastcallcc void %{{.*}}()
// CHECK: call x86_stdcallcc void %{{.*}}()
// CHECK: call x86_thiscallcc void %{{.*}}()
- // CHECK: call void %{{.*}}()
+ // CHECK: call x86_vectorcallcc void %{{.*}}()
return 0;
}
--- /dev/null
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
+
+void __vectorcall v1(int a, int b) {}
+// CHECK: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
+// X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
+
+void __vectorcall v2(char a, char b) {}
+// CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
+// X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
+
+struct Small { int a; };
+void __vectorcall v3(int a, struct Small b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, %struct.Small* byval align 4 %b, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
+
+struct Large { int a[5]; };
+void __vectorcall v4(int a, struct Large b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
+
+struct HFA2 { double x, y; };
+struct HFA4 { double w, x, y, z; };
+struct HFA5 { double v, w, x, y, z; };
+
+void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c)
+
+// HFAs that would require more than six total SSE registers are passed
+// indirectly. Additional vector arguments can consume the rest of the SSE
+// registers.
+void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* inreg %b, double %c)
+// X64: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* align 8 %b, double %c)
+
+// Ensure that we pass builtin types directly while counting them against the
+// SSE register usage.
+void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
+// X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* align 8 %f)
+
+// Aggregates with more than four elements are not HFAs and are passed byval.
+// Because they are not classified as homogeneous, they don't get special
+// handling to ensure alignment.
+void __vectorcall hfa4(struct HFA5 a) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
+// X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
+
+// Return HFAs of 4 or fewer elements in registers.
+static struct HFA2 g_hfa2;
+struct HFA2 __vectorcall hfa5(void) { return g_hfa2; }
+// CHECK: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+// X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+
+typedef float __attribute__((vector_size(16))) v4f32;
+struct HVA2 { v4f32 x, y; };
+struct HVA4 { v4f32 w, x, y, z; };
+
+void __vectorcall hva1(int a, struct HVA4 b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01hva1@@72"(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01hva1@@80"(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c)
+
+void __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
+// CHECK: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* inreg %b, <4 x float> %c)
+// X64: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* align 16 %b, <4 x float> %c)
+
+void __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
+// CHECK: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
+// X64: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* align 16 %f)
+
+typedef float __attribute__((ext_vector_type(3))) v3f32;
+struct OddSizeHVA { v3f32 x, y; };
+
+void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
+// CHECK: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
+// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
-// RUNxX: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC
// RUN: %clang_cc1 -mfloat-abi hard -triple armv7-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM32
// RUN: %clang_cc1 -mfloat-abi hard -triple aarch64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM64
+// RUN: %clang_cc1 -mfloat-abi hard -triple x86_64-unknown-windows-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=X64
+
+#if defined(__x86_64__)
+#define CC __attribute__((vectorcall))
+#else
+#define CC
+#endif
// Test that C++ classes are correctly classified as homogeneous aggregates.
// PPC: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce)
// ARM32: define arm_aapcs_vfpcc void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, { [3 x i64] } %x.coerce)
// ARM64: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
-D1 func_D1(D1 x) { return x; }
+// X64: define x86_vectorcallcc void @"\01_Z7func_D12D1@@24"(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
+D1 CC func_D1(D1 x) { return x; }
// PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
// ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2)
-D2 func_D2(D2 x) { return x; }
+// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
+D2 CC func_D2(D2 x) { return x; }
// PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
// ARM32: define arm_aapcs_vfpcc void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, { [4 x i64] } %x.coerce)
// ARM64: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, %struct.D3* %x)
-D3 func_D3(D3 x) { return x; }
+D3 CC func_D3(D3 x) { return x; }
// PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
// ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3)
-D4 func_D4(D4 x) { return x; }
+D4 CC func_D4(D4 x) { return x; }
// PPC: define [3 x double] @_Z7func_D52D5([3 x double] %x.coerce)
// ARM32: define arm_aapcs_vfpcc %struct.D5 @_Z7func_D52D5(%struct.D5 %x.coerce)
-D5 func_D5(D5 x) { return x; }
+D5 CC func_D5(D5 x) { return x; }
// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
// ARM64: load double*
// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}})
+
+struct Empty { };
+struct Float1 { float x; };
+struct Float2 { float y; };
+struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; };
+
+// PPC: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
+// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase(float %a.0, float %a.1, float %a.2)
+// ARM32: define arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce)
+void CC with_empty_base(HVAWithEmptyBase a) {}
+
+// FIXME: MSVC doesn't consider this an HVA because of the empty base.
+// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(float %a.0, float %a.1, float %a.2)
+
+struct HVAWithEmptyBitField : Float1, Float2 {
+ int : 0; // Takes no space.
+ float z;
+};
+
+// PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
+// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField(float %a.0, float %a.1, float %a.2)
+// ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
+// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
+void CC with_empty_bitfield(HVAWithEmptyBitField a) {}