/// Calling convention for AMDGPU code object kernels.
AMDGPU_KERNEL = 91,
+  /// Register calling convention used for parameter transfer optimization (Intel regcall).
+ X86_RegCall = 92,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
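A minimal IR sketch (distilled from the tests added later in this patch, not itself part of the change) of how the new convention is spelled once the x86_regcallcc keyword below exists; it is attached both to function definitions and to call sites:

; Hypothetical example; the function and argument names are illustrative only.
define x86_regcallcc i32 @callee(i32 %a) {
  %r = add i32 %a, 1
  ret i32 %r
}

define x86_regcallcc i32 @caller(i32 %a) {
  %r = call x86_regcallcc i32 @callee(i32 %a)
  ret i32 %r
}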
KEYWORD(intel_ocl_bicc);
KEYWORD(x86_64_sysvcc);
KEYWORD(x86_64_win64cc);
+ KEYWORD(x86_regcallcc);
KEYWORD(webkit_jscc);
KEYWORD(swiftcc);
KEYWORD(anyregcc);
case lltok::kw_coldcc: CC = CallingConv::Cold; break;
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_x86_regcallcc: CC = CallingConv::X86_RegCall; break;
case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
kw_x86_fastcallcc,
kw_x86_thiscallcc,
kw_x86_vectorcallcc,
+ kw_x86_regcallcc,
kw_arm_apcscc,
kw_arm_aapcscc,
kw_arm_aapcs_vfpcc,
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break;
case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
return false;
}
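+/// Custom-assignment stub referenced from the RegCall calling-convention
+/// definitions for types the lowering does not handle yet (f80 and the
+/// v*i1 mask types); it unconditionally reports a fatal error.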
+inline bool CC_X86_RegCall_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ report_fatal_error("LLVM x86 RegCall calling convention implementation" \
+ " doesn't support long double and mask types yet.");
+}
+
inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
"(State.getMachineFunction().getSubtarget()).", F),
A>;
+// Register classes for RegCall
+class RC_X86_RegCall {
+ list<Register> GPR_8 = [];
+ list<Register> GPR_16 = [];
+ list<Register> GPR_32 = [];
+ list<Register> GPR_64 = [];
+ list<Register> XMM = [];
+ list<Register> YMM = [];
+ list<Register> ZMM = [];
+}
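+// Each flavor of the convention defined below (32-bit, Win64, SysV64)
+// derives from this class and overrides the register lists it hands out.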
+
+// RegCall register classes for 32-bit targets
+def RC_X86_32_RegCall : RC_X86_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL];
+ let GPR_16 = [AX, CX, DX, DI, SI];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI];
+ let GPR_64 = [RAX]; ///< Not actually used, but AssignToReg can't handle []
+ ///< \todo Fix AssignToReg to enable empty lists
+ let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
+ let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7];
+ let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
+}
+
+class RC_X86_64_RegCall : RC_X86_RegCall {
+ let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15];
+ let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15];
+ let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7,
+ ZMM8, ZMM9, ZMM10, ZMM11, ZMM12, ZMM13, ZMM14, ZMM15];
+}
+
+def RC_X86_64_RegCall_Win : RC_X86_64_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R10B, R11B, R12B, R14B, R15B];
+ let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R10W, R11W, R12W, R14W, R15W];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R10D, R11D, R12D, R14D, R15D];
+ let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11, R12, R14, R15];
+}
+
+def RC_X86_64_RegCall_SysV : RC_X86_64_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R12B, R13B, R14B, R15B];
+ let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R12W, R13W, R14W, R15W];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R12D, R13D, R14D, R15D];
+ let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R12, R13, R14, R15];
+}
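+// Note: the Win64 and SysV flavors differ only in which GPRs they hand out;
+// Win64 additionally uses R10 and R11, while SysV uses R13 instead.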
+
+// Intel regcall calling convention. Defines both the argument (CC_*) and
+// return-value (RetCC_*) conventions for a given set of register classes.
+multiclass X86_RegCall_base<RC_X86_RegCall RC> {
+def CC_#NAME : CallingConv<[
+ // Handles byval parameters.
+ CCIfSubtarget<"is64Bit()", CCIfByVal<CCPassByVal<8, 8>>>,
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // bool, char, int, enum, long, pointer --> GPR
+ CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+ // TODO: Handle the case of mask types (v*i1)
+ // TODO: Handle the case of 32 bit machine with v64i1 argument
+ // (split to 2 registers)
+ CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
+
+ // long long, __int64 --> GPR
+ CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+ // TODO: Handle the case of long double (f80)
+ CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>,
+
+ // float, double, float128 --> XMM
+  // If SSE is disabled --> pass on the stack
+ CCIfType<[f32, f64, f128],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // __m128, __m128i, __m128d --> XMM
+  // If SSE is disabled --> pass on the stack
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // __m256, __m256i, __m256d --> YMM
+  // If AVX is disabled --> pass on the stack
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+ // __m512, __m512i, __m512d --> ZMM
+  // If AVX-512 is disabled --> pass on the stack
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfSubtarget<"hasAVX512()",CCAssignToReg<RC.ZMM>>>,
+
+ // If no register was found -> assign to stack
+
+  // In 64-bit mode, assign 32/64-bit values to 8-byte stack slots
+ CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64],
+ CCAssignToStack<8, 8>>>,
+
+  // In 32-bit mode, assign 32-bit values to 4-byte and 64-bit values to 8-byte stack slots
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+  // MMX values get an 8-byte stack slot; the alignment depends on the target
+ CCIfSubtarget<"is64Bit()", CCIfType<[x86mmx], CCAssignToStack<8, 8>>>,
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>,
+
+  // f128 gets a stack slot whose size and alignment depend
+  // on the subtarget.
+ CCIfType<[f128], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>,
+
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
+ CCIfType<[v16i32, v8i64, v16f32, v8f64], CCAssignToStack<64, 64>>
+]>;
+
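+// The matching return-value convention. Unlike the argument convention it
+// returns f80 on the x87 stack (FP0) and has no stack-assignment fallback.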
+def RetCC_#NAME : CallingConv<[
+  // Promote i1 return values to i8.
+ CCIfType<[i1], CCPromoteToType<i8>>,
+
+ // bool, char, int, enum, long, pointer --> GPR
+ CCIfType<[i8], CCAssignToReg<RC.GPR_8>>,
+ CCIfType<[i16], CCAssignToReg<RC.GPR_16>>,
+ CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+ // TODO: Handle the case of mask types (v*i1)
+ // TODO: Handle the case of 32 bit machine with v64i1 argument
+ // (split to 2 registers)
+ CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
+
+ // long long, __int64 --> GPR
+ CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+ // long double --> FP
+ CCIfType<[f80], CCAssignToReg<[FP0]>>,
+
+ // float, double, float128 --> XMM
+ CCIfType<[f32, f64, f128],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // __m128, __m128i, __m128d --> XMM
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // __m256, __m256i, __m256d --> YMM
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+ // __m512, __m512i, __m512d --> ZMM
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfSubtarget<"hasAVX512()", CCAssignToReg<RC.ZMM>>>
+]>;
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Conventions
//===----------------------------------------------------------------------===//
RAX, R10, R11, R13, R14, R15]>>
]>;
+
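+// Instantiate the regcall argument (CC_*) and return-value (RetCC_*)
+// conventions for each supported flavor: 32-bit, Win64, and SysV64.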
+defm X86_32_RegCall :
+ X86_RegCall_base<RC_X86_32_RegCall>;
+defm X86_Win64_RegCall :
+ X86_RegCall_base<RC_X86_64_RegCall_Win>;
+defm X86_SysV64_RegCall :
+ X86_RegCall_base<RC_X86_64_RegCall_SysV>;
+
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_32_RegCall>>,
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
// Handle HHVM calls.
CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()",
+ CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_SysV64_RegCall>>,
+
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
CCIfCC<"CallingConv::HHVM_C", CCDelegateTo<CC_X86_64_HHVM_C>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
// Mingw64 and native Win64 use Win64 CC
// Only R12 is preserved for PHP calls in HHVM.
def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
+
+// The regcall calling convention preserves a small set of GPRs, plus
+// XMM4-7 (32-bit) or XMM8-15 (64-bit) when SSE is available.
+def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>;
+def CSR_32_RegCall : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
+ (sequence "XMM%u", 4, 7))>;
+def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+ (sequence "R%u", 10, 15))>;
+def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
+ (sequence "XMM%u", 8, 15))>;
+def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+ (sequence "R%u", 12, 15))>;
+def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
+ (sequence "XMM%u", 8, 15))>;
+
}
case CallingConv::HHVM:
return CSR_64_HHVM_SaveList;
+ case CallingConv::X86_RegCall:
+ if (Is64Bit) {
+ if (IsWin64) {
+ return (HasSSE ? CSR_Win64_RegCall_SaveList :
+ CSR_Win64_RegCall_NoSSE_SaveList);
+ } else {
+ return (HasSSE ? CSR_SysV64_RegCall_SaveList :
+ CSR_SysV64_RegCall_NoSSE_SaveList);
+ }
+ } else {
+ return (HasSSE ? CSR_32_RegCall_SaveList :
+ CSR_32_RegCall_NoSSE_SaveList);
+ }
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
}
case CallingConv::HHVM:
return CSR_64_HHVM_RegMask;
+ case CallingConv::X86_RegCall:
+ if (Is64Bit) {
+ if (IsWin64) {
+ return (HasSSE ? CSR_Win64_RegCall_RegMask :
+ CSR_Win64_RegCall_NoSSE_RegMask);
+ } else {
+ return (HasSSE ? CSR_SysV64_RegCall_RegMask :
+ CSR_SysV64_RegCall_NoSSE_RegMask);
+ }
+ } else {
+ return (HasSSE ? CSR_32_RegCall_RegMask :
+ CSR_32_RegCall_NoSSE_RegMask);
+ }
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_RegMask;
--- /dev/null
+; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck --check-prefix=X32 %s\r
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck --check-prefix=WIN64 %s\r
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck --check-prefix=LINUXOSX64 %s \r
+\r
+; X32-LABEL: test_argReti1:\r
+; X32: kmov{{.*}} %eax, %k{{[0-7]}}\r
+; X32: kmov{{.*}} %k{{[0-7]}}, %eax\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argReti1:\r
+; WIN64: kmov{{.*}} %eax, %k{{[0-7]}}\r
+; WIN64: kmov{{.*}} %k{{[0-7]}}, %eax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning i1\r
+define x86_regcallcc i1 @test_argReti1(i1 %a) {\r
+ %add = add i1 %a, 1\r
+ ret i1 %add\r
+}\r
+\r
+; X32-LABEL: test_CallargReti1:\r
+; X32: kmov{{.*}} %k{{[0-7]}}, %eax\r
+; X32: call{{.*}} {{.*}}test_argReti1\r
+; X32: kmov{{.*}} %eax, %k{{[0-7]}}\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargReti1:\r
+; WIN64: kmov{{.*}} %k{{[0-7]}}, %eax\r
+; WIN64: call{{.*}} {{.*}}test_argReti1\r
+; WIN64: kmov{{.*}} %eax, %k{{[0-7]}}\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving i1\r
+define x86_regcallcc i1 @test_CallargReti1(i1 %a) {\r
+ %b = add i1 %a, 1\r
+ %c = call x86_regcallcc i1 @test_argReti1(i1 %b)\r
+ %d = add i1 %c, 1\r
+ ret i1 %d\r
+}\r
+\r
+; X32-LABEL: test_argReti8:\r
+; X32: incb %al\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argReti8:\r
+; WIN64: incb %al\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning i8\r
+define x86_regcallcc i8 @test_argReti8(i8 %a) {\r
+ %add = add i8 %a, 1\r
+ ret i8 %add\r
+}\r
+\r
+; X32-LABEL: test_CallargReti8:\r
+; X32: incb %al\r
+; X32: call{{.*}} {{.*}}test_argReti8\r
+; X32: incb %al\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargReti8:\r
+; WIN64: incb %al\r
+; WIN64: call{{.*}} {{.*}}test_argReti8\r
+; WIN64: incb %al\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving i8\r
+define x86_regcallcc i8 @test_CallargReti8(i8 %a) {\r
+ %b = add i8 %a, 1\r
+ %c = call x86_regcallcc i8 @test_argReti8(i8 %b)\r
+ %d = add i8 %c, 1\r
+ ret i8 %d\r
+}\r
+\r
+; X32-LABEL: test_argReti16:\r
+; X32: incl %eax\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argReti16:\r
+; WIN64: incl %eax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning i16\r
+define x86_regcallcc i16 @test_argReti16(i16 %a) {\r
+ %add = add i16 %a, 1\r
+ ret i16 %add\r
+}\r
+\r
+; X32-LABEL: test_CallargReti16:\r
+; X32: incl %eax\r
+; X32: call{{.*}} {{.*}}test_argReti16\r
+; X32: incl %eax\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargReti16:\r
+; WIN64: incl %eax\r
+; WIN64: call{{.*}} {{.*}}test_argReti16\r
+; WIN64: incl %eax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving i16\r
+define x86_regcallcc i16 @test_CallargReti16(i16 %a) {\r
+ %b = add i16 %a, 1\r
+ %c = call x86_regcallcc i16 @test_argReti16(i16 %b)\r
+ %d = add i16 %c, 1\r
+ ret i16 %d\r
+}\r
+\r
+; X32-LABEL: test_argReti32:\r
+; X32: incl %eax\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argReti32:\r
+; WIN64: incl %eax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning i32\r
+define x86_regcallcc i32 @test_argReti32(i32 %a) {\r
+ %add = add i32 %a, 1\r
+ ret i32 %add\r
+}\r
+\r
+; X32-LABEL: test_CallargReti32:\r
+; X32: incl %eax\r
+; X32: call{{.*}} {{.*}}test_argReti32\r
+; X32: incl %eax\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargReti32:\r
+; WIN64: incl %eax\r
+; WIN64: call{{.*}} {{.*}}test_argReti32\r
+; WIN64: incl %eax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving i32\r
+define x86_regcallcc i32 @test_CallargReti32(i32 %a) {\r
+ %b = add i32 %a, 1\r
+ %c = call x86_regcallcc i32 @test_argReti32(i32 %b)\r
+ %d = add i32 %c, 1\r
+ ret i32 %d\r
+}\r
+\r
+; X32-LABEL: test_argReti64:\r
+; X32: addl $3, %eax\r
+; X32: adcl $1, %ecx\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argReti64:\r
+; WIN64: movabsq $4294967299, %r{{.*}}\r
+; WIN64: addq %r{{.*}}, %rax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning i64\r
+define x86_regcallcc i64 @test_argReti64(i64 %a) {\r
+ %add = add i64 %a, 4294967299\r
+ ret i64 %add\r
+}\r
+\r
+; X32-LABEL: test_CallargReti64:\r
+; X32: add{{.*}} $1, %eax\r
+; X32: adcl $0, {{%e(cx|dx|si|di|bx|bp)}}\r
+; X32: call{{.*}} {{.*}}test_argReti64\r
+; X32: add{{.*}} $1, %eax\r
+; X32: adcl $0, {{%e(cx|dx|si|di|bx|bp)}}\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargReti64:\r
+; WIN64: incq %rax\r
+; WIN64: call{{.*}} {{.*}}test_argReti64\r
+; WIN64: incq %rax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving i64\r
+define x86_regcallcc i64 @test_CallargReti64(i64 %a) {\r
+ %b = add i64 %a, 1\r
+ %c = call x86_regcallcc i64 @test_argReti64(i64 %b)\r
+ %d = add i64 %c, 1\r
+ ret i64 %d\r
+}\r
+\r
+; X32-LABEL: test_argRetFloat:\r
+; X32: vadd{{.*}} {{.*}}, %xmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argRetFloat:\r
+; WIN64: vadd{{.*}} {{.*}}, %xmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning float\r
+define x86_regcallcc float @test_argRetFloat(float %a) {\r
+ %add = fadd float 1.0, %a\r
+ ret float %add\r
+}\r
+\r
+; X32-LABEL: test_CallargRetFloat:\r
+; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0\r
+; X32: call{{.*}} {{.*}}test_argRetFloat\r
+; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargRetFloat:\r
+; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0\r
+; WIN64: call{{.*}} {{.*}}test_argRetFloat\r
+; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving float\r
+define x86_regcallcc float @test_CallargRetFloat(float %a) {\r
+ %b = fadd float 1.0, %a\r
+ %c = call x86_regcallcc float @test_argRetFloat(float %b)\r
+ %d = fadd float 1.0, %c\r
+ ret float %d\r
+}\r
+\r
+; X32-LABEL: test_argRetDouble:\r
+; X32: vadd{{.*}} {{.*}}, %xmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argRetDouble:\r
+; WIN64: vadd{{.*}} {{.*}}, %xmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning double\r
+define x86_regcallcc double @test_argRetDouble(double %a) {\r
+ %add = fadd double %a, 1.0\r
+ ret double %add\r
+}\r
+\r
+; X32-LABEL: test_CallargRetDouble:\r
+; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0\r
+; X32: call{{.*}} {{.*}}test_argRetDouble\r
+; X32: vadd{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargRetDouble:\r
+; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0\r
+; WIN64: call{{.*}} {{.*}}test_argRetDouble\r
+; WIN64: vadd{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving double\r
+define x86_regcallcc double @test_CallargRetDouble(double %a) {\r
+ %b = fadd double 1.0, %a\r
+ %c = call x86_regcallcc double @test_argRetDouble(double %b)\r
+ %d = fadd double 1.0, %c\r
+ ret double %d\r
+}\r
+\r
+; X32-LABEL: test_argRetPointer:\r
+; X32: incl %eax\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argRetPointer:\r
+; WIN64: incl %eax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning pointer\r
+define x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a) {\r
+ %b = ptrtoint [4 x i32]* %a to i32\r
+ %c = add i32 %b, 1\r
+ %d = inttoptr i32 %c to [4 x i32]*\r
+ ret [4 x i32]* %d\r
+}\r
+\r
+; X32-LABEL: test_CallargRetPointer:\r
+; X32: incl %eax\r
+; X32: call{{.*}} {{.*}}test_argRetPointer\r
+; X32: incl %eax\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargRetPointer:\r
+; WIN64: incl %eax\r
+; WIN64: call{{.*}} {{.*}}test_argRetPointer\r
+; WIN64: incl %eax\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving pointer\r
+define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) {\r
+ %b = ptrtoint [4 x i32]* %a to i32\r
+ %c = add i32 %b, 1\r
+ %d = inttoptr i32 %c to [4 x i32]*\r
+ %e = call x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %d)\r
+ %f = ptrtoint [4 x i32]* %e to i32\r
+ %g = add i32 %f, 1\r
+ %h = inttoptr i32 %g to [4 x i32]*\r
+ ret [4 x i32]* %h\r
+}\r
+\r
+; X32-LABEL: test_argRet128Vector:\r
+; X32: vpblend{{.*}} %xmm0, %xmm1, %xmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argRet128Vector:\r
+; WIN64: vpblend{{.*}} %xmm0, %xmm1, %xmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning 128 bit vector\r
+define x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %b) {\r
+  %d = select <4 x i1> undef, <4 x i32> %a, <4 x i32> %b\r
+ ret <4 x i32> %d\r
+}\r
+\r
+; X32-LABEL: test_CallargRet128Vector:\r
+; X32: vmov{{.*}} %xmm0, {{%xmm([0-7])}}\r
+; X32: call{{.*}} {{.*}}test_argRet128Vector\r
+; X32: vpblend{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargRet128Vector:\r
+; WIN64: vmov{{.*}} %xmm0, {{%xmm([0-9]+)}}\r
+; WIN64: call{{.*}} {{.*}}test_argRet128Vector\r
+; WIN64: vpblend{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving 128 bit vector\r
+define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a) {\r
+ %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %a)\r
+  %c = select <4 x i1> undef, <4 x i32> %a, <4 x i32> %b\r
+ ret <4 x i32> %c\r
+}\r
+\r
+; X32-LABEL: test_argRet256Vector:\r
+; X32: vpblend{{.*}} %ymm0, %ymm1, %ymm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argRet256Vector:\r
+; WIN64: vpblend{{.*}} %ymm0, %ymm1, %ymm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning 256 bit vector\r
+define x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %b) {\r
+  %d = select <8 x i1> undef, <8 x i32> %a, <8 x i32> %b\r
+ ret <8 x i32> %d\r
+}\r
+\r
+; X32-LABEL: test_CallargRet256Vector:\r
+; X32: vmov{{.*}} %ymm0, %ymm1\r
+; X32: call{{.*}} {{.*}}test_argRet256Vector\r
+; X32: vpblend{{.*}} %ymm1, %ymm0, %ymm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargRet256Vector:\r
+; WIN64: vmov{{.*}} %ymm0, %ymm1\r
+; WIN64: call{{.*}} {{.*}}test_argRet256Vector\r
+; WIN64: vpblend{{.*}} %ymm1, %ymm0, %ymm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving 256 bit vector\r
+define x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i32> %a) {\r
+ %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %a)\r
+  %c = select <8 x i1> undef, <8 x i32> %a, <8 x i32> %b\r
+ ret <8 x i32> %c\r
+}\r
+\r
+; X32-LABEL: test_argRet512Vector:\r
+; X32: vpblend{{.*}} %zmm0, %zmm1, %zmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argRet512Vector:\r
+; WIN64: vpblend{{.*}} %zmm0, %zmm1, %zmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning 512 bit vector\r
+define x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %b) {\r
+  %d = select <16 x i1> undef, <16 x i32> %a, <16 x i32> %b\r
+ ret <16 x i32> %d\r
+}\r
+\r
+; X32-LABEL: test_CallargRet512Vector:\r
+; X32: vmov{{.*}} %zmm0, %zmm1\r
+; X32: call{{.*}} {{.*}}test_argRet512Vector\r
+; X32: vpblend{{.*}} %zmm1, %zmm0, %zmm0\r
+; X32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargRet512Vector:\r
+; WIN64: vmov{{.*}} %zmm0, %zmm1\r
+; WIN64: call{{.*}} {{.*}}test_argRet512Vector\r
+; WIN64: vpblend{{.*}} %zmm1, %zmm0, %zmm0\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving 512 bit vector\r
+define x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i32> %a) {\r
+ %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %a)\r
+  %c = select <16 x i1> undef, <16 x i32> %a, <16 x i32> %b\r
+ ret <16 x i32> %c\r
+}\r
+\r
+; WIN64-LABEL: testf32_inp\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; WIN64: retq\r
+\r
+; X32-LABEL: testf32_inp\r
+; X32: vmovups {{%xmm([0-7])}}, {{.*(%esp).*}} {{#+}} 16-byte Spill\r
+; X32: vmovups {{%xmm([0-7])}}, {{.*(%esp).*}} {{#+}} 16-byte Spill\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}\r
+; X32: vmovups {{.*(%esp).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload\r
+; X32: vmovups {{.*(%esp).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload\r
+; X32: retl\r
+\r
+; LINUXOSX64-LABEL: testf32_inp\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}\r
+; LINUXOSX64: retq\r
+\r
+; Test regcall with multiple input parameters - callee-saved XMMs\r
+define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {\r
+ %x1 = fadd <32 x float> %a, %b\r
+ %x2 = fmul <32 x float> %a, %b\r
+ %x3 = fsub <32 x float> %x1, %x2\r
+ %x4 = fadd <32 x float> %x3, %c\r
+ ret <32 x float> %x4\r
+}\r
+\r
+; X32-LABEL: testi32_inp\r
+; X32: pushl {{%e(si|di|bx|bp)}}\r
+; X32: pushl {{%e(si|di|bx|bp)}}\r
+; X32: pushl {{%e(si|di|bx|bp)}}\r
+; X32: pushl {{%e(si|di|bx|bp)}}\r
+; X32: popl {{%e(si|di|bx|bp)}}\r
+; X32: popl {{%e(si|di|bx|bp)}}\r
+; X32: popl {{%e(si|di|bx|bp)}}\r
+; X32: popl {{%e(si|di|bx|bp)}}\r
+; X32: retl\r
+\r
+; WIN64-LABEL: testi32_inp\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: retq\r
+\r
+; LINUXOSX64-LABEL: testi32_inp\r
+; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX64: retq\r
+\r
+; Test regcall with multiple input parameters - callee-saved GPRs\r
+define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,\r
+ i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {\r
+ %x1 = sub i32 %a1, %a2\r
+ %x2 = sub i32 %a3, %a4\r
+ %x3 = sub i32 %a5, %a6\r
+ %y1 = sub i32 %b1, %b2\r
+ %y2 = sub i32 %b3, %b4\r
+ %y3 = sub i32 %b5, %b6\r
+ %v1 = add i32 %a1, %a2\r
+ %v2 = add i32 %a3, %a4\r
+ %v3 = add i32 %a5, %a6\r
+ %w1 = add i32 %b1, %b2\r
+ %w2 = add i32 %b3, %b4\r
+ %w3 = add i32 %b5, %b6\r
+ %s1 = mul i32 %x1, %y1\r
+ %s2 = mul i32 %x2, %y2\r
+ %s3 = mul i32 %x3, %y3\r
+ %t1 = mul i32 %v1, %w1\r
+ %t2 = mul i32 %v2, %w2\r
+ %t3 = mul i32 %v3, %w3\r
+ %m1 = add i32 %s1, %s2\r
+ %m2 = add i32 %m1, %s3\r
+ %n1 = add i32 %t1, %t2\r
+ %n2 = add i32 %n1, %t3\r
+ %r1 = add i32 %m2, %n2\r
+ ret i32 %r1\r
+}\r
+\r
+; X32-LABEL: testf32_stack\r
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-1])}}\r
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-1])}}\r
+; X32: retl\r
+\r
+; LINUXOSX64-LABEL: testf32_stack\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}\r
+; LINUXOSX64: vaddps {{([0-9])+}}(%rbp), {{%zmm([0-9]+)}}, {{%zmm([0-1])}}\r
+; LINUXOSX64: vaddps {{([0-9])+}}(%rbp), {{%zmm([0-9]+)}}, {{%zmm([0-1])}}\r
+; LINUXOSX64: retq\r
+\r
+; Test that parameters that overflow register capacity are passed on the stack\r
+define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, \r
+ <32 x float> %a1, <32 x float> %b1, <32 x float> %c1,\r
+ <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind {\r
+ %x1 = fadd <32 x float> %a0, %b0\r
+ %x2 = fadd <32 x float> %c0, %x1\r
+ %x3 = fadd <32 x float> %a1, %x2\r
+ %x4 = fadd <32 x float> %b1, %x3\r
+ %x5 = fadd <32 x float> %c1, %x4\r
+ %x6 = fadd <32 x float> %a2, %x5\r
+ %x7 = fadd <32 x float> %b2, %x6\r
+ %x8 = fadd <32 x float> %c2, %x7\r
+ ret <32 x float> %x8\r
+}\r
+\r
+; X32-LABEL: test_argRetMixTypes\r
+; X32: vmovd %edx, {{%xmm([0-9])}}\r
+; X32: vcvtsi2sdl %eax, {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; X32: vcvtsi2sdl %ecx, {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; X32: vcvtsi2sdl %esi, {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; X32: vaddsd %xmm1, %xmm0, %xmm0\r
+; X32: vcvttsd2si %xmm0, %eax\r
+; X32: retl\r
+\r
+; LINUXOSX64-LABEL: test_argRetMixTypes\r
+; LINUXOSX64: vcvtss2sd %xmm1, %xmm1, %xmm1\r
+; LINUXOSX64: vcvtsi2sdl %eax, {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; LINUXOSX64: vcvtsi2sdl %ecx, {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; LINUXOSX64: vcvtsi2sdq %rdx, {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; LINUXOSX64: vcvtsi2sdl %edi, {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; LINUXOSX64: vcvtsi2sdl (%rsi), {{%xmm([0-9])}}, {{%xmm([0-9])}}\r
+; LINUXOSX64: vcvttsd2si {{%xmm([0-9])}}, %eax\r
+\r
+; Test regcall when passing/retrieving mixed types\r
+define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {\r
+ %8 = fpext float %1 to double\r
+ %9 = fadd double %8, %0\r
+ %10 = sitofp i8 %2 to double\r
+ %11 = fadd double %9, %10\r
+ %12 = sitofp i32 %3 to double\r
+ %13 = fadd double %11, %12\r
+ %14 = sitofp i64 %4 to double\r
+ %15 = fadd double %13, %14\r
+ %16 = sitofp i16 %5 to double\r
+ %17 = fadd double %15, %16\r
+ %18 = load i32, i32* %6, align 4\r
+ %19 = sitofp i32 %18 to double\r
+ %20 = fadd double %17, %19\r
+ %21 = fptosi double %20 to i32\r
+ ret i32 %21\r
+}\r
+\r
+%struct.complex = type { float, double, i32, i8, i64 }\r
+\r
+; X32-LABEL: test_argMultiRet\r
+; X32: vaddsd {{.*}}, %xmm1, %xmm1\r
+; X32: movl $4, %eax\r
+; X32: movb $7, %cl\r
+; X32: movl $999, %edx\r
+; X32: xorl %edi, %edi\r
+; X32: retl\r
+\r
+; LINUXOSX64-LABEL: test_argMultiRet\r
+; LINUXOSX64: vaddsd {{.*}}, %xmm1, %xmm1\r
+; LINUXOSX64: movl $4, %eax\r
+; LINUXOSX64: movb $7, %cl\r
+; LINUXOSX64: movl $999, %edx\r
+; LINUXOSX64: retq\r
+ \r
+define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 {\r
+ %6 = fadd double %1, 5.000000e+00\r
+ %7 = insertvalue %struct.complex undef, float %0, 0\r
+ %8 = insertvalue %struct.complex %7, double %6, 1\r
+ %9 = insertvalue %struct.complex %8, i32 4, 2\r
+ %10 = insertvalue %struct.complex %9, i8 7, 3\r
+ %11 = insertvalue %struct.complex %10, i64 999, 4\r
+ ret %struct.complex %11\r
+}\r
+\r
--- /dev/null
+; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s\r
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s\r
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s\r
+\r
+; WIN32-LABEL: test_argReti1:\r
+; WIN32: incb %al\r
+; WIN32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_argReti1:\r
+; WIN64: incb %al\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when receiving/returning i1\r
+define x86_regcallcc i1 @test_argReti1(i1 %a) {\r
+ %add = add i1 %a, 1\r
+ ret i1 %add\r
+}\r
+\r
+; WIN32-LABEL: test_CallargReti1:\r
+; WIN32: movzbl %al, %eax\r
+; WIN32: call{{.*}} {{.*}}test_argReti1\r
+; WIN32: incb %al\r
+; WIN32: ret{{.*}}\r
+\r
+; WIN64-LABEL: test_CallargReti1:\r
+; WIN64: movzbl %al, %eax\r
+; WIN64: call{{.*}} {{.*}}test_argReti1\r
+; WIN64: incb %al\r
+; WIN64: ret{{.*}}\r
+\r
+; Test regcall when passing/retrieving i1\r
+define x86_regcallcc i1 @test_CallargReti1(i1 %a) {\r
+ %b = add i1 %a, 1\r
+ %c = call x86_regcallcc i1 @test_argReti1(i1 %b)\r
+ %d = add i1 %c, 1\r
+ ret i1 %d\r
+}\r
+\r
+; WIN64-LABEL: testf32_inp\r
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; WIN64: retq\r
+\r
+; WIN32-LABEL: testf32_inp\r
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill\r
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill\r
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill\r
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill\r
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}\r
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}\r
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}\r
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}\r
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload\r
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload\r
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload\r
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload\r
+; WIN32: retl\r
+\r
+; LINUXOSX-LABEL: testf32_inp\r
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill\r
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}\r
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload\r
+; LINUXOSX: retq\r
+\r
+; Test calling conventions - input parameters, callee-saved XMMs\r
+define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {\r
+ %x1 = fadd <16 x float> %a, %b\r
+ %x2 = fmul <16 x float> %a, %b\r
+ %x3 = fsub <16 x float> %x1, %x2\r
+ %x4 = fadd <16 x float> %x3, %c\r
+ ret <16 x float> %x4\r
+}\r
+\r
+; WIN32-LABEL: testi32_inp\r
+; WIN32: pushl {{%e(si|di|bx|bp)}}\r
+; WIN32: pushl {{%e(si|di|bx|bp)}}\r
+; WIN32: pushl {{%e(si|di|bx|bp)}}\r
+; WIN32: pushl {{%e(si|di|bx|bp)}}\r
+; WIN32: popl {{%e(si|di|bx|bp)}}\r
+; WIN32: popl {{%e(si|di|bx|bp)}}\r
+; WIN32: popl {{%e(si|di|bx|bp)}}\r
+; WIN32: popl {{%e(si|di|bx|bp)}}\r
+; WIN32: retl\r
+\r
+; WIN64-LABEL: testi32_inp\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: popq {{%r(bp|bx|1[0-5])}}\r
+; WIN64: retq\r
+\r
+; LINUXOSX-LABEL: testi32_inp\r
+; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}\r
+; LINUXOSX: retq\r
+\r
+; Test calling conventions - input parameters, callee-saved GPRs\r
+define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,\r
+ i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {\r
+ %x1 = sub i32 %a1, %a2\r
+ %x2 = sub i32 %a3, %a4\r
+ %x3 = sub i32 %a5, %a6\r
+ %y1 = sub i32 %b1, %b2\r
+ %y2 = sub i32 %b3, %b4\r
+ %y3 = sub i32 %b5, %b6\r
+ %v1 = add i32 %a1, %a2\r
+ %v2 = add i32 %a3, %a4\r
+ %v3 = add i32 %a5, %a6\r
+ %w1 = add i32 %b1, %b2\r
+ %w2 = add i32 %b3, %b4\r
+ %w3 = add i32 %b5, %b6\r
+ %s1 = mul i32 %x1, %y1\r
+ %s2 = mul i32 %x2, %y2\r
+ %s3 = mul i32 %x3, %y3\r
+ %t1 = mul i32 %v1, %w1\r
+ %t2 = mul i32 %v2, %w2\r
+ %t3 = mul i32 %v3, %w3\r
+ %m1 = add i32 %s1, %s2\r
+ %m2 = add i32 %m1, %s3\r
+ %n1 = add i32 %t1, %t2\r
+ %n2 = add i32 %n1, %t3\r
+ %r1 = add i32 %m2, %n2\r
+ ret i32 %r1\r
+}\r
+\r
+; WIN32-LABEL: testf32_stack\r
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)\r
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)\r
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)\r
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}\r
+\r
+; LINUXOSX-LABEL: testf32_stack\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}\r
+; LINUXOSX: retq\r
+\r
+; Test that parameters that overflow register capacity are passed on the stack\r
+define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {\r
+ %x1 = fadd <32 x float> %a, %b\r
+ %x2 = fadd <32 x float> %x1, %c\r
+ ret <32 x float> %x2\r
+}\r