From 2fa91afc4677766e7c29d1784bc5575e9cb791ac Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 14 May 2015 09:04:45 +0000 Subject: [PATCH] AVX-512: Added i1 type handling for calling conventions. i1 type is a legal type on AVX-512 and can be passed as parameter or return value. i1 is promoted to i8 on return and to i32 for call arguments (i8 is also promoted to i32 here). The result code is similar to the previous X86 targets, where i1 is always promoted to i8. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237350 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CallingConv.td | 29 ++++++----- lib/Target/X86/X86ISelLowering.cpp | 23 ++++++--- test/CodeGen/X86/avx512-calling-conv.ll | 69 ++++++++++++++++++++++++- 3 files changed, 99 insertions(+), 22 deletions(-) diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 203dc3efa3e..790160662ef 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -34,6 +34,7 @@ def RetCC_X86Common : CallingConv<[ // // For code that doesn't care about the ABI, we allow returning more than two // integer values in registers. + CCIfType<[i1], CCPromoteToType>, CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>, CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>, CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>, @@ -251,8 +252,8 @@ def CC_X86_64_C : CallingConv<[ // Handles byval parameters. CCIfByVal>, - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType>, + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, // The 'nest' parameter, if any, is passed in R10. 
CCIfNest>, @@ -511,8 +512,8 @@ def CC_X86_32_Common : CallingConv<[ CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>; def CC_X86_32_C : CallingConv<[ - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType>, + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, // The 'nest' parameter, if any, is passed in ECX. CCIfNest>, @@ -526,8 +527,8 @@ def CC_X86_32_C : CallingConv<[ ]>; def CC_X86_32_FastCall : CallingConv<[ - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType>, + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, // The 'nest' parameter, if any, is passed in EAX. CCIfNest>, @@ -572,15 +573,15 @@ def CC_X86_32_ThisCall_Common : CallingConv<[ ]>; def CC_X86_32_ThisCall_Mingw : CallingConv<[ - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType>, + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, CCDelegateTo ]>; def CC_X86_32_ThisCall_Win : CallingConv<[ - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType>, + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, // Pass sret arguments indirectly through stack. CCIfSRet>, @@ -599,8 +600,8 @@ def CC_X86_32_FastCC : CallingConv<[ // puts arguments in registers. CCIfByVal>, - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType>, + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, // The 'nest' parameter, if any, is passed in EAX. 
CCIfNest>, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 30cb2238afa..2b5c60019e3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1941,11 +1941,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, else if (VA.getLocInfo() == CCValAssign::ZExt) ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::AExt) { - if (ValVT.getScalarType() == MVT::i1) + if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1) ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); - } + } else if (VA.getLocInfo() == CCValAssign::BCvt) ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy); @@ -2133,6 +2133,9 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // This truncation won't change the value. DAG.getIntPtrConstant(1, dl)); + if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1) + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + InFlag = Chain.getValue(2); InVals.push_back(Val); } @@ -2248,7 +2251,10 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // If value is passed by pointer we have address passed instead of the value // itself. 
- if (VA.getLocInfo() == CCValAssign::Indirect) + bool ExtendedInMem = VA.isExtInLoc() && + VA.getValVT().getScalarType() == MVT::i1; + + if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) ValVT = VA.getLocVT(); else ValVT = VA.getValVT(); @@ -2266,9 +2272,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - return DAG.getLoad(ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + return ExtendedInMem ? + DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val; } } @@ -2857,7 +2865,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: - if (Arg.getValueType().getScalarType() == MVT::i1) + if (Arg.getValueType().isVector() && + Arg.getValueType().getScalarType() == MVT::i1) Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. 
diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll index d18fd7e175e..edb6bef1a4a 100644 --- a/test/CodeGen/X86/avx512-calling-conv.ll +++ b/test/CodeGen/X86/avx512-calling-conv.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX -; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL +; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL_X32 ; KNL-LABEL: test1 ; KNL: vxorps @@ -85,3 +85,70 @@ define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) { %res = sext <4 x i1>%resi to <4 x i32> ret <4 x i32> %res } + +; SKX-LABEL: test7a +; SKX: call +; SKX: vpmovw2m %xmm0, %k0 +; SKX: kandb +define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) { + %cmpRes = icmp sgt <8 x i32>%a, %b + %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes) + %res = and <8 x i1>%resi, + ret <8 x i1> %res +} + + +; KNL_X32-LABEL: test8 +; KNL_X32: testb $1, 4(%esp) +; KNL_X32:jne + +; KNL-LABEL: test8 +; KNL: testb $1, %dil +; KNL:jne + +define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) { + %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2 + ret <16 x i8> %res +} + +; KNL-LABEL: test9 +; KNL: vucomisd +; KNL: setb +define i1 @test9(double %a, double %b) { + %c = fcmp ugt double %a, %b + ret i1 %c +} + +; KNL_X32-LABEL: test10 +; KNL_X32: testb $1, 12(%esp) +; KNL_X32: cmovnel + +; KNL-LABEL: test10 +; KNL: testb $1, %dl +; KNL: cmovel +define i32 @test10(i32 %a, i32 %b, i1 %cond) { + %c = select i1 %cond, i32 %a, i32 %b + ret i32 %c +} + +; KNL-LABEL: test11 +; KNL: cmp +; KNL: setg +define i1 @test11(i32 %a, i32 %b) { + %c = icmp sgt i32 %a, %b + ret i1 %c +} + +; KNL-LABEL: test12 +; KNL: callq _test11 +;; return value in %al +; KNL: movzbl %al, %ebx +; KNL: callq _test10 +; KNL: testb $1, %bl + +define i32 @test12(i32 %a1, i32 %a2, i32 
%b1) { + %cond = call i1 @test11(i32 %a1, i32 %b1) + %res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond) + %res1 = select i1 %cond, i32 %res, i32 0 + ret i32 %res1 +} \ No newline at end of file -- 2.40.0