From: Craig Topper
Date: Wed, 22 Nov 2017 07:11:03 +0000 (+0000)
Subject: [X86] Lower all ISD::MGATHER nodes to X86ISD::MGATHER.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fa5b169130d3175f4ae2e34d02ef81ae3ccd0d23;p=llvm

[X86] Lower all ISD::MGATHER nodes to X86ISD::MGATHER.

Now we consistently represent the mask result without relying on isel
ignoring it.

We now have a more general SDNode and type constraints to represent
these nodes in isel patterns. This allows us to represent both vXi1 and
XMM/YMM mask types with a single set of constraints. An illustrative IR
example follows the diff.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318821 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 320a9a17659..b3afe081834 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -971,8 +971,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
 
     // Special handling for masked gather of 2 elements
-    if (Subtarget.hasAVX2() && !Subtarget.hasAVX512())
-      setOperationAction(ISD::MGATHER, MVT::v2i64, Custom);
+    if (Subtarget.hasAVX2()) {
+      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
+                       MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
+        setOperationAction(ISD::MGATHER, VT, Custom);
+    }
 
     if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
       bool HasInt256 = Subtarget.hasInt256();
@@ -1381,10 +1384,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     // Custom lower several nodes.
     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
-                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
-      setOperationAction(ISD::MGATHER, VT, Custom);
+                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
       setOperationAction(ISD::MSCATTER, VT, Custom);
-    }
 
     setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v1i1, Legal);
@@ -1408,7 +1409,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
       setOperationAction(ISD::MLOAD, VT, Legal);
       setOperationAction(ISD::MSTORE, VT, Legal);
-      setOperationAction(ISD::MGATHER, VT, Legal);
+      setOperationAction(ISD::MGATHER, VT, Custom);
       setOperationAction(ISD::MSCATTER, VT, Custom);
     }
     for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
@@ -24332,10 +24333,11 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
   // the vector contains 8 elements, we just sign-extend the index
   if (NumElts == 8) {
     Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
-    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
-                      N->getOperand(3), Index };
-    DAG.UpdateNodeOperands(N, Ops);
-    return Op;
+    SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+    SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+        DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
+        N->getMemOperand());
+    return DAG.getMergeValues({NewGather, NewGather.getValue(2)}, dl);
   }
 
   // Minimal number of elements in Gather
@@ -24359,13 +24361,13 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
     Src0 = ExtendToType(Src0, NewVT, DAG);
 
     SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
-    SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
-                                            N->getMemoryVT(), dl, Ops,
-                                            N->getMemOperand());
+    SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+        DAG.getVTList(NewVT, MaskBitVT, MVT::Other), Ops, dl, N->getMemoryVT(),
+        N->getMemOperand());
     SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
                                   NewGather.getValue(0),
                                   DAG.getIntPtrConstant(0, dl));
-    SDValue RetOps[] = {Extract, NewGather.getValue(1)};
+    SDValue RetOps[] = {Extract, NewGather.getValue(2)};
     return DAG.getMergeValues(RetOps, dl);
   }
   if (N->getMemoryVT() == MVT::v2i32) {
@@ -24386,23 +24388,27 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
     if (Subtarget.hasVLX()) {
       Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
       VTList = DAG.getVTList(MVT::v4i32, MVT::v2i1, MVT::Other);
-    }
-    else {
+    } else {
       Mask = DAG.getVectorShuffle(MVT::v4i32, dl,
                                   DAG.getBitcast(MVT::v4i32, Mask),
                                   DAG.getUNDEF(MVT::v4i32), {0, 2, -1, -1});
-      VTList = DAG.getVTList(MVT::v4i32, MVT::Other);
-    }
+      VTList = DAG.getVTList(MVT::v4i32, MVT::v4i32, MVT::Other);
+    }
     SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
     SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
         VTList, Ops, dl, N->getMemoryVT(), N->getMemOperand());
     SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
                                   NewGather.getValue(0), DAG);
-    SDValue RetOps[] = { Sext, NewGather.getValue(1) };
+    SDValue RetOps[] = { Sext, NewGather.getValue(2) };
     return DAG.getMergeValues(RetOps, dl);
   }
-  return Op;
+
+  SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+  SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+      DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
+      N->getMemOperand());
+  return DAG.getMergeValues({NewGather, NewGather.getValue(2)}, dl);
 }
 
 SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op,
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index f9b33bac9fa..6a87a264750 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -8241,7 +8241,7 @@ let Predicates = [HasVLX] in {
   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mgatherv4i32>, EVEX_V128;
   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
-                                          vx64xmem, X86mgatherv2i64>, EVEX_V128;
+                                          vx64xmem, mgatherv2i64, VK2WM>, EVEX_V128;
 }
}
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 263babd09c5..c38b2c730c7 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -773,54 +773,44 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
 def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
 def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
 
-// Hack because we can't write a tablegen pattern that requires the type
-// of result 1 to be checked. So explicitly force the mask to v2i1.
-def X86masked_gatherv2i64 : SDNode<"X86ISD::MGATHER",
-                                   SDTypeProfile<2, 3, [SDTCisVec<0>,
-                                                        SDTCisVT<1, v2i1>,
-                                                        SDTCisSameAs<0, 2>,
-                                                        SDTCisSameAs<1, 3>,
-                                                        SDTCisPtrTy<4>]>,
+def X86masked_gather : SDNode<"X86ISD::MGATHER",
+                              SDTypeProfile<2, 3, [SDTCisVec<0>,
+                                                   SDTCisVec<1>, SDTCisInt<1>,
+                                                   SDTCisSameAs<0, 2>,
+                                                   SDTCisSameAs<1, 3>,
+                                                   SDTCisPtrTy<4>]>,
                               [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86masked_gather : SDNode<"X86ISD::MGATHER", SDTMaskedGather,
-                              [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
 def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
+                  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
   return Mgt->getIndex().getValueType() == MVT::v4i32;
 }]>;
 def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
+                  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
   return Mgt->getIndex().getValueType() == MVT::v8i32;
 }]>;
 def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v2i64;
-}]>;
-def X86mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (X86masked_gatherv2i64 node:$src1, node:$src2, node:$src3) , [{
+                  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
   X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v2i64 &&
-         (Mgt->getMemoryVT() == MVT::v2i32 || Mgt->getMemoryVT() == MVT::v2f32);
+  return Mgt->getIndex().getValueType() == MVT::v2i64;
 }]>;
 def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
+                  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
   return Mgt->getIndex().getValueType() == MVT::v4i64;
 }]>;
 def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
+                  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
   return Mgt->getIndex().getValueType() == MVT::v8i64;
 }]>;
 def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
+                  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
   return Mgt->getIndex().getValueType() == MVT::v16i32;
 }]>;
@@ -1123,70 +1113,3 @@ def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3)
                           (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
   return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
-
-// AVX2 special nodes
-// masked gather of AVX2 where mask elements are i32
-def avx2_x86_masked_gather : SDNode<"X86ISD::MGATHER",
-                        SDTypeProfile<2, 3, [
-                          SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
-                          SDTCisPtrTy<4>, SDTCisInt<1>, SDTCisSameSizeAs<0, 1>,
-                          SDTCisSameNumEltsAs<0, 1>]>,
-                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
-def avx2_masked_gather : SDNode<"ISD::MGATHER",
-                        SDTypeProfile<2, 3, [
-                          SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
-                          SDTCisPtrTy<4>, SDTCisInt<1>, SDTCisSameSizeAs<0, 1>,
-                          SDTCisSameNumEltsAs<0, 1>]>,
-                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
-// dword gathers
-def avx2_mvpgatherdd_ps_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v4i32;
-}]>;
-
-def avx2_mvpgatherqd_ps_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_x86_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v2i64;
-}]>;
-
-def avx2_mvpgatherdd_ps_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v8i32;
-}]>;
-
-def avx2_mvpgatherqd_ps_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v4i64;
-}]>;
-
-// qwords
-def avx2_mvpgatherdq_pd_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v2i32;
-}]>;
-
-def avx2_mvpgatherqq_pd_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v2i64 &&
-         Mgt->getMemoryVT().is128BitVector();
-}]>;
-
-def avx2_mvpgatherdq_pd_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v4i32;
-}]>;
-
-def avx2_mvpgatherqq_pd_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (avx2_masked_gather node:$src1, node:$src2, node:$src3) , [{
-  MaskedGatherSDNode *Mgt = cast<MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v4i64;
-}]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 9d7b17c0453..daab153bb2c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -8362,20 +8362,21 @@ let Predicates = [HasAVX2, NoVLX] in {
 multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
                        ValueType VTy, PatFrag GatherNode128,
                        PatFrag GatherNode256, RegisterClass RC256,
-                       X86MemOperand memop128, X86MemOperand memop256> {
+                       X86MemOperand memop128, X86MemOperand memop256,
+                       ValueType MTx = VTx, ValueType MTy = VTy> {
   def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
             (ins VR128:$src1, memop128:$src2, VR128:$mask),
             !strconcat(OpcodeStr,
               "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
-            [(set (VTx VR128:$dst), VR128:$mask_wb,
+            [(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
                   (GatherNode128 VR128:$src1, VR128:$mask,
                                  vectoraddr:$src2))]>, VEX;
   def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
             (ins RC256:$src1, memop256:$src2, RC256:$mask),
             !strconcat(OpcodeStr,
               "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
-            [(set (VTy RC256:$dst), RC256:$mask_wb,
+            [(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
                   (GatherNode256 RC256:$src1, RC256:$mask,
                                  vectoraddr:$src2))]>, VEX, VEX_L;
 }
@@ -8383,27 +8384,31 @@ let Predicates = [UseAVX2] in {
 let mayLoad = 1, hasSideEffects = 0, Constraints
   = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
   in {
-  defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, avx2_mvpgatherdq_pd_xmm,
-                      avx2_mvpgatherdq_pd_ymm, VR256, vx128mem, vx256mem>, VEX_W;
-  defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, avx2_mvpgatherqq_pd_xmm,
-                      avx2_mvpgatherqq_pd_ymm, VR256, vx128mem, vy256mem>, VEX_W;
-  defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, avx2_mvpgatherdd_ps_xmm,
-                      avx2_mvpgatherdd_ps_ymm, VR256, vx128mem, vy256mem>;
-  defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, avx2_mvpgatherqd_ps_xmm,
-                      avx2_mvpgatherqd_ps_ymm, VR128, vx64mem, vy128mem>;
+  defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, mgatherv4i32,
+                                mgatherv4i32, VR256, vx128mem, vx256mem>, VEX_W;
+  defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, mgatherv2i64,
+                                mgatherv4i64, VR256, vx128mem, vy256mem>, VEX_W;
+  defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, mgatherv4i32,
+                                mgatherv8i32, VR256, vx128mem, vy256mem>;
+  defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, mgatherv2i64,
+                                mgatherv4i64, VR128, vx64mem, vy128mem>;
   let ExeDomain = SSEPackedDouble in {
-    defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, avx2_mvpgatherdq_pd_xmm,
-                        avx2_mvpgatherdq_pd_ymm, VR256, vx128mem, vx256mem>, VEX_W;
-    defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, avx2_mvpgatherqq_pd_xmm,
-                        avx2_mvpgatherqq_pd_ymm, VR256, vx128mem, vy256mem>, VEX_W;
+    defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, mgatherv4i32,
+                                  mgatherv4i32, VR256, vx128mem, vx256mem,
+                                  v2i64, v4i64>, VEX_W;
+    defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, mgatherv2i64,
+                                  mgatherv4i64, VR256, vx128mem, vy256mem,
+                                  v2i64, v4i64>, VEX_W;
   }
   let ExeDomain = SSEPackedSingle in {
-    defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, avx2_mvpgatherdd_ps_xmm,
-                        avx2_mvpgatherdd_ps_ymm, VR256, vx128mem, vy256mem>;
-    defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, avx2_mvpgatherqd_ps_xmm,
-                        avx2_mvpgatherqd_ps_ymm, VR128, vx64mem, vy128mem>;
+    defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, mgatherv4i32,
+                                  mgatherv8i32, VR256, vx128mem, vy256mem,
+                                  v4i32, v8i32>;
+    defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, mgatherv2i64,
+                                  mgatherv4i64, VR128, vx64mem, vy128mem,
+                                  v4i32, v4i32>;
   }
 }
}
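
For illustration only (not part of the commit): a minimal IR sketch of a
gather that now takes the custom lowering path; the function name and values
are hypothetical. Compiled with something like
llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2, the call below becomes an
X86ISD::MGATHER node whose mask result is represented explicitly and is
matched through the mgatherv2i64 fragment above.

declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32,
                                                    <2 x i1>, <2 x i64>)

define <2 x i64> @gather_v2i64(<2 x i64*> %ptrs, <2 x i1> %mask,
                               <2 x i64> %passthru) {
  ; Gather two i64 elements; inactive lanes take their value from %passthru.
  %g = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %ptrs,
                                                        i32 8, <2 x i1> %mask,
                                                        <2 x i64> %passthru)
  ret <2 x i64> %g
}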