From a06b09b70dece85f44b060e6959bd340c92c0baf Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 11 Mar 2019 04:36:59 +0000 Subject: [PATCH] [X86] Split SCALEF(S) ISD opcodes into a version without rounding mode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355806 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 38 ++++++++------------- lib/Target/X86/X86ISelLowering.h | 4 +-- lib/Target/X86/X86InstrAVX512.td | 45 ++++++++++++------------- lib/Target/X86/X86InstrFragmentsSIMD.td | 6 ++-- lib/Target/X86/X86IntrinsicsInfo.h | 29 ++++++++-------- 5 files changed, 56 insertions(+), 66 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index df7f8f07323..d53bce56bb2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -22111,34 +22111,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask, passThru, Subtarget, DAG); } - case INTR_TYPE_SCALAR_MASK_RM: { - assert(Op.getNumOperands() == 6 && "Unexpected number of operands"); - SDValue Src1 = Op.getOperand(1); - SDValue Src2 = Op.getOperand(2); - SDValue Src0 = Op.getOperand(3); - SDValue Mask = Op.getOperand(4); - SDValue Rnd = Op.getOperand(5); - return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, - Rnd), - Mask, Src0, Subtarget, DAG); - } - case INTR_TYPE_2OP_MASK_RM: { + case INTR_TYPE_2OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - // We specify 2 possible modes for intrinsics, with/without rounding - // modes. - // First, we check if the intrinsic have rounding mode (6 operands), - // if not, we set rounding mode to "current". - SDValue Rnd; - if (Op.getNumOperands() == 6) - Rnd = Op.getOperand(5); - else - Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, - Src1, Src2, Rnd), - Mask, PassThru, Subtarget, DAG); + SDValue NewOp; + if (IntrData->Opc1 != 0) { + SDValue Rnd = Op.getOperand(5); + if (isRoundModeSAEToX(Rnd)) + NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Rnd); + else if (!isRoundModeCurDirection(Rnd)) + return SDValue(); + } + if (!NewOp) + NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); + return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK_SAE: { SDValue Src1 = Op.getOperand(1); @@ -27776,7 +27764,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FGETEXPS: return "X86ISD::FGETEXPS"; case X86ISD::FGETEXPS_SAE: return "X86ISD::FGETEXPS_SAE"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; + case X86ISD::SCALEF_RND: return "X86ISD::SCALEF_RND"; case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; + case X86ISD::SCALEFS_RND: return "X86ISD::SCALEFS_RND"; case X86ISD::AVG: return "X86ISD::AVG"; case X86ISD::MULHRS: return "X86ISD::MULHRS"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c78eea1ac07..bc32130d071 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -224,8 +224,8 @@ namespace llvm { // Extract Normalized Mantissas. VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE, // FP Scale. - SCALEF, - SCALEFS, + SCALEF, SCALEF_RND, + SCALEFS, SCALEFS_RND, // Unsigned Integer average. AVG, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c076e12265a..910f7f9a401 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5790,20 +5790,19 @@ multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, defm rr: AVX512_maskable, + (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, EVEX_4V, Sched<[sched]>; defm rm: AVX512_maskable, + (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable, + (_.ScalarLdFrag addr:$src2))))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5814,46 +5813,46 @@ multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, defm rr: AVX512_maskable_scalar, + (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, Sched<[sched]>; defm rm: AVX512_maskable_scalar, + (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, - SDNode OpNode, SDNode OpNodeScal, X86SchedWriteWidths sched> { - defm PSZ : avx512_fp_scalef_p, - avx512_fp_round_packed, + defm PSZ : avx512_fp_scalef_p, + avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_scalef_p, - avx512_fp_round_packed, + defm PDZ : avx512_fp_scalef_p, + avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm SSZ : avx512_fp_scalef_scalar, - avx512_fp_scalar_round, - EVEX_4V,EVEX_CD8<32, CD8VT1>; - defm SDZ : avx512_fp_scalef_scalar, - avx512_fp_scalar_round, - EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; + defm SSZ : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V,EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp_scalef_p, + defm PSZ128 : avx512_fp_scalef_p, EVEX_V128, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp_scalef_p, + defm PSZ256 : avx512_fp_scalef_p, EVEX_V256, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp_scalef_p, + defm PDZ128 : avx512_fp_scalef_p, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp_scalef_p, + defm PDZ256 : avx512_fp_scalef_p, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs, +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 8de2ebdf374..c105e0f02f5 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -465,8 +465,10 @@ def X86fmaxSAE : SDNode<"X86ISD::FMAX_SAE", SDTFPBinOp>; def X86fmaxSAEs : SDNode<"X86ISD::FMAXS_SAE", SDTFPBinOp>; def X86fminSAE : SDNode<"X86ISD::FMIN_SAE", SDTFPBinOp>; def X86fminSAEs : SDNode<"X86ISD::FMINS_SAE", SDTFPBinOp>; -def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; -def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>; +def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOp>; +def X86scalefRnd : SDNode<"X86ISD::SCALEF_RND", SDTFPBinOpRound>; +def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOp>; +def X86scalefsRnd: SDNode<"X86ISD::SCALEFS_RND", SDTFPBinOpRound>; def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fsqrts : SDNode<"X86ISD::FSQRTS", SDTFPBinOp>; def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 94846c89432..92be4bebbb9 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -26,11 +26,10 @@ enum IntrinsicType : uint16_t { CVTPD2PS_MASK, INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE, INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_2OP_MASK_SAE, INTR_TYPE_3OP_MASK_SAE, - INTR_TYPE_1OP_MASK, - INTR_TYPE_2OP_MASK_RM, + INTR_TYPE_1OP_MASK, INTR_TYPE_2OP_MASK, IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_SAE, INTR_TYPE_SCALAR_MASK_RND, - INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_SAE, + INTR_TYPE_3OP_SCALAR_MASK_SAE, COMPRESS_EXPAND_IN_REG, TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, @@ -756,22 +755,22 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE), - X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0), - X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0), - X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK, + X86ISD::SCALEF, X86ISD::SCALEF_RND), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0), - X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0), - X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM, - X86ISD::SCALEF, 0), - X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM, - X86ISD::SCALEF, 0), - X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::SCALEFS, 0), - X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::SCALEFS, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK, + X86ISD::SCALEF, X86ISD::SCALEF_RND), + X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK, + X86ISD::SCALEFS, X86ISD::SCALEFS_RND), + X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK, + X86ISD::SCALEFS, X86ISD::SCALEFS_RND), X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FSQRTS, X86ISD::FSQRTS_RND), X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK, -- 2.40.0