From a88cb1d663837a501e2fc5abd800796875c8cf1c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 11 Mar 2019 04:36:57 +0000 Subject: [PATCH] [X86] Split RCP28/RSQRT/GETEXP/EXP2 ISD opcodes into SAE and current direction nodes. Remove rounding mode operand. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355805 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 29 +++----- lib/Target/X86/X86ISelLowering.h | 5 +- lib/Target/X86/X86InstrAVX512.td | 88 +++++++++++++------------ lib/Target/X86/X86InstrFragmentsSIMD.td | 21 ++++-- lib/Target/X86/X86IntrinsicsInfo.h | 54 +++++++-------- 5 files changed, 99 insertions(+), 98 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 22a9e0c2002..df7f8f07323 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21999,24 +21999,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case INTR_TYPE_4OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); - case INTR_TYPE_1OP_MASK_RM: { - SDValue Src = Op.getOperand(1); - SDValue PassThru = Op.getOperand(2); - SDValue Mask = Op.getOperand(3); - SDValue RoundingMode; - // We always add rounding mode to the Node. - // If the rounding mode is not specified, we add the - // "current direction" mode. - if (Op.getNumOperands() == 4) - RoundingMode = - DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); - else - RoundingMode = Op.getOperand(4); - assert(IntrData->Opc1 == 0 && "Unexpected second opcode!"); - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, - RoundingMode), - Mask, PassThru, Subtarget, DAG); - } case INTR_TYPE_1OP_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); @@ -27763,12 +27745,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::RCP14: return "X86ISD::RCP14"; case X86ISD::RCP14S: return "X86ISD::RCP14S"; case X86ISD::RCP28: return "X86ISD::RCP28"; + case X86ISD::RCP28_SAE: return "X86ISD::RCP28_SAE"; case X86ISD::RCP28S: return "X86ISD::RCP28S"; + case X86ISD::RCP28S_SAE: return "X86ISD::RCP28S_SAE"; case X86ISD::EXP2: return "X86ISD::EXP2"; + case X86ISD::EXP2_SAE: return "X86ISD::EXP2_SAE"; case X86ISD::RSQRT14: return "X86ISD::RSQRT14"; case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; + case X86ISD::RSQRT28_SAE: return "X86ISD::RSQRT28_SAE"; case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; + case X86ISD::RSQRT28S_SAE: return "X86ISD::RSQRT28S_SAE"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; case X86ISD::FADDS: return "X86ISD::FADDS"; case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND"; @@ -27784,8 +27771,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FSQRTS: return "X86ISD::FSQRTS"; case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND"; - case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; - case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND"; + case X86ISD::FGETEXP: return "X86ISD::FGETEXP"; + case X86ISD::FGETEXP_SAE: return "X86ISD::FGETEXP_SAE"; + case X86ISD::FGETEXPS: return "X86ISD::FGETEXPS"; + case X86ISD::FGETEXPS_SAE: return "X86ISD::FGETEXPS_SAE"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; case X86ISD::AVG: return "X86ISD::AVG"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6cd695f9219..c78eea1ac07 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -220,7 +220,7 @@ namespace llvm { FSQRT_RND, FSQRTS, FSQRTS_RND, // FP vector get exponent. - FGETEXP_RND, FGETEXPS_RND, + FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE, // Extract Normalized Mantissas. VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE, // FP Scale. @@ -559,7 +559,8 @@ namespace llvm { XTEST, // ERI instructions. - RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2, + RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE, + RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE, // Conversions between float and half-float. CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 2773d679ecb..c076e12265a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -9125,47 +9125,45 @@ defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, - SDNode OpNode, X86FoldableSchedWrite sched> { + SDNode OpNode, SDNode OpNodeSAE, + X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable_scalar, + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, Sched<[sched]>; defm rb : AVX512_maskable_scalar, EVEX_B, - Sched<[sched]>; + (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, + EVEX_B, Sched<[sched]>; defm m : AVX512_maskable_scalar, + (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { - defm SSZ : avx512_fp28_s, - EVEX_CD8<32, CD8VT1>; - defm SDZ : avx512_fp28_s, - EVEX_CD8<64, CD8VT1>, VEX_W; + SDNode OpNodeSAE, X86FoldableSchedWrite sched> { + defm SSZ : avx512_fp28_s, EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp28_s, EVEX_CD8<64, CD8VT1>, VEX_W; } let Predicates = [HasERI] in { - defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>, - T8PD, EVEX_4V; - defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, + defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, + SchedWriteFRcp.Scl>, T8PD, EVEX_4V; + defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V; } -defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, SchedWriteFRnd.Scl>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd @@ -9174,42 +9172,40 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable, + (OpNode (_.VT _.RC:$src))>, Sched<[sched]>; defm m : AVX512_maskable, + (bitconvert (_.LdFrag addr:$src))))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb : AVX512_maskable, EVEX_B, - Sched<[sched.Folded, sched.ReadAfterFold]>; + (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } -multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, +multiclass avx512_fp28_p_sae opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in defm rb : AVX512_maskable, + (OpNode (_.VT _.RC:$src))>, EVEX_B, Sched<[sched]>; } multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode, - X86SchedWriteWidths sched> { + SDNode OpNodeSAE, X86SchedWriteWidths sched> { defm PSZ : avx512_fp28_p, - avx512_fp28_p_round, + avx512_fp28_p_sae, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp28_p, - avx512_fp28_p_round, + avx512_fp28_p_sae, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } @@ -9217,24 +9213,32 @@ multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp28_p, - EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp28_p, - EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp28_p, - EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp28_p, - EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PSZ128 : avx512_fp28_p, + EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp28_p, + EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp28_p, + EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp28_p, + EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; } } let Predicates = [HasERI] in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX; -} -defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>, - avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE, + SchedWriteFRsqrt>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE, + SchedWriteFRcp>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE, + SchedWriteFAdd>, EVEX; +} +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, + SchedWriteFRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>, EVEX; multiclass avx512_sqrt_packed_round opc, string OpcodeStr, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c3a9c68ce2b..8de2ebdf374 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -470,8 +470,10 @@ def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>; def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fsqrts : SDNode<"X86ISD::FSQRTS", SDTFPBinOp>; def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>; -def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; -def X86fgetexpRnds : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>; +def X86fgetexp : SDNode<"X86ISD::FGETEXP", SDTFPUnaryOp>; +def X86fgetexpSAE : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>; +def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>; +def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>; def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>; @@ -503,14 +505,19 @@ def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>; def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>; def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>; -def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>; -def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>; -def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>; +def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOp>; +def X86rsqrt28SAE: SDNode<"X86ISD::RSQRT28_SAE", SDTFPUnaryOp>; +def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOp>; +def X86rcp28SAE : SDNode<"X86ISD::RCP28_SAE", SDTFPUnaryOp>; +def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOp>; +def X86exp2SAE : SDNode<"X86ISD::EXP2_SAE", SDTFPUnaryOp>; def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>; def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>; -def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>; -def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>; +def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOp>; +def X86rsqrt28SAEs : SDNode<"X86ISD::RSQRT28S_SAE", SDTFPBinOp>; +def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOp>; +def X86rcp28SAEs : SDNode<"X86ISD::RCP28S_SAE", SDTFPBinOp>; def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>; def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>; def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index e9c07e7377f..94846c89432 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -26,7 +26,7 @@ enum IntrinsicType : uint16_t { CVTPD2PS_MASK, INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE, INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_2OP_MASK_SAE, INTR_TYPE_3OP_MASK_SAE, - INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, + INTR_TYPE_1OP_MASK, INTR_TYPE_2OP_MASK_RM, IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_SAE, INTR_TYPE_SCALAR_MASK_RND, @@ -445,8 +445,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0), X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND), - X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), - X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), + X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE), + X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE), X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0), @@ -586,22 +586,22 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMM, X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE), X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASSS, 0), X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASSS, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM, - X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM, - X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM, - X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM, - X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM, - X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM, - X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FGETEXPS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FGETEXPS_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK, + X86ISD::FGETEXP, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK, + X86ISD::FGETEXP, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::FGETEXP, X86ISD::FGETEXP_SAE), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK, + X86ISD::FGETEXP, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK, + X86ISD::FGETEXP, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::FGETEXP, X86ISD::FGETEXP_SAE), + X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_SAE, + X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE), + X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_SAE, + X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE), X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VGETMANT, 0), X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK_SAE, @@ -895,10 +895,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), - X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), - X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), - X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0), - X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0), + X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE), + X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE), + X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE), + X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE), X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), @@ -907,10 +907,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), - X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), - X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), - X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0), - X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0), + X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE), + X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE), + X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE), + X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE), X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND), X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND), -- 2.40.0