From a00be16cd8ce3284450837c483a221927f46ca66 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 11 Mar 2019 04:36:51 +0000 Subject: [PATCH] [X86] Rename the CVTT*_RND ISD nodes to _SAE and remove the SAE operand. Split VFPROUNDS_RND/VFPEXT(S)_RND into versions without a rounding operand. For VFPEXT(S) we only need a version that uses the current rounding mode and an SAE version. Neither needs an extra operand. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355802 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 61 +++++++++++++++-- lib/Target/X86/X86ISelLowering.h | 8 +-- lib/Target/X86/X86InstrAVX512.td | 91 ++++++++++++------------- lib/Target/X86/X86InstrFragmentsSIMD.td | 35 ++++++---- lib/Target/X86/X86IntrinsicsInfo.h | 64 ++++++++--------- 5 files changed, 158 insertions(+), 101 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a569a82a3b7..d74b56e1c88 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21924,6 +21924,19 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1)); } + case INTR_TYPE_1OP_SAE: { + SDValue Sae = Op.getOperand(2); + + unsigned Opc; + if (isRoundModeCurDirection(Sae)) + Opc = IntrData->Opc0; + else if (isRoundModeSAE(Sae)) + Opc = IntrData->Opc1; + else + return SDValue(); + + return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1)); + } case INTR_TYPE_2OP: { SDValue Src2 = Op.getOperand(2); @@ -22025,6 +22038,23 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, Subtarget, DAG); } + case INTR_TYPE_1OP_MASK_SAE: { + SDValue Src = Op.getOperand(1); + SDValue PassThru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue Rnd = Op.getOperand(4); + + unsigned Opc; + if (isRoundModeCurDirection(Rnd)) + Opc = IntrData->Opc0; + 
else if (isRoundModeSAE(Rnd)) + Opc = IntrData->Opc1; + else + return SDValue(); + + return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), + Mask, PassThru, Subtarget, DAG); + } case INTR_TYPE_SCALAR_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -22067,6 +22097,23 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Src2, RoundingMode), Mask, passThru, Subtarget, DAG); } + case INTR_TYPE_SCALAR_MASK_RND: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue passThru = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + SDValue Rnd = Op.getOperand(5); + + SDValue NewOp; + if (isRoundModeCurDirection(Rnd)) + NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); + else if (isRoundModeSAEToX(Rnd)) + NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Rnd); + else + return SDValue(); + + return getScalarMaskingNode(NewOp, Mask, passThru, Subtarget, DAG); + } case INTR_TYPE_SCALAR_MASK_SAE: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -27593,11 +27640,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES"; case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; - case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND"; - case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND"; + case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE"; + case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS"; + case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND"; case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; + case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS"; case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; @@ -27765,12 +27814,12 @@ const 
char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI"; case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI"; case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI"; - case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND"; - case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND"; + case X86ISD::CVTTP2SI_SAE: return "X86ISD::CVTTP2SI_SAE"; + case X86ISD::CVTTP2UI_SAE: return "X86ISD::CVTTP2UI_SAE"; case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI"; case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI"; - case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND"; - case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND"; + case X86ISD::CVTTS2SI_SAE: return "X86ISD::CVTTS2SI_SAE"; + case X86ISD::CVTTS2UI_SAE: return "X86ISD::CVTTS2UI_SAE"; case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P"; case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P"; case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e51553e0871..d7a5b2b5117 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -300,10 +300,10 @@ namespace llvm { VMTRUNC, VMTRUNCUS, VMTRUNCS, // Vector FP extend. - VFPEXT, VFPEXT_RND, VFPEXTS_RND, + VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE, // Vector FP round. - VFPROUND, VFPROUND_RND, VFPROUNDS_RND, + VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND, // Masked version of above. Used for v2f64->v4f32. // SRC, PASSTHRU, MASK @@ -505,9 +505,9 @@ namespace llvm { CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND, // Vector float/double to signed/unsigned integer with truncation. - CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND, + CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE, // Scalar float/double to signed/unsigned integer with truncation. - CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND, + CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE, // Vector signed/unsigned integer to float/double. 
CVTSI2P, CVTUI2P, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 822592d30ce..ce16cc9a92f 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7684,7 +7684,7 @@ def : Pat<(v2f64 (X86Movsd // Convert float/double to signed/unsigned int 32/64 with truncation multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, - SDNode OpNodeInt, SDNode OpNodeRnd, + SDNode OpNodeInt, SDNode OpNodeSAE, X86FoldableSchedWrite sched, string aliasStr, bit CodeGenOnly = 1>{ let Predicates = [HasAVX512] in { @@ -7705,8 +7705,7 @@ let Predicates = [HasAVX512] in { EVEX, VEX_LIG, Sched<[sched]>; def rrb_Int : AVX512, + [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>, EVEX,VEX_LIG , EVEX_B, Sched<[sched]>; let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in def rm_Int : AVX512 opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, - SDNode OpNodeInt, SDNode OpNodeRnd, + SDNode OpNodeInt, SDNode OpNodeSAE, X86FoldableSchedWrite sched, string aliasStr> : - avx512_cvt_s_all { def : InstAlias(NAME # "rm_Int") _DstRC.RC:$dst, @@ -7737,29 +7736,29 @@ multiclass avx512_cvt_s_all_unsigned opc, string asm, } defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, - fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I, + fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, - fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I, + fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, - fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I, + fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z: 
avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, - fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I, + fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info, - fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I, + fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info, - fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I, + fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info, - fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I, + fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info, - fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I, + fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; //===----------------------------------------------------------------------===// @@ -7773,15 +7772,13 @@ multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _ (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT _.RC:$src1), - (_Src.VT _Src.RC:$src2), - (i32 FROUND_CURRENT)))>, + (_Src.VT _Src.RC:$src2)))>, EVEX_4V, VEX_LIG, Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar, + (_Src.VT _Src.ScalarIntMemCPat:$src2)))>, EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -7800,14 +7797,13 @@ multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _ // Scalar Coversion with SAE - suppress all exceptions multiclass avx512_cvt_fp_sae_scalar opc, string OpcodeStr, X86VectorVTInfo _, - 
X86VectorVTInfo _Src, SDNode OpNodeRnd, + X86VectorVTInfo _Src, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { defm rrb_Int : AVX512_maskable_scalar, + (_.VT (OpNodeSAE (_.VT _.RC:$src1), + (_Src.VT _Src.RC:$src2)))>, EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; } @@ -7824,29 +7820,31 @@ multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInf EVEX_B, EVEX_RC; } multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, - SDNode OpNodeRnd, X86FoldableSchedWrite sched, - X86VectorVTInfo _src, X86VectorVTInfo _dst> { + SDNode OpNode, SDNode OpNodeRnd, + X86FoldableSchedWrite sched, + X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, + defm Z : avx512_cvt_fp_scalar, avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, XD; } } -multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, SDNode OpNodeRnd, +multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeSAE, X86FoldableSchedWrite sched, X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, - avx512_cvt_fp_sae_scalar, + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_sae_scalar, EVEX_CD8<32, CD8VT1>, XS; } } -defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", - X86froundRnd, WriteCvtSD2SS, f64x_info, +defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds, + X86froundsRnd, WriteCvtSD2SS, f64x_info, f32x_info>; -defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", - X86fpextRnd, WriteCvtSS2SD, f32x_info, +defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts, + X86fpextsSAE, WriteCvtSS2SD, f32x_info, f64x_info>; def : Pat<(f64 (fpextend FR32X:$src)), @@ -7941,13 +7939,12 @@ multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, } // Coversion with SAE - suppress all exceptions multiclass avx512_vcvt_fp_sae opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, 
SDNode OpNodeRnd, + X86VectorVTInfo _Src, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { defm rrb : AVX512_maskable, + (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>, EVEX, EVEX_B, Sched<[sched]>; } @@ -7969,7 +7966,7 @@ multiclass avx512_cvtps2pd opc, string OpcodeStr, defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + X86vfpextSAE, sched.ZMM>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp opc, string OpcodeStr, SDNode OpNode, // Convert Float to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttps2dq opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode OpNodeSAE, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + OpNodeSAE, sched.ZMM>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp opc, string OpcodeStr, SDNode OpNode, // Convert Double to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode OpNodeSAE, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + OpNodeSAE, sched.ZMM>, EVEX_V512; } let Predicates = [HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 @@ -8365,19 +8362,19 @@ defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si, - X86cvttp2siRnd, SchedWriteCvtPS2DQ>, + X86cvttp2siSAE, SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si, - X86cvttp2siRnd, SchedWriteCvtPD2DQ>, + X86cvttp2siSAE, SchedWriteCvtPD2DQ>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui, - X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS, + X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS, 
EVEX_CD8<32, CD8VF>; defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui, - X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, + X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, PS, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, @@ -8421,19 +8418,19 @@ defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, EVEX_CD8<32, CD8VH>; defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si, - X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W, + X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si, - X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD, + X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui, - X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W, + X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui, - X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD, + X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 5acd144b86a..1f73f18d23c 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -128,19 +128,29 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND", SDTCVecEltisVT<1, f64>, SDTCisOpSmallerThanOp<0, 1>]>>; -def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND", +def X86frounds : SDNode<"X86ISD::VFPROUNDS", + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>, + SDTCisSameAs<0, 1>, + SDTCVecEltisVT<2, f64>, + SDTCisSameSizeAs<0, 2>]>>; + +def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, SDTCisSameAs<0, 1>, SDTCVecEltisVT<2, f64>, SDTCisSameSizeAs<0, 2>, SDTCisVT<3, i32>]>>; -def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND", - SDTypeProfile<1, 3, 
[SDTCVecEltisVT<0, f64>, +def X86fpexts : SDNode<"X86ISD::VFPEXTS", + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>, SDTCisSameAs<0, 1>, SDTCVecEltisVT<2, f32>, - SDTCisSameSizeAs<0, 2>, - SDTCisVT<3, i32>]>>; + SDTCisSameSizeAs<0, 2>]>>; +def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE", + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>, + SDTCisSameAs<0, 1>, + SDTCVecEltisVT<2, f32>, + SDTCisSameSizeAs<0, 2>]>>; def X86vmfpround: SDNode<"X86ISD::VMFPROUND", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, @@ -561,8 +571,8 @@ def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>; def X86cvtts2Int : SDNode<"X86ISD::CVTTS2SI", SDTSFloatToInt>; def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI", SDTSFloatToInt>; -def X86cvtts2IntRnd : SDNode<"X86ISD::CVTTS2SI_RND", SDTSFloatToIntRnd>; -def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND", SDTSFloatToIntRnd>; +def X86cvtts2IntSAE : SDNode<"X86ISD::CVTTS2SI_SAE", SDTSFloatToInt>; +def X86cvtts2UIntSAE : SDNode<"X86ISD::CVTTS2UI_SAE", SDTSFloatToInt>; def X86cvts2si : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>; def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>; @@ -572,8 +582,8 @@ def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>; // Vector with rounding mode // cvtt fp-to-int staff -def X86cvttp2siRnd : SDNode<"X86ISD::CVTTP2SI_RND", SDTFloatToIntRnd>; -def X86cvttp2uiRnd : SDNode<"X86ISD::CVTTP2UI_RND", SDTFloatToIntRnd>; +def X86cvttp2siSAE : SDNode<"X86ISD::CVTTP2SI_SAE", SDTFloatToInt>; +def X86cvttp2uiSAE : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>; def X86VSintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTVintToFPRound>; def X86VUintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTVintToFPRound>; @@ -639,11 +649,10 @@ def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH", SDTCisSameAs<0, 3>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<1, 4>]> >; -def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND", - SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>, +def X86vfpextSAE : 
SDNode<"X86ISD::VFPEXT_SAE", + SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>, SDTCVecEltisVT<1, f32>, - SDTCisOpSmallerThanOp<1, 0>, - SDTCisVT<2, i32>]>>; + SDTCisOpSmallerThanOp<1, 0>]>>; def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, f64>, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index bd28c7b67f4..727aa41259d 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -24,11 +24,13 @@ enum IntrinsicType : uint16_t { INTR_TYPE_3OP_IMM8, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, CVTPD2PS_MASK, - INTR_TYPE_2OP_SAE, + INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE, + INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_SAE, + INTR_TYPE_SCALAR_MASK_RND, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK, COMPRESS_EXPAND_IN_REG, TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK, @@ -428,14 +430,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), - X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND), - X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND), - X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND), - X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND), - X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND), - X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 
X86ISD::CVTTS2SI_RND), - X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND), - X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND), + X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE), + X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE), + X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE), + X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE), + X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE), + X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE), + X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE), + X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE), X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), @@ -499,8 +501,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK, - ISD::FP_EXTEND, X86ISD::VFPEXT_RND), + X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK_SAE, + ISD::FP_EXTEND, X86ISD::VFPEXT_SAE), X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK, @@ -521,52 +523,52 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND), 
X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, CVTQQ2PS_MASK, X86ISD::CVTSI2P, X86ISD::MCVTSI2P), - X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::VFPROUNDS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::VFPEXTS_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RND, + X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND), + X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_SAE, + X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2DQ_MASK, X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI), - X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE), X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, 0), - X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2DQ_MASK, X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), - X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE), X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), - X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), - 
X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE), + X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE), X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, 0), - X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE), X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), - X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE), X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), - X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK_SAE, + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE), X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, CVTQQ2PS_MASK, X86ISD::CVTUI2P, X86ISD::MCVTUI2P), X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK, -- 2.40.0