From bf6b4e88ed52391be5682cd4c24423686c4776de Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 15 Mar 2019 19:59:35 +0000
Subject: [PATCH] [X86] Strip the SAE bit from the rounding mode passed to the
 _RND opcodes. Use TargetConstant to save a conversion in the isel table.

The asm parser generates the immediate without the SAE bit. So for
consistency we should generate the MCInst the same way from CodeGen.
Since they are now both the same, remove the masking from the printer
and replace with an llvm_unreachable.

Use a target constant since we're rebuilding the node anyway. Then we
don't have to have isel convert it. Saves about 500 bytes from the isel
table.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356294 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../X86/InstPrinter/X86InstPrinterCommon.cpp | 20 +++--
 lib/Target/X86/X86ISelLowering.cpp           | 74 +++++++++++--------
 lib/Target/X86/X86InstrAVX512.td             | 60 +++++++--------
 3 files changed, 88 insertions(+), 66 deletions(-)

diff --git a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
index f31641fd733..cf08389876b 100644
--- a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
@@ -82,12 +82,22 @@ void X86InstPrinterCommon::printXOPCC(const MCInst *MI, unsigned Op,
 
 void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
                                                 raw_ostream &O) {
-  int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
+  int64_t Imm = MI->getOperand(Op).getImm();
   switch (Imm) {
-  case 0: O << "{rn-sae}"; break;
-  case 1: O << "{rd-sae}"; break;
-  case 2: O << "{ru-sae}"; break;
-  case 3: O << "{rz-sae}"; break;
+  default:
+    llvm_unreachable("Invalid rounding control!");
+  case X86::TO_NEAREST_INT:
+    O << "{rn-sae}";
+    break;
+  case X86::TO_NEG_INF:
+    O << "{rd-sae}";
+    break;
+  case X86::TO_POS_INF:
+    O << "{ru-sae}";
+    break;
+  case X86::TO_ZERO:
+    O << "{rz-sae}";
+    break;
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 713ea356584..9be77c95687 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -21865,16 +21865,16 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return false;
   };
 
-  auto isRoundModeSAEToX = [](SDValue Rnd) {
+  auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) {
     if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
-      unsigned Round = C->getZExtValue();
-      if (Round & X86::STATIC_ROUNDING::NO_EXC) {
+      RC = C->getZExtValue();
+      if (RC & X86::STATIC_ROUNDING::NO_EXC) {
         // Clear the NO_EXC bit and check remaining bits.
-        Round ^= X86::STATIC_ROUNDING::NO_EXC;
-        return Round == X86::STATIC_ROUNDING::TO_NEAREST_INT ||
-               Round == X86::STATIC_ROUNDING::TO_NEG_INF ||
-               Round == X86::STATIC_ROUNDING::TO_POS_INF ||
-               Round == X86::STATIC_ROUNDING::TO_ZERO;
+        RC ^= X86::STATIC_ROUNDING::NO_EXC;
+        return RC == X86::STATIC_ROUNDING::TO_NEAREST_INT ||
+               RC == X86::STATIC_ROUNDING::TO_NEG_INF ||
+               RC == X86::STATIC_ROUNDING::TO_POS_INF ||
+               RC == X86::STATIC_ROUNDING::TO_ZERO;
       }
     }
 
@@ -21894,9 +21894,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(2);
-        if (isRoundModeSAEToX(Rnd))
+        unsigned RC;
+        if (isRoundModeSAEToX(Rnd, RC))
           return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
-                             Op.getOperand(1), Rnd);
+                             Op.getOperand(1),
+                             DAG.getTargetConstant(RC, dl, MVT::i32));
         if (!isRoundModeCurDirection(Rnd))
           return SDValue();
       }
@@ -21924,9 +21926,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(3);
-        if (isRoundModeSAEToX(Rnd))
+        unsigned RC;
+        if (isRoundModeSAEToX(Rnd, RC))
           return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
-                             Op.getOperand(1), Src2, Rnd);
+                             Op.getOperand(1), Src2,
+                             DAG.getTargetConstant(RC, dl, MVT::i32));
         if (!isRoundModeCurDirection(Rnd))
           return SDValue();
       }
@@ -21963,10 +21967,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(4);
-        if (isRoundModeSAEToX(Rnd))
-          return DAG.getNode(IntrWithRoundingModeOpcode,
-                             dl, Op.getValueType(),
-                             Src1, Src2, Src3, Rnd);
+        unsigned RC;
+        if (isRoundModeSAEToX(Rnd, RC))
+          return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
+                             Src1, Src2, Src3,
+                             DAG.getTargetConstant(RC, dl, MVT::i32));
         if (!isRoundModeCurDirection(Rnd))
           return SDValue();
       }
@@ -21982,16 +21987,17 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       SDValue PassThru = Op.getOperand(2);
       SDValue Mask = Op.getOperand(3);
       // We add rounding mode to the Node when
-      //   - RM Opcode is specified and
-      //   - RM is not "current direction".
+      //   - RC Opcode is specified and
+      //   - RC is not "current direction".
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(4);
-        if (isRoundModeSAEToX(Rnd))
-          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                                  dl, Op.getValueType(),
-                                                  Src, Rnd),
-                                      Mask, PassThru, Subtarget, DAG);
+        unsigned RC;
+        if (isRoundModeSAEToX(Rnd, RC))
+          return getVectorMaskingNode(
+              DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
+                          Src, DAG.getTargetConstant(RC, dl, MVT::i32)),
+              Mask, PassThru, Subtarget, DAG);
         if (!isRoundModeCurDirection(Rnd))
           return SDValue();
       }
@@ -22028,10 +22034,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       if (Op.getNumOperands() == (5U + HasRounding)) {
         if (HasRounding) {
           SDValue Rnd = Op.getOperand(5);
-          if (isRoundModeSAEToX(Rnd))
-            return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                                    dl, VT, Src1, Src2, Rnd),
-                                        Mask, passThru, Subtarget, DAG);
+          unsigned RC;
+          if (isRoundModeSAEToX(Rnd, RC))
+            return getScalarMaskingNode(
+                DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2,
+                            DAG.getTargetConstant(RC, dl, MVT::i32)),
+                Mask, passThru, Subtarget, DAG);
           if (!isRoundModeCurDirection(Rnd))
             return SDValue();
         }
@@ -22063,10 +22071,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       SDValue Rnd = Op.getOperand(5);
 
       SDValue NewOp;
+      unsigned RC;
       if (isRoundModeCurDirection(Rnd))
         NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
-      else if (isRoundModeSAEToX(Rnd))
-        NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Rnd);
+      else if (isRoundModeSAEToX(Rnd, RC))
+        NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
+                            DAG.getTargetConstant(RC, dl, MVT::i32));
       else
         return SDValue();
 
@@ -22097,8 +22107,10 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       SDValue NewOp;
       if (IntrData->Opc1 != 0) {
         SDValue Rnd = Op.getOperand(5);
-        if (isRoundModeSAEToX(Rnd))
-          NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Rnd);
+        unsigned RC;
+        if (isRoundModeSAEToX(Rnd, RC))
+          NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
+                              DAG.getTargetConstant(RC, dl, MVT::i32));
         else if (!isRoundModeCurDirection(Rnd))
           return SDValue();
       }
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index b67b6c60c29..0310de3f1f7 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2028,7 +2028,7 @@ defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
-multiclass avx512_cmp_scalar {
   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
@@ -2053,7 +2053,7 @@ multiclass avx512_cmp_scalar, EVEX_4V, EVEX_B, Sched<[sched]>;
@@ -5485,7 +5485,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                         (i32 imm:$rc)), IsCommutable>,
+                         (i32 timm:$rc)), IsCommutable>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
@@ -5640,18 +5640,18 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
   defm rrb: AVX512_maskable,
-                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
+                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
-                                SDPatternOperator OpNodeRnd,
+                                SDPatternOperator OpNodeSAE,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   let ExeDomain = _.ExeDomain in
   defm rrb: AVX512_maskable,
-                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2))>,
+                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                   EVEX_4V, EVEX_B, Sched<[sched]>;
 }
@@ -6828,7 +6828,7 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   defm rb: AVX512_maskable_3src,
-                  (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
+                  (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
                   AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
@@ -6903,7 +6903,7 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   defm rb: AVX512_maskable_3src,
                   AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
@@ -6981,7 +6981,7 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   defm rb: AVX512_maskable_3src,
                   AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
@@ -7076,7 +7076,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                          (_.ScalarLdFrag addr:$src3)))),
                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
-                         _.FRC:$src3, (i32 imm:$rc)))), 0>;
+                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
 
   defm NAME#231#SUFF#Z: avx512_fma3s_common opc213, bits<8> opc231, bits<8> opc132,
                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                          (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
-                         _.FRC:$src1, (i32 imm:$rc)))), 1>;
+                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
 
   // One pattern is 312 order so that the load is in a different place from the
   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
@@ -7094,7 +7094,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                          _.FRC:$src1, _.FRC:$src2))),
                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
-                         _.FRC:$src2, (i32 imm:$rc)))), 1>;
+                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
   }
 }
@@ -7258,62 +7258,62 @@ multiclass avx512_scalar_fma_patterns(Prefix#"213"#Suffix#"Zrb_Int")
                    VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
   def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (RndOp _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                    (i32 imm:$rc)))))),
+                    (i32 timm:$rc)))))),
            (!cast<Instruction>(Prefix#"231"#Suffix#"Zrb_Int")
                    VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
   def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
                          (RndOp _.FRC:$src2,
                                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                                _.FRC:$src3, (i32 imm:$rc)),
+                                _.FRC:$src3, (i32 timm:$rc)),
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
            (!cast<Instruction>(Prefix#"213"#Suffix#"Zrb_Intk")
                    VR128X:$src1, VK1WM:$mask,
                    (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
   def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
                          (RndOp _.FRC:$src2, _.FRC:$src3,
                                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                                (i32 imm:$rc)),
+                                (i32 timm:$rc)),
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
           (!cast<Instruction>(Prefix#"231"#Suffix#"Zrb_Intk")
                    VR128X:$src1, VK1WM:$mask,
                    (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
   def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
                          (RndOp _.FRC:$src2,
                                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                                _.FRC:$src3, (i32 imm:$rc)),
+                                _.FRC:$src3, (i32 timm:$rc)),
                          (_.EltVT ZeroFP)))))),
           (!cast<Instruction>(Prefix#"213"#Suffix#"Zrb_Intkz")
                    VR128X:$src1, VK1WM:$mask,
                    (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
   def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
                          (RndOp _.FRC:$src2, _.FRC:$src3,
                                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                                (i32 imm:$rc)),
+                                (i32 timm:$rc)),
                          (_.EltVT ZeroFP)))))),
           (!cast<Instruction>(Prefix#"231"#Suffix#"Zrb_Intkz")
                    VR128X:$src1, VK1WM:$mask,
                    (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+                   (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
   }
 }
@@ -7435,7 +7435,7 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                     [(set DstVT.RC:$dst,
                           (OpNode (DstVT.VT DstVT.RC:$src1),
                                    SrcRC:$src2,
-                                   (i32 imm:$rc)))]>,
+                                   (i32 timm:$rc)))]>,
                     EVEX_4V, EVEX_B, EVEX_RC,
                     Sched<[sched, ReadDefault, ReadInt2Fpu]>;
 }
@@ -7545,7 +7545,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                 EVEX, VEX_LIG, Sched<[sched]>;
   def rrb_Int : SI,
-                [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
+                [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC, Sched<[sched]>;
   let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
@@ -7817,7 +7817,7 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
-                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
+                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>,
                         EVEX_B, EVEX_RC;
 }
@@ -7957,7 +7957,7 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
   defm rrb : AVX512_maskable,
-                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
+                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
@@ -9248,7 +9248,7 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
   let ExeDomain = _.ExeDomain in
   defm rb: AVX512_maskable,
-                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
+                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
@@ -9328,7 +9328,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (X86fsqrtRnds (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
-                                       (i32 imm:$rc))>,
+                                       (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
 
   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
-- 
2.40.0
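To see the immediate handling described in the commit message in isolation, here is a small standalone C++ sketch (plain C++, not LLVM code). It assumes the encoding suggested by the old printer's cases 0-3 and the usual _MM_FROUND_* values — TO_NEAREST_INT = 0, TO_NEG_INF = 1, TO_POS_INF = 2, TO_ZERO = 3, with NO_EXC = 8 as the SAE bit — so treat those enumerator values, and the helper names stripSAE/roundingSuffix, as assumptions of the sketch rather than quotes from X86BaseInfo.h. stripSAE plays the role of the new isRoundModeSAEToX: it accepts only SAE plus an explicit direction and hands back the bare two-bit value that now lands in the MCInst operand, matching what the asm parser already produced. roundingSuffix mirrors the printer's switch, which can now reject anything outside 0-3 outright instead of masking with & 0x3.

// A minimal model of the rounding-mode immediate flow after this patch; the
// enumerator values below are an assumption (they mirror the old printer's
// 0-3 cases and the common _MM_FROUND_* encoding), not a copy of LLVM code.
#include <cassert>
#include <iostream>
#include <optional>

namespace model {

enum RoundingMode : unsigned {
  TO_NEAREST_INT = 0,
  TO_NEG_INF = 1,
  TO_POS_INF = 2,
  TO_ZERO = 3,
  CUR_DIRECTION = 4,
  NO_EXC = 8 // the SAE ("suppress all exceptions") bit
};

// Role of the new isRoundModeSAEToX: accept only SAE plus an explicit
// direction, clear the NO_EXC bit, and return the bare direction that
// CodeGen now places in the instruction operand.
std::optional<unsigned> stripSAE(unsigned Imm) {
  if (!(Imm & NO_EXC))
    return std::nullopt; // CUR_DIRECTION etc. is handled by a separate check
  unsigned RC = Imm ^ NO_EXC;
  if (RC <= TO_ZERO)
    return RC;
  return std::nullopt;
}

// Role of printRoundingControl after the patch: the operand is already
// SAE-free, so anything outside 0-3 is a hard error instead of being
// silently masked with & 0x3.
const char *roundingSuffix(unsigned RC) {
  switch (RC) {
  case TO_NEAREST_INT: return "{rn-sae}";
  case TO_NEG_INF:     return "{rd-sae}";
  case TO_POS_INF:     return "{ru-sae}";
  case TO_ZERO:        return "{rz-sae}";
  default: assert(false && "Invalid rounding control!"); return nullptr;
  }
}

} // namespace model

int main() {
  // What a front end would pass, e.g. _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC.
  unsigned Imm = model::TO_NEG_INF | model::NO_EXC;
  if (std::optional<unsigned> RC = model::stripSAE(Imm))
    std::cout << "operand " << *RC << " prints as "
              << model::roundingSuffix(*RC) << "\n"; // operand 1 prints as {rd-sae}
  return 0;
}

On the isel side, the point of building the operand with DAG.getTargetConstant and matching it as timm:$rc / AVX512RC:$rc in the .td patterns is that the operand reaches pattern matching already in its final form, so the generated matcher no longer needs a convert-to-target-constant step for each rounding-mode pattern; per the commit message, that is what accounts for the roughly 500 bytes saved from the isel table.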