From ae007cec62c858256c8dce4bac79b31e79b6d9f5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 29 Nov 2017 19:19:59 +0000 Subject: [PATCH] [X86][AVX512] Tag RCP/RSQRT/GETEXP instructions scheduler classes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 117 +++++++++++++++------------- lib/Target/X86/X86InstrSSE.td | 16 ++++ test/CodeGen/X86/recip-fastmath2.ll | 12 +-- 3 files changed, 86 insertions(+), 59 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5d97c21587a..a14689e2613 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4549,7 +4549,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; defm rm_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (_.VT (VecNode _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))), - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], - itins.rr> { + itins.rr>, Sched<[itins.Sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src2)))], itins.rm>; + (_.ScalarLdFrag addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } } @@ -4583,7 +4584,7 @@ multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo "$rc, $src2, $src1", "$src1, $src2, $rc", (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$rc)), itins.rr, IsCommutable>, - EVEX_B, EVEX_RC; + EVEX_B, EVEX_RC, Sched<[itins.Sched]>; } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, @@ -4593,35 +4594,36 @@ multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2)), - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; defm rm_Int : AVX512_maskable_scalar; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], - itins.rr> { + itins.rr>, Sched<[itins.Sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src2)))], itins.rm>; + (_.ScalarLdFrag addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm rrb : AVX512_maskable_scalar, EVEX_B; + (i32 FROUND_NO_EXC))>, EVEX_B, Sched<[itins.Sched]>; } } @@ -7364,32 +7366,34 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable_scalar, EVEX_4V; + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>, + EVEX_4V, Sched<[itins.Sched]>; defm rm : AVX512_maskable_scalar, EVEX_4V; + _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>, +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>, +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>, +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>, +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable opc, string OpcodeStr, SDNode OpNode, } } -multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { - defm PSZ : avx512_fp14_p, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp14_p, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm PSZ : avx512_fp14_p, EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp14_p, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp14_p, + OpNode, itins.s, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp14_p, + OpNode, itins.s, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp14_p, + OpNode, itins.d, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp14_p, + OpNode, itins.d, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>; -defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>; +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, } } -multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode> { - defm SS : avx512_fp28_s, +multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm SS : avx512_fp28_s, EVEX_CD8<32, CD8VT1>; - defm SD : avx512_fp28_s, + defm SD : avx512_fp28_s, EVEX_CD8<64, CD8VT1>, VEX_W; } let Predicates = [HasERI] in { - defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V; - defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; + defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>, + T8PD, EVEX_4V; + defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>, + T8PD, EVEX_4V; } -defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>, + T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, } } multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let ExeDomain = _.ExeDomain in defm rb : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B; } -multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { - defm PS : avx512_fp28_p, - avx512_fp28_p_round, +multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm PS : avx512_fp28_p, + avx512_fp28_p_round, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PD : avx512_fp28_p, - avx512_fp28_p_round, + defm PD : avx512_fp28_p, + avx512_fp28_p_round, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, SizeItins itins> { // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp28_p, + defm PSZ128 : avx512_fp28_p, EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp28_p, + defm PSZ256 : avx512_fp28_p, EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp28_p, + defm PDZ128 : avx512_fp28_p, EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp28_p, + defm PDZ256 : avx512_fp28_p, EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; } } let Predicates = [HasERI] in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX; } -defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, - avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, + SSE_ALU_ITINS_P>, EVEX; multiclass avx512_sqrt_packed_round opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo _>{ diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ed05b32f30a..099883c4072 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3040,6 +3040,14 @@ def SSE_RSQRTSS : OpndItins< >; } +def SSE_RSQRT_P : SizeItins< + SSE_RSQRTPS, SSE_RSQRTPS +>; + +def SSE_RSQRT_S : SizeItins< + SSE_RSQRTSS, SSE_RSQRTSS +>; + let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM @@ -3050,6 +3058,14 @@ def SSE_RCPS : OpndItins< >; } +def SSE_RCP_P : SizeItins< + SSE_RCPP, SSE_RCPP +>; + +def SSE_RCP_S : SizeItins< + SSE_RCPS, SSE_RCPS +>; + /// sse_fp_unop_s - SSE1 unops in scalar form /// For the non-AVX defs, we need $src1 to be tied to $dst because /// the HW instructions are 2 operand / destructive. diff --git a/test/CodeGen/X86/recip-fastmath2.ll b/test/CodeGen/X86/recip-fastmath2.ll index f6eeeec57f1..b5001666b9e 100644 --- a/test/CodeGen/X86/recip-fastmath2.ll +++ b/test/CodeGen/X86/recip-fastmath2.ll @@ -380,12 +380,12 @@ define float @f32_two_step_2(float %x) #2 { ; ; SKX-LABEL: f32_two_step_2: ; SKX: # BB#0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33] -; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm2 # sched: [4:1.00] +; SKX-NEXT: vmovaps %xmm2, %xmm3 # sched: [1:1.00] +; SKX-NEXT: vfnmadd213ss %xmm1, %xmm0, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ss %xmm2, %xmm2, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss %xmm1, %xmm3, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] -- 2.50.1