From: Simon Pilgrim Date: Wed, 29 Nov 2017 19:37:38 +0000 (+0000) Subject: [X86][AVX512] Tag RCP/RSQRT/GETEXP instructions scheduler classes (REVERSION) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5c698e34371a61241388dea1d1ddfaa38a62766a;p=llvm [X86][AVX512] Tag RCP/RSQRT/GETEXP instructions scheduler classes (REVERSION) Accidental commit of incomplete patch git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319346 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index a14689e2613..5d97c21587a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4549,7 +4549,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), - itins.rr>, Sched<[itins.Sched]>; + itins.rr>; defm rm_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (_.VT (VecNode _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))), - itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; + itins.rm>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], - itins.rr>, Sched<[itins.Sched]> { + itins.rr> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src2)))], itins.rm>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (_.ScalarLdFrag addr:$src2)))], itins.rm>; } } } @@ -4584,7 +4583,7 @@ multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo "$rc, $src2, $src1", "$src1, $src2, $rc", (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$rc)), itins.rr, IsCommutable>, - EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + EVEX_B, EVEX_RC; } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, @@ -4594,36 +4593,35 @@ multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2)), - itins.rr>, Sched<[itins.Sched]>; + itins.rr>; defm rm_Int : AVX512_maskable_scalar, Sched<[itins.Sched.Folded, ReadAfterLd]>; + itins.rm>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], - itins.rr>, Sched<[itins.Sched]> { + itins.rr> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src2)))], itins.rm>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (_.ScalarLdFrag addr:$src2)))], itins.rm>; } defm rrb : AVX512_maskable_scalar, EVEX_B, Sched<[itins.Sched]>; + (i32 FROUND_NO_EXC))>, EVEX_B; } } @@ -7366,34 +7364,32 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86VectorVTInfo _> { let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable_scalar, - EVEX_4V, Sched<[itins.Sched]>; + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V; defm rm : AVX512_maskable_scalar, EVEX_4V, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + _.ScalarIntMemCPat:$src2)>, EVEX_4V; } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>, +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>, +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>, +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>, +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable opc, string OpcodeStr, SDNode OpNode, } } -multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm PSZ : avx512_fp14_p, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp14_p, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp14_p, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp14_p, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp14_p, + OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp14_p, + OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp14_p, + OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp14_p, + OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>; -defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>; +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, - SDNode OpNode, OpndItins itins> { + SDNode OpNode> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, } } -multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm SS : avx512_fp28_s, +multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode> { + defm SS : avx512_fp28_s, EVEX_CD8<32, CD8VT1>; - defm SD : avx512_fp28_s, + defm SD : avx512_fp28_s, EVEX_CD8<64, CD8VT1>, VEX_W; } let Predicates = [HasERI] in { - defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>, - T8PD, EVEX_4V; - defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>, - T8PD, EVEX_4V; + defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V; + defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; } -defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>, - T8PD, EVEX_4V; +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode, OpndItins itins> { + SDNode OpNode> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, } } multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode, OpndItins itins> { + SDNode OpNode> { let ExeDomain = _.ExeDomain in defm rb : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B; } -multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm PS : avx512_fp28_p, - avx512_fp28_p_round, +multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { + defm PS : avx512_fp28_p, + avx512_fp28_p_round, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PD : avx512_fp28_p, - avx512_fp28_p_round, + defm PD : avx512_fp28_p, + avx512_fp28_p_round, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, - SDNode OpNode, SizeItins itins> { + SDNode OpNode> { // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp28_p, + defm PSZ128 : avx512_fp28_p, EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp28_p, + defm PSZ256 : avx512_fp28_p, EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp28_p, + defm PDZ128 : avx512_fp28_p, EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp28_p, + defm PDZ256 : avx512_fp28_p, EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; } } let Predicates = [HasERI] in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; } -defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>, - avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, - SSE_ALU_ITINS_P>, EVEX; +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; multiclass avx512_sqrt_packed_round opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo _>{ diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 099883c4072..ed05b32f30a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3040,14 +3040,6 @@ def SSE_RSQRTSS : OpndItins< >; } -def SSE_RSQRT_P : SizeItins< - SSE_RSQRTPS, SSE_RSQRTPS ->; - -def SSE_RSQRT_S : SizeItins< - SSE_RSQRTSS, SSE_RSQRTSS ->; - let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM @@ -3058,14 +3050,6 @@ def SSE_RCPS : OpndItins< >; } -def SSE_RCP_P : SizeItins< - SSE_RCPP, SSE_RCPP ->; - -def SSE_RCP_S : SizeItins< - SSE_RCPS, SSE_RCPS ->; - /// sse_fp_unop_s - SSE1 unops in scalar form /// For the non-AVX defs, we need $src1 to be tied to $dst because /// the HW instructions are 2 operand / destructive. diff --git a/test/CodeGen/X86/recip-fastmath2.ll b/test/CodeGen/X86/recip-fastmath2.ll index b5001666b9e..f6eeeec57f1 100644 --- a/test/CodeGen/X86/recip-fastmath2.ll +++ b/test/CodeGen/X86/recip-fastmath2.ll @@ -380,12 +380,12 @@ define float @f32_two_step_2(float %x) #2 { ; ; SKX-LABEL: f32_two_step_2: ; SKX: # BB#0: -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm2 # sched: [4:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ss %xmm1, %xmm0, %xmm3 # sched: [4:0.33] -; SKX-NEXT: vfmadd132ss %xmm2, %xmm2, %xmm3 # sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss %xmm1, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00]