From: Simon Pilgrim Date: Wed, 29 Nov 2017 13:49:51 +0000 (+0000) Subject: [X86][AVX512] Setup unary (PABS/VPLZCNT/VPOPCNT/VPCONFLICT/VMOV*DUP) instruction... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=abf3849506a08357be9c9e485664f45d5db1abcf;p=llvm [X86][AVX512] Setup unary (PABS/VPLZCNT/VPOPCNT/VPCONFLICT/VMOV*DUP) instruction scheduler classes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319312 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c1c3f3a3fb0..1e1a885409c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -9098,84 +9098,93 @@ defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" , avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX, AVX5128IBase; + (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase, + Sched<[itins.Sched]>; defm rm : AVX512_maskable, - EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; + (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>, + EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded]>; } } multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> : - avx512_unary_rm { + OpndItins itins, X86VectorVTInfo _> : + avx512_unary_rm { defm rmb : AVX512_maskable, - EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + (_.ScalarLdFrag addr:$src1)))), itins.rm>, + EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded]>; } multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo, + Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rm, EVEX_V512; + defm Z : avx512_unary_rm, + EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rm, + defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_unary_rm, + defm Z128 : avx512_unary_rm, EVEX_V128; } } multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo, + Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rmb, + defm Z : avx512_unary_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rmb, + defm Z256 : avx512_unary_rmb, EVEX_V256; - defm Z128 : avx512_unary_rmb, + defm Z128 : avx512_unary_rmb, EVEX_V128; } } multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, Predicate prd> { - defm Q : avx512_unary_rmb_vl, VEX_W; - defm D : avx512_unary_rmb_vl; + SDNode OpNode, OpndItins itins, Predicate prd> { + defm Q : avx512_unary_rmb_vl, VEX_W; + defm D : avx512_unary_rmb_vl; } multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, - SDNode OpNode, Predicate prd> { - defm W : avx512_unary_rm_vl, VEX_WIG; - defm B : avx512_unary_rm_vl, VEX_WIG; + SDNode OpNode, OpndItins itins, Predicate prd> { + defm W : avx512_unary_rm_vl, VEX_WIG; + defm B : avx512_unary_rm_vl, VEX_WIG; } multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, bits<8> opc_d, bits<8> opc_q, - string OpcodeStr, SDNode OpNode> { - defm NAME : avx512_unary_rm_vl_dq { + defm NAME : avx512_unary_rm_vl_dq, - avx512_unary_rm_vl_bw; } -defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>; +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>; // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { @@ -9191,13 +9200,17 @@ let Predicates = [HasAVX512, NoVLX] in { sub_xmm)>; } -multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ - - defm NAME : avx512_unary_rm_vl_dq; +multiclass avx512_ctlz opc, string OpcodeStr, OpndItins itins, + Predicate prd> { + defm NAME : avx512_unary_rm_vl_dq; } -defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>; -defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; +// FIXME: Is there a better scheduler itinerary for VPLZCNT? +defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", SSE_INTALU_ITINS_P, HasCDI>; + +// FIXME: Is there a better scheduler itinerary for VPCONFLICT? +defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, + SSE_INTALU_ITINS_P, HasCDI>; // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasCDI, NoVLX] in { @@ -9228,9 +9241,10 @@ let Predicates = [HasCDI, NoVLX] in { // Counts number of ones - VPOPCNTD and VPOPCNTQ //===---------------------------------------------------------------------===// -multiclass avx512_unary_rmb_popcnt opc, string OpcodeStr, X86VectorVTInfo VTInfo> { +multiclass avx512_unary_rmb_popcnt opc, string OpcodeStr, + OpndItins itins, X86VectorVTInfo VTInfo> { let Predicates = [HasVPOPCNTDQ] in - defm Z : avx512_unary_rmb, EVEX_V512; + defm Z : avx512_unary_rmb, EVEX_V512; } // Use 512bit version to implement 128/256 bit. @@ -9254,59 +9268,67 @@ multiclass avx512_unary_lowering, +// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ? +defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", SSE_INTALU_ITINS_P, + v16i32_info>, avx512_unary_lowering; -defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>, + +defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", SSE_INTALU_ITINS_P, + v8i64_info>, avx512_unary_lowering, VEX_W; //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// -multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode>{ - defm NAME: avx512_unary_rm_vl, XS; +multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { + defm NAME: avx512_unary_rm_vl, XS; } -defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>; -defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>; +defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>; +defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX; + (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX, + Sched<[itins.Sched]>; defm rm : AVX512_maskable, - EVEX, EVEX_CD8<_.EltSize, CD8VH>; + (_.ScalarLdFrag addr:$src))))), + itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>, + Sched<[itins.Sched.Folded]>; } } multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo> { - defm Z : avx512_unary_rm, EVEX_V512; + defm Z : avx512_unary_rm, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_unary_rm, + defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_movddup_128, + defm Z128 : avx512_movddup_128, EVEX_V128; } } -multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode>{ - defm NAME: avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { + defm NAME: avx512_movddup_common, XD, VEX_W; } -defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>; +defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>; let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), @@ -10204,10 +10226,11 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds>; // Bit Algorithms //===----------------------------------------------------------------------===// -defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, +// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW? +defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P, avx512vl_i8_info, HasBITALG>, avx512_unary_lowering; -defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, +defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P, avx512vl_i16_info, HasBITALG>, avx512_unary_lowering, VEX_W; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f74a92afca9..c87a338cb1e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4695,6 +4695,12 @@ let Predicates = [UseSSE3] in { // SSE3 - Replicate Double FP - MOVDDUP //===---------------------------------------------------------------------===// +// FIXME: Improve MOVDDUP/BROADCAST reg/mem scheduling itineraries. +let Sched = WriteFShuffle in +def SSE_MOVDDUP : OpndItins< + IIC_SSE_MOV_LH, IIC_SSE_MOV_LH +>; + multiclass sse3_replicate_dfp { def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),