From: Simon Pilgrim
Date: Fri, 1 Dec 2017 18:40:32 +0000 (+0000)
Subject: [X86][AVX512] Tag subvector extract/insert instructions scheduler classes
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bfae8814e3460c34ba78d027d7c379af7fbb50e7;p=llvm

[X86][AVX512] Tag subvector extract/insert instructions scheduler classes

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319568 91177308-0d34-0410-b5e6-96231b3b80d8
---

(Reconstructed from a garbled plain-text dump: the HTML extraction ate most
TableGen `<...>` argument lists and some hunk headers. Swallowed spans have
been restored from the surrounding +/- lines where they are unambiguous;
`[...]` marks elided unchanged context and `@@ ... @@` marks a hunk header
whose line numbers were lost.)

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index bd0ef5fad29..a911f54ab8a 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -492,7 +492,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                   X86VectorVTInfo To,
                                   SDPatternOperator vinsert_insert,
-                                  SDPatternOperator vinsert_for_mask> {
+                                  SDPatternOperator vinsert_for_mask,
+                                  OpndItins itins> {
   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
@@ ... @@
                    [...]
                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
-                                           (iPTR imm))>,
-                   AVX512AIi8Base, EVEX_4V;
-
+                                           (iPTR imm)), itins.rr>,
+                   AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
     let mayLoad = 1 in
       defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                    [...]
-                   (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
-                   EVEX_CD8<From.EltSize, From.CD8TupleForm>;
+                   (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
+                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
+                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 }

 // Passes the same pattern operator for masked and unmasked ops.
 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
-                            SDPatternOperator vinsert_insert> :
-  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert>;
+                            SDPatternOperator vinsert_insert,
+                            OpndItins itins> :
+  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert,
+                         itins>;
@@ ... @@ multiclass vinsert_for_size_lowering<...
 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
-                            ValueType EltVT64, int Opcode256> {
+                            ValueType EltVT64, int Opcode256,
+                            OpndItins itins> {
   let Predicates = [HasVLX] in
     defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
-                                 vinsert128_insert>, EVEX_V256;
+                                 vinsert128_insert, itins>, EVEX_V256;

   defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                             X86VectorVTInfo< 4, EltVT32, VR128X>,
                             X86VectorVTInfo<16, EltVT32, VR512>,
-                            vinsert128_insert>, EVEX_V512;
+                            vinsert128_insert, itins>, EVEX_V512;

   defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                             X86VectorVTInfo< 4, EltVT64, VR256X>,
                             X86VectorVTInfo< 8, EltVT64, VR512>,
-                            vinsert256_insert>, VEX_W, EVEX_V512;
+                            vinsert256_insert, itins>, VEX_W, EVEX_V512;

   // Even with DQI we'd like to only use these instructions for masking.
   let Predicates = [HasVLX, HasDQI] in
     defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
-                                 null_frag, vinsert128_insert>, VEX_W, EVEX_V256;
+                                 null_frag, vinsert128_insert, itins>,
+                                 VEX_W, EVEX_V256;

   // Even with DQI we'd like to only use these instructions for masking.
   let Predicates = [HasDQI] in {
     defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               X86VectorVTInfo< 8, EltVT64, VR512>,
-                              null_frag, vinsert128_insert>, VEX_W, EVEX_V512;
+                              null_frag, vinsert128_insert, itins>,
+                              VEX_W, EVEX_V512;

     defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               X86VectorVTInfo<16, EltVT32, VR512>,
-                              null_frag, vinsert256_insert>, EVEX_V512;
+                              null_frag, vinsert256_insert, itins>,
+                              EVEX_V512;
   }
 }

-defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
-defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
+// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
+let Sched = WriteFShuffle256 in
+def AVX512_VINSERTF : OpndItins<
+  IIC_SSE_SHUFP, IIC_SSE_SHUFP
+>;
+let Sched = WriteShuffle256 in
+def AVX512_VINSERTI : OpndItins<
+  IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
+>;
+
+defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
+defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;

 // Codegen pattern with the alternative types,
 // Even with AVX512DQ we'll still use these for unmasked operations.
@@ -779,7 +796,8 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
 multiclass vextract_for_size_split<int Opcode,
                                    X86VectorVTInfo From, X86VectorVTInfo To,
                                    SDPatternOperator vextract_extract,
-                                   SDPatternOperator vextract_for_mask> {
+                                   SDPatternOperator vextract_for_mask,
+                                   OpndItins itins> {
   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
@@ ... @@
                 [...]
                 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
-                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
-                AVX512AIi8Base, EVEX;
+                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
+                itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;
+
     def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                 [...]
                 [(store (To.VT (vextract_extract:$idx
                                 (From.VT From.RC:$src1), (iPTR imm))),
-                         addr:$dst)]>, EVEX;
+                         addr:$dst)], itins.rm>, EVEX,
+                Sched<[itins.Sched.Folded, ReadAfterLd]>;

     let mayStore = 1, hasSideEffects = 0 in
     def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                 [...]
-                []>, EVEX_K, EVEX;
+                [], itins.rm>, EVEX_K, EVEX,
+                Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 }

 // Passes the same pattern operator for masked and unmasked ops.
 multiclass vextract_for_size<int Opcode,
                              X86VectorVTInfo From, X86VectorVTInfo To,
-                             SDPatternOperator vextract_extract> :
-  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract>;
+                             SDPatternOperator vextract_extract,
+                             OpndItins itins> :
+  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract,
+                          itins>;

 // Codegen pattern for the alternative types
@@ ... @@ multiclass vextract_for_size_lowering<...
 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
-                             ValueType EltVT64, int Opcode256> {
+                             ValueType EltVT64, int Opcode256,
+                             OpndItins itins> {
   let Predicates = [HasAVX512] in {
     defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                               X86VectorVTInfo<16, EltVT32, VR512>,
                               X86VectorVTInfo< 4, EltVT32, VR128X>,
-                              vextract128_extract>,
+                              vextract128_extract, itins>,
                               EVEX_V512, EVEX_CD8<32, CD8VT4>;
     defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                               X86VectorVTInfo< 8, EltVT64, VR512>,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
-                              vextract256_extract>,
+                              vextract256_extract, itins>,
                               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
   }
   let Predicates = [HasVLX] in
     defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
-                                 vextract128_extract>,
+                                 vextract128_extract, itins>,
                                  EVEX_V256, EVEX_CD8<32, CD8VT4>;

   // Even with DQI we'd like to only use these instructions for masking.
@@ -856,7 +879,7 @@ multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
   let Predicates = [HasVLX, HasDQI] in
     defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
-                                 null_frag, vextract128_extract>,
+                                 null_frag, vextract128_extract, itins>,
                                  VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;

   // Even with DQI we'd like to only use these instructions for masking.
@@ -864,18 +887,28 @@ multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
   let Predicates = [HasDQI] in {
     defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                               X86VectorVTInfo< 8, EltVT64, VR512>,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
-                              null_frag, vextract128_extract>,
+                              null_frag, vextract128_extract, itins>,
                               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

     defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                               X86VectorVTInfo<16, EltVT32, VR512>,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
-                              null_frag, vextract256_extract>,
+                              null_frag, vextract256_extract, itins>,
                               EVEX_V512, EVEX_CD8<32, CD8VT8>;
   }
 }

-defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
-defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
+// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
+let Sched = WriteFShuffle256 in
+def AVX512_VEXTRACTF : OpndItins<
+  IIC_SSE_SHUFP, IIC_SSE_SHUFP
+>;
+let Sched = WriteShuffle256 in
+def AVX512_VEXTRACTI : OpndItins<
+  IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
+>;
+
+defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
+defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;

 // extract_subvector codegen patterns with the alternative types.
 // Even with AVX512DQ we'll still use these for unmasked operations.
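
Background on the mechanism, for readers without the old itinerary plumbing
paged in: OpndItins (defined in X86InstrSSE.td at this revision) bundles an
itinerary class for the register form (itins.rr), one for the memory form
(itins.rm), and an X86FoldableSchedWrite (itins.Sched). That is why the
reg-reg variants above are tagged Sched<[itins.Sched]>, while the
load-folding and store variants are tagged
Sched<[itins.Sched.Folded, ReadAfterLd]>: the .Folded twin of a foldable
SchedWrite models the form with a memory operand, and ReadAfterLd lets a
model delay the register-operand reads past the load. The sketch below shows
how a per-CPU scheduler model resolves such a tag. It is illustrative only,
written in the style of X86SchedHaswell.td with made-up-but-plausible
port/latency numbers, and is not part of this commit:

    // Illustrative, not from this patch: a CPU model binds the SchedWrite
    // pair to concrete resources. HWWriteResPair defines costs for both the
    // reg-reg write and its folded-load twin in one shot.
    let SchedModel = HaswellModel in {
      // Lane-crossing 256-bit shuffles: one uop on port 5, 3 cycle latency.
      defm : HWWriteResPair<WriteFShuffle256, HWPort5, 3>;
      defm : HWWriteResPair<WriteShuffle256,  HWPort5, 3>;
    }

With the VINSERT*/VEXTRACT* definitions tagged as in this patch, every CPU
model prices them through these generic classes instead of falling back to
the model's default latencies.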