From a327b4b9151b1b1da6899d6b75833a064aceb8e3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 5 Dec 2017 13:49:44 +0000 Subject: [PATCH] [X86][AVX512] Tag scalar CVT and CMP instruction scheduler classes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319765 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 221 +++++++++++++++------------- lib/Target/X86/X86InstrSSE.td | 59 ++++---- test/CodeGen/X86/avx512-schedule.ll | 8 +- 3 files changed, 154 insertions(+), 134 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 6d0b2f4d3a2..f3b766de0ed 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6331,19 +6331,19 @@ defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// -multiclass avx512_vcvtsi opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, X86MemOperand x86memop, - PatFrag ld_frag, string asm> { +multiclass avx512_vcvtsi opc, SDNode OpNode, OpndItins itins, + RegisterClass SrcRC, X86VectorVTInfo DstVT, + X86MemOperand x86memop, PatFrag ld_frag, string asm> { let hasSideEffects = 0 in { def rr : SI, - EVEX_4V; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [], + itins.rr>, EVEX_4V, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : SI, - EVEX_4V; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [], + itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; } // hasSideEffects = 0 let isCodeGenOnly = 1 in { def rr_Int : SI opc, SDNode OpNode, RegisterClass SrcRC, [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2, - (i32 FROUND_CURRENT)))]>, EVEX_4V; + (i32 FROUND_CURRENT)))], itins.rr>, + EVEX_4V, Sched<[itins.Sched]>; def rm_Int : SI opc, SDNode OpNode, RegisterClass SrcRC, [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), (ld_frag addr:$src2), - (i32 FROUND_CURRENT)))]>, EVEX_4V; + (i32 FROUND_CURRENT)))], itins.rm>, + EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; }//isCodeGenOnly = 1 } -multiclass avx512_vcvtsi_round opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, string asm> { +multiclass avx512_vcvtsi_round opc, SDNode OpNode, OpndItins itins, + RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> { def rrb_Int : SI opc, SDNode OpNode, RegisterClass SrcRC, [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2, - (i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC; + (i32 imm:$rc)))], itins.rr>, + EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; } -multiclass avx512_vcvtsi_common opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, X86MemOperand x86memop, - PatFrag ld_frag, string asm> { - defm NAME : avx512_vcvtsi_round, - avx512_vcvtsi, - VEX_LIG; +multiclass avx512_vcvtsi_common opc, SDNode OpNode, OpndItins itins, + RegisterClass SrcRC, X86VectorVTInfo DstVT, + X86MemOperand x86memop, PatFrag ld_frag, string asm> { + defm NAME : avx512_vcvtsi_round, + avx512_vcvtsi, VEX_LIG; } let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, +defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32, v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, +defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, +defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, XD, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, +defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -6421,16 +6424,16 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32, v4f32x_info, i32mem, loadi32, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info, +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -6461,51 +6464,54 @@ def : Pat<(f64 (uint_to_fp GR64:$src)), //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from float/double to integer //===----------------------------------------------------------------------===// -multiclass avx512_cvt_s_int_round opc, X86VectorVTInfo SrcVT , - X86VectorVTInfo DstVT, SDNode OpNode, string asm> { + +multiclass avx512_cvt_s_int_round opc, X86VectorVTInfo SrcVT, + X86VectorVTInfo DstVT, SDNode OpNode, + OpndItins itins, string asm> { let Predicates = [HasAVX512] in { def rr : SI, - EVEX, VEX_LIG; + [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))], + itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>; def rb : SI, - EVEX, VEX_LIG, EVEX_B, EVEX_RC; + [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))], + itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC, + Sched<[itins.Sched]>; def rm : SI, - EVEX, VEX_LIG; + (i32 FROUND_CURRENT)))], itins.rm>, + EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Predicates = [HasAVX512] } // Convert float/double to signed/unsigned int 32/64 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, - X86cvts2si, "cvtss2si">, + X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, - X86cvts2si, "cvtss2si">, + X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, - X86cvts2usi, "cvtss2usi">, + X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, - X86cvts2usi, "cvtss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; + X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi">, + XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, - X86cvts2si, "cvtsd2si">, + X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, - X86cvts2si, "cvtsd2si">, + X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, - X86cvts2usi, "cvtsd2usi">, + X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, - X86cvts2usi, "cvtsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; + X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; // The SSE version of these instructions are disabled for AVX512. // Therefore, the SSE intrinsics are mapped to the AVX512 instructions. @@ -6578,19 +6584,20 @@ def : Pat<(v2f64 (X86Movsd // Convert float/double to signed/unsigned int 32/64 with truncation multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, - SDNode OpNodeRnd, string aliasStr>{ + SDNode OpNodeRnd, OpndItins itins, string aliasStr>{ let Predicates = [HasAVX512] in { def rr : AVX512, EVEX; + [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>, + EVEX, Sched<[itins.Sched]>; let hasSideEffects = 0 in def rb : AVX512, EVEX, EVEX_B; + [], itins.rr>, EVEX, EVEX_B, Sched<[itins.Sched]>; def rm : AVX512, - EVEX; + [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))], + itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>; def : InstAlias(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>; @@ -6604,47 +6611,48 @@ let Predicates = [HasAVX512] in { def rr_Int : AVX512, EVEX, VEX_LIG; + (i32 FROUND_CURRENT)))], itins.rr>, + EVEX, VEX_LIG, Sched<[itins.Sched]>; def rb_Int : AVX512, - EVEX,VEX_LIG , EVEX_B; + (i32 FROUND_NO_EXC)))], itins.rr>, + EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def rm_Int : AVX512, EVEX, VEX_LIG; - + [], itins.rm>, EVEX, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // isCodeGenOnly = 1 } //HasAVX512 } defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, - fp_to_sint, X86cvtts2IntRnd, "{l}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, - fp_to_sint, X86cvtts2IntRnd, "{q}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, - fp_to_sint, X86cvtts2IntRnd, "{l}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, - fp_to_sint, X86cvtts2IntRnd, "{q}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info, - fp_to_uint, X86cvtts2UIntRnd, "{l}">, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info, - fp_to_uint, X86cvtts2UIntRnd, "{q}">, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, - fp_to_uint, X86cvtts2UIntRnd, "{l}">, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, - fp_to_uint, X86cvtts2UIntRnd, "{q}">, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; let Predicates = [HasAVX512] in { def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))), @@ -6664,87 +6672,92 @@ let Predicates = [HasAVX512] in { def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)), (VCVTTSD2SI64Zrm_Int sdmem:$src)>; } // HasAVX512 + //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back //===----------------------------------------------------------------------===// + multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNode> { + X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> { defm rr_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; + (i32 FROUND_CURRENT))), itins.rr>, + EVEX_4V, VEX_LIG, Sched<[itins.Sched]>; defm rm_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>; + (i32 FROUND_CURRENT))), itins.rm>, + EVEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr : I, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>; + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + itins.rm>, EVEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // Scalar Coversion with SAE - suppress all exceptions multiclass avx512_cvt_fp_sae_scalar opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNodeRnd> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> { defm rrb_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, EVEX_B; + (i32 FROUND_NO_EXC))), itins.rr>, + EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>; } // Scalar Conversion with rounding control (RC) multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNodeRnd> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> { defm rrb_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + (_Src.VT _Src.RC:$src2), (i32 imm:$rc))), + itins.rm>, + EVEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>, EVEX_B, EVEX_RC; } multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, - SDNode OpNodeRnd, X86VectorVTInfo _src, - X86VectorVTInfo _dst> { + SDNode OpNodeRnd, OpndItins itins, + X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, + defm Z : avx512_cvt_fp_scalar, avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, XD; + OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; } } multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, - SDNode OpNodeRnd, X86VectorVTInfo _src, - X86VectorVTInfo _dst> { + SDNode OpNodeRnd, OpndItins itins, + X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, - avx512_cvt_fp_sae_scalar, + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_sae_scalar, EVEX_CD8<32, CD8VT1>, XS; } } defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", - X86froundRnd, f64x_info, f32x_info>, - NotMemoryFoldable; + X86froundRnd, SSE_CVT_SD2SS, f64x_info, + f32x_info>, NotMemoryFoldable; defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", - X86fpextRnd,f32x_info, f64x_info >, - NotMemoryFoldable; + X86fpextRnd, SSE_CVT_SS2SD, f32x_info, + f64x_info>, NotMemoryFoldable; def : Pat<(f64 (fpextend FR32X:$src)), (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, @@ -7526,53 +7539,53 @@ let Predicates = [HasVLX] in { // Unordered/Ordered scalar fp compare with Sea and set EFLAGS multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, - string OpcodeStr> { + string OpcodeStr, OpndItins itins> { let hasSideEffects = 0 in def rb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, - Sched<[WriteFAdd]>; + [], itins.rr>, EVEX, EVEX_B, VEX_LIG, EVEX_V128, + Sched<[itins.Sched]>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss">, + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd">, + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; - defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss">, + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd">, + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss">, PS, EVEX, VEX_LIG, + "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd">, PD, EVEX, + "ucomisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, - "comiss">, PS, EVEX, VEX_LIG, + "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, - "comisd">, PD, EVEX, + "comisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG, + sse_load_f32, "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd">, PD, EVEX, + sse_load_f64, "ucomisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, - sse_load_f32, "comiss">, PS, EVEX, VEX_LIG, + sse_load_f32, "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, - sse_load_f64, "comisd">, PD, EVEX, + sse_load_f64, "comisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ea30393242d..6504c2b9c8b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2135,6 +2135,11 @@ let Predicates = [UseSSE2] in { // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// +let Sched = WriteFAdd in +def SSE_COMIS : OpndItins< + IIC_SSE_COMIS_RR, IIC_SSE_COMIS_RM +>; + // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar opc, RegisterClass RC, SDNode OpNode, ValueType vt, X86MemOperand x86memop, - PatFrag ld_frag, string OpcodeStr> { + PatFrag ld_frag, string OpcodeStr, + OpndItins itins> { let hasSideEffects = 0 in { def rr: SI, - Sched<[WriteFAdd]>; + itins.rr>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm: SI, - Sched<[WriteFAddLd, ReadAfterLd]>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp multiclass sse12_ord_cmp_int opc, RegisterClass RC, SDNode OpNode, ValueType vt, Operand memop, - ComplexPattern mem_cpat, string OpcodeStr> { + ComplexPattern mem_cpat, string OpcodeStr, + OpndItins itins> { def rr: SI, - Sched<[WriteFAdd]>; + itins.rr>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm: SI, - Sched<[WriteFAddLd, ReadAfterLd]>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss">, PS, VEX, VEX_LIG, VEX_WIG; + "ucomiss", SSE_COMIS>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, PD, VEX, VEX_LIG, VEX_WIG; + "ucomisd", SSE_COMIS>, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss">, PS, VEX, VEX_LIG, VEX_WIG; + "comiss", SSE_COMIS>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd">, PD, VEX, VEX_LIG, VEX_WIG; + "comisd", SSE_COMIS>, PD, VEX, VEX_LIG, VEX_WIG; } let isCodeGenOnly = 1 in { defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss">, PS, VEX, VEX_WIG; + sse_load_f32, "ucomiss", SSE_COMIS>, PS, VEX, VEX_WIG; defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd">, PD, VEX, VEX_WIG; + sse_load_f64, "ucomisd", SSE_COMIS>, PD, VEX, VEX_WIG; defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss">, PS, VEX, VEX_WIG; + sse_load_f32, "comiss", SSE_COMIS>, PS, VEX, VEX_WIG; defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd">, PD, VEX, VEX_WIG; + sse_load_f64, "comisd", SSE_COMIS>, PD, VEX, VEX_WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss">, PS; + "ucomiss", SSE_COMIS>, PS; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, PD; + "ucomisd", SSE_COMIS>, PD; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss">, PS; + "comiss", SSE_COMIS>, PS; defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd">, PD; + "comisd", SSE_COMIS>, PD; } let isCodeGenOnly = 1 in { defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss">, PS; + sse_load_f32, "ucomiss", SSE_COMIS>, PS; defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd">, PD; + sse_load_f64, "ucomisd", SSE_COMIS>, PD; defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss">, PS; + sse_load_f32, "comiss", SSE_COMIS>, PS; defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd">, PD; + sse_load_f64, "comisd", SSE_COMIS>, PD; } } // Defs = [EFLAGS] diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll index 0c9e22db426..376baf2b1f9 100755 --- a/test/CodeGen/X86/avx512-schedule.ll +++ b/test/CodeGen/X86/avx512-schedule.ll @@ -2317,12 +2317,12 @@ define i32 @fptosi(float %a) nounwind { define i32 @fptoui(float %a) nounwind { ; GENERIC-LABEL: fptoui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttss2usi %xmm0, %eax +; GENERIC-NEXT: vcvttss2usi %xmm0, %eax # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fptoui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttss2usi %xmm0, %eax +; SKX-NEXT: vcvttss2usi %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui float %a to i32 ret i32 %b @@ -2331,7 +2331,7 @@ define i32 @fptoui(float %a) nounwind { define float @uitof32(i32 %a) nounwind { ; GENERIC-LABEL: uitof32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 +; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uitof32: @@ -2345,7 +2345,7 @@ define float @uitof32(i32 %a) nounwind { define double @uitof64(i32 %a) nounwind { ; GENERIC-LABEL: uitof64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 +; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uitof64: -- 2.50.1