From: Ayman Musa Date: Sun, 8 Oct 2017 08:32:56 +0000 (+0000) Subject: [X86] Add new attribute to X86 instructions to enable marking them as "not memory... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e22eb3ecee0013d55253225c8121d048a1a0bf5b;p=llvm [X86] Add new attribute to X86 instructions to enable marking them as "not memory foldable" This attribute will be used in a tablegen backend that generates the X86 memory folding tables which will be added in a future pass. Instructions with this attribute unset will be excluded from the full set of X86 instructions available for the pass. Differential Revision: https://reviews.llvm.org/D38027 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315171 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 929faaf23e3..824f5099d8c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6480,9 +6480,11 @@ multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, } } defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", - X86froundRnd, f64x_info, f32x_info>; + X86froundRnd, f64x_info, f32x_info>, + NotMemoryFoldable; defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", - X86fpextRnd,f32x_info, f64x_info >; + X86fpextRnd,f32x_info, f64x_info >, + NotMemoryFoldable; def : Pat<(f64 (fpextend FR32X:$src)), (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>, @@ -7274,13 +7276,13 @@ multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, } defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>, - EVEX_CD8<32, CD8VT1>, T8PD; + EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>, - VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>, - EVEX_CD8<32, CD8VT1>, T8PD; 
+ EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>, - VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, @@ -7540,9 +7542,11 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, multiclass avx512_sqrt_scalar_all opc, string OpcodeStr> { defm SSZ : avx512_sqrt_scalar, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; + X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, + NotMemoryFoldable; defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; + X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, + NotMemoryFoldable; } defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 8f9226cf228..4b8c24a1c04 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -243,9 +243,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in let Uses = [ESP] in { def TCRETURNdi : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset), []>; + (ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable; def TCRETURNri : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset), []>; @@ -315,10 +315,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, (ins i64i32imm_pcrel:$dst, i32imm:$offset), []>; def TCRETURNri64 : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi64 : PseudoI<(outs), - (ins i64mem_TC:$dst, i32imm:$offset), []>; + (ins 
i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable; def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "jmp\t$dst", [], IIC_JMP_REL>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 57f0c1944c9..2653e8c0a39 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -231,6 +231,9 @@ class FoldGenData { string FoldGenRegForm = _RegisterForm; } +// Mark the instruction as "illegal to memory fold/unfold" +class NotMemoryFoldable { bit isMemoryFoldable = 0; } + class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, InstrItinClass itin, @@ -314,6 +317,8 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, // instruction to replace the current one in case it got picked during generation. string FoldGenRegForm = ?; + bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction? + // TSFlags layout should be kept in sync with X86BaseInfo.h. let TSFlags{6-0} = FormBits; let TSFlags{8-7} = OpSizeBits; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index b76baac4bdf..96eb5f4538f 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1658,14 +1658,15 @@ let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))], IIC_BT_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; + [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB, + 
NotMemoryFoldable; } // SchedRW // Unlike with the register+register form, the memory+register form of the @@ -1679,19 +1680,19 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { // [(X86bt (loadi16 addr:$src1), GR16:$src2), // (implicit EFLAGS)] [], IIC_BT_MR - >, OpSize16, TB, Requires<[FastBTMem]>; + >, OpSize16, TB, Requires<[FastBTMem]>, NotMemoryFoldable; def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi32 addr:$src1), GR32:$src2), // (implicit EFLAGS)] [], IIC_BT_MR - >, OpSize32, TB, Requires<[FastBTMem]>; + >, OpSize32, TB, Requires<[FastBTMem]>, NotMemoryFoldable; def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi64 addr:$src1), GR64:$src2), // (implicit EFLAGS)] [], IIC_BT_MR - >, TB; + >, TB, NotMemoryFoldable; } let SchedRW = [WriteALU] in { @@ -1731,23 +1732,25 @@ let hasSideEffects = 0 in { let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTC32rr : I<0xBB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB, + NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), 
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, + NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { @@ -1775,23 +1778,24 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTR32rr : I<0xB3, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB, NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, + NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { @@ -1819,23 +1823,25 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), let SchedRW = [WriteALU], Constraints = 
"$src1 = $dst" in { def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTS32rr : I<0xAB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB, + NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, + NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 92898d1bed9..ba0c9bfec73 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1509,14 +1509,14 @@ def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2F]>, VEX_WIG; + Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], 
IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; + Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable; } def : Pat<(f32 (fpround FR64:$src)), @@ -1576,14 +1576,14 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2F]>, VEX_WIG; + Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, - Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; + Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable; } def : Pat<(f64 (fpextend FR32:$src)), @@ -3258,7 +3258,8 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SS : avx_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, - SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG; + SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG, + NotMemoryFoldable; } multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, @@ -3270,7 +3271,7 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, f64mem, !cast("int_x86_sse2_"##OpcodeStr##_sd), OpNode, SSEPackedDouble, itins, "SD">, - XD, VEX_4V, VEX_LIG, VEX_WIG; + XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; } // Square root.