From 3e48a4f4584fcf21e300affe64eb228647f4bb13 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 28 Nov 2017 15:03:42 +0000 Subject: [PATCH] [X86][X87] Tag FABS/FCHS/FSQRT/FSIN/FCOS x87 instruction scheduler classes Atom's FABS/FCHS/FSQRT latencies taken from Agner. Note: I just added FSIN and FCOS to the existing IIC_FSINCOS itinerary, which is actually a more costly instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319175 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFPStack.td | 42 ++++++++++++-------- lib/Target/X86/X86Schedule.td | 2 + lib/Target/X86/X86ScheduleAtom.td | 2 + test/CodeGen/X86/x87-schedule.ll | 66 +++++++++++++++---------------- 4 files changed, 63 insertions(+), 49 deletions(-) diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index f096f51d6be..1ac62a74bce 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -118,10 +118,12 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1. // f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2. // f80 instructions cannot use SSE and use neither of these. -class FpIf32 pattern> : - FpI_, Requires<[FPStackf32]>; -class FpIf64 pattern> : - FpI_, Requires<[FPStackf64]>; +class FpIf32 pattern, + InstrItinClass itin = NoItinerary> : + FpI_, Requires<[FPStackf32]>; +class FpIf64 pattern, + InstrItinClass itin = NoItinerary> : + FpI_, Requires<[FPStackf64]>; // Factoring for arithmetic. multiclass FPBinary_rr { @@ -293,30 +295,38 @@ def COM_FST0r : FPST0rInst ; def COMP_FST0r : FPST0rInst ; // Unary operations. -multiclass FPUnary { +multiclass FPUnary { def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW, - [(set RFP32:$dst, (OpNode RFP32:$src))]>; + [(set RFP32:$dst, (OpNode RFP32:$src))], itin>; def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW, - [(set RFP64:$dst, (OpNode RFP64:$src))]>; + [(set RFP64:$dst, (OpNode RFP64:$src))], itin>; def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW, - [(set RFP80:$dst, (OpNode RFP80:$src))]>; -def _F : FPI<0xD9, fp, (outs), (ins), asmstring>; + [(set RFP80:$dst, (OpNode RFP80:$src))], itin>; +def _F : FPI<0xD9, fp, (outs), (ins), asmstring, itin>; } let Defs = [FPSW] in { -defm CHS : FPUnary; -defm ABS : FPUnary; -let SchedRW = [WriteFSqrt] in { -defm SQRT: FPUnary; + +let SchedRW = [WriteVecLogic] in { +defm CHS : FPUnary; +defm ABS : FPUnary; +} + +let SchedRW = [WriteFSqrt] in +defm SQRT: FPUnary; + +let SchedRW = [WriteMicrocoded] in { +defm SIN : FPUnary; +defm COS : FPUnary; } -defm SIN : FPUnary; -defm COS : FPUnary; let hasSideEffects = 0 in { def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>; def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>; def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>; -} +} // hasSideEffects + def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">; } // Defs = [FPSW] diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 0346046e758..16abe81acf0 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -477,6 +477,8 @@ def IIC_FXTRACT : InstrItinClass; def IIC_FPREM1 : InstrItinClass; def IIC_FPSTP : InstrItinClass; def IIC_FPREM : InstrItinClass; +def IIC_FSIGN : InstrItinClass; +def IIC_FSQRT : InstrItinClass; def IIC_FYL2XP1 : InstrItinClass; def IIC_FSINCOS : InstrItinClass; def IIC_FRNDINT : InstrItinClass; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 1a070f79de6..a0821994214 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -394,6 +394,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, InstrItinData, InstrStage<1, [Port1]>] >, + InstrItinData] >, + InstrItinData] >, // System instructions InstrItinData] >, diff --git a/test/CodeGen/X86/x87-schedule.ll b/test/CodeGen/X86/x87-schedule.ll index bac4c08af5b..05e4ac7cdc8 100644 --- a/test/CodeGen/X86/x87-schedule.ll +++ b/test/CodeGen/X86/x87-schedule.ll @@ -96,21 +96,21 @@ define void @test_fabs() optsize { ; ATOM-LABEL: test_fabs: ; ATOM: # BB#0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fabs +; ATOM-NEXT: fabs # sched: [1:1.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; ; SLM-LABEL: test_fabs: ; SLM: # BB#0: ; SLM-NEXT: #APP -; SLM-NEXT: fabs +; SLM-NEXT: fabs # sched: [1:0.50] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fabs: ; SANDY: # BB#0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fabs +; SANDY-NEXT: fabs # sched: [1:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [5:1.00] ; @@ -124,28 +124,28 @@ define void @test_fabs() optsize { ; BROADWELL-LABEL: test_fabs: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fabs +; BROADWELL-NEXT: fabs # sched: [1:0.33] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fabs: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fabs +; SKYLAKE-NEXT: fabs # sched: [1:0.33] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fabs: ; SKX: # BB#0: ; SKX-NEXT: #APP -; SKX-NEXT: fabs +; SKX-NEXT: fabs # sched: [1:0.33] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fabs: ; BTVER2: # BB#0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fabs +; BTVER2-NEXT: fabs # sched: [1:0.50] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; @@ -421,21 +421,21 @@ define void @test_fchs() optsize { ; ATOM-LABEL: test_fchs: ; ATOM: # BB#0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fchs +; ATOM-NEXT: fchs # sched: [1:1.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; ; SLM-LABEL: test_fchs: ; SLM: # BB#0: ; SLM-NEXT: #APP -; SLM-NEXT: fchs +; SLM-NEXT: fchs # sched: [1:0.50] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fchs: ; SANDY: # BB#0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fchs +; SANDY-NEXT: fchs # sched: [1:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [5:1.00] ; @@ -449,28 +449,28 @@ define void @test_fchs() optsize { ; BROADWELL-LABEL: test_fchs: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fchs +; BROADWELL-NEXT: fchs # sched: [1:0.33] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fchs: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fchs +; SKYLAKE-NEXT: fchs # sched: [1:0.33] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fchs: ; SKX: # BB#0: ; SKX-NEXT: #APP -; SKX-NEXT: fchs +; SKX-NEXT: fchs # sched: [1:0.33] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fchs: ; BTVER2: # BB#0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fchs +; BTVER2-NEXT: fchs # sched: [1:0.50] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; @@ -1075,63 +1075,63 @@ define void @test_fcos() optsize { ; ATOM-LABEL: test_fcos: ; ATOM: # BB#0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fcos +; ATOM-NEXT: fcos # sched: [174:87.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; ; SLM-LABEL: test_fcos: ; SLM: # BB#0: ; SLM-NEXT: #APP -; SLM-NEXT: fcos +; SLM-NEXT: fcos # sched: [100:1.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fcos: ; SANDY: # BB#0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fcos +; SANDY-NEXT: fcos # sched: [100:0.33] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [5:1.00] ; ; HASWELL-LABEL: test_fcos: ; HASWELL: # BB#0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcos +; HASWELL-NEXT: fcos # sched: [100:0.25] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retl # sched: [5:0.50] ; ; BROADWELL-LABEL: test_fcos: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcos +; BROADWELL-NEXT: fcos # sched: [100:0.25] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fcos: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcos +; SKYLAKE-NEXT: fcos # sched: [100:0.25] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fcos: ; SKX: # BB#0: ; SKX-NEXT: #APP -; SKX-NEXT: fcos +; SKX-NEXT: fcos # sched: [100:0.25] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fcos: ; BTVER2: # BB#0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcos +; BTVER2-NEXT: fcos # sched: [100:0.17] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; ; ZNVER1-LABEL: test_fcos: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcos +; ZNVER1-NEXT: fcos # sched: [100:?] ; ZNVER1-NEXT: #NO_APP ; ZNVER1-NEXT: retl # sched: [1:0.50] tail call void asm sideeffect "fcos", ""() nounwind @@ -3048,63 +3048,63 @@ define void @test_fsin() optsize { ; ATOM-LABEL: test_fsin: ; ATOM: # BB#0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fsin +; ATOM-NEXT: fsin # sched: [174:87.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; ; SLM-LABEL: test_fsin: ; SLM: # BB#0: ; SLM-NEXT: #APP -; SLM-NEXT: fsin +; SLM-NEXT: fsin # sched: [100:1.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fsin: ; SANDY: # BB#0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fsin +; SANDY-NEXT: fsin # sched: [100:0.33] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [5:1.00] ; ; HASWELL-LABEL: test_fsin: ; HASWELL: # BB#0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsin +; HASWELL-NEXT: fsin # sched: [100:0.25] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retl # sched: [5:0.50] ; ; BROADWELL-LABEL: test_fsin: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsin +; BROADWELL-NEXT: fsin # sched: [100:0.25] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fsin: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsin +; SKYLAKE-NEXT: fsin # sched: [100:0.25] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fsin: ; SKX: # BB#0: ; SKX-NEXT: #APP -; SKX-NEXT: fsin +; SKX-NEXT: fsin # sched: [100:0.25] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fsin: ; BTVER2: # BB#0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsin +; BTVER2-NEXT: fsin # sched: [100:0.17] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; ; ZNVER1-LABEL: test_fsin: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsin +; ZNVER1-NEXT: fsin # sched: [100:?] ; ZNVER1-NEXT: #NO_APP ; ZNVER1-NEXT: retl # sched: [1:0.50] tail call void asm sideeffect "fsin", ""() nounwind @@ -3196,7 +3196,7 @@ define void @test_fsqrt() optsize { ; ATOM-LABEL: test_fsqrt: ; ATOM: # BB#0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fsqrt +; ATOM-NEXT: fsqrt # sched: [71:35.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; -- 2.50.1