From 984b922f6a977e27dfa82c7c99effac34324b4bd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 26 Nov 2017 20:50:29 +0000 Subject: [PATCH] [X86][3DNow] Add 3DNow! instruction itinerary and scheduling classes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319005 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Instr3DNow.td | 112 +++++++++++++++++++---------- lib/Target/X86/X86Schedule.td | 9 +++ test/CodeGen/X86/3dnow-schedule.ll | 96 ++++++++++++------------- 3 files changed, 132 insertions(+), 85 deletions(-) diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index 36c7adc3172..b32f0c32f98 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -12,68 +12,106 @@ // //===----------------------------------------------------------------------===// -class I3DNow o, Format F, dag outs, dag ins, string asm, list pat> - : I, TB, Requires<[Has3DNow]> { +let Sched = WriteFAdd in { +def I3DNOW_FALU_ITINS : OpndItins< + IIC_3DNOW_FALU_RR, IIC_3DNOW_FALU_RM +>; } -class I3DNow_binop o, Format F, dag ins, string Mnemonic, list pat> +let Sched = WriteCvtF2I in { +def I3DNOW_FCVT_F2I_ITINS : OpndItins< + IIC_3DNOW_FCVT_F2I_RR, IIC_3DNOW_FCVT_F2I_RM +>; +} + +let Sched = WriteCvtI2F in { +def I3DNOW_FCVT_I2F_ITINS : OpndItins< + IIC_3DNOW_FCVT_I2F_RR, IIC_3DNOW_FCVT_I2F_RM +>; +} + +let Sched = WriteVecIMul in { +def I3DNOW_MISC_FUNC_ITINS : OpndItins< + IIC_3DNOW_MISC_FUNC_REG, IIC_3DNOW_MISC_FUNC_MEM +>; +} + +let Sched = WriteShuffle in { +def I3DNOW_PSHUF_ITINS : OpndItins< + IIC_MMX_PSHUF, IIC_MMX_PSHUF +>; +} + +class I3DNow o, Format F, dag outs, dag ins, string asm, list pat, + InstrItinClass itin = NoItinerary> + : I, TB, Requires<[Has3DNow]> { +} + +class I3DNow_binop o, Format F, dag ins, string Mnemonic, list pat, + InstrItinClass itin> : I3DNow, + !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat, itin>, Has3DNow0F0FOpcode { // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet. let isAsmParserOnly = 1; let Constraints = "$src1 = $dst"; } -class I3DNow_conv o, Format F, dag ins, string Mnemonic, list pat> +class I3DNow_conv o, Format F, dag ins, string Mnemonic, list pat, + InstrItinClass itin> : I3DNow, + !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat, itin>, Has3DNow0F0FOpcode { // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet. let isAsmParserOnly = 1; } -multiclass I3DNow_binop_rm_int opc, string Mn, bit Commutable = 0, - string Ver = ""> { +multiclass I3DNow_binop_rm_int opc, string Mn, OpndItins itins, + bit Commutable = 0, string Ver = ""> { let isCommutable = Commutable in def rr : I3DNow_binop( - !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>; + !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))], + itins.rr>, Sched<[itins.Sched]>; def rm : I3DNow_binop( !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, - (bitconvert (load_mmx addr:$src2))))]>; + (bitconvert (load_mmx addr:$src2))))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass I3DNow_conv_rm_int opc, string Mn, string Ver = ""> { +multiclass I3DNow_conv_rm_int opc, string Mn, OpndItins itins, + string Ver = ""> { def rr : I3DNow_conv( - !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>; + !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : I3DNow_conv( !strconcat("int_x86_3dnow", Ver, "_", Mn)) - (bitconvert (load_mmx addr:$src))))]>; + (bitconvert (load_mmx addr:$src))))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } -defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", 1>; -defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">; -defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">; -defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", 1>; -defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", 1>; -defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">; -defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">; -defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">; -defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">; -defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", 1>; -defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">; -defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">; -defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">; -defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">; -defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">; -defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", 1>; -defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", 1>; -defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">; -defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", 1>; +defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", I3DNOW_MISC_FUNC_ITINS, 1>; +defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id", I3DNOW_FCVT_F2I_ITINS>; +defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc", I3DNOW_FALU_ITINS>; +defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", I3DNOW_FALU_ITINS, 1>; +defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", I3DNOW_FALU_ITINS, 1>; +defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge", I3DNOW_FALU_ITINS>; +defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt", I3DNOW_FALU_ITINS>; +defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax", I3DNOW_FALU_ITINS>; +defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin", I3DNOW_FALU_ITINS>; +defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", I3DNOW_FALU_ITINS, 1>; +defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp", I3DNOW_FALU_ITINS>; +defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", I3DNOW_FALU_ITINS>; +defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", I3DNOW_FALU_ITINS>; +defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", I3DNOW_FALU_ITINS>; +defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt", I3DNOW_FALU_ITINS>; +defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", I3DNOW_FALU_ITINS, 1>; +defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", I3DNOW_FALU_ITINS, 1>; +defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", I3DNOW_FCVT_I2F_ITINS>; +defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", I3DNOW_MISC_FUNC_ITINS, 1>; def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; @@ -87,8 +125,8 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", Requires<[HasPrefetchW]>; // "3DNowA" instructions -defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; -defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">; -defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", 0, "a">; -defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", 0, "a">; -defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">; +defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", I3DNOW_FCVT_F2I_ITINS, "a">; +defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", I3DNOW_FCVT_I2F_ITINS, "a">; +defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", I3DNOW_FALU_ITINS, 0, "a">; +defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", I3DNOW_FALU_ITINS, 0, "a">; +defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", I3DNOW_PSHUF_ITINS, "a">; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 91d450c1ffb..0346046e758 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -432,6 +432,15 @@ def IIC_MMX_CVT_PD_RM : InstrItinClass; def IIC_MMX_CVT_PS_RR : InstrItinClass; def IIC_MMX_CVT_PS_RM : InstrItinClass; +def IIC_3DNOW_FALU_RM : InstrItinClass; +def IIC_3DNOW_FALU_RR : InstrItinClass; +def IIC_3DNOW_FCVT_F2I_RM : InstrItinClass; +def IIC_3DNOW_FCVT_F2I_RR : InstrItinClass; +def IIC_3DNOW_FCVT_I2F_RM : InstrItinClass; +def IIC_3DNOW_FCVT_I2F_RR : InstrItinClass; +def IIC_3DNOW_MISC_FUNC_REG : InstrItinClass; +def IIC_3DNOW_MISC_FUNC_MEM : InstrItinClass; + def IIC_CMPX_LOCK : InstrItinClass; def IIC_CMPX_LOCK_8 : InstrItinClass; def IIC_CMPX_LOCK_8B : InstrItinClass; diff --git a/test/CodeGen/X86/3dnow-schedule.ll b/test/CodeGen/X86/3dnow-schedule.ll index c33ecc8a596..5996c73aa49 100644 --- a/test/CodeGen/X86/3dnow-schedule.ll +++ b/test/CodeGen/X86/3dnow-schedule.ll @@ -14,8 +14,8 @@ declare void @llvm.x86.mmx.femms() nounwind readnone define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pavgusb: ; CHECK: # BB#0: -; CHECK-NEXT: pavgusb %mm1, %mm0 -; CHECK-NEXT: pavgusb (%rdi), %mm0 +; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [5:1.00] +; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1) @@ -29,8 +29,8 @@ declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pf2id(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pf2id: ; CHECK: # BB#0: -; CHECK-NEXT: pf2id (%rdi), %mm0 -; CHECK-NEXT: pf2id %mm0, %mm0 +; CHECK-NEXT: pf2id (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pf2id %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 @@ -44,8 +44,8 @@ declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone define i64 @test_pf2iw(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pf2iw: ; CHECK: # BB#0: -; CHECK-NEXT: pf2iw (%rdi), %mm0 -; CHECK-NEXT: pf2iw %mm0, %mm0 +; CHECK-NEXT: pf2iw (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pf2iw %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 @@ -59,8 +59,8 @@ declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfacc: ; CHECK: # BB#0: -; CHECK-NEXT: pfacc %mm1, %mm0 -; CHECK-NEXT: pfacc (%rdi), %mm0 +; CHECK-NEXT: pfacc %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1) @@ -74,8 +74,8 @@ declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfadd: ; CHECK: # BB#0: -; CHECK-NEXT: pfadd %mm1, %mm0 -; CHECK-NEXT: pfadd (%rdi), %mm0 +; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1) @@ -89,8 +89,8 @@ declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpeq: ; CHECK: # BB#0: -; CHECK-NEXT: pfcmpeq %mm1, %mm0 -; CHECK-NEXT: pfcmpeq (%rdi), %mm0 +; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1) @@ -104,8 +104,8 @@ declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpge: ; CHECK: # BB#0: -; CHECK-NEXT: pfcmpge %mm1, %mm0 -; CHECK-NEXT: pfcmpge (%rdi), %mm0 +; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1) @@ -119,8 +119,8 @@ declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpgt: ; CHECK: # BB#0: -; CHECK-NEXT: pfcmpgt %mm1, %mm0 -; CHECK-NEXT: pfcmpgt (%rdi), %mm0 +; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1) @@ -134,8 +134,8 @@ declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmax: ; CHECK: # BB#0: -; CHECK-NEXT: pfmax %mm1, %mm0 -; CHECK-NEXT: pfmax (%rdi), %mm0 +; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1) @@ -149,8 +149,8 @@ declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmin: ; CHECK: # BB#0: -; CHECK-NEXT: pfmin %mm1, %mm0 -; CHECK-NEXT: pfmin (%rdi), %mm0 +; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1) @@ -164,8 +164,8 @@ declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmul: ; CHECK: # BB#0: -; CHECK-NEXT: pfmul %mm1, %mm0 -; CHECK-NEXT: pfmul (%rdi), %mm0 +; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1) @@ -179,8 +179,8 @@ declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfnacc: ; CHECK: # BB#0: -; CHECK-NEXT: pfnacc %mm1, %mm0 -; CHECK-NEXT: pfnacc (%rdi), %mm0 +; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1) @@ -194,8 +194,8 @@ declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfpnacc: ; CHECK: # BB#0: -; CHECK-NEXT: pfpnacc %mm1, %mm0 -; CHECK-NEXT: pfpnacc (%rdi), %mm0 +; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1) @@ -209,8 +209,8 @@ declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrcp(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pfrcp: ; CHECK: # BB#0: -; CHECK-NEXT: pfrcp (%rdi), %mm0 -; CHECK-NEXT: pfrcp %mm0, %mm0 +; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 @@ -224,8 +224,8 @@ declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrcpit1: ; CHECK: # BB#0: -; CHECK-NEXT: pfrcpit1 %mm1, %mm0 -; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 +; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1) @@ -239,8 +239,8 @@ declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrcpit2: ; CHECK: # BB#0: -; CHECK-NEXT: pfrcpit2 %mm1, %mm0 -; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 +; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1) @@ -254,8 +254,8 @@ declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrsqit1: ; CHECK: # BB#0: -; CHECK-NEXT: pfrsqit1 %mm1, %mm0 -; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 +; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1) @@ -269,8 +269,8 @@ declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrsqrt(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pfrsqrt: ; CHECK: # BB#0: -; CHECK-NEXT: pfrsqrt (%rdi), %mm0 -; CHECK-NEXT: pfrsqrt %mm0, %mm0 +; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 @@ -284,8 +284,8 @@ declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfsub: ; CHECK: # BB#0: -; CHECK-NEXT: pfsub %mm1, %mm0 -; CHECK-NEXT: pfsub (%rdi), %mm0 +; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1) @@ -299,8 +299,8 @@ declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfsubr: ; CHECK: # BB#0: -; CHECK-NEXT: pfsubr %mm1, %mm0 -; CHECK-NEXT: pfsubr (%rdi), %mm0 +; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [7:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1) @@ -314,8 +314,8 @@ declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pi2fd(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pi2fd: ; CHECK: # BB#0: -; CHECK-NEXT: pi2fd (%rdi), %mm0 -; CHECK-NEXT: pi2fd %mm0, %mm0 +; CHECK-NEXT: pi2fd (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pi2fd %mm0, %mm0 # sched: [4:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 @@ -329,8 +329,8 @@ declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone define i64 @test_pi2fw(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pi2fw: ; CHECK: # BB#0: -; CHECK-NEXT: pi2fw (%rdi), %mm0 -; CHECK-NEXT: pi2fw %mm0, %mm0 +; CHECK-NEXT: pi2fw (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pi2fw %mm0, %mm0 # sched: [4:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 @@ -344,8 +344,8 @@ declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pmulhrw: ; CHECK: # BB#0: -; CHECK-NEXT: pmulhrw %mm1, %mm0 -; CHECK-NEXT: pmulhrw (%rdi), %mm0 +; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00] +; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1) @@ -359,8 +359,8 @@ declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pswapd(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pswapd: ; CHECK: # BB#0: -; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] -; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] +; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [5:1.00] +; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00] ; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 -- 2.50.1