def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>;
def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTTPD2DQYrm")>;
+// Register forms of the ymm variable blends (VBLENDVPS/VBLENDVPD) and of the
+// ymm VPERMILPS/VPERMILPD: latency 3, JFPU01 held for 6 cycles.
+let Latency = 3, ResourceCycles = [6] in
+def WriteVBlendVPY : SchedWriteRes<[JFPU01]>;
+def : InstRW<[WriteVBlendVPY],
+             (instregex "VBLENDVP(S|D)Yrr",
+                        "VPERMILP(D|S)Yrr")>;
+
+// Memory-source form of the ymm variable blends: latency 8, one cycle on
+// JLAGU and six cycles on JFPU01. The register source uses ReadAfterLd.
+def WriteVBlendVPYLd : SchedWriteRes<[JLAGU, JFPU01]> {
+  let ResourceCycles = [1, 6];
+  let Latency = 8;
+}
+def : InstRW<[WriteVBlendVPYLd, ReadAfterLd],
+             (instregex "VBLENDVP(S|D)Yrm")>;
+
+// Broadcast from memory into a ymm register (VBROADCASTSS/VBROADCASTSD):
+// latency 6, one cycle on JLAGU and four cycles on JFPU01.
+def WriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 4];
+}
+// No ReadAfterLd on this binding: the instruction's only source is the memory
+// operand, so there is no register source for the read to describe and it
+// would instead attach to an address operand.
+def : InstRW<[WriteVBROADCASTYLd], (instregex "VBROADCASTS(S|D)Yrm")>;
+
+// Register forms of the ymm packed compares and packed min/max: latency 2,
+// JFPU0 held for 2 cycles.
+let Latency = 2, ResourceCycles = [2] in
+def WriteFPAY22 : SchedWriteRes<[JFPU0]>;
+def : InstRW<[WriteFPAY22],
+             (instregex "VCMPP(S|D)Yrri",
+                        "VM(AX|IN)P(D|S)Yrr")>;
+
+// Memory-source forms of the ymm packed compares and packed min/max:
+// latency 7, one cycle on JLAGU and two cycles on JFPU0. The register source
+// uses ReadAfterLd.
+def WriteFPAY22Ld : SchedWriteRes<[JLAGU, JFPU0]> {
+  let ResourceCycles = [1, 2];
+  let Latency = 7;
+}
+def : InstRW<[WriteFPAY22Ld, ReadAfterLd],
+             (instregex "VCMPP(S|D)Yrmi",
+                        "VM(AX|IN)P(D|S)Yrm")>;
+
+// Register forms of the ymm horizontal add/subtract (VHADDPx/VHSUBPx):
+// latency 3, JFPU0 held for 2 cycles.
+let Latency = 3, ResourceCycles = [2] in
+def WriteVHAddSubY : SchedWriteRes<[JFPU0]>;
+def : InstRW<[WriteVHAddSubY],
+             (instregex "VH(ADD|SUB)P(D|S)Yrr")>;
+
+// Memory-source forms of the ymm horizontal add/subtract (VHADDPx/VHSUBPx):
+// latency 8, one cycle on JLAGU and two cycles on JFPU0.
+def WriteVHAddSubYLd: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 2];
+}
+// These forms take a register source alongside the memory operand, so the
+// binding carries ReadAfterLd like the other register+memory bindings
+// (e.g. VBLENDVP(S|D)Yrm and VCMPP(S|D)Yrmi).
+def : InstRW<[WriteVHAddSubYLd, ReadAfterLd], (instregex "VH(ADD|SUB)P(D|S)Yrm")>;
+
+// Masked load into an xmm register (VMASKMOVPS/VMASKMOVPD, rm form):
+// latency 6, one cycle on JLAGU and two cycles on JFPU01.
+let Latency = 6, ResourceCycles = [1, 2] in
+def WriteVMaskMovLd : SchedWriteRes<[JLAGU, JFPU01]>;
+def : InstRW<[WriteVMaskMovLd],
+             (instregex "VMASKMOVP(D|S)rm")>;
+
+// Masked load into a ymm register (VMASKMOVPS/VMASKMOVPD, Yrm form):
+// latency 6, one cycle on JLAGU and four cycles on JFPU01.
+def WriteVMaskMovYLd : SchedWriteRes<[JLAGU, JFPU01]> {
+  let ResourceCycles = [1, 4];
+  let Latency = 6;
+}
+def : InstRW<[WriteVMaskMovYLd],
+             (instregex "VMASKMOVP(D|S)Yrm")>;
+
+// Masked store from an xmm register (VMASKMOVPS/VMASKMOVPD, mr form):
+// latency 6, four cycles on JFPU01 and one cycle on JSAGU.
+let Latency = 6, ResourceCycles = [4, 1] in
+def WriteVMaskMovSt : SchedWriteRes<[JFPU01, JSAGU]>;
+def : InstRW<[WriteVMaskMovSt],
+             (instregex "VMASKMOVP(D|S)mr")>;
+
+// Masked store from a ymm register (VMASKMOVPS/VMASKMOVPD, Ymr form):
+// latency 6, four cycles on JFPU01 and one cycle on JSAGU.
+def WriteVMaskMovYSt : SchedWriteRes<[JFPU01, JSAGU]> {
+  let ResourceCycles = [4, 1];
+  let Latency = 6;
+}
+def : InstRW<[WriteVMaskMovYSt],
+             (instregex "VMASKMOVP(D|S)Ymr")>;
+
+// TODO: The real latency is '2+i'. The +i stands for one extra cycle spent on a
+// transfer operation that moves the floating point result to the integer unit.
+// During that cycle the floating point execution resources are not occupied;
+// ALU0 in the integer unit is occupied instead. The latency of 3 used here
+// presumably folds that extra cycle in.
+let Latency = 3 in
+def WriteVMOVMSK : SchedWriteRes<[JFPU0]>;
+def : InstRW<[WriteVMOVMSK],
+             (instregex "VMOVMSKP(D|S)(Y)?rr")>;
+
+// TODO: The real latency is '3+i'. The +i stands for one extra cycle spent on a
+// transfer operation that moves the floating point result to the integer unit.
+// During that cycle the floating point execution resources are not occupied;
+// ALU0 in the integer unit is occupied instead. The latency of 4 used here
+// presumably folds that extra cycle in.
+let Latency = 4, ResourceCycles = [4, 2] in
+def WriteVTESTY : SchedWriteRes<[JFPU01, JFPU0]>;
+// Register forms of the ymm VTESTPS/VTESTPD and VPTEST share this write.
+def : InstRW<[WriteVTESTY],
+             (instregex "VTESTP(S|D)Yrr",
+                        "VPTESTYrr")>;
+
+// Memory-source forms of the ymm VTESTPS/VTESTPD and VPTEST: latency 9, one
+// cycle on JLAGU, four cycles on JFPU01 and two cycles on JFPU0.
+def WriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPU0]> {
+  let Latency = 9;
+  let ResourceCycles = [1, 4, 2];
+}
+// Both forms compare a register source against the memory operand, so the
+// bindings carry ReadAfterLd like the other register+memory bindings
+// (e.g. VBLENDVP(S|D)Yrm and VCMPP(S|D)Yrmi).
+def : InstRW<[WriteVTESTYLd, ReadAfterLd], (instregex "VTESTP(S|D)Yrm")>;
+def : InstRW<[WriteVTESTYLd, ReadAfterLd], (instregex "VPTESTYrm")>;
+
def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> {
let Latency = 54;
let ResourceCycles = [54];
;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
+; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blendvpd:
;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
+; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blendvps:
;
; BTVER2-LABEL: test_broadcastsd_ymm:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_broadcastsd_ymm:
;
; BTVER2-LABEL: test_broadcastss_ymm:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_broadcastss_ymm:
;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
+; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
+; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_maskmovpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2
-; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi)
+; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
+; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
; BTVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_maskmovpd_ymm:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2
-; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi)
+; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
+; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_maskmovps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
-; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
+; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
+; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
; BTVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_maskmovps_ymm:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2
-; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi)
+; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
+; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_maxpd:
;
; BTVER2-LABEL: test_maxps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_maxps:
;
; BTVER2-LABEL: test_minpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_minpd:
;
; BTVER2-LABEL: test_minps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_minps:
;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movmskpd:
;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movmskps:
;
; BTVER2-LABEL: test_permilvarpd_ymm:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_permilvarps_ymm:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_testpd_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
-; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:3.00]
; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:3.00]
; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_testps_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
-; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:3.00]
; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:3.00]
; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;