; GENERIC: # BB#0:
; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movddup:
; ATOM: # BB#0:
; ATOM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [1:1.00]
; ATOM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
+; ATOM-NEXT: subpd %xmm0, %xmm1 # sched: [6:3.00]
; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM: # BB#0:
; SLM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; SLM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [3:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movddup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movddup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_movddup:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_movddup:
; SKX: # BB#0:
; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movddup:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:1.00]
; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movddup:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
%2 = load <2 x double>, <2 x double> *%a1, align 16
%3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
- %4 = fadd <2 x double> %1, %3
+ %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
ret <2 x double> %4
}