; GENERIC-LABEL: test_cvtdq2ps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
+; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
; GENERIC-LABEL: test_movupd:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movupd:
define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
; GENERIC-LABEL: test_movups:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movups:
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # BB#0:
-; ALL-NEXT: vmovups 32(%rsi), %xmm0
-; ALL-NEXT: vinsertf128 $1, 48(%rsi), %ymm0, %ymm0
+; ALL-NEXT: vmovups 32(%rsi), %ymm0
; ALL-NEXT: retq
%ptr_a = bitcast float* %a to <16 x float>*
%v_a = load <16 x float>, <16 x float>* %ptr_a, align 4