; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE2-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE2-NEXT: retq
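; NOTE: movhps and movhpd perform the same 8-byte load into the upper half of
; the register for any bit pattern; the check lines differ only in how the
; shuffle-comment printer annotates them, indexing lanes at the instruction's
; element width: the pd form prints one f64 lane per operand
; (xmm0[0],mem[0]) while the ps form prints two f32 lanes
; (xmm0[0,1],mem[0,1]).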
;
; SSE41-LABEL: merge_4f32_f32_2345_volatile:
}
define <8 x float> @merge_8f32_2f32_23z5(<2 x float>* %ptr) nounwind uwtable noinline ssp {
-; AVX1-LABEL: merge_8f32_2f32_23z5:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovups 16(%rdi), %xmm0
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: merge_8f32_2f32_23z5:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovupd 16(%rdi), %xmm0
-; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: merge_8f32_2f32_23z5:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovupd 16(%rdi), %xmm0
-; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
+; AVX-LABEL: merge_8f32_2f32_23z5:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovups 16(%rdi), %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_8f32_2f32_23z5:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: vmovups 16(%eax), %xmm0
-; X32-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 2
; AVX-LABEL: merge_4f64_f64_34uz_volatile:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_f64_34uz_volatile:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; X32-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 3
%ptr1 = getelementptr inbounds double, double* %ptr, i64 4
; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; ALL-NEXT: vbroadcastsd 72(%rdi), %ymm1
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX512F-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; X32-AVX512F-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32-AVX512F-NEXT: vbroadcastsd 72(%eax), %ymm1
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
define void @test_extract_f64(<2 x double> %arg, double* %dst) {
; SSE2-LABEL: test_extract_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movhpd %xmm0, (%rdi)
+; SSE2-NEXT: movhps %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_extract_f64:
;
; SSE41-LABEL: test_extract_f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movhpd %xmm0, (%rdi)
+; SSE41-NEXT: movhps %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_extract_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovhpd %xmm0, (%rdi)
+; AVX-NEXT: vmovhps %xmm0, (%rdi)
; AVX-NEXT: retq
;
; VLX-LABEL: test_extract_f64:
; VLX: # %bb.0:
-; VLX-NEXT: vmovhpd %xmm0, (%rdi)
+; VLX-NEXT: vmovhps %xmm0, (%rdi)
; VLX-NEXT: retq
%1 = extractelement <2 x double> %arg, i32 1
store double %1, double* %dst, align 1, !nontemporal !1
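; The extractelement of lane 1 plus the scalar store folds into a single
; movhps store of the register's high quadword; the movhps and movhpd stores
; write the same 8 bytes, and neither honors the nontemporal hint here (hence
; the separate SSE4A run line, which presumably can use movntsd instead).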
define void @v3f64(<2 x double> %a, <2 x double> %b, <3 x double>* %p) nounwind {
; SSE-LABEL: v3f64:
; SSE: # %bb.0:
-; SSE-NEXT: movhpd %xmm0, 16(%rdi)
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE-NEXT: movapd %xmm0, (%rdi)
+; SSE-NEXT: movhps %xmm0, 16(%rdi)
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movaps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: v3f64:
; AVX: # %bb.0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; AVX-NEXT: vmovhpd %xmm0, 16(%rdi)
-; AVX-NEXT: vmovapd %xmm1, (%rdi)
+; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovhps %xmm0, 16(%rdi)
+; AVX-NEXT: vmovaps %xmm1, (%rdi)
; AVX-NEXT: retq
;
; XOP-LABEL: v3f64:
; XOP: # %bb.0:
-; XOP-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; XOP-NEXT: vmovhpd %xmm0, 16(%rdi)
-; XOP-NEXT: vmovapd %xmm1, (%rdi)
+; XOP-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; XOP-NEXT: vmovhps %xmm0, 16(%rdi)
+; XOP-NEXT: vmovaps %xmm1, (%rdi)
; XOP-NEXT: retq
%r = shufflevector <2 x double> %a, <2 x double> %b, <3 x i32> <i32 0, i32 2, i32 1>
store <3 x double> %r, <3 x double>* %p
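; Worked through from the mask: <i32 0, i32 2, i32 1> selects [a0, b0, a1].
; movlhps packs a0 and b0 into one register for the aligned 16-byte movaps
; store, and movhps writes a1 (the high half of %xmm0) to the trailing 8
; bytes at 16(%rdi).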
define void @test_vector_creation() nounwind {
; SSE-LABEL: test_vector_creation:
; SSE: # %bb.0:
-; SSE-NEXT: xorpd %xmm0, %xmm0
-; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; SSE-NEXT: movapd %xmm0, (%rax)
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; SSE-NEXT: movaps %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX-LABEL: test_vector_creation:
; AVX: # %bb.0:
-; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: vmovaps %ymm0, (%rax)
; AVX-NEXT: vzeroupper
; X86-AVX1-LABEL: test_mm_loadh_pi:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
-; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-AVX1-NEXT: vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00]
+; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pi:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
-; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-AVX512-NEXT: vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00]
+; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pi:
;
; X64-AVX1-LABEL: test_mm_loadh_pi:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
-; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07]
+; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pi:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
-; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07]
+; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast x86_mmx* %a1 to <2 x float>*
%ld = load <2 x float>, <2 x float>* %ptr
; X86-AVX1-LABEL: test_mm_loadl_pi:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
-; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
+; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00]
+; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pi:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
-; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
+; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00]
+; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pi:
;
; X64-AVX1-LABEL: test_mm_loadl_pi:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
-; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
+; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07]
+; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pi:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
-; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
+; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07]
+; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast x86_mmx* %a1 to <2 x float>*
%ld = load <2 x float>, <2 x float>* %ptr
; X86-AVX1-LABEL: test_mm_storeh_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovhpd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x17,0x00]
+; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeh_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovhpd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x17,0x00]
+; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeh_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movapd (%ecx), %xmm0
-; CHECK-NEXT: movlpd {{[0-9]+}}(%esp), %xmm0
-; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: movaps (%ecx), %xmm0
+; CHECK-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; CHECK-NEXT: movaps %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp3 = load <2 x double>, <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
; X86-SSE-LABEL: test_mm_loadh_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00]
-; X86-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-SSE-NEXT: movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00]
+; X86-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
-; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-AVX1-NEXT: vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00]
+; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
-; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-AVX512-NEXT: vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00]
+; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pd:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07]
-; X64-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-SSE-NEXT: movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07]
+; X64-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pd:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
-; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07]
+; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pd:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
-; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07]
+; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
%res = insertelement <2 x double> %a0, double %ld, i32 1
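; Encoding note, taken from the check lines above: the pd form carries the
; 0x66 operand-size prefix ([0x66,0x0f,0x16,0x07], 4 bytes) that the ps form
; drops ([0x0f,0x16,0x07], 3 bytes); in the VEX forms the same distinction
; shows up in the pp field (0xf9 vs 0xf8) with no size change. Both load 8
; bytes into the upper lane, matching the insertelement at index 1.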
; X86-SSE-LABEL: test_mm_loadl_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00]
-; X86-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
+; X86-SSE-NEXT: movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00]
+; X86-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
-; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
+; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00]
+; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
-; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
+; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00]
+; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pd:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07]
-; X64-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
+; X64-SSE-NEXT: movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07]
+; X64-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pd:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
-; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
+; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07]
+; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pd:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
-; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
+; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07]
+; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
%res = insertelement <2 x double> %a0, double %ld, i32 0
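; Mirror image of loadh_pd: the load feeds insertelement at index 0, so
; movlps overwrites only the low quadword and the existing high lane of %a0
; survives, exactly as movlpd did ([0x66,0x0f,0x12,...] vs the shorter
; [0x0f,0x12,...]).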
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT: movapd (%ecx), %xmm0
-; X86-SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
-; X86-SSE-NEXT: movapd %xmm0, (%eax)
+; X86-SSE-NEXT: movaps (%ecx), %xmm0
+; X86-SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test1:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX-NEXT: vmovapd (%ecx), %xmm0
-; X86-AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
-; X86-AVX-NEXT: vmovapd %xmm0, (%eax)
+; X86-AVX-NEXT: vmovaps (%ecx), %xmm0
+; X86-AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT: movapd (%ecx), %xmm0
-; X86-SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X86-SSE-NEXT: movapd %xmm0, (%eax)
+; X86-SSE-NEXT: movaps (%ecx), %xmm0
+; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test2:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX-NEXT: vmovapd (%ecx), %xmm0
-; X86-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X86-AVX-NEXT: vmovapd %xmm0, (%eax)
+; X86-AVX-NEXT: vmovaps (%ecx), %xmm0
+; X86-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movapd (%ecx), %xmm0
-; X86-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X86-NEXT: movapd %xmm0, (%ecx)
+; X86-NEXT: movaps (%ecx), %xmm0
+; X86-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X86-NEXT: movaps %xmm0, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: t9:
; X64: # %bb.0:
-; X64-NEXT: movapd (%rdi), %xmm0
-; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X64-NEXT: movapd %xmm0, (%rdi)
+; X64-NEXT: movaps (%rdi), %xmm0
+; X64-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
%tmp = load <4 x float>, <4 x float>* %r
%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
; SSE3-NEXT: andl $1, %ecx
; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE3-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE3-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v2f64:
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSSE3-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v2f64:
define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind {
; X32-LABEL: t3:
; X32: # %bb.0:
-; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; X32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32-NEXT: retl
;
; X64-LABEL: t3:
define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_lo_v2i64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_mem_lo_v2i64:
; SSE3: # %bb.0:
-; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_mem_lo_v2i64:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_mem_lo_v2i64:
define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_lo_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_lo_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; AVX-NEXT: retq
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_hi_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT: retq
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) {
; SSE-LABEL: shuffle_mem_v2f64_02:
; SSE: # %bb.0:
-; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_mem_v2f64_02:
define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) {
; SSE2-LABEL: shuffle_mem_v2f64_21:
; SSE2: # %bb.0:
-; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_mem_v2f64_21:
; SSE3: # %bb.0:
-; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_mem_v2f64_21:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_mem_v2f64_21:
define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
; SSE2-LABEL: insert_mem_lo_v4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_mem_lo_v4i32:
; SSE3: # %bb.0:
-; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_mem_lo_v4i32:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_mem_lo_v4i32:
define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE-LABEL: insert_mem_lo_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_lo_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; AVX-NEXT: retq
%a = load <2 x float>, <2 x float>* %ptr
%v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE-LABEL: insert_mem_hi_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT: retq
%a = load <2 x float>, <2 x float>* %ptr
%v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; ALL-LABEL: concat_v2f32_1:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; ALL-NEXT: retq
entry:
%tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
; ALL-LABEL: concat_v2f32_2:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; ALL-NEXT: retq
entry:
%tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
; ALL-LABEL: concat_v2f32_3:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; ALL-NEXT: retq
entry:
%tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
; SSE-LABEL: combine_test22:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test22:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT: retq
; The current AVX2 lowering of this is still awful, so a test case is not added.
%1 = load <2 x float>, <2 x float>* %a, align 8
; SSE-NEXT: andl $1, %edi
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: var_shuffle_v2f64_v2f64_xx_i64:
; AVX-NEXT: andl $1, %edi
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT: retq
%x0 = extractelement <2 x double> %x, i64 %i0
%x1 = extractelement <2 x double> %x, i64 %i1
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: andl $1, %edx
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; ALL-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <2 x double> %x, i64 %i0