From: Craig Topper Date: Sat, 6 Jul 2019 17:59:57 +0000 (+0000) Subject: [X86] Add PS<->PD domain changing support for MOVH/MOVL load instructions and MOVH... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8a198ef01a293f9a3c24ef55fee11e54c561f4eb;p=llvm [X86] Add PS<->PD domain changing support for MOVH/MOVL load instructions and MOVH store instructions. These instructions don't have an integer domain equivalent, but we can at least change between the two floating point domains. This allows a smaller encoding on SSE targets if we can turn PD into PS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365268 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll index 32a8b7fd02d..c6cd75b21c7 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -1173,7 +1173,7 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SSE2-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_2345_volatile: diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll index 60eb93f4d19..c03e2267b0d 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-256.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll @@ -235,36 +235,20 @@ define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp { } define <8 x float> @merge_8f32_2f32_23z5(<2 x float>* %ptr) nounwind uwtable noinline ssp { -; AVX1-LABEL: merge_8f32_2f32_23z5: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovups 16(%rdi), %xmm0 -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: merge_8f32_2f32_23z5: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovupd 16(%rdi), %xmm0 -; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: merge_8f32_2f32_23z5: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovupd 16(%rdi), %xmm0 -; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: retq +; AVX-LABEL: merge_8f32_2f32_23z5: +; AVX: # %bb.0: +; AVX-NEXT: vmovups 16(%rdi), %xmm0 +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: retq ; ; X32-AVX-LABEL: merge_8f32_2f32_23z5: ; X32-AVX: # %bb.0: ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX-NEXT: vmovups 16(%eax), %xmm0 -; X32-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X32-AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-AVX-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X32-AVX-NEXT: retl %ptr0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 2 @@ -598,14 +582,14 @@ define <4 x double> @merge_4f64_f64_34uz_volatile(double* %ptr) nounwind uwtable ; AVX-LABEL: merge_4f64_f64_34uz_volatile: ; AVX: # %bb.0: 
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-NEXT: retq ; ; X32-AVX-LABEL: merge_4f64_f64_34uz_volatile: ; X32-AVX: # %bb.0: ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; X32-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; X32-AVX-NEXT: retl %ptr0 = getelementptr inbounds double, double* %ptr, i64 3 %ptr1 = getelementptr inbounds double, double* %ptr, i64 4 diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll index 38eee15178e..eb7eff6955a 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-512.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll @@ -627,7 +627,7 @@ define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwt ; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile: ; ALL: # %bb.0: ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; ALL-NEXT: vbroadcastsd 72(%rdi), %ymm1 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; ALL-NEXT: retq @@ -636,7 +636,7 @@ define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwt ; X32-AVX512F: # %bb.0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-AVX512F-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; X32-AVX512F-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; X32-AVX512F-NEXT: vbroadcastsd 72(%eax), %ymm1 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll index dc4780991a5..4e8b0ffbc4a 100644 --- a/test/CodeGen/X86/nontemporal-2.ll +++ b/test/CodeGen/X86/nontemporal-2.ll @@ -564,7 +564,7 @@ define void @test_extract_i32(<4 x i32> %arg, i32* %dst) { define void @test_extract_f64(<2 x double> %arg, double* %dst) { ; SSE2-LABEL: test_extract_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movhpd %xmm0, (%rdi) +; SSE2-NEXT: movhps %xmm0, (%rdi) ; SSE2-NEXT: retq ; ; SSE4A-LABEL: test_extract_f64: @@ -575,17 +575,17 @@ define void @test_extract_f64(<2 x double> %arg, double* %dst) { ; ; SSE41-LABEL: test_extract_f64: ; SSE41: # %bb.0: -; SSE41-NEXT: movhpd %xmm0, (%rdi) +; SSE41-NEXT: movhps %xmm0, (%rdi) ; SSE41-NEXT: retq ; ; AVX-LABEL: test_extract_f64: ; AVX: # %bb.0: -; AVX-NEXT: vmovhpd %xmm0, (%rdi) +; AVX-NEXT: vmovhps %xmm0, (%rdi) ; AVX-NEXT: retq ; ; VLX-LABEL: test_extract_f64: ; VLX: # %bb.0: -; VLX-NEXT: vmovhpd %xmm0, (%rdi) +; VLX-NEXT: vmovhps %xmm0, (%rdi) ; VLX-NEXT: retq %1 = extractelement <2 x double> %arg, i32 1 store double %1, double* %dst, align 1, !nontemporal !1 diff --git a/test/CodeGen/X86/oddshuffles.ll b/test/CodeGen/X86/oddshuffles.ll index da54945782a..2c36385e714 100644 --- a/test/CodeGen/X86/oddshuffles.ll +++ b/test/CodeGen/X86/oddshuffles.ll @@ -42,23 +42,23 @@ define void @v3i64(<2 x i64> %a, <2 x i64> %b, <3 x i64>* %p) nounwind { define void @v3f64(<2 x double> %a, <2 x double> %b, <3 x double>* %p) nounwind { ; SSE-LABEL: v3f64: ; SSE: # %bb.0: -; SSE-NEXT: movhpd %xmm0, 16(%rdi) -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE-NEXT: movapd %xmm0, (%rdi) +; SSE-NEXT: movhps %xmm0, 16(%rdi) +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movaps %xmm0, (%rdi) ; SSE-NEXT: retq ; ; 
AVX-LABEL: v3f64: ; AVX: # %bb.0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] -; AVX-NEXT: vmovhpd %xmm0, 16(%rdi) -; AVX-NEXT: vmovapd %xmm1, (%rdi) +; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovhps %xmm0, 16(%rdi) +; AVX-NEXT: vmovaps %xmm1, (%rdi) ; AVX-NEXT: retq ; ; XOP-LABEL: v3f64: ; XOP: # %bb.0: -; XOP-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] -; XOP-NEXT: vmovhpd %xmm0, 16(%rdi) -; XOP-NEXT: vmovapd %xmm1, (%rdi) +; XOP-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0] +; XOP-NEXT: vmovhps %xmm0, 16(%rdi) +; XOP-NEXT: vmovaps %xmm1, (%rdi) ; XOP-NEXT: retq %r = shufflevector <2 x double> %a, <2 x double> %b, <3 x i32> store <3 x double> %r, <3 x double>* %p diff --git a/test/CodeGen/X86/pr11334.ll b/test/CodeGen/X86/pr11334.ll index ea40dd3579f..666f72ba06c 100644 --- a/test/CodeGen/X86/pr11334.ll +++ b/test/CodeGen/X86/pr11334.ll @@ -85,15 +85,15 @@ entry: define void @test_vector_creation() nounwind { ; SSE-LABEL: test_vector_creation: ; SSE: # %bb.0: -; SSE-NEXT: xorpd %xmm0, %xmm0 -; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; SSE-NEXT: movapd %xmm0, (%rax) +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; SSE-NEXT: movaps %xmm0, (%rax) ; SSE-NEXT: retq ; ; AVX-LABEL: test_vector_creation: ; AVX: # %bb.0: -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX-NEXT: vmovaps %ymm0, (%rax) ; AVX-NEXT: vzeroupper diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 89403b8edec..355d19b0e67 100644 --- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -1332,15 +1332,15 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { ; X86-AVX1-LABEL: test_mm_loadh_pi: ; X86-AVX1: # %bb.0: ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00] -; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX1-NEXT: vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_loadh_pi: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00] -; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX512-NEXT: vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00] +; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadh_pi: @@ -1361,14 +1361,14 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { ; ; X64-AVX1-LABEL: test_mm_loadh_pi: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07] -; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_loadh_pi: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07] -; X64-AVX512-NEXT: # xmm0 
= xmm0[0],mem[0] +; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07] +; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-AVX512-NEXT: retq # encoding: [0xc3] %ptr = bitcast x86_mmx* %a1 to <2 x float>* %ld = load <2 x float>, <2 x float>* %ptr @@ -1395,15 +1395,15 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { ; X86-AVX1-LABEL: test_mm_loadl_pi: ; X86-AVX1: # %bb.0: ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00] -; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] +; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00] +; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_loadl_pi: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00] -; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] +; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00] +; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadl_pi: @@ -1425,14 +1425,14 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { ; ; X64-AVX1-LABEL: test_mm_loadl_pi: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07] -; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07] +; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_loadl_pi: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07] -; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07] +; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-AVX512-NEXT: retq # encoding: [0xc3] %ptr = bitcast x86_mmx* %a1 to <2 x float>* %ld = load <2 x float>, <2 x float>* %ptr @@ -2780,13 +2780,13 @@ define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { ; X86-AVX1-LABEL: test_mm_storeh_ps: ; X86-AVX1: # %bb.0: ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX1-NEXT: vmovhpd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x17,0x00] +; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00] ; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_storeh_ps: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovhpd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x17,0x00] +; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_storeh_ps: diff --git a/test/CodeGen/X86/sse-only.ll b/test/CodeGen/X86/sse-only.ll index 5cc09c52004..6f766b1a4e4 100644 --- a/test/CodeGen/X86/sse-only.ll +++ b/test/CodeGen/X86/sse-only.ll @@ -8,9 +8,9 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movapd (%ecx), %xmm0 
-; CHECK-NEXT: movlpd {{[0-9]+}}(%esp), %xmm0 -; CHECK-NEXT: movapd %xmm0, (%eax) +; CHECK-NEXT: movaps (%ecx), %xmm0 +; CHECK-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; CHECK-NEXT: movaps %xmm0, (%eax) ; CHECK-NEXT: retl %tmp3 = load <2 x double>, <2 x double>* %A, align 16 %tmp7 = insertelement <2 x double> undef, double %B, i32 0 diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index e3dd381f030..eb1190151eb 100644 --- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -2045,40 +2045,40 @@ define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind { ; X86-SSE-LABEL: test_mm_loadh_pd: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00] -; X86-SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-SSE-NEXT: movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00] +; X86-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_loadh_pd: ; X86-AVX1: # %bb.0: ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00] -; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX1-NEXT: vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_loadh_pd: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00] -; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX512-NEXT: vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00] +; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadh_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07] -; X64-SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-SSE-NEXT: movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07] +; X64-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_loadh_pd: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07] -; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_loadh_pd: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07] -; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07] +; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 1 @@ -2135,40 +2135,40 @@ define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind { ; X86-SSE-LABEL: test_mm_loadl_pd: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00] -; X86-SSE-NEXT: # xmm0 = mem[0],xmm0[1] +; 
X86-SSE-NEXT: movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00] +; X86-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_loadl_pd: ; X86-AVX1: # %bb.0: ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00] -; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] +; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00] +; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_loadl_pd: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00] -; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] +; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00] +; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadl_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07] -; X64-SSE-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-SSE-NEXT: movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07] +; X64-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_loadl_pd: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07] -; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07] +; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_loadl_pd: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07] -; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07] +; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 0 diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index bb6d52a2895..b62e52a800b 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -13,18 +13,18 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movapd (%ecx), %xmm0 -; X86-SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] -; X86-SSE-NEXT: movapd %xmm0, (%eax) +; X86-SSE-NEXT: movaps (%ecx), %xmm0 +; X86-SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE-NEXT: movaps %xmm0, (%eax) ; X86-SSE-NEXT: retl ; ; X86-AVX-LABEL: test1: ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX-NEXT: vmovapd (%ecx), %xmm0 -; X86-AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] -; X86-AVX-NEXT: vmovapd %xmm0, (%eax) +; X86-AVX-NEXT: vmovaps (%ecx), %xmm0 +; X86-AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-AVX-NEXT: vmovaps %xmm0, (%eax) ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: test1: @@ -51,18 +51,18 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl 
{{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movapd (%ecx), %xmm0 -; X86-SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X86-SSE-NEXT: movapd %xmm0, (%eax) +; X86-SSE-NEXT: movaps (%ecx), %xmm0 +; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X86-SSE-NEXT: movaps %xmm0, (%eax) ; X86-SSE-NEXT: retl ; ; X86-AVX-LABEL: test2: ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX-NEXT: vmovapd (%ecx), %xmm0 -; X86-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X86-AVX-NEXT: vmovapd %xmm0, (%eax) +; X86-AVX-NEXT: vmovaps (%ecx), %xmm0 +; X86-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X86-AVX-NEXT: vmovaps %xmm0, (%eax) ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: test2: diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index b9da731837f..b657a42445b 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -213,16 +213,16 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movapd (%ecx), %xmm0 -; X86-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X86-NEXT: movapd %xmm0, (%ecx) +; X86-NEXT: movaps (%ecx), %xmm0 +; X86-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X86-NEXT: movaps %xmm0, (%ecx) ; X86-NEXT: retl ; ; X64-LABEL: t9: ; X64: # %bb.0: -; X64-NEXT: movapd (%rdi), %xmm0 -; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X64-NEXT: movapd %xmm0, (%rdi) +; X64-NEXT: movaps (%rdi), %xmm0 +; X64-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X64-NEXT: movaps %xmm0, (%rdi) ; X64-NEXT: retq %tmp = load <4 x float>, <4 x float>* %r %tmp.upgrd.3 = bitcast <2 x i32>* %A to double* diff --git a/test/CodeGen/X86/var-permute-128.ll b/test/CodeGen/X86/var-permute-128.ll index a81ad74f8f5..f1633114466 100644 --- a/test/CodeGen/X86/var-permute-128.ll +++ b/test/CodeGen/X86/var-permute-128.ll @@ -384,7 +384,7 @@ define <2 x double> @var_shuffle_v2f64(<2 x double> %v, <2 x i64> %indices) noun ; SSE3-NEXT: andl $1, %ecx ; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE3-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SSE3-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: var_shuffle_v2f64: @@ -396,7 +396,7 @@ define <2 x double> @var_shuffle_v2f64(<2 x double> %v, <2 x i64> %indices) noun ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSSE3-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SSSE3-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v2f64: diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll index 9fb0dc54f2a..2f06350b58b 100644 --- a/test/CodeGen/X86/vec_insert-2.ll +++ b/test/CodeGen/X86/vec_insert-2.ll @@ -41,7 +41,7 @@ define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) nounwind { define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind { ; X32-LABEL: t3: ; X32: # %bb.0: -; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; X32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; X32-NEXT: retl ; ; X64-LABEL: t3: diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll index 2b42d33000a..e3f33de6103 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -987,17 +987,17 @@ define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) { 
define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) { ; SSE2-LABEL: insert_mem_lo_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_mem_lo_v2i64: ; SSE3: # %bb.0: -; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_mem_lo_v2i64: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: insert_mem_lo_v2i64: @@ -1118,12 +1118,12 @@ define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) { define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) { ; SSE-LABEL: insert_mem_lo_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_lo_v2f64: ; AVX: # %bb.0: -; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; AVX-NEXT: retq %a = load double, double* %ptr %v = insertelement <2 x double> undef, double %a, i32 0 @@ -1150,12 +1150,12 @@ define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) { ; SSE-LABEL: insert_mem_hi_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_hi_v2f64: ; AVX: # %bb.0: -; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-NEXT: retq %a = load double, double* %ptr %v = insertelement <2 x double> undef, double %a, i32 0 @@ -1309,7 +1309,7 @@ define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) { define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) { ; SSE-LABEL: shuffle_mem_v2f64_02: ; SSE: # %bb.0: -; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_mem_v2f64_02: @@ -1324,17 +1324,17 @@ define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) { define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) { ; SSE2-LABEL: shuffle_mem_v2f64_21: ; SSE2: # %bb.0: -; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_mem_v2f64_21: ; SSE3: # %bb.0: -; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_mem_v2f64_21: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_mem_v2f64_21: diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index 42869c1e02d..f1fcf7287fb 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -2186,17 +2186,17 @@ define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) { define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { ; SSE2-LABEL: insert_mem_lo_v4i32: ; SSE2: # %bb.0: -; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE2-NEXT: movlps {{.*#+}} xmm0 = 
mem[0,1],xmm0[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_mem_lo_v4i32: ; SSE3: # %bb.0: -; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_mem_lo_v4i32: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: insert_mem_lo_v4i32: @@ -2289,12 +2289,12 @@ define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) { define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) { ; SSE-LABEL: insert_mem_lo_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_lo_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] +; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; AVX-NEXT: retq %a = load <2 x float>, <2 x float>* %ptr %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> @@ -2322,12 +2322,12 @@ define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) { define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) { ; SSE-LABEL: insert_mem_hi_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_hi_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-NEXT: retq %a = load <2 x float>, <2 x float>* %ptr %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index 795ca11ca26..f4a11f443e4 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -2713,7 +2713,7 @@ define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) { ; ALL-LABEL: concat_v2f32_1: ; ALL: # %bb.0: # %entry ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; ALL-NEXT: retq entry: %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 @@ -2728,7 +2728,7 @@ define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) { ; ALL-LABEL: concat_v2f32_2: ; ALL: # %bb.0: # %entry ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; ALL-NEXT: retq entry: %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 @@ -2741,7 +2741,7 @@ define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) { ; ALL-LABEL: concat_v2f32_3: ; ALL: # %bb.0: # %entry ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; ALL-NEXT: retq entry: %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll index c9302817d47..d65d583838c 100644 --- a/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1583,13 +1583,13 @@ define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) { ; SSE-LABEL: combine_test22: ; SSE: # %bb.0: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: movhpd {{.*#+}} xmm0 
= xmm0[0],mem[0] +; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test22: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-NEXT: retq ; Current AVX2 lowering of this is still awful, not adding a test case. %1 = load <2 x float>, <2 x float>* %a, align 8 diff --git a/test/CodeGen/X86/vector-shuffle-variable-128.ll b/test/CodeGen/X86/vector-shuffle-variable-128.ll index 18b97b195f7..e4f773aeb36 100644 --- a/test/CodeGen/X86/vector-shuffle-variable-128.ll +++ b/test/CodeGen/X86/vector-shuffle-variable-128.ll @@ -16,7 +16,7 @@ define <2 x double> @var_shuffle_v2f64_v2f64_xx_i64(<2 x double> %x, i64 %i0, i6 ; SSE-NEXT: andl $1, %edi ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: var_shuffle_v2f64_v2f64_xx_i64: @@ -25,7 +25,7 @@ define <2 x double> @var_shuffle_v2f64_v2f64_xx_i64(<2 x double> %x, i64 %i0, i6 ; AVX-NEXT: andl $1, %edi ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-NEXT: retq %x0 = extractelement <2 x double> %x, i64 %i0 %x1 = extractelement <2 x double> %x, i64 %i1 diff --git a/test/CodeGen/X86/vector-shuffle-variable-256.ll b/test/CodeGen/X86/vector-shuffle-variable-256.ll index 6a159ab3aea..1c1e5742a2b 100644 --- a/test/CodeGen/X86/vector-shuffle-variable-256.ll +++ b/test/CodeGen/X86/vector-shuffle-variable-256.ll @@ -19,9 +19,9 @@ define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0, ; ALL-NEXT: andl $3, %edx ; ALL-NEXT: vmovaps %ymm0, (%rsp) ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; ALL-NEXT: movq %rbp, %rsp ; ALL-NEXT: popq %rbp @@ -73,9 +73,9 @@ define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, ; ALL-NEXT: andl $1, %edx ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; ALL-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; ALL-NEXT: retq %x0 = extractelement <2 x double> %x, i64 %i0