%x1 = load <7 x i64>, <7 x i64>* %x, align 1
ret <7 x i64> %x1
}
+
+; PR42305 - https://bugs.llvm.org/show_bug.cgi?id=42305
+
+; load_split: performs one unaligned (align 1) <8 x float> load from %ld whose
+; only uses are two shuffles selecting the low half (elements 0-3) and the high
+; half (elements 4-7); each half is stored unaligned, to %st1 and %st2.
+; Under every check prefix below, the expected sequence accesses memory only
+; through 16-byte xmm registers: two unaligned loads at offsets 0 and 16 of the
+; source pointer, followed by the two unaligned stores. No ymm register appears
+; in the expected output, including under the AVX prefixes.
+; NOTE(review): the check lines look like update_llc_test_checks.py output; if
+; so, regenerate them instead of hand-editing -- confirm against the file header.
+define void @load_split(<8 x float>* %ld, <4 x float>* %st1, <4 x float>* %st2) {
+; X86-SSE-LABEL: load_split:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT: movups (%edx), %xmm0
+; X86-SSE-NEXT: movups 16(%edx), %xmm1
+; X86-SSE-NEXT: movups %xmm0, (%ecx)
+; X86-SSE-NEXT: movups %xmm1, (%eax)
+; X86-SSE-NEXT: retl
+;
+; X86-AVX-LABEL: load_split:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-AVX-NEXT: vmovups (%edx), %xmm0
+; X86-AVX-NEXT: vmovups 16(%edx), %xmm1
+; X86-AVX-NEXT: vmovups %xmm0, (%ecx)
+; X86-AVX-NEXT: vmovups %xmm1, (%eax)
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: load_split:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movups (%rdi), %xmm0
+; X64-SSE-NEXT: movups 16(%rdi), %xmm1
+; X64-SSE-NEXT: movups %xmm0, (%rsi)
+; X64-SSE-NEXT: movups %xmm1, (%rdx)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: load_split:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovups (%rdi), %xmm0
+; X64-AVX-NEXT: vmovups 16(%rdi), %xmm1
+; X64-AVX-NEXT: vmovups %xmm0, (%rsi)
+; X64-AVX-NEXT: vmovups %xmm1, (%rdx)
+; X64-AVX-NEXT: retq
+ ; Single 256-bit (8 x 32-bit float) load with only byte alignment guaranteed.
+ %t256 = load <8 x float>, <8 x float>* %ld, align 1
+ ; Low half: shuffle mask <0,1,2,3> selects the first four elements.
+ %b128 = shufflevector <8 x float> %t256, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x float> %b128, <4 x float>* %st1, align 1
+ ; High half: shuffle mask <4,5,6,7> selects the last four elements.
+ %t128 = shufflevector <8 x float> %t256, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ store <4 x float> %t128, <4 x float>* %st2, align 1
+ ret void
+}