ret <4 x i32> %res3
}
+; Builds a <4 x i32> whose lanes 0, 1 and 3 come from i32 elements 2, 3 and 5
+; of %ptr; lane 2 is left undef. Element 2 is also incremented in memory right
+; after it has been read, so lane 0 carries the pre-increment value.
+; The %rdi-based checks and the X32-SSE41 checks expect a single unaligned
+; 16-byte load at byte offset 8, followed by an in-place incl at that same
+; offset. The X32-SSE1 checks expect scalar movl loads and stores instead.
+; NOTE(review) - the check lines look script-generated; confirm before editing
+; them by hand.
+define <4 x i32> @merge_4i32_i32_23u5_inc2(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_23u5_inc2:
+; SSE: # BB#0:
+; SSE-NEXT: movups 8(%rdi), %xmm0
+; SSE-NEXT: incl 8(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_23u5_inc2:
+; AVX: # BB#0:
+; AVX-NEXT: vmovups 8(%rdi), %xmm0
+; AVX-NEXT: incl 8(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_23u5_inc2:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi6:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi7:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi8:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi9:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 8(%ecx), %edx
+; X32-SSE1-NEXT: movl 12(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%edx), %edi
+; X32-SSE1-NEXT: movl %edi, 8(%ecx)
+; X32-SSE1-NEXT: movl 20(%ecx), %ecx
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl %ecx, 12(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_23u5_inc2:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movups 8(%eax), %xmm0
+; X32-SSE41-NEXT: incl 8(%eax)
+; X32-SSE41-NEXT: retl
+ ; Addresses of elements 2, 3 and 5 (byte offsets 8, 12 and 20).
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
+ %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
+ ; Read element 2, then write back its value plus one; the two remaining
+ ; loads are issued after this store.
+ %val0 = load i32, i32* %ptr0
+ %inc = add i32 %val0, 1
+ store i32 %inc, i32* %ptr0
+ %val1 = load i32, i32* %ptr1
+ %val3 = load i32, i32* %ptr3
+ ; Assemble the result from the loaded values; lane 2 is never written.
+ %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
+ ret <4 x i32> %res3
+}
+
+; Builds a <4 x i32> whose lanes 0, 1 and 3 come from i32 elements 2, 3 and 5
+; of %ptr; lane 2 is left undef. Element 3 is incremented in memory after it
+; has been read, so lane 1 carries the pre-increment value. The load of
+; element 5 is issued after that store.
+; The %rdi-based checks and the X32-SSE41 checks expect a single unaligned
+; 16-byte load at byte offset 8, followed by an in-place incl at byte offset
+; 12. The X32-SSE1 checks expect scalar movl loads and stores instead.
+; NOTE(review) - the check lines look script-generated; confirm before editing
+; them by hand.
+define <4 x i32> @merge_4i32_i32_23u5_inc3(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_23u5_inc3:
+; SSE: # BB#0:
+; SSE-NEXT: movups 8(%rdi), %xmm0
+; SSE-NEXT: incl 12(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_23u5_inc3:
+; AVX: # BB#0:
+; AVX-NEXT: vmovups 8(%rdi), %xmm0
+; AVX-NEXT: incl 12(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_23u5_inc3:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi10:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi11:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi12:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi13:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 8(%ecx), %edx
+; X32-SSE1-NEXT: movl 12(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%esi), %edi
+; X32-SSE1-NEXT: movl %edi, 12(%ecx)
+; X32-SSE1-NEXT: movl 20(%ecx), %ecx
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl %ecx, 12(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_23u5_inc3:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movups 8(%eax), %xmm0
+; X32-SSE41-NEXT: incl 12(%eax)
+; X32-SSE41-NEXT: retl
+ ; Addresses of elements 2, 3 and 5 (byte offsets 8, 12 and 20).
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
+ %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
+ %val0 = load i32, i32* %ptr0
+ ; Read element 3, then write back its value plus one before loading
+ ; element 5.
+ %val1 = load i32, i32* %ptr1
+ %inc = add i32 %val1, 1
+ store i32 %inc, i32* %ptr1
+ %val3 = load i32, i32* %ptr3
+ ; Assemble the result from the loaded values; lane 2 is never written.
+ %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
+ ret <4 x i32> %res3
+}
+
define <4 x i32> @merge_4i32_i32_3zuu(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_3zuu:
; SSE: # BB#0:
ret <4 x i32> %res1
}
+; Builds a <4 x i32> whose lanes 0 and 1 come from i32 elements 4 and 5 of
+; %ptr; lanes 2 and 3 keep the zero from the zeroinitializer base vector.
+; Element 4 is incremented in memory right after it has been read, so lane 0
+; carries the pre-increment value.
+; The %rdi-based checks and the X32-SSE41 checks expect one movsd-style load
+; that zeroes the upper half, followed by an in-place incl at byte offset 16.
+; The X32-SSE1 checks expect scalar movl traffic with explicit zero stores.
+; NOTE(review) - the check lines look script-generated; confirm before editing
+; them by hand.
+define <4 x i32> @merge_4i32_i32_45zz_inc4(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_45zz_inc4:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: incl 16(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_45zz_inc4:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: incl 16(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_45zz_inc4:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi14:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi15:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi16:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi17:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 16(%ecx), %edx
+; X32-SSE1-NEXT: movl 20(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%edx), %edi
+; X32-SSE1-NEXT: movl %edi, 16(%ecx)
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl $0, 12(%eax)
+; X32-SSE1-NEXT: movl $0, 8(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_45zz_inc4:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE41-NEXT: incl 16(%eax)
+; X32-SSE41-NEXT: retl
+ ; Addresses of elements 4 and 5 (byte offsets 16 and 20).
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 4
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 5
+ ; Read element 4, then write back its value plus one; element 5 is loaded
+ ; after this store.
+ %val0 = load i32, i32* %ptr0
+ %inc = add i32 %val0, 1
+ store i32 %inc, i32* %ptr0
+ %val1 = load i32, i32* %ptr1
+ ; Insert into an all-zero vector so lanes 2 and 3 remain zero.
+ %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ ret <4 x i32> %res1
+}
+
+; Builds a <4 x i32> whose lanes 0 and 1 come from i32 elements 4 and 5 of
+; %ptr; lanes 2 and 3 keep the zero from the zeroinitializer base vector.
+; Element 5 is incremented in memory after both loads have been issued, so
+; lane 1 carries the pre-increment value.
+; The %rdi-based checks and the X32-SSE41 checks expect one movsd-style load
+; that zeroes the upper half, followed by an in-place incl at byte offset 20.
+; The X32-SSE1 checks expect scalar movl traffic with explicit zero stores.
+; NOTE(review) - the check lines look script-generated; confirm before editing
+; them by hand.
+define <4 x i32> @merge_4i32_i32_45zz_inc5(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_45zz_inc5:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: incl 20(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_45zz_inc5:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: incl 20(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_45zz_inc5:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi18:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi19:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi20:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi21:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 16(%ecx), %edx
+; X32-SSE1-NEXT: movl 20(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%esi), %edi
+; X32-SSE1-NEXT: movl %edi, 20(%ecx)
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl $0, 12(%eax)
+; X32-SSE1-NEXT: movl $0, 8(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_45zz_inc5:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE41-NEXT: incl 20(%eax)
+; X32-SSE41-NEXT: retl
+ ; Addresses of elements 4 and 5 (byte offsets 16 and 20).
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 4
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 5
+ ; Both elements are read before element 5 is overwritten with its value
+ ; plus one.
+ %val0 = load i32, i32* %ptr0
+ %val1 = load i32, i32* %ptr1
+ %inc = add i32 %val1, 1
+ store i32 %inc, i32* %ptr1
+ ; Insert into an all-zero vector so lanes 2 and 3 remain zero.
+ %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ ret <4 x i32> %res1
+}
+
define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_8i16_i16_23u567u9:
; SSE: # BB#0:
; X32-SSE1-LABEL: merge_8i16_i16_23u567u9:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %edi
-; X32-SSE1-NEXT: .Lcfi6:
+; X32-SSE1-NEXT: .Lcfi22:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %esi
-; X32-SSE1-NEXT: .Lcfi7:
+; X32-SSE1-NEXT: .Lcfi23:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X32-SSE1-NEXT: .Lcfi8:
+; X32-SSE1-NEXT: .Lcfi24:
; X32-SSE1-NEXT: .cfi_offset %esi, -12
-; X32-SSE1-NEXT: .Lcfi9:
+; X32-SSE1-NEXT: .Lcfi25:
; X32-SSE1-NEXT: .cfi_offset %edi, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE1-LABEL: merge_16i8_i8_01u3456789ABCDuF:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %ebp
-; X32-SSE1-NEXT: .Lcfi10:
+; X32-SSE1-NEXT: .Lcfi26:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %ebx
-; X32-SSE1-NEXT: .Lcfi11:
+; X32-SSE1-NEXT: .Lcfi27:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
; X32-SSE1-NEXT: pushl %edi
-; X32-SSE1-NEXT: .Lcfi12:
+; X32-SSE1-NEXT: .Lcfi28:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 16
; X32-SSE1-NEXT: pushl %esi
-; X32-SSE1-NEXT: .Lcfi13:
+; X32-SSE1-NEXT: .Lcfi29:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 20
-; X32-SSE1-NEXT: .Lcfi14:
+; X32-SSE1-NEXT: .Lcfi30:
; X32-SSE1-NEXT: .cfi_offset %esi, -20
-; X32-SSE1-NEXT: .Lcfi15:
+; X32-SSE1-NEXT: .Lcfi31:
; X32-SSE1-NEXT: .cfi_offset %edi, -16
-; X32-SSE1-NEXT: .Lcfi16:
+; X32-SSE1-NEXT: .Lcfi32:
; X32-SSE1-NEXT: .cfi_offset %ebx, -12
-; X32-SSE1-NEXT: .Lcfi17:
+; X32-SSE1-NEXT: .Lcfi33:
; X32-SSE1-NEXT: .cfi_offset %ebp, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %edi
-; X32-SSE1-NEXT: .Lcfi18:
+; X32-SSE1-NEXT: .Lcfi34:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %esi
-; X32-SSE1-NEXT: .Lcfi19:
+; X32-SSE1-NEXT: .Lcfi35:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X32-SSE1-NEXT: .Lcfi20:
+; X32-SSE1-NEXT: .Lcfi36:
; X32-SSE1-NEXT: .cfi_offset %esi, -12
-; X32-SSE1-NEXT: .Lcfi21:
+; X32-SSE1-NEXT: .Lcfi37:
; X32-SSE1-NEXT: .cfi_offset %edi, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx