From: Craig Topper Date: Sat, 28 Jul 2018 00:27:25 +0000 (+0000) Subject: [DAGCombiner] Teach DAG combiner that A-(B-C) can be folded to A+(C-B) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fefdba1e17f2599c69f2521e8edf83dae99994ae;p=llvm [DAGCombiner] Teach DAG combiner that A-(B-C) can be folded to A+(C-B) This can be useful since addition is commutable, and subtraction is not. This matches a transform that is also done by InstCombine. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338181 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7a99687757f..963081c2e95 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2671,6 +2671,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold (A-(B-C)) -> A+(C-B) + if (N1.getOpcode() == ISD::SUB && N1.hasOneUse()) + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), + N1.getOperand(0))); + // fold (X - (-Y * Z)) -> (X + (Y * Z)) if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { if (N1.getOperand(0).getOpcode() == ISD::SUB && diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll index 030857df777..36aa036c3c0 100644 --- a/test/CodeGen/AArch64/arm64-cse.ll +++ b/test/CodeGen/AArch64/arm64-cse.ll @@ -12,7 +12,7 @@ entry: ; CHECK-NOT: sub ; CHECK: b.ge ; CHECK: sub -; CHECK: sub +; CHECK-NEXT: add ; CHECK-NOT: sub ; CHECK: ret %0 = load i32, i32* %offset, align 4 diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index 10e56a346a2..49dbb03135f 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -65,8 +65,8 @@ if.end: %s2 = sub nsw i32 %s, %size %s3 = sub nsw i32 %sub, %s2 ; CHECK: sub [[R1:r[0-9]+]], [[R2:r[0-9]+]], r2 -; CHECK: sub [[R3:r[0-9]+]], [[R1]], r2 -; CHECK: sub [[R4:r[0-9]+]], [[R1]], [[R3]] +; CHECK: sub [[R3:r[0-9]+]], r2, [[R1]] +; CHECK: add [[R4:r[0-9]+]], [[R1]], [[R3]] ; CHECK-NOT: sub ; CHECK: str store i32 %s3, i32* %offset, align 4 diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll index dc4f2f9c862..cbb3c91299f 100644 --- a/test/CodeGen/Mips/const-mult.ll +++ b/test/CodeGen/Mips/const-mult.ll @@ -312,20 +312,20 @@ define i32 @mul22224078_32(i32 %a) { ; MIPS32-NEXT: sll $2, $4, 4 ; MIPS32-NEXT: subu $1, $2, $1 ; MIPS32-NEXT: sll $2, $4, 6 -; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: subu $1, $1, $2 ; MIPS32-NEXT: sll $2, $4, 8 -; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: addu $1, $2, $1 ; MIPS32-NEXT: sll $2, $4, 10 -; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: subu $1, $1, $2 ; MIPS32-NEXT: sll $2, $4, 13 -; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: addu $1, $2, $1 ; MIPS32-NEXT: sll $2, $4, 16 -; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: subu $1, $1, $2 ; MIPS32-NEXT: sll $2, $4, 24 ; MIPS32-NEXT: sll $3, $4, 22 ; MIPS32-NEXT: sll $5, $4, 20 ; MIPS32-NEXT: sll $4, $4, 18 -; MIPS32-NEXT: subu $1, $4, $1 +; MIPS32-NEXT: addu $1, $4, $1 ; MIPS32-NEXT: addu $1, $5, $1 ; MIPS32-NEXT: addu $1, $3, $1 ; MIPS32-NEXT: jr $ra @@ -338,20 +338,20 @@ define i32 @mul22224078_32(i32 %a) { ; MIPS64-NEXT: sll $3, $1, 4 ; MIPS64-NEXT: subu $2, $3, $2 ; MIPS64-NEXT: sll $3, $1, 6 -; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: subu $2, $2, $3 ; MIPS64-NEXT: sll $3, $1, 8 -; MIPS64-NEXT: subu $2, 
$3, $2 +; MIPS64-NEXT: addu $2, $3, $2 ; MIPS64-NEXT: sll $3, $1, 10 -; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: subu $2, $2, $3 ; MIPS64-NEXT: sll $3, $1, 13 -; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: addu $2, $3, $2 ; MIPS64-NEXT: sll $3, $1, 16 -; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: subu $2, $2, $3 ; MIPS64-NEXT: sll $3, $1, 24 ; MIPS64-NEXT: sll $4, $1, 22 ; MIPS64-NEXT: sll $5, $1, 20 ; MIPS64-NEXT: sll $1, $1, 18 -; MIPS64-NEXT: subu $1, $1, $2 +; MIPS64-NEXT: addu $1, $1, $2 ; MIPS64-NEXT: addu $1, $5, $1 ; MIPS64-NEXT: addu $1, $4, $1 ; MIPS64-NEXT: jr $ra @@ -373,20 +373,20 @@ define i64 @mul22224078_64(i64 %a) { ; MIPS32-NEXT: sll $4, $5, 4 ; MIPS32-NEXT: subu $3, $4, $3 ; MIPS32-NEXT: sll $4, $5, 6 -; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: subu $3, $3, $4 ; MIPS32-NEXT: sll $4, $5, 8 -; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: addu $3, $4, $3 ; MIPS32-NEXT: sll $4, $5, 10 -; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: subu $3, $3, $4 ; MIPS32-NEXT: sll $4, $5, 13 -; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: addu $3, $4, $3 ; MIPS32-NEXT: sll $4, $5, 16 -; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: subu $3, $3, $4 ; MIPS32-NEXT: sll $4, $5, 24 ; MIPS32-NEXT: sll $6, $5, 22 ; MIPS32-NEXT: sll $7, $5, 20 ; MIPS32-NEXT: sll $5, $5, 18 -; MIPS32-NEXT: subu $3, $5, $3 +; MIPS32-NEXT: addu $3, $5, $3 ; MIPS32-NEXT: addu $3, $7, $3 ; MIPS32-NEXT: addu $3, $6, $3 ; MIPS32-NEXT: addu $3, $4, $3 @@ -399,20 +399,20 @@ define i64 @mul22224078_64(i64 %a) { ; MIPS64-NEXT: dsll $2, $4, 4 ; MIPS64-NEXT: dsubu $1, $2, $1 ; MIPS64-NEXT: dsll $2, $4, 6 -; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsubu $1, $1, $2 ; MIPS64-NEXT: dsll $2, $4, 8 -; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: daddu $1, $2, $1 ; MIPS64-NEXT: dsll $2, $4, 10 -; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsubu $1, $1, $2 ; MIPS64-NEXT: dsll $2, $4, 13 -; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: daddu $1, $2, $1 ; MIPS64-NEXT: dsll $2, $4, 16 -; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsubu $1, $1, $2 ; MIPS64-NEXT: dsll $2, $4, 24 ; MIPS64-NEXT: dsll $3, $4, 22 ; MIPS64-NEXT: dsll $5, $4, 20 ; MIPS64-NEXT: dsll $4, $4, 18 -; MIPS64-NEXT: dsubu $1, $4, $1 +; MIPS64-NEXT: daddu $1, $4, $1 ; MIPS64-NEXT: daddu $1, $5, $1 ; MIPS64-NEXT: daddu $1, $3, $1 ; MIPS64-NEXT: jr $ra diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll index 672baa5c1bd..5f2cc199bcf 100644 --- a/test/CodeGen/X86/rem.ll +++ b/test/CodeGen/X86/rem.ll @@ -15,8 +15,8 @@ define i32 @test1(i32 %X) { ; CHECK-NEXT: addl %eax, %edx ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $8, %eax -; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: subl %eax, %ecx +; CHECK-NEXT: subl %eax, %edx +; CHECK-NEXT: addl %edx, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: retl %tmp1 = srem i32 %X, 255 @@ -48,8 +48,8 @@ define i32 @test3(i32 %X) { ; CHECK-NEXT: shrl $7, %edx ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $8, %eax -; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: subl %eax, %ecx +; CHECK-NEXT: subl %eax, %edx +; CHECK-NEXT: addl %edx, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: retl %tmp1 = urem i32 %X, 255 diff --git a/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/test/CodeGen/X86/vector-idiv-sdiv-128.ll index 2416a177228..3f251dd8d62 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -301,9 +301,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE2-NEXT: sarq %rdx ; SSE2-NEXT: addq %rax, %rdx ; SSE2-NEXT: leaq (,%rdx,8), %rax -; 
SSE2-NEXT: subq %rdx, %rax -; SSE2-NEXT: subq %rax, %rcx -; SSE2-NEXT: movq %rcx, %xmm1 +; SSE2-NEXT: subq %rax, %rdx +; SSE2-NEXT: addq %rcx, %rdx +; SSE2-NEXT: movq %rdx, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE2-NEXT: movq %xmm0, %rcx ; SSE2-NEXT: movq %rcx, %rax @@ -313,9 +313,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE2-NEXT: sarq %rdx ; SSE2-NEXT: addq %rax, %rdx ; SSE2-NEXT: leaq (,%rdx,8), %rax -; SSE2-NEXT: subq %rdx, %rax -; SSE2-NEXT: subq %rax, %rcx -; SSE2-NEXT: movq %rcx, %xmm0 +; SSE2-NEXT: subq %rax, %rdx +; SSE2-NEXT: addq %rcx, %rdx +; SSE2-NEXT: movq %rdx, %xmm0 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -331,9 +331,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE41-NEXT: sarq %rdx ; SSE41-NEXT: addq %rax, %rdx ; SSE41-NEXT: leaq (,%rdx,8), %rax -; SSE41-NEXT: subq %rdx, %rax -; SSE41-NEXT: subq %rax, %rcx -; SSE41-NEXT: movq %rcx, %xmm1 +; SSE41-NEXT: subq %rax, %rdx +; SSE41-NEXT: addq %rcx, %rdx +; SSE41-NEXT: movq %rdx, %xmm1 ; SSE41-NEXT: movq %xmm0, %rcx ; SSE41-NEXT: movq %rcx, %rax ; SSE41-NEXT: imulq %rsi @@ -342,9 +342,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE41-NEXT: sarq %rdx ; SSE41-NEXT: addq %rax, %rdx ; SSE41-NEXT: leaq (,%rdx,8), %rax -; SSE41-NEXT: subq %rdx, %rax -; SSE41-NEXT: subq %rax, %rcx -; SSE41-NEXT: movq %rcx, %xmm0 +; SSE41-NEXT: subq %rax, %rdx +; SSE41-NEXT: addq %rcx, %rdx +; SSE41-NEXT: movq %rdx, %xmm0 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: retq ; @@ -359,9 +359,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm1 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm1 ; AVX-NEXT: vmovq %xmm0, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: imulq %rsi @@ -370,9 +370,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm0 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %res = srem <2 x i64> %a, diff --git a/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/test/CodeGen/X86/vector-idiv-sdiv-256.ll index c112e84fbf7..5df4d09e971 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-256.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-256.ll @@ -263,9 +263,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: sarq %rdx ; AVX1-NEXT: addq %rax, %rdx ; AVX1-NEXT: leaq (,%rdx,8), %rax -; AVX1-NEXT: subq %rdx, %rax -; AVX1-NEXT: subq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: addq %rcx, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm2 ; AVX1-NEXT: vmovq %xmm1, %rcx ; AVX1-NEXT: movq %rcx, %rax ; AVX1-NEXT: imulq %rsi @@ -274,9 +274,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: sarq %rdx ; AVX1-NEXT: addq %rax, %rdx ; AVX1-NEXT: leaq (,%rdx,8), %rax -; AVX1-NEXT: subq %rdx, %rax -; AVX1-NEXT: subq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: addq %rcx, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm1 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX1-NEXT: 
vpextrq $1, %xmm0, %rcx ; AVX1-NEXT: movq %rcx, %rax @@ -286,9 +286,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: sarq %rdx ; AVX1-NEXT: addq %rax, %rdx ; AVX1-NEXT: leaq (,%rdx,8), %rax -; AVX1-NEXT: subq %rdx, %rax -; AVX1-NEXT: subq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: addq %rcx, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm2 ; AVX1-NEXT: vmovq %xmm0, %rcx ; AVX1-NEXT: movq %rcx, %rax ; AVX1-NEXT: imulq %rsi @@ -297,9 +297,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: sarq %rdx ; AVX1-NEXT: addq %rax, %rdx ; AVX1-NEXT: leaq (,%rdx,8), %rax -; AVX1-NEXT: subq %rdx, %rax -; AVX1-NEXT: subq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: addq %rcx, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm0 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -316,9 +316,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: sarq %rdx ; AVX2-NEXT: addq %rax, %rdx ; AVX2-NEXT: leaq (,%rdx,8), %rax -; AVX2-NEXT: subq %rdx, %rax -; AVX2-NEXT: subq %rax, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: addq %rcx, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm2 ; AVX2-NEXT: vmovq %xmm1, %rcx ; AVX2-NEXT: movq %rcx, %rax ; AVX2-NEXT: imulq %rsi @@ -327,9 +327,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: sarq %rdx ; AVX2-NEXT: addq %rax, %rdx ; AVX2-NEXT: leaq (,%rdx,8), %rax -; AVX2-NEXT: subq %rdx, %rax -; AVX2-NEXT: subq %rax, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: addq %rcx, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm1 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX2-NEXT: vpextrq $1, %xmm0, %rcx ; AVX2-NEXT: movq %rcx, %rax @@ -339,9 +339,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: sarq %rdx ; AVX2-NEXT: addq %rax, %rdx ; AVX2-NEXT: leaq (,%rdx,8), %rax -; AVX2-NEXT: subq %rdx, %rax -; AVX2-NEXT: subq %rax, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: addq %rcx, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm2 ; AVX2-NEXT: vmovq %xmm0, %rcx ; AVX2-NEXT: movq %rcx, %rax ; AVX2-NEXT: imulq %rsi @@ -350,9 +350,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: sarq %rdx ; AVX2-NEXT: addq %rax, %rdx ; AVX2-NEXT: leaq (,%rdx,8), %rax -; AVX2-NEXT: subq %rdx, %rax -; AVX2-NEXT: subq %rax, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm0 +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: addq %rcx, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm0 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq diff --git a/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/test/CodeGen/X86/vector-idiv-sdiv-512.ll index 310e1fc7057..893c7d1bbd7 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-512.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-512.ll @@ -214,9 +214,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm2 ; AVX-NEXT: vmovq %xmm1, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: imulq %rsi @@ -225,9 +225,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: 
leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm1 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm1 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; AVX-NEXT: vpextrq $1, %xmm2, %rcx @@ -238,9 +238,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm3 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm3 ; AVX-NEXT: vmovq %xmm2, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: imulq %rsi @@ -249,9 +249,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm2 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2 @@ -263,9 +263,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm3 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm3 ; AVX-NEXT: vmovq %xmm2, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: imulq %rsi @@ -274,9 +274,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm2 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX-NEXT: vpextrq $1, %xmm0, %rcx ; AVX-NEXT: movq %rcx, %rax @@ -286,9 +286,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm3 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm3 ; AVX-NEXT: vmovq %xmm0, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: imulq %rsi @@ -297,9 +297,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: sarq %rdx ; AVX-NEXT: addq %rax, %rdx ; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 +; AVX-NEXT: subq %rax, %rdx +; AVX-NEXT: addq %rcx, %rdx +; AVX-NEXT: vmovq %rdx, %xmm0 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 diff --git a/test/CodeGen/X86/vector-idiv-udiv-128.ll b/test/CodeGen/X86/vector-idiv-udiv-128.ll index c991a905c05..598782ddd63 100644 --- a/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -278,9 +278,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE2-NEXT: addq %rdx, %rax ; SSE2-NEXT: shrq $2, %rax ; SSE2-NEXT: leaq (,%rax,8), %rdx -; SSE2-NEXT: subq %rax, %rdx -; SSE2-NEXT: subq %rdx, %rcx -; SSE2-NEXT: movq %rcx, %xmm1 +; SSE2-NEXT: subq %rdx, %rax +; 
SSE2-NEXT: addq %rcx, %rax +; SSE2-NEXT: movq %rax, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE2-NEXT: movq %xmm0, %rcx ; SSE2-NEXT: movq %rcx, %rax @@ -291,9 +291,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE2-NEXT: addq %rdx, %rax ; SSE2-NEXT: shrq $2, %rax ; SSE2-NEXT: leaq (,%rax,8), %rdx -; SSE2-NEXT: subq %rax, %rdx -; SSE2-NEXT: subq %rdx, %rcx -; SSE2-NEXT: movq %rcx, %xmm0 +; SSE2-NEXT: subq %rdx, %rax +; SSE2-NEXT: addq %rcx, %rax +; SSE2-NEXT: movq %rax, %xmm0 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -310,9 +310,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE41-NEXT: addq %rdx, %rax ; SSE41-NEXT: shrq $2, %rax ; SSE41-NEXT: leaq (,%rax,8), %rdx -; SSE41-NEXT: subq %rax, %rdx -; SSE41-NEXT: subq %rdx, %rcx -; SSE41-NEXT: movq %rcx, %xmm1 +; SSE41-NEXT: subq %rdx, %rax +; SSE41-NEXT: addq %rcx, %rax +; SSE41-NEXT: movq %rax, %xmm1 ; SSE41-NEXT: movq %xmm0, %rcx ; SSE41-NEXT: movq %rcx, %rax ; SSE41-NEXT: mulq %rsi @@ -322,9 +322,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; SSE41-NEXT: addq %rdx, %rax ; SSE41-NEXT: shrq $2, %rax ; SSE41-NEXT: leaq (,%rax,8), %rdx -; SSE41-NEXT: subq %rax, %rdx -; SSE41-NEXT: subq %rdx, %rcx -; SSE41-NEXT: movq %rcx, %xmm0 +; SSE41-NEXT: subq %rdx, %rax +; SSE41-NEXT: addq %rcx, %rax +; SSE41-NEXT: movq %rax, %xmm0 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: retq ; @@ -340,9 +340,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm1 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm1 ; AVX-NEXT: vmovq %xmm0, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: mulq %rsi @@ -352,9 +352,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm0 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %res = urem <2 x i64> %a, diff --git a/test/CodeGen/X86/vector-idiv-udiv-256.ll b/test/CodeGen/X86/vector-idiv-udiv-256.ll index 81d93984e26..377ff5ea77a 100644 --- a/test/CodeGen/X86/vector-idiv-udiv-256.ll +++ b/test/CodeGen/X86/vector-idiv-udiv-256.ll @@ -264,9 +264,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: addq %rdx, %rax ; AVX1-NEXT: shrq $2, %rax ; AVX1-NEXT: leaq (,%rax,8), %rdx -; AVX1-NEXT: subq %rax, %rdx -; AVX1-NEXT: subq %rdx, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: addq %rcx, %rax +; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vmovq %xmm1, %rcx ; AVX1-NEXT: movq %rcx, %rax ; AVX1-NEXT: mulq %rsi @@ -276,9 +276,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: addq %rdx, %rax ; AVX1-NEXT: shrq $2, %rax ; AVX1-NEXT: leaq (,%rax,8), %rdx -; AVX1-NEXT: subq %rax, %rdx -; AVX1-NEXT: subq %rdx, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: addq %rcx, %rax +; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX1-NEXT: vpextrq $1, %xmm0, %rcx ; AVX1-NEXT: movq %rcx, %rax @@ -289,9 +289,9 @@ define <4 x i64> 
@test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: addq %rdx, %rax ; AVX1-NEXT: shrq $2, %rax ; AVX1-NEXT: leaq (,%rax,8), %rdx -; AVX1-NEXT: subq %rax, %rdx -; AVX1-NEXT: subq %rdx, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: addq %rcx, %rax +; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vmovq %xmm0, %rcx ; AVX1-NEXT: movq %rcx, %rax ; AVX1-NEXT: mulq %rsi @@ -301,9 +301,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX1-NEXT: addq %rdx, %rax ; AVX1-NEXT: shrq $2, %rax ; AVX1-NEXT: leaq (,%rax,8), %rdx -; AVX1-NEXT: subq %rax, %rdx -; AVX1-NEXT: subq %rdx, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: addq %rcx, %rax +; AVX1-NEXT: vmovq %rax, %xmm0 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -321,9 +321,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: addq %rdx, %rax ; AVX2-NEXT: shrq $2, %rax ; AVX2-NEXT: leaq (,%rax,8), %rdx -; AVX2-NEXT: subq %rax, %rdx -; AVX2-NEXT: subq %rdx, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: addq %rcx, %rax +; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vmovq %xmm1, %rcx ; AVX2-NEXT: movq %rcx, %rax ; AVX2-NEXT: mulq %rsi @@ -333,9 +333,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: addq %rdx, %rax ; AVX2-NEXT: shrq $2, %rax ; AVX2-NEXT: leaq (,%rax,8), %rdx -; AVX2-NEXT: subq %rax, %rdx -; AVX2-NEXT: subq %rdx, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: addq %rcx, %rax +; AVX2-NEXT: vmovq %rax, %xmm1 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX2-NEXT: vpextrq $1, %xmm0, %rcx ; AVX2-NEXT: movq %rcx, %rax @@ -346,9 +346,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: addq %rdx, %rax ; AVX2-NEXT: shrq $2, %rax ; AVX2-NEXT: leaq (,%rax,8), %rdx -; AVX2-NEXT: subq %rax, %rdx -; AVX2-NEXT: subq %rdx, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: addq %rcx, %rax +; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vmovq %xmm0, %rcx ; AVX2-NEXT: movq %rcx, %rax ; AVX2-NEXT: mulq %rsi @@ -358,9 +358,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: addq %rdx, %rax ; AVX2-NEXT: shrq $2, %rax ; AVX2-NEXT: leaq (,%rax,8), %rdx -; AVX2-NEXT: subq %rax, %rdx -; AVX2-NEXT: subq %rdx, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm0 +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: addq %rcx, %rax +; AVX2-NEXT: vmovq %rax, %xmm0 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq diff --git a/test/CodeGen/X86/vector-idiv-udiv-512.ll b/test/CodeGen/X86/vector-idiv-udiv-512.ll index 1288f5a5d5b..22c359cb7e9 100644 --- a/test/CodeGen/X86/vector-idiv-udiv-512.ll +++ b/test/CodeGen/X86/vector-idiv-udiv-512.ll @@ -218,9 +218,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm2 ; AVX-NEXT: vmovq %xmm1, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: mulq %rsi @@ -230,9 +230,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: 
subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm1 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm1 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; AVX-NEXT: vpextrq $1, %xmm2, %rcx @@ -244,9 +244,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm3 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm3 ; AVX-NEXT: vmovq %xmm2, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: mulq %rsi @@ -256,9 +256,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm2 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2 @@ -271,9 +271,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm3 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm3 ; AVX-NEXT: vmovq %xmm2, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: mulq %rsi @@ -283,9 +283,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm2 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX-NEXT: vpextrq $1, %xmm0, %rcx ; AVX-NEXT: movq %rcx, %rax @@ -296,9 +296,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm3 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm3 ; AVX-NEXT: vmovq %xmm0, %rcx ; AVX-NEXT: movq %rcx, %rax ; AVX-NEXT: mulq %rsi @@ -308,9 +308,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { ; AVX-NEXT: addq %rdx, %rax ; AVX-NEXT: shrq $2, %rax ; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 +; AVX-NEXT: subq %rdx, %rax +; AVX-NEXT: addq %rcx, %rax +; AVX-NEXT: vmovq %rax, %xmm0 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
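
Not part of the patch itself: a minimal standalone C++ sketch (hypothetical file name fold_identity_check.cpp, not anything in the tree) that spot-checks the wrap-around-safe identity the new fold relies on, A - (B - C) == A + (C - B). It only illustrates why the rewrite is sound for fixed-width two's-complement integers; it is not a reproduction of the DAGCombiner code above.

// fold_identity_check.cpp -- illustrative sketch, not part of the patch.
// Checks that A - (B - C) == A + (C - B) in fixed-width unsigned
// (two's-complement) arithmetic, which is the identity the fold uses.
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t values[] = {0u, 1u, 7u, 255u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t a : values)
    for (uint32_t b : values)
      for (uint32_t c : values) {
        uint32_t lhs = a - (b - c); // original form: sub feeding a sub
        uint32_t rhs = a + (c - b); // rewritten form: sub feeding a commutable add
        if (lhs != rhs) {
          std::printf("mismatch: a=%u b=%u c=%u\n", a, b, c);
          return 1;
        }
      }
  std::printf("A - (B - C) == A + (C - B) for all sampled values\n");
  return 0;
}

Because both forms wrap modulo 2^N, the rewrite is valid unconditionally; the payoff, as the commit message notes, is that the resulting outer ADD is commutable and so gives later combines and CSE more freedom than the original outer SUB, which is what the test diffs above reflect.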