From: Simon Pilgrim
Date: Sat, 22 Jun 2019 17:57:01 +0000 (+0000)
Subject: [X86][SSE] Fold extract_subvector(vselect(x,y,z),0) -> vselect(extract_subvector...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a600636743291714a3afeeda8ae7f479b50913e7;p=llvm

[X86][SSE] Fold extract_subvector(vselect(x,y,z),0) -> vselect(extract_subvector(x,0),extract_subvector(y,0),extract_subvector(z,0))

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364136 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b21f8fa25cd..bb2545a24db 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -43623,6 +43623,16 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
       unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode);
       return DAG.getNode(ExtOp, SDLoc(N), VT, InVec.getOperand(0));
     }
+    if (InOpcode == ISD::VSELECT &&
+        InVec.getOperand(0).getValueType().is256BitVector() &&
+        InVec.getOperand(1).getValueType().is256BitVector() &&
+        InVec.getOperand(2).getValueType().is256BitVector()) {
+      SDLoc DL(N);
+      SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128);
+      SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128);
+      SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
+      return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
+    }
   }
 
   return SDValue();
diff --git a/test/CodeGen/X86/horizontal-reduce-smax.ll b/test/CodeGen/X86/horizontal-reduce-smax.ll
index ce3210c477c..ec831ac9b9f 100644
--- a/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -545,7 +545,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1256,7 +1256,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/horizontal-reduce-smin.ll b/test/CodeGen/X86/horizontal-reduce-smin.ll
index eaae04e5c94..1adb3368767 100644
--- a/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -549,7 +549,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1260,7 +1260,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/horizontal-reduce-umax.ll b/test/CodeGen/X86/horizontal-reduce-umax.ll
index 34df7ac33de..b676b2acb77 100644
--- a/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -654,7 +654,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X64-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1434,7 +1434,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X64-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/horizontal-reduce-umin.ll b/test/CodeGen/X86/horizontal-reduce-umin.ll
index 2b21d27b048..8c524aa9269 100644
--- a/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -594,7 +594,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1338,7 +1338,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/var-permute-128.ll b/test/CodeGen/X86/var-permute-128.ll
index 50cedfd2a29..49bbfa9d8cf 100644
--- a/test/CodeGen/X86/var-permute-128.ll
+++ b/test/CodeGen/X86/var-permute-128.ll
@@ -1031,8 +1031,7 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
@@ -1043,8 +1042,7 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
 ; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
 ; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 ;
diff --git a/test/CodeGen/X86/var-permute-256.ll b/test/CodeGen/X86/var-permute-256.ll
index 5d5ed467e85..d1594eddf44 100644
--- a/test/CodeGen/X86/var-permute-256.ll
+++ b/test/CodeGen/X86/var-permute-256.ll
@@ -1126,8 +1126,7 @@ define <4 x i32> @var_shuffle_v4i32_from_v8i32(<8 x i32> %v, <4 x i32> %indices)
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
diff --git a/test/CodeGen/X86/vector-reduce-smax-widen.ll b/test/CodeGen/X86/vector-reduce-smax-widen.ll
index 447c17a366f..e1fe08ca92e 100644
--- a/test/CodeGen/X86/vector-reduce-smax-widen.ll
+++ b/test/CodeGen/X86/vector-reduce-smax-widen.ll
@@ -165,7 +165,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -342,7 +342,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -644,7 +644,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-reduce-smax.ll b/test/CodeGen/X86/vector-reduce-smax.ll
index 1b15929bc19..7cb0da94f95 100644
--- a/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/test/CodeGen/X86/vector-reduce-smax.ll
@@ -165,7 +165,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -342,7 +342,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -644,7 +644,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-reduce-smin-widen.ll b/test/CodeGen/X86/vector-reduce-smin-widen.ll
index 52e3b94036f..25e4d0ddf8f 100644
--- a/test/CodeGen/X86/vector-reduce-smin-widen.ll
+++ b/test/CodeGen/X86/vector-reduce-smin-widen.ll
@@ -164,7 +164,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -341,7 +341,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -643,7 +643,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-reduce-smin.ll b/test/CodeGen/X86/vector-reduce-smin.ll
index 9625f4c8579..b7c05a9eff9 100644
--- a/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/test/CodeGen/X86/vector-reduce-smin.ll
@@ -164,7 +164,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -341,7 +341,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -643,7 +643,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-reduce-umax-widen.ll b/test/CodeGen/X86/vector-reduce-umax-widen.ll
index 7b3d4b4c1ef..4e1cc5f23df 100644
--- a/test/CodeGen/X86/vector-reduce-umax-widen.ll
+++ b/test/CodeGen/X86/vector-reduce-umax-widen.ll
@@ -178,7 +178,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -371,7 +371,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -701,7 +701,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
 ; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-reduce-umax.ll b/test/CodeGen/X86/vector-reduce-umax.ll
index f92153e3fa8..e577232beeb 100644
--- a/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/test/CodeGen/X86/vector-reduce-umax.ll
@@ -178,7 +178,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -371,7 +371,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -701,7 +701,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
 ; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-reduce-umin-widen.ll b/test/CodeGen/X86/vector-reduce-umin-widen.ll
index e5cbd1b73e5..8cdf00c22a6 100644
--- a/test/CodeGen/X86/vector-reduce-umin-widen.ll
+++ b/test/CodeGen/X86/vector-reduce-umin-widen.ll
@@ -177,7 +177,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -370,7 +370,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -700,7 +700,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
 ; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-reduce-umin.ll b/test/CodeGen/X86/vector-reduce-umin.ll
index 258cf462342..75c9b103121 100644
--- a/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/test/CodeGen/X86/vector-reduce-umin.ll
@@ -177,7 +177,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -370,7 +370,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -700,7 +700,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
 ; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
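
Background note (not part of the commit): ISD::VSELECT is lane-wise, so taking the low 128-bit subvector of a 256-bit select result is the same as selecting between the low 128-bit halves of the operands. That identity is what lets the 256-bit vblendvpd/vpblendvb/vblendvps in the tests above shrink to their 128-bit forms and drop the "# kill" ymm-to-xmm copies. Below is a minimal standalone sketch of the identity in plain C++, modelling a 256-bit vector of 4 x i64 as an array; vselect256, vselect128 and low128 are illustrative helpers invented for this sketch, not LLVM's SelectionDAG API.

// low_half_select.cpp - illustration of why the fold is value-preserving.
#include <array>
#include <cstdint>

using V256 = std::array<int64_t, 4>; // a 256-bit vector of 4 x i64
using V128 = std::array<int64_t, 2>; // its low 128-bit half (2 x i64)
using M256 = std::array<bool, 4>;    // a per-lane vselect mask
using M128 = std::array<bool, 2>;

// Lane-wise select: each result lane comes from A when the mask lane is set,
// otherwise from B (the semantics of ISD::VSELECT / vblendv*).
static V256 vselect256(const M256 &M, const V256 &A, const V256 &B) {
  V256 R{};
  for (int I = 0; I != 4; ++I)
    R[I] = M[I] ? A[I] : B[I];
  return R;
}

static V128 vselect128(const M128 &M, const V128 &A, const V128 &B) {
  V128 R{};
  for (int I = 0; I != 2; ++I)
    R[I] = M[I] ? A[I] : B[I];
  return R;
}

// extract_subvector(V, 0): keep only the low two lanes (the low 128 bits).
static V128 low128(const V256 &V) { return {V[0], V[1]}; }
static M128 low128(const M256 &M) { return {M[0], M[1]}; }

int main() {
  M256 M = {true, false, false, true};
  V256 A = {1, 2, 3, 4};
  V256 B = {-1, -2, -3, -4};

  // extract_subvector(vselect(M, A, B), 0) ...
  V128 Narrowed = low128(vselect256(M, A, B));
  // ... equals vselect(extract_subvector(M, 0), extract_subvector(A, 0),
  //                    extract_subvector(B, 0)).
  V128 Folded = vselect128(low128(M), low128(A), low128(B));

  return Narrowed == Folded ? 0 : 1; // exits 0: the two forms agree lane for lane
}

The new combineExtractSubvector code expresses the same reasoning at the DAG level: when the consumer only reads the low 128 bits of a 256-bit VSELECT, the select and its operands can be narrowed to 128 bits up front.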