From: Craig Topper Date: Sun, 27 Aug 2017 05:39:57 +0000 (+0000) Subject: [X86] Add a target-specific DAG combine to combine extract_subvector from all zero... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=395cdbc9b5fe52bd976e5d5e9ce5b722425995bf;p=llvm [X86] Add a target-specific DAG combine to combine extract_subvector from all zero/one build_vectors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311841 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bd1d09b50ce..ecc8ff0b1af 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1628,6 +1628,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::INSERT_SUBVECTOR); + setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); @@ -35678,6 +35679,29 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Fold an EXTRACT_SUBVECTOR of an all-zeros or all-ones build_vector into a +/// constant of the extracted type. Does nothing before op legalization. +static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + MVT OpVT = N->getSimpleValueType(0); + + if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) + return getZeroVector(OpVT, Subtarget, DAG, SDLoc(N)); + + if (ISD::isBuildVectorAllOnes(N->getOperand(0).getNode())) { + // i1 element types are built with getConstant(1); any other element type + // must stay all-ones, so it goes through getOnes, not getZeroVector. + if (OpVT.getScalarType() == MVT::i1) + return DAG.getConstant(1, SDLoc(N), OpVT); + return getOnes(OpVT, DAG, SDLoc(N)); + } + + return SDValue(); +} SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -35691,6 +35715,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget); case
ISD::INSERT_SUBVECTOR: return combineInsertSubvector(N, DAG, DCI, Subtarget); + case ISD::EXTRACT_SUBVECTOR: + return combineExtractSubvector(N, DAG, DCI, Subtarget); case ISD::VSELECT: case ISD::SELECT: case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget); diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll index b5f1f6d539c..9f17ce4b2ec 100644 --- a/test/CodeGen/X86/2012-1-10-buildvector.ll +++ b/test/CodeGen/X86/2012-1-10-buildvector.ll @@ -7,7 +7,6 @@ define void @bad_cast() { ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmovaps %xmm0, (%eax) ; CHECK-NEXT: movl $0, (%eax) -; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index ba4f6b80ab0..93754c69ed1 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -722,10 +722,8 @@ define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) { ; KNL_64-NEXT: vpsllq $2, %zmm1, %zmm1 ; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm1 {%k2} -; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm2 {%k1} -; KNL_64-NEXT: vinsertf64x4 $1, %ymm1, %zmm2, %zmm0 +; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm1 {%k1} +; KNL_64-NEXT: vinsertf64x4 $1, %ymm1, %zmm1, %zmm0 ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test14: @@ -747,10 +745,8 @@ define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) { ; SKX-NEXT: vpsllq $2, %zmm1, %zmm1 ; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vgatherqps (,%zmm0), %ymm1 {%k2} -; SKX-NEXT: vgatherqps (,%zmm0), %ymm2 {%k1} -; SKX-NEXT: vinsertf64x4 $1, 
%ymm1, %zmm2, %zmm0 +; SKX-NEXT: vgatherqps (,%zmm0), %ymm1 {%k1} +; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm1, %zmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test14: @@ -1624,7 +1620,6 @@ define <16 x float*> @test31(<16 x float**> %ptrs) { ; KNL_64-NEXT: kxnorw %k0, %k0, %k1 ; KNL_64-NEXT: kxnorw %k0, %k0, %k2 ; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2} -; KNL_64-NEXT: kshiftrw $8, %k1, %k1 ; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1} ; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0 ; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm1 @@ -1642,7 +1637,6 @@ define <16 x float*> @test31(<16 x float**> %ptrs) { ; SKX-NEXT: kxnorw %k0, %k0, %k1 ; SKX-NEXT: kxnorw %k0, %k0, %k2 ; SKX-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2} -; SKX-NEXT: kshiftrw $8, %k1, %k1 ; SKX-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1} ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm1 diff --git a/test/CodeGen/X86/pr34139.ll b/test/CodeGen/X86/pr34139.ll index c20c2cd510c..0aea3fcfdba 100644 --- a/test/CodeGen/X86/pr34139.ll +++ b/test/CodeGen/X86/pr34139.ll @@ -6,14 +6,6 @@ define void @f_f(<16 x double>* %ptr) { ; CHECK: # BB#0: ; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmovdqa %xmm0, (%rax) -; CHECK-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; CHECK-NEXT: vmovapd (%rdi), %zmm1 -; CHECK-NEXT: vmovapd 64(%rdi), %zmm2 -; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovapd %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vmovapd %zmm2, 64(%rdi) -; CHECK-NEXT: vmovapd %zmm1, (%rdi) store <16 x i8> , <16 x i8>* undef %load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef %v.i.i.i.i = load <16 x double>, <16 x double>* %ptr