From ba2cc0b9fcf946290e2d88128f9b3eef9c8ad4df Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Jul 2019 14:17:21 +0000 Subject: [PATCH] [X86][AVX] combineX86ShuffleChainWithExtract - add number of non-zero extract_subvectors to the combine depth This better accounts for the cost/benefit of removing extract_subvectors from the shuffle and will be more useful in future patches. The vpermq predicate regression will be fixed shortly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365041 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 +++ test/CodeGen/X86/avx512-shuffles/partial_permute.ll | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 96b9a5c50ce..2912c249283 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32429,6 +32429,9 @@ static SDValue combineX86ShuffleChainWithExtract( if (WideInputs.size() > 2) return SDValue(); + // Increase depth for every upper subvector we've peeked through. + Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; }); + // Attempt to combine wider chain. // TODO: Can we use a better Root? SDValue WideRoot = WideInputs[0]; diff --git a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index b3e154b3107..de581aa3fb9 100644 --- a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -2216,9 +2216,9 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i64 define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i64> %mask) { ; CHECK-LABEL: test_masked_z_8xi64_to_2xi64_perm_mask0: ; CHECK: # %bb.0: +; CHECK-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,0,2,3,7,4,6,7] ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,2,3,7,4,6,7] -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> -- 2.50.1