From 93de9f6cdd58a2666075a3badf09c47133f3ce24 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 5 Dec 2017 01:28:06 +0000 Subject: [PATCH] [X86] Don't use kunpck for vXi1 concat_vectors if the upper bits are undef. This can be efficiently selected by a COPY_TO_REGCLASS without the need for an extra instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 8 +++++--- test/CodeGen/X86/avx512-skx-insert-subvec.ll | 3 --- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ee8351c81ce..234af160178 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8318,6 +8318,11 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, V1.getValueType().getVectorNumElements() == NumElems/2 && "Unexpected operands in CONCAT_VECTORS"); + // If this can be done with a subreg insert do that first. + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); + if (V2.isUndef()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); + if (ResVT.getSizeInBits() >= 16) return Op; // The operation is legal with KUNPCK @@ -8327,9 +8332,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, if (IsZeroV1 && IsZeroV2) return ZeroVec; - SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - if (V2.isUndef()) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); if (IsZeroV2) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx); diff --git a/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/test/CodeGen/X86/avx512-skx-insert-subvec.ll index 91c3b73a959..4c650903e47 100644 --- a/test/CodeGen/X86/avx512-skx-insert-subvec.ll +++ b/test/CodeGen/X86/avx512-skx-insert-subvec.ll @@ -100,7 +100,6 @@ define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) { ; CHECK-NEXT: kshiftlb $2, %k0, %k0 ; CHECK-NEXT: kshiftrb $2, %k0, %k0 ; CHECK-NEXT: korb %k1, %k0, %k0 -; CHECK-NEXT: kunpckbw %k0, %k0, %k0 ; CHECK-NEXT: vpmovm2b %k0, %xmm0 ; CHECK-NEXT: retq @@ -119,8 +118,6 @@ define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) { ; CHECK-NEXT: kshiftlb $4, %k0, %k0 ; CHECK-NEXT: kshiftrb $4, %k0, %k0 ; CHECK-NEXT: korb %k1, %k0, %k0 -; CHECK-NEXT: kunpckbw %k0, %k0, %k0 -; CHECK-NEXT: kunpckwd %k0, %k0, %k0 ; CHECK-NEXT: vpmovm2b %k0, %ymm0 ; CHECK-NEXT: retq -- 2.50.1