From: Craig Topper Date: Sat, 5 Jan 2019 22:42:58 +0000 (+0000) Subject: [X86] Use two pmovmskbs in combineBitcastvxi1 for (i64 (bitcast (v64i1 (truncate... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=16d8f6a1c9a08925493173af678ef4fa8da28a6b;p=llvm [X86] Use two pmovmskbs in combineBitcastvxi1 for (i64 (bitcast (v64i1 (truncate (v64i8))))) on KNL. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350481 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 67f2929dae7..f643482884e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32743,7 +32743,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // vpcmpeqb/vpcmpgtb. bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && (N0.getOperand(0).getValueType() == MVT::v16i8 || - N0.getOperand(0).getValueType() == MVT::v32i8); + N0.getOperand(0).getValueType() == MVT::v32i8 || + N0.getOperand(0).getValueType() == MVT::v64i8); // With AVX512 vxi1 types are legal and we prefer using k-regs. // MOVMSK is supported in SSE2 or later. @@ -32799,12 +32800,30 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, case MVT::v32i1: SExtVT = MVT::v32i8; break; + case MVT::v64i1: + // If we have AVX512F, but not AVX512BW, and the input was truncated from + // v64i8 (checked earlier), then split the input and emit two pmovmskbs.
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) { + SExtVT = MVT::v64i8; + break; + } + return SDValue(); }; SDLoc DL(BitCast); SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0); - if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) { + if (SExtVT == MVT::v64i8) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(V, DL); + Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo); + Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi); + Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi, + DAG.getConstant(32, DL, MVT::i8)); + V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi); + } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) { V = getPMOVMSKB(DL, V, DAG, Subtarget); } else { if (SExtVT == MVT::v8i16) diff --git a/test/CodeGen/X86/bitcast-setcc-512.ll b/test/CodeGen/X86/bitcast-setcc-512.ll index 1911aed2973..340c7ab3afd 100644 --- a/test/CodeGen/X86/bitcast-setcc-512.ll +++ b/test/CodeGen/X86/bitcast-setcc-512.ll @@ -256,26 +256,10 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; AVX512F-LABEL: v64i8: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, %ecx -; AVX512F-NEXT: shll $16, %ecx -; AVX512F-NEXT: orl %eax, %ecx -; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, %edx -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: shll $16, %eax -; AVX512F-NEXT: orl %edx, %eax +; 
AVX512F-NEXT: vpmovmskb %ymm0, %ecx +; AVX512F-NEXT: vpmovmskb %ymm1, %eax ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rcx, %rax ; AVX512F-NEXT: vzeroupper diff --git a/test/CodeGen/X86/movmsk-cmp.ll b/test/CodeGen/X86/movmsk-cmp.ll index 93d86b0e11d..bc16d8c710e 100644 --- a/test/CodeGen/X86/movmsk-cmp.ll +++ b/test/CodeGen/X86/movmsk-cmp.ll @@ -235,30 +235,11 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) { ; ; KNL-LABEL: allones_v64i8_sign: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: shll $16, %edx -; KNL-NEXT: orl %eax, %edx -; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx -; KNL-NEXT: cmpq $-1, %rdx +; KNL-NEXT: vpmovmskb %ymm1, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: vpmovmskb %ymm0, %ecx +; KNL-NEXT: orq %rax, %rcx +; KNL-NEXT: cmpq $-1, %rcx ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -327,29 +308,10 @@ define i1 @allzeros_v64i8_sign(<64 x i8> %arg) { ; ; KNL-LABEL: allzeros_v64i8_sign: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx 
-; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: shll $16, %edx -; KNL-NEXT: orl %eax, %edx -; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: vpmovmskb %ymm1, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: vpmovmskb %ymm0, %ecx +; KNL-NEXT: orq %rax, %rcx ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -1662,32 +1624,13 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) { ; ; KNL-LABEL: allones_v64i8_and1: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: shll $16, %edx -; KNL-NEXT: orl %eax, %edx -; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx -; KNL-NEXT: cmpq $-1, %rdx +; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $7, %ymm1, %ymm1 +; KNL-NEXT: vpmovmskb %ymm1, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: vpmovmskb %ymm0, %ecx +; KNL-NEXT: orq %rax, %rcx +; KNL-NEXT: cmpq $-1, %rcx ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: 
retq @@ -1771,31 +1714,12 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) { ; ; KNL-LABEL: allzeros_v64i8_and1: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: shll $16, %edx -; KNL-NEXT: orl %eax, %edx -; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $7, %ymm1, %ymm1 +; KNL-NEXT: vpmovmskb %ymm1, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: vpmovmskb %ymm0, %ecx +; KNL-NEXT: orq %rax, %rcx ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -3299,32 +3223,13 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) { ; ; KNL-LABEL: allones_v64i8_and4: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: 
vpcmpeqb %ymm2, %ymm1, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: shll $16, %edx -; KNL-NEXT: orl %eax, %edx -; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx -; KNL-NEXT: cmpq $-1, %rdx +; KNL-NEXT: vpsllw $5, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $5, %ymm1, %ymm1 +; KNL-NEXT: vpmovmskb %ymm1, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: vpmovmskb %ymm0, %ecx +; KNL-NEXT: orq %rax, %rcx +; KNL-NEXT: cmpq $-1, %rcx ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -3408,31 +3313,12 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) { ; ; KNL-LABEL: allzeros_v64i8_and4: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: shll $16, %edx -; KNL-NEXT: orl %eax, %edx -; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: vpsllw $5, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $5, %ymm1, %ymm1 +; KNL-NEXT: vpmovmskb %ymm1, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: vpmovmskb %ymm0, %ecx +; KNL-NEXT: orq %rax, %rcx ; KNL-NEXT: sete %al ; 
KNL-NEXT: vzeroupper ; KNL-NEXT: retq