From d6e2c2034c2bd21db812bbf73157fd0bb7ac14a8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 11 May 2019 19:14:19 +0000 Subject: [PATCH] [X86] Add avx512f tests for boolean reduction git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360529 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vector-reduce-and-bool.ll | 532 +++++++++++++--- test/CodeGen/X86/vector-reduce-or-bool.ll | 527 +++++++++++++--- test/CodeGen/X86/vector-reduce-xor-bool.ll | 680 +++++++++++++++++---- 3 files changed, 1464 insertions(+), 275 deletions(-) diff --git a/test/CodeGen/X86/vector-reduce-and-bool.ll b/test/CodeGen/X86/vector-reduce-and-bool.ll index 7c432a36397..b7ebe04cefb 100644 --- a/test/CodeGen/X86/vector-reduce-and-bool.ll +++ b/test/CodeGen/X86/vector-reduce-and-bool.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL @@ -27,6 +28,17 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v2i64_v2i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andb $3, %al +; AVX512F-NEXT: cmpb $3, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -69,6 +81,17 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v4i32_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andb $15, %al +; AVX512F-NEXT: cmpb $15, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 @@ -113,6 +136,17 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v8i16_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: cmpb $-1, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v8i16_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -186,6 +220,17 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v4i64_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andb $15, %al +; AVX512F-NEXT: cmpb $15, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0 @@ -267,6 +312,16 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: trunc_v8i32_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: cmpb $-1, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v8i32_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 @@ -340,6 +395,16 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: trunc_v16i16_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setb %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v16i16_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0 @@ -392,14 +457,43 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v32i8_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512-NEXT: vpmovmskb %ymm0, %eax -; AVX512-NEXT: cmpl $-1, %eax -; AVX512-NEXT: sete %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v32i8_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v32i8_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovmskb %ymm0, %eax +; AVX512BW-NEXT: cmpl $-1, %eax +; AVX512BW-NEXT: sete %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v32i8_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovmskb %ymm0, %eax +; AVX512VL-NEXT: cmpl $-1, %eax +; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <32 x i8> %0 to <32 x i1> %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a) ret i1 %b @@ -478,15 +572,35 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v8i64_v8i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: cmpb $-1, %al -; AVX512-NEXT: sete %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v8i64_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: cmpb $-1, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v8i64_v8i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: cmpb $-1, %al +; AVX512BW-NEXT: sete %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v8i64_v8i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: cmpb $-1, %al +; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <8 x i64> %0 to <8 x i1> %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b @@ -635,14 +749,42 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v32i16_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $15, %zmm0, %zmm0 -; AVX512-NEXT: vpmovw2m %zmm0, %k0 -; AVX512-NEXT: kortestd %k0, %k0 -; AVX512-NEXT: setb %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v32i16_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v32i16_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 +; AVX512BW-NEXT: kortestd %k0, %k0 +; AVX512BW-NEXT: setb %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v32i16_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovw2m %zmm0, %k0 +; AVX512VL-NEXT: kortestd %k0, %k0 +; AVX512VL-NEXT: setb %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <32 x i16> %0 to <32 x i1> %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a) ret i1 %b @@ -723,26 +865,70 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v64i8_v64i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512-NEXT: vpmovb2m %zmm0, %k0 -; AVX512-NEXT: kshiftrq $32, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $16, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $8, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $4, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $2, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $1, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v64i8_v64i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX512F-NEXT: vpand %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v64i8_v64i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $32, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $16, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $8, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $4, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $2, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $1, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v64i8_v64i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovb2m %zmm0, %k0 +; AVX512VL-NEXT: kshiftrq $32, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $16, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $8, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $4, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $2, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $1, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <64 x i8> %0 to <64 x i1> %b = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> %a) ret i1 %b @@ -782,6 +968,17 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v2i64_v2i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andb $3, %al +; AVX512F-NEXT: cmpb $3, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -825,6 +1022,17 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v4i32_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andb $15, %al +; AVX512F-NEXT: cmpb $15, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -872,6 +1080,19 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v8i16_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: cmpb $-1, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v8i16_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -914,6 +1135,15 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v16i8_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovmskb %xmm0, %eax +; AVX512F-NEXT: cmpw $-1, %ax +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v16i8_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -984,6 +1214,17 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v4i64_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andb $15, %al +; AVX512F-NEXT: cmpb $15, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1045,6 +1286,16 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v8i32_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: cmpb $-1, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v8i32_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1105,6 +1356,17 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v16i16_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setb %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v16i16_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1160,6 +1422,27 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v32i8_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v32i8_v32i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1253,14 +1536,32 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v8i64_v8i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: cmpb $-1, %al -; AVX512-NEXT: sete %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v8i64_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: cmpb $-1, %al +; AVX512F-NEXT: sete %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v8i64_v8i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: cmpb $-1, %al +; AVX512BW-NEXT: sete %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v8i64_v8i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: cmpb $-1, %al +; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <8 x i64> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a) ret i1 %b @@ -1375,13 +1676,41 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v32i16_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmw %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestd %k0, %k0 -; AVX512-NEXT: setb %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v32i16_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v32i16_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kortestd %k0, %k0 +; AVX512BW-NEXT: setb %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v32i16_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kortestd %k0, %k0 +; AVX512VL-NEXT: setb %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <32 x i16> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a) ret i1 %b @@ -1428,25 +1757,70 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v64i8_v64i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmb %zmm0, %zmm0, %k0 -; AVX512-NEXT: kshiftrq $32, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $16, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $8, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $4, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $2, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $1, %k0, %k1 -; AVX512-NEXT: kandq %k1, %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v64i8_v64i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX512F-NEXT: vpand %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v64i8_v64i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $32, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $16, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $8, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $4, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $2, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $1, %k0, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v64i8_v64i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kshiftrq $32, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $16, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $8, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $4, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $2, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $1, %k0, %k1 +; AVX512VL-NEXT: kandq %k1, %k0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <64 x i8> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> %a) ret i1 %b diff --git a/test/CodeGen/X86/vector-reduce-or-bool.ll b/test/CodeGen/X86/vector-reduce-or-bool.ll index e887ee28ce0..d41403e6794 100644 --- a/test/CodeGen/X86/vector-reduce-or-bool.ll +++ b/test/CodeGen/X86/vector-reduce-or-bool.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL @@ -27,6 +28,16 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v2i64_v2i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb $3, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -67,6 +78,16 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v4i32_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb $15, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 @@ -109,6 +130,17 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v8i16_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v8i16_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -182,6 +214,16 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v4i64_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb $15, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0 @@ -261,6 +303,16 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: trunc_v8i32_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v8i32_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 @@ -334,6 +386,16 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: trunc_v16i16_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v16i16_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0 @@ -386,14 +448,43 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v32i8_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512-NEXT: vpmovmskb %ymm0, %eax -; AVX512-NEXT: testl %eax, %eax -; AVX512-NEXT: setne %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v32i8_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v32i8_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovmskb %ymm0, %eax +; AVX512BW-NEXT: testl %eax, %eax +; AVX512BW-NEXT: setne %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v32i8_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovmskb %ymm0, %eax +; AVX512VL-NEXT: testl %eax, %eax +; AVX512VL-NEXT: setne %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <32 x i8> %0 to <32 x i1> %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a) ret i1 %b @@ -472,15 +563,35 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v8i64_v8i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: testb %al, %al -; AVX512-NEXT: setne %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v8i64_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v8i64_v8i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: testb %al, %al +; AVX512BW-NEXT: setne %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v8i64_v8i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: setne %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <8 x i64> %0 to <8 x i1> %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a) ret i1 %b @@ -629,14 +740,42 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v32i16_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $15, %zmm0, %zmm0 -; AVX512-NEXT: vpmovw2m %zmm0, %k0 -; AVX512-NEXT: kortestd %k0, %k0 -; AVX512-NEXT: setne %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v32i16_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v32i16_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 +; AVX512BW-NEXT: kortestd %k0, %k0 +; AVX512BW-NEXT: setne %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v32i16_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovw2m %zmm0, %k0 +; AVX512VL-NEXT: kortestd %k0, %k0 +; AVX512VL-NEXT: setne %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <32 x i16> %0 to <32 x i1> %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a) ret i1 %b @@ -717,26 +856,70 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v64i8_v64i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512-NEXT: vpmovb2m %zmm0, %k0 -; AVX512-NEXT: kshiftrq $32, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $16, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $8, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $4, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $2, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $1, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v64i8_v64i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v64i8_v64i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $32, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $16, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $8, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $4, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $2, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $1, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v64i8_v64i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovb2m %zmm0, %k0 +; AVX512VL-NEXT: kshiftrq $32, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $16, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $8, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $4, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $2, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $1, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <64 x i8> %0 to <64 x i1> %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a) ret i1 %b @@ -776,6 +959,16 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v2i64_v2i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb $3, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -817,6 +1010,16 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v4i32_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb $15, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -862,6 +1065,19 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v8i16_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v8i16_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -904,6 +1120,15 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v16i8_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovmskb %xmm0, %eax +; AVX512F-NEXT: testw %ax, %ax +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v16i8_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -974,6 +1199,16 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v4i64_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb $15, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1033,6 +1268,16 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v8i32_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v8i32_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1093,6 +1338,17 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v16i16_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v16i16_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1149,6 +1405,27 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v32i8_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v32i8_v32i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1242,14 +1519,32 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v8i64_v8i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: testb %al, %al -; AVX512-NEXT: setne %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v8i64_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: setne %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v8i64_v8i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: testb %al, %al +; AVX512BW-NEXT: setne %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v8i64_v8i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: setne %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <8 x i64> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a) ret i1 %b @@ -1364,13 +1659,42 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v32i16_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmw %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestd %k0, %k0 -; AVX512-NEXT: setne %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v32i16_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v32i16_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kortestd %k0, %k0 +; AVX512BW-NEXT: setne %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v32i16_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kortestd %k0, %k0 +; AVX512VL-NEXT: setne %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <32 x i16> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a) ret i1 %b @@ -1422,25 +1746,70 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v64i8_v64i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmb %zmm0, %zmm0, %k0 -; AVX512-NEXT: kshiftrq $32, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $16, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $8, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $4, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $2, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $1, %k0, %k1 -; AVX512-NEXT: korq %k1, %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v64i8_v64i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v64i8_v64i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $32, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $16, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $8, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $4, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $2, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $1, %k0, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v64i8_v64i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kshiftrq $32, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $16, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $8, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $4, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $2, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $1, %k0, %k1 +; AVX512VL-NEXT: korq %k1, %k0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <64 x i8> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a) ret i1 %b diff --git a/test/CodeGen/X86/vector-reduce-xor-bool.ll b/test/CodeGen/X86/vector-reduce-xor-bool.ll index 34d5e3f988f..9cfa1dc7561 100644 --- a/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL @@ -27,6 +28,17 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v2i64_v2i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andl $3, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -69,6 +81,17 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v4i32_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andl $15, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 @@ -113,6 +136,17 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v8i16_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v8i16_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -192,6 +226,17 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; +; AVX512F-LABEL: trunc_v4i64_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andl $15, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0 @@ -273,6 +318,16 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: trunc_v8i32_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v8i32_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 @@ -354,6 +409,19 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: trunc_v16i16_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: movl %eax, %ecx +; AVX512F-NEXT: shrl $8, %ecx +; AVX512F-NEXT: xorb %al, %cl +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: trunc_v16i16_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0 @@ -421,19 +489,53 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v32i8_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512-NEXT: vpmovmskb %ymm0, %eax -; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: shrl $16, %ecx -; AVX512-NEXT: xorl %eax, %ecx -; AVX512-NEXT: movl %ecx, %eax -; AVX512-NEXT: shrl $8, %eax -; AVX512-NEXT: xorb %cl, %al -; AVX512-NEXT: setnp %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v32i8_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v32i8_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovmskb %ymm0, %eax +; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: shrl $16, %ecx +; AVX512BW-NEXT: xorl %eax, %ecx +; AVX512BW-NEXT: movl %ecx, %eax +; AVX512BW-NEXT: shrl $8, %eax +; AVX512BW-NEXT: xorb %cl, %al +; AVX512BW-NEXT: setnp %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v32i8_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovmskb %ymm0, %eax +; AVX512VL-NEXT: movl %eax, %ecx +; AVX512VL-NEXT: shrl $16, %ecx +; AVX512VL-NEXT: xorl %eax, %ecx +; AVX512VL-NEXT: movl %ecx, %eax +; AVX512VL-NEXT: shrl $8, %eax +; AVX512VL-NEXT: xorb %cl, %al +; AVX512VL-NEXT: setnp %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <32 x i8> %0 to <32 x i1> %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a) ret i1 %b @@ -512,15 +614,35 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v8i64_v8i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: xorb $0, %al -; AVX512-NEXT: setnp %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v8i64_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v8i64_v8i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: setnp %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v8i64_v8i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: setnp %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <8 x i64> %0 to <8 x i1> %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b @@ -602,17 +724,41 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v16i32_v16i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: shrl $8, %ecx -; AVX512-NEXT: xorb %al, %cl -; AVX512-NEXT: setnp %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v16i32_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: movl %eax, %ecx +; AVX512F-NEXT: shrl $8, %ecx +; AVX512F-NEXT: xorb %al, %cl +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v16i32_v16i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: shrl $8, %ecx +; AVX512BW-NEXT: xorb %al, %cl +; AVX512BW-NEXT: setnp %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v16i32_v16i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: movl %eax, %ecx +; AVX512VL-NEXT: shrl $8, %ecx +; AVX512VL-NEXT: xorb %al, %cl +; AVX512VL-NEXT: setnp %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <16 x i32> %0 to <16 x i1> %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b @@ -691,20 +837,54 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v32i16_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $15, %zmm0, %zmm0 -; AVX512-NEXT: vpmovw2m %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: shrl $16, %ecx -; AVX512-NEXT: xorl %eax, %ecx -; AVX512-NEXT: movl %ecx, %eax -; AVX512-NEXT: shrl $8, %eax -; AVX512-NEXT: xorb %cl, %al -; AVX512-NEXT: setnp %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v32i16_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v32i16_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: shrl $16, %ecx +; AVX512BW-NEXT: xorl %eax, %ecx +; AVX512BW-NEXT: movl %ecx, %eax +; AVX512BW-NEXT: shrl $8, %eax +; AVX512BW-NEXT: xorb %cl, %al +; AVX512BW-NEXT: setnp %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v32i16_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovw2m %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: movl %eax, %ecx +; AVX512VL-NEXT: shrl $16, %ecx +; AVX512VL-NEXT: xorl %eax, %ecx +; AVX512VL-NEXT: movl %ecx, %eax +; AVX512VL-NEXT: shrl $8, %eax +; AVX512VL-NEXT: xorb %cl, %al +; AVX512VL-NEXT: setnp %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <32 x i16> %0 to <32 x i1> %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a) ret i1 %b @@ -785,26 +965,70 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_v64i8_v64i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512-NEXT: vpmovb2m %zmm0, %k0 -; AVX512-NEXT: kshiftrq $32, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $16, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $8, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $4, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $2, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $1, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_v64i8_v64i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX512F-NEXT: vpxor %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_v64i8_v64i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $32, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $16, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $8, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $4, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $2, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $1, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: trunc_v64i8_v64i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovb2m %zmm0, %k0 +; AVX512VL-NEXT: kshiftrq $32, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $16, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $8, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $4, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $2, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $1, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = trunc <64 x i8> %0 to <64 x i1> %b = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> %a) ret i1 %b @@ -844,6 +1068,17 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v2i64_v2i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andl $3, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -887,6 +1122,17 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v4i32_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andl $15, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -934,6 +1180,19 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v8i16_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v8i16_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -980,6 +1239,17 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; +; AVX512F-LABEL: icmp_v16i8_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovmskb %xmm0, %eax +; AVX512F-NEXT: movl %eax, %ecx +; AVX512F-NEXT: shrl $8, %ecx +; AVX512F-NEXT: xorb %al, %cl +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v16i8_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 @@ -1056,6 +1326,17 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v4i64_v4i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andl $15, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1117,6 +1398,16 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v8i32_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v8i32_v8i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1183,6 +1474,20 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v16i16_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: movl %eax, %ecx +; AVX512F-NEXT: shrl $8, %ecx +; AVX512F-NEXT: xorb %al, %cl +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v16i16_v16i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1254,6 +1559,27 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; +; AVX512F-LABEL: icmp_v32i8_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; ; AVX512BW-LABEL: icmp_v32i8_v32i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -1359,14 +1685,32 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v8i64_v8i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: xorb $0, %al -; AVX512-NEXT: setnp %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v8i64_v8i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v8i64_v8i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: setnp %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v8i64_v8i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: setnp %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <8 x i64> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a) ret i1 %b @@ -1427,16 +1771,38 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v16i32_v16i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: shrl $8, %ecx -; AVX512-NEXT: xorb %al, %cl -; AVX512-NEXT: setnp %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v16i32_v16i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: movl %eax, %ecx +; AVX512F-NEXT: shrl $8, %ecx +; AVX512F-NEXT: xorb %al, %cl +; AVX512F-NEXT: setnp %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v16i32_v16i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: shrl $8, %ecx +; AVX512BW-NEXT: xorb %al, %cl +; AVX512BW-NEXT: setnp %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v16i32_v16i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: movl %eax, %ecx +; AVX512VL-NEXT: shrl $8, %ecx +; AVX512VL-NEXT: xorb %al, %cl +; AVX512VL-NEXT: setnp %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <16 x i32> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a) ret i1 %b @@ -1499,19 +1865,54 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v32i16_v32i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmw %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: shrl $16, %ecx -; AVX512-NEXT: xorl %eax, %ecx -; AVX512-NEXT: movl %ecx, %eax -; AVX512-NEXT: shrl $8, %eax -; AVX512-NEXT: xorb %cl, %al -; AVX512-NEXT: setnp %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v32i16_v32i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v32i16_v32i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: shrl $16, %ecx +; AVX512BW-NEXT: xorl %eax, %ecx +; AVX512BW-NEXT: movl %ecx, %eax +; AVX512BW-NEXT: shrl $8, %eax +; AVX512BW-NEXT: xorb %cl, %al +; AVX512BW-NEXT: setnp %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v32i16_v32i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: movl %eax, %ecx +; AVX512VL-NEXT: shrl $16, %ecx +; AVX512VL-NEXT: xorl %eax, %ecx +; AVX512VL-NEXT: movl %ecx, %eax +; AVX512VL-NEXT: shrl $8, %eax +; AVX512VL-NEXT: xorb %cl, %al +; AVX512VL-NEXT: setnp %al +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <32 x i16> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a) ret i1 %b @@ -1614,25 +2015,70 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: icmp_v64i8_v64i1: -; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmb %zmm0, %zmm0, %k0 -; AVX512-NEXT: kshiftrq $32, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $16, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $8, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $4, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $2, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kshiftrq $1, %k0, %k1 -; AVX512-NEXT: kxorq %k1, %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: icmp_v64i8_v64i1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX512F-NEXT: vpxor %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $4, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftrw $1, %k0, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: icmp_v64i8_v64i1: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $32, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $16, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $8, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $4, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $2, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftrq $1, %k0, %k1 +; AVX512BW-NEXT: kxorq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: icmp_v64i8_v64i1: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; AVX512VL-NEXT: kshiftrq $32, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $16, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $8, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $4, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $2, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kshiftrq $1, %k0, %k1 +; AVX512VL-NEXT: kxorq %k1, %k0, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %a = icmp eq <64 x i8> %0, zeroinitializer %b = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> %a) ret i1 %b -- 2.50.1