From 538fe17c99d6e70989a953f43c7c9f127619fb0f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 27 Nov 2017 18:00:49 +0000 Subject: [PATCH] [X86] Add avx512bw command lines to vselect-packss.ll This shows several places where we fail to use masked move or blendm. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319063 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vselect-packss.ll | 145 ++++++++++++++++++++++------- 1 file changed, 109 insertions(+), 36 deletions(-) diff --git a/test/CodeGen/X86/vselect-packss.ll b/test/CodeGen/X86/vselect-packss.ll index 563c20633e1..cab8521160c 100644 --- a/test/CodeGen/X86/vselect-packss.ll +++ b/test/CodeGen/X86/vselect-packss.ll @@ -3,8 +3,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512NOBW --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512NOBW --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW --check-prefix=AVX512BWNOVL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW --check-prefix=AVX512BWVL ; ; General cases - packing of vector comparison to legal vector result types @@ -50,14 +52,30 @@ define <16 x i8> @vselect_packss_v16i16(<16 x i16> %a0, <16 x i16> %a1, <16 x i8 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: vselect_packss_v16i16: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512NOBW-LABEL: vselect_packss_v16i16: +; AVX512NOBW: # BB#0: +; AVX512NOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512NOBW-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512NOBW-NEXT: vzeroupper +; AVX512NOBW-NEXT: retq +; +; AVX512BWNOVL-LABEL: vselect_packss_v16i16: +; AVX512BWNOVL: # BB#0: +; AVX512BWNOVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512BWNOVL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512BWNOVL-NEXT: vzeroupper +; AVX512BWNOVL-NEXT: retq +; +; AVX512BWVL-LABEL: vselect_packss_v16i16: +; AVX512BWVL: # BB#0: +; AVX512BWVL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 +; AVX512BWVL-NEXT: vpmovm2b %k0, %xmm0 +; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = icmp eq <16 x i16> %a0, %a1 %2 = sext <16 x i1> %1 to <16 x i8> %3 = and <16 x i8> %2, %a2 @@ -128,14 +146,30 @@ define <16 x i8> @vselect_packss_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i8 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: vselect_packss_v16i32: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512NOBW-LABEL: vselect_packss_v16i32: +; AVX512NOBW: # BB#0: +; AVX512NOBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 +; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512NOBW-NEXT: vzeroupper +; AVX512NOBW-NEXT: retq +; +; AVX512BWNOVL-LABEL: vselect_packss_v16i32: +; AVX512BWNOVL: # BB#0: +; AVX512BWNOVL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; AVX512BWNOVL-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512BWNOVL-NEXT: vzeroupper +; AVX512BWNOVL-NEXT: retq +; +; AVX512BWVL-LABEL: vselect_packss_v16i32: +; AVX512BWVL: # BB#0: +; AVX512BWVL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; AVX512BWVL-NEXT: vpmovm2b %k0, %xmm0 +; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = icmp eq <16 x i32> %a0, %a1 %2 = sext <16 x i1> %1 to <16 x i8> %3 = and <16 x i8> %2, %a2 @@ -260,16 +294,36 @@ define <16 x i8> @vselect_packss_v16i64(<16 x i64> %a0, <16 x i64> %a1, <16 x i8 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: vselect_packss_v16i64: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 -; AVX512-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 -; AVX512-NEXT: kunpckbw %k0, %k1, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512NOBW-LABEL: vselect_packss_v16i64: +; AVX512NOBW: # BB#0: +; AVX512NOBW-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512NOBW-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 +; AVX512NOBW-NEXT: kunpckbw %k0, %k1, %k1 +; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0 +; AVX512NOBW-NEXT: vzeroupper +; AVX512NOBW-NEXT: retq +; +; AVX512BWNOVL-LABEL: vselect_packss_v16i64: +; AVX512BWNOVL: # BB#0: +; AVX512BWNOVL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512BWNOVL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 +; AVX512BWNOVL-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512BWNOVL-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0 +; AVX512BWNOVL-NEXT: vzeroupper +; AVX512BWNOVL-NEXT: retq +; +; AVX512BWVL-LABEL: vselect_packss_v16i64: +; AVX512BWVL: # BB#0: +; AVX512BWVL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512BWVL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 +; AVX512BWVL-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512BWVL-NEXT: vpmovm2b %k0, %xmm0 +; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = icmp eq <16 x i64> %a0, %a1 %2 = sext <16 x i1> %1 to <16 x i8> %3 = and <16 x i8> %2, %a2 @@ -323,14 +377,33 @@ define <16 x i8> @vselect_packss(<16 x i16> %a0, <16 x i16> %a1, <16 x i8> %a2, ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: vselect_packss: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512NOBW-LABEL: vselect_packss: +; AVX512NOBW: # BB#0: +; AVX512NOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512NOBW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512NOBW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512NOBW-NEXT: vzeroupper +; AVX512NOBW-NEXT: retq +; +; AVX512BWNOVL-LABEL: vselect_packss: +; AVX512BWNOVL: # BB#0: +; AVX512BWNOVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512BWNOVL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BWNOVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512BWNOVL-NEXT: vzeroupper +; AVX512BWNOVL-NEXT: retq +; +; AVX512BWVL-LABEL: vselect_packss: +; AVX512BWVL: # BB#0: +; AVX512BWVL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 +; AVX512BWVL-NEXT: vpmovm2w %k0, %ymm0 +; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BWVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = icmp eq <16 x i16> %a0, %a1 %2 = sext <16 x i1> %1 to <16 x i16> %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <8 x i32> -- 2.50.1