From e166f9ed844a67ec9e70bc425af72731f1b6bbac Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 13 Oct 2019 05:47:47 +0000 Subject: [PATCH] [X86] Enable v4i32->v4i16 and v8i16->v8i8 saturating truncates to use pack instructions with avx512. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 1 + test/CodeGen/X86/vector-trunc-packus.ll | 61 ++++--------------------- test/CodeGen/X86/vector-trunc-ssat.ll | 56 ++++------------------- 3 files changed, 17 insertions(+), 101 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c7d90ddfc9a..8d538198aef 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -39871,6 +39871,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, // registers, we should go ahead and use the pack instructions if possible. bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) || (Subtarget.hasBWI() && InSVT == MVT::i16)) && + (InVT.getSizeInBits() > 128) && (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) && !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256); diff --git a/test/CodeGen/X86/vector-trunc-packus.ll b/test/CodeGen/X86/vector-trunc-packus.ll index 2f433d8b8f7..a95a81f435e 100644 --- a/test/CodeGen/X86/vector-trunc-packus.ll +++ b/test/CodeGen/X86/vector-trunc-packus.ll @@ -2392,37 +2392,13 @@ define <4 x i16> @trunc_packus_v4i32_v4i16(<4 x i32> %a0) { ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512F-LABEL: trunc_packus_v4i32_v4i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: trunc_packus_v4i32_v4i16: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: trunc_packus_v4i32_v4i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i16: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: trunc_packus_v4i32_v4i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq ; ; SKX-LABEL: trunc_packus_v4i32_v4i16: ; SKX: # %bb.0: -; SKX-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ; SKX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <4 x i32> %a0, @@ -5731,34 +5707,13 @@ define <8 x i8> @trunc_packus_v8i16_v8i8(<8 x i16> %a0) { ; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512F-LABEL: trunc_packus_v8i16_v8i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: trunc_packus_v8i16_v8i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: trunc_packus_v8i16_v8i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: trunc_packus_v8i16_v8i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BWVL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: trunc_packus_v8i16_v8i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq ; ; SKX-LABEL: trunc_packus_v8i16_v8i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ; SKX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <8 x i16> %a0, diff --git a/test/CodeGen/X86/vector-trunc-ssat.ll b/test/CodeGen/X86/vector-trunc-ssat.ll index d15559e4031..d9d9c367cea 100644 --- a/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/test/CodeGen/X86/vector-trunc-ssat.ll @@ -2380,34 +2380,13 @@ define <4 x i16> @trunc_ssat_v4i32_v4i16(<4 x i32> %a0) { ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512F-LABEL: trunc_ssat_v4i32_v4i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: trunc_ssat_v4i32_v4i16: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: trunc_ssat_v4i32_v4i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i16: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: trunc_ssat_v4i32_v4i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq ; ; SKX-LABEL: trunc_ssat_v4i32_v4i16: ; SKX: # %bb.0: -; SKX-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; SKX-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; SKX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <4 x i32> %a0, @@ -5620,32 +5599,13 @@ define <8 x i8> @trunc_ssat_v8i16_v8i8(<8 x i16> %a0) { ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512F-LABEL: trunc_ssat_v8i16_v8i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: trunc_ssat_v8i16_v8i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: trunc_ssat_v8i16_v8i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: trunc_ssat_v8i16_v8i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: trunc_ssat_v8i16_v8i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq ; ; SKX-LABEL: trunc_ssat_v8i16_v8i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 ; SKX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <8 x i16> %a0, -- 2.40.0