From a8f7dd91f2af0f28318ec7d232b41db13905d9cf Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 23 Aug 2017 20:03:43 +0000 Subject: [PATCH] Merging r311572: ------------------------------------------------------------------------ r311572 | ctopper | 2017-08-23 09:41:02 -0700 (Wed, 23 Aug 2017) | 9 lines [AVX512] Don't create SHRUNKBLEND SDNodes for 512-bit vectors There are no 512-bit blend instructions so we shouldn't create SHRUNKBLEND for them. On a side note, it looks like there may be a missed opportunity for constant folding TESTM when LHS and RHS are equal. This fixes PR34139. Differential Revision: https://reviews.llvm.org/D36992 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@311593 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 +++ test/CodeGen/X86/pr34139.ll | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 test/CodeGen/X86/pr34139.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9c9eccfbb65..193ee8de619 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -30629,6 +30629,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Byte blends are only available in AVX2 if (VT == MVT::v32i8 && !Subtarget.hasAVX2()) return SDValue(); + // There are no 512-bit blend instructions that use sign bits. 
+ if (VT.is512BitVector()) + return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask(APInt::getSignMask(BitWidth)); diff --git a/test/CodeGen/X86/pr34139.ll b/test/CodeGen/X86/pr34139.ll new file mode 100644 index 00000000000..c20c2cd510c --- /dev/null +++ b/test/CodeGen/X86/pr34139.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s + +define void @f_f(<16 x double>* %ptr) { +; CHECK-LABEL: f_f: +; CHECK: # BB#0: +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa %xmm0, (%rax) +; CHECK-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; CHECK-NEXT: vmovapd (%rdi), %zmm1 +; CHECK-NEXT: vmovapd 64(%rdi), %zmm2 +; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1 +; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovapd %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vmovapd %zmm2, 64(%rdi) +; CHECK-NEXT: vmovapd %zmm1, (%rdi) + store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* undef + %load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef + %v.i.i.i.i = load <16 x double>, <16 x double>* %ptr + %mask_vec_i1.i.i.i51.i.i = icmp ne <16 x i8> %load_mask8.i.i.i, zeroinitializer + %v1.i.i.i.i = select <16 x i1> %mask_vec_i1.i.i.i51.i.i, <16 x double> undef, <16 x double> %v.i.i.i.i + store <16 x double> %v1.i.i.i.i, <16 x double>* %ptr + unreachable +} -- 2.49.0