[X86] Mask off upper bits of splat element in LowerBUILD_VECTORvXi1 when forming a SELECT.

author Craig Topper <craig.topper@intel.com>

Mon, 30 Sep 2019 18:43:44 +0000 (18:43 +0000)

committer Craig Topper <craig.topper@intel.com>

Mon, 30 Sep 2019 18:43:44 +0000 (18:43 +0000)
author Craig Topper <craig.topper@intel.com>
Mon, 30 Sep 2019 18:43:44 +0000 (18:43 +0000)
committer Craig Topper <craig.topper@intel.com>
Mon, 30 Sep 2019 18:43:44 +0000 (18:43 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 4dc8027d0bd9f983b55bd74a3fba41dd4a89ca29..c4794299e850785fc5ca37b98799ec7966f25ba7 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8459,10 +8459,20 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
    }
  
    // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
-  if (IsSplat)
-    return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
+  if (IsSplat) {
+    // The build_vector allows the scalar element to be larger than the vector
+    // element type. We need to mask it to use as a condition unless we know
+    // the upper bits are zero.
+    // FIXME: Use computeKnownBits instead of checking specific opcode?
+    SDValue Cond = Op.getOperand(SplatIdx);
+    assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");
+    if (Cond.getOpcode() != ISD::SETCC)
+      Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond,
+                         DAG.getConstant(1, dl, MVT::i8));
+    return DAG.getSelect(dl, VT, Cond,
                           DAG.getConstant(1, dl, VT),
                           DAG.getConstant(0, dl, VT));
+  }
  
    // insert elements one by one
    SDValue DstVec;
diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll

index 8901cee464630b46e5f21a753aaeed3345121d69..5fb114b3523ae66a7475a45bfbd4f1b629edfc80 100644 (file)
--- a/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/test/CodeGen/X86/avx512-calling-conv.ll
@@ -729,12 +729,12 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
  ; KNL-NEXT:    korw %k2, %k0, %k0
  ; KNL-NEXT:    kandw %k1, %k0, %k0
  ; KNL-NEXT:    xorl %ecx, %ecx
-; KNL-NEXT:    cmpb $0, {{[0-9]+}}(%rsp)
+; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
  ; KNL-NEXT:    movl $65535, %edx ## imm = 0xFFFF
  ; KNL-NEXT:    movl $0, %esi
  ; KNL-NEXT:    cmovnel %edx, %esi
  ; KNL-NEXT:    kmovw %esi, %k1
-; KNL-NEXT:    cmpb $0, {{[0-9]+}}(%rsp)
+; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
  ; KNL-NEXT:    cmovnel %edx, %ecx
  ; KNL-NEXT:    kmovw %ecx, %k2
  ; KNL-NEXT:    kandw %k1, %k2, %k1
@@ -1314,11 +1314,11 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
  ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
  ; KNL_X32-NEXT:    korw %k2, %k1, %k1
  ; KNL_X32-NEXT:    xorl %eax, %eax
-; KNL_X32-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
  ; KNL_X32-NEXT:    movl $65535, %ecx ## imm = 0xFFFF
  ; KNL_X32-NEXT:    movl $0, %edx
  ; KNL_X32-NEXT:    cmovnel %ecx, %edx
-; KNL_X32-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
  ; KNL_X32-NEXT:    cmovnel %ecx, %eax
  ; KNL_X32-NEXT:    kandw %k0, %k1, %k0
  ; KNL_X32-NEXT:    kmovw %edx, %k1
diff --git a/test/CodeGen/X86/pr43507.ll b/test/CodeGen/X86/pr43507.ll

new file mode 100644 (file)

index 0000000..ec18d3c
--- /dev/null
+++ b/test/CodeGen/X86/pr43507.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s
+
+define <8 x i1> @ham(i64 %arg) {
+; CHECK-LABEL: ham:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    movl $255, %ecx
+; CHECK-NEXT:    cmovel %eax, %ecx
+; CHECK-NEXT:    kmovd %ecx, %k0
+; CHECK-NEXT:    vpmovm2w %k0, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = trunc i64 %arg to i1
+  %tmp1 = insertelement <8 x i1> undef, i1 %tmp, i32 0
+  %tmp2 = shufflevector <8 x i1> %tmp1, <8 x i1> undef, <8 x i32> zeroinitializer
+  ret <8 x i1> %tmp2
+}
author	Craig Topper <craig.topper@intel.com>
	Mon, 30 Sep 2019 18:43:44 +0000 (18:43 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Mon, 30 Sep 2019 18:43:44 +0000 (18:43 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/avx512-calling-conv.ll		patch | blob | history
test/CodeGen/X86/pr43507.ll	[new file with mode: 0644]	patch | blob