From: Craig Topper <craig.topper@intel.com>
Date: Thu, 19 Sep 2019 06:50:39 +0000 (+0000)
Subject: [X86] Prevent crash in LowerBUILD_VECTORvXi1 for v64i1 vectors on 32-bit targets... 
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5476168cf0ec21551085fe4e9d2937a1455c954a;p=llvm

[X86] Prevent crash in LowerBUILD_VECTORvXi1 for v64i1 vectors on 32-bit targets when the vector is a mix of constants and non-constant.

We need to materialize the constants as two 32-bit values that
are casted to v32i1 and then concatenated.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372304 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a41cde23785..e34998b59f2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8547,12 +8547,20 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
   // insert elements one by one
   SDValue DstVec;
   if (HasConstElts) {
-    MVT ImmVT = MVT::getIntegerVT(std::max(VT.getSizeInBits(), 8U));
-    SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
-    MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
-    DstVec = DAG.getBitcast(VecVT, Imm);
-    DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec,
-                         DAG.getIntPtrConstant(0, dl));
+    if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+      SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32);
+      SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32);
+      ImmL = DAG.getBitcast(MVT::v32i1, ImmL);
+      ImmH = DAG.getBitcast(MVT::v32i1, ImmH);
+      DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH);
+    } else {
+      MVT ImmVT = MVT::getIntegerVT(std::max(VT.getSizeInBits(), 8U));
+      SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
+      MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
+      DstVec = DAG.getBitcast(VecVT, Imm);
+      DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec,
+                           DAG.getIntPtrConstant(0, dl));
+    }
   } else
     DstVec = DAG.getUNDEF(VT);
 
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index a28816dbdbd..ea9742a5762 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -5068,3 +5068,80 @@ bar:
 exit:
   ret void
 }
+
+define <64 x i1> @mask64_insert(i32 %a) {
+; KNL-LABEL: mask64_insert:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    movq %rdi, %rax
+; KNL-NEXT:    movw $-4, %cx
+; KNL-NEXT:    kmovw %ecx, %k0
+; KNL-NEXT:    kshiftrw $1, %k0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k0
+; KNL-NEXT:    andl $1, %esi
+; KNL-NEXT:    kmovw %esi, %k1
+; KNL-NEXT:    korw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k0, (%rdi)
+; KNL-NEXT:    movw $-3, 6(%rdi)
+; KNL-NEXT:    movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: mask64_insert:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    kmovd %edi, %k0
+; SKX-NEXT:    kshiftlq $63, %k0, %k0
+; SKX-NEXT:    kshiftrq $63, %k0, %k0
+; SKX-NEXT:    movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
+; SKX-NEXT:    kmovq %rax, %k1
+; SKX-NEXT:    kshiftrq $1, %k1, %k1
+; SKX-NEXT:    kshiftlq $1, %k1, %k1
+; SKX-NEXT:    korq %k0, %k1, %k0
+; SKX-NEXT:    vpmovm2b %k0, %zmm0
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: mask64_insert:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    kmovd %edi, %k0
+; AVX512BW-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512BW-NEXT:    kshiftrq $63, %k0, %k0
+; AVX512BW-NEXT:    movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
+; AVX512BW-NEXT:    kmovq %rax, %k1
+; AVX512BW-NEXT:    kshiftrq $1, %k1, %k1
+; AVX512BW-NEXT:    kshiftlq $1, %k1, %k1
+; AVX512BW-NEXT:    korq %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: mask64_insert:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    movq %rdi, %rax
+; AVX512DQ-NEXT:    movw $-4, %cx
+; AVX512DQ-NEXT:    kmovw %ecx, %k0
+; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
+; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
+; AVX512DQ-NEXT:    andl $1, %esi
+; AVX512DQ-NEXT:    kmovw %esi, %k1
+; AVX512DQ-NEXT:    korw %k1, %k0, %k0
+; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
+; AVX512DQ-NEXT:    movw $-3, 6(%rdi)
+; AVX512DQ-NEXT:    movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
+; AVX512DQ-NEXT:    retq
+;
+; X86-LABEL: mask64_insert:
+; X86:       ## %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
+; X86-NEXT:    movl $-131076, %eax ## imm = 0xFFFDFFFC
+; X86-NEXT:    kmovd %eax, %k1
+; X86-NEXT:    movl $-131075, %eax ## imm = 0xFFFDFFFD
+; X86-NEXT:    kmovd %eax, %k2
+; X86-NEXT:    kunpckdq %k1, %k2, %k1
+; X86-NEXT:    kshiftrq $1, %k1, %k1
+; X86-NEXT:    kshiftlq $1, %k1, %k1
+; X86-NEXT:    kshiftlq $63, %k0, %k0
+; X86-NEXT:    kshiftrq $63, %k0, %k0
+; X86-NEXT:    korq %k0, %k1, %k0
+; X86-NEXT:    vpmovm2b %k0, %zmm0
+; X86-NEXT:    retl
+  %a_i = trunc i32 %a to i1
+  %maskv = insertelement <64 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
+  ret <64 x i1> %maskv
+}