// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
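+ // AVX-512 can use vpternlogd to materialize all ones on 512-bit vectors.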
if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
+ if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
+ (VT == MVT::v8i32 && Subtarget.hasInt256()))
return Op;
- if (!VT.is512BitVector())
- return getOnesVector(VT, Subtarget, DAG, DL);
+ return getOnesVector(VT, Subtarget, DAG, DL);
}
return SDValue();
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
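+// Expanded after register allocation to VPTERNLOGD with immediate 0xff,
+// which writes all ones regardless of its (undef) register inputs.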
+def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
+ [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
+ case X86::AVX512_512_SETALLONES: {
+ unsigned Reg = MIB->getOperand(0).getReg();
+ MIB->setDesc(get(X86::VPTERNLOGDZrri));
+ // VPTERNLOGD needs 3 register inputs and an immediate.
+ // 0xff will return 1s for any input.
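+ // The imm8 is a truth table indexed by the bit triple (src1,src2,src3);
+ // with 0xff every entry is 1, so the result is all ones no matter what
+ // the three (undef) source registers contain.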
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef).addImm(0xff);
+ return true;
+ }
case X86::TEST8ri_NOREX:
MI.setDesc(get(X86::TEST8ri));
return true;
else
switch (LoadMI.getOpcode()) {
case X86::AVX512_512_SET0:
+ case X86::AVX512_512_SETALLONES:
Alignment = 64;
break;
case X86::AVX2_SETALLONES:
case X86::AVX512_128_SET0:
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
+ case X86::AVX512_512_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
Ty = Type::getFloatTy(MF.getFunction()->getContext());
else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
- else if (Opc == X86::AVX512_512_SET0)
+ else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
         Opc == X86::AVX512_256_SET0)
  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
+ Opc == X86::AVX512_512_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
define <16 x i32> @test2(<16 x i32> %x) {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
ret <16 x i32>%res
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vmovss %xmm0, %xmm2, %xmm0
-; CHECK-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm1[1,0],xmm0[0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_X32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_X32-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpbroadcastd LCPI1_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <16 x i1>%a, %b
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: retq
;
; KNL_X32-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL_X32-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpbroadcastd LCPI2_1, %zmm0
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
; KNL_X32-NEXT: retl
; KNL-NEXT: Ltmp1:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: callq _func16xi1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL_X32-NEXT: Ltmp1:
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; KNL_X32-NEXT: vpbroadcastd LCPI5_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT: calll _func16xi1
; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: movb $85, %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
; KNL_X32-NEXT: movb $85, %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL_X32-NEXT: vpbroadcastd LCPI7_1, %zmm0
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
; KNL_X32-NEXT: addl $12, %esp
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vcvtdq2ps %zmm0, %zmm0
; KNL-NEXT: retq
;
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0
; KNL-NEXT: retq
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: retq
;
; KNL-LABEL: sext_16i1_16i32:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16i1_16i32:
; KNL-NEXT: LBB17_1:
; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT: LBB17_3:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; KNL-NEXT: LBB18_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; KNL-NEXT: movw $1, %cx
; KNL-NEXT: cmovgw %ax, %cx
; KNL-NEXT: kmovw %ecx, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: kmovw (%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; KNL-NEXT: movl $1, %ecx
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2
+; KNL-NEXT: movl $1, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
+; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
+; KNL-NEXT: vpsllw $7, %ymm2, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: kmovw (%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; KNL-NEXT: xorl %ecx, %ecx
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %edx, %esi
-; KNL-NEXT: setg %cl
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; KNL-NEXT: setg %al
+; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $7, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: retq
;
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_8i1:
; KNL-LABEL: load_16i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_16i1:
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
; KNL-LABEL: load_32i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
-; KNL-NEXT: movl {{.*}}(%rip), %eax
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: kmovw 2(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm1, %ymm1
; KNL-NEXT: retq
;
; KNL-LABEL: load_64i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
-; KNL-NEXT: movl {{.*}}(%rip), %eax
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: kmovw 2(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: kmovw 4(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: kmovw 6(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: kmovw 6(%rdi), %k1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: load_64i1:
; ALL: # BB#0: # %entry
; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0
; ALL-NEXT: vcmpunordps %zmm0, %zmm0, %k1
-; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; ALL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; ALL-NEXT: knotw %k1, %k1
; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; ALL-NEXT: retq
; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; KNL-NEXT: kxnorw %k1, %k0, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: retq
;
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtd %zmm3, %zmm2, %k1
; KNL-NEXT: kxorw %k1, %k0, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: LBB50_32: ## %else44
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: LBB53_16: ## %else20
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpxor %xmm2, %xmm1, %xmm2
; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB54_32: ## %else44
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
; AVX512-LABEL: test_cmp_v8f64:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ogt <8 x double> %a0, %a1
; AVX512-LABEL: test_cmp_v16f32:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ogt <16 x float> %a0, %a1
; AVX512-LABEL: test_cmp_v8i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = icmp sgt <8 x i64> %a0, %a1
; AVX512-LABEL: test_cmp_v16i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = icmp sgt <16 x i32> %a0, %a1
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512-LABEL: load_sext_16i1_to_16i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512-LABEL: load_sext_16i1_to_16i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512-LABEL: load_sext_32i1_to_32i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: movl {{.*}}(%rip), %eax
-; AVX512-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: kmovw 2(%rdi), %k1
-; AVX512-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: kmovw 2(%rdi), %k1
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
; CHECK-LABEL: combine_pshufb_identity_mask:
; CHECK: # BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vmovdqu8 {{.*#+}} zmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; CHECK-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512F-NEXT: movq {{.*}}(%rip), %rax
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
+; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
-; AVX512F-NEXT: movl {{.*}}(%rip), %eax
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k2} {z}
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; AVX512F-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermt2d %zmm1, %zmm3, %zmm2
+; AVX512F-NEXT: vpslld $31, %zmm2, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: movq {{.*}}(%rip), %rax
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1
+; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
; AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: movq {{.*}}(%rip), %rax
-; AVX512F-NEXT: movb $51, %cl
-; AVX512F-NEXT: kmovw %ecx, %k2
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k2} {z}
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: movb $51, %al
+; AVX512F-NEXT: kmovw %eax, %k2
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,1,2,3,4,5,6,7]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vpmovw2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; VL_BW_DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
+; VL_BW_DQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $96, %rsp
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1