ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
return SDValue();
- // If this VSELECT has a vector if i1 as a mask, it will be directly matched
- // with patterns on the mask registers on AVX-512.
- if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
- return Op;
-
// Try to lower this to a blend-style vector shuffle. This can handle all
// constant condition cases.
if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
return BlendOp;
+ // If this VSELECT has a vector of i1 as a mask, it will be directly matched
+ // by patterns against the AVX-512 mask registers.
+ if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
+ return Op;
+
// Variable blends are only legal from SSE4.1 onward.
if (!Subtarget.hasSSE41())
return SDValue();
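For context, a minimal sketch of the constant-mask VSELECT shape this reordering targets (hypothetical function name, not part of the patch): with the i1 early-return now placed after the shuffle attempt, a constant vXi1 mask can be lowered as a blend-style shuffle instead of being forced through an AVX-512 mask register.

define <4 x i32> @constant_mask_select(<4 x i32> %a, <4 x i32> %b) {
  ; Constant <4 x i1> mask: lanes 0 and 2 take %a, lanes 1 and 3 take %b.
  %r = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %r
}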
;
; SKX-LABEL: test_build_vec_v64i1:
; SKX: ## BB#0:
-; SKX-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
-; SKX-NEXT: kmovq %rax, %k1
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_build_vec_v64i1:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
-; AVX512BW-NEXT: kmovq %rax, %k1
-; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_build_vec_v64i1:
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; CHECK-LABEL: test_build_vec_v64i1:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $6432645796886517060, %rax # imm = 0x5945594549549544
-; CHECK-NEXT: # sched: [1:0.25]
-; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00]
-; CHECK-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00]
; CHECK-NEXT: retq # sched: [7:1.00]
%ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
  ret <64 x i8> %ret
}
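Note: the old SKX/AVX512BW code materialized the constant mask as the immediate 0x5945594549549544, where bit i selects element i of the result from %x (for example, the low byte 0x44 = 0b01000100 sets bits 2 and 6, matching elements 0-7 = false,false,true,false,false,false,true,false in the select above), and moved it into %k1 with kmovq. The new lowering instead folds the same constant mask into a single vpshufb byte shuffle, zeroing exactly the lanes whose mask bit is clear.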
;
; X32-AVX512-LABEL: PR34577:
; X32-AVX512: # BB#0: # %entry
-; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm3 = <1,u,u,u,2,u,5,0>
-; X32-AVX512-NEXT: vpermps %ymm0, %ymm3, %ymm0
-; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm3 = <u,2,3,5,u,5,u,u>
-; X32-AVX512-NEXT: vpermps %ymm2, %ymm3, %ymm2
-; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5],ymm0[6,7]
-; X32-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX512-NEXT: movb $86, %al
-; X32-AVX512-NEXT: kmovw %eax, %k1
-; X32-AVX512-NEXT: vblendmps %zmm0, %zmm2, %zmm0 {%k1}
+; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X32-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; X32-AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; X32-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
; X32-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
;
; X64-AVX512-LABEL: PR34577:
; X64-AVX512: # BB#0: # %entry
-; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm3 = <1,u,u,u,2,u,5,0>
-; X64-AVX512-NEXT: vpermps %ymm0, %ymm3, %ymm0
-; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm3 = <u,2,3,5,u,5,u,u>
-; X64-AVX512-NEXT: vpermps %ymm2, %ymm3, %ymm2
-; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5],ymm0[6,7]
-; X64-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X64-AVX512-NEXT: movb $86, %al
-; X64-AVX512-NEXT: kmovw %eax, %k1
-; X64-AVX512-NEXT: vblendmps %zmm0, %zmm2, %zmm0 {%k1}
+; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X64-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; X64-AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; X64-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
; X64-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
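Likewise in PR34577, the movb $86 / kmovw / vblendmps sequence (86 = 0b01010110, a constant mask selecting elements 1, 2, 4, and 6) is gone: the constant mask is now folded into register blends (vblendpd against a zeroed ymm2), and the second vpermps/vblendps pair drops out entirely.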