(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
+multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
+ X86VectorVTInfo Wide> {
+ def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
+ Narrow.RC:$src1, Narrow.RC:$src0)),
+ (EXTRACT_SUBREG
+ (Wide.VT
+ (!cast<Instruction>(InstrStr#"rrk")
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
+ (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
+ Narrow.SubRegIdx)>;
+
+ def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
+ Narrow.RC:$src1, Narrow.ImmAllZerosV)),
+ (EXTRACT_SUBREG
+ (Wide.VT
+ (!cast<Instruction>(InstrStr#"rrkz")
+ (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
+ Narrow.SubRegIdx)>;
+}
+
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
- (v8f32 VR256X:$src0))),
- (EXTRACT_SUBREG
- (v16f32
- (VMOVAPSZrrk
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
- (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
- sub_ymm)>;
-
-def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
- (v8i32 VR256X:$src0))),
- (EXTRACT_SUBREG
- (v16i32
- (VMOVDQA32Zrrk
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
- (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
- sub_ymm)>;
+ defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
+ defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
}
let Predicates = [HasAVX512] in {
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
-; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; KNL-NEXT: retq
;
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT: vpmovsxbd (%rdi), %ymm1
-; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; KNL-NEXT: retq
;
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; KNL-NEXT: retq
;
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT: vpmovsxwd (%rdi), %ymm1
-; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; KNL-NEXT: retq
;
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
-; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; KNL-NEXT: retq
;