// If we're inserting into an all zeros vector we can just use a plain move,
// which will zero the upper bits.
// TODO: Is there a safe way to detect whether the producing instruction
// already zeroed the upper bits?
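// Illustrative effect (an editorial example mirroring the test updates below,
// not additional patch content): zero-extending an XMM value into a ZMM
// register previously selected to
//   vpxor        %xmm2, %xmm2, %xmm2
//   vinserti32x4 $0, %xmm1, %zmm2, %zmm1
// whereas any VEX/EVEX-encoded write to an XMM register already clears
// bits 511:128 of the containing ZMM register, so a single
//   vmovdqa      %xmm1, %xmm1
// suffices.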
+
+ // 128->512 register form.
+ def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                  (v2f64 VR128:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVAPDrr VR128:$src), sub_xmm)>;
+ def : Pat<(v16f32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                    (v4f32 VR128:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+ def : Pat<(v8i64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                  (v2i64 VR128:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+ def : Pat<(v16i32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                    (v4i32 VR128:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+ def : Pat<(v32i16 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                    (v8i16 VR128:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+ def : Pat<(v64i8 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                  (v16i8 VR128:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+
+ // 128->512 memory form.
+ def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                  (loadv2f64 addr:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVAPDrm addr:$src), sub_xmm)>;
+ def : Pat<(v16f32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                    (loadv4f32 addr:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+ def : Pat<(v8i64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                  (loadv2i64 addr:$src), (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+ def : Pat<(v16i32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                    (bc_v4i32 (loadv2i64 addr:$src)),
+                    (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+ def : Pat<(v32i16 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                    (bc_v8i16 (loadv2i64 addr:$src)),
+                    (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+ def : Pat<(v64i8 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                  (bc_v16i8 (loadv2i64 addr:$src)),
+                  (iPTR 0))),
+           (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
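+ // Memory-form note (illustrative, assumed example; register and operand
+ // names are hypothetical): the load folds into the 128-bit move, e.g. a
+ // single
+ //   vmovaps (%rdi), %xmm0
+ // loads the low 128 bits and leaves zmm0[511:128] zeroed, with no separate
+ // vpxor/vinserti32x4 needed.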
+
+ // 256->512 register form.
def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
                 (v4f64 VR256:$src), (iPTR 0))),
          (SUBREG_TO_REG (i64 0), (VMOVAPDYrr VR256:$src), sub_ymm)>;
def : Pat<(v64i8 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
                 (v32i8 VR256:$src), (iPTR 0))),
          (SUBREG_TO_REG (i64 0), (VMOVDQAYrr VR256:$src), sub_ymm)>;
+ // 256->512 memory form.
def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
                 (loadv4f64 addr:$src), (iPTR 0))),
          (SUBREG_TO_REG (i64 0), (VMOVAPDYrm addr:$src), sub_ymm)>;
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL-NEXT: vpsraq $63, %zmm1, %zmm1
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT: vmovdqa %xmm1, %xmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT: vmovdqa %xmm1, %xmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT: vmovaps %xmm1, %xmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vexpandps (%rdi), %zmm0 {%k1}
; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT: vmovaps %xmm1, %xmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-LABEL: test21:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
+; KNL_64-NEXT: vmovdqa %xmm2, %xmm2
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-LABEL: test21:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
+; KNL_32-NEXT: vmovdqa %xmm2, %xmm2
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT: vmovdqa {{.*#+}} xmm1 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; KNL_32-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}