From: Craig Topper
Date: Sun, 3 Sep 2017 22:25:50 +0000 (+0000)
Subject: [X86] Add more patterns to use moves to zero the upper portions of a vector register...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=03f273f10e119a37c39947cff5f1b6bacc56c7b6;p=llvm

[X86] Add more patterns to use moves to zero the upper portions of a vector register that I missed in r312450.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312459 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index b1778329fba..e4e3be33932 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -4044,6 +4044,51 @@ let Predicates = [HasAVX512, NoVLX] in {
   // will zero the upper bits.
   // TODO: Is there a safe way to detect whether the producing instruction
   // already zeroed the upper bits?
+
+  // 128->512 register form.
+  def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                     (v2f64 VR128:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVAPDrr VR128:$src), sub_xmm)>;
+  def : Pat<(v16f32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                      (v4f32 VR128:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+  def : Pat<(v8i64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                     (v2i64 VR128:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+  def : Pat<(v16i32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                      (v4i32 VR128:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+  def : Pat<(v32i16 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                      (v8i16 VR128:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+  def : Pat<(v64i8 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                     (v16i8 VR128:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+
+  // 128->512 memory form.
+  def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                     (loadv2f64 addr:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVAPDrm addr:$src), sub_xmm)>;
+  def : Pat<(v16f32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                      (loadv4f32 addr:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+  def : Pat<(v8i64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                     (loadv2i64 addr:$src), (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+  def : Pat<(v16i32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                      (bc_v4i32 (loadv2i64 addr:$src)),
+                                      (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+  def : Pat<(v32i16 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                      (bc_v8i16 (loadv2i64 addr:$src)),
+                                      (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+  def : Pat<(v64i8 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+                                     (bc_v16i8 (loadv2i64 addr:$src)),
+                                     (iPTR 0))),
+            (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+
+  // 256->512 register form.
   def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
                                      (v4f64 VR256:$src), (iPTR 0))),
             (SUBREG_TO_REG (i64 0), (VMOVAPDYrr VR256:$src), sub_ymm)>;
@@ -4063,6 +4108,7 @@ let Predicates = [HasAVX512, NoVLX] in {
                                      (v32i8 VR256:$src), (iPTR 0))),
             (SUBREG_TO_REG (i64 0), (VMOVDQAYrr VR256:$src), sub_ymm)>;
 
+  // 256->512 memory form.
   def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
                                      (loadv4f64 addr:$src), (iPTR 0))),
             (SUBREG_TO_REG (i64 0), (VMOVAPDYrm addr:$src), sub_ymm)>;
diff --git a/test/CodeGen/X86/compress_expand.ll b/test/CodeGen/X86/compress_expand.ll
index 81d36f2f54a..c6a1c07922e 100644
--- a/test/CodeGen/X86/compress_expand.ll
+++ b/test/CodeGen/X86/compress_expand.ll
@@ -226,8 +226,7 @@ define void @test11(i64* %base, <2 x i64> %V, <2 x i1> %mask) {
 ; KNL-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
 ; KNL-NEXT:    vpsllq $63, %xmm1, %xmm1
 ; KNL-NEXT:    vpsraq $63, %zmm1, %zmm1
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
@@ -249,8 +248,7 @@ define void @test12(float* %base, <4 x float> %V, <4 x i1> %mask) {
 ; KNL-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
 ; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
 ; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
 ; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
@@ -275,8 +273,7 @@ define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger)
 ; KNL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
 ; KNL-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
 ; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT:    vmovaps %xmm1, %xmm1
 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
 ; KNL-NEXT:    vexpandps (%rdi), %zmm0 {%k1}
@@ -303,8 +300,7 @@ define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) {
 ; KNL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
 ; KNL-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
 ; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; KNL-NEXT:    vmovaps %xmm1, %xmm1
 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
 ; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index 9e58f1dcf74..e66e44e0f76 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -883,8 +883,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
 ; KNL_64:       # BB#0:
 ; KNL_64-NEXT:    # kill: %XMM2 %XMM2 %ZMM2
 ; KNL_64-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
-; KNL_64-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL_64-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_64-NEXT:    vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -896,8 +895,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    # kill: %XMM2 %XMM2 %ZMM2
 ; KNL_32-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
-; KNL_32-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
@@ -1084,8 +1082,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
 ; KNL_64-LABEL: test21:
 ; KNL_64:       # BB#0:
 ; KNL_64-NEXT:    # kill: %XMM1 %XMM1 %ZMM1
-; KNL_64-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT:    vinserti32x4 $0, %xmm2, %zmm3, %zmm2
+; KNL_64-NEXT:    vmovdqa %xmm2, %xmm2
 ; KNL_64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; KNL_64-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; KNL_64-NEXT:    vptestmq %zmm2, %zmm2, %k1
@@ -1096,8 +1093,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
 ; KNL_32-LABEL: test21:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    # kill: %XMM1 %XMM1 %ZMM1
-; KNL_32-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT:    vinserti32x4 $0, %xmm2, %zmm3, %zmm2
+; KNL_32-NEXT:    vmovdqa %xmm2, %xmm2
 ; KNL_32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; KNL_32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k1
@@ -1250,8 +1246,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; KNL_64:       # BB#0:
 ; KNL_64-NEXT:    # kill: %XMM2 %XMM2 %ZMM2
 ; KNL_64-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
-; KNL_64-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL_64-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_64-NEXT:    vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -1263,8 +1258,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    # kill: %XMM2 %XMM2 %ZMM2
 ; KNL_32-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
-; KNL_32-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
@@ -1312,8 +1306,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; KNL_32-NEXT:    vinserti32x4 $0, {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,0,1,0]
 ; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT:    vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
@@ -1346,8 +1339,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
 ; KNL_64:       # BB#0:
 ; KNL_64-NEXT:    # kill: %XMM2 %XMM2 %ZMM2
 ; KNL_64-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
-; KNL_64-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL_64-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_64-NEXT:    vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -1359,8 +1351,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    # kill: %XMM2 %XMM2 %ZMM2
 ; KNL_32-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
-; KNL_32-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT:    vmovdqa %xmm1, %xmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
@@ -1408,8 +1399,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
 ; KNL_32-NEXT:    # kill: %XMM1 %XMM1 %ZMM1
 ; KNL_32-NEXT:    # kill: %XMM0 %XMM0 %ZMM0
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL_32-NEXT:    vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
 ; KNL_32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; KNL_32-NEXT:    vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
@@ -1500,8 +1490,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    # kill: %XMM1 %XMM1 %ZMM1
 ; KNL_32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; KNL_32-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL_32-NEXT:    vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
 ; KNL_32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; KNL_32-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
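
Illustration (not part of the commit): the insert_subvector-into-all-zeros DAG nodes matched by the new patterns arise when a 128-bit (or 256-bit) value is widened into a zeroed 512-bit vector, as happens when KNL widens the narrow mask vectors in the tests above. A minimal hand-written IR sketch of that shape, using a hypothetical function name and assuming a KNL-like target (+avx512f, no VLX), is:

; Hypothetical example, not taken from the commit's test files: widen a
; <2 x i64> into a zeroed <8 x i64>. Mask elements 2-7 select lanes of the
; zeroinitializer operand, so this is an insert of %x into an all-zeros
; v8i64 at index 0.
define <8 x i64> @widen_v2i64_to_v8i64(<2 x i64> %x) {
  %wide = shufflevector <2 x i64> %x, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i64> %wide
}

With the new patterns such an insert can be selected as a single VEX-encoded XMM move (e.g. vmovdqa %xmm0, %xmm0), which implicitly zeroes bits 511:128 of the ZMM register, instead of the vpxor + vinserti32x4 sequence removed from the checks above.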