From: Craig Topper Date: Tue, 14 Jun 2016 03:13:00 +0000 (+0000) Subject: [AVX512] Use MOVZX32 instead of MOVZX16 for loading single v8/v4/v2/v1 masks when... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=62458bf56e88102317b255372459ccf14c0bced1;p=llvm [AVX512] Use MOVZX32 instead of MOVZX16 for loading single v8/v4/v2/v1 masks when KMOVB is not available. This has better behavior with respect to partial register stalls since it won't need to preserve the upper 16 bits of the GPR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272626 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8419448bd90..bea6e622d27 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2050,18 +2050,18 @@ let Predicates = [HasAVX512, NoDQI] in { sub_8bit))>; def : Pat<(v8i1 (load addr:$src)), - (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK8)>; + (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; def : Pat<(v2i1 (load addr:$src)), - (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK2)>; + (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>; def : Pat<(v4i1 (load addr:$src)), - (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK4)>; + (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>; } let Predicates = [HasAVX512] in { def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst), (KMOVWmk addr:$dst, VK16:$src)>; def : Pat<(i1 (load addr:$src)), - (COPY_TO_REGCLASS (AND16ri (MOVZX16rm8 addr:$src), (i16 1)), VK1)>; + (COPY_TO_REGCLASS (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), VK1)>; def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))), (KMOVWkm addr:$src)>; } diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 6c621f3f884..308673bc395 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -200,7 +200,7 @@ define i16 @test15(i1 *%addr) { } ;CHECK-LABEL: test16 
-;CHECK: movzbw (%rdi), %ax +;CHECK: movzbl (%rdi), %eax ;CHECK: kmovw ;CHECK: kshiftlw $10 ;CHECK: korw @@ -214,8 +214,8 @@ define i16 @test16(i1 *%addr, i16 %a) { } ;CHECK-LABEL: test17 -;KNL: movzbw (%rdi), %ax -;KNL: andw $1, %ax +;KNL: movzbl (%rdi), %eax +;KNL: andl $1, %eax ;KNL: kshiftlw $4 ;KNL: korw ;SKX: kshiftlb $4 diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index b8c3c73cb04..f935270d767 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -88,7 +88,7 @@ define void @mask16_mem(i16* %ptr) { define void @mask8_mem(i8* %ptr) { ; KNL-LABEL: mask8_mem: ; KNL: ## BB#0: -; KNL-NEXT: movzbw (%rdi), %ax +; KNL-NEXT: movzbl (%rdi), %eax ; KNL-NEXT: kmovw %eax, %k0 ; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1341,7 +1341,7 @@ End: define <8 x i64> @load_8i1(<8 x i1>* %a) { ; KNL-LABEL: load_8i1: ; KNL: ## BB#0: -; KNL-NEXT: movzbw (%rdi), %ax +; KNL-NEXT: movzbl (%rdi), %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; KNL-NEXT: retq @@ -1376,7 +1376,7 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) { define <2 x i16> @load_2i1(<2 x i1>* %a) { ; KNL-LABEL: load_2i1: ; KNL: ## BB#0: -; KNL-NEXT: movzbw (%rdi), %ax +; KNL-NEXT: movzbl (%rdi), %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; KNL-NEXT: retq @@ -1394,7 +1394,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) { define <4 x i16> @load_4i1(<4 x i1>* %a) { ; KNL-LABEL: load_4i1: ; KNL: ## BB#0: -; KNL-NEXT: movzbw (%rdi), %ax +; KNL-NEXT: movzbl (%rdi), %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqd %zmm0, %ymm0 diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll index 10da8facbfe..6a4ec4af703 100644 --- a/test/CodeGen/X86/avx512-select.ll +++ b/test/CodeGen/X86/avx512-select.ll @@ -84,9 +84,9 @@ define i8 @select05(i8 %a.0, i8 %m) { define i8 
@select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) { ; CHECK-LABEL: select05_mem: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbw (%rsi), %ax +; CHECK-NEXT: movzbl (%rsi), %eax ; CHECK-NEXT: kmovw %eax, %k0 -; CHECK-NEXT: movzbw (%rdi), %ax +; CHECK-NEXT: movzbl (%rdi), %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: kmovw %k0, %eax @@ -114,9 +114,9 @@ define i8 @select06(i8 %a.0, i8 %m) { define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) { ; CHECK-LABEL: select06_mem: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbw (%rsi), %ax +; CHECK-NEXT: movzbl (%rsi), %eax ; CHECK-NEXT: kmovw %eax, %k0 -; CHECK-NEXT: movzbw (%rdi), %ax +; CHECK-NEXT: movzbl (%rdi), %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 ; CHECK-NEXT: kmovw %k0, %eax diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index 9f8ff6add3e..6ebb2185d03 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -291,7 +291,7 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) { ; KNL_32-LABEL: test7: ; KNL_32: # BB#0: ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: movzbw {{[0-9]+}}(%esp), %cx +; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; KNL_32-NEXT: kmovw %ecx, %k1 ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 ; KNL_32-NEXT: kmovw %k1, %k2