From: Craig Topper
Date: Tue, 14 Jun 2016 03:12:54 +0000 (+0000)
Subject: [AVX512] Add patterns for zero-extending a mask that use the def of KMOVW/KMOVB witho...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=51ad7064a4a4d50d8773068be3a29ed0af5823a0;p=llvm

[AVX512] Add patterns for zero-extending a mask that use the def of KMOVW/KMOVB
without going through an EXTRACT_SUBREG and a MOVZX.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272625 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index c3f9079f398..8419448bd90 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1992,12 +1992,16 @@ let Predicates = [HasDQI] in {
             (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
   def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
             (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
+  def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
+            (KMOVBrk VK8:$src)>;
 }
 let Predicates = [HasAVX512] in {
   def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
             (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
   def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
             (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
+  def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
+            (KMOVWrk VK16:$src)>;
 }
 let Predicates = [HasBWI] in {
   def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
@@ -2137,6 +2141,8 @@ let Predicates = [HasAVX512, NoDQI] in {
             (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit)>;
+  def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
+            (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16))>;
 }
 
 let Predicates = [HasAVX512] in {
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index 595e34cfa19..b8c3c73cb04 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -21,7 +21,6 @@ define i32 @mask16_zext(i16 %x) {
 ; CHECK-NEXT:    kmovw %edi, %k0
 ; CHECK-NEXT:    knotw %k0, %k0
 ; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    movzwl %ax, %eax
 ; CHECK-NEXT:    retq
   %m0 = bitcast i16 %x to <16 x i1>
   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -56,7 +55,6 @@ define i32 @mask8_zext(i8 %x) {
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    knotw %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: mask8_zext:
@@ -64,7 +62,6 @@ define i32 @mask8_zext(i8 %x) {
 ; SKX-NEXT:    kmovb %edi, %k0
 ; SKX-NEXT:    knotb %k0, %k0
 ; SKX-NEXT:    kmovb %k0, %eax
-; SKX-NEXT:    movzbl %al, %eax
 ; SKX-NEXT:    retq
   %m0 = bitcast i8 %x to <8 x i1>
   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
diff --git a/test/CodeGen/X86/combine-testm-and.ll b/test/CodeGen/X86/combine-testm-and.ll
index 76f542f8445..2b95a114540 100644
--- a/test/CodeGen/X86/combine-testm-and.ll
+++ b/test/CodeGen/X86/combine-testm-and.ll
@@ -6,7 +6,6 @@ define i32 @combineTESTM_AND_1(<8 x i64> %a, <8 x i64> %b) {
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vptestmq %zmm0, %zmm1, %k0
 ; CHECK-NEXT:    kmovb %k0, %eax
-; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retq
   %and.i = and <8 x i64> %b, %a
   %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 -1)
@@ -20,7 +19,6 @@ define i32 @combineTESTM_AND_2(<8 x i64> %a, <8 x i64> %b , i8 %mask) {
 ; CHECK-NEXT:    kmovb %edi, %k1
 ; CHECK-NEXT:    vptestmq %zmm0, %zmm1, %k0 {%k1}
 ; CHECK-NEXT:    kmovb %k0, %eax
-; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retq
   %and.i = and <8 x i64> %b, %a
   %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask)
@@ -34,7 +32,6 @@ define i32 @combineTESTM_AND_mask_3(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vptestmq (%rdi), %zmm0, %k0 {%k1}
 ; CHECK-NEXT:    kmovb %k0, %eax
-; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retq
   %b = load <8 x i64>, <8 x i64>* %bptr
   %and.i = and <8 x i64> %a, %b
@@ -49,7 +46,6 @@ define i32 @combineTESTM_AND_mask_4(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vptestmq (%rdi), %zmm0, %k0 {%k1}
 ; CHECK-NEXT:    kmovb %k0, %eax
-; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retq
   %b = load <8 x i64>, <8 x i64>* %bptr
   %and.i = and <8 x i64> %b, %a
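
Note: the sketch below is not part of the commit; it only illustrates what the new patterns buy. KMOVW/KMOVB with a 32-bit GPR destination already zero-extend the mask value into the full register, so once instruction selection matches the zext together with the mask-to-integer bitconvert (the new (i32 (zext ...)) patterns above), the separate MOVZX is redundant. A minimal IR sketch mirroring @mask8_zext from the test diff; the function name is illustrative, and the expected sequence for an AVX-512 DQ target (SKX) is taken from the updated CHECK lines:

  define i32 @zext_mask8_sketch(i8 %x) {
    %m0  = bitcast i8 %x to <8 x i1>     ; kmovb %edi, %k0
    %m1  = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>   ; knotb %k0, %k0
    %m2  = bitcast <8 x i1> %m1 to i8
    %ret = zext i8 %m2 to i32            ; kmovb %k0, %eax (no trailing movzbl with this patch)
    ret i32 %ret
  }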