From ba1655de511076a84c2a9922bb4fda06fbd65b4b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Nov 2016 06:04:28 +0000 Subject: [PATCH] [AVX-512] Use 'vnot' instead of 'not' in patterns involving vXi1 vectors. This fixes selection of KANDN instructions and allows us to remove an extra set of patterns for KNOT and KXNOR. Reviewers: delena, igorb Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D26134 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285878 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 90 ++++++++++-------------------- test/CodeGen/X86/avx512-mask-op.ll | 10 ++-- test/CodeGen/X86/avx512-select.ll | 7 +-- 3 files changed, 36 insertions(+), 71 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index cef834ba506..104dc2659d5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2458,7 +2458,7 @@ multiclass avx512_mask_unop_all opc, string OpcodeStr, HasBWI>, VEX, PS, VEX_W; } -defm KNOT : avx512_mask_unop_all<0x44, "knot", not>; +defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>; multiclass avx512_mask_unop_int { let Predicates = [HasAVX512] in @@ -2469,27 +2469,15 @@ multiclass avx512_mask_unop_int { } defm : avx512_mask_unop_int<"knot", "KNOT">; -let Predicates = [HasDQI] in -def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>; -let Predicates = [HasAVX512] in -def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>; -let Predicates = [HasBWI] in -def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>; -let Predicates = [HasBWI] in -def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>; - // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit -let Predicates = [HasAVX512, NoDQI] in { -def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), - (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>; -def : Pat<(not VK8:$src), - (COPY_TO_REGCLASS - (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; -} -def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)), - (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>; -def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)), - (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>; +let Predicates = [HasAVX512, NoDQI] in +def : Pat<(vnot VK8:$src), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; + +def : Pat<(vnot VK4:$src), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; +def : Pat<(vnot VK2:$src), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; // Mask binary operation // - KAND, KANDN, KOR, KXNOR, KXOR @@ -2518,13 +2506,16 @@ multiclass avx512_mask_binop_all opc, string OpcodeStr, def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; +// These nodes use 'vnot' instead of 'not' to support vectors. +def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; +def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; -defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>; -defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>; -defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>; -defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; -defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>; -defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; +defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>; +defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>; +defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>; +defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; +defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; multiclass avx512_mask_binop_int { let Predicates = [HasAVX512] in @@ -2541,11 +2532,12 @@ defm : avx512_mask_binop_int<"kor", "KOR">; defm : avx512_mask_binop_int<"kxnor", "KXNOR">; defm : avx512_mask_binop_int<"kxor", "KXOR">; -multiclass avx512_binop_pat { +multiclass avx512_binop_pat { // With AVX512F, 8-bit mask is promoted to 16-bit mask, // for the DQI set, this type is legal and KxxxB instruction is used let Predicates = [NoDQI] in - def : Pat<(OpNode VK8:$src1, VK8:$src2), + def : Pat<(VOpNode VK8:$src1, VK8:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; @@ -2555,47 +2547,21 @@ multiclass avx512_binop_pat { (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK1:$src1, VK16), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; - def : Pat<(OpNode VK2:$src1, VK2:$src2), + def : Pat<(VOpNode VK2:$src1, VK2:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK2:$src1, VK16), (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>; - def : Pat<(OpNode VK4:$src1, VK4:$src2), + def : Pat<(VOpNode VK4:$src1, VK4:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK4:$src1, VK16), (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>; } -defm : avx512_binop_pat; -defm : avx512_binop_pat; -defm : avx512_binop_pat; -defm : avx512_binop_pat; -defm : avx512_binop_pat; - -def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)), - (KXNORWrr VK16:$src1, VK16:$src2)>; -def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), - (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>; -def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)), - (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>; -def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)), - (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>; - -let Predicates = [NoDQI] in -def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16), - (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; - -def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16), - (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; - -def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16), - (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; - -def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), - (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; +defm : avx512_binop_pat; +defm : avx512_binop_pat; +defm : avx512_binop_pat; +defm : avx512_binop_pat; +defm : avx512_binop_pat; // Mask unpacking multiclass avx512_mask_unpck @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1 ; ; SKX-LABEL: test4: ; SKX: ## BB#0: -; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 -; SKX-NEXT: knotw %k0, %k1 -; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} +; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 +; SKX-NEXT: kandnw %k0, %k1, %k0 ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: retq %x_gt_y = icmp sgt <4 x i64> %x, %y @@ -280,8 +280,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1 ; SKX-LABEL: test5: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 -; SKX-NEXT: knotw %k0, %k1 -; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1} +; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 +; SKX-NEXT: kandnw %k1, %k0, %k0 ; SKX-NEXT: vpmovm2q %k0, %xmm0 ; SKX-NEXT: retq %x_gt_y = icmp slt <2 x i64> %x, %y diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll index 4a3695fab18..42579377ef3 100644 --- a/test/CodeGen/X86/avx512-select.ll +++ b/test/CodeGen/X86/avx512-select.ll @@ -133,10 +133,9 @@ define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) { ; CHECK-NEXT: kmovw %edx, %k0 ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: kmovw %esi, %k2 -; CHECK-NEXT: kandw %k0, %k1, %k1 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kandw %k0, %k2, %k0 -; CHECK-NEXT: korw %k0, %k1, %k0 +; CHECK-NEXT: kandnw %k2, %k0, %k2 +; CHECK-NEXT: kandw %k0, %k1, %k0 +; CHECK-NEXT: korw %k2, %k0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %mask = bitcast i8 %m to <8 x i1> -- 2.40.0