From a610a6eaf32df8a4aa52380cba46c9390fd92c72 Mon Sep 17 00:00:00 2001 From: Guy Blank Date: Tue, 9 May 2017 16:16:48 +0000 Subject: [PATCH] VX512] Only look at lower bit in constant scalar masks for scalar masked instructions only the lower bit of the mask is relevant. so for constant masks we should either do an unmasked operation or no operation, depending on the value of the lower bit. This patch handles cases where the lower bit is '1'. Differential Revision: https://reviews.llvm.org/D32805 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302546 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++++-- test/CodeGen/X86/avx512-scalar_mask.ll | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f0c1e0feed8..68d11100f21 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19021,8 +19021,10 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - if (isAllOnesConstant(Mask)) - return Op; + + if (auto *MaskConst = dyn_cast(Mask)) + if (MaskConst->getZExtValue() & 0x1) + return Op; MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); diff --git a/test/CodeGen/X86/avx512-scalar_mask.ll b/test/CodeGen/X86/avx512-scalar_mask.ll index e8f227bec48..47c6813fa8d 100644 --- a/test/CodeGen/X86/avx512-scalar_mask.ll +++ b/test/CodeGen/X86/avx512-scalar_mask.ll @@ -26,6 +26,7 @@ define <4 x float>@test_var_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> ret < 4 x float> %res } +; FIXME: we should just return %xmm0 here. define <4 x float>@test_const0_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const0_mask: ; CHECK: ## BB#0: @@ -36,6 +37,7 @@ define <4 x float>@test_const0_mask(<4 x float> %v0, <4 x float> %v1, <4 x float ret < 4 x float> %res } +; FIXME: we should zero the lower element of xmm0 and return it. define <4 x float>@test_const0_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const0_maskz: ; CHECK: ## BB#0: @@ -46,6 +48,7 @@ define <4 x float>@test_const0_maskz(<4 x float> %v0, <4 x float> %v1, <4 x floa ret < 4 x float> %res } +; FIXME: we should just return %xmm0 here. define <4 x float>@test_const2_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const2_mask: ; CHECK: ## BB#0: @@ -56,6 +59,7 @@ define <4 x float>@test_const2_mask(<4 x float> %v0, <4 x float> %v1, <4 x float ret < 4 x float> %res } +; FIXME: we should zero the lower element of xmm0 and return it. define <4 x float>@test_const2_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const2_maskz: ; CHECK: ## BB#0: @@ -87,9 +91,7 @@ define <4 x float>@test_const_allone_maskz(<4 x float> %v0, <4 x float> %v1, <4 define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const_3_mask: ; CHECK: ## BB#0: -; CHECK-NEXT: kxnorw %k0, %k0, %k0 -; CHECK-NEXT: kshiftrw $15, %k0, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4) ret < 4 x float> %res @@ -98,9 +100,7 @@ define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x floa define <4 x float>@test_const_3_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const_3_maskz: ; CHECK: ## BB#0: -; CHECK-NEXT: kxnorw %k0, %k0, %k0 -; CHECK-NEXT: kshiftrw $15, %k0, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4) ret < 4 x float> %res -- 2.40.0