/// If a vector select has an operand that is -1 or 0, simplify the select to a
/// bitwise logic operation.
-static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
+ bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+ // Check if the first operand is all zeros.This situation only
+ // applies to avx512.
+ if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse()) {
+ //Invert the cond to not(cond) : xor(op,allones)=not(op)
+ SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, DL, Cond.getValueType()));
+ //Vselect cond, op1, op2 = Vselect not(cond), op2, op1
+ return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+ }
assert(CondVT.isVector() && "Vector select expects a vector selector!");
// To use the condition operand as a bitwise mask, it must have elements that
return SDValue();
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
- bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+ FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
// Try to invert the condition if true value is not all 1s and false value is
// not all 0s.
}
}
- if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG))
+ if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
return V;
// If this is a *dynamic* select (non-constant condition) and we can match
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; CHECK-LABEL: test14:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT: vpcmpgtd %zmm0, %zmm2, %k1
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; CHECK-LABEL: test15:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2
+; CHECK-NEXT: vpcmpgtq %zmm0, %zmm2, %k1
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a