From 4ad4edc5302f50c1c85a5ff0f59d3fde72e5ec32 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 25 Jun 2019 14:46:52 +0000 Subject: [PATCH] [SDAG] expand ctpop != 1 Change the generic ctpop expansion to more efficiently handle a check for not-a-power-of-two value: (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) This is the inverted predicate sibling pattern that was added with: D63004 This should have been done before I changed IR canonicalization to favor this form with: rL364246 ...so if this requires revert/changing, the earlier commit may also need to be modified. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364319 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 22 ++++++++--------- test/CodeGen/X86/ctpop-combine.ll | 27 +++++---------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d83bd108b67..74ab96afe5a 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2715,19 +2715,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // If ctpop is not supported, expand a power-of-2 comparison based on it. 
- if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT)) { + if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) - if (Cond == ISD::SETEQ) { - SDValue Zero = DAG.getConstant(0, dl, CTVT); - SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); - SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, ISD::SETNE); - SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, ISD::SETEQ); - return DAG.getNode(ISD::AND, dl, VT, LHS, RHS); - } - // TODO: // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) + SDValue Zero = DAG.getConstant(0, dl, CTVT); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); + SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); + unsigned LogicOpcode = Cond == ISD::SETEQ ? 
ISD::AND : ISD::OR; + return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); } } diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll index f351637c113..cdef5771f2c 100644 --- a/test/CodeGen/X86/ctpop-combine.ll +++ b/test/CodeGen/X86/ctpop-combine.ll @@ -147,28 +147,13 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone { ; ; NO-POPCOUNT-LABEL: ctpop_ne_one: ; NO-POPCOUNT: # %bb.0: -; NO-POPCOUNT-NEXT: movq %rdi, %rax -; NO-POPCOUNT-NEXT: shrq %rax -; NO-POPCOUNT-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 -; NO-POPCOUNT-NEXT: andq %rax, %rcx -; NO-POPCOUNT-NEXT: subq %rcx, %rdi -; NO-POPCOUNT-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; NO-POPCOUNT-NEXT: movq %rdi, %rcx -; NO-POPCOUNT-NEXT: andq %rax, %rcx -; NO-POPCOUNT-NEXT: shrq $2, %rdi -; NO-POPCOUNT-NEXT: andq %rax, %rdi -; NO-POPCOUNT-NEXT: addq %rcx, %rdi -; NO-POPCOUNT-NEXT: movq %rdi, %rax -; NO-POPCOUNT-NEXT: shrq $4, %rax -; NO-POPCOUNT-NEXT: addq %rdi, %rax -; NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F -; NO-POPCOUNT-NEXT: andq %rax, %rcx -; NO-POPCOUNT-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101 -; NO-POPCOUNT-NEXT: imulq %rcx, %rdx -; NO-POPCOUNT-NEXT: shrq $56, %rdx -; NO-POPCOUNT-NEXT: xorl %eax, %eax -; NO-POPCOUNT-NEXT: cmpq $1, %rdx +; NO-POPCOUNT-NEXT: leaq -1(%rdi), %rax +; NO-POPCOUNT-NEXT: testq %rax, %rdi ; NO-POPCOUNT-NEXT: setne %al +; NO-POPCOUNT-NEXT: testq %rdi, %rdi +; NO-POPCOUNT-NEXT: sete %cl +; NO-POPCOUNT-NEXT: orb %al, %cl +; NO-POPCOUNT-NEXT: movzbl %cl, %eax ; NO-POPCOUNT-NEXT: retq %count = tail call i64 @llvm.ctpop.i64(i64 %x) %cmp = icmp ne i64 %count, 1 -- 2.40.0