From 4e5c5db786a3f332a48cce4df11a954cbf5544f2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@gmail.com>
Date: Mon, 1 May 2017 16:08:06 +0000
Subject: [PATCH] [SelectionDAG] Use known ones to provide a better bound for
 the known zeros for CTTZ/CTLZ operations.

This is the SelectionDAG version of D32521. If we know where at least one 1 is located in the input to these intrinsics, we can place an upper bound on the number of bits needed to represent the count and thus increase the number of known zeros in the output.

I think we can also refine this further for CTTZ_UNDEF/CTLZ_UNDEF by assuming that the answer will never be BitWidth. I've left this out for now because it caused other test failures across multiple targets, usually because of turning ADD into OR based on this new information.

I'll fix CTPOP in a future patch.

Differential Revision: https://reviews.llvm.org/D32692

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301806 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18 ++++++++++++++++--
 test/CodeGen/X86/clz.ll                   | 12 ++----------
 2 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 14b7b58cfc1..209abf9234d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2353,9 +2353,23 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
     break;
   }
   case ISD::CTTZ:
-  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::CTTZ_ZERO_UNDEF: {
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    // If we have a known 1, its position is our upper bound.
+    unsigned PossibleTZ = Known2.One.countTrailingZeros();
+    unsigned LowBits = Log2_32(PossibleTZ) + 1;
+    Known.Zero.setBitsFrom(LowBits);
+    break;
+  }
   case ISD::CTLZ:
-  case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTLZ_ZERO_UNDEF: {
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    // If we have a known 1, its position is our upper bound.
+    unsigned PossibleLZ = Known2.One.countLeadingZeros();
+    unsigned LowBits = Log2_32(PossibleLZ) + 1;
+    Known.Zero.setBitsFrom(LowBits);
+    break;
+  }
   case ISD::CTPOP: {
     Known.Zero.setBitsFrom(Log2_32(BitWidth)+1);
     break;
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index e9f59944adc..9d827fc88b3 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -786,7 +786,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X32-NEXT:    orb $2, %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    bsfl %eax, %eax
-; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -795,7 +794,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X64-NEXT:    orb $2, %dil
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    bsfl %eax, %eax
-; X64-NEXT:    andb $1, %al
 ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-NEXT:    retq
 ;
@@ -805,7 +803,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X32-CLZ-NEXT:    orb $2, %al
 ; X32-CLZ-NEXT:    movzbl %al, %eax
 ; X32-CLZ-NEXT:    tzcntl %eax, %eax
-; X32-CLZ-NEXT:    andb $1, %al
 ; X32-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-CLZ-NEXT:    retl
 ;
@@ -814,7 +811,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X64-CLZ-NEXT:    orb $2, %dil
 ; X64-CLZ-NEXT:    movzbl %dil, %eax
 ; X64-CLZ-NEXT:    tzcntl %eax, %eax
-; X64-CLZ-NEXT:    andb $1, %al
 ; X64-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-CLZ-NEXT:    retq
   %x2 = or i8 %x, 2
@@ -830,8 +826,7 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X32-NEXT:    orb $64, %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    bsrl %eax, %eax
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andb $1, %al
+; X32-NEXT:    xorl $7, %eax
 ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -840,8 +835,7 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X64-NEXT:    orb $64, %dil
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    bsrl %eax, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    andb $1, %al
+; X64-NEXT:    xorl $7, %eax
 ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-NEXT:    retq
 ;
@@ -852,7 +846,6 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X32-CLZ-NEXT:    movzbl %al, %eax
 ; X32-CLZ-NEXT:    lzcntl %eax, %eax
 ; X32-CLZ-NEXT:    addl $-24, %eax
-; X32-CLZ-NEXT:    andb $1, %al
 ; X32-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-CLZ-NEXT:    retl
 ;
@@ -862,7 +855,6 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X64-CLZ-NEXT:    movzbl %dil, %eax
 ; X64-CLZ-NEXT:    lzcntl %eax, %eax
 ; X64-CLZ-NEXT:    addl $-24, %eax
-; X64-CLZ-NEXT:    andb $1, %al
 ; X64-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-CLZ-NEXT:    retq
 
-- 
2.50.1