From 78c322d6f14864d31716000cff32d237768e813d Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 9 Nov 2016 00:24:44 +0000
Subject: [PATCH] [ValueTracking] recognize obfuscated variants of umin/umax

The smallest tests that expose this are codegen tests (because SelectionDAGBuilder::visitSelect() uses matchSelectPattern
to create UMAX/UMIN nodes), but it's also possible to see the effects in IR alone with folds of min/max pairs.

If these were written as unsigned compares in IR, InstCombine canonicalizes the unsigned compares to signed compares.
Ie, running the optimizer pessimizes the codegen for this case without this patch:

define <4 x i32> @umax_vec(<4 x i32> %x) {
  %cmp = icmp ugt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %sel = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  ret <4 x i32> %sel
}

$ ./opt umax.ll -S | ./llc -o - -mattr=avx

vpmaxud LCPI0_0(%rip), %xmm0, %xmm0

$ ./opt -instcombine umax.ll -S | ./llc -o - -mattr=avx

vpxor %xmm1, %xmm1, %xmm1
vpcmpgtd  %xmm0, %xmm1, %xmm1
vmovaps LCPI0_0(%rip), %xmm2    ## xmm2 = [2147483647,2147483647,2147483647,2147483647]
vblendvps %xmm1, %xmm0, %xmm2, %xmm0

Differential Revision: https://reviews.llvm.org/D26096


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286318 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/ValueTracking.cpp       | 26 +++++++++++++++++++++++++-
 test/CodeGen/X86/vec_minmax_match.ll | 20 ++++----------------
 2 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index c8cba38d566..a14605d6ebf 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -3969,9 +3969,33 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
       }
     }
 
+    // An unsigned min/max can be written with a signed compare.
+    const APInt *C2;
+    if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
+        (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
+      // Is the sign bit set?
+      // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
+      // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
+      if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) {
+        LHS = TrueVal;
+        RHS = FalseVal;
+        return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+      }
+
+      // Is the sign bit clear?
+      // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
+      // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
+      if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
+          C2->isMinSignedValue()) {
+        LHS = TrueVal;
+        RHS = FalseVal;
+        return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+      }
+    }
+
+    // Look through 'not' ops to find disguised signed min/max.
     // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
     // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
-    const APInt *C2;
     if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
         match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2 &&
         (Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SLT)) {
diff --git a/test/CodeGen/X86/vec_minmax_match.ll b/test/CodeGen/X86/vec_minmax_match.ll
index 3b1176fe286..d6860c0e846 100644
--- a/test/CodeGen/X86/vec_minmax_match.ll
+++ b/test/CodeGen/X86/vec_minmax_match.ll
@@ -60,15 +60,10 @@ define <4 x i32> @smax_vec2(<4 x i32> %x) {
   ret <4 x i32> %sel
 }
 
-; FIXME: These are unsigned min/max ops.
-
 define <4 x i32> @umax_vec1(<4 x i32> %x) {
 ; CHECK-LABEL: umax_vec1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm1
-; CHECK-NEXT:    vmovaps {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
-; CHECK-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %cmp = icmp slt <4 x i32> %x, zeroinitializer
@@ -79,9 +74,7 @@ define <4 x i32> @umax_vec1(<4 x i32> %x) {
 define <4 x i32> @umax_vec2(<4 x i32> %x) {
 ; CHECK-LABEL: umax_vec2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1
-; CHECK-NEXT:    vblendvps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -92,9 +85,7 @@ define <4 x i32> @umax_vec2(<4 x i32> %x) {
 define <4 x i32> @umin_vec1(<4 x i32> %x) {
 ; CHECK-LABEL: umin_vec1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm1
-; CHECK-NEXT:    vblendvps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %cmp = icmp slt <4 x i32> %x, zeroinitializer
@@ -105,10 +96,7 @@ define <4 x i32> @umin_vec1(<4 x i32> %x) {
 define <4 x i32> @umin_vec2(<4 x i32> %x) {
 ; CHECK-LABEL: umin_vec2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1
-; CHECK-NEXT:    vmovaps {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; CHECK-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
-- 
2.50.1