From 78c322d6f14864d31716000cff32d237768e813d Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 9 Nov 2016 00:24:44 +0000 Subject: [PATCH] [ValueTracking] recognize obfuscated variants of umin/umax The smallest tests that expose this are codegen tests (because SelectionDAGBuilder::visitSelect() uses matchSelectPattern to create UMAX/UMIN nodes), but it's also possible to see the effects in IR alone with folds of min/max pairs. If these were written as unsigned compares in IR, InstCombine canonicalizes the unsigned compares to signed compares. Ie, running the optimizer pessimizes the codegen for this case without this patch: define <4 x i32> @umax_vec(<4 x i32> %x) { %cmp = icmp ugt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> %sel = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ret <4 x i32> %sel } $ ./opt umax.ll -S | ./llc -o - -mattr=avx vpmaxud LCPI0_0(%rip), %xmm0, %xmm0 $ ./opt -instcombine umax.ll -S | ./llc -o - -mattr=avx vpxor %xmm1, %xmm1, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm1 vmovaps LCPI0_0(%rip), %xmm2 ## xmm2 = [2147483647,2147483647,2147483647,2147483647] vblendvps %xmm1, %xmm0, %xmm2, %xmm0 Differential Revision: https://reviews.llvm.org/D26096 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286318 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 26 +++++++++++++++++++++++++- test/CodeGen/X86/vec_minmax_match.ll | 20 ++++---------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index c8cba38d566..a14605d6ebf 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -3969,9 +3969,33 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, } } + // An unsigned min/max can be written with a signed compare. + const APInt *C2; + if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || + (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { + // Is the sign bit set? + // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? 
X : MAXVAL ==> UMAX + // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN + if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) { + LHS = TrueVal; + RHS = FalseVal; + return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; + } + + // Is the sign bit clear? + // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX + // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN + if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() && + C2->isMinSignedValue()) { + LHS = TrueVal; + RHS = FalseVal; + return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; + } + } + + // Look through 'not' ops to find disguised signed min/max. // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C) // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C) - const APInt *C2; if (match(TrueVal, m_Not(m_Specific(CmpLHS))) && match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2 && (Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SLT)) { diff --git a/test/CodeGen/X86/vec_minmax_match.ll b/test/CodeGen/X86/vec_minmax_match.ll index 3b1176fe286..d6860c0e846 100644 --- a/test/CodeGen/X86/vec_minmax_match.ll +++ b/test/CodeGen/X86/vec_minmax_match.ll @@ -60,15 +60,10 @@ define <4 x i32> @smax_vec2(<4 x i32> %x) { ret <4 x i32> %sel } -; FIXME: These are unsigned min/max ops. 
- define <4 x i32> @umax_vec1(<4 x i32> %x) { ; CHECK-LABEL: umax_vec1: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] -; CHECK-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq ; %cmp = icmp slt <4 x i32> %x, zeroinitializer @@ -79,9 +74,7 @@ define <4 x i32> @umax_vec1(<4 x i32> %x) { define <4 x i32> @umax_vec2(<4 x i32> %x) { ; CHECK-LABEL: umax_vec2: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 -; CHECK-NEXT: vblendvps %xmm1, {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq ; %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -92,9 +85,7 @@ define <4 x i32> @umax_vec2(<4 x i32> %x) { define <4 x i32> @umin_vec1(<4 x i32> %x) { ; CHECK-LABEL: umin_vec1: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vblendvps %xmm1, {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq ; %cmp = icmp slt <4 x i32> %x, zeroinitializer @@ -105,10 +96,7 @@ define <4 x i32> @umin_vec1(<4 x i32> %x) { define <4 x i32> @umin_vec2(<4 x i32> %x) { ; CHECK-LABEL: umin_vec2: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 -; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; CHECK-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq ; %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> -- 2.40.0